deepspider 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +3 -0
- package/README.md +21 -15
- package/package.json +9 -7
- package/src/agent/core/PanelBridge.js +56 -78
- package/src/agent/core/StreamHandler.js +244 -20
- package/src/agent/index.js +120 -23
- package/src/agent/logger.js +183 -8
- package/src/agent/middleware/memoryFlush.js +48 -0
- package/src/agent/middleware/report.js +95 -37
- package/src/agent/middleware/subagent.js +236 -0
- package/src/agent/middleware/toolAvailability.js +37 -0
- package/src/agent/middleware/toolGuard.js +187 -0
- package/src/agent/middleware/validationWorkflow.js +171 -0
- package/src/agent/prompts/system.js +310 -59
- package/src/agent/run.js +168 -20
- package/src/agent/sessions.js +88 -0
- package/src/agent/skills/anti-detect/SKILL.md +89 -14
- package/src/agent/skills/captcha/SKILL.md +93 -19
- package/src/agent/skills/crawler/SKILL.md +64 -3
- package/src/agent/skills/crawler/evolved.md +9 -1
- package/src/agent/skills/dynamic-analysis/SKILL.md +74 -7
- package/src/agent/skills/env/SKILL.md +75 -0
- package/src/agent/skills/js2python/evolved.md +5 -1
- package/src/agent/skills/sandbox/SKILL.md +35 -0
- package/src/agent/skills/static-analysis/SKILL.md +98 -2
- package/src/agent/skills/static-analysis/evolved.md +5 -1
- package/src/agent/subagents/anti-detect.js +36 -24
- package/src/agent/subagents/captcha.js +35 -28
- package/src/agent/subagents/crawler.js +40 -105
- package/src/agent/subagents/factory.js +129 -9
- package/src/agent/subagents/index.js +4 -13
- package/src/agent/subagents/js2python.js +25 -35
- package/src/agent/subagents/reverse.js +180 -0
- package/src/agent/tools/analysis.js +101 -8
- package/src/agent/tools/anti-detect.js +5 -2
- package/src/agent/tools/browser.js +186 -13
- package/src/agent/tools/capture.js +24 -3
- package/src/agent/tools/correlate.js +129 -15
- package/src/agent/tools/crawler.js +3 -2
- package/src/agent/tools/crawlerGenerator.js +90 -0
- package/src/agent/tools/debug.js +43 -6
- package/src/agent/tools/evolve.js +5 -2
- package/src/agent/tools/extractor.js +5 -1
- package/src/agent/tools/file.js +14 -5
- package/src/agent/tools/generateHook.js +66 -0
- package/src/agent/tools/hookManager.js +19 -9
- package/src/agent/tools/index.js +36 -21
- package/src/agent/tools/nodejs.js +41 -6
- package/src/agent/tools/patch.js +1 -1
- package/src/agent/tools/sandbox.js +21 -1
- package/src/agent/tools/scratchpad.js +70 -0
- package/src/agent/tools/store.js +1 -1
- package/src/agent/tools/tracing.js +26 -0
- package/src/agent/tools/verifyAlgorithm.js +117 -0
- package/src/browser/EnvBridge.js +27 -13
- package/src/browser/client.js +128 -18
- package/src/browser/collector.js +101 -22
- package/src/browser/defaultHooks.js +3 -1
- package/src/browser/hooks/index.js +5 -0
- package/src/browser/interceptors/AntiDebugInterceptor.js +132 -0
- package/src/browser/interceptors/NetworkInterceptor.js +76 -12
- package/src/browser/interceptors/ScriptInterceptor.js +32 -7
- package/src/browser/interceptors/index.js +1 -0
- package/src/browser/ui/analysisPanel.js +541 -464
- package/src/cli/commands/config.js +11 -3
- package/src/config/paths.js +9 -1
- package/src/config/settings.js +7 -1
- package/src/core/PatchGenerator.js +24 -4
- package/src/core/Sandbox.js +140 -3
- package/src/env/EnvCodeGenerator.js +60 -88
- package/src/env/modules/bom/history.js +6 -0
- package/src/env/modules/bom/location.js +6 -0
- package/src/env/modules/bom/navigator.js +13 -0
- package/src/env/modules/bom/screen.js +6 -0
- package/src/env/modules/bom/storage.js +7 -0
- package/src/env/modules/dom/document.js +14 -0
- package/src/env/modules/dom/event.js +4 -0
- package/src/env/modules/index.js +27 -10
- package/src/env/modules/webapi/fetch.js +4 -0
- package/src/env/modules/webapi/url.js +4 -0
- package/src/env/modules/webapi/xhr.js +8 -0
- package/src/store/DataStore.js +125 -42
- package/src/store/Store.js +2 -1
- package/src/agent/subagents/dynamic.js +0 -64
- package/src/agent/subagents/env-agent.js +0 -82
- package/src/agent/subagents/sandbox.js +0 -55
- package/src/agent/subagents/static.js +0 -66
package/src/browser/collector.js
CHANGED
|
@@ -15,13 +15,17 @@ export class EnvCollector {
|
|
|
15
15
|
* @param {object} options - 采集选项
|
|
16
16
|
*/
|
|
17
17
|
async collect(path, options = {}) {
|
|
18
|
-
const { depth = 1, includeProto = false, useCache = true } = options;
|
|
18
|
+
const { depth = 1, includeProto = false, useCache = true, timeout = 5000 } = options;
|
|
19
19
|
|
|
20
20
|
if (useCache && this.cache.has(path)) {
|
|
21
21
|
return this.cache.get(path);
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
// 使用 Promise.race 添加超时保护
|
|
25
|
+
const evaluatePromise = this.page.evaluate(({ path, depth, includeProto: _includeProto }) => {
|
|
26
|
+
// 用于检测循环引用的 WeakSet
|
|
27
|
+
const seen = new WeakSet();
|
|
28
|
+
|
|
25
29
|
function getByPath(obj, path) {
|
|
26
30
|
return path.split('.').reduce((o, k) => o && o[k], obj);
|
|
27
31
|
}
|
|
@@ -40,29 +44,55 @@ export class EnvCollector {
|
|
|
40
44
|
return { type, value: val };
|
|
41
45
|
}
|
|
42
46
|
|
|
47
|
+
// 检测循环引用
|
|
48
|
+
if (seen.has(val)) {
|
|
49
|
+
return { type: 'object', value: '[Circular]', circular: true };
|
|
50
|
+
}
|
|
51
|
+
|
|
43
52
|
if (currentDepth >= maxDepth) {
|
|
44
53
|
return { type: 'object', value: '[Object]', truncated: true };
|
|
45
54
|
}
|
|
46
55
|
|
|
56
|
+
seen.add(val);
|
|
57
|
+
|
|
47
58
|
if (Array.isArray(val)) {
|
|
48
59
|
return {
|
|
49
60
|
type: 'array',
|
|
50
|
-
|
|
61
|
+
length: val.length,
|
|
62
|
+
value: val.slice(0, 20).map(v => serialize(v, currentDepth + 1, maxDepth))
|
|
51
63
|
};
|
|
52
64
|
}
|
|
53
65
|
|
|
54
66
|
const result = { type: 'object', properties: {} };
|
|
55
|
-
|
|
67
|
+
let keys;
|
|
68
|
+
try {
|
|
69
|
+
keys = Object.getOwnPropertyNames(val);
|
|
70
|
+
} catch (e) {
|
|
71
|
+
return { type: 'object', value: '[Error accessing keys]', error: e.message };
|
|
72
|
+
}
|
|
56
73
|
|
|
57
|
-
for (const key of keys.slice(0,
|
|
74
|
+
for (const key of keys.slice(0, 30)) {
|
|
58
75
|
try {
|
|
59
76
|
const desc = Object.getOwnPropertyDescriptor(val, key);
|
|
77
|
+
if (!desc) continue;
|
|
78
|
+
|
|
79
|
+
// 安全处理:避免触发有副作用的 getter
|
|
60
80
|
if (desc.get) {
|
|
81
|
+
// 对于 getter,只记录描述符信息,不执行 getter
|
|
82
|
+
result.properties[key] = {
|
|
83
|
+
type: 'getter',
|
|
84
|
+
hasGetter: true,
|
|
85
|
+
enumerable: desc.enumerable,
|
|
86
|
+
configurable: desc.configurable
|
|
87
|
+
};
|
|
88
|
+
} else if (desc.set && desc.value === undefined) {
|
|
89
|
+
// 只有 setter 没有 getter
|
|
61
90
|
result.properties[key] = {
|
|
62
|
-
|
|
63
|
-
|
|
91
|
+
type: 'setter',
|
|
92
|
+
hasSetter: true
|
|
64
93
|
};
|
|
65
94
|
} else {
|
|
95
|
+
// 普通值
|
|
66
96
|
result.properties[key] = serialize(desc.value, currentDepth + 1, maxDepth);
|
|
67
97
|
}
|
|
68
98
|
} catch (e) {
|
|
@@ -89,15 +119,19 @@ export class EnvCollector {
|
|
|
89
119
|
|
|
90
120
|
let descriptor = null;
|
|
91
121
|
if (parent) {
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
122
|
+
try {
|
|
123
|
+
const desc = Object.getOwnPropertyDescriptor(parent, propName);
|
|
124
|
+
if (desc) {
|
|
125
|
+
descriptor = {
|
|
126
|
+
configurable: desc.configurable,
|
|
127
|
+
enumerable: desc.enumerable,
|
|
128
|
+
writable: desc.writable,
|
|
129
|
+
hasGetter: !!desc.get,
|
|
130
|
+
hasSetter: !!desc.set
|
|
131
|
+
};
|
|
132
|
+
}
|
|
133
|
+
} catch (e) {
|
|
134
|
+
// 忽略描述符读取错误
|
|
101
135
|
}
|
|
102
136
|
}
|
|
103
137
|
|
|
@@ -112,7 +146,19 @@ export class EnvCollector {
|
|
|
112
146
|
}
|
|
113
147
|
}, { path, depth, includeProto });
|
|
114
148
|
|
|
115
|
-
|
|
149
|
+
// 添加超时
|
|
150
|
+
const timeoutPromise = new Promise((_, reject) =>
|
|
151
|
+
setTimeout(() => reject(new Error('采集超时')), timeout)
|
|
152
|
+
);
|
|
153
|
+
|
|
154
|
+
let result;
|
|
155
|
+
try {
|
|
156
|
+
result = await Promise.race([evaluatePromise, timeoutPromise]);
|
|
157
|
+
} catch (e) {
|
|
158
|
+
result = { success: false, error: e.message };
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
if (result?.success && useCache) {
|
|
116
162
|
this.cache.set(path, result);
|
|
117
163
|
}
|
|
118
164
|
|
|
@@ -154,9 +200,12 @@ export class EnvCollector {
|
|
|
154
200
|
* 深度采集整个对象
|
|
155
201
|
*/
|
|
156
202
|
async collectDeep(rootPath, options = {}) {
|
|
157
|
-
const { maxDepth = 3, maxProps = 100 } = options;
|
|
203
|
+
const { maxDepth = 3, maxProps = 100, timeout = 5000 } = options;
|
|
204
|
+
|
|
205
|
+
const evaluatePromise = this.page.evaluate(({ rootPath, maxDepth, maxProps }) => {
|
|
206
|
+
// 用于检测循环引用的 WeakSet
|
|
207
|
+
const seen = new WeakSet();
|
|
158
208
|
|
|
159
|
-
return await this.page.evaluate(({ rootPath, maxDepth, maxProps }) => {
|
|
160
209
|
function getByPath(obj, path) {
|
|
161
210
|
return path.split('.').reduce((o, k) => o && o[k], obj);
|
|
162
211
|
}
|
|
@@ -165,19 +214,38 @@ export class EnvCollector {
|
|
|
165
214
|
if (depth > maxDepth || collected.size > maxProps) return;
|
|
166
215
|
if (!obj || typeof obj !== 'object') return;
|
|
167
216
|
|
|
217
|
+
// 检测循环引用
|
|
218
|
+
if (seen.has(obj)) return;
|
|
219
|
+
seen.add(obj);
|
|
220
|
+
|
|
168
221
|
const keys = Object.getOwnPropertyNames(obj);
|
|
169
|
-
for (const key of keys) {
|
|
222
|
+
for (const key of keys.slice(0, 30)) {
|
|
170
223
|
if (collected.size > maxProps) break;
|
|
171
224
|
|
|
172
225
|
const fullPath = path ? `${path}.${key}` : key;
|
|
173
226
|
try {
|
|
174
|
-
const
|
|
175
|
-
|
|
227
|
+
const desc = Object.getOwnPropertyDescriptor(obj, key);
|
|
228
|
+
if (!desc) continue;
|
|
229
|
+
|
|
230
|
+
// 安全处理:避免触发有副作用的 getter
|
|
231
|
+
let val;
|
|
232
|
+
let type;
|
|
233
|
+
if (desc.get) {
|
|
234
|
+
type = 'getter';
|
|
235
|
+
val = '[Getter]';
|
|
236
|
+
} else if (desc.set && desc.value === undefined) {
|
|
237
|
+
type = 'setter';
|
|
238
|
+
val = '[Setter]';
|
|
239
|
+
} else {
|
|
240
|
+
val = desc.value;
|
|
241
|
+
type = typeof val;
|
|
242
|
+
}
|
|
176
243
|
|
|
177
244
|
collected.set(fullPath, {
|
|
178
245
|
type,
|
|
179
246
|
value: type === 'function' ? '[Function]' :
|
|
180
247
|
type === 'object' ? '[Object]' :
|
|
248
|
+
type === 'getter' || type === 'setter' ? val :
|
|
181
249
|
val
|
|
182
250
|
});
|
|
183
251
|
|
|
@@ -204,6 +272,17 @@ export class EnvCollector {
|
|
|
204
272
|
properties: Object.fromEntries(collected)
|
|
205
273
|
};
|
|
206
274
|
}, { rootPath, maxDepth, maxProps });
|
|
275
|
+
|
|
276
|
+
// 添加超时保护
|
|
277
|
+
const timeoutPromise = new Promise((_, reject) =>
|
|
278
|
+
setTimeout(() => reject(new Error('collectDeep timeout')), timeout)
|
|
279
|
+
);
|
|
280
|
+
|
|
281
|
+
try {
|
|
282
|
+
return await Promise.race([evaluatePromise, timeoutPromise]);
|
|
283
|
+
} catch (e) {
|
|
284
|
+
return { success: false, error: e.message };
|
|
285
|
+
}
|
|
207
286
|
}
|
|
208
287
|
|
|
209
288
|
// === 特殊环境采集 ===
|
|
@@ -160,7 +160,9 @@ function getCookieHook() {
|
|
|
160
160
|
return value;
|
|
161
161
|
},
|
|
162
162
|
set: function(val) {
|
|
163
|
-
|
|
163
|
+
// 解析 cookie name(cookie 格式: "name=value; expires=...; path=...")
|
|
164
|
+
const cookieName = val?.split('=')[0]?.trim();
|
|
165
|
+
deepspider.log('cookie', { action: 'write', name: cookieName, value: val });
|
|
164
166
|
return cookieDesc.set.call(document, val);
|
|
165
167
|
},
|
|
166
168
|
configurable: true
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
export class HookManager {
|
|
8
8
|
constructor() {
|
|
9
9
|
this.logs = [];
|
|
10
|
+
this.maxLogs = 5000;
|
|
10
11
|
this.onLog = null;
|
|
11
12
|
this.injected = false;
|
|
12
13
|
}
|
|
@@ -37,6 +38,10 @@ export class HookManager {
|
|
|
37
38
|
text,
|
|
38
39
|
timestamp: Date.now(),
|
|
39
40
|
});
|
|
41
|
+
// 超过上限时丢弃最旧的 20%
|
|
42
|
+
if (this.logs.length > this.maxLogs) {
|
|
43
|
+
this.logs = this.logs.slice(Math.floor(this.maxLogs * 0.2));
|
|
44
|
+
}
|
|
40
45
|
if (this.onLog) {
|
|
41
46
|
this.onLog({ type: msg.type(), text });
|
|
42
47
|
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
 * DeepSpider - interceptor that defeats "infinite debugger" anti-debugging traps.
 *
 * Scripts whose source contains a `debugger` statement are blackboxed through
 * the CDP command `Debugger.setBlackboxedRanges`, so the statement is skipped.
 * No runtime overhead is added, the source is not modified, and integrity
 * checks are therefore not triggered.
 *
 * Known limitation: /\bdebugger\b/ also matches the word inside strings and
 * comments. This is acceptable for anti-crawling work: a wrongly blackboxed
 * script still runs normally, it just cannot be stepped through.
 */
export class AntiDebugInterceptor {
  /**
   * @param {object} cdpClient - CDP session exposing `on(event, handler)` and
   *   `send(method, params)`, where `send` returns a promise.
   */
  constructor(cdpClient) {
    this.client = cdpClient;
    this.blackboxedScripts = new Set();
    // High-frequency `debugger` ("storm") detection state.
    this.pausedCount = 0;
    this.pausedWindowStart = 0;
    this.PAUSED_WINDOW_MS = 1000; // length of one counting window
    this.PAUSED_THRESHOLD = 5; // more pauses than this inside one window is a storm
    this.STORM_AUTO_EXIT_MS = 3000; // lifetime of an automatically detected storm
    this.STORM_MANUAL_EXIT_MS = 5000; // lifetime of a manually enabled storm
    this.stormMode = false; // while true, every pause is resumed, breakpoints included
    this.stormTimer = null; // pending auto-exit timer, if any
    this.started = false; // guards against registering the listener twice
  }

  /**
   * Subscribe to `Debugger.paused`. Calling this more than once is a no-op, so
   * each pause is handled (and resumed) exactly once.
   *
   * @returns {Promise<void>}
   */
  async start() {
    if (this.started) return;
    this.started = true;

    // Fallback for synchronous `debugger` statements that hit before the
    // blackbox command was applied (timing race): resume automatically.
    this.client.on('Debugger.paused', (params) => this.handlePaused(params));

    console.log('[AntiDebugInterceptor] 已启动');
  }

  /**
   * Handle one `Debugger.paused` event.
   *
   * The pause reason for a `debugger` statement differs between Chrome
   * versions ('other' or 'debugCommand'), so anything that is not a breakpoint
   * pause (reason 'breakpoint' or a non-empty `hitBreakpoints`) is resumed.
   *
   * @param {object} [params] - CDP `Debugger.paused` event payload.
   */
  handlePaused(params = {}) {
    // Storm mode: resume unconditionally and do not count the pause.
    if (this.stormMode) {
      this.resume();
      return;
    }

    // Leave breakpoint pauses paused.
    if (params.reason === 'breakpoint') return;
    if (params.hitBreakpoints?.length > 0) return;

    // Count pauses inside a sliding window to detect a storm.
    const now = Date.now();
    if (now - this.pausedWindowStart > this.PAUSED_WINDOW_MS) {
      this.pausedWindowStart = now;
      this.pausedCount = 1;
    } else {
      this.pausedCount++;
    }

    if (this.pausedCount > this.PAUSED_THRESHOLD) {
      console.log('[AntiDebugInterceptor] 检测到 debugger 风暴,启用风暴模式');
      this.stormMode = true;
      this.armStormTimer(this.STORM_AUTO_EXIT_MS, '[AntiDebugInterceptor] 退出风暴模式');
    }

    this.resume();
  }

  /**
   * Send `Debugger.resume`, ignoring failures: the target may already be
   * running or gone, and there is nothing useful to do about it.
   */
  resume() {
    this.client.send('Debugger.resume').catch(() => {});
  }

  /** Cancel the pending storm auto-exit timer, if any. */
  clearStormTimer() {
    if (this.stormTimer) {
      clearTimeout(this.stormTimer);
      this.stormTimer = null;
    }
  }

  /**
   * (Re)schedule leaving storm mode after `delayMs`.
   *
   * @param {number} delayMs - delay before storm mode is switched off.
   * @param {string} exitMessage - message logged when the timer fires.
   */
  armStormTimer(delayMs, exitMessage) {
    this.clearStormTimer();
    this.stormTimer = setTimeout(() => {
      this.stormMode = false;
      this.pausedCount = 0;
      this.stormTimer = null;
      console.log(exitMessage);
    }, delayMs);
    // Do not keep the Node.js process alive just to leave storm mode.
    this.stormTimer.unref?.();
  }

  /**
   * Blackbox the whole script when its source contains `debugger`.
   * Driven by the ScriptInterceptor.onSource callback so the source is not
   * fetched a second time.
   *
   * @param {string} scriptId - CDP script identifier.
   * @param {string} scriptSource - full script source text.
   */
  checkScript(scriptId, scriptSource) {
    if (typeof scriptSource !== 'string') return;
    if (this.blackboxedScripts.has(scriptId)) return;
    if (!/\bdebugger\b/.test(scriptSource)) return;

    // Record the script before the command settles so an unblackbox() issued
    // in the meantime is not silently skipped; roll back if the command fails.
    this.blackboxedScripts.add(scriptId);
    this.client.send('Debugger.setBlackboxedRanges', {
      scriptId,
      // A single position blackboxes from there to the end of the script.
      positions: [{ lineNumber: 0, columnNumber: 0 }],
    }).catch(() => {
      this.blackboxedScripts.delete(scriptId);
    });
  }

  /**
   * Remove the blackbox from a script (used by the breakpoint tooling).
   * Does nothing for scripts this interceptor did not blackbox.
   *
   * @param {string} scriptId - CDP script identifier.
   * @returns {Promise<void>} rejects if the CDP command fails.
   */
  async unblackbox(scriptId) {
    if (!this.blackboxedScripts.has(scriptId)) return;
    await this.client.send('Debugger.setBlackboxedRanges', {
      scriptId,
      positions: [],
    });
    this.blackboxedScripts.delete(scriptId);
  }

  /**
   * Manually enable or disable storm mode, for pages with aggressive
   * anti-debugging. A manually enabled storm switches itself off after
   * STORM_MANUAL_EXIT_MS.
   *
   * @param {boolean} enabled - truthy to enable, falsy to disable.
   */
  setStormMode(enabled) {
    this.clearStormTimer();

    this.stormMode = Boolean(enabled);
    if (this.stormMode) {
      console.log('[AntiDebugInterceptor] 手动启用风暴模式');
      this.armStormTimer(this.STORM_MANUAL_EXIT_MS, '[AntiDebugInterceptor] 自动退出风暴模式');
    } else {
      console.log('[AntiDebugInterceptor] 手动禁用风暴模式');
      this.pausedCount = 0;
    }
  }

  /**
   * @returns {boolean} whether storm mode is currently active.
   */
  isStormMode() {
    return this.stormMode;
  }
}
|
|
@@ -46,11 +46,16 @@ export class NetworkInterceptor {
|
|
|
46
46
|
this.onLoadingFinished(params);
|
|
47
47
|
});
|
|
48
48
|
|
|
49
|
+
// 监听加载失败(清理 pendingRequests,防止内存泄漏)
|
|
50
|
+
this.client.on('Network.loadingFailed', (params) => {
|
|
51
|
+
this.pendingRequests.delete(params.requestId);
|
|
52
|
+
});
|
|
53
|
+
|
|
49
54
|
console.log('[NetworkInterceptor] 已启动');
|
|
50
55
|
}
|
|
51
56
|
|
|
52
57
|
onRequest(params) {
|
|
53
|
-
const { requestId, request, timestamp } = params;
|
|
58
|
+
const { requestId, request, timestamp, initiator } = params;
|
|
54
59
|
|
|
55
60
|
// 只记录 XHR/Fetch 请求
|
|
56
61
|
const type = params.type;
|
|
@@ -62,10 +67,37 @@ export class NetworkInterceptor {
|
|
|
62
67
|
headers: request.headers,
|
|
63
68
|
postData: request.postData,
|
|
64
69
|
timestamp: timestamp * 1000,
|
|
65
|
-
pageUrl: this.getPageUrl()
|
|
70
|
+
pageUrl: this.getPageUrl(),
|
|
71
|
+
initiator: this.formatInitiator(initiator),
|
|
66
72
|
});
|
|
67
73
|
}
|
|
68
74
|
|
|
75
|
+
/**
|
|
76
|
+
* 精简 initiator 调用栈(只保留前 5 帧,过滤内部帧)
|
|
77
|
+
*/
|
|
78
|
+
formatInitiator(initiator) {
|
|
79
|
+
if (!initiator) return null;
|
|
80
|
+
const result = { type: initiator.type };
|
|
81
|
+
if (initiator.url) {
|
|
82
|
+
result.url = initiator.url;
|
|
83
|
+
result.lineNumber = initiator.lineNumber;
|
|
84
|
+
}
|
|
85
|
+
if (initiator.stack?.callFrames) {
|
|
86
|
+
result.callFrames = initiator.stack.callFrames
|
|
87
|
+
.filter(f => f.url && !f.url.includes('patchright') && !f.url.includes('__playwright'))
|
|
88
|
+
.slice(0, 5)
|
|
89
|
+
.map(f => ({
|
|
90
|
+
functionName: f.functionName || '(anonymous)',
|
|
91
|
+
url: f.url,
|
|
92
|
+
lineNumber: f.lineNumber,
|
|
93
|
+
columnNumber: f.columnNumber,
|
|
94
|
+
}));
|
|
95
|
+
}
|
|
96
|
+
// 只有 type 没有实际定位信息时返回 null
|
|
97
|
+
if (!result.url && !result.callFrames?.length) return null;
|
|
98
|
+
return result;
|
|
99
|
+
}
|
|
100
|
+
|
|
69
101
|
onResponse(params) {
|
|
70
102
|
const { requestId, response } = params;
|
|
71
103
|
const pending = this.pendingRequests.get(requestId);
|
|
@@ -81,15 +113,31 @@ export class NetworkInterceptor {
|
|
|
81
113
|
if (!pending) return;
|
|
82
114
|
|
|
83
115
|
try {
|
|
84
|
-
//
|
|
85
|
-
const
|
|
86
|
-
|
|
87
|
-
|
|
116
|
+
// 获取响应体,添加超时保护防止 CDP 命令挂起
|
|
117
|
+
const bodyPromise = this.client.send('Network.getResponseBody', { requestId });
|
|
118
|
+
const timeoutPromise = new Promise((_, reject) =>
|
|
119
|
+
setTimeout(() => reject(new Error('getResponseBody timeout')), 5000)
|
|
88
120
|
);
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
121
|
+
const { body, base64Encoded } = await Promise.race([bodyPromise, timeoutPromise]);
|
|
122
|
+
|
|
123
|
+
// 处理响应体:检测二进制内容,避免损坏
|
|
124
|
+
let responseBody;
|
|
125
|
+
const contentType = pending.responseHeaders?.['content-type'] || '';
|
|
126
|
+
|
|
127
|
+
if (this.isBinaryContent(contentType)) {
|
|
128
|
+
// 二进制内容:存储元数据而非原始内容
|
|
129
|
+
// base64 长度计算:每 4 个字符 = 3 字节,考虑 padding
|
|
130
|
+
const binarySize = base64Encoded
|
|
131
|
+
? Math.floor(body.length * 0.75) - (body.match(/=*$/)?.[0].length || 0)
|
|
132
|
+
: body.length;
|
|
133
|
+
responseBody = `[Binary: ${contentType}, ${binarySize} bytes]`;
|
|
134
|
+
} else {
|
|
135
|
+
// 文本内容:安全地转换为字符串
|
|
136
|
+
const rawBody = base64Encoded
|
|
137
|
+
? Buffer.from(body, 'base64').toString('utf-8')
|
|
138
|
+
: body;
|
|
139
|
+
responseBody = rawBody.slice(0, 50000);
|
|
140
|
+
}
|
|
93
141
|
|
|
94
142
|
// 异步存储到文件
|
|
95
143
|
this.store.saveResponse({
|
|
@@ -98,9 +146,10 @@ export class NetworkInterceptor {
|
|
|
98
146
|
status: pending.status,
|
|
99
147
|
requestHeaders: pending.headers,
|
|
100
148
|
requestBody: pending.postData,
|
|
101
|
-
responseBody
|
|
149
|
+
responseBody,
|
|
102
150
|
timestamp: pending.timestamp,
|
|
103
|
-
pageUrl: pending.pageUrl
|
|
151
|
+
pageUrl: pending.pageUrl,
|
|
152
|
+
initiator: pending.initiator,
|
|
104
153
|
}).catch(e => {
|
|
105
154
|
console.error('[NetworkInterceptor] 保存失败:', e.message);
|
|
106
155
|
});
|
|
@@ -111,6 +160,21 @@ export class NetworkInterceptor {
|
|
|
111
160
|
|
|
112
161
|
this.pendingRequests.delete(requestId);
|
|
113
162
|
}
|
|
163
|
+
|
|
164
|
+
/**
|
|
165
|
+
* 检测是否为二进制内容类型
|
|
166
|
+
*/
|
|
167
|
+
isBinaryContent(contentType) {
|
|
168
|
+
if (!contentType) return false;
|
|
169
|
+
const binaryTypes = [
|
|
170
|
+
'image/', 'audio/', 'video/', 'application/pdf',
|
|
171
|
+
'application/octet-stream', 'application/zip',
|
|
172
|
+
'application/gzip', 'application/x-protobuf',
|
|
173
|
+
'font/', 'application/vnd.'
|
|
174
|
+
];
|
|
175
|
+
const lowerType = contentType.toLowerCase();
|
|
176
|
+
return binaryTypes.some(type => lowerType.includes(type));
|
|
177
|
+
}
|
|
114
178
|
}
|
|
115
179
|
|
|
116
180
|
export default NetworkInterceptor;
|
|
@@ -11,6 +11,7 @@ export class ScriptInterceptor {
|
|
|
11
11
|
this.page = page; // Playwright page 对象
|
|
12
12
|
this.store = getDataStore();
|
|
13
13
|
this.scriptIds = new Set();
|
|
14
|
+
this.onSource = null; // 回调: (scriptId, scriptSource) => void
|
|
14
15
|
}
|
|
15
16
|
|
|
16
17
|
/**
|
|
@@ -40,22 +41,46 @@ export class ScriptInterceptor {
|
|
|
40
41
|
async onScriptParsed(params) {
|
|
41
42
|
const { scriptId, url, length: _length } = params;
|
|
42
43
|
|
|
43
|
-
//
|
|
44
|
-
if (
|
|
44
|
+
// 跳过扩展脚本
|
|
45
|
+
if (url?.startsWith('chrome-extension://')) return;
|
|
45
46
|
if (this.scriptIds.has(scriptId)) return;
|
|
46
47
|
|
|
47
48
|
this.scriptIds.add(scriptId);
|
|
48
49
|
|
|
49
|
-
|
|
50
|
-
|
|
50
|
+
if (url) {
|
|
51
|
+
// 有 URL 的脚本:获取源码、通知订阅者、存储
|
|
52
|
+
this.fetchAndSave(scriptId, url).catch(() => {});
|
|
53
|
+
} else if (this.onSource) {
|
|
54
|
+
// 无 URL 脚本(eval/new Function 生成):仅通知订阅者用于 debugger 检测,不存储
|
|
55
|
+
this.fetchAndNotify(scriptId).catch(() => {});
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
async fetchAndNotify(scriptId) {
|
|
60
|
+
try {
|
|
61
|
+
// 添加超时保护防止 CDP 命令挂起
|
|
62
|
+
const sourcePromise = this.client.send('Debugger.getScriptSource', { scriptId });
|
|
63
|
+
const timeoutPromise = new Promise((_, reject) =>
|
|
64
|
+
setTimeout(() => reject(new Error('getScriptSource timeout')), 5000)
|
|
65
|
+
);
|
|
66
|
+
const { scriptSource } = await Promise.race([sourcePromise, timeoutPromise]);
|
|
67
|
+
try { this.onSource(scriptId, scriptSource); } catch { /* 订阅者异常不影响主流程 */ }
|
|
68
|
+
} catch {
|
|
69
|
+
// 获取失败(脚本已卸载等),忽略
|
|
70
|
+
}
|
|
51
71
|
}
|
|
52
72
|
|
|
53
73
|
async fetchAndSave(scriptId, url) {
|
|
54
74
|
try {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
75
|
+
// 添加超时保护防止 CDP 命令挂起
|
|
76
|
+
const sourcePromise = this.client.send('Debugger.getScriptSource', { scriptId });
|
|
77
|
+
const timeoutPromise = new Promise((_, reject) =>
|
|
78
|
+
setTimeout(() => reject(new Error('getScriptSource timeout')), 5000)
|
|
58
79
|
);
|
|
80
|
+
const { scriptSource } = await Promise.race([sourcePromise, timeoutPromise]);
|
|
81
|
+
|
|
82
|
+
// 通知订阅者(AntiDebugInterceptor 等)
|
|
83
|
+
try { this.onSource?.(scriptId, scriptSource); } catch { /* 订阅者异常不影响主流程 */ }
|
|
59
84
|
|
|
60
85
|
// 限制大小,超大脚本只保存部分
|
|
61
86
|
const source = scriptSource.slice(0, 500000);
|