deepspider 0.3.1 → 0.4.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
Files changed (87)
  1. package/.env.example +3 -0
  2. package/README.md +21 -15
  3. package/package.json +9 -7
  4. package/src/agent/core/PanelBridge.js +56 -78
  5. package/src/agent/core/StreamHandler.js +244 -20
  6. package/src/agent/index.js +120 -23
  7. package/src/agent/logger.js +183 -8
  8. package/src/agent/middleware/memoryFlush.js +48 -0
  9. package/src/agent/middleware/report.js +95 -37
  10. package/src/agent/middleware/subagent.js +236 -0
  11. package/src/agent/middleware/toolAvailability.js +37 -0
  12. package/src/agent/middleware/toolGuard.js +187 -0
  13. package/src/agent/middleware/validationWorkflow.js +171 -0
  14. package/src/agent/prompts/system.js +310 -59
  15. package/src/agent/run.js +168 -20
  16. package/src/agent/sessions.js +88 -0
  17. package/src/agent/skills/anti-detect/SKILL.md +89 -14
  18. package/src/agent/skills/captcha/SKILL.md +93 -19
  19. package/src/agent/skills/crawler/SKILL.md +64 -3
  20. package/src/agent/skills/crawler/evolved.md +9 -1
  21. package/src/agent/skills/dynamic-analysis/SKILL.md +74 -7
  22. package/src/agent/skills/env/SKILL.md +75 -0
  23. package/src/agent/skills/js2python/evolved.md +5 -1
  24. package/src/agent/skills/sandbox/SKILL.md +35 -0
  25. package/src/agent/skills/static-analysis/SKILL.md +98 -2
  26. package/src/agent/skills/static-analysis/evolved.md +5 -1
  27. package/src/agent/subagents/anti-detect.js +36 -24
  28. package/src/agent/subagents/captcha.js +35 -28
  29. package/src/agent/subagents/crawler.js +40 -105
  30. package/src/agent/subagents/factory.js +129 -9
  31. package/src/agent/subagents/index.js +4 -13
  32. package/src/agent/subagents/js2python.js +25 -35
  33. package/src/agent/subagents/reverse.js +180 -0
  34. package/src/agent/tools/analysis.js +101 -8
  35. package/src/agent/tools/anti-detect.js +5 -2
  36. package/src/agent/tools/browser.js +186 -13
  37. package/src/agent/tools/capture.js +24 -3
  38. package/src/agent/tools/correlate.js +129 -15
  39. package/src/agent/tools/crawler.js +3 -2
  40. package/src/agent/tools/crawlerGenerator.js +90 -0
  41. package/src/agent/tools/debug.js +43 -6
  42. package/src/agent/tools/evolve.js +5 -2
  43. package/src/agent/tools/extractor.js +5 -1
  44. package/src/agent/tools/file.js +14 -5
  45. package/src/agent/tools/generateHook.js +66 -0
  46. package/src/agent/tools/hookManager.js +19 -9
  47. package/src/agent/tools/index.js +36 -21
  48. package/src/agent/tools/nodejs.js +41 -6
  49. package/src/agent/tools/patch.js +1 -1
  50. package/src/agent/tools/sandbox.js +21 -1
  51. package/src/agent/tools/scratchpad.js +70 -0
  52. package/src/agent/tools/store.js +1 -1
  53. package/src/agent/tools/tracing.js +26 -0
  54. package/src/agent/tools/verifyAlgorithm.js +117 -0
  55. package/src/browser/EnvBridge.js +27 -13
  56. package/src/browser/client.js +128 -18
  57. package/src/browser/collector.js +101 -22
  58. package/src/browser/defaultHooks.js +3 -1
  59. package/src/browser/hooks/index.js +5 -0
  60. package/src/browser/interceptors/AntiDebugInterceptor.js +132 -0
  61. package/src/browser/interceptors/NetworkInterceptor.js +76 -12
  62. package/src/browser/interceptors/ScriptInterceptor.js +32 -7
  63. package/src/browser/interceptors/index.js +1 -0
  64. package/src/browser/ui/analysisPanel.js +541 -464
  65. package/src/cli/commands/config.js +11 -3
  66. package/src/config/paths.js +9 -1
  67. package/src/config/settings.js +7 -1
  68. package/src/core/PatchGenerator.js +24 -4
  69. package/src/core/Sandbox.js +140 -3
  70. package/src/env/EnvCodeGenerator.js +60 -88
  71. package/src/env/modules/bom/history.js +6 -0
  72. package/src/env/modules/bom/location.js +6 -0
  73. package/src/env/modules/bom/navigator.js +13 -0
  74. package/src/env/modules/bom/screen.js +6 -0
  75. package/src/env/modules/bom/storage.js +7 -0
  76. package/src/env/modules/dom/document.js +14 -0
  77. package/src/env/modules/dom/event.js +4 -0
  78. package/src/env/modules/index.js +27 -10
  79. package/src/env/modules/webapi/fetch.js +4 -0
  80. package/src/env/modules/webapi/url.js +4 -0
  81. package/src/env/modules/webapi/xhr.js +8 -0
  82. package/src/store/DataStore.js +125 -42
  83. package/src/store/Store.js +2 -1
  84. package/src/agent/subagents/dynamic.js +0 -64
  85. package/src/agent/subagents/env-agent.js +0 -82
  86. package/src/agent/subagents/sandbox.js +0 -55
  87. package/src/agent/subagents/static.js +0 -66
@@ -15,13 +15,17 @@ export class EnvCollector {
15
15
  * @param {object} options - 采集选项
16
16
  */
17
17
  async collect(path, options = {}) {
18
- const { depth = 1, includeProto = false, useCache = true } = options;
18
+ const { depth = 1, includeProto = false, useCache = true, timeout = 5000 } = options;
19
19
 
20
20
  if (useCache && this.cache.has(path)) {
21
21
  return this.cache.get(path);
22
22
  }
23
23
 
24
- const result = await this.page.evaluate(({ path, depth, includeProto: _includeProto }) => {
24
+ // 使用 Promise.race 添加超时保护
25
+ const evaluatePromise = this.page.evaluate(({ path, depth, includeProto: _includeProto }) => {
26
+ // 用于检测循环引用的 WeakSet
27
+ const seen = new WeakSet();
28
+
25
29
  function getByPath(obj, path) {
26
30
  return path.split('.').reduce((o, k) => o && o[k], obj);
27
31
  }
@@ -40,29 +44,55 @@ export class EnvCollector {
40
44
  return { type, value: val };
41
45
  }
42
46
 
47
+ // 检测循环引用
48
+ if (seen.has(val)) {
49
+ return { type: 'object', value: '[Circular]', circular: true };
50
+ }
51
+
43
52
  if (currentDepth >= maxDepth) {
44
53
  return { type: 'object', value: '[Object]', truncated: true };
45
54
  }
46
55
 
56
+ seen.add(val);
57
+
47
58
  if (Array.isArray(val)) {
48
59
  return {
49
60
  type: 'array',
50
- value: val.map(v => serialize(v, currentDepth + 1, maxDepth))
61
+ length: val.length,
62
+ value: val.slice(0, 20).map(v => serialize(v, currentDepth + 1, maxDepth))
51
63
  };
52
64
  }
53
65
 
54
66
  const result = { type: 'object', properties: {} };
55
- const keys = Object.getOwnPropertyNames(val);
67
+ let keys;
68
+ try {
69
+ keys = Object.getOwnPropertyNames(val);
70
+ } catch (e) {
71
+ return { type: 'object', value: '[Error accessing keys]', error: e.message };
72
+ }
56
73
 
57
- for (const key of keys.slice(0, 50)) {
74
+ for (const key of keys.slice(0, 30)) {
58
75
  try {
59
76
  const desc = Object.getOwnPropertyDescriptor(val, key);
77
+ if (!desc) continue;
78
+
79
+ // 安全处理:避免触发有副作用的 getter
60
80
  if (desc.get) {
81
+ // 对于 getter,只记录描述符信息,不执行 getter
82
+ result.properties[key] = {
83
+ type: 'getter',
84
+ hasGetter: true,
85
+ enumerable: desc.enumerable,
86
+ configurable: desc.configurable
87
+ };
88
+ } else if (desc.set && desc.value === undefined) {
89
+ // 只有 setter 没有 getter
61
90
  result.properties[key] = {
62
- ...serialize(val[key], currentDepth + 1, maxDepth),
63
- hasGetter: true
91
+ type: 'setter',
92
+ hasSetter: true
64
93
  };
65
94
  } else {
95
+ // 普通值
66
96
  result.properties[key] = serialize(desc.value, currentDepth + 1, maxDepth);
67
97
  }
68
98
  } catch (e) {
@@ -89,15 +119,19 @@ export class EnvCollector {
89
119
 
90
120
  let descriptor = null;
91
121
  if (parent) {
92
- const desc = Object.getOwnPropertyDescriptor(parent, propName);
93
- if (desc) {
94
- descriptor = {
95
- configurable: desc.configurable,
96
- enumerable: desc.enumerable,
97
- writable: desc.writable,
98
- hasGetter: !!desc.get,
99
- hasSetter: !!desc.set
100
- };
122
+ try {
123
+ const desc = Object.getOwnPropertyDescriptor(parent, propName);
124
+ if (desc) {
125
+ descriptor = {
126
+ configurable: desc.configurable,
127
+ enumerable: desc.enumerable,
128
+ writable: desc.writable,
129
+ hasGetter: !!desc.get,
130
+ hasSetter: !!desc.set
131
+ };
132
+ }
133
+ } catch (e) {
134
+ // 忽略描述符读取错误
101
135
  }
102
136
  }
103
137
 
@@ -112,7 +146,19 @@ export class EnvCollector {
112
146
  }
113
147
  }, { path, depth, includeProto });
114
148
 
115
- if (result.success && useCache) {
149
+ // 添加超时
150
+ const timeoutPromise = new Promise((_, reject) =>
151
+ setTimeout(() => reject(new Error('采集超时')), timeout)
152
+ );
153
+
154
+ let result;
155
+ try {
156
+ result = await Promise.race([evaluatePromise, timeoutPromise]);
157
+ } catch (e) {
158
+ result = { success: false, error: e.message };
159
+ }
160
+
161
+ if (result?.success && useCache) {
116
162
  this.cache.set(path, result);
117
163
  }
118
164
 
@@ -154,9 +200,12 @@ export class EnvCollector {
154
200
  * 深度采集整个对象
155
201
  */
156
202
  async collectDeep(rootPath, options = {}) {
157
- const { maxDepth = 3, maxProps = 100 } = options;
203
+ const { maxDepth = 3, maxProps = 100, timeout = 5000 } = options;
204
+
205
+ const evaluatePromise = this.page.evaluate(({ rootPath, maxDepth, maxProps }) => {
206
+ // 用于检测循环引用的 WeakSet
207
+ const seen = new WeakSet();
158
208
 
159
- return await this.page.evaluate(({ rootPath, maxDepth, maxProps }) => {
160
209
  function getByPath(obj, path) {
161
210
  return path.split('.').reduce((o, k) => o && o[k], obj);
162
211
  }
@@ -165,19 +214,38 @@ export class EnvCollector {
165
214
  if (depth > maxDepth || collected.size > maxProps) return;
166
215
  if (!obj || typeof obj !== 'object') return;
167
216
 
217
+ // 检测循环引用
218
+ if (seen.has(obj)) return;
219
+ seen.add(obj);
220
+
168
221
  const keys = Object.getOwnPropertyNames(obj);
169
- for (const key of keys) {
222
+ for (const key of keys.slice(0, 30)) {
170
223
  if (collected.size > maxProps) break;
171
224
 
172
225
  const fullPath = path ? `${path}.${key}` : key;
173
226
  try {
174
- const val = obj[key];
175
- const type = typeof val;
227
+ const desc = Object.getOwnPropertyDescriptor(obj, key);
228
+ if (!desc) continue;
229
+
230
+ // 安全处理:避免触发有副作用的 getter
231
+ let val;
232
+ let type;
233
+ if (desc.get) {
234
+ type = 'getter';
235
+ val = '[Getter]';
236
+ } else if (desc.set && desc.value === undefined) {
237
+ type = 'setter';
238
+ val = '[Setter]';
239
+ } else {
240
+ val = desc.value;
241
+ type = typeof val;
242
+ }
176
243
 
177
244
  collected.set(fullPath, {
178
245
  type,
179
246
  value: type === 'function' ? '[Function]' :
180
247
  type === 'object' ? '[Object]' :
248
+ type === 'getter' || type === 'setter' ? val :
181
249
  val
182
250
  });
183
251
 
@@ -204,6 +272,17 @@ export class EnvCollector {
204
272
  properties: Object.fromEntries(collected)
205
273
  };
206
274
  }, { rootPath, maxDepth, maxProps });
275
+
276
+ // 添加超时保护
277
+ const timeoutPromise = new Promise((_, reject) =>
278
+ setTimeout(() => reject(new Error('collectDeep timeout')), timeout)
279
+ );
280
+
281
+ try {
282
+ return await Promise.race([evaluatePromise, timeoutPromise]);
283
+ } catch (e) {
284
+ return { success: false, error: e.message };
285
+ }
207
286
  }
208
287
 
209
288
  // === 特殊环境采集 ===
@@ -160,7 +160,9 @@ function getCookieHook() {
160
160
  return value;
161
161
  },
162
162
  set: function(val) {
163
- deepspider.log('cookie', { action: 'write', value: val });
163
+ // 解析 cookie name(cookie 格式: "name=value; expires=...; path=...")
164
+ const cookieName = val?.split('=')[0]?.trim();
165
+ deepspider.log('cookie', { action: 'write', name: cookieName, value: val });
164
166
  return cookieDesc.set.call(document, val);
165
167
  },
166
168
  configurable: true
@@ -7,6 +7,7 @@
7
7
  export class HookManager {
8
8
  constructor() {
9
9
  this.logs = [];
10
+ this.maxLogs = 5000;
10
11
  this.onLog = null;
11
12
  this.injected = false;
12
13
  }
@@ -37,6 +38,10 @@ export class HookManager {
37
38
  text,
38
39
  timestamp: Date.now(),
39
40
  });
41
+ // 超过上限时丢弃最旧的 20%
42
+ if (this.logs.length > this.maxLogs) {
43
+ this.logs = this.logs.slice(Math.floor(this.maxLogs * 0.2));
44
+ }
40
45
  if (this.onLog) {
41
46
  this.onLog({ type: msg.type(), text });
42
47
  }
@@ -0,0 +1,132 @@
1
+ /**
2
+ * DeepSpider - 反无限 debugger 拦截器
3
+ * 通过 CDP Debugger.setBlackboxedRanges 跳过包含 debugger 语句的脚本
4
+ * 零运行时开销,不修改源码,不触发完整性校验
5
+ *
6
+ * 已知限制:/\bdebugger\b/ 会匹配字符串/注释中的 debugger,
7
+ * 对反爬场景可接受(误 blackbox 的脚本仍正常执行,只是不可调试)
8
+ */
9
+
10
+ export class AntiDebugInterceptor {
11
+ constructor(cdpClient) {
12
+ this.client = cdpClient;
13
+ this.blackboxedScripts = new Set();
14
+ // 高频 debugger 检测
15
+ this.pausedCount = 0;
16
+ this.pausedWindowStart = 0;
17
+ this.PAUSED_WINDOW_MS = 1000; // 1秒窗口
18
+ this.PAUSED_THRESHOLD = 5; // 1秒内超过5次paused认为是debugger风暴
19
+ this.stormMode = false; // 风暴模式:跳过所有断点
20
+ this.stormTimer = null; // 风暴模式自动退出定时器
21
+ }
22
+
23
+ async start() {
24
+ // 兜底:对于 blackbox 来不及处理的同步 debugger(时序竞争),自动 resume
25
+ // reason 可能是 'other' 或 'debugCommand'(不同 Chrome 版本),
26
+ // 只要不是我们主动设的断点(hitBreakpoints 非空 / reason=breakpoint)就 resume
27
+ this.client.on('Debugger.paused', (params) => {
28
+ // 手动设置的断点(除非在风暴模式)
29
+ if (!this.stormMode && params.reason === 'breakpoint') return;
30
+ if (!this.stormMode && params.hitBreakpoints?.length > 0) return;
31
+
32
+ // 风暴模式下直接 resume,不参与计数
33
+ if (this.stormMode) {
34
+ this.client.send('Debugger.resume').catch(() => {});
35
+ return;
36
+ }
37
+
38
+ // 高频 debugger 检测
39
+ const now = Date.now();
40
+ if (now - this.pausedWindowStart > this.PAUSED_WINDOW_MS) {
41
+ // 新窗口
42
+ this.pausedWindowStart = now;
43
+ this.pausedCount = 1;
44
+ } else {
45
+ this.pausedCount++;
46
+ }
47
+
48
+ // 触发风暴模式
49
+ if (this.pausedCount > this.PAUSED_THRESHOLD) {
50
+ console.log('[AntiDebugInterceptor] 检测到 debugger 风暴,启用风暴模式');
51
+ this.stormMode = true;
52
+ // 清除之前的定时器
53
+ if (this.stormTimer) {
54
+ clearTimeout(this.stormTimer);
55
+ }
56
+ // 3秒后退出风暴模式
57
+ this.stormTimer = setTimeout(() => {
58
+ console.log('[AntiDebugInterceptor] 退出风暴模式');
59
+ this.stormMode = false;
60
+ this.pausedCount = 0;
61
+ this.stormTimer = null;
62
+ }, 3000);
63
+ }
64
+
65
+ // 自动 resume
66
+ this.client.send('Debugger.resume').catch(() => {});
67
+ });
68
+
69
+ console.log('[AntiDebugInterceptor] 已启动');
70
+ }
71
+
72
+ /**
73
+ * 检查脚本源码,包含 debugger 则 blackbox 整个脚本
74
+ * 由 ScriptInterceptor.onSource 回调驱动,避免重复拉取源码
75
+ */
76
+ checkScript(scriptId, scriptSource) {
77
+ if (/\bdebugger\b/.test(scriptSource)) {
78
+ this.client.send('Debugger.setBlackboxedRanges', {
79
+ scriptId,
80
+ positions: [{ lineNumber: 0, columnNumber: 0 }],
81
+ }).then(() => {
82
+ this.blackboxedScripts.add(scriptId);
83
+ }).catch(() => {});
84
+ }
85
+ }
86
+
87
+ /**
88
+ * 取消指定脚本的 blackbox(供断点工具调用)
89
+ */
90
+ async unblackbox(scriptId) {
91
+ if (this.blackboxedScripts.has(scriptId)) {
92
+ await this.client.send('Debugger.setBlackboxedRanges', {
93
+ scriptId,
94
+ positions: [],
95
+ });
96
+ this.blackboxedScripts.delete(scriptId);
97
+ }
98
+ }
99
+
100
+ /**
101
+ * 手动启用/禁用风暴模式
102
+ * 用于绕过强反调试场景
103
+ */
104
+ setStormMode(enabled) {
105
+ // 清除之前的定时器
106
+ if (this.stormTimer) {
107
+ clearTimeout(this.stormTimer);
108
+ this.stormTimer = null;
109
+ }
110
+
111
+ this.stormMode = enabled;
112
+ if (enabled) {
113
+ console.log('[AntiDebugInterceptor] 手动启用风暴模式');
114
+ // 自动退出
115
+ this.stormTimer = setTimeout(() => {
116
+ this.stormMode = false;
117
+ this.stormTimer = null;
118
+ console.log('[AntiDebugInterceptor] 自动退出风暴模式');
119
+ }, 5000);
120
+ } else {
121
+ console.log('[AntiDebugInterceptor] 手动禁用风暴模式');
122
+ this.pausedCount = 0;
123
+ }
124
+ }
125
+
126
+ /**
127
+ * 检查当前是否在风暴模式
128
+ */
129
+ isStormMode() {
130
+ return this.stormMode;
131
+ }
132
+ }
@@ -46,11 +46,16 @@ export class NetworkInterceptor {
46
46
  this.onLoadingFinished(params);
47
47
  });
48
48
 
49
+ // 监听加载失败(清理 pendingRequests,防止内存泄漏)
50
+ this.client.on('Network.loadingFailed', (params) => {
51
+ this.pendingRequests.delete(params.requestId);
52
+ });
53
+
49
54
  console.log('[NetworkInterceptor] 已启动');
50
55
  }
51
56
 
52
57
  onRequest(params) {
53
- const { requestId, request, timestamp } = params;
58
+ const { requestId, request, timestamp, initiator } = params;
54
59
 
55
60
  // 只记录 XHR/Fetch 请求
56
61
  const type = params.type;
@@ -62,10 +67,37 @@ export class NetworkInterceptor {
62
67
  headers: request.headers,
63
68
  postData: request.postData,
64
69
  timestamp: timestamp * 1000,
65
- pageUrl: this.getPageUrl() // 记录请求时的页面 URL
70
+ pageUrl: this.getPageUrl(),
71
+ initiator: this.formatInitiator(initiator),
66
72
  });
67
73
  }
68
74
 
75
+ /**
76
+ * 精简 initiator 调用栈(只保留前 5 帧,过滤内部帧)
77
+ */
78
+ formatInitiator(initiator) {
79
+ if (!initiator) return null;
80
+ const result = { type: initiator.type };
81
+ if (initiator.url) {
82
+ result.url = initiator.url;
83
+ result.lineNumber = initiator.lineNumber;
84
+ }
85
+ if (initiator.stack?.callFrames) {
86
+ result.callFrames = initiator.stack.callFrames
87
+ .filter(f => f.url && !f.url.includes('patchright') && !f.url.includes('__playwright'))
88
+ .slice(0, 5)
89
+ .map(f => ({
90
+ functionName: f.functionName || '(anonymous)',
91
+ url: f.url,
92
+ lineNumber: f.lineNumber,
93
+ columnNumber: f.columnNumber,
94
+ }));
95
+ }
96
+ // 只有 type 没有实际定位信息时返回 null
97
+ if (!result.url && !result.callFrames?.length) return null;
98
+ return result;
99
+ }
100
+
69
101
  onResponse(params) {
70
102
  const { requestId, response } = params;
71
103
  const pending = this.pendingRequests.get(requestId);
@@ -81,15 +113,31 @@ export class NetworkInterceptor {
81
113
  if (!pending) return;
82
114
 
83
115
  try {
84
- // 获取响应体
85
- const { body, base64Encoded } = await this.client.send(
86
- 'Network.getResponseBody',
87
- { requestId }
116
+ // 获取响应体,添加超时保护防止 CDP 命令挂起
117
+ const bodyPromise = this.client.send('Network.getResponseBody', { requestId });
118
+ const timeoutPromise = new Promise((_, reject) =>
119
+ setTimeout(() => reject(new Error('getResponseBody timeout')), 5000)
88
120
  );
89
-
90
- const responseBody = base64Encoded
91
- ? Buffer.from(body, 'base64').toString('utf-8')
92
- : body;
121
+ const { body, base64Encoded } = await Promise.race([bodyPromise, timeoutPromise]);
122
+
123
+ // 处理响应体:检测二进制内容,避免损坏
124
+ let responseBody;
125
+ const contentType = pending.responseHeaders?.['content-type'] || '';
126
+
127
+ if (this.isBinaryContent(contentType)) {
128
+ // 二进制内容:存储元数据而非原始内容
129
+ // base64 长度计算:每 4 个字符 = 3 字节,考虑 padding
130
+ const binarySize = base64Encoded
131
+ ? Math.floor(body.length * 0.75) - (body.match(/=*$/)?.[0].length || 0)
132
+ : body.length;
133
+ responseBody = `[Binary: ${contentType}, ${binarySize} bytes]`;
134
+ } else {
135
+ // 文本内容:安全地转换为字符串
136
+ const rawBody = base64Encoded
137
+ ? Buffer.from(body, 'base64').toString('utf-8')
138
+ : body;
139
+ responseBody = rawBody.slice(0, 50000);
140
+ }
93
141
 
94
142
  // 异步存储到文件
95
143
  this.store.saveResponse({
@@ -98,9 +146,10 @@ export class NetworkInterceptor {
98
146
  status: pending.status,
99
147
  requestHeaders: pending.headers,
100
148
  requestBody: pending.postData,
101
- responseBody: responseBody.slice(0, 50000),
149
+ responseBody,
102
150
  timestamp: pending.timestamp,
103
- pageUrl: pending.pageUrl // 传递页面 URL 用于分站点存储
151
+ pageUrl: pending.pageUrl,
152
+ initiator: pending.initiator,
104
153
  }).catch(e => {
105
154
  console.error('[NetworkInterceptor] 保存失败:', e.message);
106
155
  });
@@ -111,6 +160,21 @@ export class NetworkInterceptor {
111
160
 
112
161
  this.pendingRequests.delete(requestId);
113
162
  }
163
+
164
+ /**
165
+ * 检测是否为二进制内容类型
166
+ */
167
+ isBinaryContent(contentType) {
168
+ if (!contentType) return false;
169
+ const binaryTypes = [
170
+ 'image/', 'audio/', 'video/', 'application/pdf',
171
+ 'application/octet-stream', 'application/zip',
172
+ 'application/gzip', 'application/x-protobuf',
173
+ 'font/', 'application/vnd.'
174
+ ];
175
+ const lowerType = contentType.toLowerCase();
176
+ return binaryTypes.some(type => lowerType.includes(type));
177
+ }
114
178
  }
115
179
 
116
180
  export default NetworkInterceptor;
@@ -11,6 +11,7 @@ export class ScriptInterceptor {
11
11
  this.page = page; // Playwright page 对象
12
12
  this.store = getDataStore();
13
13
  this.scriptIds = new Set();
14
+ this.onSource = null; // 回调: (scriptId, scriptSource) => void
14
15
  }
15
16
 
16
17
  /**
@@ -40,22 +41,46 @@ export class ScriptInterceptor {
40
41
  async onScriptParsed(params) {
41
42
  const { scriptId, url, length: _length } = params;
42
43
 
43
- // 跳过扩展和空脚本
44
- if (!url || url.startsWith('chrome-extension://')) return;
44
+ // 跳过扩展脚本
45
+ if (url?.startsWith('chrome-extension://')) return;
45
46
  if (this.scriptIds.has(scriptId)) return;
46
47
 
47
48
  this.scriptIds.add(scriptId);
48
49
 
49
- // 异步获取并存储源码
50
- this.fetchAndSave(scriptId, url).catch(() => {});
50
+ if (url) {
51
+ // URL 的脚本:获取源码、通知订阅者、存储
52
+ this.fetchAndSave(scriptId, url).catch(() => {});
53
+ } else if (this.onSource) {
54
+ // 无 URL 脚本(eval/new Function 生成):仅通知订阅者用于 debugger 检测,不存储
55
+ this.fetchAndNotify(scriptId).catch(() => {});
56
+ }
57
+ }
58
+
59
+ async fetchAndNotify(scriptId) {
60
+ try {
61
+ // 添加超时保护防止 CDP 命令挂起
62
+ const sourcePromise = this.client.send('Debugger.getScriptSource', { scriptId });
63
+ const timeoutPromise = new Promise((_, reject) =>
64
+ setTimeout(() => reject(new Error('getScriptSource timeout')), 5000)
65
+ );
66
+ const { scriptSource } = await Promise.race([sourcePromise, timeoutPromise]);
67
+ try { this.onSource(scriptId, scriptSource); } catch { /* 订阅者异常不影响主流程 */ }
68
+ } catch {
69
+ // 获取失败(脚本已卸载等),忽略
70
+ }
51
71
  }
52
72
 
53
73
  async fetchAndSave(scriptId, url) {
54
74
  try {
55
- const { scriptSource } = await this.client.send(
56
- 'Debugger.getScriptSource',
57
- { scriptId }
75
+ // 添加超时保护防止 CDP 命令挂起
76
+ const sourcePromise = this.client.send('Debugger.getScriptSource', { scriptId });
77
+ const timeoutPromise = new Promise((_, reject) =>
78
+ setTimeout(() => reject(new Error('getScriptSource timeout')), 5000)
58
79
  );
80
+ const { scriptSource } = await Promise.race([sourcePromise, timeoutPromise]);
81
+
82
+ // 通知订阅者(AntiDebugInterceptor 等)
83
+ try { this.onSource?.(scriptId, scriptSource); } catch { /* 订阅者异常不影响主流程 */ }
59
84
 
60
85
  // 限制大小,超大脚本只保存部分
61
86
  const source = scriptSource.slice(0, 500000);
@@ -4,3 +4,4 @@
4
4
 
5
5
  export { NetworkInterceptor } from './NetworkInterceptor.js';
6
6
  export { ScriptInterceptor } from './ScriptInterceptor.js';
7
+ export { AntiDebugInterceptor } from './AntiDebugInterceptor.js';