deepspider 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. package/.env.example +3 -0
  2. package/README.md +13 -13
  3. package/package.json +6 -6
  4. package/src/agent/core/PanelBridge.js +29 -77
  5. package/src/agent/core/StreamHandler.js +139 -14
  6. package/src/agent/index.js +51 -12
  7. package/src/agent/logger.js +184 -9
  8. package/src/agent/middleware/report.js +42 -16
  9. package/src/agent/middleware/subagent.js +233 -0
  10. package/src/agent/middleware/toolGuard.js +77 -0
  11. package/src/agent/middleware/validationWorkflow.js +171 -0
  12. package/src/agent/prompts/system.js +181 -59
  13. package/src/agent/run.js +41 -6
  14. package/src/agent/skills/crawler/SKILL.md +64 -3
  15. package/src/agent/skills/crawler/evolved.md +9 -1
  16. package/src/agent/skills/dynamic-analysis/SKILL.md +74 -7
  17. package/src/agent/skills/env/SKILL.md +75 -0
  18. package/src/agent/skills/evolve.js +0 -3
  19. package/src/agent/skills/sandbox/SKILL.md +35 -0
  20. package/src/agent/skills/static-analysis/SKILL.md +98 -2
  21. package/src/agent/subagents/anti-detect.js +10 -20
  22. package/src/agent/subagents/captcha.js +7 -19
  23. package/src/agent/subagents/crawler.js +25 -37
  24. package/src/agent/subagents/factory.js +109 -9
  25. package/src/agent/subagents/index.js +4 -13
  26. package/src/agent/subagents/js2python.js +7 -19
  27. package/src/agent/subagents/reverse.js +180 -0
  28. package/src/agent/tools/analysis.js +84 -1
  29. package/src/agent/tools/anti-detect.js +5 -2
  30. package/src/agent/tools/browser.js +160 -0
  31. package/src/agent/tools/captcha.js +1 -1
  32. package/src/agent/tools/capture.js +24 -3
  33. package/src/agent/tools/correlate.js +129 -15
  34. package/src/agent/tools/crawler.js +2 -1
  35. package/src/agent/tools/crawlerGenerator.js +90 -0
  36. package/src/agent/tools/debug.js +43 -6
  37. package/src/agent/tools/evolve.js +6 -3
  38. package/src/agent/tools/extractor.js +5 -1
  39. package/src/agent/tools/file.js +16 -7
  40. package/src/agent/tools/generateHook.js +66 -0
  41. package/src/agent/tools/hookManager.js +19 -9
  42. package/src/agent/tools/index.js +33 -20
  43. package/src/agent/tools/nodejs.js +41 -6
  44. package/src/agent/tools/python.js +4 -4
  45. package/src/agent/tools/report.js +2 -2
  46. package/src/agent/tools/runtime.js +1 -1
  47. package/src/agent/tools/sandbox.js +21 -1
  48. package/src/agent/tools/scratchpad.js +70 -0
  49. package/src/agent/tools/tracing.js +26 -0
  50. package/src/agent/tools/verifyAlgorithm.js +117 -0
  51. package/src/analyzer/EncryptionAnalyzer.js +2 -2
  52. package/src/browser/EnvBridge.js +27 -13
  53. package/src/browser/client.js +124 -18
  54. package/src/browser/collector.js +101 -22
  55. package/src/browser/defaultHooks.js +3 -1
  56. package/src/browser/hooks/index.js +5 -0
  57. package/src/browser/interceptors/AntiDebugInterceptor.js +132 -0
  58. package/src/browser/interceptors/NetworkInterceptor.js +77 -13
  59. package/src/browser/interceptors/ScriptInterceptor.js +34 -9
  60. package/src/browser/interceptors/index.js +1 -0
  61. package/src/browser/ui/analysisPanel.js +469 -464
  62. package/src/cli/commands/config.js +11 -3
  63. package/src/config/paths.js +9 -1
  64. package/src/config/settings.js +7 -1
  65. package/src/core/PatchGenerator.js +26 -6
  66. package/src/core/Sandbox.js +140 -3
  67. package/src/env/EnvCodeGenerator.js +60 -88
  68. package/src/env/modules/bom/history.js +6 -0
  69. package/src/env/modules/bom/location.js +6 -0
  70. package/src/env/modules/bom/navigator.js +13 -0
  71. package/src/env/modules/bom/screen.js +6 -0
  72. package/src/env/modules/bom/storage.js +7 -0
  73. package/src/env/modules/dom/document.js +14 -0
  74. package/src/env/modules/dom/event.js +4 -0
  75. package/src/env/modules/index.js +27 -10
  76. package/src/env/modules/webapi/fetch.js +4 -0
  77. package/src/env/modules/webapi/url.js +4 -0
  78. package/src/env/modules/webapi/xhr.js +8 -0
  79. package/src/store/DataStore.js +130 -47
  80. package/src/store/Store.js +2 -1
  81. package/src/agent/subagents/dynamic.js +0 -64
  82. package/src/agent/subagents/env-agent.js +0 -82
  83. package/src/agent/subagents/sandbox.js +0 -55
  84. package/src/agent/subagents/static.js +0 -66
@@ -0,0 +1,117 @@
1
+ /**
2
+ * DeepSpider - 统一算法验证工具
3
+ * 合并 verify_md5/sha256/hmac/aes + identify_encryption
4
+ */
5
+
6
+ import { z } from 'zod';
7
+ import { tool } from '@langchain/core/tools';
8
+ import crypto from 'crypto';
9
+
10
/**
 * Collect heuristic hints about what kind of value a ciphertext string might be.
 *
 * Hash hints (MD5 / SHA1 / SHA256 / SHA512) are keyed on the length of a
 * hex-encoded digest, so they are only reported when the whole string is
 * hexadecimal. Without that gate, any 32/64-character Base64 ciphertext would
 * be mislabelled as a hash.
 *
 * @param {string} ciphertext - Value to inspect.
 * @returns {string[]} Feature labels in a fixed order (hash hint, Base64, Hex,
 *   JWT). Empty when nothing matches or when the input is not a string.
 */
function identifyPattern(ciphertext) {
  // Defensive: callers are expected to pass a string, but never throw on junk.
  if (typeof ciphertext !== 'string') return [];

  // Hex digest length -> label of the hash that produces that length.
  const hexDigestHints = new Map([
    [32, '可能是 MD5'],
    [40, '可能是 SHA1'],
    [64, '可能是 SHA256'],
    [128, '可能是 SHA512'],
  ]);

  const features = [];
  const isHex = /^[0-9a-fA-F]+$/.test(ciphertext);

  const hashHint = isHex ? hexDigestHints.get(ciphertext.length) : undefined;
  if (hashHint) features.push(hashHint);

  // Base64 padding is at most two '=' characters.
  if (/^[A-Za-z0-9+/]+={0,2}$/.test(ciphertext)) features.push('Base64 编码');

  if (isHex) features.push('Hex 编码');

  // Three dot-separated base64url segments whose first starts with `eyJ`
  // (the Base64 encoding of `{"`).
  if (/^eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+$/.test(ciphertext)) {
    features.push('JWT Token');
  }

  return features;
}
27
+
28
/**
 * LangChain tool `verify_algorithm`.
 *
 * Two modes, selected by whether `algorithm` is supplied:
 *  - Identification mode (no `algorithm`): returns a preview of `expected`,
 *    its length, and the feature hints from `identifyPattern`.
 *  - Verification mode: computes the standard algorithm over `input` and
 *    compares the result with `expected`.
 *
 * Hash and HMAC results are compared as case-insensitive hex. AES output is
 * compared both as Base64 (exact) and as hex (case-insensitive), since
 * ciphertext is commonly transported in either encoding.
 *
 * The tool always resolves to a JSON string; failures are reported as
 * `{ error }` rather than thrown.
 */
export const verifyAlgorithm = tool(
  async ({ algorithm, input, expected, key, iv, hmacHash, aesMode }) => {
    // Identification mode: no algorithm given, only the ciphertext.
    if (!algorithm) {
      return JSON.stringify({
        ciphertext: expected.slice(0, 50) + (expected.length > 50 ? '...' : ''),
        length: expected.length,
        features: identifyPattern(expected),
      }, null, 2);
    }

    // Verification mode needs an input. The empty string is a legitimate
    // input (e.g. MD5 of ""), so only a missing value is rejected.
    if (input === undefined || input === null) {
      return JSON.stringify({ error: `验证 ${algorithm} 需要 input 参数` });
    }

    let computed;
    let computedHex; // only set for AES; omitted from the JSON otherwise
    let algoLabel;

    switch (algorithm) {
      case 'md5':
      case 'sha1':
      case 'sha256':
      case 'sha512': {
        computed = crypto.createHash(algorithm).update(input).digest('hex');
        algoLabel = algorithm.toUpperCase();
        break;
      }
      case 'hmac': {
        if (!key) return JSON.stringify({ error: 'HMAC 需要 key 参数' });
        const hash = hmacHash || 'sha256';
        computed = crypto.createHmac(hash, key).update(input).digest('hex');
        algoLabel = `HMAC-${hash.toUpperCase()}`;
        break;
      }
      case 'aes': {
        if (!key) return JSON.stringify({ error: 'AES 需要 key 参数' });
        try {
          // Key and IV are interpreted as UTF-8 text; the key's byte length
          // selects AES-128/192/256. An unsupported length yields an unknown
          // cipher name, which is reported through the catch below.
          const keyBuf = Buffer.from(key, 'utf8');
          const ivBuf = iv ? Buffer.from(iv, 'utf8') : Buffer.alloc(16, 0);
          const mode = aesMode || 'cbc';
          const cipher = crypto.createCipheriv(
            `aes-${keyBuf.length * 8}-${mode}`,
            keyBuf,
            mode === 'ecb' ? null : ivBuf // ECB takes no IV
          );
          const encrypted = Buffer.concat([cipher.update(input, 'utf8'), cipher.final()]);
          computed = encrypted.toString('base64');
          computedHex = encrypted.toString('hex');
          algoLabel = `AES-${keyBuf.length * 8}-${mode.toUpperCase()}`;
        } catch (e) {
          return JSON.stringify({ error: e.message });
        }
        break;
      }
      default:
        return JSON.stringify({ error: `未知算法: ${algorithm}` });
    }

    const match = algorithm === 'aes'
      ? computed === expected || computedHex === expected.toLowerCase()
      : computed.toLowerCase() === expected.toLowerCase();

    return JSON.stringify({
      algorithm: algoLabel,
      input,
      computed,
      computedHex,
      expected,
      match,
      conclusion: match ? `标准 ${algoLabel}` : '可能魔改或参数不同',
    }, null, 2);
  },
  {
    name: 'verify_algorithm',
    description: `验证是否为标准加密算法,或根据密文特征识别算法类型。

验证模式:传入 algorithm + input + expected,对比计算结果
识别模式:只传 expected(密文),自动识别可能的算法类型`,
    schema: z.object({
      algorithm: z.enum(['md5', 'sha1', 'sha256', 'sha512', 'hmac', 'aes']).optional()
        .describe('算法类型。不传则进入识别模式'),
      input: z.string().optional().describe('原始输入'),
      expected: z.string().describe('目标加密结果(验证模式)或待识别的密文(识别模式)'),
      key: z.string().optional().describe('密钥(HMAC/AES 需要)'),
      iv: z.string().optional().describe('IV 向量(AES 可选)'),
      hmacHash: z.enum(['md5', 'sha1', 'sha256', 'sha512']).optional().describe('HMAC 哈希算法,默认 sha256'),
      aesMode: z.enum(['cbc', 'ecb']).optional().describe('AES 模式,默认 cbc'),
    }),
  }
);
116
+
117
+ export const verifyAlgorithmTools = [verifyAlgorithm];
@@ -51,7 +51,7 @@ export class EncryptionAnalyzer {
51
51
  }
52
52
 
53
53
  traceParam(code, paramName) {
54
- const ast = this.astAnalyzer.parse(code);
54
+ const _ast = this.astAnalyzer.parse(code);
55
55
  const traces = [];
56
56
 
57
57
  // 简化实现:查找参数使用位置
@@ -80,7 +80,7 @@ export class EncryptionAnalyzer {
80
80
  // 使用模式库进行深度检测
81
81
  detectWithPatterns(code) {
82
82
  const detected = [];
83
- for (const [key, pattern] of Object.entries(cryptoPatterns)) {
83
+ for (const [_key, pattern] of Object.entries(cryptoPatterns)) {
84
84
  for (const sig of pattern.signatures) {
85
85
  if (sig.test(code)) {
86
86
  detected.push({
@@ -26,24 +26,38 @@ export class EnvBridge {
26
26
 
27
27
  for (const path of missingPaths) {
28
28
  try {
29
- // 1. 从真实浏览器采集
29
+ // 1. 尝试从真实浏览器采集
30
30
  const collected = await this.collector.collect(path, { depth: 2 });
31
31
 
32
- if (!collected.success) {
33
- results.failed.push({ path, reason: 'collect_failed', error: collected.error });
34
- continue;
35
- }
32
+ if (collected.success) {
33
+ results.collected.push(path);
34
+ this.collectedData.set(path, collected);
36
35
 
37
- results.collected.push(path);
38
- this.collectedData.set(path, collected);
36
+ // 2. 生成补丁代码
37
+ const patch = this._generatePatch(path, collected);
38
+ if (patch) {
39
+ results.patched.push({ path, code: patch });
40
+ continue;
41
+ }
42
+ }
39
43
 
40
- // 2. 生成补丁代码
41
- const patch = this._generatePatch(path, collected);
42
- if (patch) {
43
- results.patched.push({ path, code: patch });
44
+ // 3. 采集失败时 fallback 到 PatchGenerator
45
+ const fallback = await this.patchGenerator.generate(path);
46
+ if (fallback.code) {
47
+ results.patched.push({ path, code: fallback.code, source: fallback.source });
48
+ } else {
49
+ results.failed.push({ path, reason: 'no_patch' });
44
50
  }
45
51
 
46
52
  } catch (e) {
53
+ // 浏览器不可用时也 fallback
54
+ try {
55
+ const fallback = await this.patchGenerator.generate(path);
56
+ if (fallback.code) {
57
+ results.patched.push({ path, code: fallback.code, source: fallback.source });
58
+ continue;
59
+ }
60
+ } catch { /* ignore */ }
47
61
  results.failed.push({ path, reason: 'error', error: e.message });
48
62
  }
49
63
  }
@@ -66,7 +80,7 @@ export class EnvBridge {
66
80
  // 根据数据类型生成不同的补丁
67
81
  switch (data.type) {
68
82
  case 'string':
69
- return `${parentPath}.${propName} = "${data.value}";`;
83
+ return `${parentPath}.${propName} = ${JSON.stringify(data.value)};`;
70
84
 
71
85
  case 'number':
72
86
  return `${parentPath}.${propName} = ${data.value};`;
@@ -123,7 +137,7 @@ export class EnvBridge {
123
137
 
124
138
  _serializeValue(data) {
125
139
  switch (data.type) {
126
- case 'string': return `"${data.value}"`;
140
+ case 'string': return JSON.stringify(data.value);
127
141
  case 'number': return data.value;
128
142
  case 'boolean': return data.value;
129
143
  case 'null': return 'null';
@@ -8,6 +8,7 @@ import { EventEmitter } from 'events';
8
8
  import { getDefaultHookScript } from './defaultHooks.js';
9
9
  import { NetworkInterceptor } from './interceptors/NetworkInterceptor.js';
10
10
  import { ScriptInterceptor } from './interceptors/ScriptInterceptor.js';
11
+ import { AntiDebugInterceptor } from './interceptors/AntiDebugInterceptor.js';
11
12
  import { getDataStore } from '../store/DataStore.js';
12
13
 
13
14
  export class BrowserClient extends EventEmitter {
@@ -20,9 +21,13 @@ export class BrowserClient extends EventEmitter {
20
21
  this.cdpSession = null;
21
22
  this.networkInterceptor = null;
22
23
  this.scriptInterceptor = null;
24
+ this.antiDebugInterceptor = null;
23
25
  this.hookScript = null;
24
26
  this.onMessage = null;
25
27
  this._isCleaningUp = false;
28
+ // CDP session 健康检查节流
29
+ this._cdpLastCheck = 0;
30
+ this._cdpCheckInterval = 5000; // 5秒内不重复检查
26
31
  }
27
32
 
28
33
  /**
@@ -37,6 +42,7 @@ export class BrowserClient extends EventEmitter {
37
42
  headless = false,
38
43
  executablePath = null,
39
44
  args = [],
45
+ userDataDir = null,
40
46
  } = options;
41
47
 
42
48
  const launchOptions = {
@@ -53,12 +59,22 @@ export class BrowserClient extends EventEmitter {
53
59
  launchOptions.executablePath = executablePath;
54
60
  }
55
61
 
56
- this.browser = await chromium.launch(launchOptions);
57
- this.emit('launched', { headless });
58
-
59
- this.context = await this.browser.newContext({
60
- ignoreHTTPSErrors: true,
61
- });
62
+ this._persistent = !!userDataDir;
63
+
64
+ if (userDataDir) {
65
+ // 持久化模式:launchPersistentContext 返回 BrowserContext
66
+ launchOptions.ignoreHTTPSErrors = true;
67
+ this.context = await chromium.launchPersistentContext(userDataDir, launchOptions);
68
+ this.browser = this.context.browser();
69
+ this.emit('launched', { headless, persistent: true });
70
+ } else {
71
+ // 临时模式(原有逻辑)
72
+ this.browser = await chromium.launch(launchOptions);
73
+ this.emit('launched', { headless });
74
+ this.context = await this.browser.newContext({
75
+ ignoreHTTPSErrors: true,
76
+ });
77
+ }
62
78
 
63
79
  // 保存 hook 脚本
64
80
  this.hookScript = getDefaultHookScript();
@@ -66,11 +82,22 @@ export class BrowserClient extends EventEmitter {
66
82
  // 使用 addInitScript 在 context 级别注入
67
83
  await this.context.addInitScript(this.hookScript);
68
84
 
69
- this.page = await this.context.newPage();
85
+ // 持久化上下文自带默认页面,临时模式需要新建
86
+ this.page = this._persistent
87
+ ? (this.context.pages()[0] || await this.context.newPage())
88
+ : await this.context.newPage();
70
89
 
71
90
  // 监听新页面创建(弹窗、新标签页)
72
91
  this.context.on('page', async (newPage) => {
73
92
  console.log('[BrowserClient] 检测到新页面');
93
+
94
+ // 清理旧页面的 CDP session(避免泄漏)
95
+ if (this.cdpSession && this._cdpSessionPage && this._cdpSessionPage !== newPage) {
96
+ await this.cdpSession.detach().catch(() => {});
97
+ this.cdpSession = null;
98
+ this._cdpSessionPage = null;
99
+ }
100
+
74
101
  this.pages.push(newPage);
75
102
  this.page = newPage; // 切换到新页面
76
103
  await this.setupPage(newPage);
@@ -96,6 +123,13 @@ export class BrowserClient extends EventEmitter {
96
123
  */
97
124
  async setupPage(page) {
98
125
  try {
126
+ // 如果这是当前页面的重新设置,先清理旧的 session
127
+ if (page === this.page && this.cdpSession && this._cdpSessionPage === page) {
128
+ await this.cdpSession.detach().catch(() => {});
129
+ this.cdpSession = null;
130
+ this._cdpSessionPage = null;
131
+ }
132
+
99
133
  const cdp = await page.context().newCDPSession(page);
100
134
 
101
135
  // 1. 启用 Runtime 域
@@ -125,11 +159,22 @@ export class BrowserClient extends EventEmitter {
125
159
  await networkInterceptor.start();
126
160
  await scriptInterceptor.start();
127
161
 
128
- // 保存引用
162
+ // 反无限 debugger:必须在 ScriptInterceptor 之后(Debugger 域已启用)
163
+ const antiDebugInterceptor = new AntiDebugInterceptor(cdp);
164
+ await antiDebugInterceptor.start();
165
+
166
+ // ScriptInterceptor 拉取源码后通知 AntiDebugInterceptor,避免重复 CDP 调用
167
+ scriptInterceptor.onSource = (scriptId, source) => {
168
+ antiDebugInterceptor.checkScript(scriptId, source);
169
+ };
170
+
171
+ // 保存引用(仅对当前活动页面)
129
172
  if (page === this.page) {
130
173
  this.cdpSession = cdp;
174
+ this._cdpSessionPage = page; // 关键:设置标记,让 getCDPSession 知道这是当前页面的 session
131
175
  this.networkInterceptor = networkInterceptor;
132
176
  this.scriptInterceptor = scriptInterceptor;
177
+ this.antiDebugInterceptor = antiDebugInterceptor;
133
178
  }
134
179
 
135
180
  // 监听页面导航
@@ -146,17 +191,52 @@ export class BrowserClient extends EventEmitter {
146
191
  }
147
192
 
148
193
  /**
149
- * 获取 CDP 会话(始终使用当前页面的 session
194
+ * 获取 CDP 会话(复用已有 session,仅在 page 变化时重建)
150
195
  */
151
196
  async getCDPSession() {
152
- // 每次都为当前页面创建新的 CDP session,确保上下文正确
153
- if (this.page) {
197
+ if (!this.page) return this.cdpSession;
198
+
199
+ // page 未变且 session 存在 → 复用
200
+ if (this.cdpSession && this._cdpSessionPage === this.page) {
201
+ // 节流:避免频繁健康检查
202
+ const now = Date.now();
203
+ if (now - this._cdpLastCheck < this._cdpCheckInterval) {
204
+ return this.cdpSession;
205
+ }
206
+
207
+ try {
208
+ // 通过简单的 Runtime.evaluate 验证 session 是否还活着
209
+ await this.cdpSession.send('Runtime.evaluate', { expression: '1' });
210
+ this._cdpLastCheck = now;
211
+ return this.cdpSession;
212
+ } catch {
213
+ // session 已失效,需要重新创建
214
+ console.log('[BrowserClient] CDP session 已失效,重新创建');
215
+ this.cdpSession = null;
216
+ this._cdpSessionPage = null;
217
+ }
218
+ }
219
+
220
+ // page 变了或 session 失效 → detach 旧 session,创建新的
221
+ if (this.cdpSession) {
154
222
  try {
155
- this.cdpSession = await this.page.context().newCDPSession(this.page);
156
- } catch (e) {
157
- console.error('[BrowserClient] 创建 CDP session 失败:', e.message);
158
- return null;
223
+ await this.cdpSession.detach();
224
+ } catch {
225
+ // 忽略 detach 错误(session 可能已断开)
159
226
  }
227
+ this.cdpSession = null;
228
+ }
229
+
230
+ try {
231
+ this.cdpSession = await this.page.context().newCDPSession(this.page);
232
+ this._cdpSessionPage = this.page;
233
+ this._cdpLastCheck = Date.now();
234
+ console.log('[BrowserClient] CDP session 已创建');
235
+ } catch (e) {
236
+ console.error('[BrowserClient] 创建 CDP session 失败:', e.message);
237
+ this.cdpSession = null;
238
+ this._cdpSessionPage = null;
239
+ return null;
160
240
  }
161
241
  return this.cdpSession;
162
242
  }
@@ -165,8 +245,19 @@ export class BrowserClient extends EventEmitter {
165
245
  * 导航到 URL
166
246
  */
167
247
  async navigate(url, options = {}) {
168
- const { waitUntil = 'domcontentloaded' } = options;
169
- await this.page.goto(url, { waitUntil });
248
+ const { waitUntil = 'domcontentloaded', timeout = 30000 } = options;
249
+ try {
250
+ await this.page.goto(url, { waitUntil, timeout });
251
+ } catch (e) {
252
+ // 超时不一定是错误,页面可能仍在加载,继续执行
253
+ if (e.message?.includes('timeout')) {
254
+ console.log('[BrowserClient] 导航超时,继续等待页面稳定...');
255
+ // 等待一小段时间让页面尽可能完成加载
256
+ await this.page.waitForTimeout(2000);
257
+ } else {
258
+ throw e;
259
+ }
260
+ }
170
261
  return this.page.url();
171
262
  }
172
263
 
@@ -208,6 +299,9 @@ export class BrowserClient extends EventEmitter {
208
299
  await this.scriptInterceptor.stop?.().catch(() => {});
209
300
  this.scriptInterceptor = null;
210
301
  }
302
+ if (this.antiDebugInterceptor) {
303
+ this.antiDebugInterceptor = null;
304
+ }
211
305
 
212
306
  // 分离 CDP session
213
307
  if (this.cdpSession) {
@@ -216,7 +310,16 @@ export class BrowserClient extends EventEmitter {
216
310
  }
217
311
 
218
312
  // 关闭浏览器
219
- if (this.browser) {
313
+ if (this._persistent) {
314
+ // 持久化模式:关闭 context 即保存数据并关闭浏览器
315
+ if (this.context) {
316
+ await this.context.close();
317
+ this.context = null;
318
+ this.browser = null;
319
+ this.page = null;
320
+ this.pages = [];
321
+ }
322
+ } else if (this.browser) {
220
323
  await this.browser.close();
221
324
  this.browser = null;
222
325
  this.context = null;
@@ -229,6 +332,9 @@ export class BrowserClient extends EventEmitter {
229
332
  this.emit('error', e);
230
333
  } finally {
231
334
  this._isCleaningUp = false;
335
+ // 重置 CDP 相关状态
336
+ this._cdpLastCheck = 0;
337
+ this._cdpSessionPage = null;
232
338
  }
233
339
  }
234
340
  }
@@ -15,13 +15,17 @@ export class EnvCollector {
15
15
  * @param {object} options - 采集选项
16
16
  */
17
17
  async collect(path, options = {}) {
18
- const { depth = 1, includeProto = false, useCache = true } = options;
18
+ const { depth = 1, includeProto = false, useCache = true, timeout = 5000 } = options;
19
19
 
20
20
  if (useCache && this.cache.has(path)) {
21
21
  return this.cache.get(path);
22
22
  }
23
23
 
24
- const result = await this.page.evaluate(({ path, depth, includeProto }) => {
24
+ // 使用 Promise.race 添加超时保护
25
+ const evaluatePromise = this.page.evaluate(({ path, depth, includeProto: _includeProto }) => {
26
+ // 用于检测循环引用的 WeakSet
27
+ const seen = new WeakSet();
28
+
25
29
  function getByPath(obj, path) {
26
30
  return path.split('.').reduce((o, k) => o && o[k], obj);
27
31
  }
@@ -40,29 +44,55 @@ export class EnvCollector {
40
44
  return { type, value: val };
41
45
  }
42
46
 
47
+ // 检测循环引用
48
+ if (seen.has(val)) {
49
+ return { type: 'object', value: '[Circular]', circular: true };
50
+ }
51
+
43
52
  if (currentDepth >= maxDepth) {
44
53
  return { type: 'object', value: '[Object]', truncated: true };
45
54
  }
46
55
 
56
+ seen.add(val);
57
+
47
58
  if (Array.isArray(val)) {
48
59
  return {
49
60
  type: 'array',
50
- value: val.map(v => serialize(v, currentDepth + 1, maxDepth))
61
+ length: val.length,
62
+ value: val.slice(0, 20).map(v => serialize(v, currentDepth + 1, maxDepth))
51
63
  };
52
64
  }
53
65
 
54
66
  const result = { type: 'object', properties: {} };
55
- const keys = Object.getOwnPropertyNames(val);
67
+ let keys;
68
+ try {
69
+ keys = Object.getOwnPropertyNames(val);
70
+ } catch (e) {
71
+ return { type: 'object', value: '[Error accessing keys]', error: e.message };
72
+ }
56
73
 
57
- for (const key of keys.slice(0, 50)) {
74
+ for (const key of keys.slice(0, 30)) {
58
75
  try {
59
76
  const desc = Object.getOwnPropertyDescriptor(val, key);
77
+ if (!desc) continue;
78
+
79
+ // 安全处理:避免触发有副作用的 getter
60
80
  if (desc.get) {
81
+ // 对于 getter,只记录描述符信息,不执行 getter
82
+ result.properties[key] = {
83
+ type: 'getter',
84
+ hasGetter: true,
85
+ enumerable: desc.enumerable,
86
+ configurable: desc.configurable
87
+ };
88
+ } else if (desc.set && desc.value === undefined) {
89
+ // 只有 setter 没有 getter
61
90
  result.properties[key] = {
62
- ...serialize(val[key], currentDepth + 1, maxDepth),
63
- hasGetter: true
91
+ type: 'setter',
92
+ hasSetter: true
64
93
  };
65
94
  } else {
95
+ // 普通值
66
96
  result.properties[key] = serialize(desc.value, currentDepth + 1, maxDepth);
67
97
  }
68
98
  } catch (e) {
@@ -89,15 +119,19 @@ export class EnvCollector {
89
119
 
90
120
  let descriptor = null;
91
121
  if (parent) {
92
- const desc = Object.getOwnPropertyDescriptor(parent, propName);
93
- if (desc) {
94
- descriptor = {
95
- configurable: desc.configurable,
96
- enumerable: desc.enumerable,
97
- writable: desc.writable,
98
- hasGetter: !!desc.get,
99
- hasSetter: !!desc.set
100
- };
122
+ try {
123
+ const desc = Object.getOwnPropertyDescriptor(parent, propName);
124
+ if (desc) {
125
+ descriptor = {
126
+ configurable: desc.configurable,
127
+ enumerable: desc.enumerable,
128
+ writable: desc.writable,
129
+ hasGetter: !!desc.get,
130
+ hasSetter: !!desc.set
131
+ };
132
+ }
133
+ } catch (e) {
134
+ // 忽略描述符读取错误
101
135
  }
102
136
  }
103
137
 
@@ -112,7 +146,19 @@ export class EnvCollector {
112
146
  }
113
147
  }, { path, depth, includeProto });
114
148
 
115
- if (result.success && useCache) {
149
+ // 添加超时
150
+ const timeoutPromise = new Promise((_, reject) =>
151
+ setTimeout(() => reject(new Error('采集超时')), timeout)
152
+ );
153
+
154
+ let result;
155
+ try {
156
+ result = await Promise.race([evaluatePromise, timeoutPromise]);
157
+ } catch (e) {
158
+ result = { success: false, error: e.message };
159
+ }
160
+
161
+ if (result?.success && useCache) {
116
162
  this.cache.set(path, result);
117
163
  }
118
164
 
@@ -154,9 +200,12 @@ export class EnvCollector {
154
200
  * 深度采集整个对象
155
201
  */
156
202
  async collectDeep(rootPath, options = {}) {
157
- const { maxDepth = 3, maxProps = 100 } = options;
203
+ const { maxDepth = 3, maxProps = 100, timeout = 5000 } = options;
204
+
205
+ const evaluatePromise = this.page.evaluate(({ rootPath, maxDepth, maxProps }) => {
206
+ // 用于检测循环引用的 WeakSet
207
+ const seen = new WeakSet();
158
208
 
159
- return await this.page.evaluate(({ rootPath, maxDepth, maxProps }) => {
160
209
  function getByPath(obj, path) {
161
210
  return path.split('.').reduce((o, k) => o && o[k], obj);
162
211
  }
@@ -165,19 +214,38 @@ export class EnvCollector {
165
214
  if (depth > maxDepth || collected.size > maxProps) return;
166
215
  if (!obj || typeof obj !== 'object') return;
167
216
 
217
+ // 检测循环引用
218
+ if (seen.has(obj)) return;
219
+ seen.add(obj);
220
+
168
221
  const keys = Object.getOwnPropertyNames(obj);
169
- for (const key of keys) {
222
+ for (const key of keys.slice(0, 30)) {
170
223
  if (collected.size > maxProps) break;
171
224
 
172
225
  const fullPath = path ? `${path}.${key}` : key;
173
226
  try {
174
- const val = obj[key];
175
- const type = typeof val;
227
+ const desc = Object.getOwnPropertyDescriptor(obj, key);
228
+ if (!desc) continue;
229
+
230
+ // 安全处理:避免触发有副作用的 getter
231
+ let val;
232
+ let type;
233
+ if (desc.get) {
234
+ type = 'getter';
235
+ val = '[Getter]';
236
+ } else if (desc.set && desc.value === undefined) {
237
+ type = 'setter';
238
+ val = '[Setter]';
239
+ } else {
240
+ val = desc.value;
241
+ type = typeof val;
242
+ }
176
243
 
177
244
  collected.set(fullPath, {
178
245
  type,
179
246
  value: type === 'function' ? '[Function]' :
180
247
  type === 'object' ? '[Object]' :
248
+ type === 'getter' || type === 'setter' ? val :
181
249
  val
182
250
  });
183
251
 
@@ -204,6 +272,17 @@ export class EnvCollector {
204
272
  properties: Object.fromEntries(collected)
205
273
  };
206
274
  }, { rootPath, maxDepth, maxProps });
275
+
276
+ // 添加超时保护
277
+ const timeoutPromise = new Promise((_, reject) =>
278
+ setTimeout(() => reject(new Error('collectDeep timeout')), timeout)
279
+ );
280
+
281
+ try {
282
+ return await Promise.race([evaluatePromise, timeoutPromise]);
283
+ } catch (e) {
284
+ return { success: false, error: e.message };
285
+ }
207
286
  }
208
287
 
209
288
  // === 特殊环境采集 ===
@@ -160,7 +160,9 @@ function getCookieHook() {
160
160
  return value;
161
161
  },
162
162
  set: function(val) {
163
- deepspider.log('cookie', { action: 'write', value: val });
163
+ // 解析 cookie name(cookie 格式: "name=value; expires=...; path=...")
164
+ const cookieName = val?.split('=')[0]?.trim();
165
+ deepspider.log('cookie', { action: 'write', name: cookieName, value: val });
164
166
  return cookieDesc.set.call(document, val);
165
167
  },
166
168
  configurable: true
@@ -7,6 +7,7 @@
7
7
  export class HookManager {
8
8
  constructor() {
9
9
  this.logs = [];
10
+ this.maxLogs = 5000;
10
11
  this.onLog = null;
11
12
  this.injected = false;
12
13
  }
@@ -37,6 +38,10 @@ export class HookManager {
37
38
  text,
38
39
  timestamp: Date.now(),
39
40
  });
41
+ // 超过上限时丢弃最旧的 20%
42
+ if (this.logs.length > this.maxLogs) {
43
+ this.logs = this.logs.slice(Math.floor(this.maxLogs * 0.2));
44
+ }
40
45
  if (this.onLog) {
41
46
  this.onLog({ type: msg.type(), text });
42
47
  }