deepspider 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84):
  1. package/.env.example +3 -0
  2. package/README.md +13 -13
  3. package/package.json +6 -6
  4. package/src/agent/core/PanelBridge.js +29 -77
  5. package/src/agent/core/StreamHandler.js +139 -14
  6. package/src/agent/index.js +51 -12
  7. package/src/agent/logger.js +184 -9
  8. package/src/agent/middleware/report.js +42 -16
  9. package/src/agent/middleware/subagent.js +233 -0
  10. package/src/agent/middleware/toolGuard.js +77 -0
  11. package/src/agent/middleware/validationWorkflow.js +171 -0
  12. package/src/agent/prompts/system.js +181 -59
  13. package/src/agent/run.js +41 -6
  14. package/src/agent/skills/crawler/SKILL.md +64 -3
  15. package/src/agent/skills/crawler/evolved.md +9 -1
  16. package/src/agent/skills/dynamic-analysis/SKILL.md +74 -7
  17. package/src/agent/skills/env/SKILL.md +75 -0
  18. package/src/agent/skills/evolve.js +0 -3
  19. package/src/agent/skills/sandbox/SKILL.md +35 -0
  20. package/src/agent/skills/static-analysis/SKILL.md +98 -2
  21. package/src/agent/subagents/anti-detect.js +10 -20
  22. package/src/agent/subagents/captcha.js +7 -19
  23. package/src/agent/subagents/crawler.js +25 -37
  24. package/src/agent/subagents/factory.js +109 -9
  25. package/src/agent/subagents/index.js +4 -13
  26. package/src/agent/subagents/js2python.js +7 -19
  27. package/src/agent/subagents/reverse.js +180 -0
  28. package/src/agent/tools/analysis.js +84 -1
  29. package/src/agent/tools/anti-detect.js +5 -2
  30. package/src/agent/tools/browser.js +160 -0
  31. package/src/agent/tools/captcha.js +1 -1
  32. package/src/agent/tools/capture.js +24 -3
  33. package/src/agent/tools/correlate.js +129 -15
  34. package/src/agent/tools/crawler.js +2 -1
  35. package/src/agent/tools/crawlerGenerator.js +90 -0
  36. package/src/agent/tools/debug.js +43 -6
  37. package/src/agent/tools/evolve.js +6 -3
  38. package/src/agent/tools/extractor.js +5 -1
  39. package/src/agent/tools/file.js +16 -7
  40. package/src/agent/tools/generateHook.js +66 -0
  41. package/src/agent/tools/hookManager.js +19 -9
  42. package/src/agent/tools/index.js +33 -20
  43. package/src/agent/tools/nodejs.js +41 -6
  44. package/src/agent/tools/python.js +4 -4
  45. package/src/agent/tools/report.js +2 -2
  46. package/src/agent/tools/runtime.js +1 -1
  47. package/src/agent/tools/sandbox.js +21 -1
  48. package/src/agent/tools/scratchpad.js +70 -0
  49. package/src/agent/tools/tracing.js +26 -0
  50. package/src/agent/tools/verifyAlgorithm.js +117 -0
  51. package/src/analyzer/EncryptionAnalyzer.js +2 -2
  52. package/src/browser/EnvBridge.js +27 -13
  53. package/src/browser/client.js +124 -18
  54. package/src/browser/collector.js +101 -22
  55. package/src/browser/defaultHooks.js +3 -1
  56. package/src/browser/hooks/index.js +5 -0
  57. package/src/browser/interceptors/AntiDebugInterceptor.js +132 -0
  58. package/src/browser/interceptors/NetworkInterceptor.js +77 -13
  59. package/src/browser/interceptors/ScriptInterceptor.js +34 -9
  60. package/src/browser/interceptors/index.js +1 -0
  61. package/src/browser/ui/analysisPanel.js +469 -464
  62. package/src/cli/commands/config.js +11 -3
  63. package/src/config/paths.js +9 -1
  64. package/src/config/settings.js +7 -1
  65. package/src/core/PatchGenerator.js +26 -6
  66. package/src/core/Sandbox.js +140 -3
  67. package/src/env/EnvCodeGenerator.js +60 -88
  68. package/src/env/modules/bom/history.js +6 -0
  69. package/src/env/modules/bom/location.js +6 -0
  70. package/src/env/modules/bom/navigator.js +13 -0
  71. package/src/env/modules/bom/screen.js +6 -0
  72. package/src/env/modules/bom/storage.js +7 -0
  73. package/src/env/modules/dom/document.js +14 -0
  74. package/src/env/modules/dom/event.js +4 -0
  75. package/src/env/modules/index.js +27 -10
  76. package/src/env/modules/webapi/fetch.js +4 -0
  77. package/src/env/modules/webapi/url.js +4 -0
  78. package/src/env/modules/webapi/xhr.js +8 -0
  79. package/src/store/DataStore.js +130 -47
  80. package/src/store/Store.js +2 -1
  81. package/src/agent/subagents/dynamic.js +0 -64
  82. package/src/agent/subagents/env-agent.js +0 -82
  83. package/src/agent/subagents/sandbox.js +0 -55
  84. package/src/agent/subagents/static.js +0 -66
@@ -0,0 +1,132 @@
1
/**
 * DeepSpider - anti "infinite debugger" interceptor.
 *
 * Uses CDP `Debugger.setBlackboxedRanges` to skip scripts that contain
 * `debugger` statements: no runtime overhead, no source modification, and so
 * nothing for script integrity checks to notice.
 *
 * Known limitation: /\bdebugger\b/ also matches the word inside strings and
 * comments. That is acceptable for anti-scraping targets: a script that gets
 * blackboxed by mistake still runs normally, it just cannot be debugged.
 */

export class AntiDebugInterceptor {
  /**
   * @param {object} cdpClient - CDP session exposing `on(event, handler)` and
   *   a Promise-returning `send(method, params)`.
   * @param {object} [options] - Optional tuning; defaults match the previous
   *   hard-coded values.
   * @param {number} [options.pausedWindowMs=1000] - Length of the counting window.
   * @param {number} [options.pausedThreshold=5] - More than this many
   *   auto-resumed pauses inside one window counts as a debugger storm.
   * @param {number} [options.autoStormMs=3000] - Storm-mode duration after
   *   automatic detection.
   * @param {number} [options.manualStormMs=5000] - Storm-mode duration after
   *   `setStormMode(true)`.
   */
  constructor(cdpClient, options = {}) {
    const {
      pausedWindowMs = 1000,
      pausedThreshold = 5,
      autoStormMs = 3000,
      manualStormMs = 5000,
    } = options;

    this.client = cdpClient;
    this.blackboxedScripts = new Set();
    // High-frequency `debugger` detection state.
    this.pausedCount = 0;
    this.pausedWindowStart = 0;
    this.PAUSED_WINDOW_MS = pausedWindowMs;
    this.PAUSED_THRESHOLD = pausedThreshold;
    this.AUTO_STORM_MS = autoStormMs;
    this.MANUAL_STORM_MS = manualStormMs;
    this.stormMode = false; // storm mode: every pause is resumed, breakpoints included
    this.stormTimer = null; // timer that leaves storm mode automatically
    this.started = false; // guards against registering the listener twice
  }

  /**
   * Subscribe to `Debugger.paused` and auto-resume pauses that were not caused
   * by a breakpoint. This is the fallback for synchronous `debugger` statements
   * that fire before blackboxing took effect (timing race).
   *
   * Calling `start()` again is a no-op: a second listener would send
   * `Debugger.resume` twice per pause and double the storm counter.
   */
  async start() {
    if (this.started) return;
    this.started = true;

    this.client.on('Debugger.paused', (params) => this._onPaused(params));

    console.log('[AntiDebugInterceptor] 已启动');
  }

  /**
   * Handle one `Debugger.paused` event.
   * The pause reason for a `debugger` statement may be 'other' or
   * 'debugCommand' depending on the Chrome version, so anything that is not a
   * breakpoint hit (reason 'breakpoint' or non-empty `hitBreakpoints`) is resumed.
   */
  _onPaused(params) {
    // In storm mode resume unconditionally and do not count.
    if (this.stormMode) {
      this._resume();
      return;
    }

    // Breakpoints set on purpose stay paused.
    if (params.reason === 'breakpoint') return;
    if (params.hitBreakpoints?.length > 0) return;

    // Sliding-window count of auto-resumed pauses.
    const now = Date.now();
    if (now - this.pausedWindowStart > this.PAUSED_WINDOW_MS) {
      this.pausedWindowStart = now;
      this.pausedCount = 1;
    } else {
      this.pausedCount++;
    }

    if (this.pausedCount > this.PAUSED_THRESHOLD) {
      console.log('[AntiDebugInterceptor] 检测到 debugger 风暴,启用风暴模式');
      this._enterStormMode(this.AUTO_STORM_MS, '[AntiDebugInterceptor] 退出风暴模式');
    }

    this._resume();
  }

  /** Best-effort resume; failures (e.g. target already resumed) are ignored. */
  _resume() {
    this.client.send('Debugger.resume').catch(() => {});
  }

  /** Cancel the pending storm-mode exit timer, if any. */
  _clearStormTimer() {
    if (this.stormTimer) {
      clearTimeout(this.stormTimer);
      this.stormTimer = null;
    }
  }

  /**
   * Turn storm mode on and schedule its automatic exit.
   * @param {number} durationMs - How long storm mode stays active.
   * @param {string} exitMessage - Message logged when the timer fires.
   */
  _enterStormMode(durationMs, exitMessage) {
    this._clearStormTimer();
    this.stormMode = true;
    this.stormTimer = setTimeout(() => {
      console.log(exitMessage);
      this.stormMode = false;
      this.pausedCount = 0;
      this.stormTimer = null;
    }, durationMs);
    // Do not keep the Node process alive just to flip an in-memory flag.
    this.stormTimer.unref?.();
  }

  /**
   * Inspect a script's source and blackbox the whole script when it contains
   * `debugger`. Driven by the ScriptInterceptor `onSource` callback so the
   * source is not fetched a second time.
   *
   * @param {string} scriptId - CDP script id.
   * @param {string} scriptSource - Full script source.
   */
  checkScript(scriptId, scriptSource) {
    // Already blackboxed: skip the redundant CDP round trip.
    if (this.blackboxedScripts.has(scriptId)) return;
    if (!/\bdebugger\b/.test(scriptSource)) return;

    this.client.send('Debugger.setBlackboxedRanges', {
      scriptId,
      // A single position at 0:0 starts the blackboxed range at the first
      // character, so the whole script is covered.
      positions: [{ lineNumber: 0, columnNumber: 0 }],
    }).then(() => {
      this.blackboxedScripts.add(scriptId);
    }).catch(() => {});
  }

  /**
   * Remove the blackbox from a script (used by breakpoint tooling).
   * Rejects if the CDP call fails; the script then stays recorded as blackboxed.
   *
   * @param {string} scriptId - CDP script id.
   */
  async unblackbox(scriptId) {
    if (this.blackboxedScripts.has(scriptId)) {
      await this.client.send('Debugger.setBlackboxedRanges', {
        scriptId,
        positions: [],
      });
      this.blackboxedScripts.delete(scriptId);
    }
  }

  /**
   * Manually enable or disable storm mode, for targets with aggressive
   * anti-debugging. When enabled, storm mode ends by itself after
   * `MANUAL_STORM_MS` milliseconds.
   *
   * @param {boolean} enabled - Truthy to enable, falsy to disable.
   */
  setStormMode(enabled) {
    this._clearStormTimer();

    if (enabled) {
      console.log('[AntiDebugInterceptor] 手动启用风暴模式');
      this._enterStormMode(this.MANUAL_STORM_MS, '[AntiDebugInterceptor] 自动退出风暴模式');
    } else {
      this.stormMode = false;
      console.log('[AntiDebugInterceptor] 手动禁用风暴模式');
      this.pausedCount = 0;
    }
  }

  /**
   * @returns {boolean} Whether storm mode is currently active.
   */
  isStormMode() {
    return this.stormMode;
  }
}
@@ -46,11 +46,16 @@ export class NetworkInterceptor {
46
46
  this.onLoadingFinished(params);
47
47
  });
48
48
 
49
+ // 监听加载失败(清理 pendingRequests,防止内存泄漏)
50
+ this.client.on('Network.loadingFailed', (params) => {
51
+ this.pendingRequests.delete(params.requestId);
52
+ });
53
+
49
54
  console.log('[NetworkInterceptor] 已启动');
50
55
  }
51
56
 
52
57
  onRequest(params) {
53
- const { requestId, request, timestamp } = params;
58
+ const { requestId, request, timestamp, initiator } = params;
54
59
 
55
60
  // 只记录 XHR/Fetch 请求
56
61
  const type = params.type;
@@ -62,10 +67,37 @@ export class NetworkInterceptor {
62
67
  headers: request.headers,
63
68
  postData: request.postData,
64
69
  timestamp: timestamp * 1000,
65
- pageUrl: this.getPageUrl() // 记录请求时的页面 URL
70
+ pageUrl: this.getPageUrl(),
71
+ initiator: this.formatInitiator(initiator),
66
72
  });
67
73
  }
68
74
 
75
+ /**
76
+ * 精简 initiator 调用栈(只保留前 5 帧,过滤内部帧)
77
+ */
78
+ formatInitiator(initiator) {
79
+ if (!initiator) return null;
80
+ const result = { type: initiator.type };
81
+ if (initiator.url) {
82
+ result.url = initiator.url;
83
+ result.lineNumber = initiator.lineNumber;
84
+ }
85
+ if (initiator.stack?.callFrames) {
86
+ result.callFrames = initiator.stack.callFrames
87
+ .filter(f => f.url && !f.url.includes('patchright') && !f.url.includes('__playwright'))
88
+ .slice(0, 5)
89
+ .map(f => ({
90
+ functionName: f.functionName || '(anonymous)',
91
+ url: f.url,
92
+ lineNumber: f.lineNumber,
93
+ columnNumber: f.columnNumber,
94
+ }));
95
+ }
96
+ // 只有 type 没有实际定位信息时返回 null
97
+ if (!result.url && !result.callFrames?.length) return null;
98
+ return result;
99
+ }
100
+
69
101
  onResponse(params) {
70
102
  const { requestId, response } = params;
71
103
  const pending = this.pendingRequests.get(requestId);
@@ -81,15 +113,31 @@ export class NetworkInterceptor {
81
113
  if (!pending) return;
82
114
 
83
115
  try {
84
- // 获取响应体
85
- const { body, base64Encoded } = await this.client.send(
86
- 'Network.getResponseBody',
87
- { requestId }
116
+ // 获取响应体,添加超时保护防止 CDP 命令挂起
117
+ const bodyPromise = this.client.send('Network.getResponseBody', { requestId });
118
+ const timeoutPromise = new Promise((_, reject) =>
119
+ setTimeout(() => reject(new Error('getResponseBody timeout')), 5000)
88
120
  );
89
-
90
- const responseBody = base64Encoded
91
- ? Buffer.from(body, 'base64').toString('utf-8')
92
- : body;
121
+ const { body, base64Encoded } = await Promise.race([bodyPromise, timeoutPromise]);
122
+
123
+ // 处理响应体:检测二进制内容,避免损坏
124
+ let responseBody;
125
+ const contentType = pending.responseHeaders?.['content-type'] || '';
126
+
127
+ if (this.isBinaryContent(contentType)) {
128
+ // 二进制内容:存储元数据而非原始内容
129
+ // base64 长度计算:每 4 个字符 = 3 字节,考虑 padding
130
+ const binarySize = base64Encoded
131
+ ? Math.floor(body.length * 0.75) - (body.match(/=*$/)?.[0].length || 0)
132
+ : body.length;
133
+ responseBody = `[Binary: ${contentType}, ${binarySize} bytes]`;
134
+ } else {
135
+ // 文本内容:安全地转换为字符串
136
+ const rawBody = base64Encoded
137
+ ? Buffer.from(body, 'base64').toString('utf-8')
138
+ : body;
139
+ responseBody = rawBody.slice(0, 50000);
140
+ }
93
141
 
94
142
  // 异步存储到文件
95
143
  this.store.saveResponse({
@@ -98,19 +146,35 @@ export class NetworkInterceptor {
98
146
  status: pending.status,
99
147
  requestHeaders: pending.headers,
100
148
  requestBody: pending.postData,
101
- responseBody: responseBody.slice(0, 50000),
149
+ responseBody,
102
150
  timestamp: pending.timestamp,
103
- pageUrl: pending.pageUrl // 传递页面 URL 用于分站点存储
151
+ pageUrl: pending.pageUrl,
152
+ initiator: pending.initiator,
104
153
  }).catch(e => {
105
154
  console.error('[NetworkInterceptor] 保存失败:', e.message);
106
155
  });
107
156
 
108
- } catch (e) {
157
+ } catch {
109
158
  // 某些响应无法获取 body
110
159
  }
111
160
 
112
161
  this.pendingRequests.delete(requestId);
113
162
  }
163
+
164
+ /**
165
+ * 检测是否为二进制内容类型
166
+ */
167
+ isBinaryContent(contentType) {
168
+ if (!contentType) return false;
169
+ const binaryTypes = [
170
+ 'image/', 'audio/', 'video/', 'application/pdf',
171
+ 'application/octet-stream', 'application/zip',
172
+ 'application/gzip', 'application/x-protobuf',
173
+ 'font/', 'application/vnd.'
174
+ ];
175
+ const lowerType = contentType.toLowerCase();
176
+ return binaryTypes.some(type => lowerType.includes(type));
177
+ }
114
178
  }
115
179
 
116
180
  export default NetworkInterceptor;
@@ -11,6 +11,7 @@ export class ScriptInterceptor {
11
11
  this.page = page; // Playwright page 对象
12
12
  this.store = getDataStore();
13
13
  this.scriptIds = new Set();
14
+ this.onSource = null; // 回调: (scriptId, scriptSource) => void
14
15
  }
15
16
 
16
17
  /**
@@ -38,24 +39,48 @@ export class ScriptInterceptor {
38
39
  }
39
40
 
40
41
  async onScriptParsed(params) {
41
- const { scriptId, url, length } = params;
42
+ const { scriptId, url, length: _length } = params;
42
43
 
43
- // 跳过扩展和空脚本
44
- if (!url || url.startsWith('chrome-extension://')) return;
44
+ // 跳过扩展脚本
45
+ if (url?.startsWith('chrome-extension://')) return;
45
46
  if (this.scriptIds.has(scriptId)) return;
46
47
 
47
48
  this.scriptIds.add(scriptId);
48
49
 
49
- // 异步获取并存储源码
50
- this.fetchAndSave(scriptId, url).catch(() => {});
50
+ if (url) {
51
+ // URL 的脚本:获取源码、通知订阅者、存储
52
+ this.fetchAndSave(scriptId, url).catch(() => {});
53
+ } else if (this.onSource) {
54
+ // 无 URL 脚本(eval/new Function 生成):仅通知订阅者用于 debugger 检测,不存储
55
+ this.fetchAndNotify(scriptId).catch(() => {});
56
+ }
57
+ }
58
+
59
+ async fetchAndNotify(scriptId) {
60
+ try {
61
+ // 添加超时保护防止 CDP 命令挂起
62
+ const sourcePromise = this.client.send('Debugger.getScriptSource', { scriptId });
63
+ const timeoutPromise = new Promise((_, reject) =>
64
+ setTimeout(() => reject(new Error('getScriptSource timeout')), 5000)
65
+ );
66
+ const { scriptSource } = await Promise.race([sourcePromise, timeoutPromise]);
67
+ try { this.onSource(scriptId, scriptSource); } catch { /* 订阅者异常不影响主流程 */ }
68
+ } catch {
69
+ // 获取失败(脚本已卸载等),忽略
70
+ }
51
71
  }
52
72
 
53
73
  async fetchAndSave(scriptId, url) {
54
74
  try {
55
- const { scriptSource } = await this.client.send(
56
- 'Debugger.getScriptSource',
57
- { scriptId }
75
+ // 添加超时保护防止 CDP 命令挂起
76
+ const sourcePromise = this.client.send('Debugger.getScriptSource', { scriptId });
77
+ const timeoutPromise = new Promise((_, reject) =>
78
+ setTimeout(() => reject(new Error('getScriptSource timeout')), 5000)
58
79
  );
80
+ const { scriptSource } = await Promise.race([sourcePromise, timeoutPromise]);
81
+
82
+ // 通知订阅者(AntiDebugInterceptor 等)
83
+ try { this.onSource?.(scriptId, scriptSource); } catch { /* 订阅者异常不影响主流程 */ }
59
84
 
60
85
  // 限制大小,超大脚本只保存部分
61
86
  const source = scriptSource.slice(0, 500000);
@@ -67,7 +92,7 @@ export class ScriptInterceptor {
67
92
  timestamp: Date.now(),
68
93
  pageUrl: this.getPageUrl() // 传递页面 URL
69
94
  });
70
- } catch (e) {
95
+ } catch {
71
96
  // 获取失败,跳过
72
97
  }
73
98
  }
@@ -4,3 +4,4 @@
4
4
 
5
5
  export { NetworkInterceptor } from './NetworkInterceptor.js';
6
6
  export { ScriptInterceptor } from './ScriptInterceptor.js';
7
+ export { AntiDebugInterceptor } from './AntiDebugInterceptor.js';