deepspider 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/.claude/agents/check.md +122 -0
  2. package/.claude/agents/debug.md +106 -0
  3. package/.claude/agents/dispatch.md +214 -0
  4. package/.claude/agents/implement.md +96 -0
  5. package/.claude/agents/plan.md +396 -0
  6. package/.claude/agents/research.md +120 -0
  7. package/.claude/commands/evolve/merge.md +80 -0
  8. package/.claude/commands/trellis/before-backend-dev.md +13 -0
  9. package/.claude/commands/trellis/before-frontend-dev.md +13 -0
  10. package/.claude/commands/trellis/break-loop.md +107 -0
  11. package/.claude/commands/trellis/check-backend.md +13 -0
  12. package/.claude/commands/trellis/check-cross-layer.md +153 -0
  13. package/.claude/commands/trellis/check-frontend.md +13 -0
  14. package/.claude/commands/trellis/create-command.md +154 -0
  15. package/.claude/commands/trellis/finish-work.md +129 -0
  16. package/.claude/commands/trellis/integrate-skill.md +219 -0
  17. package/.claude/commands/trellis/onboard.md +358 -0
  18. package/.claude/commands/trellis/parallel.md +193 -0
  19. package/.claude/commands/trellis/record-session.md +62 -0
  20. package/.claude/commands/trellis/start.md +280 -0
  21. package/.claude/commands/trellis/update-spec.md +213 -0
  22. package/.claude/hooks/inject-subagent-context.py +758 -0
  23. package/.claude/hooks/ralph-loop.py +374 -0
  24. package/.claude/hooks/session-start.py +126 -0
  25. package/.claude/settings.json +41 -0
  26. package/.claude/skills/deepagents-guide/SKILL.md +428 -0
  27. package/.cursor/commands/trellis-before-backend-dev.md +13 -0
  28. package/.cursor/commands/trellis-before-frontend-dev.md +13 -0
  29. package/.cursor/commands/trellis-break-loop.md +107 -0
  30. package/.cursor/commands/trellis-check-backend.md +13 -0
  31. package/.cursor/commands/trellis-check-cross-layer.md +153 -0
  32. package/.cursor/commands/trellis-check-frontend.md +13 -0
  33. package/.cursor/commands/trellis-create-command.md +154 -0
  34. package/.cursor/commands/trellis-finish-work.md +129 -0
  35. package/.cursor/commands/trellis-integrate-skill.md +219 -0
  36. package/.cursor/commands/trellis-onboard.md +358 -0
  37. package/.cursor/commands/trellis-record-session.md +62 -0
  38. package/.cursor/commands/trellis-start.md +156 -0
  39. package/.cursor/commands/trellis-update-spec.md +213 -0
  40. package/.env.example +11 -0
  41. package/.husky/pre-commit +1 -0
  42. package/.mcp.json +8 -0
  43. package/.trellis/.template-hashes.json +65 -0
  44. package/.trellis/.version +1 -0
  45. package/.trellis/scripts/add-session.sh +384 -0
  46. package/.trellis/scripts/common/developer.sh +129 -0
  47. package/.trellis/scripts/common/git-context.sh +263 -0
  48. package/.trellis/scripts/common/paths.sh +208 -0
  49. package/.trellis/scripts/common/phase.sh +150 -0
  50. package/.trellis/scripts/common/registry.sh +247 -0
  51. package/.trellis/scripts/common/task-queue.sh +142 -0
  52. package/.trellis/scripts/common/task-utils.sh +151 -0
  53. package/.trellis/scripts/common/worktree.sh +128 -0
  54. package/.trellis/scripts/create-bootstrap.sh +299 -0
  55. package/.trellis/scripts/get-context.sh +7 -0
  56. package/.trellis/scripts/get-developer.sh +15 -0
  57. package/.trellis/scripts/init-developer.sh +34 -0
  58. package/.trellis/scripts/multi-agent/cleanup.sh +396 -0
  59. package/.trellis/scripts/multi-agent/create-pr.sh +241 -0
  60. package/.trellis/scripts/multi-agent/plan.sh +207 -0
  61. package/.trellis/scripts/multi-agent/start.sh +310 -0
  62. package/.trellis/scripts/multi-agent/status.sh +828 -0
  63. package/.trellis/scripts/task.sh +1118 -0
  64. package/.trellis/spec/backend/deepagents-guide.md +337 -0
  65. package/.trellis/spec/backend/directory-structure.md +126 -0
  66. package/.trellis/spec/backend/examples/skills/deepagents-guide/README.md +11 -0
  67. package/.trellis/spec/backend/examples/skills/deepagents-guide/agent.js.template +20 -0
  68. package/.trellis/spec/backend/examples/skills/deepagents-guide/skills-config.js.template +13 -0
  69. package/.trellis/spec/backend/examples/skills/deepagents-guide/subagent.js.template +19 -0
  70. package/.trellis/spec/backend/hook-guidelines.md +178 -0
  71. package/.trellis/spec/backend/index.md +36 -0
  72. package/.trellis/spec/backend/quality-guidelines.md +201 -0
  73. package/.trellis/spec/backend/state-management.md +76 -0
  74. package/.trellis/spec/backend/tool-guidelines.md +144 -0
  75. package/.trellis/spec/backend/type-safety.md +71 -0
  76. package/.trellis/spec/guides/code-reuse-thinking-guide.md +92 -0
  77. package/.trellis/spec/guides/cross-layer-thinking-guide.md +94 -0
  78. package/.trellis/spec/guides/index.md +79 -0
  79. package/.trellis/tasks/archive/02-02-evolving-skills/prd.md +61 -0
  80. package/.trellis/tasks/archive/02-02-evolving-skills/task.json +29 -0
  81. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/prd.md +86 -0
  82. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/task.json +27 -0
  83. package/.trellis/tasks/archive/2026-02/02-02-skills-system/check.jsonl +3 -0
  84. package/.trellis/tasks/archive/2026-02/02-02-skills-system/debug.jsonl +2 -0
  85. package/.trellis/tasks/archive/2026-02/02-02-skills-system/implement.jsonl +5 -0
  86. package/.trellis/tasks/archive/2026-02/02-02-skills-system/prd.md +33 -0
  87. package/.trellis/tasks/archive/2026-02/02-02-skills-system/task.json +41 -0
  88. package/.trellis/workflow.md +407 -0
  89. package/.trellis/workspace/index.md +123 -0
  90. package/.trellis/workspace/pony/index.md +40 -0
  91. package/.trellis/workspace/pony/journal-1.md +7 -0
  92. package/.trellis/worktree.yaml +47 -0
  93. package/AGENTS.md +18 -0
  94. package/CLAUDE.md +292 -0
  95. package/README.md +134 -0
  96. package/agents/deepspider.md +142 -0
  97. package/docs/DEBUG.md +42 -0
  98. package/docs/GUIDE.md +334 -0
  99. package/docs/PROMPT.md +60 -0
  100. package/docs/USAGE.md +226 -0
  101. package/eslint.config.js +51 -0
  102. package/package.json +78 -0
  103. package/requirements-crypto.txt +14 -0
  104. package/src/agent/index.js +97 -0
  105. package/src/agent/logger.js +164 -0
  106. package/src/agent/middleware/filterTools.js +64 -0
  107. package/src/agent/middleware/report.js +79 -0
  108. package/src/agent/prompts/system.js +315 -0
  109. package/src/agent/run.js +575 -0
  110. package/src/agent/skills/anti-detect/SKILL.md +28 -0
  111. package/src/agent/skills/anti-detect/evolved.md +12 -0
  112. package/src/agent/skills/captcha/SKILL.md +37 -0
  113. package/src/agent/skills/captcha/evolved.md +12 -0
  114. package/src/agent/skills/config.js +30 -0
  115. package/src/agent/skills/crawler/SKILL.md +9 -0
  116. package/src/agent/skills/crawler/evolved.md +16 -0
  117. package/src/agent/skills/dynamic-analysis/SKILL.md +91 -0
  118. package/src/agent/skills/dynamic-analysis/evolved.md +12 -0
  119. package/src/agent/skills/env/SKILL.md +72 -0
  120. package/src/agent/skills/env/evolved.md +12 -0
  121. package/src/agent/skills/evolve.js +79 -0
  122. package/src/agent/skills/general/SKILL.md +12 -0
  123. package/src/agent/skills/general/evolved.md +12 -0
  124. package/src/agent/skills/js2python/SKILL.md +30 -0
  125. package/src/agent/skills/js2python/evolved.md +13 -0
  126. package/src/agent/skills/report/SKILL.md +21 -0
  127. package/src/agent/skills/report/evolved.md +12 -0
  128. package/src/agent/skills/sandbox/SKILL.md +22 -0
  129. package/src/agent/skills/sandbox/evolved.md +16 -0
  130. package/src/agent/skills/static-analysis/SKILL.md +93 -0
  131. package/src/agent/skills/static-analysis/evolved.md +12 -0
  132. package/src/agent/skills/xpath/SKILL.md +119 -0
  133. package/src/agent/subagents/anti-detect.js +45 -0
  134. package/src/agent/subagents/captcha.js +51 -0
  135. package/src/agent/subagents/crawler.js +138 -0
  136. package/src/agent/subagents/dynamic.js +64 -0
  137. package/src/agent/subagents/env-agent.js +82 -0
  138. package/src/agent/subagents/index.js +37 -0
  139. package/src/agent/subagents/js2python.js +72 -0
  140. package/src/agent/subagents/sandbox.js +55 -0
  141. package/src/agent/subagents/static.js +66 -0
  142. package/src/agent/tools/analysis.js +135 -0
  143. package/src/agent/tools/analyzer.js +85 -0
  144. package/src/agent/tools/anti-detect.js +89 -0
  145. package/src/agent/tools/antidebug.js +64 -0
  146. package/src/agent/tools/async.js +43 -0
  147. package/src/agent/tools/browser.js +324 -0
  148. package/src/agent/tools/captcha.js +223 -0
  149. package/src/agent/tools/capture.js +179 -0
  150. package/src/agent/tools/correlate.js +303 -0
  151. package/src/agent/tools/crawler.js +116 -0
  152. package/src/agent/tools/cryptohook.js +80 -0
  153. package/src/agent/tools/debug.js +246 -0
  154. package/src/agent/tools/deobfuscator.js +90 -0
  155. package/src/agent/tools/env.js +83 -0
  156. package/src/agent/tools/envdump.js +92 -0
  157. package/src/agent/tools/evolve.js +164 -0
  158. package/src/agent/tools/extract.js +114 -0
  159. package/src/agent/tools/extractor.js +54 -0
  160. package/src/agent/tools/file.js +224 -0
  161. package/src/agent/tools/hook.js +84 -0
  162. package/src/agent/tools/hookManager.js +178 -0
  163. package/src/agent/tools/index.js +137 -0
  164. package/src/agent/tools/nodejs.js +101 -0
  165. package/src/agent/tools/patch.js +46 -0
  166. package/src/agent/tools/preprocess.js +71 -0
  167. package/src/agent/tools/profile.js +122 -0
  168. package/src/agent/tools/python.js +627 -0
  169. package/src/agent/tools/report.js +124 -0
  170. package/src/agent/tools/runtime.js +132 -0
  171. package/src/agent/tools/sandbox.js +79 -0
  172. package/src/agent/tools/store.js +73 -0
  173. package/src/agent/tools/trace.js +74 -0
  174. package/src/agent/tools/tracing.js +201 -0
  175. package/src/agent/tools/utils.js +51 -0
  176. package/src/agent/tools/verify.js +184 -0
  177. package/src/agent/tools/webcrack.js +109 -0
  178. package/src/analyzer/ASTAnalyzer.js +387 -0
  179. package/src/analyzer/CallStackAnalyzer.js +379 -0
  180. package/src/analyzer/Deobfuscator.js +289 -0
  181. package/src/analyzer/EncryptionAnalyzer.js +99 -0
  182. package/src/analyzer/index.js +22 -0
  183. package/src/browser/EnvBridge.js +186 -0
  184. package/src/browser/cdp.js +168 -0
  185. package/src/browser/client.js +197 -0
  186. package/src/browser/collector.js +444 -0
  187. package/src/browser/collectors/RequestCryptoLinker.js +109 -0
  188. package/src/browser/collectors/ResponseSearcher.js +107 -0
  189. package/src/browser/collectors/ScriptCollector.js +158 -0
  190. package/src/browser/collectors/index.js +26 -0
  191. package/src/browser/defaultHooks.js +932 -0
  192. package/src/browser/hooks/crypto.js +55 -0
  193. package/src/browser/hooks/index.js +64 -0
  194. package/src/browser/hooks/native.js +9 -0
  195. package/src/browser/hooks/network.js +33 -0
  196. package/src/browser/index.js +42 -0
  197. package/src/browser/interceptors/NetworkInterceptor.js +116 -0
  198. package/src/browser/interceptors/ScriptInterceptor.js +76 -0
  199. package/src/browser/interceptors/index.js +6 -0
  200. package/src/browser/ui/analysisPanel.js +1782 -0
  201. package/src/browser/ui/confirmDialog.js +158 -0
  202. package/src/browser/ui/panel.html +152 -0
  203. package/src/browser/ui/selector.js +170 -0
  204. package/src/config/index.js +5 -0
  205. package/src/config/paths.js +71 -0
  206. package/src/config/patterns/crypto.js +36 -0
  207. package/src/config/profiles/chrome.json +71 -0
  208. package/src/config/profiles/firefox.json +44 -0
  209. package/src/config/profiles/safari.json +38 -0
  210. package/src/core/EnvMonitor.js +200 -0
  211. package/src/core/PatchGenerator.js +278 -0
  212. package/src/core/Sandbox.js +181 -0
  213. package/src/env/AntiAntiDebug.js +111 -0
  214. package/src/env/AsyncHook.js +68 -0
  215. package/src/env/BrowserAPIList.js +265 -0
  216. package/src/env/CookieHook.js +48 -0
  217. package/src/env/CryptoHook.js +205 -0
  218. package/src/env/EnvCodeGenerator.js +157 -0
  219. package/src/env/EnvDumper.js +356 -0
  220. package/src/env/EnvExtractor.js +220 -0
  221. package/src/env/HookBase.js +618 -0
  222. package/src/env/NetworkHook.js +159 -0
  223. package/src/env/modules/bom/history.js +29 -0
  224. package/src/env/modules/bom/location.js +26 -0
  225. package/src/env/modules/bom/navigator.js +70 -0
  226. package/src/env/modules/bom/screen.js +26 -0
  227. package/src/env/modules/bom/storage.js +23 -0
  228. package/src/env/modules/dom/document.js +110 -0
  229. package/src/env/modules/dom/event.js +51 -0
  230. package/src/env/modules/index.js +34 -0
  231. package/src/env/modules/webapi/fetch.js +46 -0
  232. package/src/env/modules/webapi/url.js +47 -0
  233. package/src/env/modules/webapi/xhr.js +48 -0
  234. package/src/index.js +27 -0
  235. package/src/mcp/server.js +89 -0
  236. package/src/store/DataStore.js +708 -0
  237. package/src/store/Store.js +158 -0
  238. package/src/store/Validator.js +24 -0
  239. package/test/analyze.test.js +90 -0
  240. package/test/envdump.test.js +74 -0
  241. package/test/flow.test.js +90 -0
  242. package/test/hooks.test.js +138 -0
  243. package/test/plugin.test.js +35 -0
  244. package/test/refactor-full.test.js +30 -0
  245. package/test/refactor.test.js +21 -0
  246. package/test/samples/obfuscated.js +61 -0
  247. package/test/samples/original.js +66 -0
  248. package/test/samples/v10_eval_chain.js +52 -0
  249. package/test/samples/v11_bytecode_vm.js +81 -0
  250. package/test/samples/v12_polymorphic.js +69 -0
  251. package/test/samples/v1_ob_basic.js +98 -0
  252. package/test/samples/v2_ob_advanced.js +99 -0
  253. package/test/samples/v3_jjencode.js +77 -0
  254. package/test/samples/v4_aaencode.js +73 -0
  255. package/test/samples/v5_control_flow.js +86 -0
  256. package/test/samples/v6_string_encryption.js +71 -0
  257. package/test/samples/v7_jsvmp.js +83 -0
  258. package/test/samples/v8_anti_debug.js +79 -0
  259. package/test/samples/v9_proxy_trap.js +49 -0
  260. package/test/samples.test.js +96 -0
  261. package/test/webcrack.test.js +55 -0
@@ -0,0 +1,55 @@
1
+ /**
2
+ * DeepSpider - 加密库 Hook
3
+ * 已废弃,请使用 src/env/CryptoHook.js
4
+ */
5
+
6
/**
 * Legacy in-page crypto hook script (deprecated; the file header points to
 * src/env/CryptoHook.js as the replacement).
 *
 * The string is injected into the page as-is. It expects `window.__deepspider__`
 * to expose `log(type, data)` and `native(hook, original)`; when that object is
 * missing the script does nothing.
 *
 * What the script does:
 *  - wraps Function.prototype.apply and logs a 'crypto' event whenever the first
 *    element of the applied argument list looks like a CryptoJS cipher params
 *    object (has `ciphertext`, `key` and `algorithm`);
 *  - wraps Function.prototype.call and, when the `this` argument's prototype has
 *    both `getPublicKey` and `encrypt`, wraps `encrypt` on the next prototype up
 *    the chain so every encryption is logged.
 *
 * The original natives are always invoked through a captured Reflect.apply.
 * Writing `_call.call(...)` / `_apply.call(...)` would look `.call` up on
 * Function.prototype, i.e. on the hook itself once it is installed, and recurse
 * until the stack overflows.
 */
export const cryptoHook = `
(function() {
  const deepspider = window.__deepspider__;
  if (!deepspider) return;

  // Captured once so the hooks never depend on the patched call/apply.
  const reflectApply = Reflect.apply;

  // Hook Function.prototype.apply (CryptoJS)
  const _apply = Function.prototype.apply;
  const applyHook = function() {
    const result = reflectApply(_apply, this, arguments);
    try {
      if (arguments.length === 2 && arguments[1]?.[0]) {
        const cfg = arguments[1][0];
        if (cfg.ciphertext && cfg.key && cfg.algorithm) {
          deepspider.log('crypto', {
            algo: 'CryptoJS',
            key: cfg.key?.toString?.() || '',
            iv: cfg.iv?.toString?.() || '',
            mode: cfg.mode?.name || 'unknown'
          });
        }
      }
    } catch (e) {}
    return result;
  };
  Function.prototype.apply = deepspider.native(applyHook, _apply);

  // Hook RSA
  const _call = Function.prototype.call;
  const callHook = function() {
    const result = reflectApply(_call, this, arguments);
    try {
      const arg = arguments[0];
      if (arg?.__proto__?.getPublicKey && arg?.__proto__?.encrypt) {
        const proto = arg.__proto__.__proto__;
        if (proto?.encrypt && !proto.__hooked__) {
          proto.__hooked__ = true;
          const _enc = proto.encrypt;
          proto.encrypt = deepspider.native(function(data) {
            // Forward every argument, not just the first one.
            const enc = reflectApply(_enc, this, arguments);
            deepspider.log('crypto', { algo: 'RSA', data, encrypted: enc });
            return enc;
          }, _enc);
        }
      }
    } catch (e) {}
    return result;
  };
  Function.prototype.call = deepspider.native(callHook, _call);
})();
`;
@@ -0,0 +1,64 @@
1
+ /**
2
+ * DeepSpider - Hook 管理器
3
+ */
4
+
5
+ import { cryptoHook } from './crypto.js';
6
+ import { networkHook } from './network.js';
7
+ import { nativeProtect } from './native.js';
8
+
9
/**
 * Builds the combined in-page hook script and collects the console messages
 * the hooks emit.
 */
export class HookManager {
  constructor() {
    // Captured console entries: { type, text, timestamp }.
    this.logs = [];
    // Optional callback invoked with { type, text } for every captured entry.
    this.onLog = null;
  }

  /**
   * Concatenate all hook snippets into one script.
   * @returns {string} nativeProtect, cryptoHook and networkHook joined by blank lines.
   */
  getCombinedScript() {
    const parts = [nativeProtect, cryptoHook, networkHook];
    return parts.join('\n\n');
  }

  /**
   * Register the combined script as an init script on `page` and start
   * recording console messages whose text contains '[DeepSpider:'.
   * @param {object} page - object exposing addInitScript(script) and on(event, handler).
   * @returns {Promise<void>}
   */
  async inject(page) {
    const script = this.getCombinedScript();

    // Must be registered before any document loads.
    await page.addInitScript(script);

    page.on('console', (msg) => {
      const text = msg.text();
      if (!text.includes('[DeepSpider:')) {
        return;
      }

      const entry = { type: msg.type(), text, timestamp: Date.now() };
      this.logs.push(entry);

      if (this.onLog) {
        this.onLog({ type: msg.type(), text });
      }
    });
  }

  /**
   * @returns {Array<object>} the live array of captured entries (not a copy).
   */
  getLogs() {
    return this.logs;
  }

  /**
   * Drop all captured entries by swapping in a fresh array.
   */
  clearLogs() {
    this.logs = [];
  }
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * DeepSpider - Hook 反检测工具
3
+ * 已废弃,请使用 src/env/HookBase.js
4
+ */
5
+
6
+ import { HookBase } from '../../env/HookBase.js';
7
+
8
// Kept for backward compatibility with older code: the script text is whatever
// HookBase.getBaseCode() returns, evaluated once at module load.
const legacyBaseCode = HookBase.getBaseCode();

export { legacyBaseCode as nativeProtect };
@@ -0,0 +1,33 @@
1
+ /**
2
+ * DeepSpider - 网络请求 Hook
3
+ * 已废弃,请使用 src/env/NetworkHook.js
4
+ */
5
+
6
/**
 * Legacy in-page network hook script (deprecated; the file header points to
 * src/env/NetworkHook.js as the replacement).
 *
 * The string is injected into the page as-is. It expects `window.__deepspider__`
 * to expose `log(type, data)` and `native(hook, original)`; when that object is
 * missing the script does nothing.
 *
 * It wraps window.fetch, XMLHttpRequest.prototype.open and
 * XMLHttpRequest.prototype.send so each request is reported through
 * deepspider.log before being forwarded unchanged to the original function.
 *
 * Two safety measures:
 *  - logging is wrapped in try/catch so a failing logger can never break the
 *    page's own request (same best-effort policy as the crypto hook);
 *  - originals are invoked via a captured Reflect.apply, so forwarding does not
 *    depend on Function.prototype.call/apply, which other hooks may replace.
 */
export const networkHook = `
(function() {
  const deepspider = window.__deepspider__;
  if (!deepspider) return;

  const reflectApply = Reflect.apply;

  // Best-effort logging: never let instrumentation abort a real request.
  const safeLog = function(type, data) {
    try {
      deepspider.log(type, data);
    } catch (e) {}
  };

  // Hook fetch
  const _fetch = window.fetch;
  window.fetch = deepspider.native(async function(url) {
    const options = arguments[1];
    safeLog('fetch', { url, body: options?.body });
    return reflectApply(_fetch, this, arguments);
  }, _fetch);

  // Hook XHR
  const _open = XMLHttpRequest.prototype.open;
  const _send = XMLHttpRequest.prototype.send;

  XMLHttpRequest.prototype.open = deepspider.native(function(method, url) {
    // Remembered on the instance so send() can report them.
    this._url = url;
    this._method = method;
    return reflectApply(_open, this, arguments);
  }, _open);

  XMLHttpRequest.prototype.send = deepspider.native(function(body) {
    safeLog('xhr', { method: this._method, url: this._url, body });
    return reflectApply(_send, this, arguments);
  }, _send);
})();
`;
@@ -0,0 +1,42 @@
1
+ /**
2
+ * DeepSpider - 浏览器基础设施
3
+ * 提供真实浏览器环境,作为动态分析的基础
4
+ */
5
+
6
+ export { BrowserClient } from './client.js';
7
+ export { CDPSession } from './cdp.js';
8
+ export { HookManager } from './hooks/index.js';
9
+ export { EnvCollector } from './collector.js';
10
+ export { EnvBridge } from './EnvBridge.js';
11
+
12
// Singleton state.
// browserInstance: the shared client (assigned as soon as it is constructed).
// launchPromise: the in-flight or completed launch, shared by concurrent callers.
let browserInstance = null;
let launchPromise = null;

/**
 * Get the shared browser client, launching it on first use.
 *
 * Concurrent callers share a single launch: without the cached promise, two
 * calls made before the first launch finished would each create and launch
 * their own client. If the launch fails, the cached state is cleared so a later
 * call can retry instead of receiving a client that never started.
 *
 * @param {object} [options={}] - passed to BrowserClient#launch; only the
 *   options of the call that actually triggers the launch are used.
 * @returns {Promise<object>} the launched BrowserClient instance.
 */
export async function getBrowser(options = {}) {
  if (!launchPromise) {
    const pending = (async () => {
      const { BrowserClient } = await import('./client.js');
      const client = new BrowserClient();
      browserInstance = client;
      try {
        await client.launch(options);
      } catch (err) {
        // Do not keep a client whose launch failed.
        if (browserInstance === client) {
          browserInstance = null;
        }
        throw err;
      }
      return client;
    })();

    launchPromise = pending;
    pending.catch(() => {
      // Allow a retry after a failed launch.
      if (launchPromise === pending) {
        launchPromise = null;
      }
    });
  }
  return launchPromise;
}

/**
 * Close the shared browser, if one exists, and reset the singleton so the next
 * getBrowser() call launches a fresh instance.
 * @returns {Promise<void>}
 */
export async function closeBrowser() {
  if (!browserInstance) {
    return;
  }
  await browserInstance.close();
  browserInstance = null;
  launchPromise = null;
}

/**
 * Get the current browser client without creating one.
 * @returns {object|null} the shared client, or null when none exists.
 */
export function getBrowserClient() {
  return browserInstance;
}
@@ -0,0 +1,116 @@
1
+ /**
2
+ * DeepSpider - CDP 网络拦截器
3
+ * 通过 CDP 捕获网络请求/响应,按站点存储到文件系统
4
+ */
5
+
6
+ import { getDataStore } from '../../store/DataStore.js';
7
+
8
/**
 * Captures XHR/Fetch traffic through a CDP session and hands each completed
 * request/response pair to the data store.
 */
export class NetworkInterceptor {
  /**
   * @param {object} cdpClient - CDP session exposing send(method, params) and on(event, handler).
   * @param {object} page - page object exposing url() (a Playwright page, per the original comment).
   */
  constructor(cdpClient, page) {
    this.client = cdpClient;
    this.page = page;
    this.store = getDataStore();
    // requestId -> request details collected so far.
    this.pendingRequests = new Map();
  }

  /**
   * Current page URL, or '' when there is no page or reading the URL throws.
   * @returns {string}
   */
  getPageUrl() {
    try {
      return this.page?.url() || '';
    } catch {
      return '';
    }
  }

  /**
   * Enable the CDP Network domain and subscribe to the request lifecycle events.
   * @returns {Promise<void>}
   */
  async start() {
    await this.client.send('Network.enable');

    this.client.on('Network.requestWillBeSent', (params) => {
      this.onRequest(params);
    });

    this.client.on('Network.responseReceived', (params) => {
      this.onResponse(params);
    });

    this.client.on('Network.loadingFinished', (params) => {
      this.onLoadingFinished(params);
    });

    // Failed or cancelled requests never reach loadingFinished; without this
    // handler their entries would stay in pendingRequests forever.
    this.client.on('Network.loadingFailed', (params) => {
      this.onLoadingFailed(params);
    });

    console.log('[NetworkInterceptor] 已启动');
  }

  /**
   * Start tracking a request. Only resource types 'XHR' and 'Fetch' are recorded.
   * @param {object} params - Network.requestWillBeSent payload.
   */
  onRequest(params) {
    const { requestId, request, timestamp } = params;

    const type = params.type;
    if (type !== 'XHR' && type !== 'Fetch') return;

    this.pendingRequests.set(requestId, {
      url: request.url,
      method: request.method,
      headers: request.headers,
      postData: request.postData,
      timestamp: timestamp * 1000,
      // Page URL at request time; passed to the store with the saved record.
      pageUrl: this.getPageUrl()
    });
  }

  /**
   * Attach status and response headers to a tracked request.
   * @param {object} params - Network.responseReceived payload.
   */
  onResponse(params) {
    const { requestId, response } = params;
    const pending = this.pendingRequests.get(requestId);
    if (!pending) return;

    pending.status = response.status;
    pending.responseHeaders = response.headers;
  }

  /**
   * Fetch the response body of a tracked request, store the record (body
   * truncated to 50000 characters) and stop tracking the request.
   * @param {object} params - Network.loadingFinished payload.
   * @returns {Promise<void>}
   */
  async onLoadingFinished(params) {
    const { requestId } = params;
    const pending = this.pendingRequests.get(requestId);
    if (!pending) return;

    try {
      const { body, base64Encoded } = await this.client.send(
        'Network.getResponseBody',
        { requestId }
      );

      const responseBody = base64Encoded
        ? Buffer.from(body, 'base64').toString('utf-8')
        : body;

      // Not awaited: a slow or failing save must not hold up event handling.
      this.store.saveResponse({
        url: pending.url,
        method: pending.method,
        status: pending.status,
        requestHeaders: pending.headers,
        requestBody: pending.postData,
        responseBody: responseBody.slice(0, 50000),
        timestamp: pending.timestamp,
        pageUrl: pending.pageUrl
      }).catch(e => {
        console.error('[NetworkInterceptor] 保存失败:', e.message);
      });

    } catch (e) {
      // Deliberate best-effort: some responses have no retrievable body.
    }

    this.pendingRequests.delete(requestId);
  }

  /**
   * Stop tracking a request that failed or was cancelled.
   * @param {object} params - Network.loadingFailed payload.
   */
  onLoadingFailed(params) {
    this.pendingRequests.delete(params.requestId);
  }
}
115
+
116
+ export default NetworkInterceptor;
@@ -0,0 +1,76 @@
1
+ /**
2
+ * DeepSpider - CDP 脚本拦截器
3
+ * 通过 CDP 捕获 JS 脚本源码,按站点存储到文件系统
4
+ */
5
+
6
+ import { getDataStore } from '../../store/DataStore.js';
7
+
8
/**
 * Captures the source of every script the debugger reports as parsed and
 * passes it to the data store.
 */
export class ScriptInterceptor {
  /**
   * @param {object} cdpClient - CDP session exposing send(method, params) and on(event, handler).
   * @param {object} page - page object exposing url() (a Playwright page, per the original comment).
   */
  constructor(cdpClient, page) {
    this.client = cdpClient;
    this.page = page;
    this.store = getDataStore();
    // Script ids already handled, so each script is fetched at most once.
    this.scriptIds = new Set();
  }

  /**
   * Current page URL, or '' when there is no page or reading the URL throws.
   * @returns {string}
   */
  getPageUrl() {
    let current;
    try {
      current = this.page?.url();
    } catch {
      return '';
    }
    return current || '';
  }

  /**
   * Enable the CDP Debugger domain and subscribe to scriptParsed events.
   * @returns {Promise<void>}
   */
  async start() {
    await this.client.send('Debugger.enable');

    const handleParsed = (params) => {
      this.onScriptParsed(params);
    };
    this.client.on('Debugger.scriptParsed', handleParsed);

    console.log('[ScriptInterceptor] 已启动');
  }

  /**
   * Handle one scriptParsed event: ignore scripts without a URL, extension
   * scripts and ids seen before; otherwise kick off a fetch-and-save.
   * @param {object} params - Debugger.scriptParsed payload.
   * @returns {Promise<void>}
   */
  async onScriptParsed(params) {
    const { scriptId, url } = params;

    const isIgnored =
      !url ||
      url.startsWith('chrome-extension://') ||
      this.scriptIds.has(scriptId);
    if (isIgnored) {
      return;
    }

    this.scriptIds.add(scriptId);

    // Fire and forget; failures are intentionally ignored.
    this.fetchAndSave(scriptId, url).catch(() => {});
  }

  /**
   * Fetch a script's source (truncated to 500000 characters) and save it.
   * Any failure is swallowed and the script is skipped.
   * @param {string} scriptId - debugger script id.
   * @param {string} url - script URL.
   * @returns {Promise<void>}
   */
  async fetchAndSave(scriptId, url) {
    try {
      const reply = await this.client.send('Debugger.getScriptSource', { scriptId });
      const record = {
        url,
        type: 'external',
        source: reply.scriptSource.slice(0, 500000),
        timestamp: Date.now(),
        pageUrl: this.getPageUrl()
      };
      await this.store.saveScript(record);
    } catch (e) {
      // Source unavailable or save failed: skip this script.
    }
  }
}
75
+
76
+ export default ScriptInterceptor;
@@ -0,0 +1,6 @@
1
+ /**
2
+ * DeepSpider - 拦截器索引
3
+ */
4
+
5
+ export { NetworkInterceptor } from './NetworkInterceptor.js';
6
+ export { ScriptInterceptor } from './ScriptInterceptor.js';