deepspider 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/.claude/agents/check.md +122 -0
  2. package/.claude/agents/debug.md +106 -0
  3. package/.claude/agents/dispatch.md +214 -0
  4. package/.claude/agents/implement.md +96 -0
  5. package/.claude/agents/plan.md +396 -0
  6. package/.claude/agents/research.md +120 -0
  7. package/.claude/commands/evolve/merge.md +80 -0
  8. package/.claude/commands/trellis/before-backend-dev.md +13 -0
  9. package/.claude/commands/trellis/before-frontend-dev.md +13 -0
  10. package/.claude/commands/trellis/break-loop.md +107 -0
  11. package/.claude/commands/trellis/check-backend.md +13 -0
  12. package/.claude/commands/trellis/check-cross-layer.md +153 -0
  13. package/.claude/commands/trellis/check-frontend.md +13 -0
  14. package/.claude/commands/trellis/create-command.md +154 -0
  15. package/.claude/commands/trellis/finish-work.md +129 -0
  16. package/.claude/commands/trellis/integrate-skill.md +219 -0
  17. package/.claude/commands/trellis/onboard.md +358 -0
  18. package/.claude/commands/trellis/parallel.md +193 -0
  19. package/.claude/commands/trellis/record-session.md +62 -0
  20. package/.claude/commands/trellis/start.md +280 -0
  21. package/.claude/commands/trellis/update-spec.md +213 -0
  22. package/.claude/hooks/inject-subagent-context.py +758 -0
  23. package/.claude/hooks/ralph-loop.py +374 -0
  24. package/.claude/hooks/session-start.py +126 -0
  25. package/.claude/settings.json +41 -0
  26. package/.claude/skills/deepagents-guide/SKILL.md +428 -0
  27. package/.cursor/commands/trellis-before-backend-dev.md +13 -0
  28. package/.cursor/commands/trellis-before-frontend-dev.md +13 -0
  29. package/.cursor/commands/trellis-break-loop.md +107 -0
  30. package/.cursor/commands/trellis-check-backend.md +13 -0
  31. package/.cursor/commands/trellis-check-cross-layer.md +153 -0
  32. package/.cursor/commands/trellis-check-frontend.md +13 -0
  33. package/.cursor/commands/trellis-create-command.md +154 -0
  34. package/.cursor/commands/trellis-finish-work.md +129 -0
  35. package/.cursor/commands/trellis-integrate-skill.md +219 -0
  36. package/.cursor/commands/trellis-onboard.md +358 -0
  37. package/.cursor/commands/trellis-record-session.md +62 -0
  38. package/.cursor/commands/trellis-start.md +156 -0
  39. package/.cursor/commands/trellis-update-spec.md +213 -0
  40. package/.env.example +11 -0
  41. package/.husky/pre-commit +1 -0
  42. package/.mcp.json +8 -0
  43. package/.trellis/.template-hashes.json +65 -0
  44. package/.trellis/.version +1 -0
  45. package/.trellis/scripts/add-session.sh +384 -0
  46. package/.trellis/scripts/common/developer.sh +129 -0
  47. package/.trellis/scripts/common/git-context.sh +263 -0
  48. package/.trellis/scripts/common/paths.sh +208 -0
  49. package/.trellis/scripts/common/phase.sh +150 -0
  50. package/.trellis/scripts/common/registry.sh +247 -0
  51. package/.trellis/scripts/common/task-queue.sh +142 -0
  52. package/.trellis/scripts/common/task-utils.sh +151 -0
  53. package/.trellis/scripts/common/worktree.sh +128 -0
  54. package/.trellis/scripts/create-bootstrap.sh +299 -0
  55. package/.trellis/scripts/get-context.sh +7 -0
  56. package/.trellis/scripts/get-developer.sh +15 -0
  57. package/.trellis/scripts/init-developer.sh +34 -0
  58. package/.trellis/scripts/multi-agent/cleanup.sh +396 -0
  59. package/.trellis/scripts/multi-agent/create-pr.sh +241 -0
  60. package/.trellis/scripts/multi-agent/plan.sh +207 -0
  61. package/.trellis/scripts/multi-agent/start.sh +310 -0
  62. package/.trellis/scripts/multi-agent/status.sh +828 -0
  63. package/.trellis/scripts/task.sh +1118 -0
  64. package/.trellis/spec/backend/deepagents-guide.md +337 -0
  65. package/.trellis/spec/backend/directory-structure.md +126 -0
  66. package/.trellis/spec/backend/examples/skills/deepagents-guide/README.md +11 -0
  67. package/.trellis/spec/backend/examples/skills/deepagents-guide/agent.js.template +20 -0
  68. package/.trellis/spec/backend/examples/skills/deepagents-guide/skills-config.js.template +13 -0
  69. package/.trellis/spec/backend/examples/skills/deepagents-guide/subagent.js.template +19 -0
  70. package/.trellis/spec/backend/hook-guidelines.md +178 -0
  71. package/.trellis/spec/backend/index.md +36 -0
  72. package/.trellis/spec/backend/quality-guidelines.md +201 -0
  73. package/.trellis/spec/backend/state-management.md +76 -0
  74. package/.trellis/spec/backend/tool-guidelines.md +144 -0
  75. package/.trellis/spec/backend/type-safety.md +71 -0
  76. package/.trellis/spec/guides/code-reuse-thinking-guide.md +92 -0
  77. package/.trellis/spec/guides/cross-layer-thinking-guide.md +94 -0
  78. package/.trellis/spec/guides/index.md +79 -0
  79. package/.trellis/tasks/archive/02-02-evolving-skills/prd.md +61 -0
  80. package/.trellis/tasks/archive/02-02-evolving-skills/task.json +29 -0
  81. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/prd.md +86 -0
  82. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/task.json +27 -0
  83. package/.trellis/tasks/archive/2026-02/02-02-skills-system/check.jsonl +3 -0
  84. package/.trellis/tasks/archive/2026-02/02-02-skills-system/debug.jsonl +2 -0
  85. package/.trellis/tasks/archive/2026-02/02-02-skills-system/implement.jsonl +5 -0
  86. package/.trellis/tasks/archive/2026-02/02-02-skills-system/prd.md +33 -0
  87. package/.trellis/tasks/archive/2026-02/02-02-skills-system/task.json +41 -0
  88. package/.trellis/workflow.md +407 -0
  89. package/.trellis/workspace/index.md +123 -0
  90. package/.trellis/workspace/pony/index.md +40 -0
  91. package/.trellis/workspace/pony/journal-1.md +7 -0
  92. package/.trellis/worktree.yaml +47 -0
  93. package/AGENTS.md +18 -0
  94. package/CLAUDE.md +292 -0
  95. package/README.md +134 -0
  96. package/agents/deepspider.md +142 -0
  97. package/docs/DEBUG.md +42 -0
  98. package/docs/GUIDE.md +334 -0
  99. package/docs/PROMPT.md +60 -0
  100. package/docs/USAGE.md +226 -0
  101. package/eslint.config.js +51 -0
  102. package/package.json +78 -0
  103. package/requirements-crypto.txt +14 -0
  104. package/src/agent/index.js +97 -0
  105. package/src/agent/logger.js +164 -0
  106. package/src/agent/middleware/filterTools.js +64 -0
  107. package/src/agent/middleware/report.js +79 -0
  108. package/src/agent/prompts/system.js +315 -0
  109. package/src/agent/run.js +575 -0
  110. package/src/agent/skills/anti-detect/SKILL.md +28 -0
  111. package/src/agent/skills/anti-detect/evolved.md +12 -0
  112. package/src/agent/skills/captcha/SKILL.md +37 -0
  113. package/src/agent/skills/captcha/evolved.md +12 -0
  114. package/src/agent/skills/config.js +30 -0
  115. package/src/agent/skills/crawler/SKILL.md +9 -0
  116. package/src/agent/skills/crawler/evolved.md +16 -0
  117. package/src/agent/skills/dynamic-analysis/SKILL.md +91 -0
  118. package/src/agent/skills/dynamic-analysis/evolved.md +12 -0
  119. package/src/agent/skills/env/SKILL.md +72 -0
  120. package/src/agent/skills/env/evolved.md +12 -0
  121. package/src/agent/skills/evolve.js +79 -0
  122. package/src/agent/skills/general/SKILL.md +12 -0
  123. package/src/agent/skills/general/evolved.md +12 -0
  124. package/src/agent/skills/js2python/SKILL.md +30 -0
  125. package/src/agent/skills/js2python/evolved.md +13 -0
  126. package/src/agent/skills/report/SKILL.md +21 -0
  127. package/src/agent/skills/report/evolved.md +12 -0
  128. package/src/agent/skills/sandbox/SKILL.md +22 -0
  129. package/src/agent/skills/sandbox/evolved.md +16 -0
  130. package/src/agent/skills/static-analysis/SKILL.md +93 -0
  131. package/src/agent/skills/static-analysis/evolved.md +12 -0
  132. package/src/agent/skills/xpath/SKILL.md +119 -0
  133. package/src/agent/subagents/anti-detect.js +45 -0
  134. package/src/agent/subagents/captcha.js +51 -0
  135. package/src/agent/subagents/crawler.js +138 -0
  136. package/src/agent/subagents/dynamic.js +64 -0
  137. package/src/agent/subagents/env-agent.js +82 -0
  138. package/src/agent/subagents/index.js +37 -0
  139. package/src/agent/subagents/js2python.js +72 -0
  140. package/src/agent/subagents/sandbox.js +55 -0
  141. package/src/agent/subagents/static.js +66 -0
  142. package/src/agent/tools/analysis.js +135 -0
  143. package/src/agent/tools/analyzer.js +85 -0
  144. package/src/agent/tools/anti-detect.js +89 -0
  145. package/src/agent/tools/antidebug.js +64 -0
  146. package/src/agent/tools/async.js +43 -0
  147. package/src/agent/tools/browser.js +324 -0
  148. package/src/agent/tools/captcha.js +223 -0
  149. package/src/agent/tools/capture.js +179 -0
  150. package/src/agent/tools/correlate.js +303 -0
  151. package/src/agent/tools/crawler.js +116 -0
  152. package/src/agent/tools/cryptohook.js +80 -0
  153. package/src/agent/tools/debug.js +246 -0
  154. package/src/agent/tools/deobfuscator.js +90 -0
  155. package/src/agent/tools/env.js +83 -0
  156. package/src/agent/tools/envdump.js +92 -0
  157. package/src/agent/tools/evolve.js +164 -0
  158. package/src/agent/tools/extract.js +114 -0
  159. package/src/agent/tools/extractor.js +54 -0
  160. package/src/agent/tools/file.js +224 -0
  161. package/src/agent/tools/hook.js +84 -0
  162. package/src/agent/tools/hookManager.js +178 -0
  163. package/src/agent/tools/index.js +137 -0
  164. package/src/agent/tools/nodejs.js +101 -0
  165. package/src/agent/tools/patch.js +46 -0
  166. package/src/agent/tools/preprocess.js +71 -0
  167. package/src/agent/tools/profile.js +122 -0
  168. package/src/agent/tools/python.js +627 -0
  169. package/src/agent/tools/report.js +124 -0
  170. package/src/agent/tools/runtime.js +132 -0
  171. package/src/agent/tools/sandbox.js +79 -0
  172. package/src/agent/tools/store.js +73 -0
  173. package/src/agent/tools/trace.js +74 -0
  174. package/src/agent/tools/tracing.js +201 -0
  175. package/src/agent/tools/utils.js +51 -0
  176. package/src/agent/tools/verify.js +184 -0
  177. package/src/agent/tools/webcrack.js +109 -0
  178. package/src/analyzer/ASTAnalyzer.js +387 -0
  179. package/src/analyzer/CallStackAnalyzer.js +379 -0
  180. package/src/analyzer/Deobfuscator.js +289 -0
  181. package/src/analyzer/EncryptionAnalyzer.js +99 -0
  182. package/src/analyzer/index.js +22 -0
  183. package/src/browser/EnvBridge.js +186 -0
  184. package/src/browser/cdp.js +168 -0
  185. package/src/browser/client.js +197 -0
  186. package/src/browser/collector.js +444 -0
  187. package/src/browser/collectors/RequestCryptoLinker.js +109 -0
  188. package/src/browser/collectors/ResponseSearcher.js +107 -0
  189. package/src/browser/collectors/ScriptCollector.js +158 -0
  190. package/src/browser/collectors/index.js +26 -0
  191. package/src/browser/defaultHooks.js +932 -0
  192. package/src/browser/hooks/crypto.js +55 -0
  193. package/src/browser/hooks/index.js +64 -0
  194. package/src/browser/hooks/native.js +9 -0
  195. package/src/browser/hooks/network.js +33 -0
  196. package/src/browser/index.js +42 -0
  197. package/src/browser/interceptors/NetworkInterceptor.js +116 -0
  198. package/src/browser/interceptors/ScriptInterceptor.js +76 -0
  199. package/src/browser/interceptors/index.js +6 -0
  200. package/src/browser/ui/analysisPanel.js +1782 -0
  201. package/src/browser/ui/confirmDialog.js +158 -0
  202. package/src/browser/ui/panel.html +152 -0
  203. package/src/browser/ui/selector.js +170 -0
  204. package/src/config/index.js +5 -0
  205. package/src/config/paths.js +71 -0
  206. package/src/config/patterns/crypto.js +36 -0
  207. package/src/config/profiles/chrome.json +71 -0
  208. package/src/config/profiles/firefox.json +44 -0
  209. package/src/config/profiles/safari.json +38 -0
  210. package/src/core/EnvMonitor.js +200 -0
  211. package/src/core/PatchGenerator.js +278 -0
  212. package/src/core/Sandbox.js +181 -0
  213. package/src/env/AntiAntiDebug.js +111 -0
  214. package/src/env/AsyncHook.js +68 -0
  215. package/src/env/BrowserAPIList.js +265 -0
  216. package/src/env/CookieHook.js +48 -0
  217. package/src/env/CryptoHook.js +205 -0
  218. package/src/env/EnvCodeGenerator.js +157 -0
  219. package/src/env/EnvDumper.js +356 -0
  220. package/src/env/EnvExtractor.js +220 -0
  221. package/src/env/HookBase.js +618 -0
  222. package/src/env/NetworkHook.js +159 -0
  223. package/src/env/modules/bom/history.js +29 -0
  224. package/src/env/modules/bom/location.js +26 -0
  225. package/src/env/modules/bom/navigator.js +70 -0
  226. package/src/env/modules/bom/screen.js +26 -0
  227. package/src/env/modules/bom/storage.js +23 -0
  228. package/src/env/modules/dom/document.js +110 -0
  229. package/src/env/modules/dom/event.js +51 -0
  230. package/src/env/modules/index.js +34 -0
  231. package/src/env/modules/webapi/fetch.js +46 -0
  232. package/src/env/modules/webapi/url.js +47 -0
  233. package/src/env/modules/webapi/xhr.js +48 -0
  234. package/src/index.js +27 -0
  235. package/src/mcp/server.js +89 -0
  236. package/src/store/DataStore.js +708 -0
  237. package/src/store/Store.js +158 -0
  238. package/src/store/Validator.js +24 -0
  239. package/test/analyze.test.js +90 -0
  240. package/test/envdump.test.js +74 -0
  241. package/test/flow.test.js +90 -0
  242. package/test/hooks.test.js +138 -0
  243. package/test/plugin.test.js +35 -0
  244. package/test/refactor-full.test.js +30 -0
  245. package/test/refactor.test.js +21 -0
  246. package/test/samples/obfuscated.js +61 -0
  247. package/test/samples/original.js +66 -0
  248. package/test/samples/v10_eval_chain.js +52 -0
  249. package/test/samples/v11_bytecode_vm.js +81 -0
  250. package/test/samples/v12_polymorphic.js +69 -0
  251. package/test/samples/v1_ob_basic.js +98 -0
  252. package/test/samples/v2_ob_advanced.js +99 -0
  253. package/test/samples/v3_jjencode.js +77 -0
  254. package/test/samples/v4_aaencode.js +73 -0
  255. package/test/samples/v5_control_flow.js +86 -0
  256. package/test/samples/v6_string_encryption.js +71 -0
  257. package/test/samples/v7_jsvmp.js +83 -0
  258. package/test/samples/v8_anti_debug.js +79 -0
  259. package/test/samples/v9_proxy_trap.js +49 -0
  260. package/test/samples.test.js +96 -0
  261. package/test/webcrack.test.js +55 -0
@@ -0,0 +1,124 @@
1
+ /**
2
+ * DeepSpider - 分析报告工具
3
+ * 保存分析结果、生成 HTML 报告
4
+ * 统一存储到 ~/.deepspider/output/reports/
5
+ */
6
+
7
+ import { z } from 'zod';
8
+ import { tool } from '@langchain/core/tools';
9
+ import { writeFileSync } from 'fs';
10
+ import { join } from 'path';
11
+ import { PATHS, ensureDir, getReportDir } from '../../config/paths.js';
12
+
13
// Root directory under which each per-domain report folder is created.
const { REPORTS_DIR: OUTPUT_DIR } = PATHS;
14
+
15
/**
 * Derive a filesystem-safe directory name from a domain or URL.
 *
 * When the input parses as a URL with a non-empty hostname, the hostname is
 * used; otherwise the raw input is used. Every character outside
 * [a-zA-Z0-9.-] is replaced by "_".
 *
 * @param {string} url - Domain name or URL supplied by the caller.
 * @returns {string} A name safe to join onto the reports directory.
 */
function extractDomain(url) {
  const input = url === null || url === undefined ? '' : String(url);

  let name = input;
  try {
    // Inputs such as "localhost:3000" or "file:///x" parse successfully but
    // have an empty hostname; fall back to the raw input in that case.
    name = new URL(input).hostname || input;
  } catch {
    // Not an absolute URL: treat the whole input as the name.
  }

  const safe = name.replace(/[^a-zA-Z0-9.-]/g, '_');

  // "", "." and ".." would resolve to the reports root (or its parent) once
  // joined onto the output directory, so never return a dot-only name.
  return /^\.*$/.test(safe) ? '_' : safe;
}
23
+
24
/**
 * Escape a value for safe inclusion in HTML text or attribute values.
 *
 * Escapes &, <, >, double quotes and single quotes. null/undefined become an
 * empty string; any other non-string value is converted with String().
 *
 * @param {*} str - Value to escape.
 * @returns {string} The escaped text.
 */
function escapeHtml(str) {
  if (str === null || str === undefined) {
    return '';
  }
  return String(str)
    .replace(/&/g, '&amp;') // must run first so later entities are not double-escaped
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
27
+
28
/**
 * Build the standalone HTML report page.
 *
 * The page shows the title, the generation time, the Markdown analysis
 * (as escaped, pre-formatted text) and two switchable code panels for the
 * Python and JavaScript code. All caller-supplied text is passed through
 * escapeHtml before being embedded.
 *
 * @param {string} title - Report title, used for <title> and the heading.
 * @param {string} markdown - Markdown analysis text, shown verbatim.
 * @param {string} [pythonCode] - Python code; a placeholder is shown when empty.
 * @param {string} [jsCode] - JavaScript code; a placeholder is shown when empty.
 * @returns {string} The complete HTML document.
 */
function generateHtmlPage(title, markdown, pythonCode, jsCode) {
  const safeTitle = escapeHtml(title);
  return `<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<title>${safeTitle} - DeepSpider</title>
<style>
body { font-family: system-ui; max-width: 900px; margin: 0 auto; padding: 20px; background: #0d1117; color: #c9d1d9; }
h1,h2,h3 { color: #58a6ff; }
pre { background: #161b22; padding: 16px; border-radius: 6px; overflow-x: auto; }
pre.markdown { white-space: pre-wrap; word-break: break-word; }
code { font-family: monospace; }
table { width: 100%; border-collapse: collapse; }
th,td { border: 1px solid #30363d; padding: 8px; }
.tabs { display: flex; gap: 8px; margin: 16px 0; }
.tab { padding: 8px 16px; background: #21262d; border: 1px solid #30363d; border-radius: 6px; cursor: pointer; color: #c9d1d9; }
.tab.active { background: #388bfd; color: #fff; }
.code-panel { display: none; }
.code-panel.active { display: block; }
</style>
</head>
<body>
<h1>${safeTitle}</h1>
<p>生成时间: ${new Date().toLocaleString('zh-CN')}</p>
<hr>
<h2>分析报告</h2>
<pre class="markdown"><code>${escapeHtml(markdown || '')}</code></pre>
<div class="tabs">
<button class="tab active" onclick="showCode('python', this)">Python 代码</button>
<button class="tab" onclick="showCode('js', this)">JavaScript 代码</button>
</div>
<div id="python" class="code-panel active"><pre><code>${escapeHtml(pythonCode || '# 待生成')}</code></pre></div>
<div id="js" class="code-panel"><pre><code>${escapeHtml(jsCode || '// 待生成')}</code></pre></div>
<script>
function showCode(id, button) {
document.querySelectorAll('.code-panel').forEach(p => p.classList.remove('active'));
document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
document.getElementById(id).classList.add('active');
button.classList.add('active');
}
</script>
</body>
</html>`;
}
72
+
73
/**
 * Tool: save an analysis report to disk.
 *
 * Writes analysis.md and report.html (and decrypt.py / decrypt.js when the
 * corresponding code is non-empty) into a per-domain folder under OUTPUT_DIR.
 * Returns a JSON string: { success: true, paths, dir } on success, or
 * { success: false, error } when any step throws.
 */
export const saveAnalysisReport = tool(
  async ({ domain, title, markdown, pythonCode, jsCode }) => {
    try {
      const domainDir = join(OUTPUT_DIR, extractDomain(domain));
      ensureDir(domainDir);

      const paths = {};
      // Record the target path under `key`, then write the content to it.
      const persist = (key, fileName, content) => {
        paths[key] = join(domainDir, fileName);
        writeFileSync(paths[key], content, 'utf-8');
      };

      persist('markdown', 'analysis.md', markdown);
      if (pythonCode) {
        persist('python', 'decrypt.py', pythonCode);
      }
      if (jsCode) {
        persist('javascript', 'decrypt.js', jsCode);
      }
      persist('html', 'report.html', generateHtmlPage(title || domain, markdown, pythonCode, jsCode));

      console.log('[report] 已保存:', domainDir);
      return JSON.stringify({ success: true, paths, dir: domainDir });
    } catch (e) {
      return JSON.stringify({ success: false, error: e.message });
    }
  },
  {
    name: 'save_analysis_report',
    description: '保存加密分析报告。分析完成后必须调用,保存 Markdown、HTML 和代码文件。',
    schema: z.object({
      domain: z.string().describe('网站域名或 URL'),
      title: z.string().optional().describe('报告标题'),
      markdown: z.string().describe('Markdown 分析报告'),
      pythonCode: z.string().describe('Python 解密代码(必须提供完整可运行代码)'),
      jsCode: z.string().optional().describe('JavaScript 解密代码'),
    }),
  }
);
123
+
124
/** Every tool defined by the report module. */
export const reportTools = [
  saveAnalysisReport,
];
@@ -0,0 +1,132 @@
1
+ /**
2
+ * DeepSpider - 浏览器运行时工具
3
+ * 暴露给 Agent 的浏览器操作能力
4
+ */
5
+
6
+ import { z } from 'zod';
7
+ import { tool } from '@langchain/core/tools';
8
+ import { getBrowser, closeBrowser } from '../../browser/index.js';
9
+ import { HookManager } from '../../browser/hooks/index.js';
10
+
11
// Module-level HookManager; non-null means hooks are considered injected.
let hookManager = null;

/**
 * Mark hooks as already injected (intended for external callers).
 *
 * Creates a HookManager when none exists, without calling inject, so that
 * launchBrowser will skip its own injection step.
 */
export function markHookInjected() {
  if (hookManager) {
    return;
  }
  hookManager = new HookManager();
}
21
+
22
/**
 * Tool: start (or reuse) the browser and inject the hook scripts once.
 *
 * On the first call a HookManager is created and injected into the current
 * page. If injection fails, the manager is discarded so that a later call
 * retries instead of reporting the browser as ready without hooks.
 *
 * Returns a JSON string with `success` and a status `message`.
 */
export const launchBrowser = tool(
  async ({ headless }) => {
    const browser = await getBrowser({ headless });

    // Hooks already injected (or marked as injected): nothing more to do.
    if (hookManager) {
      return JSON.stringify({ success: true, message: '浏览器已就绪' });
    }

    // Assign before awaiting so overlapping calls do not inject twice, but
    // roll back on failure so the "injected" state never outlives an error.
    const manager = new HookManager();
    hookManager = manager;
    try {
      await manager.inject(browser.getPage());
    } catch (err) {
      if (hookManager === manager) {
        hookManager = null;
      }
      throw err;
    }
    return JSON.stringify({ success: true, message: '浏览器已启动,Hook 已注入' });
  },
  {
    name: 'launch_browser',
    description: '启动浏览器并注入 Hook 脚本(如已启动则复用)',
    schema: z.object({
      headless: z.boolean().default(false).describe('是否无头模式'),
    }),
  }
);
44
+
45
/**
 * Tool: navigate the current page to a URL.
 *
 * Navigation is skipped when the page is already at the target URL; the
 * comparison ignores trailing slashes. Returns a JSON string containing
 * `success` and the resulting `url`.
 */
export const navigateTo = tool(
  async ({ url }) => {
    const stripTrailingSlashes = (value) => value.replace(/\/+$/, '');

    const browser = await getBrowser();
    const currentUrl = browser.getPage().url();

    if (stripTrailingSlashes(currentUrl) !== stripTrailingSlashes(url)) {
      const finalUrl = await browser.navigate(url);
      return JSON.stringify({ success: true, url: finalUrl });
    }

    return JSON.stringify({ success: true, url: currentUrl, message: '已在目标页面' });
  },
  {
    name: 'navigate_to',
    description: '导航到指定 URL(如已在目标页面则跳过)',
    schema: z.object({
      url: z.string().describe('目标 URL'),
    }),
  }
);
71
+
72
/**
 * Tool: close the browser.
 *
 * After closeBrowser resolves, the module-level HookManager is cleared so
 * the next launch injects hooks again. Returns '{"success":true}'.
 */
export const browserClose = tool(
  async () => {
    await closeBrowser();
    hookManager = null;
    const outcome = { success: true };
    return JSON.stringify(outcome);
  },
  {
    name: 'browser_close',
    description: '关闭浏览器',
    schema: z.object({}),
  }
);
87
+
88
/**
 * Tool: register a script via the current page's addInitScript.
 *
 * Returns a JSON string with `success` and a confirmation `message`.
 */
export const addInitScript = tool(
  async ({ script }) => {
    const page = (await getBrowser()).getPage();
    await page.addInitScript(script);
    return JSON.stringify({
      success: true,
      message: '脚本将在每次页面加载前执行',
    });
  },
  {
    name: 'add_init_script',
    description: '添加页面加载前执行的脚本(用于在 JS 执行前注入 Hook)',
    schema: z.object({
      script: z.string().describe('要注入的 JS 代码'),
    }),
  }
);
106
+
107
/**
 * Tool: clear browser cookies, optionally limited to one domain.
 *
 * Without `domain`, every cookie in the browser context is cleared. With
 * `domain`, a filter matching that domain and its subdomains (with or
 * without a leading dot) is passed to the context's clearCookies.
 *
 * NOTE(review): the filter assumes the context is a Playwright
 * BrowserContext whose clearCookies accepts `{ domain }` (Playwright 1.43+);
 * presumably older versions ignore the argument and clear everything, as
 * before. Confirm against the pinned browser library version.
 *
 * Returns a JSON string with `success` and a confirmation `message`.
 */
export const clearCookies = tool(
  async ({ domain }) => {
    const browser = await getBrowser();
    const context = browser.getContext();

    const target = typeof domain === 'string' ? domain.trim().replace(/^\.+/, '') : '';
    if (target) {
      // Escape regex metacharacters so the domain is matched literally.
      const escaped = target.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      await context.clearCookies({ domain: new RegExp(`(^|\\.)${escaped}$`) });
    } else {
      await context.clearCookies();
    }

    return JSON.stringify({ success: true, message: 'Cookie 已清除' });
  },
  {
    name: 'clear_cookies',
    description: '清除浏览器 Cookie(用于触发 cookie 生成逻辑)',
    schema: z.object({
      domain: z.string().optional().describe('指定域名(可选)'),
    }),
  }
);
125
+
126
/** Every browser-runtime tool defined by this module. */
export const runtimeTools = [launchBrowser, navigateTo, browserClose, addInitScript, clearCookies];
@@ -0,0 +1,79 @@
1
+ /**
2
+ * DeepSpider - 沙箱工具
3
+ * 基于 @langchain/core/tools 的统一定义
4
+ */
5
+
6
+ import { z } from 'zod';
7
+ import { tool } from '@langchain/core/tools';
8
+ import { Sandbox } from '../../core/Sandbox.js';
9
+
10
// Singleton sandbox; set only after init() has completed successfully.
let sandbox = null;
// In-flight initialisation, shared by callers that arrive while it is pending.
let pendingSandbox = null;

/**
 * Return the shared Sandbox, creating and initialising it on first use.
 *
 * The instance is cached only once init() succeeds. Callers that arrive
 * while initialisation is in progress wait for the same attempt, and a
 * failed attempt is not cached, so the next call retries.
 *
 * @returns {Promise<Sandbox>} The initialised sandbox.
 */
export async function getSandbox() {
  if (sandbox) {
    return sandbox;
  }

  if (!pendingSandbox) {
    pendingSandbox = (async () => {
      const instance = new Sandbox();
      await instance.init();
      sandbox = instance;
      return instance;
    })();

    // Drop the marker once the attempt settles, whether it succeeded or not.
    const clearPending = () => {
      pendingSandbox = null;
    };
    pendingSandbox.then(clearPending, clearPending);
  }

  return pendingSandbox;
}
20
+
21
/**
 * Reset the shared sandbox if one has been created; otherwise do nothing.
 *
 * @returns {Promise<void>}
 */
export async function resetSandbox() {
  if (!sandbox) {
    return;
  }
  await sandbox.reset();
}
26
+
27
/**
 * Tool: execute JS code in the shared sandbox.
 *
 * Passes `code` and `{ timeout }` to the sandbox's execute method and
 * returns the result as pretty-printed JSON.
 */
export const sandboxExecute = tool(
  async ({ code, timeout }) => {
    const options = { timeout };
    const instance = await getSandbox();
    const outcome = await instance.execute(code, options);
    return JSON.stringify(outcome, null, 2);
  },
  {
    name: 'sandbox_execute',
    description: '在隔离沙箱中执行JS代码,返回执行结果和缺失环境列表。用于检测代码依赖的浏览器环境。',
    schema: z.object({
      code: z.string().describe('要执行的JS代码'),
      timeout: z.number().optional().default(5000).describe('超时时间(ms)'),
    }),
  }
);
45
+
46
/**
 * Tool: inject patch code into the shared sandbox.
 *
 * Returns the result of the sandbox's inject method as a JSON string.
 */
export const sandboxInject = tool(
  async ({ code }) => {
    const instance = await getSandbox();
    return JSON.stringify(await instance.inject(code));
  },
  {
    name: 'sandbox_inject',
    description: '向沙箱注入环境补丁代码,用于补全缺失的浏览器API。',
    schema: z.object({
      code: z.string().describe('补丁代码'),
    }),
  }
);
63
+
64
/**
 * Tool: reset the shared sandbox via resetSandbox.
 *
 * Returns a JSON string with `success` and a confirmation `message`.
 */
export const sandboxReset = tool(
  async () => {
    await resetSandbox();
    const outcome = { success: true, message: '沙箱已重置' };
    return JSON.stringify(outcome);
  },
  {
    name: 'sandbox_reset',
    description: '重置沙箱到初始状态,清除所有注入的环境和执行上下文。',
    schema: z.object({}),
  }
);
78
+
79
/** Every sandbox tool defined by this module. */
export const sandboxTools = [
  sandboxExecute,
  sandboxInject,
  sandboxReset,
];
@@ -0,0 +1,73 @@
1
+ /**
2
+ * DeepSpider - 存储工具
3
+ */
4
+
5
+ import { z } from 'zod';
6
+ import { tool } from '@langchain/core/tools';
7
+ import { Store } from '../../store/Store.js';
8
+
9
// Lazily created Store shared by the tools in this module.
let store = null;

/**
 * Return the shared Store, constructing it on first use.
 *
 * @returns {Store} The module-wide Store instance.
 */
function getStore() {
  store = store || new Store();
  return store;
}
17
+
18
/**
 * Tool: save a piece of code to the knowledge store.
 *
 * The stored record is the optional metadata plus `code`. `code` is applied
 * last, so a `code` key inside `metadata` cannot overwrite the real code.
 * Returns the store's save result as a JSON string.
 */
export const saveToStore = tool(
  async ({ type, name, code, metadata }) => {
    const s = getStore();
    // Spreading undefined metadata is a no-op; `code` must win on key clash.
    const record = { ...metadata, code };
    return JSON.stringify(s.save(type, name, record));
  },
  {
    name: 'save_to_store',
    description: '将验证通过的代码保存到知识库,用于复用。',
    schema: z.object({
      type: z.enum(['env-module', 'crypto-pattern', 'obfuscation']).describe('类型'),
      name: z.string().describe('名称'),
      code: z.string().describe('代码'),
      metadata: z.record(z.string(), z.unknown()).optional().describe('元数据'),
    }),
  }
);
37
+
38
/**
 * Tool: query the knowledge store.
 *
 * Forwards `type` and `query` to the store's query method and returns the
 * result as pretty-printed JSON.
 */
export const queryStore = tool(
  async ({ type, query }) => {
    const matches = getStore().query(type, query);
    return JSON.stringify(matches, null, 2);
  },
  {
    name: 'query_store',
    description: '查询知识库中的已有实现。',
    schema: z.object({
      type: z.string().optional().describe('类型'),
      query: z.string().describe('查询关键词'),
    }),
  }
);
55
+
56
/**
 * Tool: list knowledge-store entries of a given type.
 *
 * Returns the store's list result as a JSON string.
 */
export const listStore = tool(
  async ({ type }) => {
    const entries = getStore().list(type);
    return JSON.stringify(entries);
  },
  {
    name: 'list_store',
    description: '列出知识库中某类型的所有条目。',
    schema: z.object({
      type: z.string().describe('类型'),
    }),
  }
);
72
+
73
/** Every knowledge-store tool defined by this module. */
export const storeTools = [
  saveToStore,
  queryStore,
  listStore,
];
@@ -0,0 +1,74 @@
1
+ /**
2
+ * DeepSpider - 追踪工具
3
+ */
4
+
5
+ import { z } from 'zod';
6
+ import { tool } from '@langchain/core/tools';
7
+ import { ASTAnalyzer } from '../../analyzer/ASTAnalyzer.js';
8
+ import { CallStackAnalyzer } from '../../analyzer/CallStackAnalyzer.js';
9
+
10
/**
 * Tool: trace a variable through a piece of JS code.
 *
 * Returns pretty-printed JSON with `assignments` (from
 * ASTAnalyzer.findAssignments) and `dataFlow` (from
 * CallStackAnalyzer.traceDataFlow).
 */
export const traceVariable = tool(
  async ({ code, varName }) => {
    const astAnalyzer = new ASTAnalyzer();
    const callAnalyzer = new CallStackAnalyzer();

    const assignments = astAnalyzer.findAssignments(code, varName);
    const dataFlow = callAnalyzer.traceDataFlow(code, varName);

    return JSON.stringify({ assignments, dataFlow }, null, 2);
  },
  {
    name: 'trace_variable',
    description: '追踪变量的赋值和数据流,找出变量的来源和去向。',
    schema: z.object({
      code: z.string().describe('JS代码'),
      varName: z.string().describe('变量名'),
    }),
  }
);
31
+
32
/**
 * Tool: trace how a target function is reached and what it depends on.
 *
 * Returns pretty-printed JSON with four fields: `callGraph` (entries from
 * buildCallGraph converted to a plain object), `slice` (extractSlice),
 * `callers` (findCallers) and `callChain` (buildCallChain).
 */
export const traceRequestParams = tool(
  async ({ code, funcName }) => {
    const analyzer = new CallStackAnalyzer();
    const astAnalyzer = new ASTAnalyzer();

    const callGraph = Object.fromEntries(analyzer.buildCallGraph(code));
    const slice = astAnalyzer.extractSlice(code, funcName);
    const callers = analyzer.findCallers(code, funcName);
    const callChain = analyzer.buildCallChain(code, funcName);

    return JSON.stringify({ callGraph, slice, callers, callChain }, null, 2);
  },
  {
    name: 'trace_request_params',
    description: '追踪请求参数的生成逻辑,提取相关代码切片。',
    schema: z.object({
      code: z.string().describe('JS代码'),
      funcName: z.string().describe('目标函数名'),
    }),
  }
);
55
+
56
/**
 * Tool: find function calls matching a pattern.
 *
 * Forwards `code` and `pattern` to ASTAnalyzer.findCallPattern and returns
 * the result as pretty-printed JSON.
 */
export const findCallPattern = tool(
  async ({ code, pattern }) => {
    const matches = new ASTAnalyzer().findCallPattern(code, pattern);
    return JSON.stringify(matches, null, 2);
  },
  {
    name: 'find_call_pattern',
    description: '按正则模式查找函数调用,用于定位特定API调用。',
    schema: z.object({
      code: z.string().describe('JS代码'),
      pattern: z.string().describe('正则模式'),
    }),
  }
);
73
+
74
/** Every static tracing tool defined by this module. */
export const traceTools = [
  traceVariable,
  traceRequestParams,
  findCallPattern,
];
@@ -0,0 +1,201 @@
1
+ /**
2
+ * DeepSpider - 数据溯源工具
3
+ * 提供给 Agent 的数据分析能力,支持按站点过滤
4
+ */
5
+
6
+ import { z } from 'zod';
7
+ import { tool } from '@langchain/core/tools';
8
+ import { getDataStore } from '../../store/DataStore.js';
9
+
10
/**
 * Tool: list the sites known to the data store.
 *
 * Returns the data store's getSiteList result as pretty-printed JSON.
 */
export const getSiteList = tool(
  async () => JSON.stringify(getDataStore().getSiteList(), null, 2),
  {
    name: 'get_site_list',
    description: '获取所有已记录数据的站点列表',
    schema: z.object({}),
  }
);
25
+
26
/**
 * Tool: search recorded responses for a piece of text.
 *
 * `site` is passed as null when omitted or empty. Returns the search
 * results as pretty-printed JSON.
 */
export const searchInResponses = tool(
  async ({ text, site }) => {
    const hits = await getDataStore().searchInResponses(text, site || null);
    return JSON.stringify(hits, null, 2);
  },
  {
    name: 'search_in_responses',
    description: '在响应中搜索文本,定位数据来源请求',
    schema: z.object({
      text: z.string().describe('要搜索的文本'),
      site: z.string().optional().describe('限定搜索的站点(hostname)'),
    }),
  }
);
44
+
45
/**
 * Tool: fetch one recorded request by site and id.
 *
 * Returns the data store's getResponse result as pretty-printed JSON, or a
 * JSON object with an `error` message when the lookup yields a falsy value.
 */
export const getRequestDetail = tool(
  async ({ site, id }) => {
    const detail = await getDataStore().getResponse(site, id);
    return detail
      ? JSON.stringify(detail, null, 2)
      : JSON.stringify({ error: '未找到该请求' });
  },
  {
    name: 'get_request_detail',
    description: '获取指定请求的完整信息(Headers、Body、Response)',
    schema: z.object({
      site: z.string().describe('站点 hostname'),
      id: z.string().describe('请求 ID'),
    }),
  }
);
66
+
67
/**
 * Tool: list recorded requests.
 *
 * `site` is passed as null when omitted or empty. Returns the data store's
 * getResponseList result as pretty-printed JSON.
 */
export const getRequestList = tool(
  async ({ site }) => {
    const entries = await getDataStore().getResponseList(site || null);
    return JSON.stringify(entries, null, 2);
  },
  {
    name: 'get_request_list',
    description: '获取请求列表(仅元数据)',
    schema: z.object({
      site: z.string().optional().describe('限定站点(hostname),不传则返回所有'),
    }),
  }
);
84
+
85
/**
 * Tool: list recorded JS scripts.
 *
 * `site` is passed as null when omitted or empty. Returns the data store's
 * getScriptList result as pretty-printed JSON.
 */
export const getScriptList = tool(
  async ({ site }) => {
    const entries = await getDataStore().getScriptList(site || null);
    return JSON.stringify(entries, null, 2);
  },
  {
    name: 'get_script_list',
    description: '获取 JS 脚本列表',
    schema: z.object({
      site: z.string().optional().describe('限定站点(hostname),不传则返回所有'),
    }),
  }
);
102
+
103
/**
 * Tool: fetch a script's source, optionally in chunks.
 *
 * `offset` is clamped to a non-negative integer (default 0) and `limit` to a
 * positive integer (default 5000), so the reported `offset`, `limit` and
 * `hasMore` always describe the returned `content`. Returns a JSON string
 * with { total, offset, limit, hasMore, content }, or { error } when the
 * script lookup yields a falsy value.
 */
export const getScriptSource = tool(
  async ({ site, id, offset, limit }) => {
    const store = getDataStore();
    const source = await store.getScript(site, id);
    if (!source) {
      return JSON.stringify({ error: '未找到该脚本' });
    }

    // A negative offset would make slice() count from the end of the string
    // while still being reported as the chunk's start position.
    const start = Number.isFinite(offset) && offset > 0 ? Math.floor(offset) : 0;
    // Zero, negative, fractional (<1) or non-numeric limits use the default.
    const size = Number.isFinite(limit) && limit >= 1 ? Math.floor(limit) : 5000;
    const chunk = source.slice(start, start + size);

    return JSON.stringify({
      total: source.length,
      offset: start,
      limit: size,
      hasMore: start + size < source.length,
      content: chunk
    });
  },
  {
    name: 'get_script_source',
    description: '获取指定脚本的源码(支持分段获取)',
    schema: z.object({
      site: z.string().describe('站点 hostname'),
      id: z.string().describe('脚本 ID'),
      offset: z.number().optional().default(0).describe('起始位置(字符偏移)'),
      limit: z.number().optional().default(5000).describe('获取长度(默认 5000)'),
    }),
  }
);
137
+
138
/**
 * Tool: search recorded JS scripts for a piece of text.
 *
 * `site` is passed as null when omitted or empty. Returns the search
 * results as pretty-printed JSON.
 */
export const searchInScripts = tool(
  async ({ text, site }) => {
    const hits = await getDataStore().searchInScripts(text, site || null);
    return JSON.stringify(hits, null, 2);
  },
  {
    name: 'search_in_scripts',
    description: '在 JS 脚本中搜索文本,定位代码位置',
    schema: z.object({
      text: z.string().describe('要搜索的文本'),
      site: z.string().optional().describe('限定搜索的站点(hostname)'),
    }),
  }
);
156
+
157
/**
 * Tool: clear the data recorded for one site.
 *
 * Calls the data store's clearSite and returns a JSON string with `success`
 * and a confirmation `message` naming the site.
 */
export const clearSiteData = tool(
  async ({ site }) => {
    await getDataStore().clearSite(site);
    const message = `站点 ${site} 数据已清除`;
    return JSON.stringify({ success: true, message });
  },
  {
    name: 'clear_site_data',
    description: '清除指定站点的所有数据',
    schema: z.object({
      site: z.string().describe('站点 hostname'),
    }),
  }
);
174
+
175
/**
 * Tool: clear the data recorded for all sites.
 *
 * Calls the data store's clearAll and returns a JSON string with `success`
 * and a confirmation `message`.
 */
export const clearAllData = tool(
  async () => {
    await getDataStore().clearAll();
    const outcome = { success: true, message: '所有数据已清除' };
    return JSON.stringify(outcome);
  },
  {
    name: 'clear_all_data',
    description: '清除所有站点的数据',
    schema: z.object({}),
  }
);
190
+
191
/** Every data-tracing tool defined by this module. */
export const tracingTools = [
  // Read-only queries
  getSiteList,
  searchInResponses,
  getRequestDetail,
  getRequestList,
  getScriptList,
  getScriptSource,
  searchInScripts,
  // Destructive operations
  clearSiteData,
  clearAllData,
];