deepspider 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/.claude/agents/check.md +122 -0
  2. package/.claude/agents/debug.md +106 -0
  3. package/.claude/agents/dispatch.md +214 -0
  4. package/.claude/agents/implement.md +96 -0
  5. package/.claude/agents/plan.md +396 -0
  6. package/.claude/agents/research.md +120 -0
  7. package/.claude/commands/evolve/merge.md +80 -0
  8. package/.claude/commands/trellis/before-backend-dev.md +13 -0
  9. package/.claude/commands/trellis/before-frontend-dev.md +13 -0
  10. package/.claude/commands/trellis/break-loop.md +107 -0
  11. package/.claude/commands/trellis/check-backend.md +13 -0
  12. package/.claude/commands/trellis/check-cross-layer.md +153 -0
  13. package/.claude/commands/trellis/check-frontend.md +13 -0
  14. package/.claude/commands/trellis/create-command.md +154 -0
  15. package/.claude/commands/trellis/finish-work.md +129 -0
  16. package/.claude/commands/trellis/integrate-skill.md +219 -0
  17. package/.claude/commands/trellis/onboard.md +358 -0
  18. package/.claude/commands/trellis/parallel.md +193 -0
  19. package/.claude/commands/trellis/record-session.md +62 -0
  20. package/.claude/commands/trellis/start.md +280 -0
  21. package/.claude/commands/trellis/update-spec.md +213 -0
  22. package/.claude/hooks/inject-subagent-context.py +758 -0
  23. package/.claude/hooks/ralph-loop.py +374 -0
  24. package/.claude/hooks/session-start.py +126 -0
  25. package/.claude/settings.json +41 -0
  26. package/.claude/skills/deepagents-guide/SKILL.md +428 -0
  27. package/.cursor/commands/trellis-before-backend-dev.md +13 -0
  28. package/.cursor/commands/trellis-before-frontend-dev.md +13 -0
  29. package/.cursor/commands/trellis-break-loop.md +107 -0
  30. package/.cursor/commands/trellis-check-backend.md +13 -0
  31. package/.cursor/commands/trellis-check-cross-layer.md +153 -0
  32. package/.cursor/commands/trellis-check-frontend.md +13 -0
  33. package/.cursor/commands/trellis-create-command.md +154 -0
  34. package/.cursor/commands/trellis-finish-work.md +129 -0
  35. package/.cursor/commands/trellis-integrate-skill.md +219 -0
  36. package/.cursor/commands/trellis-onboard.md +358 -0
  37. package/.cursor/commands/trellis-record-session.md +62 -0
  38. package/.cursor/commands/trellis-start.md +156 -0
  39. package/.cursor/commands/trellis-update-spec.md +213 -0
  40. package/.env.example +11 -0
  41. package/.husky/pre-commit +1 -0
  42. package/.mcp.json +8 -0
  43. package/.trellis/.template-hashes.json +65 -0
  44. package/.trellis/.version +1 -0
  45. package/.trellis/scripts/add-session.sh +384 -0
  46. package/.trellis/scripts/common/developer.sh +129 -0
  47. package/.trellis/scripts/common/git-context.sh +263 -0
  48. package/.trellis/scripts/common/paths.sh +208 -0
  49. package/.trellis/scripts/common/phase.sh +150 -0
  50. package/.trellis/scripts/common/registry.sh +247 -0
  51. package/.trellis/scripts/common/task-queue.sh +142 -0
  52. package/.trellis/scripts/common/task-utils.sh +151 -0
  53. package/.trellis/scripts/common/worktree.sh +128 -0
  54. package/.trellis/scripts/create-bootstrap.sh +299 -0
  55. package/.trellis/scripts/get-context.sh +7 -0
  56. package/.trellis/scripts/get-developer.sh +15 -0
  57. package/.trellis/scripts/init-developer.sh +34 -0
  58. package/.trellis/scripts/multi-agent/cleanup.sh +396 -0
  59. package/.trellis/scripts/multi-agent/create-pr.sh +241 -0
  60. package/.trellis/scripts/multi-agent/plan.sh +207 -0
  61. package/.trellis/scripts/multi-agent/start.sh +310 -0
  62. package/.trellis/scripts/multi-agent/status.sh +828 -0
  63. package/.trellis/scripts/task.sh +1118 -0
  64. package/.trellis/spec/backend/deepagents-guide.md +337 -0
  65. package/.trellis/spec/backend/directory-structure.md +126 -0
  66. package/.trellis/spec/backend/examples/skills/deepagents-guide/README.md +11 -0
  67. package/.trellis/spec/backend/examples/skills/deepagents-guide/agent.js.template +20 -0
  68. package/.trellis/spec/backend/examples/skills/deepagents-guide/skills-config.js.template +13 -0
  69. package/.trellis/spec/backend/examples/skills/deepagents-guide/subagent.js.template +19 -0
  70. package/.trellis/spec/backend/hook-guidelines.md +178 -0
  71. package/.trellis/spec/backend/index.md +36 -0
  72. package/.trellis/spec/backend/quality-guidelines.md +201 -0
  73. package/.trellis/spec/backend/state-management.md +76 -0
  74. package/.trellis/spec/backend/tool-guidelines.md +144 -0
  75. package/.trellis/spec/backend/type-safety.md +71 -0
  76. package/.trellis/spec/guides/code-reuse-thinking-guide.md +92 -0
  77. package/.trellis/spec/guides/cross-layer-thinking-guide.md +94 -0
  78. package/.trellis/spec/guides/index.md +79 -0
  79. package/.trellis/tasks/archive/02-02-evolving-skills/prd.md +61 -0
  80. package/.trellis/tasks/archive/02-02-evolving-skills/task.json +29 -0
  81. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/prd.md +86 -0
  82. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/task.json +27 -0
  83. package/.trellis/tasks/archive/2026-02/02-02-skills-system/check.jsonl +3 -0
  84. package/.trellis/tasks/archive/2026-02/02-02-skills-system/debug.jsonl +2 -0
  85. package/.trellis/tasks/archive/2026-02/02-02-skills-system/implement.jsonl +5 -0
  86. package/.trellis/tasks/archive/2026-02/02-02-skills-system/prd.md +33 -0
  87. package/.trellis/tasks/archive/2026-02/02-02-skills-system/task.json +41 -0
  88. package/.trellis/workflow.md +407 -0
  89. package/.trellis/workspace/index.md +123 -0
  90. package/.trellis/workspace/pony/index.md +40 -0
  91. package/.trellis/workspace/pony/journal-1.md +7 -0
  92. package/.trellis/worktree.yaml +47 -0
  93. package/AGENTS.md +18 -0
  94. package/CLAUDE.md +292 -0
  95. package/README.md +134 -0
  96. package/agents/deepspider.md +142 -0
  97. package/docs/DEBUG.md +42 -0
  98. package/docs/GUIDE.md +334 -0
  99. package/docs/PROMPT.md +60 -0
  100. package/docs/USAGE.md +226 -0
  101. package/eslint.config.js +51 -0
  102. package/package.json +78 -0
  103. package/requirements-crypto.txt +14 -0
  104. package/src/agent/index.js +97 -0
  105. package/src/agent/logger.js +164 -0
  106. package/src/agent/middleware/filterTools.js +64 -0
  107. package/src/agent/middleware/report.js +79 -0
  108. package/src/agent/prompts/system.js +315 -0
  109. package/src/agent/run.js +575 -0
  110. package/src/agent/skills/anti-detect/SKILL.md +28 -0
  111. package/src/agent/skills/anti-detect/evolved.md +12 -0
  112. package/src/agent/skills/captcha/SKILL.md +37 -0
  113. package/src/agent/skills/captcha/evolved.md +12 -0
  114. package/src/agent/skills/config.js +30 -0
  115. package/src/agent/skills/crawler/SKILL.md +9 -0
  116. package/src/agent/skills/crawler/evolved.md +16 -0
  117. package/src/agent/skills/dynamic-analysis/SKILL.md +91 -0
  118. package/src/agent/skills/dynamic-analysis/evolved.md +12 -0
  119. package/src/agent/skills/env/SKILL.md +72 -0
  120. package/src/agent/skills/env/evolved.md +12 -0
  121. package/src/agent/skills/evolve.js +79 -0
  122. package/src/agent/skills/general/SKILL.md +12 -0
  123. package/src/agent/skills/general/evolved.md +12 -0
  124. package/src/agent/skills/js2python/SKILL.md +30 -0
  125. package/src/agent/skills/js2python/evolved.md +13 -0
  126. package/src/agent/skills/report/SKILL.md +21 -0
  127. package/src/agent/skills/report/evolved.md +12 -0
  128. package/src/agent/skills/sandbox/SKILL.md +22 -0
  129. package/src/agent/skills/sandbox/evolved.md +16 -0
  130. package/src/agent/skills/static-analysis/SKILL.md +93 -0
  131. package/src/agent/skills/static-analysis/evolved.md +12 -0
  132. package/src/agent/skills/xpath/SKILL.md +119 -0
  133. package/src/agent/subagents/anti-detect.js +45 -0
  134. package/src/agent/subagents/captcha.js +51 -0
  135. package/src/agent/subagents/crawler.js +138 -0
  136. package/src/agent/subagents/dynamic.js +64 -0
  137. package/src/agent/subagents/env-agent.js +82 -0
  138. package/src/agent/subagents/index.js +37 -0
  139. package/src/agent/subagents/js2python.js +72 -0
  140. package/src/agent/subagents/sandbox.js +55 -0
  141. package/src/agent/subagents/static.js +66 -0
  142. package/src/agent/tools/analysis.js +135 -0
  143. package/src/agent/tools/analyzer.js +85 -0
  144. package/src/agent/tools/anti-detect.js +89 -0
  145. package/src/agent/tools/antidebug.js +64 -0
  146. package/src/agent/tools/async.js +43 -0
  147. package/src/agent/tools/browser.js +324 -0
  148. package/src/agent/tools/captcha.js +223 -0
  149. package/src/agent/tools/capture.js +179 -0
  150. package/src/agent/tools/correlate.js +303 -0
  151. package/src/agent/tools/crawler.js +116 -0
  152. package/src/agent/tools/cryptohook.js +80 -0
  153. package/src/agent/tools/debug.js +246 -0
  154. package/src/agent/tools/deobfuscator.js +90 -0
  155. package/src/agent/tools/env.js +83 -0
  156. package/src/agent/tools/envdump.js +92 -0
  157. package/src/agent/tools/evolve.js +164 -0
  158. package/src/agent/tools/extract.js +114 -0
  159. package/src/agent/tools/extractor.js +54 -0
  160. package/src/agent/tools/file.js +224 -0
  161. package/src/agent/tools/hook.js +84 -0
  162. package/src/agent/tools/hookManager.js +178 -0
  163. package/src/agent/tools/index.js +137 -0
  164. package/src/agent/tools/nodejs.js +101 -0
  165. package/src/agent/tools/patch.js +46 -0
  166. package/src/agent/tools/preprocess.js +71 -0
  167. package/src/agent/tools/profile.js +122 -0
  168. package/src/agent/tools/python.js +627 -0
  169. package/src/agent/tools/report.js +124 -0
  170. package/src/agent/tools/runtime.js +132 -0
  171. package/src/agent/tools/sandbox.js +79 -0
  172. package/src/agent/tools/store.js +73 -0
  173. package/src/agent/tools/trace.js +74 -0
  174. package/src/agent/tools/tracing.js +201 -0
  175. package/src/agent/tools/utils.js +51 -0
  176. package/src/agent/tools/verify.js +184 -0
  177. package/src/agent/tools/webcrack.js +109 -0
  178. package/src/analyzer/ASTAnalyzer.js +387 -0
  179. package/src/analyzer/CallStackAnalyzer.js +379 -0
  180. package/src/analyzer/Deobfuscator.js +289 -0
  181. package/src/analyzer/EncryptionAnalyzer.js +99 -0
  182. package/src/analyzer/index.js +22 -0
  183. package/src/browser/EnvBridge.js +186 -0
  184. package/src/browser/cdp.js +168 -0
  185. package/src/browser/client.js +197 -0
  186. package/src/browser/collector.js +444 -0
  187. package/src/browser/collectors/RequestCryptoLinker.js +109 -0
  188. package/src/browser/collectors/ResponseSearcher.js +107 -0
  189. package/src/browser/collectors/ScriptCollector.js +158 -0
  190. package/src/browser/collectors/index.js +26 -0
  191. package/src/browser/defaultHooks.js +932 -0
  192. package/src/browser/hooks/crypto.js +55 -0
  193. package/src/browser/hooks/index.js +64 -0
  194. package/src/browser/hooks/native.js +9 -0
  195. package/src/browser/hooks/network.js +33 -0
  196. package/src/browser/index.js +42 -0
  197. package/src/browser/interceptors/NetworkInterceptor.js +116 -0
  198. package/src/browser/interceptors/ScriptInterceptor.js +76 -0
  199. package/src/browser/interceptors/index.js +6 -0
  200. package/src/browser/ui/analysisPanel.js +1782 -0
  201. package/src/browser/ui/confirmDialog.js +158 -0
  202. package/src/browser/ui/panel.html +152 -0
  203. package/src/browser/ui/selector.js +170 -0
  204. package/src/config/index.js +5 -0
  205. package/src/config/paths.js +71 -0
  206. package/src/config/patterns/crypto.js +36 -0
  207. package/src/config/profiles/chrome.json +71 -0
  208. package/src/config/profiles/firefox.json +44 -0
  209. package/src/config/profiles/safari.json +38 -0
  210. package/src/core/EnvMonitor.js +200 -0
  211. package/src/core/PatchGenerator.js +278 -0
  212. package/src/core/Sandbox.js +181 -0
  213. package/src/env/AntiAntiDebug.js +111 -0
  214. package/src/env/AsyncHook.js +68 -0
  215. package/src/env/BrowserAPIList.js +265 -0
  216. package/src/env/CookieHook.js +48 -0
  217. package/src/env/CryptoHook.js +205 -0
  218. package/src/env/EnvCodeGenerator.js +157 -0
  219. package/src/env/EnvDumper.js +356 -0
  220. package/src/env/EnvExtractor.js +220 -0
  221. package/src/env/HookBase.js +618 -0
  222. package/src/env/NetworkHook.js +159 -0
  223. package/src/env/modules/bom/history.js +29 -0
  224. package/src/env/modules/bom/location.js +26 -0
  225. package/src/env/modules/bom/navigator.js +70 -0
  226. package/src/env/modules/bom/screen.js +26 -0
  227. package/src/env/modules/bom/storage.js +23 -0
  228. package/src/env/modules/dom/document.js +110 -0
  229. package/src/env/modules/dom/event.js +51 -0
  230. package/src/env/modules/index.js +34 -0
  231. package/src/env/modules/webapi/fetch.js +46 -0
  232. package/src/env/modules/webapi/url.js +47 -0
  233. package/src/env/modules/webapi/xhr.js +48 -0
  234. package/src/index.js +27 -0
  235. package/src/mcp/server.js +89 -0
  236. package/src/store/DataStore.js +708 -0
  237. package/src/store/Store.js +158 -0
  238. package/src/store/Validator.js +24 -0
  239. package/test/analyze.test.js +90 -0
  240. package/test/envdump.test.js +74 -0
  241. package/test/flow.test.js +90 -0
  242. package/test/hooks.test.js +138 -0
  243. package/test/plugin.test.js +35 -0
  244. package/test/refactor-full.test.js +30 -0
  245. package/test/refactor.test.js +21 -0
  246. package/test/samples/obfuscated.js +61 -0
  247. package/test/samples/original.js +66 -0
  248. package/test/samples/v10_eval_chain.js +52 -0
  249. package/test/samples/v11_bytecode_vm.js +81 -0
  250. package/test/samples/v12_polymorphic.js +69 -0
  251. package/test/samples/v1_ob_basic.js +98 -0
  252. package/test/samples/v2_ob_advanced.js +99 -0
  253. package/test/samples/v3_jjencode.js +77 -0
  254. package/test/samples/v4_aaencode.js +73 -0
  255. package/test/samples/v5_control_flow.js +86 -0
  256. package/test/samples/v6_string_encryption.js +71 -0
  257. package/test/samples/v7_jsvmp.js +83 -0
  258. package/test/samples/v8_anti_debug.js +79 -0
  259. package/test/samples/v9_proxy_trap.js +49 -0
  260. package/test/samples.test.js +96 -0
  261. package/test/webcrack.test.js +55 -0
@@ -0,0 +1,179 @@
1
+ /**
2
+ * DeepSpider - 数据采集工具
3
+ */
4
+
5
+ import { z } from 'zod';
6
+ import { tool } from '@langchain/core/tools';
7
+ import { getBrowser } from '../../browser/index.js';
8
+ import { EnvCollector } from '../../browser/collector.js';
9
+ import { EnvBridge } from '../../browser/EnvBridge.js';
10
+
11
/**
 * Tool: capture a full environment snapshot from the current browser page.
 *
 * Takes no arguments. Resolves to the snapshot returned by
 * `EnvCollector#collectFullSnapshot`, serialized as pretty-printed JSON.
 */
export const collectEnv = tool(
  async () => {
    const page = (await getBrowser()).getPage();
    const snapshot = await new EnvCollector(page).collectFullSnapshot();
    return JSON.stringify(snapshot, null, 2);
  },
  {
    name: 'collect_env',
    description: '采集浏览器完整环境快照(navigator、screen、canvas、webgl、fonts 等)',
    schema: z.object({}),
  }
);
27
+
28
/**
 * Tool: collect the value found at one property path in the current page.
 *
 * @param {object} input
 * @param {string} input.path - Property path to read.
 * @param {number} [input.depth=2] - Collection depth forwarded to the collector.
 * @returns {Promise<string>} Collected data as pretty-printed JSON.
 */
export const collectProperty = tool(
  async ({ path, depth }) => {
    const page = (await getBrowser()).getPage();
    const collected = await new EnvCollector(page).collect(path, { depth });
    return JSON.stringify(collected, null, 2);
  },
  {
    name: 'collect_property',
    description: '从真实浏览器采集指定属性路径的值',
    schema: z.object({
      path: z.string().describe('属性路径,如 navigator.connection.effectiveType'),
      depth: z.number().optional().default(2).describe('采集深度'),
    }),
  }
);
47
+
48
/**
 * Tool: run the EnvBridge pipeline for a list of missing property paths.
 *
 * @param {object} input
 * @param {string[]} input.missingPaths - Property paths reported as missing.
 * @returns {Promise<string>} Result of `EnvBridge#runFullPipeline` as
 *   pretty-printed JSON.
 */
export const autoFixEnv = tool(
  async ({ missingPaths }) => {
    const page = (await getBrowser()).getPage();
    const outcome = await new EnvBridge(page).runFullPipeline(missingPaths);
    return JSON.stringify(outcome, null, 2);
  },
  {
    name: 'auto_fix_env',
    description: '根据缺失属性列表,自动从真实浏览器采集并生成补丁代码',
    schema: z.object({
      missingPaths: z.array(z.string()).describe('缺失的属性路径列表'),
    }),
  }
);
66
+
67
/**
 * Evaluate a JavaScript expression in the page through the browser's CDP
 * session.
 *
 * @param {object} browser - Object exposing an async `getCDPSession()`.
 * @param {string} expression - Source text passed to `Runtime.evaluate`.
 * @returns {Promise<*>} The by-value result of the expression, `undefined`
 *   when the reply carries no result value, or `null` when no CDP session
 *   is available.
 */
async function evaluateViaCDP(browser, expression) {
  const session = await browser.getCDPSession();
  if (session) {
    const params = { expression, returnByValue: true };
    const reply = await session.send('Runtime.evaluate', params);
    return reply.result?.value;
  }
  return null;
}
79
+
80
/**
 * Tool: fetch the logs captured by the in-page hooks.
 *
 * @param {object} input
 * @param {string} [input.type] - Log category; when omitted all logs are fetched.
 * @param {number} [input.limit=50] - Maximum number of entries to return
 *   (the most recent ones are kept). Only applied to array results.
 * @returns {Promise<string>} JSON string `{ success, count, logs }` on success,
 *   or `{ success: false, error, logs: [] }` on failure.
 */
export const getHookLogs = tool(
  async ({ type, limit }) => {
    try {
      const browser = await getBrowser();
      if (!browser.getPage()) {
        return JSON.stringify({ success: false, error: '浏览器未就绪', logs: [] });
      }

      // The argument is embedded in source code evaluated inside the page, so
      // it must be emitted as an escaped string literal. JSON.stringify does
      // that; raw interpolation would break on quotes/backslashes and would
      // let the argument inject arbitrary script.
      const expression = type
        ? `window.__deepspider__?.getLogs?.(${JSON.stringify(type)}) || '[]'`
        : `window.__deepspider__?.getAllLogs?.() || '[]'`;

      const logsJson = await evaluateViaCDP(browser, expression);
      if (!logsJson) {
        return JSON.stringify({ success: false, error: 'Hook 未加载', logs: [] });
      }

      let logs = JSON.parse(logsJson);

      // Keep only the newest `limit` entries. A non-positive limit is ignored;
      // a negative one would otherwise make slice() drop entries from the front.
      if (limit > 0 && Array.isArray(logs) && logs.length > limit) {
        logs = logs.slice(-limit);
      }

      return JSON.stringify({
        success: true,
        count: Array.isArray(logs) ? logs.length : Object.keys(logs).length,
        logs,
      });
    } catch (e) {
      return JSON.stringify({ success: false, error: e.message, logs: [] });
    }
  },
  {
    name: 'get_hook_logs',
    description: '获取 Hook 捕获的日志(XHR、Fetch、Cookie、加密调用等)',
    schema: z.object({
      type: z.string().optional().describe('日志类型: xhr, fetch, cookie, crypto, json, eval, storage, encoding, websocket, env, debug, dom。不填则获取全部'),
      limit: z.number().optional().default(50).describe('返回日志数量限制,默认50条'),
    }),
  }
);
126
+
127
/**
 * Tool: search the in-page hook logs for a keyword.
 *
 * @param {object} input
 * @param {string} input.keyword - Text to search for.
 * @returns {Promise<string>} JSON string `{ success: true, results }`, or
 *   `{ success: false, error }` on failure.
 */
export const searchHookLogs = tool(
  async ({ keyword }) => {
    try {
      const browser = await getBrowser();
      if (!browser.getPage()) {
        return JSON.stringify({ success: false, error: '浏览器未就绪' });
      }
      // Emit the keyword as an escaped string literal: it ends up inside
      // source code evaluated in the page, so quotes or backslashes in it
      // must not be able to terminate the literal.
      const expression = `window.__deepspider__?.searchLogs?.(${JSON.stringify(keyword)}) || '[]'`;
      const result = await evaluateViaCDP(browser, expression);
      return JSON.stringify({ success: true, results: JSON.parse(result || '[]') });
    } catch (e) {
      return JSON.stringify({ success: false, error: e.message });
    }
  },
  {
    name: 'search_hook_logs',
    description: '按关键词搜索 Hook 日志',
    schema: z.object({
      keyword: z.string().describe('搜索关键词'),
    }),
  }
);
152
+
153
/**
 * Tool: ask the in-page hooks where a given value was seen.
 *
 * @param {object} input
 * @param {string} input.value - The value to trace.
 * @returns {Promise<string>} JSON string `{ success: true, traces }`, or
 *   `{ success: false, error }` on failure.
 */
export const traceValue = tool(
  async ({ value }) => {
    try {
      const browser = await getBrowser();
      if (!browser.getPage()) {
        return JSON.stringify({ success: false, error: '浏览器未就绪' });
      }
      // Traced values are arbitrary data and may contain quotes, backslashes
      // or newlines. JSON.stringify turns them into a safe string literal for
      // the expression evaluated in the page.
      const expression = `window.__deepspider__?.traceValue?.(${JSON.stringify(value)}) || '[]'`;
      const result = await evaluateViaCDP(browser, expression);
      return JSON.stringify({ success: true, traces: JSON.parse(result || '[]') });
    } catch (e) {
      return JSON.stringify({ success: false, error: e.message });
    }
  },
  {
    name: 'trace_value',
    description: '追踪某个值的来源(在哪个加密函数或请求中出现)',
    schema: z.object({
      value: z.string().describe('要追踪的值'),
    }),
  }
);
178
+
179
// All capture tools defined in this module, in declaration order.
export const captureTools = [
  collectEnv,
  collectProperty,
  autoFixEnv,
  getHookLogs,
  searchHookLogs,
  traceValue,
];
@@ -0,0 +1,303 @@
1
+ /**
2
+ * DeepSpider - 关联分析工具
3
+ * 分析请求与加密调用的关联关系
4
+ */
5
+
6
+ import { z } from 'zod';
7
+ import { tool } from '@langchain/core/tools';
8
+
9
/**
 * Tool: correlate network requests with the crypto calls linked to them.
 *
 * Network entries (`_type` of `xhr` or `fetch`) are grouped by `requestId`.
 * The `send`/`request` entry becomes the group's request, and the `response`
 * entry supplies the group's `linkedCrypto` list. Crypto entries that carry
 * no `requestId` are only counted.
 *
 * @param {object} input
 * @param {string} input.logs - Log entries, as a JSON string (an already
 *   parsed value is also accepted).
 * @returns {Promise<string>} Pretty-printed JSON with `correlations`,
 *   `orphanCrypto` (a count) and `summary`.
 */
export const analyzeCorrelation = tool(
  async ({ logs }) => {
    const entries = typeof logs === 'string' ? JSON.parse(logs) : logs;

    const groups = new Map();
    const unlinkedCrypto = [];

    for (const entry of entries) {
      const kind = entry._type;
      if (kind === 'xhr' || kind === 'fetch') {
        const key = entry.requestId;
        let group = groups.get(key);
        if (!group) {
          group = { request: null, response: null, crypto: [] };
          groups.set(key, group);
        }
        switch (entry.action) {
          case 'send':
          case 'request':
            group.request = entry;
            break;
          case 'response':
            group.response = entry;
            group.crypto = entry.linkedCrypto || [];
            break;
          default:
            break;
        }
      } else if (kind === 'crypto' && !entry.requestId) {
        unlinkedCrypto.push(entry);
      }
    }

    // Only groups for which a request entry was seen are reported.
    const correlations = [...groups]
      .filter(([, group]) => group.request)
      .map(([requestId, group]) => ({
        requestId,
        url: group.request.url,
        method: group.request.method,
        headers: group.request.requestHeaders,
        cryptoCalls: group.crypto.map((call) => ({
          algo: call.algo,
          hasKey: !!call.key,
          stackTop: parseStackTop(call.stack)
        }))
      }));

    const withCrypto = correlations.filter((item) => item.cryptoCalls.length > 0);

    return JSON.stringify({
      correlations,
      orphanCrypto: unlinkedCrypto.length,
      summary: {
        totalRequests: correlations.length,
        requestsWithCrypto: withCrypto.length
      }
    }, null, 2);
  },
  {
    name: 'analyze_correlation',
    description: '分析请求与加密调用的关联关系,找出每个请求使用了哪些加密',
    schema: z.object({
      logs: z.string().describe('__deepspider__.getAllLogs() 返回的日志'),
    }),
  }
);
74
+
75
/**
 * Parse the top frames of a V8-style stack trace string.
 *
 * The first two lines of the trace are skipped and the next (up to) three
 * lines are parsed. Two frame shapes are recognized:
 *   - `at func (file:line:col)` -> `{ func, file, line }`
 *   - `at file:line:col`        -> `{ func: 'anonymous', file, line }`
 * Lines matching neither shape are returned as `{ raw }` (trimmed text).
 *
 * @param {string} stack - Stack trace text.
 * @returns {Array<object>|null} Parsed frames, or `null` when `stack` is
 *   empty or not a string.
 */
function parseStackTop(stack) {
  if (typeof stack !== 'string' || stack === '') return null;

  const namedFrame = /at\s+(.+?)\s+\((.+?):(\d+):(\d+)\)/;
  const bareFrame = /at\s+(.+?):(\d+):(\d+)/;

  return stack.split('\n').slice(2, 5).map((line) => {
    const named = line.match(namedFrame);
    if (named) {
      return {
        func: named[1],
        file: named[2],
        line: Number.parseInt(named[3], 10),
      };
    }

    // The bare form has only three capture groups (file, line, column), so it
    // needs its own mapping; reading it with the named-frame group positions
    // would report the file as the function and the column as the line.
    const bare = line.match(bareFrame);
    if (bare) {
      return {
        func: 'anonymous',
        file: bare[1],
        line: Number.parseInt(bare[2], 10),
      };
    }

    return { raw: line.trim() };
  });
}
94
+
95
/**
 * Tool: locate the source position of a crypto call from its logged stack.
 *
 * Frames whose file mentions `deepspider`, or whose function name mentions
 * `native` or `hook`, are dropped; unparsed frames (no `file`) are kept.
 *
 * @param {object} input
 * @param {string} input.cryptoLog - One crypto log entry, as a JSON string
 *   (an already parsed object is also accepted).
 * @returns {Promise<string>} Pretty-printed JSON with `algo`,
 *   `sourceLocation`, `callChain` (at most three frames) and `suggestion`.
 */
export const locateCryptoSource = tool(
  async ({ cryptoLog }) => {
    const entry = typeof cryptoLog === 'string' ? JSON.parse(cryptoLog) : cryptoLog;
    // parseStackTop yields null when the entry has no stack; treat that as
    // "no frames" instead of failing on null.filter.
    const stack = parseStackTop(entry.stack) ?? [];

    // Drop frames that belong to the hook machinery itself.
    const filtered = stack.filter(frame => {
      if (!frame.file) return true;
      return !frame.file.includes('deepspider') &&
        !frame.func?.includes('native') &&
        !frame.func?.includes('hook');
    });

    const top = filtered[0] ?? null;

    return JSON.stringify({
      algo: entry.algo,
      sourceLocation: top,
      callChain: filtered.slice(0, 3),
      // Only suggest a breakpoint when the top frame was actually parsed;
      // an unparsed frame has no file/line to point at.
      suggestion: top?.file ?
        `在 ${top.file}:${top.line} 设置断点分析` :
        '无法定位源码位置'
    }, null, 2);
  },
  {
    name: 'locate_crypto_source',
    description: '从加密日志的调用栈定位加密函数的源码位置',
    schema: z.object({
      cryptoLog: z.string().describe('单条加密日志'),
    }),
  }
);
128
+
129
/**
 * Tool: find the requests that carry a given header and list the crypto
 * calls linked to them.
 *
 * Header names are matched case-insensitively (an exact-case key wins when
 * both exist), since HTTP field names are case-insensitive and may have been
 * normalized by the time they were logged.
 *
 * @param {object} input
 * @param {string} input.logs - Log entries, as a JSON string (an already
 *   parsed value is also accepted).
 * @param {string} input.headerName - Header to look for, e.g. `X-Sign`.
 * @returns {Promise<string>} Pretty-printed JSON; `{ found: false, message }`
 *   when no request carries the header, otherwise `found`, `headerName`,
 *   `occurrences`, `analysis` and `suggestion`.
 */
export const analyzeHeaderEncryption = tool(
  async ({ logs, headerName }) => {
    const parsed = typeof logs === 'string' ? JSON.parse(logs) : logs;
    const wanted = headerName.toLowerCase();

    // Collect network entries carrying the header, together with its value.
    const hits = [];
    for (const entry of parsed) {
      if (entry._type !== 'xhr' && entry._type !== 'fetch') continue;
      const headers = entry.requestHeaders || entry.headers;
      // Own keys only: `in` would also match inherited names such as
      // "toString" and throws when headers is not an object.
      if (!headers || typeof headers !== 'object') continue;
      const names = Object.keys(headers);
      const key = names.find(name => name === headerName) ??
        names.find(name => name.toLowerCase() === wanted);
      if (key === undefined) continue;
      hits.push({ entry, value: headers[key] });
    }

    if (hits.length === 0) {
      return JSON.stringify({ found: false, message: `未找到设置 ${headerName} 的请求` });
    }

    const analysis = hits.map(({ entry, value }) => ({
      url: entry.url,
      // Coerce before truncating so numeric or other non-string values do
      // not fail on a missing .slice method.
      headerValue: value == null ? undefined : String(value).slice(0, 50),
      linkedCrypto: entry.linkedCrypto || [],
      timestamp: entry.timestamp
    }));

    return JSON.stringify({
      found: true,
      headerName,
      occurrences: analysis.length,
      analysis,
      suggestion: analysis[0]?.linkedCrypto?.length > 0 ?
        '已找到关联的加密调用,使用 locate_crypto_source 定位源码' :
        '未找到直接关联的加密,可能在请求前已完成加密'
    }, null, 2);
  },
  {
    name: 'analyze_header_encryption',
    description: '分析指定 Header 的加密来源',
    schema: z.object({
      logs: z.string().describe('__deepspider__.getAllLogs() 返回的日志'),
      headerName: z.string().describe('要分析的 Header 名称,如 X-Sign'),
    }),
  }
);
178
+
179
/**
 * Tool: find writes of a given cookie and the crypto calls logged close to
 * them in time.
 *
 * A cookie log is selected when its `_type` is `cookie`, its `action` is
 * `write`, and its `value` contains `cookieName`. Crypto logs whose timestamp
 * is within 100 (same unit as the log timestamps) of any selected cookie
 * write are reported once each.
 *
 * @param {object} input
 * @param {string} input.logs - Log entries, as a JSON string (an already
 *   parsed value is also accepted).
 * @param {string} input.cookieName - Cookie name to look for.
 * @returns {Promise<string>} Pretty-printed JSON; `{ found: false, message }`
 *   when nothing matches, otherwise `found`, `cookieName`, `occurrences`,
 *   `analysis`, `nearbyCrypto` and `suggestion`.
 */
export const analyzeCookieEncryption = tool(
  async ({ logs, cookieName }) => {
    const parsed = typeof logs === 'string' ? JSON.parse(logs) : logs;

    // Cookie writes that mention the requested name.
    const cookieLogs = parsed.filter(entry => {
      if (entry._type !== 'cookie') return false;
      if (entry.action !== 'write') return false;
      return entry.value?.includes(cookieName);
    });

    if (cookieLogs.length === 0) {
      return JSON.stringify({ found: false, message: `未找到设置 ${cookieName} 的操作` });
    }

    const analysis = cookieLogs.map(log => ({
      value: log.value?.slice(0, 100),
      timestamp: log.timestamp,
      stackTop: parseStackTop(log.stack),
    }));

    // Crypto calls close in time to any cookie write. A Set keeps first-seen
    // order while ensuring a crypto call that is near several cookie writes
    // is reported once rather than once per write.
    const cryptoLogs = parsed.filter(e => e._type === 'crypto');
    const nearby = new Set();
    for (const cookieLog of cookieLogs) {
      for (const c of cryptoLogs) {
        if (Math.abs(c.timestamp - cookieLog.timestamp) < 100) {
          nearby.add(c);
        }
      }
    }
    const linkedCrypto = [...nearby];

    return JSON.stringify({
      found: true,
      cookieName,
      occurrences: analysis.length,
      analysis,
      nearbyCrypto: linkedCrypto.map(c => ({
        algo: c.algo,
        timestamp: c.timestamp,
        stackTop: parseStackTop(c.stack)
      })),
      suggestion: linkedCrypto.length > 0 ?
        '找到时间相近的加密调用,可能是 Cookie 值的来源' :
        '未找到相近加密调用,Cookie 值可能来自其他计算'
    }, null, 2);
  },
  {
    name: 'analyze_cookie_encryption',
    description: '分析指定 Cookie 的加密来源(通过时间和调用栈关联)',
    schema: z.object({
      logs: z.string().describe('__deepspider__.getAllLogs() 返回的日志'),
      cookieName: z.string().describe('要分析的 Cookie 名称'),
    }),
  }
);
239
+
240
/**
 * Tool: for each logged response, list the decrypt calls that follow it.
 *
 * A crypto log is attributed to a response when its `algo` contains
 * "decrypt" (case-insensitive) and its timestamp is after the response by
 * less than 500 (same unit as the log timestamps).
 *
 * @param {object} input
 * @param {string} input.logs - Log entries, as a JSON string (an already
 *   parsed value is also accepted).
 * @param {string} [input.urlPattern] - Regular expression source used to
 *   filter response URLs. If it is not a valid regular expression it is
 *   matched as a plain substring instead.
 * @returns {Promise<string>} Pretty-printed JSON with `totalResponses`,
 *   `withDecrypt` and `analysis` (first 10 responses only).
 */
export const analyzeResponseDecryption = tool(
  async ({ logs, urlPattern }) => {
    const parsed = typeof logs === 'string' ? JSON.parse(logs) : logs;

    // Build the URL predicate once. A raw URL fragment such as "api?x=("
    // is not valid regex syntax; fall back to substring matching rather
    // than letting the SyntaxError escape the tool.
    let matchesUrl = () => true;
    if (urlPattern) {
      try {
        const regex = new RegExp(urlPattern);
        matchesUrl = (url) => regex.test(url);
      } catch {
        matchesUrl = (url) => String(url).includes(urlPattern);
      }
    }

    // Short preview of a response body; bodies that were logged as parsed
    // objects are serialized first because they have no string .slice.
    const previewOf = (body) => {
      if (body == null) return undefined;
      const text = typeof body === 'string' ? body : JSON.stringify(body);
      return text?.slice(0, 50);
    };

    const responseLogs = parsed.filter(entry => {
      if (entry._type !== 'xhr' && entry._type !== 'fetch') return false;
      if (entry.action !== 'response') return false;
      return matchesUrl(entry.url);
    });

    const analysis = [];
    const cryptoLogs = parsed.filter(e => e._type === 'crypto');

    for (const resp of responseLogs) {
      // Decrypt calls logged shortly after this response.
      const decrypts = cryptoLogs.filter(c => {
        const timeDiff = c.timestamp - resp.timestamp;
        return timeDiff > 0 && timeDiff < 500 &&
          c.algo?.toLowerCase().includes('decrypt');
      });

      analysis.push({
        url: resp.url,
        status: resp.status,
        responsePreview: previewOf(resp.response),
        timestamp: resp.timestamp,
        decryptCalls: decrypts.map(d => ({
          algo: d.algo,
          timeDiff: d.timestamp - resp.timestamp,
          stackTop: parseStackTop(d.stack)
        }))
      });
    }

    return JSON.stringify({
      totalResponses: analysis.length,
      withDecrypt: analysis.filter(a => a.decryptCalls.length > 0).length,
      analysis: analysis.slice(0, 10),
    }, null, 2);
  },
  {
    name: 'analyze_response_decryption',
    description: '分析响应后的解密调用(通过时间窗口关联)',
    schema: z.object({
      logs: z.string().describe('__deepspider__.getAllLogs() 返回的日志'),
      urlPattern: z.string().optional().describe('URL 匹配模式'),
    }),
  }
);
296
+
297
// All correlation-analysis tools defined in this module, in declaration order.
export const correlateTools = [
  analyzeCorrelation,
  locateCryptoSource,
  analyzeHeaderEncryption,
  analyzeCookieEncryption,
  analyzeResponseDecryption,
];
@@ -0,0 +1,116 @@
1
+ /**
2
+ * DeepSpider - 爬虫编排工具
3
+ */
4
+
5
+ import { z } from 'zod';
6
+ import { tool } from '@langchain/core/tools';
7
+ import { getBrowser } from '../../browser/index.js';
8
+
9
/**
 * Tool: inspect the current page (optionally navigating first) for a few
 * coarse features.
 *
 * Reported features:
 *   - `hasLogin`: a password input exists.
 *   - `hasCaptcha`: an element whose class or id contains "captcha" exists.
 *   - `hasEncryption`: some script `src` contains "crypto", "encrypt" or "sign".
 *
 * @param {object} input
 * @param {string} [input.url] - URL to open first; when omitted the current
 *   page is analyzed.
 * @returns {Promise<string>} JSON string `{ success: true, ...features }`, or
 *   `{ success: false, error }` on failure.
 */
export const siteAnalyze = tool(
  async ({ url }) => {
    try {
      const page = (await getBrowser()).getPage();

      if (url) {
        await page.goto(url, { waitUntil: 'networkidle' });
      }

      // This callback runs inside the page, so it must be self-contained.
      const features = await page.evaluate(() => {
        const exists = (selector) => !!document.querySelector(selector);
        const keywords = ['crypto', 'encrypt', 'sign'];
        const sources = Array.from(document.scripts, (script) => script.src);

        return {
          hasLogin: exists('input[type="password"]'),
          hasCaptcha: exists('[class*="captcha"], [id*="captcha"]'),
          hasEncryption: sources.some((src) => keywords.some((word) => src.includes(word))),
        };
      });

      return JSON.stringify({ success: true, ...features });
    } catch (e) {
      return JSON.stringify({ success: false, error: e.message });
    }
  },
  {
    name: 'site_analyze',
    description: '分析目标网站特征',
    schema: z.object({
      url: z.string().optional().describe('目标URL,不填则分析当前页面'),
    }),
  }
);
48
+
49
/**
 * Tool: derive a complexity level (1-3) from site feature flags.
 *
 * Encryption, captcha and login each raise the level to at least 2;
 * fingerprint detection sets it to 3. Each flag that is set contributes one
 * reason, in the fixed order encryption, captcha, login, fingerprint.
 *
 * @param {object} input
 * @param {object} input.features - Optional boolean flags `hasEncryption`,
 *   `hasCaptcha`, `hasLogin`, `hasFingerprint`.
 * @returns {Promise<string>} JSON string `{ success, level, reasons,
 *   recommendation }` where recommendation is `simple`, `medium` or `complex`.
 */
export const complexityAssess = tool(
  async ({ features }) => {
    // [flag, minimum level implied by the flag, reason text]
    const rules = [
      ['hasEncryption', 2, '存在加密'],
      ['hasCaptcha', 2, '存在验证码'],
      ['hasLogin', 2, '需要登录'],
      ['hasFingerprint', 3, '指纹检测'],
    ];

    let level = 1;
    const reasons = [];
    for (const [flag, minLevel, reason] of rules) {
      if (features[flag]) {
        level = Math.max(level, minLevel);
        reasons.push(reason);
      }
    }

    const labels = { 1: 'simple', 2: 'medium' };

    return JSON.stringify({
      success: true,
      level,
      reasons,
      recommendation: labels[level] ?? 'complex',
    });
  },
  {
    name: 'complexity_assess',
    description: '评估网站复杂度等级',
    schema: z.object({
      features: z.object({
        hasEncryption: z.boolean().optional(),
        hasCaptcha: z.boolean().optional(),
        hasLogin: z.boolean().optional(),
        hasFingerprint: z.boolean().optional(),
      }),
    }),
  }
);
91
+
92
/**
 * Tool: end-to-end test of a crawler script.
 *
 * NOTE: script execution is not implemented yet. The tool therefore reports
 * `success: false` and echoes its inputs, so that a caller cannot mistake
 * the placeholder for a passing test run.
 *
 * @param {object} input
 * @param {string} input.script_path - Path of the script to test.
 * @param {Object<string, *>} [input.test_params] - Parameters for the test.
 * @returns {Promise<string>} JSON string `{ success: false, message,
 *   script_path, test_params }`, or `{ success: false, error }` if the
 *   result cannot be serialized.
 */
export const e2eTest = tool(
  async ({ script_path, test_params }) => {
    try {
      // TODO: run the Python script to perform the E2E test.
      return JSON.stringify({
        // Nothing was executed, so this must not be reported as a success.
        success: false,
        message: '需要集成 Python 执行环境',
        script_path,
        test_params,
      });
    } catch (e) {
      return JSON.stringify({ success: false, error: e.message });
    }
  },
  {
    name: 'e2e_test',
    description: '端到端测试爬虫脚本',
    schema: z.object({
      script_path: z.string().describe('脚本路径'),
      test_params: z.record(z.string(), z.unknown()).optional().describe('测试参数'),
    }),
  }
);
115
+
116
// All crawler-orchestration tools defined in this module, in declaration order.
export const crawlerTools = [
  siteAnalyze,
  complexityAssess,
  e2eTest,
];
@@ -0,0 +1,80 @@
1
+ /**
2
+ * DeepSpider - 加密函数 Hook 工具
3
+ */
4
+
5
+ import { z } from 'zod';
6
+ import { tool } from '@langchain/core/tools';
7
+ import { CryptoHook } from '../../env/CryptoHook.js';
8
+
9
+ const cryptoHook = new CryptoHook();
10
+
11
/**
 * Tool: produce the hook code returned by
 * `CryptoHook#generateCryptoJSHookCode`.
 *
 * Takes no arguments.
 * @returns {Promise<string>} Pretty-printed JSON `{ success, code, usage }`.
 */
export const generateCryptoJSHook = tool(
  async () => {
    const payload = {
      success: true,
      code: cryptoHook.generateCryptoJSHookCode(),
      usage: "注入后通过 __deepspider__.getLogs('crypto') 获取日志",
    };
    return JSON.stringify(payload, null, 2);
  },
  {
    name: 'generate_cryptojs_hook',
    description: '生成 CryptoJS Hook(AES/DES/MD5/SHA/HMAC)',
    schema: z.object({}),
  }
);
26
+
27
/**
 * Tool: produce the hook code returned by
 * `CryptoHook#generateSMCryptoHookCode`.
 *
 * Takes no arguments.
 * @returns {Promise<string>} Pretty-printed JSON `{ success, code, usage }`.
 */
export const generateSMCryptoHook = tool(
  async () => {
    const payload = {
      success: true,
      code: cryptoHook.generateSMCryptoHookCode(),
      usage: "注入后通过 __deepspider__.getLogs('crypto') 获取日志",
    };
    return JSON.stringify(payload, null, 2);
  },
  {
    name: 'generate_sm_crypto_hook',
    description: '生成国密 Hook(SM2/SM3/SM4)',
    schema: z.object({}),
  }
);
42
+
43
/**
 * Tool: produce the hook code returned by `CryptoHook#generateRSAHookCode`.
 *
 * Takes no arguments.
 * @returns {Promise<string>} Pretty-printed JSON `{ success, code, usage }`.
 */
export const generateRSAHook = tool(
  async () => {
    const payload = {
      success: true,
      code: cryptoHook.generateRSAHookCode(),
      usage: "注入后通过 __deepspider__.getLogs('crypto') 获取日志",
    };
    return JSON.stringify(payload, null, 2);
  },
  {
    name: 'generate_rsa_hook',
    description: '生成 RSA Hook(JSEncrypt/node-forge)',
    schema: z.object({}),
  }
);
58
+
59
/**
 * Tool: produce the hook code returned by
 * `CryptoHook#generateGenericCryptoHookCode`.
 *
 * Takes no arguments.
 * @returns {Promise<string>} Pretty-printed JSON `{ success, code, usage }`.
 */
export const generateGenericCryptoHook = tool(
  async () => {
    const payload = {
      success: true,
      code: cryptoHook.generateGenericCryptoHookCode(),
      usage: "注入后通过 __deepspider__.getLogs('crypto') 获取日志",
    };
    return JSON.stringify(payload, null, 2);
  },
  {
    name: 'generate_generic_crypto_hook',
    description: '生成通用加密 Hook(基于函数名关键词匹配)',
    schema: z.object({}),
  }
);
74
+
75
// All crypto-hook generator tools defined in this module, in declaration order.
export const cryptoHookTools = [
  generateCryptoJSHook,
  generateSMCryptoHook,
  generateRSAHook,
  generateGenericCryptoHook,
];