deepspider 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/.claude/agents/check.md +122 -0
  2. package/.claude/agents/debug.md +106 -0
  3. package/.claude/agents/dispatch.md +214 -0
  4. package/.claude/agents/implement.md +96 -0
  5. package/.claude/agents/plan.md +396 -0
  6. package/.claude/agents/research.md +120 -0
  7. package/.claude/commands/evolve/merge.md +80 -0
  8. package/.claude/commands/trellis/before-backend-dev.md +13 -0
  9. package/.claude/commands/trellis/before-frontend-dev.md +13 -0
  10. package/.claude/commands/trellis/break-loop.md +107 -0
  11. package/.claude/commands/trellis/check-backend.md +13 -0
  12. package/.claude/commands/trellis/check-cross-layer.md +153 -0
  13. package/.claude/commands/trellis/check-frontend.md +13 -0
  14. package/.claude/commands/trellis/create-command.md +154 -0
  15. package/.claude/commands/trellis/finish-work.md +129 -0
  16. package/.claude/commands/trellis/integrate-skill.md +219 -0
  17. package/.claude/commands/trellis/onboard.md +358 -0
  18. package/.claude/commands/trellis/parallel.md +193 -0
  19. package/.claude/commands/trellis/record-session.md +62 -0
  20. package/.claude/commands/trellis/start.md +280 -0
  21. package/.claude/commands/trellis/update-spec.md +213 -0
  22. package/.claude/hooks/inject-subagent-context.py +758 -0
  23. package/.claude/hooks/ralph-loop.py +374 -0
  24. package/.claude/hooks/session-start.py +126 -0
  25. package/.claude/settings.json +41 -0
  26. package/.claude/skills/deepagents-guide/SKILL.md +428 -0
  27. package/.cursor/commands/trellis-before-backend-dev.md +13 -0
  28. package/.cursor/commands/trellis-before-frontend-dev.md +13 -0
  29. package/.cursor/commands/trellis-break-loop.md +107 -0
  30. package/.cursor/commands/trellis-check-backend.md +13 -0
  31. package/.cursor/commands/trellis-check-cross-layer.md +153 -0
  32. package/.cursor/commands/trellis-check-frontend.md +13 -0
  33. package/.cursor/commands/trellis-create-command.md +154 -0
  34. package/.cursor/commands/trellis-finish-work.md +129 -0
  35. package/.cursor/commands/trellis-integrate-skill.md +219 -0
  36. package/.cursor/commands/trellis-onboard.md +358 -0
  37. package/.cursor/commands/trellis-record-session.md +62 -0
  38. package/.cursor/commands/trellis-start.md +156 -0
  39. package/.cursor/commands/trellis-update-spec.md +213 -0
  40. package/.env.example +11 -0
  41. package/.husky/pre-commit +1 -0
  42. package/.mcp.json +8 -0
  43. package/.trellis/.template-hashes.json +65 -0
  44. package/.trellis/.version +1 -0
  45. package/.trellis/scripts/add-session.sh +384 -0
  46. package/.trellis/scripts/common/developer.sh +129 -0
  47. package/.trellis/scripts/common/git-context.sh +263 -0
  48. package/.trellis/scripts/common/paths.sh +208 -0
  49. package/.trellis/scripts/common/phase.sh +150 -0
  50. package/.trellis/scripts/common/registry.sh +247 -0
  51. package/.trellis/scripts/common/task-queue.sh +142 -0
  52. package/.trellis/scripts/common/task-utils.sh +151 -0
  53. package/.trellis/scripts/common/worktree.sh +128 -0
  54. package/.trellis/scripts/create-bootstrap.sh +299 -0
  55. package/.trellis/scripts/get-context.sh +7 -0
  56. package/.trellis/scripts/get-developer.sh +15 -0
  57. package/.trellis/scripts/init-developer.sh +34 -0
  58. package/.trellis/scripts/multi-agent/cleanup.sh +396 -0
  59. package/.trellis/scripts/multi-agent/create-pr.sh +241 -0
  60. package/.trellis/scripts/multi-agent/plan.sh +207 -0
  61. package/.trellis/scripts/multi-agent/start.sh +310 -0
  62. package/.trellis/scripts/multi-agent/status.sh +828 -0
  63. package/.trellis/scripts/task.sh +1118 -0
  64. package/.trellis/spec/backend/deepagents-guide.md +337 -0
  65. package/.trellis/spec/backend/directory-structure.md +126 -0
  66. package/.trellis/spec/backend/examples/skills/deepagents-guide/README.md +11 -0
  67. package/.trellis/spec/backend/examples/skills/deepagents-guide/agent.js.template +20 -0
  68. package/.trellis/spec/backend/examples/skills/deepagents-guide/skills-config.js.template +13 -0
  69. package/.trellis/spec/backend/examples/skills/deepagents-guide/subagent.js.template +19 -0
  70. package/.trellis/spec/backend/hook-guidelines.md +178 -0
  71. package/.trellis/spec/backend/index.md +36 -0
  72. package/.trellis/spec/backend/quality-guidelines.md +201 -0
  73. package/.trellis/spec/backend/state-management.md +76 -0
  74. package/.trellis/spec/backend/tool-guidelines.md +144 -0
  75. package/.trellis/spec/backend/type-safety.md +71 -0
  76. package/.trellis/spec/guides/code-reuse-thinking-guide.md +92 -0
  77. package/.trellis/spec/guides/cross-layer-thinking-guide.md +94 -0
  78. package/.trellis/spec/guides/index.md +79 -0
  79. package/.trellis/tasks/archive/02-02-evolving-skills/prd.md +61 -0
  80. package/.trellis/tasks/archive/02-02-evolving-skills/task.json +29 -0
  81. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/prd.md +86 -0
  82. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/task.json +27 -0
  83. package/.trellis/tasks/archive/2026-02/02-02-skills-system/check.jsonl +3 -0
  84. package/.trellis/tasks/archive/2026-02/02-02-skills-system/debug.jsonl +2 -0
  85. package/.trellis/tasks/archive/2026-02/02-02-skills-system/implement.jsonl +5 -0
  86. package/.trellis/tasks/archive/2026-02/02-02-skills-system/prd.md +33 -0
  87. package/.trellis/tasks/archive/2026-02/02-02-skills-system/task.json +41 -0
  88. package/.trellis/workflow.md +407 -0
  89. package/.trellis/workspace/index.md +123 -0
  90. package/.trellis/workspace/pony/index.md +40 -0
  91. package/.trellis/workspace/pony/journal-1.md +7 -0
  92. package/.trellis/worktree.yaml +47 -0
  93. package/AGENTS.md +18 -0
  94. package/CLAUDE.md +292 -0
  95. package/README.md +134 -0
  96. package/agents/deepspider.md +142 -0
  97. package/docs/DEBUG.md +42 -0
  98. package/docs/GUIDE.md +334 -0
  99. package/docs/PROMPT.md +60 -0
  100. package/docs/USAGE.md +226 -0
  101. package/eslint.config.js +51 -0
  102. package/package.json +78 -0
  103. package/requirements-crypto.txt +14 -0
  104. package/src/agent/index.js +97 -0
  105. package/src/agent/logger.js +164 -0
  106. package/src/agent/middleware/filterTools.js +64 -0
  107. package/src/agent/middleware/report.js +79 -0
  108. package/src/agent/prompts/system.js +315 -0
  109. package/src/agent/run.js +575 -0
  110. package/src/agent/skills/anti-detect/SKILL.md +28 -0
  111. package/src/agent/skills/anti-detect/evolved.md +12 -0
  112. package/src/agent/skills/captcha/SKILL.md +37 -0
  113. package/src/agent/skills/captcha/evolved.md +12 -0
  114. package/src/agent/skills/config.js +30 -0
  115. package/src/agent/skills/crawler/SKILL.md +9 -0
  116. package/src/agent/skills/crawler/evolved.md +16 -0
  117. package/src/agent/skills/dynamic-analysis/SKILL.md +91 -0
  118. package/src/agent/skills/dynamic-analysis/evolved.md +12 -0
  119. package/src/agent/skills/env/SKILL.md +72 -0
  120. package/src/agent/skills/env/evolved.md +12 -0
  121. package/src/agent/skills/evolve.js +79 -0
  122. package/src/agent/skills/general/SKILL.md +12 -0
  123. package/src/agent/skills/general/evolved.md +12 -0
  124. package/src/agent/skills/js2python/SKILL.md +30 -0
  125. package/src/agent/skills/js2python/evolved.md +13 -0
  126. package/src/agent/skills/report/SKILL.md +21 -0
  127. package/src/agent/skills/report/evolved.md +12 -0
  128. package/src/agent/skills/sandbox/SKILL.md +22 -0
  129. package/src/agent/skills/sandbox/evolved.md +16 -0
  130. package/src/agent/skills/static-analysis/SKILL.md +93 -0
  131. package/src/agent/skills/static-analysis/evolved.md +12 -0
  132. package/src/agent/skills/xpath/SKILL.md +119 -0
  133. package/src/agent/subagents/anti-detect.js +45 -0
  134. package/src/agent/subagents/captcha.js +51 -0
  135. package/src/agent/subagents/crawler.js +138 -0
  136. package/src/agent/subagents/dynamic.js +64 -0
  137. package/src/agent/subagents/env-agent.js +82 -0
  138. package/src/agent/subagents/index.js +37 -0
  139. package/src/agent/subagents/js2python.js +72 -0
  140. package/src/agent/subagents/sandbox.js +55 -0
  141. package/src/agent/subagents/static.js +66 -0
  142. package/src/agent/tools/analysis.js +135 -0
  143. package/src/agent/tools/analyzer.js +85 -0
  144. package/src/agent/tools/anti-detect.js +89 -0
  145. package/src/agent/tools/antidebug.js +64 -0
  146. package/src/agent/tools/async.js +43 -0
  147. package/src/agent/tools/browser.js +324 -0
  148. package/src/agent/tools/captcha.js +223 -0
  149. package/src/agent/tools/capture.js +179 -0
  150. package/src/agent/tools/correlate.js +303 -0
  151. package/src/agent/tools/crawler.js +116 -0
  152. package/src/agent/tools/cryptohook.js +80 -0
  153. package/src/agent/tools/debug.js +246 -0
  154. package/src/agent/tools/deobfuscator.js +90 -0
  155. package/src/agent/tools/env.js +83 -0
  156. package/src/agent/tools/envdump.js +92 -0
  157. package/src/agent/tools/evolve.js +164 -0
  158. package/src/agent/tools/extract.js +114 -0
  159. package/src/agent/tools/extractor.js +54 -0
  160. package/src/agent/tools/file.js +224 -0
  161. package/src/agent/tools/hook.js +84 -0
  162. package/src/agent/tools/hookManager.js +178 -0
  163. package/src/agent/tools/index.js +137 -0
  164. package/src/agent/tools/nodejs.js +101 -0
  165. package/src/agent/tools/patch.js +46 -0
  166. package/src/agent/tools/preprocess.js +71 -0
  167. package/src/agent/tools/profile.js +122 -0
  168. package/src/agent/tools/python.js +627 -0
  169. package/src/agent/tools/report.js +124 -0
  170. package/src/agent/tools/runtime.js +132 -0
  171. package/src/agent/tools/sandbox.js +79 -0
  172. package/src/agent/tools/store.js +73 -0
  173. package/src/agent/tools/trace.js +74 -0
  174. package/src/agent/tools/tracing.js +201 -0
  175. package/src/agent/tools/utils.js +51 -0
  176. package/src/agent/tools/verify.js +184 -0
  177. package/src/agent/tools/webcrack.js +109 -0
  178. package/src/analyzer/ASTAnalyzer.js +387 -0
  179. package/src/analyzer/CallStackAnalyzer.js +379 -0
  180. package/src/analyzer/Deobfuscator.js +289 -0
  181. package/src/analyzer/EncryptionAnalyzer.js +99 -0
  182. package/src/analyzer/index.js +22 -0
  183. package/src/browser/EnvBridge.js +186 -0
  184. package/src/browser/cdp.js +168 -0
  185. package/src/browser/client.js +197 -0
  186. package/src/browser/collector.js +444 -0
  187. package/src/browser/collectors/RequestCryptoLinker.js +109 -0
  188. package/src/browser/collectors/ResponseSearcher.js +107 -0
  189. package/src/browser/collectors/ScriptCollector.js +158 -0
  190. package/src/browser/collectors/index.js +26 -0
  191. package/src/browser/defaultHooks.js +932 -0
  192. package/src/browser/hooks/crypto.js +55 -0
  193. package/src/browser/hooks/index.js +64 -0
  194. package/src/browser/hooks/native.js +9 -0
  195. package/src/browser/hooks/network.js +33 -0
  196. package/src/browser/index.js +42 -0
  197. package/src/browser/interceptors/NetworkInterceptor.js +116 -0
  198. package/src/browser/interceptors/ScriptInterceptor.js +76 -0
  199. package/src/browser/interceptors/index.js +6 -0
  200. package/src/browser/ui/analysisPanel.js +1782 -0
  201. package/src/browser/ui/confirmDialog.js +158 -0
  202. package/src/browser/ui/panel.html +152 -0
  203. package/src/browser/ui/selector.js +170 -0
  204. package/src/config/index.js +5 -0
  205. package/src/config/paths.js +71 -0
  206. package/src/config/patterns/crypto.js +36 -0
  207. package/src/config/profiles/chrome.json +71 -0
  208. package/src/config/profiles/firefox.json +44 -0
  209. package/src/config/profiles/safari.json +38 -0
  210. package/src/core/EnvMonitor.js +200 -0
  211. package/src/core/PatchGenerator.js +278 -0
  212. package/src/core/Sandbox.js +181 -0
  213. package/src/env/AntiAntiDebug.js +111 -0
  214. package/src/env/AsyncHook.js +68 -0
  215. package/src/env/BrowserAPIList.js +265 -0
  216. package/src/env/CookieHook.js +48 -0
  217. package/src/env/CryptoHook.js +205 -0
  218. package/src/env/EnvCodeGenerator.js +157 -0
  219. package/src/env/EnvDumper.js +356 -0
  220. package/src/env/EnvExtractor.js +220 -0
  221. package/src/env/HookBase.js +618 -0
  222. package/src/env/NetworkHook.js +159 -0
  223. package/src/env/modules/bom/history.js +29 -0
  224. package/src/env/modules/bom/location.js +26 -0
  225. package/src/env/modules/bom/navigator.js +70 -0
  226. package/src/env/modules/bom/screen.js +26 -0
  227. package/src/env/modules/bom/storage.js +23 -0
  228. package/src/env/modules/dom/document.js +110 -0
  229. package/src/env/modules/dom/event.js +51 -0
  230. package/src/env/modules/index.js +34 -0
  231. package/src/env/modules/webapi/fetch.js +46 -0
  232. package/src/env/modules/webapi/url.js +47 -0
  233. package/src/env/modules/webapi/xhr.js +48 -0
  234. package/src/index.js +27 -0
  235. package/src/mcp/server.js +89 -0
  236. package/src/store/DataStore.js +708 -0
  237. package/src/store/Store.js +158 -0
  238. package/src/store/Validator.js +24 -0
  239. package/test/analyze.test.js +90 -0
  240. package/test/envdump.test.js +74 -0
  241. package/test/flow.test.js +90 -0
  242. package/test/hooks.test.js +138 -0
  243. package/test/plugin.test.js +35 -0
  244. package/test/refactor-full.test.js +30 -0
  245. package/test/refactor.test.js +21 -0
  246. package/test/samples/obfuscated.js +61 -0
  247. package/test/samples/original.js +66 -0
  248. package/test/samples/v10_eval_chain.js +52 -0
  249. package/test/samples/v11_bytecode_vm.js +81 -0
  250. package/test/samples/v12_polymorphic.js +69 -0
  251. package/test/samples/v1_ob_basic.js +98 -0
  252. package/test/samples/v2_ob_advanced.js +99 -0
  253. package/test/samples/v3_jjencode.js +77 -0
  254. package/test/samples/v4_aaencode.js +73 -0
  255. package/test/samples/v5_control_flow.js +86 -0
  256. package/test/samples/v6_string_encryption.js +71 -0
  257. package/test/samples/v7_jsvmp.js +83 -0
  258. package/test/samples/v8_anti_debug.js +79 -0
  259. package/test/samples/v9_proxy_trap.js +49 -0
  260. package/test/samples.test.js +96 -0
  261. package/test/webcrack.test.js +55 -0
@@ -0,0 +1,575 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * DeepSpider Agent 独立运行入口
4
+ * 使用 CDP binding 接收浏览器消息
5
+ * 支持流式输出显示思考过程
6
+ */
7
+
8
+ import 'dotenv/config';
9
+ import readline from 'readline';
10
+ import { readFileSync } from 'fs';
11
+ import { marked } from 'marked';
12
+ import { createDeepSpiderAgent } from './index.js';
13
+ import { getBrowser } from '../browser/index.js';
14
+ import { markHookInjected } from './tools/runtime.js';
15
+ import { createLogger } from './logger.js';
16
+ import { browserTools } from './tools/browser.js';
17
+
18
+ // Command-line arguments after the node binary and script path.
+ const args = process.argv.slice(2);
19
+ // First argument that starts with http:// or https://, if any; undefined otherwise.
+ const targetUrl = args.find(arg => arg.startsWith('http://') || arg.startsWith('https://'));
20
+
21
+ // Readline interface over stdin/stdout, used by prompt() to read user input.
+ const rl = readline.createInterface({
22
+ input: process.stdin,
23
+ output: process.stdout,
24
+ });
25
+
26
+ // Browser handle; assigned in init() when a target URL was given, otherwise stays null.
+ let browser = null;
27
+ // Page passed to the most recent chatStream()/chatStreamResume() call.
+ let currentPage = null;
28
+
29
+ console.log('=== DeepSpider Agent ===');
30
+ console.log('JS 逆向分析助手,输入 exit 退出\n');
31
+
32
+ // Debug mode: enabled by DEBUG=true in the environment or a --debug CLI flag.
33
+ const DEBUG = process.env.DEBUG === 'true' || process.argv.includes('--debug');
34
+
35
+ // Retry configuration: maxRetries bounds the retry counters in chatStream()/chatStreamResume();
+ // baseDelayMs and maxDelayMs feed the exponential backoff in getRetryDelay().
36
+ const RETRY_CONFIG = {
37
+ maxRetries: 3,
38
+ baseDelayMs: 2000,
39
+ maxDelayMs: 30000,
40
+ };
41
+
42
+ // Manual-intervention configuration
43
+ const INTERVENTION_CONFIG = {
44
+ idleTimeoutMs: 120000, // prompt the user after 2 minutes without any stream event
45
+ checkIntervalMs: 30000, // check every 30 seconds
46
+ // Names of the tools in browserTools; these are treated as possibly triggering risk control
47
+ riskTools: browserTools.map(t => t.name),
48
+ };
49
+
50
/**
 * Tell whether an error message describes a tool-argument problem,
 * i.e. one the LLM has to correct rather than one that can simply be retried.
 *
 * @param {string} errMsg - Error message text to inspect.
 * @returns {boolean} True when the message contains a known schema/input error phrase.
 */
function isToolSchemaError(errMsg) {
  const schemaErrorPattern = /did not match expected schema|Invalid input|tool input/i;
  return schemaErrorPattern.test(errMsg);
}
56
+
57
/**
 * Tell whether an error message describes a transient API/service failure
 * that can be retried directly.
 *
 * The HTTP status codes (502, 503, 429) are only recognised when they are not
 * part of a longer digit run, so messages such as "wrote 15030 bytes" are not
 * mistaken for a 503.
 *
 * @param {string} errMsg - Error message text to inspect.
 * @returns {boolean} True when the message looks like a retryable service error.
 */
function isApiServiceError(errMsg) {
  return /(?<!\d)(?:503|502|429)(?!\d)|rate limit|无可用渠道|timeout|ECONNRESET|ETIMEDOUT/i.test(errMsg);
}
63
+
64
/**
 * Compute how long to wait before the next retry: exponential backoff
 * capped at RETRY_CONFIG.maxDelayMs, plus random jitter.
 *
 * @param {number} retryCount - Number of retries already performed (0 for the first retry).
 * @returns {number} Delay in milliseconds, rounded to an integer.
 */
function getRetryDelay(retryCount) {
  const { baseDelayMs, maxDelayMs } = RETRY_CONFIG;
  const capped = Math.min(baseDelayMs * 2 ** retryCount, maxDelayMs);
  // Add 0-25% random jitter on top of the capped delay.
  const jitter = capped * Math.random() * 0.25;
  return Math.round(capped + jitter);
}
76
+
77
/**
 * Wait for the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
83
+
84
// DeepSeek special-marker cleanup.
// The bars are matched through a character class so that they are literal
// characters (ASCII "|" or full-width "｜"). Written as a bare /|DSML|/ the
// bars act as regex alternation with an empty first branch, which matches the
// empty string everywhere and removes nothing.
const DSML_PATTERN = /[|｜]DSML[|｜]/g;

/**
 * Remove every DSML marker from a piece of model output.
 *
 * @param {string|null|undefined} text - Text to clean.
 * @returns {string|null|undefined} The cleaned text; falsy input is returned unchanged.
 */
function cleanDSML(text) {
  return text ? text.replace(DSML_PATTERN, '') : text;
}
89
+
90
+ // Create the logging callback; enabled only in debug mode, non-verbose.
91
+ const logger = createLogger({ enabled: DEBUG, verbose: false });
92
+
93
/**
 * Report-ready callback handed to the agent (per the original note, the
 * middleware invokes it in its afterAgent step). Logs the path and shows
 * the report in the browser panel.
 *
 * @param {string} mdFilePath - Path of the generated Markdown report.
 * @returns {Promise<void>}
 */
async function onReportReady(mdFilePath) {
  console.log('[report] 中间件触发报告显示:', mdFilePath);
  await showReportFromFile(mdFilePath);
}
100
+
101
+ // Create the agent, passing in the report-ready callback.
102
+ const agent = createDeepSpiderAgent({ onReportReady });
103
+
104
+ // Options passed to every agent.streamEvents() call. The thread_id is fixed for the
+ // lifetime of the process, so chatStreamResume() reuses the same thread.
+ const config = {
105
+ configurable: { thread_id: `deepspider-${Date.now()}` },
106
+ recursionLimit: 5000,
107
+ callbacks: logger ? [logger] : [],
108
+ };
109
+
110
+ // Text accumulation buffer - collects streamed LLM output until flushPanelText() sends it.
111
+ let panelTextBuffer = '';
112
+ // Whether the current assistant message has already been started in the panel;
+ // decides between addMessage and appendToLastMessage in flushPanelText().
+ let hasStartedAssistantMsg = false;
113
+
114
/**
 * Print a debug line prefixed with "[DEBUG]" when debug mode is on;
 * does nothing otherwise.
 *
 * @param {...*} args - Values forwarded to console.log.
 */
function debug(...args) {
  if (!DEBUG) return;
  console.log('[DEBUG]', ...args);
}
119
+
120
/**
 * Send one complete message to the front-end panel.
 *
 * Does nothing when the content is empty or not a string, or when no page is
 * available. Both arguments are JSON-encoded before being spliced into the
 * evaluated script, so quotes in either value cannot break it.
 *
 * @param {string} role - Message role passed to the panel (e.g. 'system').
 * @param {string} content - Message text; surrounding whitespace is trimmed.
 * @returns {Promise<void>}
 */
async function sendToPanel(role, content) {
  const text = typeof content === 'string' ? content.trim() : '';
  if (!text) return;

  if (!browser?.getPage?.()) return;

  try {
    const code = `window.__deepspider__?.addMessage?.(${JSON.stringify(role)}, ${JSON.stringify(text)})`;
    await evaluateInPage(code);
  } catch (e) {
    // Panel updates are best-effort: record the failure in debug mode and carry on.
    debug('sendToPanel 失败:', e.message);
  }
}
137
+
138
/**
 * Add streamed LLM text to the panel buffer and flush it to the panel
 * once enough has accumulated.
 *
 * @param {string} text - Newly received text chunk; falsy values are ignored.
 * @returns {Promise<void>}
 */
async function appendToPanel(text) {
  if (!text) return;
  panelTextBuffer += text;

  // Flush when the buffer exceeds 200 characters or the chunk contains a newline.
  const shouldFlush = panelTextBuffer.length > 200 || text.includes('\n');
  if (!shouldFlush) return;
  await flushPanelText();
}
150
+
151
/**
 * Evaluate JavaScript in the page through the browser's CDP session
 * (Runtime.evaluate with returnByValue).
 *
 * This helper never rejects: obtaining the session and sending the command
 * both happen inside the try block, because callers invoke it outside their
 * own error handling.
 *
 * @param {string} code - JavaScript expression to evaluate.
 * @returns {Promise<*>} The evaluated value, or null when no CDP session is
 *   available or the call fails.
 */
async function evaluateInPage(code) {
  try {
    const cdp = await browser?.getCDPSession?.();
    if (!cdp) return null;

    const response = await cdp.send('Runtime.evaluate', {
      expression: code,
      returnByValue: true,
    });

    // A script that throws inside the page is reported through exceptionDetails
    // rather than a rejected promise; surface it in debug mode.
    const details = response.exceptionDetails;
    if (details) {
      debug('evaluateInPage 页面脚本异常:', details.exception?.description ?? details.text);
    }
    return response.result?.value;
  } catch (e) {
    debug('evaluateInPage 失败:', e.message);
    return null;
  }
}
169
+
170
/**
 * Flush the accumulated text buffer to the panel.
 *
 * The first flush of an assistant turn calls the panel's addMessage; later
 * flushes call appendToLastMessage. The "started" flag is only set once
 * addMessage reports success, so that a panel which is not ready yet does not
 * cause later chunks to be appended to some earlier, unrelated message.
 *
 * The buffer is captured and cleared before the asynchronous page call so
 * text that arrives while the call is in flight is not discarded. A buffer
 * holding only whitespace is left untouched.
 *
 * @returns {Promise<void>}
 */
async function flushPanelText() {
  const content = panelTextBuffer.trim();
  if (!content) return;
  panelTextBuffer = '';

  if (!browser?.getPage?.()) return;

  const method = hasStartedAssistantMsg ? 'appendToLastMessage' : 'addMessage';
  const code = `(function() {
    const fn = window.__deepspider__?.${method};
    if (typeof fn === 'function') {
      fn('assistant', ${JSON.stringify(content)});
      return { ok: true };
    }
    return { ok: false };
  })()`;

  try {
    const outcome = await evaluateInPage(code);
    if (method === 'addMessage' && outcome?.ok) {
      hasStartedAssistantMsg = true;
    }
  } catch (e) {
    // Panel updates are best-effort: record the failure in debug mode and carry on.
    debug('flushPanelText 失败:', e.message);
  }
}
214
+
215
/**
 * Streamed conversation turn that shows the model's output as it arrives,
 * with retries.
 *
 * Sends `input` as a user message, forwards every stream event to
 * handleStreamEvent, and keeps the content of the last 'ChatOpenAI'
 * on_chat_model_end event as the final response. While the stream runs, a
 * heartbeat timer logs long waits and, after a browser tool call followed by
 * a long silence, asks the user to check the browser.
 *
 * On failure, API/service errors are retried by resuming from the checkpoint
 * (chatStreamResume); tool-schema errors are retried by sending the error
 * text back to the model. Both are limited by RETRY_CONFIG.maxRetries.
 *
 * @param {string} input - User message text.
 * @param {*} [page=null] - Page associated with this turn; stored in currentPage.
 * @param {number} [retryCount=0] - Number of retries already performed.
 * @returns {Promise<string>} Final response, '[无响应]' when none was captured,
 *   or an error string when the turn failed and could not be retried.
 */
async function chatStream(input, page = null, retryCount = 0) {
  currentPage = page;
  let finalResponse = '';
  let lastEventTime = Date.now();
  let eventCount = 0;
  let lastToolCall = null;
  let interventionNotified = false;

  // Reset panel state for the new turn.
  panelTextBuffer = '';
  hasStartedAssistantMsg = false;

  // Mark the panel as busy.
  await evaluateInPage('window.__deepspider__?.setBusy?.(true)');

  debug(`chatStream: 开始处理, 输入长度=${input.length}, page=${!!page}`);

  // Heartbeat: report status when no event has arrived for more than 30 seconds.
  const onHeartbeat = () => {
    const elapsed = Math.round((Date.now() - lastEventTime) / 1000);
    if (elapsed > 30) {
      console.log(`\n[心跳] 已等待 ${elapsed}s, 事件数=${eventCount}, 最后工具=${lastToolCall || '无'}`);
    }

    // Idle notice: shown once, and only when the last tool called was a browser tool.
    if (interventionNotified) return;
    const idleTooLong = elapsed * 1000 > INTERVENTION_CONFIG.idleTimeoutMs;
    const isRiskTool = lastToolCall && INTERVENTION_CONFIG.riskTools.includes(lastToolCall);
    if (!idleTooLong || !isRiskTool) return;

    interventionNotified = true;
    const msg = '⚠️ 页面操作后长时间无响应,可能遇到验证码或风控,请检查浏览器';
    console.log('\n[提示] ' + msg);
    sendToPanel('system', msg).catch(() => {});
  };
  const heartbeat = setInterval(onHeartbeat, INTERVENTION_CONFIG.checkIntervalMs);

  try {
    debug('chatStream: 创建事件流');
    const eventStream = await agent.streamEvents(
      { messages: [{ role: 'user', content: input }] },
      { ...config, version: 'v2' }
    );

    debug('chatStream: 开始遍历事件');
    for await (const event of eventStream) {
      lastEventTime = Date.now();
      eventCount++;

      // Remember the most recent tool call for the heartbeat and error logs.
      if (event.event === 'on_tool_start') {
        lastToolCall = event.name;
      }

      await handleStreamEvent(event);

      // Capture the final response.
      const isModelEnd = event.event === 'on_chat_model_end' && event.name === 'ChatOpenAI';
      if (isModelEnd && event.data?.output?.content) {
        finalResponse = event.data.output.content;
        debug(`chatStream: 收到最终响应, 长度=${finalResponse.length}`);
      }
    }

    // The stream ended normally.
    clearInterval(heartbeat);
    console.log(`\n[完成] 共处理 ${eventCount} 个事件`);

    // Push whatever is still buffered to the panel.
    debug('chatStream: 刷新剩余内容');
    await flushPanelText();

    // Clear the busy state.
    await evaluateInPage('window.__deepspider__?.setBusy?.(false)');

    debug(`chatStream: 完成, 响应长度=${finalResponse.length}`);
    return finalResponse || '[无响应]';
  } catch (error) {
    clearInterval(heartbeat);
    const errMsg = error.message || String(error);

    // Clear the busy state.
    await evaluateInPage('window.__deepspider__?.setBusy?.(false)');

    console.error(`\n[异常] 事件数=${eventCount}, 最后工具=${lastToolCall || '无'}, 错误: ${errMsg}`);

    const attempt = retryCount + 1;
    const canRetry = retryCount < RETRY_CONFIG.maxRetries;

    // API/service error: wait, then resume from the checkpoint.
    if (canRetry && isApiServiceError(errMsg)) {
      const delay = getRetryDelay(retryCount);
      console.log(`\n[重试 ${attempt}/${RETRY_CONFIG.maxRetries}] API错误,${delay}ms 后从检查点恢复...`);
      await sendToPanel('system', `服务暂时不可用,${Math.round(delay/1000)}s 后重试 (${attempt}/${RETRY_CONFIG.maxRetries})`);
      await sleep(delay);
      // Resume from the checkpoint: no new message, same thread_id.
      return chatStreamResume(page, attempt);
    }

    // Tool-argument error: send the error text back so the LLM can correct it.
    if (canRetry && isToolSchemaError(errMsg)) {
      console.log(`\n[重试 ${attempt}/${RETRY_CONFIG.maxRetries}] 工具参数错误,发送修正请求...`);
      await sendToPanel('system', `工具调用失败,正在修正 (${attempt}/${RETRY_CONFIG.maxRetries})`);
      const resumeInput = `工具调用失败: ${errMsg}\n请检查参数格式并重试。`;
      return chatStream(resumeInput, page, attempt);
    }

    return `错误: ${errMsg}`;
  }
}
327
+
328
/**
 * Resume a streamed conversation from the checkpoint.
 *
 * Sends an empty message list with the same config (and therefore the same
 * thread_id) and processes the resulting events exactly like chatStream.
 * API/service errors are retried up to RETRY_CONFIG.maxRetries times.
 *
 * @param {*} [page=null] - Page associated with this turn; stored in currentPage.
 * @param {number} [retryCount=0] - Number of retries already performed.
 * @returns {Promise<string>} Final response, '[无响应]' when none was captured,
 *   or an error string when resuming failed and could not be retried.
 */
async function chatStreamResume(page = null, retryCount = 0) {
  currentPage = page;
  let finalResponse = '';
  let lastEventTime = Date.now();
  let eventCount = 0;

  await evaluateInPage('window.__deepspider__?.setBusy?.(true)');
  debug(`chatStreamResume: 从检查点恢复, retryCount=${retryCount}`);

  const onHeartbeat = () => {
    const elapsed = Math.round((Date.now() - lastEventTime) / 1000);
    if (elapsed > 30) {
      console.log(`\n[心跳] 恢复中,已等待 ${elapsed}s`);
    }
  };
  const heartbeat = setInterval(onHeartbeat, 30000);

  try {
    // Resume from the checkpoint: pass an empty message list.
    const eventStream = await agent.streamEvents(
      { messages: [] },
      { ...config, version: 'v2' }
    );

    for await (const event of eventStream) {
      lastEventTime = Date.now();
      eventCount++;
      await handleStreamEvent(event);

      const isModelEnd = event.event === 'on_chat_model_end' && event.name === 'ChatOpenAI';
      if (isModelEnd && event.data?.output?.content) {
        finalResponse = event.data.output.content;
      }
    }

    clearInterval(heartbeat);
    await flushPanelText();
    await evaluateInPage('window.__deepspider__?.setBusy?.(false)');
    console.log(`\n[恢复完成] 共处理 ${eventCount} 个事件`);
    return finalResponse || '[无响应]';
  } catch (error) {
    clearInterval(heartbeat);
    await evaluateInPage('window.__deepspider__?.setBusy?.(false)');
    const errMsg = error.message || String(error);
    console.error(`\n[恢复失败] ${errMsg}`);

    // A failed resume can itself be retried.
    const retryable = isApiServiceError(errMsg) && retryCount < RETRY_CONFIG.maxRetries;
    if (!retryable) {
      return `恢复失败: ${errMsg}`;
    }

    const delay = getRetryDelay(retryCount);
    console.log(`\n[重试 ${retryCount + 1}/${RETRY_CONFIG.maxRetries}] ${delay}ms 后再次恢复...`);
    await sleep(delay);
    return chatStreamResume(page, retryCount + 1);
  }
}
390
+
391
/**
 * Handle one event from the agent's event stream.
 *
 * - on_chat_model_stream: string chunks are cleaned with cleanDSML, written
 *   to stdout and accumulated for the panel.
 * - on_tool_start: the panel buffer is flushed, the assistant-message flag is
 *   reset, and the call is announced on the console and in the panel.
 * - on_tool_end: a short preview of the tool output is printed (80 characters
 *   on the console, 50 in the panel), with "..." only when text was cut off.
 *
 * Events from internal runnables are ignored.
 *
 * @param {{event: string, name?: string, data?: object}} event - Stream event.
 * @returns {Promise<void>}
 */
async function handleStreamEvent(event) {
  const { event: eventType, name, data } = event;

  // Filter out internal events.
  const isInternal =
    name?.startsWith('ChannelWrite') ||
    name?.startsWith('Branch') ||
    name?.includes('Middleware') ||
    name === 'RunnableSequence' ||
    name === 'model_request' ||
    name === 'tools';
  if (isInternal) return;

  debug(`handleStreamEvent: ${eventType}, name=${name}`);

  // Each case has its own block so its declarations do not leak into the other cases.
  switch (eventType) {
    case 'on_chat_model_stream': {
      // LLM output stream - strip DeepSeek special markers.
      const rawChunk = data?.chunk?.content;
      if (rawChunk && typeof rawChunk === 'string') {
        const chunk = cleanDSML(rawChunk);
        process.stdout.write(chunk);
        await appendToPanel(chunk); // accumulated, then sent to the panel
      }
      break;
    }

    case 'on_tool_start': {
      // A tool call starts: flush buffered text first.
      debug('handleStreamEvent: 工具开始,先刷新缓冲区');
      await flushPanelText();
      // Reset the flag so the AI output after the tool call starts a new message.
      hasStartedAssistantMsg = false;
      const input = data?.input || {};
      const inputStr = typeof input === 'string' ? input : JSON.stringify(input);
      const preview = inputStr.length > 100 ? inputStr.slice(0, 100) + '...' : inputStr;
      console.log(`\n[调用] ${name}(${preview})`);
      await sendToPanel('system', `[调用] ${name}`);
      break;
    }

    case 'on_tool_end': {
      // A tool call finished.
      const output = data?.output;

      // Debug: describe the shape of the output.
      debug(`on_tool_end: name=${name}, output type=${typeof output}, keys=${output ? Object.keys(output) : 'null'}`);

      let text = '';
      if (typeof output === 'string') {
        text = output;
      } else if (output?.content) {
        text = String(output.content);
      }

      if (text) {
        // Append the ellipsis only when the preview is shorter than the full text.
        const consolePreview = text.length > 80 ? `${text.slice(0, 80)}...` : text;
        const panelPreview = text.length > 50 ? `${text.slice(0, 50)}...` : text;
        console.log(`[结果] ${consolePreview}`);
        await sendToPanel('system', `[结果] ${panelPreview}`);
      }
      break;
    }

    default:
      break;
  }
}
453
+
454
/**
 * Read a Markdown report from disk, convert it to HTML with marked, and show
 * it in the browser panel. Called from onReportReady.
 *
 * Failures are logged and never thrown.
 *
 * @param {string} mdFilePath - Path of the Markdown report file.
 * @returns {Promise<void>}
 */
async function showReportFromFile(mdFilePath) {
  if (!browser?.getPage?.()) {
    console.log('[report] 错误: 无法获取 page');
    return;
  }

  try {
    const markdown = readFileSync(mdFilePath, 'utf-8');
    console.log('[report] 读取 MD 文件成功, 长度:', markdown.length);

    // Convert to HTML with marked, then JSON-encode it for embedding in the script.
    const reportHtml = JSON.stringify(marked.parse(markdown));
    await evaluateInPage(`window.__deepspider__?.showReport?.(${reportHtml}, true)`);
    console.log('[report] 已显示分析报告');
  } catch (e) {
    console.log('[report] showReportFromFile 失败:', e.message);
  }
}
478
+
479
/**
 * Handle a message coming from the browser (received through the CDP binding).
 *
 * Builds a user prompt from the message and runs it through chatStream.
 * Supported message types are 'analysis', 'generate-config' and 'chat'; any
 * other message, and a 'generate-config' message without a `config.fields`
 * array, is ignored.
 *
 * @param {object} data - Message payload; `type` selects the handling.
 * @param {*} page - Page the message came from, forwarded to chatStream.
 * @returns {Promise<void>}
 */
async function handleBrowserMessage(data, page) {
  debug(`handleBrowserMessage: 收到消息, type=${data?.type}, page=${!!page}`);
  if (!data) return;

  // Prefix telling the agent that the browser is ready and need not be started again.
  const browserReadyPrefix = '[浏览器已就绪] ';

  let userPrompt;
  let label;
  if (data.type === 'analysis') {
    const iframeInfo = data.iframeSrc ? `\niframe来源: ${data.iframeSrc}` : '';
    const analysisType = data.analysisType || 'full';

    // A different instruction for each analysis type.
    const typePrompts = {
      source: '请使用 search_in_responses 搜索选中文本,定位数据来源请求。',
      crypto: '请分析该数据涉及的加密逻辑,识别加密算法并生成 Python 代码。',
      full: '请使用 search_in_responses 搜索选中文本定位来源,分析加密逻辑,生成完整的 Python 代码。'
    };

    label = '分析请求';
    userPrompt = `${browserReadyPrefix}用户选中了以下数据要求分析:
"${data.text}"
XPath: ${data.xpath}${iframeInfo}

分析类型: ${analysisType}
${typePrompts[analysisType] || typePrompts.full}`;
  } else if (data.type === 'generate-config') {
    // Generate a crawler configuration - handled by the crawler sub-agent.
    // Named `fields` rather than `config` to avoid shadowing the module-level config.
    const fields = data.config?.fields;
    if (!Array.isArray(fields)) {
      debug('handleBrowserMessage: generate-config 缺少 config.fields');
      return;
    }

    label = '生成配置';
    userPrompt = `${browserReadyPrefix}请使用 crawler 子代理生成爬虫。

用户已选择 ${fields.length} 个字段:
${JSON.stringify(fields, null, 2)}

目标URL: ${data.url}

请先用 query_store 查询已有的加密代码,然后整合生成配置和脚本。`;
  } else if (data.type === 'chat') {
    label = '对话';
    userPrompt = `${browserReadyPrefix}${data.text}`;
  } else {
    return;
  }

  console.log('\n[浏览器] ' + label);
  await chatStream(userPrompt, page);
  console.log('\n');
  // The streamed output has already been mirrored to the panel; just redraw the prompt.
  process.stdout.write('> ');
}
529
+
530
/**
 * Read one line from the terminal and run it through chatStream, then ask
 * again. Typing "exit" (case-insensitive, surrounding whitespace ignored)
 * closes the readline interface and ends the process; empty lines are skipped.
 *
 * A failure while handling a line is reported and the loop keeps going.
 */
function prompt() {
  rl.question('> ', async (input) => {
    const line = input.trim();

    if (line.toLowerCase() === 'exit') {
      console.log('再见!');
      rl.close();
      process.exit(0);
    }

    if (!line) {
      prompt();
      return;
    }

    try {
      await chatStream(input, browser?.getPage?.());
    } catch (error) {
      // Keep the interactive loop alive instead of leaving an unhandled rejection.
      console.error(`错误: ${error?.message || String(error)}`);
    }
    console.log('\n');
    prompt();
  });
}
548
+
549
/**
 * Start-up routine: when a target URL was given on the command line, obtain
 * the browser, register the browser-message handler, navigate to the URL and
 * mark the hooks as injected. A browser start-up failure is logged and does
 * not stop the program. The interactive prompt is started in every case.
 *
 * @returns {Promise<void>}
 */
async function init() {
  debug('init: 启动');
  if (DEBUG) {
    console.log('[DEBUG] 调试模式已启用');
  }

  if (!targetUrl) {
    prompt();
    return;
  }

  console.log(`正在打开: ${targetUrl}\n`);
  try {
    debug('init: 获取浏览器实例');
    browser = await getBrowser();
    browser.onMessage = handleBrowserMessage;
    debug('init: 导航到目标URL');
    await browser.navigate(targetUrl);
    markHookInjected();
    debug('init: 浏览器就绪');
    console.log('浏览器已就绪,数据自动记录中');
    console.log('点击面板选择按钮(⦿)选择数据进行分析\n');
  } catch (error) {
    console.error('启动浏览器失败:', error.message);
    debug('init: 浏览器启动失败 -', error.stack);
  }
  prompt();
}
574
+
575
+ init();
@@ -0,0 +1,28 @@
1
+ ---
2
+ name: anti-detect
3
+ description: |
4
+ 反检测经验。浏览器指纹、代理IP、风控规避技巧。
5
+ ---
6
+
7
+ # 反检测经验
8
+
9
+ ## 浏览器指纹
10
+
11
+ ### 常见检测点
12
+ - navigator.webdriver
13
+ - window.chrome
14
+ - Canvas 指纹
15
+ - WebGL 指纹
16
+
17
+ ## 代理 IP
18
+
19
+ ### 代理类型
20
+ - HTTP/HTTPS 代理
21
+ - SOCKS5 代理
22
+ - 隧道代理
23
+
24
+ ## 风控规避
25
+
26
+ ### 请求频率
27
+ - 随机延迟
28
+ - 分布式请求
@@ -0,0 +1,12 @@
1
+ ---
2
+ total: 0
3
+ last_merged: null
4
+ ---
5
+
6
+ ## 核心经验
7
+
8
+ <!-- 经过验证的高价值经验 -->
9
+
10
+ ## 近期发现
11
+
12
+ <!-- 最近发现,FIFO 滚动,最多保留 10 条 -->
@@ -0,0 +1,37 @@
1
+ ---
2
+ name: captcha
3
+ description: |
4
+ 验证码处理经验。滑块验证码、图片验证码、点选验证码的识别与绕过技巧。
5
+ ---
6
+
7
+ # 验证码处理经验
8
+
9
+ ## 滑块验证码
10
+
11
+ ### 缺口检测
12
+ - 边缘检测:Canny 算法定位缺口边缘
13
+ - 模板匹配:滑块图与背景图匹配
14
+
15
+ ### 轨迹生成要点
16
+ - 起始慢,中间快,结束慢
17
+ - 添加微小随机抖动
18
+ - 总时长 300-800ms
19
+ - 结束时有轻微回弹
20
+
21
+ ## 图片验证码
22
+
23
+ ### OCR 选择
24
+ - ddddocr:开源免费,识别率一般
25
+ - 打码平台:付费,识别率高
26
+
27
+ ### 预处理技巧
28
+ - 二值化去除背景
29
+ - 去噪点
30
+ - 字符分割
31
+
32
+ ## 点选验证码
33
+
34
+ ### 目标检测
35
+ - 文字定位
36
+ - 图标识别
37
+ - 顺序判断
@@ -0,0 +1,12 @@
1
+ ---
2
+ total: 0
3
+ last_merged: null
4
+ ---
5
+
6
+ ## 核心经验
7
+
8
+ <!-- 经过验证的高价值经验 -->
9
+
10
+ ## 近期发现
11
+
12
+ <!-- 最近发现,FIFO 滚动,最多保留 10 条 -->
@@ -0,0 +1,30 @@
1
/**
 * DeepSpider - skills configuration.
 * Each agent loads only the skills that belong to it.
 */

import { fileURLToPath } from 'node:url';
import { FilesystemBackend } from 'deepagents';

// Shared FilesystemBackend, rooted at '/'.
export const skillsBackend = new FilesystemBackend({ rootDir: '/' });

// Directory that contains this module, ending with a path separator.
// fileURLToPath is used instead of URL#pathname because pathname keeps
// percent-escapes (a space in the install path stays "%20") and produces
// "/C:/..." on Windows, neither of which is a usable filesystem path.
// NOTE(review): whether FilesystemBackend with rootDir '/' accepts Windows
// drive paths cannot be told from here - confirm before relying on it.
const BASE_DIR = fileURLToPath(new URL('.', import.meta.url));

// Skills directory for each agent.
export const SKILLS = {
  // Reverse-engineering analysis
  static: `${BASE_DIR}static-analysis`,
  dynamic: `${BASE_DIR}dynamic-analysis`,
  sandbox: `${BASE_DIR}sandbox`,
  env: `${BASE_DIR}env`,
  js2python: `${BASE_DIR}js2python`,
  // Crawler capabilities
  captcha: `${BASE_DIR}captcha`,
  antiDetect: `${BASE_DIR}anti-detect`,
  crawler: `${BASE_DIR}crawler`,
  xpath: `${BASE_DIR}xpath`,
  // General
  report: `${BASE_DIR}report`,
  general: `${BASE_DIR}general`,
};
@@ -0,0 +1,9 @@
1
+ ---
2
+ name: crawler
3
+ description: |
4
+ crawler 相关经验。
5
+ ---
6
+
7
+ # crawler
8
+
9
+ 自动创建的 skill 目录。