deepspider 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/.claude/agents/check.md +122 -0
  2. package/.claude/agents/debug.md +106 -0
  3. package/.claude/agents/dispatch.md +214 -0
  4. package/.claude/agents/implement.md +96 -0
  5. package/.claude/agents/plan.md +396 -0
  6. package/.claude/agents/research.md +120 -0
  7. package/.claude/commands/evolve/merge.md +80 -0
  8. package/.claude/commands/trellis/before-backend-dev.md +13 -0
  9. package/.claude/commands/trellis/before-frontend-dev.md +13 -0
  10. package/.claude/commands/trellis/break-loop.md +107 -0
  11. package/.claude/commands/trellis/check-backend.md +13 -0
  12. package/.claude/commands/trellis/check-cross-layer.md +153 -0
  13. package/.claude/commands/trellis/check-frontend.md +13 -0
  14. package/.claude/commands/trellis/create-command.md +154 -0
  15. package/.claude/commands/trellis/finish-work.md +129 -0
  16. package/.claude/commands/trellis/integrate-skill.md +219 -0
  17. package/.claude/commands/trellis/onboard.md +358 -0
  18. package/.claude/commands/trellis/parallel.md +193 -0
  19. package/.claude/commands/trellis/record-session.md +62 -0
  20. package/.claude/commands/trellis/start.md +280 -0
  21. package/.claude/commands/trellis/update-spec.md +213 -0
  22. package/.claude/hooks/inject-subagent-context.py +758 -0
  23. package/.claude/hooks/ralph-loop.py +374 -0
  24. package/.claude/hooks/session-start.py +126 -0
  25. package/.claude/settings.json +41 -0
  26. package/.claude/skills/deepagents-guide/SKILL.md +428 -0
  27. package/.cursor/commands/trellis-before-backend-dev.md +13 -0
  28. package/.cursor/commands/trellis-before-frontend-dev.md +13 -0
  29. package/.cursor/commands/trellis-break-loop.md +107 -0
  30. package/.cursor/commands/trellis-check-backend.md +13 -0
  31. package/.cursor/commands/trellis-check-cross-layer.md +153 -0
  32. package/.cursor/commands/trellis-check-frontend.md +13 -0
  33. package/.cursor/commands/trellis-create-command.md +154 -0
  34. package/.cursor/commands/trellis-finish-work.md +129 -0
  35. package/.cursor/commands/trellis-integrate-skill.md +219 -0
  36. package/.cursor/commands/trellis-onboard.md +358 -0
  37. package/.cursor/commands/trellis-record-session.md +62 -0
  38. package/.cursor/commands/trellis-start.md +156 -0
  39. package/.cursor/commands/trellis-update-spec.md +213 -0
  40. package/.env.example +11 -0
  41. package/.husky/pre-commit +1 -0
  42. package/.mcp.json +8 -0
  43. package/.trellis/.template-hashes.json +65 -0
  44. package/.trellis/.version +1 -0
  45. package/.trellis/scripts/add-session.sh +384 -0
  46. package/.trellis/scripts/common/developer.sh +129 -0
  47. package/.trellis/scripts/common/git-context.sh +263 -0
  48. package/.trellis/scripts/common/paths.sh +208 -0
  49. package/.trellis/scripts/common/phase.sh +150 -0
  50. package/.trellis/scripts/common/registry.sh +247 -0
  51. package/.trellis/scripts/common/task-queue.sh +142 -0
  52. package/.trellis/scripts/common/task-utils.sh +151 -0
  53. package/.trellis/scripts/common/worktree.sh +128 -0
  54. package/.trellis/scripts/create-bootstrap.sh +299 -0
  55. package/.trellis/scripts/get-context.sh +7 -0
  56. package/.trellis/scripts/get-developer.sh +15 -0
  57. package/.trellis/scripts/init-developer.sh +34 -0
  58. package/.trellis/scripts/multi-agent/cleanup.sh +396 -0
  59. package/.trellis/scripts/multi-agent/create-pr.sh +241 -0
  60. package/.trellis/scripts/multi-agent/plan.sh +207 -0
  61. package/.trellis/scripts/multi-agent/start.sh +310 -0
  62. package/.trellis/scripts/multi-agent/status.sh +828 -0
  63. package/.trellis/scripts/task.sh +1118 -0
  64. package/.trellis/spec/backend/deepagents-guide.md +337 -0
  65. package/.trellis/spec/backend/directory-structure.md +126 -0
  66. package/.trellis/spec/backend/examples/skills/deepagents-guide/README.md +11 -0
  67. package/.trellis/spec/backend/examples/skills/deepagents-guide/agent.js.template +20 -0
  68. package/.trellis/spec/backend/examples/skills/deepagents-guide/skills-config.js.template +13 -0
  69. package/.trellis/spec/backend/examples/skills/deepagents-guide/subagent.js.template +19 -0
  70. package/.trellis/spec/backend/hook-guidelines.md +178 -0
  71. package/.trellis/spec/backend/index.md +36 -0
  72. package/.trellis/spec/backend/quality-guidelines.md +201 -0
  73. package/.trellis/spec/backend/state-management.md +76 -0
  74. package/.trellis/spec/backend/tool-guidelines.md +144 -0
  75. package/.trellis/spec/backend/type-safety.md +71 -0
  76. package/.trellis/spec/guides/code-reuse-thinking-guide.md +92 -0
  77. package/.trellis/spec/guides/cross-layer-thinking-guide.md +94 -0
  78. package/.trellis/spec/guides/index.md +79 -0
  79. package/.trellis/tasks/archive/02-02-evolving-skills/prd.md +61 -0
  80. package/.trellis/tasks/archive/02-02-evolving-skills/task.json +29 -0
  81. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/prd.md +86 -0
  82. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/task.json +27 -0
  83. package/.trellis/tasks/archive/2026-02/02-02-skills-system/check.jsonl +3 -0
  84. package/.trellis/tasks/archive/2026-02/02-02-skills-system/debug.jsonl +2 -0
  85. package/.trellis/tasks/archive/2026-02/02-02-skills-system/implement.jsonl +5 -0
  86. package/.trellis/tasks/archive/2026-02/02-02-skills-system/prd.md +33 -0
  87. package/.trellis/tasks/archive/2026-02/02-02-skills-system/task.json +41 -0
  88. package/.trellis/workflow.md +407 -0
  89. package/.trellis/workspace/index.md +123 -0
  90. package/.trellis/workspace/pony/index.md +40 -0
  91. package/.trellis/workspace/pony/journal-1.md +7 -0
  92. package/.trellis/worktree.yaml +47 -0
  93. package/AGENTS.md +18 -0
  94. package/CLAUDE.md +292 -0
  95. package/README.md +134 -0
  96. package/agents/deepspider.md +142 -0
  97. package/docs/DEBUG.md +42 -0
  98. package/docs/GUIDE.md +334 -0
  99. package/docs/PROMPT.md +60 -0
  100. package/docs/USAGE.md +226 -0
  101. package/eslint.config.js +51 -0
  102. package/package.json +78 -0
  103. package/requirements-crypto.txt +14 -0
  104. package/src/agent/index.js +97 -0
  105. package/src/agent/logger.js +164 -0
  106. package/src/agent/middleware/filterTools.js +64 -0
  107. package/src/agent/middleware/report.js +79 -0
  108. package/src/agent/prompts/system.js +315 -0
  109. package/src/agent/run.js +575 -0
  110. package/src/agent/skills/anti-detect/SKILL.md +28 -0
  111. package/src/agent/skills/anti-detect/evolved.md +12 -0
  112. package/src/agent/skills/captcha/SKILL.md +37 -0
  113. package/src/agent/skills/captcha/evolved.md +12 -0
  114. package/src/agent/skills/config.js +30 -0
  115. package/src/agent/skills/crawler/SKILL.md +9 -0
  116. package/src/agent/skills/crawler/evolved.md +16 -0
  117. package/src/agent/skills/dynamic-analysis/SKILL.md +91 -0
  118. package/src/agent/skills/dynamic-analysis/evolved.md +12 -0
  119. package/src/agent/skills/env/SKILL.md +72 -0
  120. package/src/agent/skills/env/evolved.md +12 -0
  121. package/src/agent/skills/evolve.js +79 -0
  122. package/src/agent/skills/general/SKILL.md +12 -0
  123. package/src/agent/skills/general/evolved.md +12 -0
  124. package/src/agent/skills/js2python/SKILL.md +30 -0
  125. package/src/agent/skills/js2python/evolved.md +13 -0
  126. package/src/agent/skills/report/SKILL.md +21 -0
  127. package/src/agent/skills/report/evolved.md +12 -0
  128. package/src/agent/skills/sandbox/SKILL.md +22 -0
  129. package/src/agent/skills/sandbox/evolved.md +16 -0
  130. package/src/agent/skills/static-analysis/SKILL.md +93 -0
  131. package/src/agent/skills/static-analysis/evolved.md +12 -0
  132. package/src/agent/skills/xpath/SKILL.md +119 -0
  133. package/src/agent/subagents/anti-detect.js +45 -0
  134. package/src/agent/subagents/captcha.js +51 -0
  135. package/src/agent/subagents/crawler.js +138 -0
  136. package/src/agent/subagents/dynamic.js +64 -0
  137. package/src/agent/subagents/env-agent.js +82 -0
  138. package/src/agent/subagents/index.js +37 -0
  139. package/src/agent/subagents/js2python.js +72 -0
  140. package/src/agent/subagents/sandbox.js +55 -0
  141. package/src/agent/subagents/static.js +66 -0
  142. package/src/agent/tools/analysis.js +135 -0
  143. package/src/agent/tools/analyzer.js +85 -0
  144. package/src/agent/tools/anti-detect.js +89 -0
  145. package/src/agent/tools/antidebug.js +64 -0
  146. package/src/agent/tools/async.js +43 -0
  147. package/src/agent/tools/browser.js +324 -0
  148. package/src/agent/tools/captcha.js +223 -0
  149. package/src/agent/tools/capture.js +179 -0
  150. package/src/agent/tools/correlate.js +303 -0
  151. package/src/agent/tools/crawler.js +116 -0
  152. package/src/agent/tools/cryptohook.js +80 -0
  153. package/src/agent/tools/debug.js +246 -0
  154. package/src/agent/tools/deobfuscator.js +90 -0
  155. package/src/agent/tools/env.js +83 -0
  156. package/src/agent/tools/envdump.js +92 -0
  157. package/src/agent/tools/evolve.js +164 -0
  158. package/src/agent/tools/extract.js +114 -0
  159. package/src/agent/tools/extractor.js +54 -0
  160. package/src/agent/tools/file.js +224 -0
  161. package/src/agent/tools/hook.js +84 -0
  162. package/src/agent/tools/hookManager.js +178 -0
  163. package/src/agent/tools/index.js +137 -0
  164. package/src/agent/tools/nodejs.js +101 -0
  165. package/src/agent/tools/patch.js +46 -0
  166. package/src/agent/tools/preprocess.js +71 -0
  167. package/src/agent/tools/profile.js +122 -0
  168. package/src/agent/tools/python.js +627 -0
  169. package/src/agent/tools/report.js +124 -0
  170. package/src/agent/tools/runtime.js +132 -0
  171. package/src/agent/tools/sandbox.js +79 -0
  172. package/src/agent/tools/store.js +73 -0
  173. package/src/agent/tools/trace.js +74 -0
  174. package/src/agent/tools/tracing.js +201 -0
  175. package/src/agent/tools/utils.js +51 -0
  176. package/src/agent/tools/verify.js +184 -0
  177. package/src/agent/tools/webcrack.js +109 -0
  178. package/src/analyzer/ASTAnalyzer.js +387 -0
  179. package/src/analyzer/CallStackAnalyzer.js +379 -0
  180. package/src/analyzer/Deobfuscator.js +289 -0
  181. package/src/analyzer/EncryptionAnalyzer.js +99 -0
  182. package/src/analyzer/index.js +22 -0
  183. package/src/browser/EnvBridge.js +186 -0
  184. package/src/browser/cdp.js +168 -0
  185. package/src/browser/client.js +197 -0
  186. package/src/browser/collector.js +444 -0
  187. package/src/browser/collectors/RequestCryptoLinker.js +109 -0
  188. package/src/browser/collectors/ResponseSearcher.js +107 -0
  189. package/src/browser/collectors/ScriptCollector.js +158 -0
  190. package/src/browser/collectors/index.js +26 -0
  191. package/src/browser/defaultHooks.js +932 -0
  192. package/src/browser/hooks/crypto.js +55 -0
  193. package/src/browser/hooks/index.js +64 -0
  194. package/src/browser/hooks/native.js +9 -0
  195. package/src/browser/hooks/network.js +33 -0
  196. package/src/browser/index.js +42 -0
  197. package/src/browser/interceptors/NetworkInterceptor.js +116 -0
  198. package/src/browser/interceptors/ScriptInterceptor.js +76 -0
  199. package/src/browser/interceptors/index.js +6 -0
  200. package/src/browser/ui/analysisPanel.js +1782 -0
  201. package/src/browser/ui/confirmDialog.js +158 -0
  202. package/src/browser/ui/panel.html +152 -0
  203. package/src/browser/ui/selector.js +170 -0
  204. package/src/config/index.js +5 -0
  205. package/src/config/paths.js +71 -0
  206. package/src/config/patterns/crypto.js +36 -0
  207. package/src/config/profiles/chrome.json +71 -0
  208. package/src/config/profiles/firefox.json +44 -0
  209. package/src/config/profiles/safari.json +38 -0
  210. package/src/core/EnvMonitor.js +200 -0
  211. package/src/core/PatchGenerator.js +278 -0
  212. package/src/core/Sandbox.js +181 -0
  213. package/src/env/AntiAntiDebug.js +111 -0
  214. package/src/env/AsyncHook.js +68 -0
  215. package/src/env/BrowserAPIList.js +265 -0
  216. package/src/env/CookieHook.js +48 -0
  217. package/src/env/CryptoHook.js +205 -0
  218. package/src/env/EnvCodeGenerator.js +157 -0
  219. package/src/env/EnvDumper.js +356 -0
  220. package/src/env/EnvExtractor.js +220 -0
  221. package/src/env/HookBase.js +618 -0
  222. package/src/env/NetworkHook.js +159 -0
  223. package/src/env/modules/bom/history.js +29 -0
  224. package/src/env/modules/bom/location.js +26 -0
  225. package/src/env/modules/bom/navigator.js +70 -0
  226. package/src/env/modules/bom/screen.js +26 -0
  227. package/src/env/modules/bom/storage.js +23 -0
  228. package/src/env/modules/dom/document.js +110 -0
  229. package/src/env/modules/dom/event.js +51 -0
  230. package/src/env/modules/index.js +34 -0
  231. package/src/env/modules/webapi/fetch.js +46 -0
  232. package/src/env/modules/webapi/url.js +47 -0
  233. package/src/env/modules/webapi/xhr.js +48 -0
  234. package/src/index.js +27 -0
  235. package/src/mcp/server.js +89 -0
  236. package/src/store/DataStore.js +708 -0
  237. package/src/store/Store.js +158 -0
  238. package/src/store/Validator.js +24 -0
  239. package/test/analyze.test.js +90 -0
  240. package/test/envdump.test.js +74 -0
  241. package/test/flow.test.js +90 -0
  242. package/test/hooks.test.js +138 -0
  243. package/test/plugin.test.js +35 -0
  244. package/test/refactor-full.test.js +30 -0
  245. package/test/refactor.test.js +21 -0
  246. package/test/samples/obfuscated.js +61 -0
  247. package/test/samples/original.js +66 -0
  248. package/test/samples/v10_eval_chain.js +52 -0
  249. package/test/samples/v11_bytecode_vm.js +81 -0
  250. package/test/samples/v12_polymorphic.js +69 -0
  251. package/test/samples/v1_ob_basic.js +98 -0
  252. package/test/samples/v2_ob_advanced.js +99 -0
  253. package/test/samples/v3_jjencode.js +77 -0
  254. package/test/samples/v4_aaencode.js +73 -0
  255. package/test/samples/v5_control_flow.js +86 -0
  256. package/test/samples/v6_string_encryption.js +71 -0
  257. package/test/samples/v7_jsvmp.js +83 -0
  258. package/test/samples/v8_anti_debug.js +79 -0
  259. package/test/samples/v9_proxy_trap.js +49 -0
  260. package/test/samples.test.js +96 -0
  261. package/test/webcrack.test.js +55 -0
@@ -0,0 +1,45 @@
1
+ /**
2
+ * DeepSpider - 反检测子代理
3
+ * 浏览器指纹管理、代理池、风控规避
4
+ */
5
+
6
+ import { createSkillsMiddleware } from 'deepagents';
7
+ import { SKILLS, skillsBackend } from '../skills/config.js';
8
+ import { createFilterToolsMiddleware } from '../middleware/filterTools.js';
9
+
10
+ import { antiDetectTools } from '../tools/index.js';
11
+ import { browserTools } from '../tools/browser.js';
12
+ import { fileTools } from '../tools/file.js';
13
+ import { evolveTools } from '../tools/evolve.js';
14
+
15
/**
 * Sub-agent definition for anti-detection work (fingerprint management,
 * proxy handling, risk-control evasion).
 *
 * Shape of the exported object:
 * - `name` / `description`: identifier and the Chinese routing description
 *   that tells the caller when this sub-agent applies.
 * - `systemPrompt`: Chinese instructions given to the sub-agent. It lists the
 *   detection types handled (IP, browser fingerprint, behaviour, TLS
 *   fingerprint) and asks for learnings to be recorded via `evolve_skill`
 *   under the "anti-detect" skill.
 * - `tools`: `antiDetectTools`, `browserTools`, `fileTools` and `evolveTools`
 *   spread into one flat array, in that order.
 * - `middleware`: the result of `createFilterToolsMiddleware()` followed by a
 *   skills middleware built on `skillsBackend` and limited to
 *   `SKILLS.antiDetect`.
 */
export const antiDetectSubagent = {
  name: 'anti-detect',
  description: '反检测专家。当目标网站有风控检测、IP封禁、指纹识别时使用,适用于:代理IP配置、浏览器指纹伪装、请求特征修改、风控规避。',
  // The first sentence previously read "绑过" (a typo); the intended word is
  // "绕过" (bypass), which is also what the sibling captcha prompt uses.
  systemPrompt: `你是 DeepSpider 的反检测专家,负责绕过网站的反爬虫检测。

## 核心职责
配置反检测环境,规避风控系统,确保爬虫稳定运行。

## 检测类型
- IP 检测:代理轮换
- 浏览器指纹:指纹伪装
- 行为检测:模拟人类操作
- TLS 指纹:使用真实浏览器

## 经验记录
完成反检测配置后,如发现有价值的经验,使用 evolve_skill 记录:
- skill: "anti-detect"`,
  tools: [
    ...antiDetectTools,
    ...browserTools,
    ...fileTools,
    ...evolveTools,
  ],
  middleware: [
    createFilterToolsMiddleware(),
    createSkillsMiddleware({
      backend: skillsBackend,
      sources: [SKILLS.antiDetect],
    }),
  ],
};
@@ -0,0 +1,51 @@
1
+ /**
2
+ * DeepSpider - 验证码处理子代理
3
+ * 识别和绕过各类验证码
4
+ */
5
+
6
+ import { createSkillsMiddleware } from 'deepagents';
7
+ import { SKILLS, skillsBackend } from '../skills/config.js';
8
+ import { createFilterToolsMiddleware } from '../middleware/filterTools.js';
9
+
10
+ import { captchaTools } from '../tools/captcha.js';
11
+ import { browserTools } from '../tools/browser.js';
12
+ import { fileTools } from '../tools/file.js';
13
+ import { evolveTools } from '../tools/evolve.js';
14
+
15
// Instructions handed to the captcha sub-agent (runtime text, kept verbatim).
const CAPTCHA_SYSTEM_PROMPT = `你是 DeepSpider 的验证码处理专家,负责识别和绕过各类验证码。

## 核心职责
识别验证码类型,选择最优处理策略,确保验证通过。

## 验证码类型
- 图片验证码:OCR 识别
- 滑块验证码:缺口检测 + 轨迹模拟
- 点选验证码:目标检测
- 短信验证码:接码平台或用户手动

## 工作流程
1. 检测验证码类型
2. 选择处理策略
3. 执行验证
4. 检查结果,失败则重试

## 经验记录
完成验证码处理后,如发现有价值的经验,使用 evolve_skill 记录:
- skill: "captcha"`;

// Tool groups available to the sub-agent, flattened in declaration order.
const captchaToolset = [
  ...captchaTools,
  ...browserTools,
  ...fileTools,
  ...evolveTools,
];

// Middleware chain: createFilterToolsMiddleware() first, then a skills
// middleware restricted to the captcha skill source.
const captchaMiddleware = [
  createFilterToolsMiddleware(),
  createSkillsMiddleware({ backend: skillsBackend, sources: [SKILLS.captcha] }),
];

/**
 * Sub-agent definition for recognising and getting past captchas.
 *
 * Combines the routing metadata (`name`, `description`) with the prompt,
 * tool list and middleware chain assembled above.
 */
export const captchaSubagent = {
  name: 'captcha',
  description: '验证码处理专家。当遇到验证码需要识别或绕过时使用,适用于:图片验证码OCR、滑块验证码、点选验证码、短信验证码处理。',
  systemPrompt: CAPTCHA_SYSTEM_PROMPT,
  tools: captchaToolset,
  middleware: captchaMiddleware,
};
@@ -0,0 +1,138 @@
1
+ /**
2
+ * DeepSpider - 爬虫编排子代理
3
+ * 智能调度、流程规划、脚本生成
4
+ */
5
+
6
+ import { createSkillsMiddleware } from 'deepagents';
7
+ import { SKILLS, skillsBackend } from '../skills/config.js';
8
+ import { createFilterToolsMiddleware } from '../middleware/filterTools.js';
9
+
10
+ import { crawlerTools } from '../tools/crawler.js';
11
+ import { fileTools } from '../tools/file.js';
12
+ import { evolveTools } from '../tools/evolve.js';
13
+ import { storeTools } from '../tools/store.js';
14
+
15
/**
 * Sub-agent definition for crawler orchestration.
 *
 * - `name` / `description`: identifier and the Chinese routing description
 *   (plan the full crawl flow, generate the crawler script, run end-to-end
 *   tests, dispatch other sub-agents as needed).
 * - `systemPrompt`: Chinese instructions whose stated goal is a complete
 *   Python crawler the user can run directly. It defines three site
 *   complexity levels, a table mapping site traits to the sub-agents to call
 *   (static -> js2python, captcha, anti-detect, dynamic), output rules
 *   (save via `artifact_save`, include imports, a `__main__` example and
 *   requirements.txt), a project layout, a Python code template, and asks for
 *   learnings to be recorded via `evolve_skill` under the "crawler" skill.
 * - `tools`: `crawlerTools`, `fileTools`, `evolveTools` and `storeTools`
 *   spread into one flat array, in that order.
 * - `middleware`: the result of `createFilterToolsMiddleware()` followed by a
 *   skills middleware built on `skillsBackend` with the `SKILLS.crawler` and
 *   `SKILLS.xpath` sources.
 *
 * NOTE(review): in this view the Python template and the directory tree inside
 * the prompt appear without indentation/column alignment; that looks like a
 * rendering artifact, so confirm the whitespace against the real file before
 * editing the prompt text.
 */
+ export const crawlerSubagent = {
16
+ name: 'crawler',
17
+ description: '爬虫编排专家。当需要规划完整爬虫流程、生成爬虫脚本、进行端到端测试时使用。负责分析目标网站复杂度,按需调度其他子代理,输出完整可运行的爬虫代码。',
18
+ systemPrompt: `你是 DeepSpider 的爬虫编排专家,负责生成完整可运行的 Python 爬虫脚本。
19
+
20
+ ## 核心职责
21
+ **最终目标:输出一份用户可以直接 python crawler.py 运行的完整爬虫代码**
22
+
23
+ 1. 分析目标网站,识别需要处理的环节
24
+ 2. 调度其他子代理获取各模块代码
25
+ 3. 整合所有模块,生成完整 Python 爬虫脚本
26
+ 4. E2E 测试验证脚本可运行
27
+ 5. 输出最终代码文件
28
+
29
+ ## 网站复杂度分级
30
+
31
+ ### Level 1 - 简单
32
+ - 无加密或简单加密
33
+ - 无验证码
34
+ - 无登录要求
35
+ - 无风控检测
36
+
37
+ ### Level 2 - 中等
38
+ - 有加密参数
39
+ - 可能有简单验证码
40
+ - 可能需要登录
41
+ - 基础风控
42
+
43
+ ### Level 3 - 复杂
44
+ - 复杂加密 + 多重风控
45
+ - 多种验证码
46
+ - 设备指纹检测
47
+ - 行为分析
48
+
49
+ ## 调度策略
50
+
51
+ 根据网站特征,按需调用子代理获取代码模块:
52
+
53
+ | 网站特征 | 调用子代理 | 获取模块 |
54
+ |----------|-----------|----------|
55
+ | 有加密参数 | static → js2python | crypto.py |
56
+ | 有验证码 | captcha 分析 | 生成验证码处理代码 |
57
+ | 有风控 | anti-detect 分析 | 生成反检测配置代码 |
58
+ | 需要登录 | dynamic 分析 | 生成登录流程代码 |
59
+
60
+ ## 输出规范
61
+
62
+ **重要:必须输出完整可运行的 Python 代码文件**
63
+
64
+ ### 输出要求
65
+ 1. 使用 artifact_save 保存完整 .py 文件
66
+ 2. 代码必须可以直接 \`python xxx.py\` 运行
67
+ 3. 包含所有依赖的 import
68
+ 4. 包含使用示例(if __name__ == "__main__")
69
+ 5. 包含 requirements.txt
70
+
71
+ ### 复杂网站 - 项目结构
72
+ \`\`\`
73
+ <domain>_crawler/
74
+ ├── config.py # 配置(可选的代理、账号等)
75
+ ├── crypto.py # 加密模块(来自 js2python)
76
+ ├── captcha.py # 验证码处理(如需要)
77
+ ├── crawler.py # 主爬虫逻辑
78
+ └── requirements.txt # 依赖列表
79
+ \`\`\`
80
+
81
+ ### 代码模板
82
+
83
+ \`\`\`python
84
+ """
85
+ <domain> 爬虫 - 由 DeepSpider 生成
86
+ """
87
+ import requests
88
+
89
+ class Crawler:
90
+ def __init__(self):
91
+ self.session = requests.Session()
92
+ self.session.headers.update({...})
93
+
94
+ def encrypt(self, data):
95
+ # 加密逻辑
96
+ ...
97
+
98
+ def login(self, username, password):
99
+ # 登录流程(如需要)
100
+ ...
101
+
102
+ def fetch(self, params):
103
+ # 请求逻辑
104
+ encrypted = self.encrypt(params)
105
+ resp = self.session.post(url, data=encrypted)
106
+ return resp.json()
107
+
108
+ if __name__ == "__main__":
109
+ c = Crawler()
110
+ # c.login("user", "pass") # 如需要
111
+ data = c.fetch({"page": 1})
112
+ print(data)
113
+ \`\`\`
114
+
115
+ ## 工作流程
116
+ 1. 分析网站特征
117
+ 2. 调度子代理获取模块
118
+ 3. 整合为完整脚本
119
+ 4. E2E 验证
120
+ 5. 输出文件
121
+
122
+ ## 经验记录
123
+ 完成爬虫编排后,如发现有价值的经验,使用 evolve_skill 记录:
124
+ - skill: "crawler"`,
125
+ tools: [
126
+ ...crawlerTools,
127
+ ...fileTools,
128
+ ...evolveTools,
129
+ ...storeTools,
130
+ ],
131
+ middleware: [
132
+ createFilterToolsMiddleware(),
133
+ createSkillsMiddleware({
134
+ backend: skillsBackend,
135
+ sources: [SKILLS.crawler, SKILLS.xpath],
136
+ }),
137
+ ],
138
+ };
@@ -0,0 +1,64 @@
1
+ /**
2
+ * DeepSpider - 动态分析子代理
3
+ */
4
+
5
+ import { createSkillsMiddleware } from 'deepagents';
6
+ import { SKILLS, skillsBackend } from '../skills/config.js';
7
+ import { createFilterToolsMiddleware } from '../middleware/filterTools.js';
8
+
9
+ import { runtimeTools } from '../tools/runtime.js';
10
+ import { debugTools } from '../tools/debug.js';
11
+ import { captureTools } from '../tools/capture.js';
12
+ import { browserTools } from '../tools/browser.js';
13
+ import { cryptoHookTools } from '../tools/cryptohook.js';
14
+ import { correlateTools } from '../tools/correlate.js';
15
+ import { tracingTools } from '../tools/tracing.js';
16
+ import { evolveTools } from '../tools/evolve.js';
17
+
18
// Instructions handed to the dynamic-analysis sub-agent (runtime text, kept verbatim).
const DYNAMIC_SYSTEM_PROMPT = `你是 DeepSpider 的动态分析专家。

## 职责
- 控制浏览器执行
- 设置断点捕获运行时数据
- 采集真实环境数据
- 收集 Hook 日志
- 分析请求与加密的关联

## 浏览器状态检查
**在执行任何操作前,先判断浏览器状态:**
- 如果任务描述中包含"浏览器已就绪"等关键词,不要调用 launch_browser
- 先使用 get_hook_logs 检查是否有数据
- 只有确认浏览器未启动时,才执行启动流程

## 工作流程
1. 检查浏览器状态
2. 如需启动:launch_browser → navigate_to
3. 等待 Hook 捕获加密调用
4. 分析请求与加密的关联
5. 必要时设置断点深入分析
6. 采集环境数据

## 经验记录
完成分析后,如发现有价值的经验,使用 evolve_skill 记录:
- skill: "dynamic-analysis"`;

// Tool groups available to the sub-agent, flattened in declaration order.
const dynamicToolset = [
  ...runtimeTools,
  ...debugTools,
  ...captureTools,
  ...browserTools,
  ...cryptoHookTools,
  ...correlateTools,
  ...tracingTools,
  ...evolveTools,
];

// Middleware chain: createFilterToolsMiddleware() first, then a skills
// middleware restricted to the dynamic-analysis skill source.
const dynamicMiddleware = [
  createFilterToolsMiddleware(),
  createSkillsMiddleware({ backend: skillsBackend, sources: [SKILLS.dynamic] }),
];

/**
 * Sub-agent definition for in-browser dynamic analysis.
 *
 * Combines the routing metadata (`name`, `description`) with the prompt,
 * tool list and middleware chain assembled above.
 */
export const dynamicSubagent = {
  name: 'dynamic-agent',
  description: '动态分析专家。当需要在浏览器中调试分析时使用,适用于:设置断点捕获运行时数据、分析请求与加密的关联、采集真实环境数据。',
  systemPrompt: DYNAMIC_SYSTEM_PROMPT,
  tools: dynamicToolset,
  middleware: dynamicMiddleware,
};
@@ -0,0 +1,82 @@
1
+ /**
2
+ * DeepSpider - 补环境子代理
3
+ * 方向:通过补全浏览器环境让代码直接运行
4
+ */
5
+
6
+ import { createSkillsMiddleware } from 'deepagents';
7
+ import { SKILLS, skillsBackend } from '../skills/config.js';
8
+ import { createFilterToolsMiddleware } from '../middleware/filterTools.js';
9
+
10
+ import { sandboxTools } from '../tools/sandbox.js';
11
+ import { nodejsTools } from '../tools/nodejs.js';
12
+ import { envDumpTools } from '../tools/envdump.js';
13
+ import { extractTools } from '../tools/extract.js';
14
+ import { patchTools } from '../tools/patch.js';
15
+ import { envTools } from '../tools/env.js';
16
+ import { profileTools } from '../tools/profile.js';
17
+ import { storeTools } from '../tools/store.js';
18
+ import { hookTools } from '../tools/hook.js';
19
+ import { antiDebugTools } from '../tools/antidebug.js';
20
+ import { asyncTools } from '../tools/async.js';
21
+ import { evolveTools } from '../tools/evolve.js';
22
+
23
// Instructions handed to the environment-patching sub-agent (runtime text, kept verbatim).
const ENV_AGENT_SYSTEM_PROMPT = `你是 DeepSpider 的补环境专家。

## 分析方向
补环境是 JS 逆向的黑盒方向,目标是让混淆代码在沙箱中直接运行,无需理解算法逻辑。

## 核心流程
1. **环境自吐** - 发现代码访问了哪些环境
2. **浏览器提取** - 从真实浏览器获取环境值
3. **生成补丁** - 转换为可注入的代码
4. **沙箱执行** - 运行并获取结果

## 判断标准
适合补环境的场景:
- 环境检测多(webdriver、chrome对象等)
- 算法复杂难以还原
- 需要快速获取结果
- 代码频繁更新

## 快速模式
如果只需快速验证代码能否运行:
1. list_env_modules 查看预置模块
2. load_all_env_modules 加载全部
3. sandbox_inject 注入
4. sandbox_execute 执行

## 执行工具选择
- sandbox_execute: 隔离沙箱,适合补环境后的代码执行
- run_node_code: Node.js 执行,适合需要 require npm 包的场景

## 失败处理
如果补环境多次失败,建议切换到纯算分析方向。

## 经验记录
完成分析后,如发现有价值的经验,使用 evolve_skill 记录:
- skill: "env"`;

// Tool groups available to the sub-agent, flattened in declaration order.
const envAgentToolset = [
  ...sandboxTools,
  ...nodejsTools,
  ...envDumpTools,
  ...extractTools,
  ...patchTools,
  ...envTools,
  ...profileTools,
  ...hookTools,
  ...antiDebugTools,
  ...asyncTools,
  ...storeTools,
  ...evolveTools,
];

// Middleware chain: createFilterToolsMiddleware() first, then a skills
// middleware restricted to the env skill source.
const envAgentMiddleware = [
  createFilterToolsMiddleware(),
  createSkillsMiddleware({ backend: skillsBackend, sources: [SKILLS.env] }),
];

/**
 * Sub-agent definition for environment patching, i.e. getting obfuscated code
 * to run in a sandbox by supplying the browser environment it expects.
 *
 * Combines the routing metadata (`name`, `description`) with the prompt,
 * tool list and middleware chain assembled above.
 */
export const envAgentSubagent = {
  name: 'env-agent',
  description: '补环境专家。当需要让混淆代码在沙箱中直接运行时使用,适用于:环境检测多、算法复杂难还原、需要快速获取结果的场景。',
  systemPrompt: ENV_AGENT_SYSTEM_PROMPT,
  tools: envAgentToolset,
  middleware: envAgentMiddleware,
};
@@ -0,0 +1,37 @@
1
+ /**
2
+ * DeepSpider - 子代理索引
3
+ */
4
+
5
// Each sub-agent module is imported once; the same bindings serve both the
// named re-exports and the aggregate list below.

// Orchestration layer
import { crawlerSubagent } from './crawler.js';

// Reverse-engineering analysis
import { staticSubagent } from './static.js';
import { dynamicSubagent } from './dynamic.js';
import { sandboxSubagent } from './sandbox.js';
import { js2pythonSubagent } from './js2python.js';
import { envAgentSubagent } from './env-agent.js';

// Crawling capabilities
import { captchaSubagent } from './captcha.js';
import { antiDetectSubagent } from './anti-detect.js';

// Named re-exports so callers can pick individual sub-agents.
export {
  crawlerSubagent,
  staticSubagent,
  dynamicSubagent,
  sandboxSubagent,
  js2pythonSubagent,
  envAgentSubagent,
  captchaSubagent,
  antiDetectSubagent,
};

/**
 * Every sub-agent definition in one array: the orchestrator first, then the
 * analysis agents, then the crawling-capability agents.
 */
export const allSubagents = [
  crawlerSubagent,
  staticSubagent,
  dynamicSubagent,
  sandboxSubagent,
  js2pythonSubagent,
  envAgentSubagent,
  captchaSubagent,
  antiDetectSubagent,
];
@@ -0,0 +1,72 @@
1
+ /**
2
+ * DeepSpider - JS 转 Python 子代理
3
+ * 将 JS 加密逻辑转换为 Python 代码
4
+ */
5
+
6
+ import { createSkillsMiddleware } from 'deepagents';
7
+ import { SKILLS, skillsBackend } from '../skills/config.js';
8
+ import { createFilterToolsMiddleware } from '../middleware/filterTools.js';
9
+
10
+ import { pythonTools } from '../tools/python.js';
11
+ import { nodejsTools } from '../tools/nodejs.js';
12
+ import { analyzerTools } from '../tools/analyzer.js';
13
+ import { fileTools } from '../tools/file.js';
14
+ import { evolveTools } from '../tools/evolve.js';
15
+
16
// Instructions handed to the JS-to-Python sub-agent (runtime text, kept verbatim).
const JS2PYTHON_SYSTEM_PROMPT = `你是 DeepSpider 的 JS 转 Python 专家,负责将 JS 加密逻辑转换为 Python 代码。

## 核心职责
将 JS 加密算法转换为 Python 实现,保证可以成功运行。

## 转换策略

### 策略一:纯 Python 重写(优先)
适用:标准加密算法(AES、MD5、SHA、RSA、国密)

### 策略二:execjs 执行原始 JS
适用:复杂自定义算法、混淆代码难还原

## 工作流程
1. 分析 JS 代码,识别加密算法类型
2. 使用 run_node_code 执行原始 JS 获取基准结果
3. 选择转换策略
4. 生成 Python 代码
5. 验证结果一致性
6. 使用 artifact_save 保存文件

## 输出规范

**重要:必须输出完整可运行的 Python 文件**

1. 使用 artifact_save 保存 .py 文件
2. 文件必须可以直接 python xxx.py 运行
3. 包含完整 import、函数定义、使用示例
4. 禁止在对话中输出大段代码片段代替完整文件

## 降级策略

纯 Python 转换失败 3 次 → 改用 execjs 方案

目标是保证最终输出可用的代码。

## 经验记录
完成转换后,如发现有价值的经验,使用 evolve_skill 记录:
- skill: "js2python"`;

// Tool groups available to the sub-agent, flattened in declaration order.
const js2pythonToolset = [
  ...pythonTools,
  ...nodejsTools,
  ...analyzerTools,
  ...fileTools,
  ...evolveTools,
];

// Middleware chain: createFilterToolsMiddleware() first, then a skills
// middleware restricted to the js2python skill source.
const js2pythonMiddleware = [
  createFilterToolsMiddleware(),
  createSkillsMiddleware({ backend: skillsBackend, sources: [SKILLS.js2python] }),
];

/**
 * Sub-agent definition for converting JS encryption logic into Python.
 *
 * Combines the routing metadata (`name`, `description`) with the prompt,
 * tool list and middleware chain assembled above.
 */
export const js2pythonSubagent = {
  name: 'js2python',
  description: 'JS转Python专家。当需要将JS加密代码转换为Python时使用,适用于:爬虫项目需要Python实现、标准加密算法转换、复杂算法使用execjs方案。',
  systemPrompt: JS2PYTHON_SYSTEM_PROMPT,
  tools: js2pythonToolset,
  middleware: js2pythonMiddleware,
};
@@ -0,0 +1,55 @@
1
+ /**
2
+ * DeepSpider - 沙箱验证子代理
3
+ */
4
+
5
+ import { createSkillsMiddleware } from 'deepagents';
6
+ import { SKILLS, skillsBackend } from '../skills/config.js';
7
+ import { createFilterToolsMiddleware } from '../middleware/filterTools.js';
8
+
9
+ import { sandboxTools } from '../tools/sandbox.js';
10
+ import { nodejsTools } from '../tools/nodejs.js';
11
+ import { patchTools } from '../tools/patch.js';
12
+ import { envTools } from '../tools/env.js';
13
+ import { verifyTools } from '../tools/verify.js';
14
+ import { fileTools } from '../tools/file.js';
15
+ import { evolveTools } from '../tools/evolve.js';
16
+
17
// Instructions handed to the sandbox-verification sub-agent (runtime text, kept verbatim).
const SANDBOX_SYSTEM_PROMPT = `你是 DeepSpider 的验证执行专家。

## 职责
- 在沙箱中验证提取的加密算法
- 补全缺失的环境
- 生成可独立运行的脚本
- 验证加密结果是否正确

## 执行工具选择
- sandbox_execute: 隔离沙箱,适合不需要外部依赖的代码
- run_node_code: Node.js 执行,适合需要 require npm 包的代码(如 crypto-js)

## 输出
- 验证结果
- 可执行的 JS 模块

## 经验记录
完成验证后,如发现有价值的经验,使用 evolve_skill 记录:
- skill: "sandbox"`;

// Tool groups available to the sub-agent, flattened in declaration order.
const sandboxToolset = [
  ...sandboxTools,
  ...nodejsTools,
  ...patchTools,
  ...envTools,
  ...verifyTools,
  ...fileTools,
  ...evolveTools,
];

// Middleware chain: createFilterToolsMiddleware() first, then a skills
// middleware restricted to the sandbox skill source.
const sandboxMiddleware = [
  createFilterToolsMiddleware(),
  createSkillsMiddleware({ backend: skillsBackend, sources: [SKILLS.sandbox] }),
];

/**
 * Sub-agent definition for verifying extracted code in a sandbox.
 *
 * Combines the routing metadata (`name`, `description`) with the prompt,
 * tool list and middleware chain assembled above.
 */
export const sandboxSubagent = {
  name: 'sandbox-agent',
  description: '沙箱验证专家。当需要验证提取的代码能否正确执行时使用,适用于:验证加密算法、补全缺失环境、生成可独立运行的脚本。',
  systemPrompt: SANDBOX_SYSTEM_PROMPT,
  tools: sandboxToolset,
  middleware: sandboxMiddleware,
};
@@ -0,0 +1,66 @@
1
+ /**
2
+ * DeepSpider - 静态分析子代理
3
+ */
4
+
5
+ import { createSkillsMiddleware } from 'deepagents';
6
+ import { SKILLS, skillsBackend } from '../skills/config.js';
7
+ import { createFilterToolsMiddleware } from '../middleware/filterTools.js';
8
+
9
+ import { analyzerTools } from '../tools/analyzer.js';
10
+ import { deobfuscatorTools } from '../tools/deobfuscator.js';
11
+ import { traceTools } from '../tools/trace.js';
12
+ import { webcrackTools } from '../tools/webcrack.js';
13
+ import { preprocessTools } from '../tools/preprocess.js';
14
+ import { extractorTools } from '../tools/extractor.js';
15
+ import { storeTools } from '../tools/store.js';
16
+ import { verifyTools } from '../tools/verify.js';
17
+ import { correlateTools } from '../tools/correlate.js';
18
+ import { evolveTools } from '../tools/evolve.js';
19
+
20
// Instructions handed to the static-analysis sub-agent (runtime text, kept verbatim).
const STATIC_SYSTEM_PROMPT = `你是 DeepSpider 的静态分析专家。

## 职责
- 预处理打包代码(Webpack/Vite/Rollup)
- 反混淆处理
- 定位加密函数入口
- 还原算法逻辑
- 验证算法正确性

## 工作流程
1. preprocess_code 预处理
2. 如有 bundle 则解包
3. deobfuscate 反混淆
4. analyze_encryption 定位入口
5. 验证算法

## 输出
- 加密函数位置
- 断点建议
- 算法分析结果

## 经验记录
完成分析后,如发现有价值的经验,使用 evolve_skill 记录:
- skill: "static-analysis"`;

// Tool groups available to the sub-agent, flattened in declaration order.
const staticToolset = [
  ...preprocessTools,
  ...webcrackTools,
  ...analyzerTools,
  ...deobfuscatorTools,
  ...traceTools,
  ...extractorTools,
  ...verifyTools,
  ...correlateTools,
  ...storeTools,
  ...evolveTools,
];

// Middleware chain: createFilterToolsMiddleware() first, then a skills
// middleware restricted to the static-analysis skill source.
const staticMiddleware = [
  createFilterToolsMiddleware(),
  createSkillsMiddleware({ backend: skillsBackend, sources: [SKILLS.static] }),
];

/**
 * Sub-agent definition for static code analysis (unpacking, deobfuscation,
 * locating encryption entry points).
 *
 * Combines the routing metadata (`name`, `description`) with the prompt,
 * tool list and middleware chain assembled above.
 */
export const staticSubagent = {
  name: 'static-agent',
  description: '静态代码分析专家。当需要分析混淆代码、还原加密算法时使用,适用于:Webpack解包、反混淆、定位加密入口、算法还原验证。',
  systemPrompt: STATIC_SYSTEM_PROMPT,
  tools: staticToolset,
  middleware: staticMiddleware,
};