deepspider 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/.claude/agents/check.md +122 -0
  2. package/.claude/agents/debug.md +106 -0
  3. package/.claude/agents/dispatch.md +214 -0
  4. package/.claude/agents/implement.md +96 -0
  5. package/.claude/agents/plan.md +396 -0
  6. package/.claude/agents/research.md +120 -0
  7. package/.claude/commands/evolve/merge.md +80 -0
  8. package/.claude/commands/trellis/before-backend-dev.md +13 -0
  9. package/.claude/commands/trellis/before-frontend-dev.md +13 -0
  10. package/.claude/commands/trellis/break-loop.md +107 -0
  11. package/.claude/commands/trellis/check-backend.md +13 -0
  12. package/.claude/commands/trellis/check-cross-layer.md +153 -0
  13. package/.claude/commands/trellis/check-frontend.md +13 -0
  14. package/.claude/commands/trellis/create-command.md +154 -0
  15. package/.claude/commands/trellis/finish-work.md +129 -0
  16. package/.claude/commands/trellis/integrate-skill.md +219 -0
  17. package/.claude/commands/trellis/onboard.md +358 -0
  18. package/.claude/commands/trellis/parallel.md +193 -0
  19. package/.claude/commands/trellis/record-session.md +62 -0
  20. package/.claude/commands/trellis/start.md +280 -0
  21. package/.claude/commands/trellis/update-spec.md +213 -0
  22. package/.claude/hooks/inject-subagent-context.py +758 -0
  23. package/.claude/hooks/ralph-loop.py +374 -0
  24. package/.claude/hooks/session-start.py +126 -0
  25. package/.claude/settings.json +41 -0
  26. package/.claude/skills/deepagents-guide/SKILL.md +428 -0
  27. package/.cursor/commands/trellis-before-backend-dev.md +13 -0
  28. package/.cursor/commands/trellis-before-frontend-dev.md +13 -0
  29. package/.cursor/commands/trellis-break-loop.md +107 -0
  30. package/.cursor/commands/trellis-check-backend.md +13 -0
  31. package/.cursor/commands/trellis-check-cross-layer.md +153 -0
  32. package/.cursor/commands/trellis-check-frontend.md +13 -0
  33. package/.cursor/commands/trellis-create-command.md +154 -0
  34. package/.cursor/commands/trellis-finish-work.md +129 -0
  35. package/.cursor/commands/trellis-integrate-skill.md +219 -0
  36. package/.cursor/commands/trellis-onboard.md +358 -0
  37. package/.cursor/commands/trellis-record-session.md +62 -0
  38. package/.cursor/commands/trellis-start.md +156 -0
  39. package/.cursor/commands/trellis-update-spec.md +213 -0
  40. package/.env.example +11 -0
  41. package/.husky/pre-commit +1 -0
  42. package/.mcp.json +8 -0
  43. package/.trellis/.template-hashes.json +65 -0
  44. package/.trellis/.version +1 -0
  45. package/.trellis/scripts/add-session.sh +384 -0
  46. package/.trellis/scripts/common/developer.sh +129 -0
  47. package/.trellis/scripts/common/git-context.sh +263 -0
  48. package/.trellis/scripts/common/paths.sh +208 -0
  49. package/.trellis/scripts/common/phase.sh +150 -0
  50. package/.trellis/scripts/common/registry.sh +247 -0
  51. package/.trellis/scripts/common/task-queue.sh +142 -0
  52. package/.trellis/scripts/common/task-utils.sh +151 -0
  53. package/.trellis/scripts/common/worktree.sh +128 -0
  54. package/.trellis/scripts/create-bootstrap.sh +299 -0
  55. package/.trellis/scripts/get-context.sh +7 -0
  56. package/.trellis/scripts/get-developer.sh +15 -0
  57. package/.trellis/scripts/init-developer.sh +34 -0
  58. package/.trellis/scripts/multi-agent/cleanup.sh +396 -0
  59. package/.trellis/scripts/multi-agent/create-pr.sh +241 -0
  60. package/.trellis/scripts/multi-agent/plan.sh +207 -0
  61. package/.trellis/scripts/multi-agent/start.sh +310 -0
  62. package/.trellis/scripts/multi-agent/status.sh +828 -0
  63. package/.trellis/scripts/task.sh +1118 -0
  64. package/.trellis/spec/backend/deepagents-guide.md +337 -0
  65. package/.trellis/spec/backend/directory-structure.md +126 -0
  66. package/.trellis/spec/backend/examples/skills/deepagents-guide/README.md +11 -0
  67. package/.trellis/spec/backend/examples/skills/deepagents-guide/agent.js.template +20 -0
  68. package/.trellis/spec/backend/examples/skills/deepagents-guide/skills-config.js.template +13 -0
  69. package/.trellis/spec/backend/examples/skills/deepagents-guide/subagent.js.template +19 -0
  70. package/.trellis/spec/backend/hook-guidelines.md +178 -0
  71. package/.trellis/spec/backend/index.md +36 -0
  72. package/.trellis/spec/backend/quality-guidelines.md +201 -0
  73. package/.trellis/spec/backend/state-management.md +76 -0
  74. package/.trellis/spec/backend/tool-guidelines.md +144 -0
  75. package/.trellis/spec/backend/type-safety.md +71 -0
  76. package/.trellis/spec/guides/code-reuse-thinking-guide.md +92 -0
  77. package/.trellis/spec/guides/cross-layer-thinking-guide.md +94 -0
  78. package/.trellis/spec/guides/index.md +79 -0
  79. package/.trellis/tasks/archive/02-02-evolving-skills/prd.md +61 -0
  80. package/.trellis/tasks/archive/02-02-evolving-skills/task.json +29 -0
  81. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/prd.md +86 -0
  82. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/task.json +27 -0
  83. package/.trellis/tasks/archive/2026-02/02-02-skills-system/check.jsonl +3 -0
  84. package/.trellis/tasks/archive/2026-02/02-02-skills-system/debug.jsonl +2 -0
  85. package/.trellis/tasks/archive/2026-02/02-02-skills-system/implement.jsonl +5 -0
  86. package/.trellis/tasks/archive/2026-02/02-02-skills-system/prd.md +33 -0
  87. package/.trellis/tasks/archive/2026-02/02-02-skills-system/task.json +41 -0
  88. package/.trellis/workflow.md +407 -0
  89. package/.trellis/workspace/index.md +123 -0
  90. package/.trellis/workspace/pony/index.md +40 -0
  91. package/.trellis/workspace/pony/journal-1.md +7 -0
  92. package/.trellis/worktree.yaml +47 -0
  93. package/AGENTS.md +18 -0
  94. package/CLAUDE.md +292 -0
  95. package/README.md +134 -0
  96. package/agents/deepspider.md +142 -0
  97. package/docs/DEBUG.md +42 -0
  98. package/docs/GUIDE.md +334 -0
  99. package/docs/PROMPT.md +60 -0
  100. package/docs/USAGE.md +226 -0
  101. package/eslint.config.js +51 -0
  102. package/package.json +78 -0
  103. package/requirements-crypto.txt +14 -0
  104. package/src/agent/index.js +97 -0
  105. package/src/agent/logger.js +164 -0
  106. package/src/agent/middleware/filterTools.js +64 -0
  107. package/src/agent/middleware/report.js +79 -0
  108. package/src/agent/prompts/system.js +315 -0
  109. package/src/agent/run.js +575 -0
  110. package/src/agent/skills/anti-detect/SKILL.md +28 -0
  111. package/src/agent/skills/anti-detect/evolved.md +12 -0
  112. package/src/agent/skills/captcha/SKILL.md +37 -0
  113. package/src/agent/skills/captcha/evolved.md +12 -0
  114. package/src/agent/skills/config.js +30 -0
  115. package/src/agent/skills/crawler/SKILL.md +9 -0
  116. package/src/agent/skills/crawler/evolved.md +16 -0
  117. package/src/agent/skills/dynamic-analysis/SKILL.md +91 -0
  118. package/src/agent/skills/dynamic-analysis/evolved.md +12 -0
  119. package/src/agent/skills/env/SKILL.md +72 -0
  120. package/src/agent/skills/env/evolved.md +12 -0
  121. package/src/agent/skills/evolve.js +79 -0
  122. package/src/agent/skills/general/SKILL.md +12 -0
  123. package/src/agent/skills/general/evolved.md +12 -0
  124. package/src/agent/skills/js2python/SKILL.md +30 -0
  125. package/src/agent/skills/js2python/evolved.md +13 -0
  126. package/src/agent/skills/report/SKILL.md +21 -0
  127. package/src/agent/skills/report/evolved.md +12 -0
  128. package/src/agent/skills/sandbox/SKILL.md +22 -0
  129. package/src/agent/skills/sandbox/evolved.md +16 -0
  130. package/src/agent/skills/static-analysis/SKILL.md +93 -0
  131. package/src/agent/skills/static-analysis/evolved.md +12 -0
  132. package/src/agent/skills/xpath/SKILL.md +119 -0
  133. package/src/agent/subagents/anti-detect.js +45 -0
  134. package/src/agent/subagents/captcha.js +51 -0
  135. package/src/agent/subagents/crawler.js +138 -0
  136. package/src/agent/subagents/dynamic.js +64 -0
  137. package/src/agent/subagents/env-agent.js +82 -0
  138. package/src/agent/subagents/index.js +37 -0
  139. package/src/agent/subagents/js2python.js +72 -0
  140. package/src/agent/subagents/sandbox.js +55 -0
  141. package/src/agent/subagents/static.js +66 -0
  142. package/src/agent/tools/analysis.js +135 -0
  143. package/src/agent/tools/analyzer.js +85 -0
  144. package/src/agent/tools/anti-detect.js +89 -0
  145. package/src/agent/tools/antidebug.js +64 -0
  146. package/src/agent/tools/async.js +43 -0
  147. package/src/agent/tools/browser.js +324 -0
  148. package/src/agent/tools/captcha.js +223 -0
  149. package/src/agent/tools/capture.js +179 -0
  150. package/src/agent/tools/correlate.js +303 -0
  151. package/src/agent/tools/crawler.js +116 -0
  152. package/src/agent/tools/cryptohook.js +80 -0
  153. package/src/agent/tools/debug.js +246 -0
  154. package/src/agent/tools/deobfuscator.js +90 -0
  155. package/src/agent/tools/env.js +83 -0
  156. package/src/agent/tools/envdump.js +92 -0
  157. package/src/agent/tools/evolve.js +164 -0
  158. package/src/agent/tools/extract.js +114 -0
  159. package/src/agent/tools/extractor.js +54 -0
  160. package/src/agent/tools/file.js +224 -0
  161. package/src/agent/tools/hook.js +84 -0
  162. package/src/agent/tools/hookManager.js +178 -0
  163. package/src/agent/tools/index.js +137 -0
  164. package/src/agent/tools/nodejs.js +101 -0
  165. package/src/agent/tools/patch.js +46 -0
  166. package/src/agent/tools/preprocess.js +71 -0
  167. package/src/agent/tools/profile.js +122 -0
  168. package/src/agent/tools/python.js +627 -0
  169. package/src/agent/tools/report.js +124 -0
  170. package/src/agent/tools/runtime.js +132 -0
  171. package/src/agent/tools/sandbox.js +79 -0
  172. package/src/agent/tools/store.js +73 -0
  173. package/src/agent/tools/trace.js +74 -0
  174. package/src/agent/tools/tracing.js +201 -0
  175. package/src/agent/tools/utils.js +51 -0
  176. package/src/agent/tools/verify.js +184 -0
  177. package/src/agent/tools/webcrack.js +109 -0
  178. package/src/analyzer/ASTAnalyzer.js +387 -0
  179. package/src/analyzer/CallStackAnalyzer.js +379 -0
  180. package/src/analyzer/Deobfuscator.js +289 -0
  181. package/src/analyzer/EncryptionAnalyzer.js +99 -0
  182. package/src/analyzer/index.js +22 -0
  183. package/src/browser/EnvBridge.js +186 -0
  184. package/src/browser/cdp.js +168 -0
  185. package/src/browser/client.js +197 -0
  186. package/src/browser/collector.js +444 -0
  187. package/src/browser/collectors/RequestCryptoLinker.js +109 -0
  188. package/src/browser/collectors/ResponseSearcher.js +107 -0
  189. package/src/browser/collectors/ScriptCollector.js +158 -0
  190. package/src/browser/collectors/index.js +26 -0
  191. package/src/browser/defaultHooks.js +932 -0
  192. package/src/browser/hooks/crypto.js +55 -0
  193. package/src/browser/hooks/index.js +64 -0
  194. package/src/browser/hooks/native.js +9 -0
  195. package/src/browser/hooks/network.js +33 -0
  196. package/src/browser/index.js +42 -0
  197. package/src/browser/interceptors/NetworkInterceptor.js +116 -0
  198. package/src/browser/interceptors/ScriptInterceptor.js +76 -0
  199. package/src/browser/interceptors/index.js +6 -0
  200. package/src/browser/ui/analysisPanel.js +1782 -0
  201. package/src/browser/ui/confirmDialog.js +158 -0
  202. package/src/browser/ui/panel.html +152 -0
  203. package/src/browser/ui/selector.js +170 -0
  204. package/src/config/index.js +5 -0
  205. package/src/config/paths.js +71 -0
  206. package/src/config/patterns/crypto.js +36 -0
  207. package/src/config/profiles/chrome.json +71 -0
  208. package/src/config/profiles/firefox.json +44 -0
  209. package/src/config/profiles/safari.json +38 -0
  210. package/src/core/EnvMonitor.js +200 -0
  211. package/src/core/PatchGenerator.js +278 -0
  212. package/src/core/Sandbox.js +181 -0
  213. package/src/env/AntiAntiDebug.js +111 -0
  214. package/src/env/AsyncHook.js +68 -0
  215. package/src/env/BrowserAPIList.js +265 -0
  216. package/src/env/CookieHook.js +48 -0
  217. package/src/env/CryptoHook.js +205 -0
  218. package/src/env/EnvCodeGenerator.js +157 -0
  219. package/src/env/EnvDumper.js +356 -0
  220. package/src/env/EnvExtractor.js +220 -0
  221. package/src/env/HookBase.js +618 -0
  222. package/src/env/NetworkHook.js +159 -0
  223. package/src/env/modules/bom/history.js +29 -0
  224. package/src/env/modules/bom/location.js +26 -0
  225. package/src/env/modules/bom/navigator.js +70 -0
  226. package/src/env/modules/bom/screen.js +26 -0
  227. package/src/env/modules/bom/storage.js +23 -0
  228. package/src/env/modules/dom/document.js +110 -0
  229. package/src/env/modules/dom/event.js +51 -0
  230. package/src/env/modules/index.js +34 -0
  231. package/src/env/modules/webapi/fetch.js +46 -0
  232. package/src/env/modules/webapi/url.js +47 -0
  233. package/src/env/modules/webapi/xhr.js +48 -0
  234. package/src/index.js +27 -0
  235. package/src/mcp/server.js +89 -0
  236. package/src/store/DataStore.js +708 -0
  237. package/src/store/Store.js +158 -0
  238. package/src/store/Validator.js +24 -0
  239. package/test/analyze.test.js +90 -0
  240. package/test/envdump.test.js +74 -0
  241. package/test/flow.test.js +90 -0
  242. package/test/hooks.test.js +138 -0
  243. package/test/plugin.test.js +35 -0
  244. package/test/refactor-full.test.js +30 -0
  245. package/test/refactor.test.js +21 -0
  246. package/test/samples/obfuscated.js +61 -0
  247. package/test/samples/original.js +66 -0
  248. package/test/samples/v10_eval_chain.js +52 -0
  249. package/test/samples/v11_bytecode_vm.js +81 -0
  250. package/test/samples/v12_polymorphic.js +69 -0
  251. package/test/samples/v1_ob_basic.js +98 -0
  252. package/test/samples/v2_ob_advanced.js +99 -0
  253. package/test/samples/v3_jjencode.js +77 -0
  254. package/test/samples/v4_aaencode.js +73 -0
  255. package/test/samples/v5_control_flow.js +86 -0
  256. package/test/samples/v6_string_encryption.js +71 -0
  257. package/test/samples/v7_jsvmp.js +83 -0
  258. package/test/samples/v8_anti_debug.js +79 -0
  259. package/test/samples/v9_proxy_trap.js +49 -0
  260. package/test/samples.test.js +96 -0
  261. package/test/webcrack.test.js +55 -0
@@ -0,0 +1,708 @@
1
+ /**
2
+ * DeepSpider - 数据存储管理
3
+ * 按网站和页面层级存储采集数据
4
+ * 支持会话隔离、内容去重、自动清理
5
+ */
6
+
7
+ import { mkdirSync, existsSync, readFileSync, statSync } from 'fs';
8
+ import { writeFile, readFile, readdir, rm, stat } from 'fs/promises';
9
+ import { join } from 'path';
10
+ import { createHash } from 'crypto';
11
+ import { PATHS, ensureDir } from '../config/paths.js';
12
+
// Storage locations, resolved from the shared path configuration.
const { DATA_DIR, SITES_DIR } = PATHS;
// JSON file listing every known site together with its summary counters.
const GLOBAL_INDEX = join(DATA_DIR, 'index.json');

// Unit helpers so the limits below read as "7 days", "100 MiB", and so on.
const HOUR_MS = 60 * 60 * 1000;
const MEBIBYTE = 1024 * 1024;

// Retention and quota settings consulted by the DataStore cleanup routines.
const STORAGE_CONFIG = {
  maxAge: 7 * 24 * HOUR_MS, // entries older than 7 days are expired
  maxSizePerSite: 100 * MEBIBYTE, // per-site quota: 100 MiB
  maxTotalSize: 500 * MEBIBYTE, // quota across all sites: 500 MiB
  cleanupInterval: HOUR_MS, // run cleanup at most once per hour
};
24
+
/**
 * Build a short, stable fingerprint of `content`, used for de-duplication.
 *
 * The result is the first 16 hex characters of an MD5 digest. MD5 serves purely as a
 * cheap fingerprint here; nothing security-sensitive depends on it.
 *
 * @param {*} content - Value to fingerprint. Strings and binary views (Buffer,
 *   typed arrays, DataView) are hashed as-is. Any falsy value hashes like the empty
 *   string. Other objects are hashed through their JSON form and remaining
 *   primitives through `String()`, instead of making `hash.update()` throw.
 * @returns {string} 16-character lowercase hex fingerprint.
 */
function contentHash(content) {
  let input = content || '';
  if (typeof input !== 'string' && !ArrayBuffer.isView(input)) {
    // hash.update() accepts only strings and binary views, so coerce everything else.
    input = typeof input === 'object' ? (JSON.stringify(input) ?? '') : String(input);
  }
  return createHash('md5').update(input).digest('hex').slice(0, 16);
}
31
+
/**
 * Derive the de-duplication key of a captured request.
 *
 * The key is the content fingerprint of "url|method|body", where a missing or
 * otherwise falsy body contributes an empty string.
 *
 * @param {string} url - Request URL.
 * @param {string} method - HTTP method as supplied by the caller (not normalized).
 * @param {string} [body] - Request body, if any.
 * @returns {string} Fingerprint identifying the request.
 */
function requestHash(url, method, body) {
  const payload = body || '';
  const fingerprintSource = `${url}|${method}|${payload}`;
  return contentHash(fingerprintSource);
}
38
+
/**
 * Derive the de-duplication key of a captured script.
 *
 * The key is the content fingerprint of "url|source", where a missing or otherwise
 * falsy source contributes an empty string.
 *
 * @param {string} url - Script URL.
 * @param {string} [source] - Script source text.
 * @returns {string} Fingerprint identifying the script.
 */
function scriptHash(url, source) {
  const code = source || '';
  const fingerprintSource = `${url}|${code}`;
  return contentHash(fingerprintSource);
}
45
+
/**
 * Split a URL into the storage "site" (hostname) and a directory-safe "path".
 *
 * - `site` is the URL hostname. URLs without one (about:, data:, blob:, file:) map to
 *   `_unknown`: an empty site would otherwise resolve to the sites root directory
 *   itself once joined onto it.
 * - `path` is the pathname with `/` turned into `_` and one leading `_` dropped.
 *   Characters that are illegal in directory names are replaced by `_`, the result
 *   is capped at 120 characters (data: URLs can be arbitrarily long), and an empty
 *   or dots-only result becomes `_root`.
 *
 * @param {string} url - URL to split.
 * @returns {{ site: string, path: string }} `{ site: '_unknown', path: '_root' }`
 *   when `url` cannot be parsed.
 */
function parseUrl(url) {
  const maxPathLength = 120;

  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return { site: '_unknown', path: '_root' };
  }

  const site = parsed.hostname || '_unknown';

  const flattened = parsed.pathname
    .replace(/\//g, '_')
    .replace(/^_/, '')
    .replace(/[<>:"\\|?*\x00-\x1f]/g, '_')
    .slice(0, maxPathLength);

  // "." and ".." would resolve to the parent directories when used as a path segment.
  const path = flattened === '' || /^\.+$/.test(flattened) ? '_root' : flattened;

  return { site, path };
}
60
+
/**
 * Turn arbitrary text into a safe file-name fragment.
 *
 * Characters that are illegal in file names (`< > : " / \ | ? *` and control
 * characters) become `_`, runs of underscores collapse into one, leading and
 * trailing underscores are dropped, and the result is cut to `maxLen` characters.
 * The trailing underscore is stripped again after the cut, because truncating in
 * the middle of the name can expose a new one.
 *
 * @param {string} name - Raw name; `null`/`undefined` are treated as an empty string.
 * @param {number} [maxLen=80] - Maximum length of the result.
 * @returns {string} Sanitized name; may be empty.
 */
function sanitizeFilename(name, maxLen = 80) {
  return String(name ?? '')
    .replace(/[<>:"/\\|?*\x00-\x1f]/g, '_') // replace illegal characters
    .replace(/_{2,}/g, '_') // collapse runs of underscores
    .replace(/^_|_$/g, '') // drop leading/trailing underscore
    .slice(0, maxLen)
    .replace(/_$/, ''); // truncation may have left a new trailing underscore
}
71
+
/**
 * Derive a human-readable file-name stem from a URL.
 *
 * - Responses: `METHOD_path_query`, e.g. `GET_api_user_id=123`. The extension of the
 *   last path segment is dropped and at most three query parameters are included;
 *   values of 30 characters or more are represented by their key only and
 *   parameters with an empty value are skipped.
 * - Scripts: the original file name without a trailing `.js`, e.g. `app.min`.
 *
 * @param {string} url - URL of the response or script.
 * @param {string} [type='response'] - `'script'` selects script naming; any other
 *   value selects response naming.
 * @param {string} [method='GET'] - HTTP method (response naming only); a falsy
 *   value falls back to `GET`.
 * @returns {string} Sanitized stem. When `url` cannot be parsed the result is
 *   `'script'` for scripts and `'request'` otherwise.
 */
function getReadableFilename(url, type = 'response', method = 'GET') {
  try {
    const u = new URL(url);

    if (type === 'script') {
      // Script: reuse the original file name.
      const filename = u.pathname.split('/').pop() || 'inline';
      // A name without any dot is probably not a real file (e.g. an inline script).
      if (!filename.includes('.')) {
        return sanitizeFilename(filename) || 'inline';
      }
      return sanitizeFilename(filename.replace(/\.js$/i, '')) || 'script';
    }

    // Response: METHOD_path_query.
    // The extension is stripped BEFORE slashes become underscores and may not span
    // a "/", so only the last segment loses its extension. Stripping afterwards
    // would turn "/api/v1.0/users/list" into "api_v1".
    const path = u.pathname
      .replace(/^\//, '') // drop the leading slash
      .replace(/\.[^./]+$/, '') // drop the last segment's extension
      .replace(/\//g, '_') // slashes to underscores
      || 'root';

    // Keep only query parameters that carry a value.
    const params = [];
    for (const [key, value] of u.searchParams) {
      if (!value) continue;
      params.push(value.length < 30 ? `${key}=${value}` : key);
    }
    const query = params.slice(0, 3).join('_'); // at most three parameters

    const parts = [String(method || 'GET').toUpperCase(), path];
    if (query) parts.push(query);

    return sanitizeFilename(parts.join('_'));
  } catch {
    return type === 'script' ? 'script' : 'request';
  }
}
118
+
/**
 * Pick the next unused id of the form `<baseName>_<seq>` for a site index list.
 *
 * The sequence starts at the current list length (zero-padded to three digits) and
 * is bumped until no existing entry uses the id. Bumping matters once cleanup has
 * removed entries: the list gets shorter, so the length alone could repeat an id
 * that is still in use and overwrite that entry's file.
 *
 * @param {Array<{ id: string }>} entries - Existing index entries.
 * @param {string} baseName - Readable stem of the id.
 * @returns {string} An id no entry in `entries` uses.
 */
function nextEntryId(entries, baseName) {
  const taken = new Set(entries.map((entry) => entry.id));
  let seq = entries.length;
  let id = `${baseName}_${String(seq).padStart(3, '0')}`;
  while (taken.has(id)) {
    seq += 1;
    id = `${baseName}_${String(seq).padStart(3, '0')}`;
  }
  return id;
}

/**
 * Coerce a parsed site index into the shape the store relies on.
 *
 * @param {*} raw - Parsed content of a site `index.json`, or null for a new index.
 * @param {string} site - Site the index belongs to.
 * @returns {{ hostname: string, responses: Array, scripts: Array, crypto: Array }}
 *   Index whose three lists are guaranteed to be arrays.
 */
function normalizeSiteIndex(raw, site) {
  const data = raw && typeof raw === 'object' && !Array.isArray(raw) ? raw : {};
  return {
    ...data,
    hostname: data.hostname || site,
    responses: Array.isArray(data.responses) ? data.responses : [], // { id, url, path, method, status, timestamp, file, size, hash, sessionId }
    scripts: Array.isArray(data.scripts) ? data.scripts : [], // { id, url, type, timestamp, file, size, hash, sessionId }
    crypto: Array.isArray(data.crypto) ? data.crypto : [],
  };
}

/**
 * DeepSpider data store.
 *
 * Persists captured responses and scripts per site with session tagging,
 * de-duplication and periodic cleanup. On-disk layout, as written by this class:
 *
 *   DATA_DIR/index.json                               global list of sites
 *   SITES_DIR/<site>/index.json                       per-site index
 *   SITES_DIR/<site>/responses/<path>/<id>.json       captured responses
 *   SITES_DIR/<site>/scripts/<id>.js                  captured scripts
 */
export class DataStore {
  /**
   * Prepare the data directories and load the global index from disk.
   * `ensureDir` comes from ../config/paths.js and is called synchronously (its
   * result is never awaited) - presumably it creates the directory if missing.
   */
  constructor() {
    // Global index: list of { hostname, lastAccess, responseCount, scriptCount }.
    this.globalIndex = {
      sites: [],
    };
    // Cache of per-site indexes, keyed by site name.
    this.siteIndexCache = new Map();
    // Id of the current session; created lazily by getSessionId().
    this.sessionId = null;
    // Time (ms since epoch) of the last cleanup run.
    this.lastCleanup = 0;
    // Tail of the pending operation chain per index file, see enqueueWrite().
    this.writeQueues = new Map();

    ensureDir(DATA_DIR);
    ensureDir(SITES_DIR);
    this.loadGlobalIndex();
  }

  /**
   * Start a new session and make it the current one.
   * @returns {string} The new session id (base-36 timestamp plus a random suffix).
   */
  startSession() {
    this.sessionId = Date.now().toString(36) + Math.random().toString(36).slice(2, 6);
    console.log(`[DataStore] 新会话: ${this.sessionId}`);
    return this.sessionId;
  }

  /**
   * Return the current session id, starting a session first if none exists.
   * @returns {string} Current session id.
   */
  getSessionId() {
    if (!this.sessionId) {
      this.startSession();
    }
    return this.sessionId;
  }

  /**
   * Load the global index from disk (synchronously). A missing file leaves the
   * current index untouched; an unreadable or malformed file resets it to empty.
   * Entries without a string hostname are discarded because every other method
   * uses the hostname to locate the site directory.
   */
  loadGlobalIndex() {
    try {
      if (existsSync(GLOBAL_INDEX)) {
        const data = JSON.parse(readFileSync(GLOBAL_INDEX, 'utf-8'));
        // Older files may lack the `sites` array.
        const sites = Array.isArray(data.sites) ? data.sites : [];
        this.globalIndex = {
          sites: sites.filter((s) => s && typeof s.hostname === 'string'),
        };
      }
    } catch (e) {
      console.error('[DataStore] 加载全局索引失败:', e.message);
      this.globalIndex = { sites: [] };
    }
  }

  /**
   * Run `task` after every previously queued operation on `file` has settled.
   *
   * Node documents concurrent `writeFile` calls on the same file as unsafe, and
   * several save operations can be in flight at once, so index writes are
   * serialized per file. A failed predecessor does not prevent `task` from running.
   *
   * @param {string} file - Path used as the queue key.
   * @param {() => Promise<*>} task - Operation to run.
   * @returns {Promise<*>} Settles with the outcome of `task`.
   */
  enqueueWrite(file, task) {
    const previous = this.writeQueues.get(file) || Promise.resolve();
    const run = previous.then(() => task(), () => task());
    this.writeQueues.set(file, run);

    // Drop the queue entry once this operation is the last one and has settled.
    const release = () => {
      if (this.writeQueues.get(file) === run) {
        this.writeQueues.delete(file);
      }
    };
    run.then(release, release);

    return run;
  }

  /**
   * Persist the global index. The JSON is produced when the write actually runs,
   * so a queued write always stores the latest state.
   * @returns {Promise<void>}
   */
  async saveGlobalIndex() {
    await this.enqueueWrite(GLOBAL_INDEX, () =>
      writeFile(GLOBAL_INDEX, JSON.stringify(this.globalIndex, null, 2)));
  }

  /**
   * Resolve the directory of a site.
   *
   * The name is confined to a single path segment below SITES_DIR: path separators
   * and NUL become `_`, and an empty or dots-only name maps to `_unknown`. Without
   * this, a site of '' or '..' would resolve to SITES_DIR itself or its parent,
   * which clearSite() removes recursively.
   *
   * @param {string} site - Site name (normally a hostname).
   * @returns {string} Directory path of the site.
   */
  getSiteDir(site) {
    const flattened = String(site ?? '').replace(/[\\/\x00]/g, '_');
    const segment = flattened === '' || /^\.+$/.test(flattened) ? '_unknown' : flattened;
    return join(SITES_DIR, segment);
  }

  /**
   * Return the index of a site, loading it from disk (or creating an empty one) on
   * first use. The returned object is cached and shared; callers mutate it in place.
   *
   * @param {string} site - Site name.
   * @returns {Promise<object>} Index with `hostname`, `responses`, `scripts`, `crypto`.
   */
  async getSiteIndex(site) {
    const cached = this.siteIndexCache.get(site);
    if (cached) {
      return cached;
    }

    const indexFile = join(this.getSiteDir(site), 'index.json');
    let index = normalizeSiteIndex(null, site);

    try {
      // Deliberately synchronous: with no await between the cache miss and the
      // cache fill, concurrent callers can never obtain two different index objects.
      if (existsSync(indexFile)) {
        index = normalizeSiteIndex(JSON.parse(readFileSync(indexFile, 'utf-8')), site);
      }
    } catch (e) {
      // Fall back to the empty index, but leave a trace of the problem.
      console.error('[DataStore] 加载站点索引失败:', e.message);
    }

    this.siteIndexCache.set(site, index);
    return index;
  }

  /**
   * Persist the cached index of a site; does nothing when the site is not cached.
   * The write is skipped if the site was cleared, or its index replaced, while the
   * write was waiting in the queue.
   *
   * @param {string} site - Site name.
   * @returns {Promise<void>}
   */
  async saveSiteIndex(site) {
    const index = this.siteIndexCache.get(site);
    if (!index) return;

    const siteDir = this.getSiteDir(site);
    const indexFile = join(siteDir, 'index.json');

    await this.enqueueWrite(indexFile, async () => {
      if (this.siteIndexCache.get(site) !== index) return;
      ensureDir(siteDir);
      await writeFile(indexFile, JSON.stringify(index, null, 2));
    });
  }

  /**
   * Refresh the global entry of a site (last access time and counters) and persist
   * the global index in the background.
   *
   * @param {string} site - Site name.
   * @returns {Promise<void>} Resolves before the global index has been written.
   */
  async updateSiteStats(site) {
    const index = await this.getSiteIndex(site);
    const existing = this.globalIndex.sites.find((s) => s.hostname === site);

    const stats = {
      hostname: site,
      lastAccess: Date.now(),
      responseCount: index.responses.length,
      scriptCount: index.scripts.length,
    };

    if (existing) {
      Object.assign(existing, stats);
    } else {
      this.globalIndex.sites.push(stats);
    }

    // Fire-and-forget on purpose, but do not hide a failing write.
    this.saveGlobalIndex().catch((e) => {
      console.error('[DataStore] 保存全局索引失败:', e.message);
    });
  }

  /**
   * Store a captured response, de-duplicated by URL + method + request body.
   *
   * For a duplicate, only the timestamp and session of the existing entry are
   * refreshed. Otherwise the response is written to its own JSON file and
   * registered in the site index. The site is derived from `pageUrl` when present,
   * else from `url`.
   *
   * @param {object} data - `{ url, method, status, requestHeaders, requestBody,
   *   responseBody, timestamp, pageUrl }`.
   * @returns {Promise<{ id: string, site: string, path: string, deduplicated?: boolean }>}
   * @throws When the response file or the site index cannot be written.
   */
  async saveResponse(data) {
    const { url, method, status, requestHeaders, requestBody, responseBody, timestamp, pageUrl } = data;
    const { site, path } = parseUrl(pageUrl || url);

    const hash = requestHash(url, method, requestBody);
    const index = await this.getSiteIndex(site);
    const sessionId = this.getSessionId();

    const existing = index.responses.find((r) => r.hash === hash);
    if (existing) {
      // Same request seen before: refresh the metadata, do not store it again.
      existing.timestamp = timestamp || Date.now();
      existing.sessionId = sessionId;
      await this.saveSiteIndex(site);
      return { id: existing.id, site, path, deduplicated: true };
    }

    const responsesDir = join(this.getSiteDir(site), 'responses', path);
    ensureDir(responsesDir);

    const id = nextEntryId(index.responses, getReadableFilename(url, 'response', method));
    const file = join(responsesDir, `${id}.json`);

    const content = JSON.stringify({
      url, method, status,
      requestHeaders, requestBody, responseBody,
      pageUrl, timestamp,
    });

    const entry = {
      id, url, path, method, status,
      timestamp: timestamp || Date.now(),
      file,
      size: Buffer.byteLength(content), // bytes on disk; the quotas are byte-based
      hash,
      sessionId,
    };

    // Register the entry before the first await so that a concurrent save can
    // neither pick the same id nor miss the duplicate check.
    index.responses.push(entry);
    try {
      await writeFile(file, content);
    } catch (err) {
      // Roll the reservation back: the index must not point at a missing file.
      const position = index.responses.indexOf(entry);
      if (position !== -1) index.responses.splice(position, 1);
      throw err;
    }

    await this.saveSiteIndex(site);
    await this.updateSiteStats(site);
    this.maybeCleanup();

    return { id, site, path };
  }

  /**
   * Store a captured script, de-duplicated by URL + source.
   *
   * For a duplicate, only the timestamp and session of the existing entry are
   * refreshed. The site is derived from `pageUrl` when present, else from `url`.
   *
   * @param {object} data - `{ url, type, source, timestamp, pageUrl }`.
   * @returns {Promise<{ id: string, site: string, deduplicated?: boolean }>}
   * @throws When the script file or the site index cannot be written.
   */
  async saveScript(data) {
    const { url, type, source, timestamp, pageUrl } = data;
    const { site } = parseUrl(pageUrl || url);

    const hash = scriptHash(url, source);
    const index = await this.getSiteIndex(site);
    const sessionId = this.getSessionId();

    const existing = index.scripts.find((s) => s.hash === hash);
    if (existing) {
      existing.timestamp = timestamp || Date.now();
      existing.sessionId = sessionId;
      await this.saveSiteIndex(site);
      return { id: existing.id, site, deduplicated: true };
    }

    const scriptsDir = join(this.getSiteDir(site), 'scripts');
    ensureDir(scriptsDir);

    const id = nextEntryId(index.scripts, getReadableFilename(url, 'script'));
    const file = join(scriptsDir, `${id}.js`);
    const content = source || '';

    const entry = {
      id, url, type,
      timestamp: timestamp || Date.now(),
      file,
      size: Buffer.byteLength(content), // bytes on disk; the quotas are byte-based
      hash,
      sessionId,
    };

    // Same reservation scheme as in saveResponse().
    index.scripts.push(entry);
    try {
      await writeFile(file, content);
    } catch (err) {
      const position = index.scripts.indexOf(entry);
      if (position !== -1) index.scripts.splice(position, 1);
      throw err;
    }

    await this.saveSiteIndex(site);
    await this.updateSiteStats(site);
    this.maybeCleanup();

    return { id, site };
  }

  /**
   * List the known sites with their counters.
   * @returns {Array<{ hostname: string, responseCount: number, scriptCount: number, lastAccess: number }>}
   */
  getSiteList() {
    return this.globalIndex.sites.map((s) => ({
      hostname: s.hostname,
      responseCount: s.responseCount,
      scriptCount: s.scriptCount,
      lastAccess: s.lastAccess,
    }));
  }

  /**
   * List stored responses.
   *
   * With `site`, a summary of that site's responses is returned. Without it, the
   * full index entries of every site are returned, each extended with `site`.
   *
   * @param {string} [site] - Restrict the list to this site.
   * @param {boolean} [sessionOnly=false] - Only entries of the current session.
   * @returns {Promise<object[]>}
   */
  async getResponseList(site, sessionOnly = false) {
    const currentSession = this.getSessionId();
    const inScope = (r) => !sessionOnly || r.sessionId === currentSession;

    if (site) {
      const index = await this.getSiteIndex(site);
      return index.responses.filter(inScope).map((r) => ({
        id: r.id, url: r.url, path: r.path,
        method: r.method, status: r.status,
        timestamp: r.timestamp, size: r.size,
        sessionId: r.sessionId,
      }));
    }

    // No site given: collect the responses of every site.
    const all = [];
    for (const s of this.globalIndex.sites) {
      const index = await this.getSiteIndex(s.hostname);
      for (const r of index.responses) {
        if (inScope(r)) {
          all.push({ ...r, site: s.hostname });
        }
      }
    }
    return all;
  }

  /**
   * List stored scripts.
   *
   * With `site`, a summary of that site's scripts is returned. Without it, the
   * full index entries of every site are returned, each extended with `site`.
   *
   * @param {string} [site] - Restrict the list to this site.
   * @param {boolean} [sessionOnly=false] - Only entries of the current session.
   * @returns {Promise<object[]>}
   */
  async getScriptList(site, sessionOnly = false) {
    const currentSession = this.getSessionId();
    const inScope = (sc) => !sessionOnly || sc.sessionId === currentSession;

    if (site) {
      const index = await this.getSiteIndex(site);
      return index.scripts.filter(inScope).map((sc) => ({
        id: sc.id, url: sc.url, type: sc.type,
        timestamp: sc.timestamp, size: sc.size,
        sessionId: sc.sessionId,
      }));
    }

    const all = [];
    for (const s of this.globalIndex.sites) {
      const index = await this.getSiteIndex(s.hostname);
      for (const sc of index.scripts) {
        if (inScope(sc)) {
          all.push({ ...sc, site: s.hostname });
        }
      }
    }
    return all;
  }

  /**
   * Case-insensitive search in stored response bodies.
   *
   * String bodies are searched directly; other non-null bodies are searched through
   * their JSON form. Entries whose file cannot be read or parsed are skipped.
   *
   * @param {string} text - Text to look for.
   * @param {string|null} [site=null] - Restrict the search to this site.
   * @returns {Promise<object[]>} Matching entries: `{ site, id, url, path, method,
   *   status, timestamp }`.
   */
  async searchInResponses(text, site = null) {
    const results = [];
    const searchText = String(text ?? '').toLowerCase();
    const sites = site ? [{ hostname: site }] : this.globalIndex.sites;

    for (const s of sites) {
      const index = await this.getSiteIndex(s.hostname);
      for (const meta of index.responses) {
        try {
          const data = JSON.parse(await readFile(meta.file, 'utf-8'));
          const body = data.responseBody;
          if (body == null) continue;

          const haystack = typeof body === 'string' ? body : JSON.stringify(body);
          if (haystack.toLowerCase().includes(searchText)) {
            results.push({
              site: s.hostname,
              id: meta.id, url: meta.url, path: meta.path,
              method: meta.method, status: meta.status,
              timestamp: meta.timestamp,
            });
          }
        } catch {
          // Best effort: a missing or corrupt file must not abort the search.
        }
      }
    }
    return results;
  }

  /**
   * Case-insensitive search in stored script sources. Only the first match per
   * script is reported, together with up to 50 characters of context on each side.
   * Scripts whose file cannot be read are skipped.
   *
   * @param {string} text - Text to look for.
   * @param {string|null} [site=null] - Restrict the search to this site.
   * @returns {Promise<object[]>} Matches: `{ site, id, url, type, matchIndex,
   *   context, timestamp }`.
   */
  async searchInScripts(text, site = null) {
    const results = [];
    const needle = String(text ?? '');
    const searchText = needle.toLowerCase();
    const sites = site ? [{ hostname: site }] : this.globalIndex.sites;

    for (const s of sites) {
      const index = await this.getSiteIndex(s.hostname);
      for (const meta of index.scripts) {
        try {
          const source = await readFile(meta.file, 'utf-8');
          const idx = source.toLowerCase().indexOf(searchText);
          if (idx === -1) continue;

          const start = Math.max(0, idx - 50);
          const end = Math.min(source.length, idx + needle.length + 50);
          results.push({
            site: s.hostname,
            id: meta.id, url: meta.url, type: meta.type,
            matchIndex: idx,
            context: source.slice(start, end),
            timestamp: meta.timestamp,
          });
        } catch {
          // Best effort: a missing file must not abort the search.
        }
      }
    }
    return results;
  }

  /**
   * Load the stored record of a response.
   *
   * @param {string} site - Site name.
   * @param {string} id - Response id.
   * @returns {Promise<object|null>} Parsed record, or null when the id is unknown
   *   or its file cannot be read or parsed.
   */
  async getResponse(site, id) {
    const index = await this.getSiteIndex(site);
    const meta = index.responses.find((r) => r.id === id);
    if (!meta) return null;
    try {
      return JSON.parse(await readFile(meta.file, 'utf-8'));
    } catch {
      return null;
    }
  }

  /**
   * Load the source of a stored script.
   *
   * @param {string} site - Site name.
   * @param {string} id - Script id.
   * @returns {Promise<string|null>} Source text, or null when the id is unknown or
   *   its file cannot be read.
   */
  async getScript(site, id) {
    const index = await this.getSiteIndex(site);
    const meta = index.scripts.find((s) => s.id === id);
    if (!meta) return null;
    try {
      return await readFile(meta.file, 'utf-8');
    } catch {
      return null;
    }
  }

  /**
   * Remove the directory of a site. The removal is queued behind pending writes of
   * that site's index so the two cannot interleave.
   *
   * @param {string} site - Site name.
   * @returns {Promise<void>}
   */
  removeSiteDir(site) {
    const siteDir = this.getSiteDir(site);
    return this.enqueueWrite(join(siteDir, 'index.json'), () =>
      rm(siteDir, { recursive: true, force: true }));
  }

  /**
   * Delete everything stored for a site and drop it from the global index.
   *
   * @param {string} site - Site name.
   * @returns {Promise<void>}
   * @throws When the directory cannot be removed or the global index not be saved.
   */
  async clearSite(site) {
    // Evict the cache first so index writes still waiting in the queue are skipped.
    this.siteIndexCache.delete(site);
    await this.removeSiteDir(site);
    this.globalIndex.sites = this.globalIndex.sites.filter((s) => s.hostname !== site);
    await this.saveGlobalIndex();
  }

  /**
   * Delete the data of every site listed in the global index and reset the index.
   * Directories that cannot be removed are skipped.
   *
   * @returns {Promise<void>}
   */
  async clearAll() {
    const hostnames = this.globalIndex.sites.map((s) => s.hostname);
    this.siteIndexCache.clear();
    for (const hostname of hostnames) {
      // Best effort, as before: one undeletable directory must not stop the reset.
      await this.removeSiteDir(hostname).catch(() => {});
    }
    this.globalIndex = { sites: [] };
    await this.saveGlobalIndex();
  }

  /**
   * Start a background cleanup unless one was started within the configured
   * cleanup interval. Failures are logged, never thrown.
   */
  maybeCleanup() {
    const now = Date.now();
    if (now - this.lastCleanup < STORAGE_CONFIG.cleanupInterval) {
      return;
    }
    this.lastCleanup = now;
    this.cleanup().catch((e) => {
      console.error('[DataStore] 清理失败:', e.message);
    });
  }

  /**
   * Run all cleanup passes: expired entries, oversized sites, total size.
   * @returns {Promise<void>}
   */
  async cleanup() {
    console.log('[DataStore] 开始清理过期数据...');
    const now = Date.now();
    let totalCleaned = 0;

    // 1. Entries past their maximum age.
    totalCleaned += await this.cleanupExpired(now);

    // 2. Sites above the per-site quota.
    totalCleaned += await this.cleanupOversizedSites();

    // 3. Total size above the global quota.
    totalCleaned += await this.cleanupTotalSize();

    if (totalCleaned > 0) {
      console.log(`[DataStore] 清理完成,删除 ${totalCleaned} 条记录`);
    }
  }

  /**
   * Delete responses and scripts older than the configured maximum age.
   *
   * One predicate decides both which files are deleted and which entries leave
   * the index, so an entry is never dropped from the index while its file stays.
   *
   * @param {number} now - Reference time in ms since epoch.
   * @returns {Promise<number>} Number of deleted entries.
   */
  async cleanupExpired(now) {
    let cleaned = 0;
    const maxAge = STORAGE_CONFIG.maxAge;
    const isExpired = (entry) => now - entry.timestamp > maxAge;

    for (const s of this.globalIndex.sites) {
      const index = await this.getSiteIndex(s.hostname);
      const expired = [
        ...index.responses.filter(isExpired),
        ...index.scripts.filter(isExpired),
      ];
      if (expired.length === 0) continue;

      for (const entry of expired) {
        // Best effort: an undeletable file must not block the rest of the cleanup.
        await rm(entry.file, { force: true }).catch(() => {});
        cleaned++;
      }

      const removed = new Set(expired);
      index.responses = index.responses.filter((r) => !removed.has(r));
      index.scripts = index.scripts.filter((sc) => !removed.has(sc));
      // Keep the counters reported by getSiteList() in line with the index.
      s.responseCount = index.responses.length;
      s.scriptCount = index.scripts.length;
      await this.saveSiteIndex(s.hostname);
    }

    if (cleaned > 0) {
      await this.saveGlobalIndex();
    }
    return cleaned;
  }

  /**
   * Shrink every site that exceeds the per-site quota by deleting its oldest
   * entries (responses and scripts alike) until it fits.
   *
   * @returns {Promise<number>} Number of deleted entries.
   */
  async cleanupOversizedSites() {
    let cleaned = 0;
    const maxSize = STORAGE_CONFIG.maxSizePerSite;
    const sizeOf = (entry) => entry.size || 0;

    for (const s of this.globalIndex.sites) {
      const index = await this.getSiteIndex(s.hostname);
      const entries = [...index.responses, ...index.scripts];

      let totalSize = entries.reduce((sum, entry) => sum + sizeOf(entry), 0);
      if (totalSize <= maxSize) continue;

      // Oldest first.
      entries.sort((a, b) => a.timestamp - b.timestamp);

      const evicted = new Set();
      for (const entry of entries) {
        if (totalSize <= maxSize) break;
        await rm(entry.file, { force: true }).catch(() => {});
        totalSize -= sizeOf(entry);
        evicted.add(entry);
        cleaned++;
      }

      // Filter once per site instead of once per evicted entry.
      index.responses = index.responses.filter((r) => !evicted.has(r));
      index.scripts = index.scripts.filter((sc) => !evicted.has(sc));
      s.responseCount = index.responses.length;
      s.scriptCount = index.scripts.length;
      await this.saveSiteIndex(s.hostname);
    }

    if (cleaned > 0) {
      await this.saveGlobalIndex();
    }
    return cleaned;
  }

  /**
   * Enforce the global quota by clearing whole sites, least recently accessed
   * first, until the total fits. The last remaining site is never cleared.
   *
   * @returns {Promise<number>} Number of cleared sites.
   */
  async cleanupTotalSize() {
    let cleaned = 0;
    const maxTotal = STORAGE_CONFIG.maxTotalSize;

    // Size of every site according to its index.
    const siteStats = [];
    for (const s of this.globalIndex.sites) {
      const index = await this.getSiteIndex(s.hostname);
      let size = 0;
      for (const r of index.responses) size += r.size || 0;
      for (const sc of index.scripts) size += sc.size || 0;
      siteStats.push({ hostname: s.hostname, size, lastAccess: s.lastAccess });
    }

    let totalSize = siteStats.reduce((sum, s) => sum + s.size, 0);
    if (totalSize <= maxTotal) return 0;

    // Least recently accessed first.
    siteStats.sort((a, b) => a.lastAccess - b.lastAccess);

    while (totalSize > maxTotal && siteStats.length > 1) {
      const oldest = siteStats.shift();
      await this.clearSite(oldest.hostname);
      totalSize -= oldest.size;
      cleaned++;
      console.log(`[DataStore] 清理站点: ${oldest.hostname}`);
    }

    return cleaned;
  }
}
697
+
// Module-wide singleton, created on first request.
let instance = null;

/**
 * Return the shared DataStore, constructing it the first time it is requested.
 * @returns {DataStore} The singleton instance.
 */
export function getDataStore() {
  if (instance !== null) {
    return instance;
  }
  instance = new DataStore();
  return instance;
}
707
+
708
+ export default DataStore;