deepspider 0.2.12 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/README.md +53 -27
  2. package/bin/cli.js +45 -0
  3. package/package.json +10 -4
  4. package/src/agent/run.js +54 -63
  5. package/src/agent/setup.js +14 -14
  6. package/src/cli/commands/config.js +94 -0
  7. package/src/cli/commands/help.js +34 -0
  8. package/src/cli/commands/update.js +78 -0
  9. package/src/cli/commands/version.js +9 -0
  10. package/src/cli/config.js +15 -0
  11. package/src/config/settings.js +102 -0
  12. package/.claude/agents/check.md +0 -122
  13. package/.claude/agents/debug.md +0 -106
  14. package/.claude/agents/dispatch.md +0 -214
  15. package/.claude/agents/implement.md +0 -96
  16. package/.claude/agents/plan.md +0 -396
  17. package/.claude/agents/research.md +0 -120
  18. package/.claude/commands/evolve/merge.md +0 -80
  19. package/.claude/commands/trellis/before-backend-dev.md +0 -13
  20. package/.claude/commands/trellis/before-frontend-dev.md +0 -13
  21. package/.claude/commands/trellis/break-loop.md +0 -107
  22. package/.claude/commands/trellis/check-backend.md +0 -13
  23. package/.claude/commands/trellis/check-cross-layer.md +0 -153
  24. package/.claude/commands/trellis/check-frontend.md +0 -13
  25. package/.claude/commands/trellis/create-command.md +0 -154
  26. package/.claude/commands/trellis/finish-work.md +0 -129
  27. package/.claude/commands/trellis/integrate-skill.md +0 -219
  28. package/.claude/commands/trellis/onboard.md +0 -358
  29. package/.claude/commands/trellis/parallel.md +0 -193
  30. package/.claude/commands/trellis/record-session.md +0 -62
  31. package/.claude/commands/trellis/start.md +0 -280
  32. package/.claude/commands/trellis/update-spec.md +0 -213
  33. package/.claude/hooks/inject-subagent-context.py +0 -758
  34. package/.claude/hooks/ralph-loop.py +0 -374
  35. package/.claude/hooks/session-start.py +0 -126
  36. package/.claude/settings.json +0 -41
  37. package/.claude/skills/deepagents-guide/SKILL.md +0 -428
  38. package/.cursor/commands/trellis-before-backend-dev.md +0 -13
  39. package/.cursor/commands/trellis-before-frontend-dev.md +0 -13
  40. package/.cursor/commands/trellis-break-loop.md +0 -107
  41. package/.cursor/commands/trellis-check-backend.md +0 -13
  42. package/.cursor/commands/trellis-check-cross-layer.md +0 -153
  43. package/.cursor/commands/trellis-check-frontend.md +0 -13
  44. package/.cursor/commands/trellis-create-command.md +0 -154
  45. package/.cursor/commands/trellis-finish-work.md +0 -129
  46. package/.cursor/commands/trellis-integrate-skill.md +0 -219
  47. package/.cursor/commands/trellis-onboard.md +0 -358
  48. package/.cursor/commands/trellis-record-session.md +0 -62
  49. package/.cursor/commands/trellis-start.md +0 -156
  50. package/.cursor/commands/trellis-update-spec.md +0 -213
  51. package/.github/workflows/publish.yml +0 -63
  52. package/.husky/pre-commit +0 -1
  53. package/.mcp.json +0 -8
  54. package/.trellis/.template-hashes.json +0 -65
  55. package/.trellis/.version +0 -1
  56. package/.trellis/scripts/add-session.sh +0 -384
  57. package/.trellis/scripts/common/developer.sh +0 -129
  58. package/.trellis/scripts/common/git-context.sh +0 -263
  59. package/.trellis/scripts/common/paths.sh +0 -208
  60. package/.trellis/scripts/common/phase.sh +0 -150
  61. package/.trellis/scripts/common/registry.sh +0 -247
  62. package/.trellis/scripts/common/task-queue.sh +0 -142
  63. package/.trellis/scripts/common/task-utils.sh +0 -151
  64. package/.trellis/scripts/common/worktree.sh +0 -128
  65. package/.trellis/scripts/create-bootstrap.sh +0 -299
  66. package/.trellis/scripts/get-context.sh +0 -7
  67. package/.trellis/scripts/get-developer.sh +0 -15
  68. package/.trellis/scripts/init-developer.sh +0 -34
  69. package/.trellis/scripts/multi-agent/cleanup.sh +0 -396
  70. package/.trellis/scripts/multi-agent/create-pr.sh +0 -241
  71. package/.trellis/scripts/multi-agent/plan.sh +0 -207
  72. package/.trellis/scripts/multi-agent/start.sh +0 -310
  73. package/.trellis/scripts/multi-agent/status.sh +0 -828
  74. package/.trellis/scripts/task.sh +0 -1118
  75. package/.trellis/spec/backend/ci-cd-guidelines.md +0 -73
  76. package/.trellis/spec/backend/deepagents-guide.md +0 -380
  77. package/.trellis/spec/backend/directory-structure.md +0 -145
  78. package/.trellis/spec/backend/examples/skills/deepagents-guide/README.md +0 -11
  79. package/.trellis/spec/backend/examples/skills/deepagents-guide/agent.js.template +0 -20
  80. package/.trellis/spec/backend/examples/skills/deepagents-guide/skills-config.js.template +0 -13
  81. package/.trellis/spec/backend/examples/skills/deepagents-guide/subagent.js.template +0 -19
  82. package/.trellis/spec/backend/hook-guidelines.md +0 -218
  83. package/.trellis/spec/backend/index.md +0 -37
  84. package/.trellis/spec/backend/quality-guidelines.md +0 -377
  85. package/.trellis/spec/backend/state-management.md +0 -76
  86. package/.trellis/spec/backend/tool-guidelines.md +0 -144
  87. package/.trellis/spec/backend/type-safety.md +0 -71
  88. package/.trellis/spec/guides/code-reuse-thinking-guide.md +0 -92
  89. package/.trellis/spec/guides/cross-layer-thinking-guide.md +0 -94
  90. package/.trellis/spec/guides/index.md +0 -79
  91. package/.trellis/tasks/archive/02-02-evolving-skills/prd.md +0 -61
  92. package/.trellis/tasks/archive/02-02-evolving-skills/task.json +0 -29
  93. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/prd.md +0 -86
  94. package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/task.json +0 -27
  95. package/.trellis/tasks/archive/2026-02/02-02-skills-system/check.jsonl +0 -3
  96. package/.trellis/tasks/archive/2026-02/02-02-skills-system/debug.jsonl +0 -2
  97. package/.trellis/tasks/archive/2026-02/02-02-skills-system/implement.jsonl +0 -5
  98. package/.trellis/tasks/archive/2026-02/02-02-skills-system/prd.md +0 -33
  99. package/.trellis/tasks/archive/2026-02/02-02-skills-system/task.json +0 -41
  100. package/.trellis/workflow.md +0 -407
  101. package/.trellis/workspace/index.md +0 -123
  102. package/.trellis/workspace/pony/index.md +0 -42
  103. package/.trellis/workspace/pony/journal-1.md +0 -125
  104. package/.trellis/worktree.yaml +0 -47
  105. package/AGENTS.md +0 -18
  106. package/CLAUDE.md +0 -315
  107. package/agents/deepspider.md +0 -142
  108. package/docs/DEBUG.md +0 -42
  109. package/docs/GUIDE.md +0 -338
  110. package/docs/PROMPT.md +0 -59
  111. package/docs/USAGE.md +0 -230
  112. package/eslint.config.js +0 -51
  113. package/test/analyze.test.js +0 -90
  114. package/test/envdump.test.js +0 -74
  115. package/test/flow.test.js +0 -90
  116. package/test/hooks.test.js +0 -138
  117. package/test/plugin.test.js +0 -35
  118. package/test/refactor-full.test.js +0 -30
  119. package/test/refactor.test.js +0 -21
  120. package/test/samples/obfuscated.js +0 -61
  121. package/test/samples/original.js +0 -66
  122. package/test/samples/v10_eval_chain.js +0 -52
  123. package/test/samples/v11_bytecode_vm.js +0 -81
  124. package/test/samples/v12_polymorphic.js +0 -69
  125. package/test/samples/v1_ob_basic.js +0 -98
  126. package/test/samples/v2_ob_advanced.js +0 -99
  127. package/test/samples/v3_jjencode.js +0 -77
  128. package/test/samples/v4_aaencode.js +0 -73
  129. package/test/samples/v5_control_flow.js +0 -86
  130. package/test/samples/v6_string_encryption.js +0 -71
  131. package/test/samples/v7_jsvmp.js +0 -83
  132. package/test/samples/v8_anti_debug.js +0 -79
  133. package/test/samples/v9_proxy_trap.js +0 -49
  134. package/test/samples.test.js +0 -96
  135. package/test/webcrack.test.js +0 -55
@@ -1,218 +0,0 @@
1
- # Hook Guidelines
2
-
3
- > 浏览器 Hook 注入规范
4
-
5
- ---
6
-
7
- ## Overview
8
-
9
- DeepSpider 使用 Hook 拦截浏览器 API 来采集加密调用、网络请求等数据。
10
- Hook 脚本通过 CDP 注入到页面中执行。
11
-
12
- ---
13
-
14
- ## Hook Types
15
-
16
- | Hook 类型 | 位置 | 用途 |
17
- |-----------|------|------|
18
- | CryptoHook | `src/env/CryptoHook.js` | 拦截加密 API |
19
- | NetworkHook | `src/env/NetworkHook.js` | 拦截网络请求 |
20
- | Browser Hooks | `src/browser/hooks/` | 浏览器注入脚本 |
21
-
22
- ---
23
-
24
- ## Browser Hook Pattern
25
-
26
- 浏览器注入脚本结构:
27
-
28
- ```javascript
29
- // src/browser/hooks/crypto.js
30
- export function getCryptoHookScript() {
31
- return `
32
- (function() {
33
- const original = window.crypto.subtle.digest;
34
- window.crypto.subtle.digest = async function(...args) {
35
- console.log('[Hook] crypto.digest:', args);
36
- return original.apply(this, args);
37
- };
38
- })();
39
- `;
40
- }
41
- ```
42
-
43
- **示例**: `src/browser/hooks/crypto.js`
44
-
45
- ---
46
-
47
- ## Naming Conventions
48
-
49
- | 类型 | 命名规则 | 示例 |
50
- |------|----------|------|
51
- | Hook 类 | *Hook | `CryptoHook`, `NetworkHook` |
52
- | 脚本函数 | get*Script | `getCryptoHookScript()` |
53
- | 全局对象 | __deepspider__* | `__deepspider__`, `__deepspider_send__` |
54
-
55
- ---
56
-
57
- ## Common Mistakes
58
-
59
- ### 1. 未保存原始函数
60
-
61
- ```javascript
62
- // ❌ 错误:直接覆盖
63
- window.fetch = function() { ... };
64
-
65
- // ✅ 正确:保存原始函数
66
- const originalFetch = window.fetch;
67
- window.fetch = function(...args) {
68
- // 记录
69
- return originalFetch.apply(this, args);
70
- };
71
- ```
72
-
73
- ### 2. Hook 脚本未使用 IIFE
74
-
75
- ```javascript
76
- // ❌ 错误:污染全局
77
- const hook = ...;
78
-
79
- // ✅ 正确:使用 IIFE 隔离
80
- (function() {
81
- const hook = ...;
82
- })();
83
- ```
84
-
85
- ### 3. 闭包变量陷阱
86
-
87
- ```javascript
88
- // ❌ 错误:循环中的闭包(var 声明的变量被所有回调共享;let/const 每次迭代都有独立绑定,不会出现此问题)
88
- for (var trap in handler) {
90
- wrappedHandler[trap] = function() {
91
- console.log(trap); // trap 始终是最后一个值
92
- };
93
- }
94
-
95
- // ✅ 正确:使用函数工厂
96
- function wrapTrap(trapName, fn) {
97
- return function() {
98
- console.log(trapName);
99
- return fn.apply(this, arguments);
100
- };
101
- }
102
- for (const trap in handler) {
103
- wrappedHandler[trap] = wrapTrap(trap, handler[trap]);
104
- }
105
- ```
106
-
107
- ### 4. 内部操作触发 Hook
108
-
109
- **问题**: 系统内部的消息发送、状态存储等操作也会触发 Hook,产生噪音日志。
110
-
111
- ```javascript
112
- // ❌ 错误:内部操作被记录
113
- sessionStorage.setItem('deepspider_messages', JSON.stringify(messages));
114
- // 触发 Storage Hook 和 JSON Hook,污染日志
115
- ```
116
-
117
- **解决方案**: 使用统一标记过滤内部数据。
118
-
119
- 1. **Storage Hook**: 使用 `deepspider_` 前缀过滤 key
120
- ```javascript
121
- const INTERNAL_PREFIX = 'deepspider_';
122
- storage.setItem = function(key, value) {
123
- if (!key.startsWith(INTERNAL_PREFIX)) {
124
- deepspider.log('storage', { ... });
125
- }
126
- return origSet(key, value);
127
- };
128
- ```
129
-
130
- 2. **JSON Hook**: 使用 `__ds__` 标记过滤内部数据
131
- ```javascript
132
- // 内部消息添加标记
133
- const msg = { __ds__: true, type: 'chat', text: '...' };
134
-
135
- // Hook 中检查标记
136
- const INTERNAL_MARKER = '"__ds__":true';
137
- if (!result.includes(INTERNAL_MARKER)) {
138
- deepspider.log('json', { ... });
139
- }
140
- ```
141
-
142
- **规范**:
143
- - sessionStorage/localStorage key 必须以 `deepspider_` 开头
144
- - 发送到后端的 JSON 消息必须包含 `__ds__: true`
145
- - 面板消息对象必须包含 `__ds__: true`
146
-
147
- ---
148
-
149
- ## Anti-Detection Patterns
150
-
151
- Hook 容易被网站检测,必须做好伪装。
152
-
153
- ### 1. toString 伪装(必须)
154
-
155
- ```javascript
156
- const originalToString = Function.prototype.toString;
157
- const hookedFns = new WeakMap();
158
-
159
- // 包装函数
160
- function native(hookFunc, originalFunc) {
161
- hookedFns.set(hookFunc, originalToString.call(originalFunc));
162
- return hookFunc;
163
- }
164
-
165
- // 重写 toString
166
- Function.prototype.toString = function() {
167
- return hookedFns.has(this)
168
- ? hookedFns.get(this)
169
- : originalToString.call(this);
170
- };
171
- ```
172
-
173
- ### 2. getOwnPropertyDescriptor 保护
174
-
175
- ```javascript
176
- // 网站可能检测属性描述符
177
- const origGetDesc = Object.getOwnPropertyDescriptor;
178
- Object.getOwnPropertyDescriptor = function(obj, prop) {
179
- const desc = origGetDesc.call(Object, obj, prop);
180
- if (desc && hookedFns.has(desc.value)) {
181
- return { value: desc.value, writable: true, enumerable: false, configurable: true };
182
- }
183
- return desc;
184
- };
185
- ```
186
-
187
- ### 3. 隐藏内部属性
188
-
189
- ```javascript
190
- // 隐藏 __deepspider__ 等内部属性
191
- const hiddenProps = ['__deepspider__'];
192
- const origKeys = Object.keys;
193
- Object.keys = function(obj) {
194
- const keys = origKeys.call(Object, obj);
195
- return obj === window ? keys.filter(k => !hiddenProps.includes(k)) : keys;
196
- };
197
- ```
198
-
199
- ---
200
-
201
- ## Dynamic Hook Management
202
-
203
- Hook 应支持运行时动态启用/禁用。
204
-
205
- ### 架构设计
206
-
207
- | 类型 | 控制方式 | 用途 |
208
- |------|----------|------|
209
- | 内置 Hook | config[name] | xhr, fetch, crypto 等 |
210
- | 自定义 Hook | hookRegistry | 针对特定网站 |
211
-
212
- ### 性能优化
213
-
214
- | 配置项 | 默认 | 说明 |
215
- |--------|------|------|
216
- | captureStack | true | 关闭可提升性能 |
217
- | silent | false | 关闭控制台输出 |
218
- | logLimit | 50 | 每个 API 日志上限 |
@@ -1,37 +0,0 @@
1
- # DeepSpider Development Guidelines
2
-
3
- > DeepSpider 项目开发规范
4
-
5
- ---
6
-
7
- ## Overview
8
-
9
- DeepSpider 是基于 DeepAgents + Patchright 的智能爬虫 Agent。
10
- 本目录包含项目的开发规范和代码模式。
11
-
12
- ---
13
-
14
- ## Guidelines Index
15
-
16
- | Guide | Description | Status |
17
- |-------|-------------|--------|
18
- | [Directory Structure](./directory-structure.md) | 项目目录结构和模块组织 | Done |
19
- | [DeepAgents Guide](./deepagents-guide.md) | DeepAgents 框架使用指南 | Done |
20
- | [Tool Guidelines](./tool-guidelines.md) | LangChain 工具定义规范 | Done |
21
- | [Hook Guidelines](./hook-guidelines.md) | 浏览器 Hook 注入规范 | Done |
22
- | [State Management](./state-management.md) | Agent 状态与数据存储 | Done |
23
- | [Quality Guidelines](./quality-guidelines.md) | 代码质量规范 | Done |
24
- | [Type Safety](./type-safety.md) | Zod 类型验证规范 | Done |
25
- | [CI/CD Guidelines](./ci-cd-guidelines.md) | GitHub Actions 自动发布规范 | Done |
26
-
27
- ---
28
-
29
- ## Quick Reference
30
-
31
- 核心规范要点:
32
-
33
- 1. **Agent 创建**: 使用 `createDeepAgent()` + 配置对象
34
- 2. **工具定义**: 使用 `@langchain/core/tools` + Zod schema
35
- 3. **浏览器交互**: 优先使用 CDP,避免 `page.evaluate()`
36
- 4. **AST 遍历**: 使用 `@babel/traverse`
37
- 5. **数据存储**: 使用 `getDataStore()` 单例
@@ -1,377 +0,0 @@
1
- # Quality Guidelines
2
-
3
- > DeepSpider 代码质量规范
4
-
5
- ---
6
-
7
- ## Overview
8
-
9
- DeepSpider 遵循 CLAUDE.md 中定义的代码规范,重点关注:
10
- - CDP 优先的浏览器交互
11
- - Babel AST 遍历模式
12
- - LangChain 工具定义规范
13
-
14
- ---
15
-
16
- ## Forbidden Patterns
17
-
18
- ### 1. 使用 page.evaluate 代替 CDP
19
-
20
- ```javascript
21
- // ❌ 禁止
22
- const result = await page.evaluate(() => { ... });
23
-
24
- // ✅ 使用 CDP
25
- const cdp = await browser.getCDPSession();
26
- const result = await cdp.send('Runtime.evaluate', { ... });
27
- ```
28
-
29
- ### 2. 直接访问封装类的内部属性
30
-
31
- ```javascript
32
- // ❌ 禁止:暴露内部实现
33
- cdpSession.client.on('Debugger.paused', handler);
34
-
35
- // ✅ 使用封装类提供的方法
36
- cdpSession.on('Debugger.paused', handler);
37
- ```
38
-
39
- **原因**: 直接访问 `.client` 会导致封装泄漏,当内部实现变化时调用方会报错。
40
-
41
- ### 3. 子代理不配置中间件
42
-
43
- ```javascript
44
- // ❌ 禁止:只在主 Agent 配置中间件,子代理不配置
45
- // index.js
46
- const agent = createDeepAgent({
47
- middleware: [createFilterToolsMiddleware()],
48
- subagents: [subagent1, subagent2],
49
- });
50
-
51
- // subagent1.js - 没有中间件
52
- export const subagent1 = {
53
- name: 'subagent1',
54
- tools: [...],
55
- middleware: [], // 空!
56
- };
57
-
58
- // ✅ 子代理也需要配置相同的中间件
59
- export const subagent1 = {
60
- name: 'subagent1',
61
- tools: [...],
62
- middleware: [
63
- createFilterToolsMiddleware(), // 必须添加
64
- createSkillsMiddleware({ ... }),
65
- ],
66
- };
67
- ```
68
-
69
- **原因**: DeepAgents 子代理不会继承主 Agent 的中间件配置。如果主 Agent 过滤了内置工具,子代理也必须单独配置过滤中间件,否则子代理仍会使用被过滤的工具。
70
-
71
- ### 4. setInterval 中使用 async 回调
72
-
73
- ```javascript
74
- // ❌ 禁止:async 回调不会被等待,可能导致并发问题
75
- setInterval(async () => {
76
- const result = await detectCaptcha();
77
- await handleResult(result);
78
- }, 30000);
79
-
80
- // ✅ 保持同步,只做状态检查和标记
81
- let needsCheck = false;
82
- setInterval(() => {
83
- const elapsed = Date.now() - lastEventTime;
84
- if (elapsed > timeout) {
85
- console.log('[提示] 超时,请检查页面');
86
- }
87
- }, 30000);
88
- ```
89
-
90
- **原因**: setInterval 不会等待 async 回调完成,多次触发会导致并发执行。
91
-
92
- ### 5. spawn 使用不存在的 timeout 选项
93
-
94
- ```javascript
95
- // ❌ 禁止:旧版 Node.js(低于 v15.13.0 / v14.18.0)的 spawn 不支持 timeout 选项,超时不会生效
96
- const proc = spawn('node', ['-e', code], {
97
- timeout: 10000, // 无效!
98
- });
99
-
100
- // ✅ 手动实现超时
101
- const proc = spawn('node', ['-e', code]);
102
- let killed = false;
103
-
104
- const timer = setTimeout(() => {
105
- killed = true;
106
- proc.kill('SIGTERM');
107
- }, 10000);
108
-
109
- proc.on('close', () => {
110
- clearTimeout(timer);
111
- });
112
- ```
113
-
114
- **原因**: `spawn` 的 `timeout` 选项直到 Node.js v15.13.0 / v14.18.0 才被加入;在更早的版本中只有 `exec`/`execFile`/`execSync` 等支持该选项,传给 `spawn` 会被静默忽略。需要兼容旧版本时,使用 spawn 必须手动实现超时逻辑。
115
-
116
- ### 6. 用正则替换 HTML 字符串
117
-
118
- ```javascript
119
- // ❌ 禁止:正则替换 HTML 字符串会破坏结构
120
- function linkifyPaths(html) {
121
- return html.replace(/(\/[\w.\-\/]+)/g, '<a href="$1">$1</a>');
122
- }
123
- // 会把 </strong> 中的 /strong 也匹配成路径!
124
-
125
- // ✅ 使用 DOM TreeWalker 遍历文本节点
126
- function linkifyPaths(container) {
127
- const walker = document.createTreeWalker(container, NodeFilter.SHOW_TEXT);
128
- const textNodes = [];
129
- while (walker.nextNode()) textNodes.push(walker.currentNode);
130
-
131
- textNodes.forEach(node => {
132
- // 只处理纯文本,不会影响 HTML 标签
133
- });
134
- }
135
- ```
136
-
137
- **原因**: 正则无法区分 HTML 标签和文本内容,容易误匹配导致结构破坏。
138
-
139
- ### 7. LLM 工具参数传递大段代码
140
-
141
- ```javascript
142
- // ❌ 禁止:直接传递大段代码内容,可能被 LLM 截断
143
- await saveReport({ pythonCode: longCodeString });
144
-
145
- // ✅ 先保存到文件,再传递文件路径
146
- await artifactSave({ path: 'domain/decrypt.py', content: code });
147
- await saveReport({ pythonCodeFile: 'domain/decrypt.py' });
148
- ```
149
-
150
- **原因**: LLM 输出有长度限制,大段代码作为参数传递时可能被截断。分步保存确保代码完整性。
151
-
152
- ---
153
-
154
- ## Required Patterns
155
-
156
- ### 1. Babel AST 遍历
157
-
158
- ```javascript
159
- import traverse from '@babel/traverse';
160
-
161
- traverse.default(ast, {
162
- FunctionDeclaration(path) {
163
- // 处理
164
- }
165
- });
166
- ```
167
-
168
- ### 2. CDP Session 复用
169
-
170
- ```javascript
171
- const cdp = await browser.getCDPSession();
172
- ```
173
-
174
- ### 3. Hook 日志记录调用位置
175
-
176
- ```javascript
177
- // ✅ 在日志中包含解析后的调用位置
178
- const entry = {
179
- ...data,
180
- timestamp: Date.now(),
181
- stack: stack,
182
- caller: caller, // { func, file, line, col }
183
- };
184
-
185
- // 控制台输出显示文件名和行号
186
- const loc = caller ? ' @ ' + caller.file.split('/').pop() + ':' + caller.line : '';
187
- console.log('[DeepSpider:' + type + ']' + loc, data);
188
- ```
189
-
190
- **原因**: Hook 日志需要记录 JS 文件调用位置,便于快速定位加密代码来源。
191
-
192
- ---
193
-
194
- ## Release Process
195
-
196
- ### 原生模块依赖处理
197
-
198
- 项目依赖 `isolated-vm` 等原生 C++ 模块,需要编译环境。
199
-
200
- **postinstall 自动处理**:
201
- ```json
202
- {
203
- "scripts": {
204
- "postinstall": "patchright install chromium && npm rebuild isolated-vm 2>/dev/null || true"
205
- }
206
- }
207
- ```
208
-
209
- **编译环境要求**:
210
- - macOS: `xcode-select --install`
211
- - Ubuntu: `sudo apt install build-essential`
212
- - Windows: Visual Studio Build Tools
213
-
214
- > **注意**: `2>/dev/null || true` 确保编译失败不会阻塞安装,但沙箱功能可能不可用。
215
-
216
- ---
217
-
218
- ### 版本发布流程
219
-
220
- 升级版本并推送 tag,GitHub Actions 会自动发布到 npm:
221
-
222
- ```bash
223
- # 1. 升级 package.json 版本
224
- # 编辑 package.json 中的 version 字段
225
-
226
- # 2. 提交版本变更
227
- git add package.json
228
- git commit -m "chore: bump version to x.x.x"
229
-
230
- # 3. 创建并推送 git tag
231
- git tag -a vx.x.x -m "vx.x.x"
232
- git push && git push origin vx.x.x
233
- ```
234
-
235
- > **注意**: 推送 tag 后 GitHub Actions 会自动触发 npm 发布,无需手动 `npm publish`。
236
-
237
- **原因**: 自动化发布避免手动操作失误,确保版本一致性。
238
-
239
- ---
240
-
241
- ## Testing Requirements
242
-
243
- 运行测试:
244
-
245
- ```bash
246
- pnpm test
247
- ```
248
-
249
- ---
250
-
251
- ## Code Review Checklist
252
-
253
- - [ ] 工具名称使用 snake_case
254
- - [ ] 参数有 describe 描述
255
- - [ ] 浏览器交互使用 CDP
256
- - [ ] AST 遍历使用 Babel
257
- - [ ] 数组访问前检查边界
258
- - [ ] 对象访问前检查空值
259
-
260
- ---
261
-
262
- ## Defensive Programming
263
-
264
- ### 1. 数组索引边界检查
265
-
266
- ```javascript
267
- // ❌ 禁止:直接访问可能越界
268
- const stage = stages[parseInt(index)];
269
- stage.fields.push(field);
270
-
271
- // ✅ 先检查边界
272
- const idx = Number.parseInt(index, 10);
273
- if (Number.isNaN(idx) || idx < 0 || idx >= stages.length) return;
274
- const stage = stages[idx];
275
- ```
276
-
277
- ### 2. 工厂函数避免重复结构
278
-
279
- ```javascript
280
- // ❌ 禁止:多处重复对象字面量
281
- stages.push({ name: 'list', fields: [], entry: null });
282
- // ... 另一处
283
- stages = [{ name: 'list', fields: [], entry: null }];
284
-
285
- // ✅ 使用工厂函数
286
- function createStage(name) {
287
- return { name, fields: [], entry: null, pagination: null };
288
- }
289
- stages.push(createStage('list'));
290
- ```
291
-
292
- ### 3. 空值检查
293
-
294
- ```javascript
295
- // ❌ 禁止:假设对象存在
296
- currentStage.fields.splice(index, 1);
297
-
298
- // ✅ 先检查
299
- if (!currentStage) return;
300
- if (index < 0 || index >= currentStage.fields.length) return;
301
- currentStage.fields.splice(index, 1);
302
- ```
303
-
304
- ---
305
-
306
- ## Modularization Patterns
307
-
308
- ### 1. 大文件拆分原则
309
-
310
- 当文件超过 300 行时,考虑按职责拆分:
311
-
312
- ```javascript
313
- // ❌ 禁止:单文件包含多种职责
314
- // run.js (600+ 行)
315
- // - 流式处理逻辑
316
- // - 重试策略
317
- // - 面板通信
318
- // - 错误分类
319
-
320
- // ✅ 按职责拆分到 core/ 目录
321
- // src/agent/core/
322
- // ├── StreamHandler.js # 流式输出处理
323
- // ├── RetryManager.js # 重试策略
324
- // ├── PanelBridge.js # 面板通信
325
- // └── index.js # 模块导出
326
- ```
327
-
328
- **原因**: 单一职责原则,便于测试和维护。
329
-
330
- ### 2. 使用子代理工厂函数
331
-
332
- ```javascript
333
- // ❌ 禁止:每个子代理重复配置中间件
334
- export const staticSubagent = {
335
- name: 'static-agent',
336
- tools: [...staticTools, ...evolveTools],
337
- middleware: [
338
- createFilterToolsMiddleware(),
339
- createSkillsMiddleware({ backend, sources: [SKILLS.static] }),
340
- ],
341
- };
342
-
343
- // ✅ 使用工厂函数统一配置
344
- import { createSubagent, SKILLS } from './factory.js';
345
-
346
- export const staticSubagent = createSubagent({
347
- name: 'static-agent',
348
- description: '静态分析专家',
349
- systemPrompt: '...',
350
- tools: staticTools,
351
- skills: [SKILLS.static],
352
- });
353
- ```
354
-
355
- **原因**: 工厂函数自动注入公共中间件和 evolveTools,避免遗漏。
356
-
357
- ### 3. 结构化错误类型
358
-
359
- ```javascript
360
- // ❌ 禁止:字符串匹配判断错误类型
361
- if (/503|502|429/.test(error.message)) {
362
- // 重试
363
- }
364
-
365
- // ✅ 使用结构化错误类型
366
- import { ApiServiceError, isApiServiceError } from './errors/index.js';
367
-
368
- // 抛出时
369
- throw new ApiServiceError('服务不可用', { statusCode: 503 });
370
-
371
- // 捕获时
372
- if (isApiServiceError(error)) {
373
- // 重试
374
- }
375
- ```
376
-
377
- **原因**: 结构化错误便于分类处理,支持携带额外上下文。
@@ -1,76 +0,0 @@
1
- # State Management
2
-
3
- > Agent 状态与数据存储规范
4
-
5
- ---
6
-
7
- ## Overview
8
-
9
- DeepSpider 使用 DeepAgents 的状态后端和文件系统存储管理数据。
10
- Agent 状态通过 FilesystemBackend 持久化,采集数据通过 DataStore 存储。
11
-
12
- ---
13
-
14
- ## State Categories
15
-
16
- | 类型 | 存储方式 | 示例 |
17
- |------|----------|------|
18
- | Agent 状态 | FilesystemBackend | `.deepspider-agent/` |
19
- | 采集数据 | DataStore | `.deepspider-data/` |
20
- | 会话状态 | MemorySaver | 内存中 |
21
-
22
- ---
23
-
24
- ## DataStore Pattern
25
-
26
- 数据存储使用单例模式:
27
-
28
- ```javascript
29
- import { getDataStore } from '../store/DataStore.js';
30
-
31
- const store = getDataStore();
32
- await store.saveResponse(data);
33
- ```
34
-
35
- **示例**: `src/store/DataStore.js:699-706`
36
-
37
- ---
38
-
39
- ## Agent Backend
40
-
41
- Agent 状态后端配置:
42
-
43
- ```javascript
44
- import { FilesystemBackend } from 'deepagents';
45
-
46
- const backend = new FilesystemBackend({
47
- rootDir: './.deepspider-agent'
48
- });
49
- ```
50
-
51
- **示例**: `src/agent/index.js:59-62`
52
-
53
- ---
54
-
55
- ## Common Mistakes
56
-
57
- ### 1. 未使用单例
58
-
59
- ```javascript
60
- // ❌ 错误:每次创建新实例
61
- const store = new DataStore();
62
-
63
- // ✅ 正确:使用单例
64
- const store = getDataStore();
65
- ```
66
-
67
- ### 2. 忘记启动会话
68
-
69
- ```javascript
70
- // ❌ 错误:直接保存
71
- await store.saveResponse(data);
72
-
73
- // ✅ 正确:先启动会话
74
- store.startSession();
75
- await store.saveResponse(data);
76
- ```