deepspider 0.2.12 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -27
- package/bin/cli.js +45 -0
- package/package.json +10 -4
- package/src/agent/run.js +54 -63
- package/src/agent/setup.js +14 -14
- package/src/cli/commands/config.js +94 -0
- package/src/cli/commands/help.js +34 -0
- package/src/cli/commands/update.js +78 -0
- package/src/cli/commands/version.js +9 -0
- package/src/cli/config.js +15 -0
- package/src/config/settings.js +102 -0
- package/.claude/agents/check.md +0 -122
- package/.claude/agents/debug.md +0 -106
- package/.claude/agents/dispatch.md +0 -214
- package/.claude/agents/implement.md +0 -96
- package/.claude/agents/plan.md +0 -396
- package/.claude/agents/research.md +0 -120
- package/.claude/commands/evolve/merge.md +0 -80
- package/.claude/commands/trellis/before-backend-dev.md +0 -13
- package/.claude/commands/trellis/before-frontend-dev.md +0 -13
- package/.claude/commands/trellis/break-loop.md +0 -107
- package/.claude/commands/trellis/check-backend.md +0 -13
- package/.claude/commands/trellis/check-cross-layer.md +0 -153
- package/.claude/commands/trellis/check-frontend.md +0 -13
- package/.claude/commands/trellis/create-command.md +0 -154
- package/.claude/commands/trellis/finish-work.md +0 -129
- package/.claude/commands/trellis/integrate-skill.md +0 -219
- package/.claude/commands/trellis/onboard.md +0 -358
- package/.claude/commands/trellis/parallel.md +0 -193
- package/.claude/commands/trellis/record-session.md +0 -62
- package/.claude/commands/trellis/start.md +0 -280
- package/.claude/commands/trellis/update-spec.md +0 -213
- package/.claude/hooks/inject-subagent-context.py +0 -758
- package/.claude/hooks/ralph-loop.py +0 -374
- package/.claude/hooks/session-start.py +0 -126
- package/.claude/settings.json +0 -41
- package/.claude/skills/deepagents-guide/SKILL.md +0 -428
- package/.cursor/commands/trellis-before-backend-dev.md +0 -13
- package/.cursor/commands/trellis-before-frontend-dev.md +0 -13
- package/.cursor/commands/trellis-break-loop.md +0 -107
- package/.cursor/commands/trellis-check-backend.md +0 -13
- package/.cursor/commands/trellis-check-cross-layer.md +0 -153
- package/.cursor/commands/trellis-check-frontend.md +0 -13
- package/.cursor/commands/trellis-create-command.md +0 -154
- package/.cursor/commands/trellis-finish-work.md +0 -129
- package/.cursor/commands/trellis-integrate-skill.md +0 -219
- package/.cursor/commands/trellis-onboard.md +0 -358
- package/.cursor/commands/trellis-record-session.md +0 -62
- package/.cursor/commands/trellis-start.md +0 -156
- package/.cursor/commands/trellis-update-spec.md +0 -213
- package/.github/workflows/publish.yml +0 -63
- package/.husky/pre-commit +0 -1
- package/.mcp.json +0 -8
- package/.trellis/.template-hashes.json +0 -65
- package/.trellis/.version +0 -1
- package/.trellis/scripts/add-session.sh +0 -384
- package/.trellis/scripts/common/developer.sh +0 -129
- package/.trellis/scripts/common/git-context.sh +0 -263
- package/.trellis/scripts/common/paths.sh +0 -208
- package/.trellis/scripts/common/phase.sh +0 -150
- package/.trellis/scripts/common/registry.sh +0 -247
- package/.trellis/scripts/common/task-queue.sh +0 -142
- package/.trellis/scripts/common/task-utils.sh +0 -151
- package/.trellis/scripts/common/worktree.sh +0 -128
- package/.trellis/scripts/create-bootstrap.sh +0 -299
- package/.trellis/scripts/get-context.sh +0 -7
- package/.trellis/scripts/get-developer.sh +0 -15
- package/.trellis/scripts/init-developer.sh +0 -34
- package/.trellis/scripts/multi-agent/cleanup.sh +0 -396
- package/.trellis/scripts/multi-agent/create-pr.sh +0 -241
- package/.trellis/scripts/multi-agent/plan.sh +0 -207
- package/.trellis/scripts/multi-agent/start.sh +0 -310
- package/.trellis/scripts/multi-agent/status.sh +0 -828
- package/.trellis/scripts/task.sh +0 -1118
- package/.trellis/spec/backend/ci-cd-guidelines.md +0 -73
- package/.trellis/spec/backend/deepagents-guide.md +0 -380
- package/.trellis/spec/backend/directory-structure.md +0 -145
- package/.trellis/spec/backend/examples/skills/deepagents-guide/README.md +0 -11
- package/.trellis/spec/backend/examples/skills/deepagents-guide/agent.js.template +0 -20
- package/.trellis/spec/backend/examples/skills/deepagents-guide/skills-config.js.template +0 -13
- package/.trellis/spec/backend/examples/skills/deepagents-guide/subagent.js.template +0 -19
- package/.trellis/spec/backend/hook-guidelines.md +0 -218
- package/.trellis/spec/backend/index.md +0 -37
- package/.trellis/spec/backend/quality-guidelines.md +0 -377
- package/.trellis/spec/backend/state-management.md +0 -76
- package/.trellis/spec/backend/tool-guidelines.md +0 -144
- package/.trellis/spec/backend/type-safety.md +0 -71
- package/.trellis/spec/guides/code-reuse-thinking-guide.md +0 -92
- package/.trellis/spec/guides/cross-layer-thinking-guide.md +0 -94
- package/.trellis/spec/guides/index.md +0 -79
- package/.trellis/tasks/archive/02-02-evolving-skills/prd.md +0 -61
- package/.trellis/tasks/archive/02-02-evolving-skills/task.json +0 -29
- package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/prd.md +0 -86
- package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/task.json +0 -27
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/check.jsonl +0 -3
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/debug.jsonl +0 -2
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/implement.jsonl +0 -5
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/prd.md +0 -33
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/task.json +0 -41
- package/.trellis/workflow.md +0 -407
- package/.trellis/workspace/index.md +0 -123
- package/.trellis/workspace/pony/index.md +0 -42
- package/.trellis/workspace/pony/journal-1.md +0 -125
- package/.trellis/worktree.yaml +0 -47
- package/AGENTS.md +0 -18
- package/CLAUDE.md +0 -315
- package/agents/deepspider.md +0 -142
- package/docs/DEBUG.md +0 -42
- package/docs/GUIDE.md +0 -338
- package/docs/PROMPT.md +0 -59
- package/docs/USAGE.md +0 -230
- package/eslint.config.js +0 -51
- package/test/analyze.test.js +0 -90
- package/test/envdump.test.js +0 -74
- package/test/flow.test.js +0 -90
- package/test/hooks.test.js +0 -138
- package/test/plugin.test.js +0 -35
- package/test/refactor-full.test.js +0 -30
- package/test/refactor.test.js +0 -21
- package/test/samples/obfuscated.js +0 -61
- package/test/samples/original.js +0 -66
- package/test/samples/v10_eval_chain.js +0 -52
- package/test/samples/v11_bytecode_vm.js +0 -81
- package/test/samples/v12_polymorphic.js +0 -69
- package/test/samples/v1_ob_basic.js +0 -98
- package/test/samples/v2_ob_advanced.js +0 -99
- package/test/samples/v3_jjencode.js +0 -77
- package/test/samples/v4_aaencode.js +0 -73
- package/test/samples/v5_control_flow.js +0 -86
- package/test/samples/v6_string_encryption.js +0 -71
- package/test/samples/v7_jsvmp.js +0 -83
- package/test/samples/v8_anti_debug.js +0 -79
- package/test/samples/v9_proxy_trap.js +0 -49
- package/test/samples.test.js +0 -96
- package/test/webcrack.test.js +0 -55
|
@@ -1,218 +0,0 @@
|
|
|
1
|
-
# Hook Guidelines
|
|
2
|
-
|
|
3
|
-
> 浏览器 Hook 注入规范
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## Overview
|
|
8
|
-
|
|
9
|
-
DeepSpider 使用 Hook 拦截浏览器 API 来采集加密调用、网络请求等数据。
|
|
10
|
-
Hook 脚本通过 CDP 注入到页面中执行。
|
|
11
|
-
|
|
12
|
-
---
|
|
13
|
-
|
|
14
|
-
## Hook Types
|
|
15
|
-
|
|
16
|
-
| Hook 类型 | 位置 | 用途 |
|
|
17
|
-
|-----------|------|------|
|
|
18
|
-
| CryptoHook | `src/env/CryptoHook.js` | 拦截加密 API |
|
|
19
|
-
| NetworkHook | `src/env/NetworkHook.js` | 拦截网络请求 |
|
|
20
|
-
| Browser Hooks | `src/browser/hooks/` | 浏览器注入脚本 |
|
|
21
|
-
|
|
22
|
-
---
|
|
23
|
-
|
|
24
|
-
## Browser Hook Pattern
|
|
25
|
-
|
|
26
|
-
浏览器注入脚本结构:
|
|
27
|
-
|
|
28
|
-
```javascript
|
|
29
|
-
// src/browser/hooks/crypto.js
|
|
30
|
-
export function getCryptoHookScript() {
|
|
31
|
-
return `
|
|
32
|
-
(function() {
|
|
33
|
-
const original = window.crypto.subtle.digest;
|
|
34
|
-
window.crypto.subtle.digest = async function(...args) {
|
|
35
|
-
console.log('[Hook] crypto.digest:', args);
|
|
36
|
-
return original.apply(this, args);
|
|
37
|
-
};
|
|
38
|
-
})();
|
|
39
|
-
`;
|
|
40
|
-
}
|
|
41
|
-
```
|
|
42
|
-
|
|
43
|
-
**示例**: `src/browser/hooks/crypto.js`
|
|
44
|
-
|
|
45
|
-
---
|
|
46
|
-
|
|
47
|
-
## Naming Conventions
|
|
48
|
-
|
|
49
|
-
| 类型 | 命名规则 | 示例 |
|
|
50
|
-
|------|----------|------|
|
|
51
|
-
| Hook 类 | *Hook | `CryptoHook`, `NetworkHook` |
|
|
52
|
-
| 脚本函数 | get*Script | `getCryptoHookScript()` |
|
|
53
|
-
| 全局对象 | `__deepspider__*` | `__deepspider__`, `__deepspider_send__` |
|
|
54
|
-
|
|
55
|
-
---
|
|
56
|
-
|
|
57
|
-
## Common Mistakes
|
|
58
|
-
|
|
59
|
-
### 1. 未保存原始函数
|
|
60
|
-
|
|
61
|
-
```javascript
|
|
62
|
-
// ❌ 错误:直接覆盖
|
|
63
|
-
window.fetch = function() { ... };
|
|
64
|
-
|
|
65
|
-
// ✅ 正确:保存原始函数
|
|
66
|
-
const originalFetch = window.fetch;
|
|
67
|
-
window.fetch = function(...args) {
|
|
68
|
-
// 记录
|
|
69
|
-
return originalFetch.apply(this, args);
|
|
70
|
-
};
|
|
71
|
-
```
|
|
72
|
-
|
|
73
|
-
### 2. Hook 脚本未使用 IIFE
|
|
74
|
-
|
|
75
|
-
```javascript
|
|
76
|
-
// ❌ 错误:污染全局
|
|
77
|
-
const hook = ...;
|
|
78
|
-
|
|
79
|
-
// ✅ 正确:使用 IIFE 隔离
|
|
80
|
-
(function() {
|
|
81
|
-
const hook = ...;
|
|
82
|
-
})();
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
### 3. 闭包变量陷阱
|
|
86
|
-
|
|
87
|
-
```javascript
|
|
88
|
-
// ❌ 错误:循环中的闭包
|
|
89
|
-
for (var trap in handler) {
|
|
90
|
-
wrappedHandler[trap] = function() {
|
|
91
|
-
console.log(trap); // trap 始终是最后一个值
|
|
92
|
-
};
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
// ✅ 正确:使用函数工厂
|
|
96
|
-
function wrapTrap(trapName, fn) {
|
|
97
|
-
return function() {
|
|
98
|
-
console.log(trapName);
|
|
99
|
-
return fn.apply(this, arguments);
|
|
100
|
-
};
|
|
101
|
-
}
|
|
102
|
-
for (const trap in handler) {
|
|
103
|
-
wrappedHandler[trap] = wrapTrap(trap, handler[trap]);
|
|
104
|
-
}
|
|
105
|
-
```
|
|
106
|
-
|
|
107
|
-
### 4. 内部操作触发 Hook
|
|
108
|
-
|
|
109
|
-
**问题**: 系统内部的消息发送、状态存储等操作也会触发 Hook,产生噪音日志。
|
|
110
|
-
|
|
111
|
-
```javascript
|
|
112
|
-
// ❌ 错误:内部操作被记录
|
|
113
|
-
sessionStorage.setItem('deepspider_messages', JSON.stringify(messages));
|
|
114
|
-
// 触发 Storage Hook 和 JSON Hook,污染日志
|
|
115
|
-
```
|
|
116
|
-
|
|
117
|
-
**解决方案**: 使用统一标记过滤内部数据。
|
|
118
|
-
|
|
119
|
-
1. **Storage Hook**: 使用 `deepspider_` 前缀过滤 key
|
|
120
|
-
```javascript
|
|
121
|
-
const INTERNAL_PREFIX = 'deepspider_';
|
|
122
|
-
storage.setItem = function(key, value) {
|
|
123
|
-
if (!key.startsWith(INTERNAL_PREFIX)) {
|
|
124
|
-
deepspider.log('storage', { ... });
|
|
125
|
-
}
|
|
126
|
-
return origSet(key, value);
|
|
127
|
-
};
|
|
128
|
-
```
|
|
129
|
-
|
|
130
|
-
2. **JSON Hook**: 使用 `__ds__` 标记过滤内部数据
|
|
131
|
-
```javascript
|
|
132
|
-
// 内部消息添加标记
|
|
133
|
-
const msg = { __ds__: true, type: 'chat', text: '...' };
|
|
134
|
-
|
|
135
|
-
// Hook 中检查标记
|
|
136
|
-
const INTERNAL_MARKER = '"__ds__":true';
|
|
137
|
-
if (!result.includes(INTERNAL_MARKER)) {
|
|
138
|
-
deepspider.log('json', { ... });
|
|
139
|
-
}
|
|
140
|
-
```
|
|
141
|
-
|
|
142
|
-
**规范**:
|
|
143
|
-
- sessionStorage/localStorage key 必须以 `deepspider_` 开头
|
|
144
|
-
- 发送到后端的 JSON 消息必须包含 `__ds__: true`
|
|
145
|
-
- 面板消息对象必须包含 `__ds__: true`
|
|
146
|
-
|
|
147
|
-
---
|
|
148
|
-
|
|
149
|
-
## Anti-Detection Patterns
|
|
150
|
-
|
|
151
|
-
Hook 容易被网站检测,必须做好伪装。
|
|
152
|
-
|
|
153
|
-
### 1. toString 伪装(必须)
|
|
154
|
-
|
|
155
|
-
```javascript
|
|
156
|
-
const originalToString = Function.prototype.toString;
|
|
157
|
-
const hookedFns = new WeakMap();
|
|
158
|
-
|
|
159
|
-
// 包装函数
|
|
160
|
-
function native(hookFunc, originalFunc) {
|
|
161
|
-
hookedFns.set(hookFunc, originalToString.call(originalFunc));
|
|
162
|
-
return hookFunc;
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
// 重写 toString
|
|
166
|
-
Function.prototype.toString = function() {
|
|
167
|
-
return hookedFns.has(this)
|
|
168
|
-
? hookedFns.get(this)
|
|
169
|
-
: originalToString.call(this);
|
|
170
|
-
};
|
|
171
|
-
```
|
|
172
|
-
|
|
173
|
-
### 2. getOwnPropertyDescriptor 保护
|
|
174
|
-
|
|
175
|
-
```javascript
|
|
176
|
-
// 网站可能检测属性描述符
|
|
177
|
-
const origGetDesc = Object.getOwnPropertyDescriptor;
|
|
178
|
-
Object.getOwnPropertyDescriptor = function(obj, prop) {
|
|
179
|
-
const desc = origGetDesc.call(Object, obj, prop);
|
|
180
|
-
if (desc && hookedFns.has(desc.value)) {
|
|
181
|
-
return { value: desc.value, writable: true, enumerable: false, configurable: true };
|
|
182
|
-
}
|
|
183
|
-
return desc;
|
|
184
|
-
};
|
|
185
|
-
```
|
|
186
|
-
|
|
187
|
-
### 3. 隐藏内部属性
|
|
188
|
-
|
|
189
|
-
```javascript
|
|
190
|
-
// 隐藏 __deepspider__ 等内部属性
|
|
191
|
-
const hiddenProps = ['__deepspider__'];
|
|
192
|
-
const origKeys = Object.keys;
|
|
193
|
-
Object.keys = function(obj) {
|
|
194
|
-
const keys = origKeys.call(Object, obj);
|
|
195
|
-
return obj === window ? keys.filter(k => !hiddenProps.includes(k)) : keys;
|
|
196
|
-
};
|
|
197
|
-
```
|
|
198
|
-
|
|
199
|
-
---
|
|
200
|
-
|
|
201
|
-
## Dynamic Hook Management
|
|
202
|
-
|
|
203
|
-
Hook 应支持运行时动态启用/禁用。
|
|
204
|
-
|
|
205
|
-
### 架构设计
|
|
206
|
-
|
|
207
|
-
| 类型 | 控制方式 | 用途 |
|
|
208
|
-
|------|----------|------|
|
|
209
|
-
| 内置 Hook | config[name] | xhr, fetch, crypto 等 |
|
|
210
|
-
| 自定义 Hook | hookRegistry | 针对特定网站 |
|
|
211
|
-
|
|
212
|
-
### 性能优化
|
|
213
|
-
|
|
214
|
-
| 配置项 | 默认 | 说明 |
|
|
215
|
-
|--------|------|------|
|
|
216
|
-
| captureStack | true | 关闭可提升性能 |
|
|
217
|
-
| silent | false | 关闭控制台输出 |
|
|
218
|
-
| logLimit | 50 | 每个 API 日志上限 |
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
# DeepSpider Development Guidelines
|
|
2
|
-
|
|
3
|
-
> DeepSpider 项目开发规范
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## Overview
|
|
8
|
-
|
|
9
|
-
DeepSpider 是基于 DeepAgents + Patchright 的智能爬虫 Agent。
|
|
10
|
-
本目录包含项目的开发规范和代码模式。
|
|
11
|
-
|
|
12
|
-
---
|
|
13
|
-
|
|
14
|
-
## Guidelines Index
|
|
15
|
-
|
|
16
|
-
| Guide | Description | Status |
|
|
17
|
-
|-------|-------------|--------|
|
|
18
|
-
| [Directory Structure](./directory-structure.md) | 项目目录结构和模块组织 | Done |
|
|
19
|
-
| [DeepAgents Guide](./deepagents-guide.md) | DeepAgents 框架使用指南 | Done |
|
|
20
|
-
| [Tool Guidelines](./tool-guidelines.md) | LangChain 工具定义规范 | Done |
|
|
21
|
-
| [Hook Guidelines](./hook-guidelines.md) | 浏览器 Hook 注入规范 | Done |
|
|
22
|
-
| [State Management](./state-management.md) | Agent 状态与数据存储 | Done |
|
|
23
|
-
| [Quality Guidelines](./quality-guidelines.md) | 代码质量规范 | Done |
|
|
24
|
-
| [Type Safety](./type-safety.md) | Zod 类型验证规范 | Done |
|
|
25
|
-
| [CI/CD Guidelines](./ci-cd-guidelines.md) | GitHub Actions 自动发布规范 | Done |
|
|
26
|
-
|
|
27
|
-
---
|
|
28
|
-
|
|
29
|
-
## Quick Reference
|
|
30
|
-
|
|
31
|
-
核心规范要点:
|
|
32
|
-
|
|
33
|
-
1. **Agent 创建**: 使用 `createDeepAgent()` + 配置对象
|
|
34
|
-
2. **工具定义**: 使用 `@langchain/core/tools` + Zod schema
|
|
35
|
-
3. **浏览器交互**: 优先使用 CDP,避免 `page.evaluate()`
|
|
36
|
-
4. **AST 遍历**: 使用 `@babel/traverse`
|
|
37
|
-
5. **数据存储**: 使用 `getDataStore()` 单例
|
|
@@ -1,377 +0,0 @@
|
|
|
1
|
-
# Quality Guidelines
|
|
2
|
-
|
|
3
|
-
> DeepSpider 代码质量规范
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## Overview
|
|
8
|
-
|
|
9
|
-
DeepSpider 遵循 CLAUDE.md 中定义的代码规范,重点关注:
|
|
10
|
-
- CDP 优先的浏览器交互
|
|
11
|
-
- Babel AST 遍历模式
|
|
12
|
-
- LangChain 工具定义规范
|
|
13
|
-
|
|
14
|
-
---
|
|
15
|
-
|
|
16
|
-
## Forbidden Patterns
|
|
17
|
-
|
|
18
|
-
### 1. 使用 page.evaluate 代替 CDP
|
|
19
|
-
|
|
20
|
-
```javascript
|
|
21
|
-
// ❌ 禁止
|
|
22
|
-
const result = await page.evaluate(() => { ... });
|
|
23
|
-
|
|
24
|
-
// ✅ 使用 CDP
|
|
25
|
-
const cdp = await browser.getCDPSession();
|
|
26
|
-
const result = await cdp.send('Runtime.evaluate', { ... });
|
|
27
|
-
```
|
|
28
|
-
|
|
29
|
-
### 2. 直接访问封装类的内部属性
|
|
30
|
-
|
|
31
|
-
```javascript
|
|
32
|
-
// ❌ 禁止:暴露内部实现
|
|
33
|
-
cdpSession.client.on('Debugger.paused', handler);
|
|
34
|
-
|
|
35
|
-
// ✅ 使用封装类提供的方法
|
|
36
|
-
cdpSession.on('Debugger.paused', handler);
|
|
37
|
-
```
|
|
38
|
-
|
|
39
|
-
**原因**: 直接访问 `.client` 会导致封装泄漏,当内部实现变化时调用方会报错。
|
|
40
|
-
|
|
41
|
-
### 3. 子代理不配置中间件
|
|
42
|
-
|
|
43
|
-
```javascript
|
|
44
|
-
// ❌ 禁止:只在主 Agent 配置中间件,子代理不配置
|
|
45
|
-
// index.js
|
|
46
|
-
const agent = createDeepAgent({
|
|
47
|
-
middleware: [createFilterToolsMiddleware()],
|
|
48
|
-
subagents: [subagent1, subagent2],
|
|
49
|
-
});
|
|
50
|
-
|
|
51
|
-
// subagent1.js - 没有中间件
|
|
52
|
-
export const subagent1 = {
|
|
53
|
-
name: 'subagent1',
|
|
54
|
-
tools: [...],
|
|
55
|
-
middleware: [], // 空!
|
|
56
|
-
};
|
|
57
|
-
|
|
58
|
-
// ✅ 子代理也需要配置相同的中间件
|
|
59
|
-
export const subagent1 = {
|
|
60
|
-
name: 'subagent1',
|
|
61
|
-
tools: [...],
|
|
62
|
-
middleware: [
|
|
63
|
-
createFilterToolsMiddleware(), // 必须添加
|
|
64
|
-
createSkillsMiddleware({ ... }),
|
|
65
|
-
],
|
|
66
|
-
};
|
|
67
|
-
```
|
|
68
|
-
|
|
69
|
-
**原因**: DeepAgents 子代理不会继承主 Agent 的中间件配置。如果主 Agent 过滤了内置工具,子代理也必须单独配置过滤中间件,否则子代理仍会使用被过滤的工具。
|
|
70
|
-
|
|
71
|
-
### 4. setInterval 中使用 async 回调
|
|
72
|
-
|
|
73
|
-
```javascript
|
|
74
|
-
// ❌ 禁止:async 回调不会被等待,可能导致并发问题
|
|
75
|
-
setInterval(async () => {
|
|
76
|
-
const result = await detectCaptcha();
|
|
77
|
-
await handleResult(result);
|
|
78
|
-
}, 30000);
|
|
79
|
-
|
|
80
|
-
// ✅ 保持同步,只做状态检查和标记
|
|
81
|
-
let needsCheck = false;
|
|
82
|
-
setInterval(() => {
|
|
83
|
-
const elapsed = Date.now() - lastEventTime;
|
|
84
|
-
if (elapsed > timeout) {
|
|
85
|
-
console.log('[提示] 超时,请检查页面');
|
|
86
|
-
}
|
|
87
|
-
}, 30000);
|
|
88
|
-
```
|
|
89
|
-
|
|
90
|
-
**原因**: setInterval 不会等待 async 回调完成,多次触发会导致并发执行。
|
|
91
|
-
|
|
92
|
-
### 5. spawn 依赖旧版 Node.js 不支持的 timeout 选项
|
|
93
|
-
|
|
94
|
-
```javascript
|
|
95
|
-
// ❌ 禁止:spawn 的 timeout 选项在 Node.js v14.18.0 / v15.13.0 之前不存在,旧版本中超时不会生效
|
|
96
|
-
const proc = spawn('node', ['-e', code], {
|
|
97
|
-
timeout: 10000, // 旧版 Node.js 中会被忽略!
|
|
98
|
-
});
|
|
99
|
-
|
|
100
|
-
// ✅ 手动实现超时
|
|
101
|
-
const proc = spawn('node', ['-e', code]);
|
|
102
|
-
let killed = false;
|
|
103
|
-
|
|
104
|
-
const timer = setTimeout(() => {
|
|
105
|
-
killed = true;
|
|
106
|
-
proc.kill('SIGTERM');
|
|
107
|
-
}, 10000);
|
|
108
|
-
|
|
109
|
-
proc.on('close', () => {
|
|
110
|
-
clearTimeout(timer);
|
|
111
|
-
});
|
|
112
|
-
```
|
|
113
|
-
|
|
114
|
-
**原因**: `spawn` 的 `timeout` 选项直到 Node.js v14.18.0 / v15.13.0 才加入,更早的版本只有 `exec`/`execSync` 等支持该选项。需要兼容旧版本时必须手动实现超时逻辑。
|
|
115
|
-
|
|
116
|
-
### 6. 用正则替换 HTML 字符串
|
|
117
|
-
|
|
118
|
-
```javascript
|
|
119
|
-
// ❌ 禁止:正则替换 HTML 字符串会破坏结构
|
|
120
|
-
function linkifyPaths(html) {
|
|
121
|
-
return html.replace(/(\/[\w.\-\/]+)/g, '<a href="$1">$1</a>');
|
|
122
|
-
}
|
|
123
|
-
// 会把 </strong> 中的 /strong 也匹配成路径!
|
|
124
|
-
|
|
125
|
-
// ✅ 使用 DOM TreeWalker 遍历文本节点
|
|
126
|
-
function linkifyPaths(container) {
|
|
127
|
-
const walker = document.createTreeWalker(container, NodeFilter.SHOW_TEXT);
|
|
128
|
-
const textNodes = [];
|
|
129
|
-
while (walker.nextNode()) textNodes.push(walker.currentNode);
|
|
130
|
-
|
|
131
|
-
textNodes.forEach(node => {
|
|
132
|
-
// 只处理纯文本,不会影响 HTML 标签
|
|
133
|
-
});
|
|
134
|
-
}
|
|
135
|
-
```
|
|
136
|
-
|
|
137
|
-
**原因**: 正则无法区分 HTML 标签和文本内容,容易误匹配导致结构破坏。
|
|
138
|
-
|
|
139
|
-
### 7. LLM 工具参数传递大段代码
|
|
140
|
-
|
|
141
|
-
```javascript
|
|
142
|
-
// ❌ 禁止:直接传递大段代码内容,可能被 LLM 截断
|
|
143
|
-
await saveReport({ pythonCode: longCodeString });
|
|
144
|
-
|
|
145
|
-
// ✅ 先保存到文件,再传递文件路径
|
|
146
|
-
await artifactSave({ path: 'domain/decrypt.py', content: code });
|
|
147
|
-
await saveReport({ pythonCodeFile: 'domain/decrypt.py' });
|
|
148
|
-
```
|
|
149
|
-
|
|
150
|
-
**原因**: LLM 输出有长度限制,大段代码作为参数传递时可能被截断。分步保存确保代码完整性。
|
|
151
|
-
|
|
152
|
-
---
|
|
153
|
-
|
|
154
|
-
## Required Patterns
|
|
155
|
-
|
|
156
|
-
### 1. Babel AST 遍历
|
|
157
|
-
|
|
158
|
-
```javascript
|
|
159
|
-
import traverse from '@babel/traverse';
|
|
160
|
-
|
|
161
|
-
traverse.default(ast, {
|
|
162
|
-
FunctionDeclaration(path) {
|
|
163
|
-
// 处理
|
|
164
|
-
}
|
|
165
|
-
});
|
|
166
|
-
```
|
|
167
|
-
|
|
168
|
-
### 2. CDP Session 复用
|
|
169
|
-
|
|
170
|
-
```javascript
|
|
171
|
-
const cdp = await browser.getCDPSession();
|
|
172
|
-
```
|
|
173
|
-
|
|
174
|
-
### 3. Hook 日志记录调用位置
|
|
175
|
-
|
|
176
|
-
```javascript
|
|
177
|
-
// ✅ 在日志中包含解析后的调用位置
|
|
178
|
-
const entry = {
|
|
179
|
-
...data,
|
|
180
|
-
timestamp: Date.now(),
|
|
181
|
-
stack: stack,
|
|
182
|
-
caller: caller, // { func, file, line, col }
|
|
183
|
-
};
|
|
184
|
-
|
|
185
|
-
// 控制台输出显示文件名和行号
|
|
186
|
-
const loc = caller ? ' @ ' + caller.file.split('/').pop() + ':' + caller.line : '';
|
|
187
|
-
console.log('[DeepSpider:' + type + ']' + loc, data);
|
|
188
|
-
```
|
|
189
|
-
|
|
190
|
-
**原因**: Hook 日志需要记录 JS 文件调用位置,便于快速定位加密代码来源。
|
|
191
|
-
|
|
192
|
-
---
|
|
193
|
-
|
|
194
|
-
## Release Process
|
|
195
|
-
|
|
196
|
-
### 原生模块依赖处理
|
|
197
|
-
|
|
198
|
-
项目依赖 `isolated-vm` 等原生 C++ 模块,需要编译环境。
|
|
199
|
-
|
|
200
|
-
**postinstall 自动处理**:
|
|
201
|
-
```json
|
|
202
|
-
{
|
|
203
|
-
"scripts": {
|
|
204
|
-
"postinstall": "patchright install chromium && npm rebuild isolated-vm 2>/dev/null || true"
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
```
|
|
208
|
-
|
|
209
|
-
**编译环境要求**:
|
|
210
|
-
- macOS: `xcode-select --install`
|
|
211
|
-
- Ubuntu: `sudo apt install build-essential`
|
|
212
|
-
- Windows: Visual Studio Build Tools
|
|
213
|
-
|
|
214
|
-
> **注意**: `2>/dev/null || true` 确保编译失败不会阻塞安装,但沙箱功能可能不可用。
|
|
215
|
-
|
|
216
|
-
---
|
|
217
|
-
|
|
218
|
-
### 版本发布流程
|
|
219
|
-
|
|
220
|
-
升级版本并推送 tag,GitHub Actions 会自动发布到 npm:
|
|
221
|
-
|
|
222
|
-
```bash
|
|
223
|
-
# 1. 升级 package.json 版本
|
|
224
|
-
# 编辑 package.json 中的 version 字段
|
|
225
|
-
|
|
226
|
-
# 2. 提交版本变更
|
|
227
|
-
git add package.json
|
|
228
|
-
git commit -m "chore: bump version to x.x.x"
|
|
229
|
-
|
|
230
|
-
# 3. 创建并推送 git tag
|
|
231
|
-
git tag -a vx.x.x -m "vx.x.x"
|
|
232
|
-
git push && git push origin vx.x.x
|
|
233
|
-
```
|
|
234
|
-
|
|
235
|
-
> **注意**: 推送 tag 后 GitHub Actions 会自动触发 npm 发布,无需手动 `npm publish`。
|
|
236
|
-
|
|
237
|
-
**原因**: 自动化发布避免手动操作失误,确保版本一致性。
|
|
238
|
-
|
|
239
|
-
---
|
|
240
|
-
|
|
241
|
-
## Testing Requirements
|
|
242
|
-
|
|
243
|
-
运行测试:
|
|
244
|
-
|
|
245
|
-
```bash
|
|
246
|
-
pnpm test
|
|
247
|
-
```
|
|
248
|
-
|
|
249
|
-
---
|
|
250
|
-
|
|
251
|
-
## Code Review Checklist
|
|
252
|
-
|
|
253
|
-
- [ ] 工具名称使用 snake_case
|
|
254
|
-
- [ ] 参数有 describe 描述
|
|
255
|
-
- [ ] 浏览器交互使用 CDP
|
|
256
|
-
- [ ] AST 遍历使用 Babel
|
|
257
|
-
- [ ] 数组访问前检查边界
|
|
258
|
-
- [ ] 对象访问前检查空值
|
|
259
|
-
|
|
260
|
-
---
|
|
261
|
-
|
|
262
|
-
## Defensive Programming
|
|
263
|
-
|
|
264
|
-
### 1. 数组索引边界检查
|
|
265
|
-
|
|
266
|
-
```javascript
|
|
267
|
-
// ❌ 禁止:直接访问可能越界
|
|
268
|
-
const stage = stages[parseInt(index)];
|
|
269
|
-
stage.fields.push(field);
|
|
270
|
-
|
|
271
|
-
// ✅ 先检查边界
|
|
272
|
-
const idx = parseInt(index);
|
|
273
|
-
if (Number.isNaN(idx) || idx < 0 || idx >= stages.length) return;
|
|
274
|
-
const stage = stages[idx];
|
|
275
|
-
```
|
|
276
|
-
|
|
277
|
-
### 2. 工厂函数避免重复结构
|
|
278
|
-
|
|
279
|
-
```javascript
|
|
280
|
-
// ❌ 禁止:多处重复对象字面量
|
|
281
|
-
stages.push({ name: 'list', fields: [], entry: null });
|
|
282
|
-
// ... 另一处
|
|
283
|
-
stages = [{ name: 'list', fields: [], entry: null }];
|
|
284
|
-
|
|
285
|
-
// ✅ 使用工厂函数
|
|
286
|
-
function createStage(name) {
|
|
287
|
-
return { name, fields: [], entry: null, pagination: null };
|
|
288
|
-
}
|
|
289
|
-
stages.push(createStage('list'));
|
|
290
|
-
```
|
|
291
|
-
|
|
292
|
-
### 3. 空值检查
|
|
293
|
-
|
|
294
|
-
```javascript
|
|
295
|
-
// ❌ 禁止:假设对象存在
|
|
296
|
-
currentStage.fields.splice(index, 1);
|
|
297
|
-
|
|
298
|
-
// ✅ 先检查
|
|
299
|
-
if (!currentStage) return;
|
|
300
|
-
if (index < 0 || index >= currentStage.fields.length) return;
|
|
301
|
-
currentStage.fields.splice(index, 1);
|
|
302
|
-
```
|
|
303
|
-
|
|
304
|
-
---
|
|
305
|
-
|
|
306
|
-
## Modularization Patterns
|
|
307
|
-
|
|
308
|
-
### 1. 大文件拆分原则
|
|
309
|
-
|
|
310
|
-
当文件超过 300 行时,考虑按职责拆分:
|
|
311
|
-
|
|
312
|
-
```javascript
|
|
313
|
-
// ❌ 禁止:单文件包含多种职责
|
|
314
|
-
// run.js (600+ 行)
|
|
315
|
-
// - 流式处理逻辑
|
|
316
|
-
// - 重试策略
|
|
317
|
-
// - 面板通信
|
|
318
|
-
// - 错误分类
|
|
319
|
-
|
|
320
|
-
// ✅ 按职责拆分到 core/ 目录
|
|
321
|
-
// src/agent/core/
|
|
322
|
-
// ├── StreamHandler.js # 流式输出处理
|
|
323
|
-
// ├── RetryManager.js # 重试策略
|
|
324
|
-
// ├── PanelBridge.js # 面板通信
|
|
325
|
-
// └── index.js # 模块导出
|
|
326
|
-
```
|
|
327
|
-
|
|
328
|
-
**原因**: 单一职责原则,便于测试和维护。
|
|
329
|
-
|
|
330
|
-
### 2. 使用子代理工厂函数
|
|
331
|
-
|
|
332
|
-
```javascript
|
|
333
|
-
// ❌ 禁止:每个子代理重复配置中间件
|
|
334
|
-
export const staticSubagent = {
|
|
335
|
-
name: 'static-agent',
|
|
336
|
-
tools: [...staticTools, ...evolveTools],
|
|
337
|
-
middleware: [
|
|
338
|
-
createFilterToolsMiddleware(),
|
|
339
|
-
createSkillsMiddleware({ backend, sources: [SKILLS.static] }),
|
|
340
|
-
],
|
|
341
|
-
};
|
|
342
|
-
|
|
343
|
-
// ✅ 使用工厂函数统一配置
|
|
344
|
-
import { createSubagent, SKILLS } from './factory.js';
|
|
345
|
-
|
|
346
|
-
export const staticSubagent = createSubagent({
|
|
347
|
-
name: 'static-agent',
|
|
348
|
-
description: '静态分析专家',
|
|
349
|
-
systemPrompt: '...',
|
|
350
|
-
tools: staticTools,
|
|
351
|
-
skills: [SKILLS.static],
|
|
352
|
-
});
|
|
353
|
-
```
|
|
354
|
-
|
|
355
|
-
**原因**: 工厂函数自动注入公共中间件和 evolveTools,避免遗漏。
|
|
356
|
-
|
|
357
|
-
### 3. 结构化错误类型
|
|
358
|
-
|
|
359
|
-
```javascript
|
|
360
|
-
// ❌ 禁止:字符串匹配判断错误类型
|
|
361
|
-
if (/503|502|429/.test(error.message)) {
|
|
362
|
-
// 重试
|
|
363
|
-
}
|
|
364
|
-
|
|
365
|
-
// ✅ 使用结构化错误类型
|
|
366
|
-
import { ApiServiceError, isApiServiceError } from './errors/index.js';
|
|
367
|
-
|
|
368
|
-
// 抛出时
|
|
369
|
-
throw new ApiServiceError('服务不可用', { statusCode: 503 });
|
|
370
|
-
|
|
371
|
-
// 捕获时
|
|
372
|
-
if (isApiServiceError(error)) {
|
|
373
|
-
// 重试
|
|
374
|
-
}
|
|
375
|
-
```
|
|
376
|
-
|
|
377
|
-
**原因**: 结构化错误便于分类处理,支持携带额外上下文。
|
|
@@ -1,76 +0,0 @@
|
|
|
1
|
-
# State Management
|
|
2
|
-
|
|
3
|
-
> Agent 状态与数据存储规范
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## Overview
|
|
8
|
-
|
|
9
|
-
DeepSpider 使用 DeepAgents 的状态后端和文件系统存储管理数据。
|
|
10
|
-
Agent 状态通过 FilesystemBackend 持久化,采集数据通过 DataStore 存储。
|
|
11
|
-
|
|
12
|
-
---
|
|
13
|
-
|
|
14
|
-
## State Categories
|
|
15
|
-
|
|
16
|
-
| 类型 | 存储方式 | 示例 |
|
|
17
|
-
|------|----------|------|
|
|
18
|
-
| Agent 状态 | FilesystemBackend | `.deepspider-agent/` |
|
|
19
|
-
| 采集数据 | DataStore | `.deepspider-data/` |
|
|
20
|
-
| 会话状态 | MemorySaver | 内存中 |
|
|
21
|
-
|
|
22
|
-
---
|
|
23
|
-
|
|
24
|
-
## DataStore Pattern
|
|
25
|
-
|
|
26
|
-
数据存储使用单例模式:
|
|
27
|
-
|
|
28
|
-
```javascript
|
|
29
|
-
import { getDataStore } from '../store/DataStore.js';
|
|
30
|
-
|
|
31
|
-
const store = getDataStore();
|
|
32
|
-
await store.saveResponse(data);
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
**示例**: `src/store/DataStore.js:699-706`
|
|
36
|
-
|
|
37
|
-
---
|
|
38
|
-
|
|
39
|
-
## Agent Backend
|
|
40
|
-
|
|
41
|
-
Agent 状态后端配置:
|
|
42
|
-
|
|
43
|
-
```javascript
|
|
44
|
-
import { FilesystemBackend } from 'deepagents';
|
|
45
|
-
|
|
46
|
-
const backend = new FilesystemBackend({
|
|
47
|
-
rootDir: './.deepspider-agent'
|
|
48
|
-
});
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
**示例**: `src/agent/index.js:59-62`
|
|
52
|
-
|
|
53
|
-
---
|
|
54
|
-
|
|
55
|
-
## Common Mistakes
|
|
56
|
-
|
|
57
|
-
### 1. 未使用单例
|
|
58
|
-
|
|
59
|
-
```javascript
|
|
60
|
-
// ❌ 错误:每次创建新实例
|
|
61
|
-
const store = new DataStore();
|
|
62
|
-
|
|
63
|
-
// ✅ 正确:使用单例
|
|
64
|
-
const store = getDataStore();
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
### 2. 忘记启动会话
|
|
68
|
-
|
|
69
|
-
```javascript
|
|
70
|
-
// ❌ 错误:直接保存
|
|
71
|
-
await store.saveResponse(data);
|
|
72
|
-
|
|
73
|
-
// ✅ 正确:先启动会话
|
|
74
|
-
store.startSession();
|
|
75
|
-
await store.saveResponse(data);
|
|
76
|
-
```
|