deepspider 0.2.12 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -27
- package/bin/cli.js +45 -0
- package/package.json +10 -4
- package/src/agent/run.js +54 -63
- package/src/agent/setup.js +14 -14
- package/src/cli/commands/config.js +94 -0
- package/src/cli/commands/help.js +34 -0
- package/src/cli/commands/update.js +78 -0
- package/src/cli/commands/version.js +9 -0
- package/src/cli/config.js +15 -0
- package/src/config/settings.js +102 -0
- package/.claude/agents/check.md +0 -122
- package/.claude/agents/debug.md +0 -106
- package/.claude/agents/dispatch.md +0 -214
- package/.claude/agents/implement.md +0 -96
- package/.claude/agents/plan.md +0 -396
- package/.claude/agents/research.md +0 -120
- package/.claude/commands/evolve/merge.md +0 -80
- package/.claude/commands/trellis/before-backend-dev.md +0 -13
- package/.claude/commands/trellis/before-frontend-dev.md +0 -13
- package/.claude/commands/trellis/break-loop.md +0 -107
- package/.claude/commands/trellis/check-backend.md +0 -13
- package/.claude/commands/trellis/check-cross-layer.md +0 -153
- package/.claude/commands/trellis/check-frontend.md +0 -13
- package/.claude/commands/trellis/create-command.md +0 -154
- package/.claude/commands/trellis/finish-work.md +0 -129
- package/.claude/commands/trellis/integrate-skill.md +0 -219
- package/.claude/commands/trellis/onboard.md +0 -358
- package/.claude/commands/trellis/parallel.md +0 -193
- package/.claude/commands/trellis/record-session.md +0 -62
- package/.claude/commands/trellis/start.md +0 -280
- package/.claude/commands/trellis/update-spec.md +0 -213
- package/.claude/hooks/inject-subagent-context.py +0 -758
- package/.claude/hooks/ralph-loop.py +0 -374
- package/.claude/hooks/session-start.py +0 -126
- package/.claude/settings.json +0 -41
- package/.claude/skills/deepagents-guide/SKILL.md +0 -428
- package/.cursor/commands/trellis-before-backend-dev.md +0 -13
- package/.cursor/commands/trellis-before-frontend-dev.md +0 -13
- package/.cursor/commands/trellis-break-loop.md +0 -107
- package/.cursor/commands/trellis-check-backend.md +0 -13
- package/.cursor/commands/trellis-check-cross-layer.md +0 -153
- package/.cursor/commands/trellis-check-frontend.md +0 -13
- package/.cursor/commands/trellis-create-command.md +0 -154
- package/.cursor/commands/trellis-finish-work.md +0 -129
- package/.cursor/commands/trellis-integrate-skill.md +0 -219
- package/.cursor/commands/trellis-onboard.md +0 -358
- package/.cursor/commands/trellis-record-session.md +0 -62
- package/.cursor/commands/trellis-start.md +0 -156
- package/.cursor/commands/trellis-update-spec.md +0 -213
- package/.github/workflows/publish.yml +0 -63
- package/.husky/pre-commit +0 -1
- package/.mcp.json +0 -8
- package/.trellis/.template-hashes.json +0 -65
- package/.trellis/.version +0 -1
- package/.trellis/scripts/add-session.sh +0 -384
- package/.trellis/scripts/common/developer.sh +0 -129
- package/.trellis/scripts/common/git-context.sh +0 -263
- package/.trellis/scripts/common/paths.sh +0 -208
- package/.trellis/scripts/common/phase.sh +0 -150
- package/.trellis/scripts/common/registry.sh +0 -247
- package/.trellis/scripts/common/task-queue.sh +0 -142
- package/.trellis/scripts/common/task-utils.sh +0 -151
- package/.trellis/scripts/common/worktree.sh +0 -128
- package/.trellis/scripts/create-bootstrap.sh +0 -299
- package/.trellis/scripts/get-context.sh +0 -7
- package/.trellis/scripts/get-developer.sh +0 -15
- package/.trellis/scripts/init-developer.sh +0 -34
- package/.trellis/scripts/multi-agent/cleanup.sh +0 -396
- package/.trellis/scripts/multi-agent/create-pr.sh +0 -241
- package/.trellis/scripts/multi-agent/plan.sh +0 -207
- package/.trellis/scripts/multi-agent/start.sh +0 -310
- package/.trellis/scripts/multi-agent/status.sh +0 -828
- package/.trellis/scripts/task.sh +0 -1118
- package/.trellis/spec/backend/ci-cd-guidelines.md +0 -73
- package/.trellis/spec/backend/deepagents-guide.md +0 -380
- package/.trellis/spec/backend/directory-structure.md +0 -145
- package/.trellis/spec/backend/examples/skills/deepagents-guide/README.md +0 -11
- package/.trellis/spec/backend/examples/skills/deepagents-guide/agent.js.template +0 -20
- package/.trellis/spec/backend/examples/skills/deepagents-guide/skills-config.js.template +0 -13
- package/.trellis/spec/backend/examples/skills/deepagents-guide/subagent.js.template +0 -19
- package/.trellis/spec/backend/hook-guidelines.md +0 -218
- package/.trellis/spec/backend/index.md +0 -37
- package/.trellis/spec/backend/quality-guidelines.md +0 -377
- package/.trellis/spec/backend/state-management.md +0 -76
- package/.trellis/spec/backend/tool-guidelines.md +0 -144
- package/.trellis/spec/backend/type-safety.md +0 -71
- package/.trellis/spec/guides/code-reuse-thinking-guide.md +0 -92
- package/.trellis/spec/guides/cross-layer-thinking-guide.md +0 -94
- package/.trellis/spec/guides/index.md +0 -79
- package/.trellis/tasks/archive/02-02-evolving-skills/prd.md +0 -61
- package/.trellis/tasks/archive/02-02-evolving-skills/task.json +0 -29
- package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/prd.md +0 -86
- package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/task.json +0 -27
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/check.jsonl +0 -3
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/debug.jsonl +0 -2
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/implement.jsonl +0 -5
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/prd.md +0 -33
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/task.json +0 -41
- package/.trellis/workflow.md +0 -407
- package/.trellis/workspace/index.md +0 -123
- package/.trellis/workspace/pony/index.md +0 -42
- package/.trellis/workspace/pony/journal-1.md +0 -125
- package/.trellis/worktree.yaml +0 -47
- package/AGENTS.md +0 -18
- package/CLAUDE.md +0 -315
- package/agents/deepspider.md +0 -142
- package/docs/DEBUG.md +0 -42
- package/docs/GUIDE.md +0 -338
- package/docs/PROMPT.md +0 -59
- package/docs/USAGE.md +0 -230
- package/eslint.config.js +0 -51
- package/test/analyze.test.js +0 -90
- package/test/envdump.test.js +0 -74
- package/test/flow.test.js +0 -90
- package/test/hooks.test.js +0 -138
- package/test/plugin.test.js +0 -35
- package/test/refactor-full.test.js +0 -30
- package/test/refactor.test.js +0 -21
- package/test/samples/obfuscated.js +0 -61
- package/test/samples/original.js +0 -66
- package/test/samples/v10_eval_chain.js +0 -52
- package/test/samples/v11_bytecode_vm.js +0 -81
- package/test/samples/v12_polymorphic.js +0 -69
- package/test/samples/v1_ob_basic.js +0 -98
- package/test/samples/v2_ob_advanced.js +0 -99
- package/test/samples/v3_jjencode.js +0 -77
- package/test/samples/v4_aaencode.js +0 -73
- package/test/samples/v5_control_flow.js +0 -86
- package/test/samples/v6_string_encryption.js +0 -71
- package/test/samples/v7_jsvmp.js +0 -83
- package/test/samples/v8_anti_debug.js +0 -79
- package/test/samples/v9_proxy_trap.js +0 -49
- package/test/samples.test.js +0 -96
- package/test/webcrack.test.js +0 -55
package/README.md
CHANGED
|
@@ -37,40 +37,51 @@ cp .env.example .env # 配置环境变量
|
|
|
37
37
|
pnpm run setup:crypto # 安装 Python 加密库(可选)
|
|
38
38
|
```
|
|
39
39
|
|
|
40
|
-
|
|
40
|
+
安装完成后,首次运行会提示配置 LLM API。
|
|
41
41
|
|
|
42
42
|
> **注意**: 项目依赖 `isolated-vm` 原生模块,需要 C++ 编译环境:
|
|
43
43
|
> - macOS: `xcode-select --install`
|
|
44
44
|
> - Ubuntu: `sudo apt install build-essential`
|
|
45
45
|
> - Windows: 安装 [Visual Studio Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/)
|
|
46
46
|
|
|
47
|
-
###
|
|
47
|
+
### 配置
|
|
48
48
|
|
|
49
49
|
DeepSpider 需要配置 LLM API 才能运行。支持任何兼容 OpenAI 格式的供应商。
|
|
50
50
|
|
|
51
|
-
|
|
|
52
|
-
|
|
53
|
-
| `
|
|
54
|
-
| `
|
|
55
|
-
| `
|
|
51
|
+
| 配置键 | 环境变量 | 说明 |
|
|
52
|
+
|--------|----------|------|
|
|
53
|
+
| `apiKey` | `DEEPSPIDER_API_KEY` | API 密钥 |
|
|
54
|
+
| `baseUrl` | `DEEPSPIDER_BASE_URL` | API 地址 |
|
|
55
|
+
| `model` | `DEEPSPIDER_MODEL` | 模型名称 |
|
|
56
56
|
|
|
57
|
-
|
|
57
|
+
优先级:环境变量 > 配置文件 (`~/.deepspider/config/settings.json`) > 默认值
|
|
58
|
+
|
|
59
|
+
**方式一:使用 CLI 命令(推荐)**
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
deepspider config set apiKey sk-xxx
|
|
63
|
+
deepspider config set baseUrl https://api.openai.com/v1
|
|
64
|
+
deepspider config set model gpt-4o
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
**方式二:环境变量**
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
export DEEPSPIDER_API_KEY=sk-xxx
|
|
71
|
+
export DEEPSPIDER_BASE_URL=https://api.openai.com/v1
|
|
72
|
+
export DEEPSPIDER_MODEL=gpt-4o
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
**常用供应商示例**:
|
|
58
76
|
|
|
59
77
|
```bash
|
|
60
78
|
# OpenAI
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
DEEPSPIDER_MODEL=gpt-4o
|
|
79
|
+
deepspider config set baseUrl https://api.openai.com/v1
|
|
80
|
+
deepspider config set model gpt-4o
|
|
64
81
|
|
|
65
82
|
# DeepSeek
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
DEEPSPIDER_MODEL=deepseek-chat
|
|
69
|
-
|
|
70
|
-
# 其他 OpenAI 兼容供应商
|
|
71
|
-
DEEPSPIDER_API_KEY=your-key
|
|
72
|
-
DEEPSPIDER_BASE_URL=https://your-provider.com/v1
|
|
73
|
-
DEEPSPIDER_MODEL=model-name
|
|
83
|
+
deepspider config set baseUrl https://api.deepseek.com/v1
|
|
84
|
+
deepspider config set model deepseek-chat
|
|
74
85
|
```
|
|
75
86
|
|
|
76
87
|
### 使用
|
|
@@ -78,24 +89,33 @@ DEEPSPIDER_MODEL=model-name
|
|
|
78
89
|
#### 全局安装(npm/pnpm install -g)
|
|
79
90
|
|
|
80
91
|
```bash
|
|
81
|
-
# 配置环境变量
|
|
82
|
-
export DEEPSPIDER_API_KEY=sk-xxx
|
|
83
|
-
export DEEPSPIDER_BASE_URL=https://api.openai.com/v1
|
|
84
|
-
export DEEPSPIDER_MODEL=gpt-4o
|
|
85
|
-
|
|
86
92
|
# 启动 Agent - 指定目标网站
|
|
87
93
|
deepspider https://example.com
|
|
88
94
|
|
|
89
95
|
# 启动 Agent - 纯交互模式
|
|
90
96
|
deepspider
|
|
97
|
+
|
|
98
|
+
# 查看帮助
|
|
99
|
+
deepspider --help
|
|
100
|
+
|
|
101
|
+
# 管理配置
|
|
102
|
+
deepspider config list # 查看所有配置
|
|
103
|
+
deepspider config set apiKey sk-xxx
|
|
104
|
+
deepspider config set model gpt-4o
|
|
105
|
+
|
|
106
|
+
# 检查更新
|
|
107
|
+
deepspider update
|
|
91
108
|
```
|
|
92
109
|
|
|
93
110
|
#### 克隆仓库
|
|
94
111
|
|
|
95
112
|
```bash
|
|
96
|
-
#
|
|
113
|
+
# 配置(二选一)
|
|
97
114
|
cp .env.example .env # 编辑 .env 文件
|
|
98
|
-
#
|
|
115
|
+
# 或使用 CLI 命令
|
|
116
|
+
node bin/cli.js config set apiKey sk-xxx
|
|
117
|
+
node bin/cli.js config set baseUrl https://api.openai.com/v1
|
|
118
|
+
node bin/cli.js config set model gpt-4o
|
|
99
119
|
|
|
100
120
|
# 安装 Python 依赖(可选,用于执行生成的 Python 代码)
|
|
101
121
|
pnpm run setup:crypto
|
|
@@ -161,12 +181,19 @@ pnpm test
|
|
|
161
181
|
|
|
162
182
|
```
|
|
163
183
|
deepspider/
|
|
184
|
+
├── bin/cli.js # CLI 入口(命令路由)
|
|
164
185
|
├── src/
|
|
165
186
|
│ ├── agent/ # DeepAgent 系统
|
|
166
187
|
│ │ ├── tools/ # 工具集(90+)
|
|
167
188
|
│ │ ├── subagents/ # 子代理
|
|
168
189
|
│ │ ├── skills/ # 领域技能
|
|
169
190
|
│ │ └── prompts/ # 系统提示
|
|
191
|
+
│ ├── cli/ # CLI 命令
|
|
192
|
+
│ │ ├── config.js # 配置 re-export
|
|
193
|
+
│ │ └── commands/ # 子命令(version/help/config/update)
|
|
194
|
+
│ ├── config/ # 核心配置
|
|
195
|
+
│ │ ├── paths.js # 路径常量
|
|
196
|
+
│ │ └── settings.js # 配置读写(环境变量/文件/默认值)
|
|
170
197
|
│ ├── browser/ # 浏览器运行时
|
|
171
198
|
│ │ ├── client.js # Patchright 客户端
|
|
172
199
|
│ │ ├── cdp.js # CDP 会话管理
|
|
@@ -177,7 +204,6 @@ deepspider/
|
|
|
177
204
|
│ ├── env/ # 环境补丁模块
|
|
178
205
|
│ ├── store/ # 数据存储
|
|
179
206
|
│ └── mcp/ # MCP 服务
|
|
180
|
-
├── bin/cli.js # CLI 入口
|
|
181
207
|
└── test/ # 测试
|
|
182
208
|
```
|
|
183
209
|
|
package/bin/cli.js
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* DeepSpider CLI 入口
|
|
4
|
+
* 路由命令到对应处理模块
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import 'dotenv/config';
|
|
8
|
+
|
|
9
|
+
// Command router: the first CLI argument selects a command module.
// Each module is loaded with a dynamic import inside its handler, so only
// the code for the selected command is loaded.
const args = process.argv.slice(2);
const first = args[0];

const showVersion = async () => {
  const { run } = await import('../src/cli/commands/version.js');
  run();
};

const showHelp = async () => {
  const { run } = await import('../src/cli/commands/help.js');
  run();
};

const manageConfig = async () => {
  const { run } = await import('../src/cli/commands/config.js');
  // Forward everything after the "config" word to the sub-command parser.
  run(args.slice(1));
};

const checkUpdate = async () => {
  const { run } = await import('../src/cli/commands/update.js');
  await run();
};

// Fallback: a URL, any unrecognised word, or no argument at all starts the Agent.
const startAgent = async () => {
  const { init } = await import('../src/agent/run.js');
  await init();
};

const routes = new Map([
  ['-v', showVersion],
  ['--version', showVersion],
  ['-h', showHelp],
  ['--help', showHelp],
  ['config', manageConfig],
  ['update', checkUpdate],
]);

const handler = routes.get(first) ?? startAgent;
await handler();
|
package/package.json
CHANGED
|
@@ -1,18 +1,24 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "deepspider",
|
|
3
|
-
"version": "0.2.12",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "智能爬虫工程平台 - 基于 DeepAgents + Patchright 的 AI 爬虫 Agent",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
7
|
+
"files": [
|
|
8
|
+
"bin/",
|
|
9
|
+
"src/",
|
|
10
|
+
"requirements-crypto.txt",
|
|
11
|
+
".env.example"
|
|
12
|
+
],
|
|
7
13
|
"bin": {
|
|
8
|
-
"deepspider": "./
|
|
14
|
+
"deepspider": "./bin/cli.js"
|
|
9
15
|
},
|
|
10
16
|
"scripts": {
|
|
11
17
|
"start": "node src/index.js",
|
|
12
18
|
"dev": "node --watch src/index.js",
|
|
13
|
-
"cli": "node
|
|
19
|
+
"cli": "node bin/cli.js",
|
|
14
20
|
"mcp": "node src/mcp/server.js",
|
|
15
|
-
"agent": "node
|
|
21
|
+
"agent": "node bin/cli.js",
|
|
16
22
|
"test": "node --test test/",
|
|
17
23
|
"lint": "eslint src/",
|
|
18
24
|
"lint:fix": "eslint src/ --fix",
|
package/src/agent/run.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
1
|
/**
|
|
3
|
-
* DeepSpider Agent
|
|
2
|
+
* DeepSpider Agent 运行模块
|
|
4
3
|
* 使用 CDP binding 接收浏览器消息
|
|
5
4
|
* 支持流式输出显示思考过程
|
|
5
|
+
*
|
|
6
|
+
* 所有状态初始化延迟到 init() 中执行,避免 import 时产生副作用
|
|
6
7
|
*/
|
|
7
8
|
|
|
8
|
-
import 'dotenv/config';
|
|
9
9
|
import readline from 'readline';
|
|
10
10
|
import { readFileSync } from 'fs';
|
|
11
11
|
import { marked } from 'marked';
|
|
@@ -18,60 +18,14 @@ import { browserTools } from './tools/browser.js';
|
|
|
18
18
|
import { ensureConfig } from './setup.js';
|
|
19
19
|
import { StreamHandler, PanelBridge } from './core/index.js';
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
const targetUrl = args.find(arg => arg.startsWith('http://') || arg.startsWith('https://'));
|
|
23
|
-
|
|
24
|
-
const rl = readline.createInterface({
|
|
25
|
-
input: process.stdin,
|
|
26
|
-
output: process.stdout,
|
|
27
|
-
});
|
|
28
|
-
|
|
21
|
+
let rl = null;
|
|
29
22
|
let browser = null;
|
|
30
23
|
let streamHandler = null;
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
function debug(...args) {
|
|
38
|
-
if (DEBUG) {
|
|
39
|
-
console.log('[DEBUG]', ...args);
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
const logger = createLogger({ enabled: DEBUG, verbose: false });
|
|
44
|
-
|
|
45
|
-
/**
|
|
46
|
-
* 报告就绪回调 - 由中间件在 afterAgent 时调用
|
|
47
|
-
*/
|
|
48
|
-
async function onReportReady(mdFilePath) {
|
|
49
|
-
console.log('[report] 中间件触发报告显示:', mdFilePath);
|
|
50
|
-
await showReportFromFile(mdFilePath);
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
// 创建 Agent
|
|
54
|
-
const agent = createDeepSpiderAgent({ onReportReady });
|
|
55
|
-
|
|
56
|
-
const config = {
|
|
57
|
-
configurable: { thread_id: `deepspider-${Date.now()}` },
|
|
58
|
-
recursionLimit: 5000,
|
|
59
|
-
callbacks: logger ? [logger] : [],
|
|
60
|
-
};
|
|
61
|
-
|
|
62
|
-
/**
|
|
63
|
-
* 初始化流处理器
|
|
64
|
-
*/
|
|
65
|
-
function initStreamHandler() {
|
|
66
|
-
const panelBridge = new PanelBridge(() => browser, debug);
|
|
67
|
-
streamHandler = new StreamHandler({
|
|
68
|
-
agent,
|
|
69
|
-
config,
|
|
70
|
-
panelBridge,
|
|
71
|
-
riskTools: browserTools.map(t => t.name),
|
|
72
|
-
debug,
|
|
73
|
-
});
|
|
74
|
-
}
|
|
24
|
+
let targetUrl = null;
|
|
25
|
+
let DEBUG = false;
|
|
26
|
+
let debugFn = () => {};
|
|
27
|
+
let agent = null;
|
|
28
|
+
let agentConfig = null;
|
|
75
29
|
|
|
76
30
|
/**
|
|
77
31
|
* 从文件显示报告(由中间件回调触发)
|
|
@@ -106,7 +60,7 @@ async function showReportFromFile(mdFilePath) {
|
|
|
106
60
|
* 处理浏览器消息(通过 CDP binding 接收)
|
|
107
61
|
*/
|
|
108
62
|
async function handleBrowserMessage(data, page) {
|
|
109
|
-
|
|
63
|
+
debugFn(`handleBrowserMessage: 收到消息, type=${data.type}, page=${!!page}`);
|
|
110
64
|
|
|
111
65
|
const browserReadyPrefix = '[浏览器已就绪] ';
|
|
112
66
|
|
|
@@ -193,7 +147,13 @@ function prompt() {
|
|
|
193
147
|
}
|
|
194
148
|
|
|
195
149
|
async function init() {
|
|
196
|
-
|
|
150
|
+
// 解析参数(在 init 时才读取,避免与 CLI 路由层的 argv 冲突)
|
|
151
|
+
const args = process.argv.slice(2);
|
|
152
|
+
targetUrl = args.find(arg => arg.startsWith('http://') || arg.startsWith('https://'));
|
|
153
|
+
DEBUG = process.env.DEBUG === 'true' || args.includes('--debug');
|
|
154
|
+
debugFn = (...a) => { if (DEBUG) console.log('[DEBUG]', ...a); };
|
|
155
|
+
|
|
156
|
+
debugFn('init: 启动');
|
|
197
157
|
|
|
198
158
|
if (!ensureConfig()) {
|
|
199
159
|
process.exit(1);
|
|
@@ -203,27 +163,58 @@ async function init() {
|
|
|
203
163
|
console.log('[DEBUG] 调试模式已启用');
|
|
204
164
|
}
|
|
205
165
|
|
|
166
|
+
// 创建 readline、logger、agent(全部延迟到 init)
|
|
167
|
+
rl = readline.createInterface({
|
|
168
|
+
input: process.stdin,
|
|
169
|
+
output: process.stdout,
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
const logger = createLogger({ enabled: DEBUG, verbose: false });
|
|
173
|
+
|
|
174
|
+
async function onReportReady(mdFilePath) {
|
|
175
|
+
console.log('[report] 中间件触发报告显示:', mdFilePath);
|
|
176
|
+
await showReportFromFile(mdFilePath);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
agent = createDeepSpiderAgent({ onReportReady });
|
|
180
|
+
|
|
181
|
+
agentConfig = {
|
|
182
|
+
configurable: { thread_id: `deepspider-${Date.now()}` },
|
|
183
|
+
recursionLimit: 5000,
|
|
184
|
+
callbacks: logger ? [logger] : [],
|
|
185
|
+
};
|
|
186
|
+
|
|
206
187
|
// 初始化流处理器
|
|
207
|
-
|
|
188
|
+
const panelBridge = new PanelBridge(() => browser, debugFn);
|
|
189
|
+
streamHandler = new StreamHandler({
|
|
190
|
+
agent,
|
|
191
|
+
config: agentConfig,
|
|
192
|
+
panelBridge,
|
|
193
|
+
riskTools: browserTools.map(t => t.name),
|
|
194
|
+
debug: debugFn,
|
|
195
|
+
});
|
|
196
|
+
|
|
197
|
+
console.log('=== DeepSpider Agent ===');
|
|
198
|
+
console.log('智能爬虫 Agent,输入 exit 退出\n');
|
|
208
199
|
|
|
209
200
|
if (targetUrl) {
|
|
210
201
|
console.log(`正在打开: ${targetUrl}\n`);
|
|
211
202
|
try {
|
|
212
|
-
|
|
203
|
+
debugFn('init: 获取浏览器实例');
|
|
213
204
|
browser = await getBrowser();
|
|
214
205
|
browser.onMessage = handleBrowserMessage;
|
|
215
|
-
|
|
206
|
+
debugFn('init: 导航到目标URL');
|
|
216
207
|
await browser.navigate(targetUrl);
|
|
217
208
|
markHookInjected();
|
|
218
|
-
|
|
209
|
+
debugFn('init: 浏览器就绪');
|
|
219
210
|
console.log('浏览器已就绪,数据自动记录中');
|
|
220
211
|
console.log('点击面板选择按钮(⦿)选择数据进行分析\n');
|
|
221
212
|
} catch (error) {
|
|
222
213
|
console.error('启动浏览器失败:', error.message);
|
|
223
|
-
|
|
214
|
+
debugFn('init: 浏览器启动失败 -', error.stack);
|
|
224
215
|
}
|
|
225
216
|
}
|
|
226
217
|
prompt();
|
|
227
218
|
}
|
|
228
219
|
|
|
229
|
-
init
|
|
220
|
+
export { init };
|
package/src/agent/setup.js
CHANGED
|
@@ -1,17 +1,15 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* DeepSpider 配置检测
|
|
3
|
-
*
|
|
3
|
+
* 环境变量 > 配置文件 > 默认值
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
+
import { getConfigValues } from '../config/settings.js';
|
|
7
|
+
|
|
6
8
|
/**
|
|
7
|
-
*
|
|
9
|
+
* 检查配置(合并环境变量和配置文件)
|
|
8
10
|
*/
|
|
9
11
|
export function checkEnvConfig() {
|
|
10
|
-
return
|
|
11
|
-
apiKey: process.env.DEEPSPIDER_API_KEY,
|
|
12
|
-
baseUrl: process.env.DEEPSPIDER_BASE_URL,
|
|
13
|
-
model: process.env.DEEPSPIDER_MODEL,
|
|
14
|
-
};
|
|
12
|
+
return getConfigValues();
|
|
15
13
|
}
|
|
16
14
|
|
|
17
15
|
/**
|
|
@@ -22,9 +20,9 @@ export function ensureConfig() {
|
|
|
22
20
|
const { apiKey, baseUrl, model } = checkEnvConfig();
|
|
23
21
|
const missing = [];
|
|
24
22
|
|
|
25
|
-
if (!apiKey) missing.push('DEEPSPIDER_API_KEY');
|
|
26
|
-
if (!baseUrl) missing.push('DEEPSPIDER_BASE_URL');
|
|
27
|
-
if (!model) missing.push('DEEPSPIDER_MODEL');
|
|
23
|
+
if (!apiKey) missing.push('apiKey (DEEPSPIDER_API_KEY)');
|
|
24
|
+
if (!baseUrl) missing.push('baseUrl (DEEPSPIDER_BASE_URL)');
|
|
25
|
+
if (!model) missing.push('model (DEEPSPIDER_MODEL)');
|
|
28
26
|
|
|
29
27
|
if (missing.length === 0) {
|
|
30
28
|
return true;
|
|
@@ -35,14 +33,16 @@ export function ensureConfig() {
|
|
|
35
33
|
|
|
36
34
|
配置方式(任选其一):
|
|
37
35
|
|
|
38
|
-
1.
|
|
36
|
+
1. 使用 deepspider config 命令:
|
|
37
|
+
deepspider config set apiKey sk-xxx
|
|
38
|
+
deepspider config set baseUrl https://api.openai.com/v1
|
|
39
|
+
deepspider config set model gpt-4o
|
|
40
|
+
|
|
41
|
+
2. 配置环境变量:
|
|
39
42
|
export DEEPSPIDER_API_KEY=sk-xxx
|
|
40
43
|
export DEEPSPIDER_BASE_URL=https://api.openai.com/v1
|
|
41
44
|
export DEEPSPIDER_MODEL=gpt-4o
|
|
42
45
|
|
|
43
|
-
2. 一行命令:
|
|
44
|
-
DEEPSPIDER_API_KEY=sk-xxx DEEPSPIDER_BASE_URL=https://api.openai.com/v1 DEEPSPIDER_MODEL=gpt-4o deepspider <url>
|
|
45
|
-
|
|
46
46
|
请根据提示补全配置后重试。
|
|
47
47
|
`);
|
|
48
48
|
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* deepspider config 子命令
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
loadConfig,
|
|
7
|
+
saveConfig,
|
|
8
|
+
getEffectiveConfig,
|
|
9
|
+
resetConfig,
|
|
10
|
+
CONFIG_FILE,
|
|
11
|
+
ENV_MAP,
|
|
12
|
+
DEFAULTS,
|
|
13
|
+
} from '../config.js';
|
|
14
|
+
|
|
15
|
+
/**
 * Entry point of the `deepspider config` sub-command.
 *
 * Dispatches on the first element of `args` (defaulting to `list` when it is
 * missing or empty) and passes the following elements on as the key / value.
 * An unknown sub-command prints the list of valid ones and exits with code 1.
 *
 * @param {string[]} args - Arguments that follow the `config` word.
 * @returns {*} Whatever the selected handler returns.
 */
export function run(args) {
  const sub = args[0] || 'list';

  const handlers = {
    list: () => list(),
    get: () => get(args[1]),
    set: () => set(args[1], args[2]),
    reset: () => reset(),
    path: () => path(),
  };

  // Own-property check so inherited names such as "constructor" are rejected.
  if (!Object.hasOwn(handlers, sub)) {
    console.error(`未知子命令: ${sub}\n可用: list, get, set, reset, path`);
    process.exit(1);
    return undefined;
  }

  return handlers[sub]();
}
|
|
34
|
+
|
|
35
|
+
/**
 * Print every entry of the effective configuration together with the source
 * it came from. A set `apiKey` is masked with `maskKey`; any other falsy
 * value is shown as the "(未设置)" placeholder.
 */
function list() {
  const entries = Object.entries(getEffectiveConfig());
  console.log('配置项:');

  entries.forEach(([key, { value, source }]) => {
    let display;
    if (key === 'apiKey' && value) {
      display = maskKey(value);
    } else {
      display = value || '(未设置)';
    }
    console.log(` ${key} = ${display} [${source}]`);
  });
}
|
|
43
|
+
|
|
44
|
+
/**
 * Print a single effective configuration entry and its source.
 *
 * Exits with code 1 when `key` is missing or is not an own property of
 * `DEFAULTS`. A set `apiKey` is masked; a falsy value prints as "(未设置)".
 *
 * @param {string} [key] - Name of the configuration entry to show.
 */
function get(key) {
  if (!key) {
    console.error('用法: deepspider config get <key>');
    process.exit(1);
  }

  const isKnown = Object.hasOwn(DEFAULTS, key);
  if (!isKnown) {
    const available = Object.keys(DEFAULTS).join(', ');
    console.error(`未知配置项: ${key}\n可用: ${available}`);
    process.exit(1);
  }

  const { value: current, source: origin } = getEffectiveConfig()[key];

  let shown = current || '(未设置)';
  if (key === 'apiKey' && current) {
    shown = maskKey(current);
  }

  console.log(`${key} = ${shown} [${origin}]`);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Store one configuration value in the config file.
 *
 * Exits with code 1 when the key or value is missing, or when the key is not
 * an own property of `DEFAULTS`. After saving, the new value is echoed
 * (masked for `apiKey`), and a notice is printed if the matching environment
 * variable is set, since that variable takes precedence over the file.
 *
 * @param {string} [key] - Configuration entry to change.
 * @param {string} [value] - New value for that entry.
 */
function set(key, value) {
  const hasBothArgs = Boolean(key) && value !== undefined;
  if (!hasBothArgs) {
    console.error('用法: deepspider config set <key> <value>');
    process.exit(1);
  }

  if (!Object.hasOwn(DEFAULTS, key)) {
    const available = Object.keys(DEFAULTS).join(', ');
    console.error(`未知配置项: ${key}\n可用: ${available}`);
    process.exit(1);
  }

  const stored = loadConfig();
  stored[key] = value;
  saveConfig(stored);

  const envVar = ENV_MAP[key];

  let echoed = value;
  if (key === 'apiKey') {
    echoed = maskKey(value);
  }
  console.log(`已设置 ${key} = ${echoed}`);

  const overriddenByEnv = Boolean(process.env[envVar]);
  if (overriddenByEnv) {
    console.log(`注意: 环境变量 ${envVar} 已设置,将优先使用环境变量的值`);
  }
}
|
|
78
|
+
|
|
79
|
+
/**
 * Reset the stored configuration through `resetConfig()` and report the
 * outcome: one message when it returns a truthy value, another when it
 * returns a falsy one (reported to the user as "no config file to reset").
 */
function reset() {
  const wasReset = resetConfig();
  const message = wasReset ? '配置已重置' : '配置文件不存在,无需重置';
  console.log(message);
}
|
|
86
|
+
|
|
87
|
+
/**
 * Print the value of `CONFIG_FILE` (the location of the settings file).
 */
function path() {
  const location = CONFIG_FILE;
  console.log(location);
}
|
|
90
|
+
|
|
91
|
+
/**
 * Mask a secret for display, keeping only a short prefix and suffix.
 *
 * - Values of 8 characters or fewer are hidden completely ("****").
 * - Longer values show at most 4 characters on each side, and never more
 *   than a quarter of the value per side, so a 9–15 character secret is no
 *   longer printed almost in full.
 * - Non-string input (for example a number in a hand-edited settings file)
 *   is converted to a string instead of throwing a TypeError.
 *
 * @param {*} key - Secret to mask; normally a string.
 * @returns {string} The masked representation.
 */
function maskKey(key) {
  const text = String(key);
  if (text.length <= 8) return '****';

  // 4 per side once the value has 16+ characters, fewer for shorter values.
  const visible = Math.min(4, Math.floor(text.length / 4));
  return `${text.slice(0, visible)}****${text.slice(-visible)}`;
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* deepspider --help
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { getVersion } from '../../config/settings.js';
|
|
6
|
+
|
|
7
|
+
/**
 * Print the CLI help text to stdout.
 *
 * The first line embeds the current version obtained from `getVersion()`.
 * The template literal starts and ends with a newline for readability in
 * the source; `.trim()` removes them before printing.
 */
export function run() {
  console.log(`
deepspider v${getVersion()} - 智能爬虫工程平台

用法:
deepspider 启动交互式 Agent
deepspider <url> 打开目标网站并启动 Agent
deepspider config 管理配置
deepspider update 检查更新

选项:
-v, --version 显示版本号
-h, --help 显示帮助信息
--debug 启用调试模式

配置命令:
deepspider config list 列出所有配置
deepspider config get <key> 获取配置项
deepspider config set <key> <val> 设置配置项
deepspider config reset 重置配置
deepspider config path 显示配置文件路径

示例:
deepspider https://example.com 分析目标网站
deepspider config set apiKey sk-xxx
deepspider config set model gpt-4o
`.trim());
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* deepspider update
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import readline from 'readline';
|
|
6
|
+
import { existsSync } from 'fs';
|
|
7
|
+
import { execSync } from 'child_process';
|
|
8
|
+
import { join } from 'path';
|
|
9
|
+
import { getVersion } from '../../config/settings.js';
|
|
10
|
+
|
|
11
|
+
/**
 * Split a version string into its numeric core and pre-release tag.
 * A leading "v" and any "+build" metadata are ignored; non-numeric core
 * parts count as 0.
 *
 * @param {string} version - e.g. "0.3.0" or "v1.2.3-beta.1".
 * @returns {{ nums: number[], pre: string }} Parsed parts.
 */
function parseVersion(version) {
  const cleaned = String(version).trim().replace(/^v/i, '').split('+')[0];
  const dash = cleaned.indexOf('-');
  const core = dash === -1 ? cleaned : cleaned.slice(0, dash);
  const pre = dash === -1 ? '' : cleaned.slice(dash + 1);
  const nums = core.split('.').map((part) => Number.parseInt(part, 10) || 0);
  return { nums, pre };
}

/**
 * Compare two version strings by major / minor / patch.
 * With equal cores, a release sorts above a pre-release; two different
 * pre-release tags are ordered by plain string comparison, which is only an
 * approximation of the semver precedence rules.
 *
 * @param {string} a - Left version.
 * @param {string} b - Right version.
 * @returns {number} 1 when a > b, -1 when a < b, 0 when equal.
 */
function compareVersions(a, b) {
  const left = parseVersion(a);
  const right = parseVersion(b);

  for (let i = 0; i < 3; i += 1) {
    const diff = (left.nums[i] ?? 0) - (right.nums[i] ?? 0);
    if (diff !== 0) return Math.sign(diff);
  }

  if (left.pre === right.pre) return 0;
  if (left.pre === '') return 1;
  if (right.pre === '') return -1;
  return left.pre < right.pre ? -1 : 1;
}

/**
 * Ask the npm registry for the version tagged `latest`.
 *
 * @returns {Promise<string>} The published version string.
 * @throws {Error} On a non-2xx response, a timeout, or a response without a
 *   usable `version` field.
 */
async function fetchLatestVersion() {
  const resp = await fetch('https://registry.npmjs.org/deepspider/latest', {
    // Do not let an unresponsive registry hang the CLI forever.
    signal: AbortSignal.timeout(10_000),
  });
  if (!resp.ok) throw new Error(`HTTP ${resp.status}`);

  const data = await resp.json();
  if (typeof data?.version !== 'string' || data.version === '') {
    throw new Error('响应中缺少版本号');
  }
  return data.version;
}

/**
 * `deepspider update`: check the registry for a newer release and, when a
 * global install is detected, offer to upgrade it with npm.
 *
 * Exits with code 1 if the version check or the npm install fails. The
 * upgrade is only offered when the registry version is strictly greater
 * than the running one, so a local build that is ahead of the registry is
 * reported as up to date instead of being offered a downgrade.
 *
 * @returns {Promise<void>}
 */
export async function run() {
  const current = getVersion();

  console.log(`当前版本: v${current}`);
  console.log('检查更新...');

  let latest;
  try {
    latest = await fetchLatestVersion();
  } catch (e) {
    console.error(`检查更新失败: ${e.message}`);
    process.exit(1);
  }

  if (compareVersions(latest, current) <= 0) {
    console.log(`已是最新版本 v${current}`);
    return;
  }

  console.log(`发现新版本: v${latest}`);

  const isGlobal = detectGlobalInstall();

  if (!isGlobal) {
    console.log('当前为本地安装,请手动更新:');
    console.log(' git pull && pnpm install');
    return;
  }

  const confirmed = await confirm(`是否更新到 v${latest}?(y/N) `);
  if (!confirmed) {
    console.log('已取消');
    return;
  }

  console.log('正在更新...');
  try {
    // stdio is inherited, so npm's own output (including errors) is shown.
    execSync('npm install -g deepspider@latest', { stdio: 'inherit' });
    console.log(`已更新到 v${latest}`);
  } catch {
    console.error('更新失败,请手动执行: npm install -g deepspider@latest');
    process.exit(1);
  }
}
|
|
56
|
+
|
|
57
|
+
/**
 * Ask a yes/no question on the terminal.
 *
 * The answer is trimmed and compared case-insensitively; "y" and "yes"
 * count as consent, anything else (including an empty line) as refusal.
 * If the input closes before an answer arrives (EOF, Ctrl+D, piped empty
 * stdin), the promise resolves to `false` instead of staying pending.
 *
 * @param {string} question - Prompt text written to stdout.
 * @returns {Promise<boolean>} `true` only for an explicit yes.
 */
function confirm(question) {
  return new Promise((resolve) => {
    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    });

    // No answer before the interface closed: treat it as "no".
    // After a real answer this is a no-op because the promise is settled.
    rl.once('close', () => resolve(false));

    rl.question(question, (answer) => {
      const normalized = answer.trim().toLowerCase();
      // Resolve before closing so the 'close' handler cannot win the race.
      resolve(normalized === 'y' || normalized === 'yes');
      rl.close();
    });
  });
}
|
|
69
|
+
|
|
70
|
+
/**
 * Report whether a `deepspider` directory exists under npm's global
 * `node_modules` root (as printed by `npm root -g`).
 *
 * Any failure while running npm is treated as "not a global install".
 *
 * NOTE(review): this checks that a global copy exists, not that the running
 * CLI is that copy — confirm this is the intended meaning for callers.
 *
 * @returns {boolean} `true` when the global package directory exists.
 */
function detectGlobalInstall() {
  try {
    const npmRoot = execSync('npm root -g', { encoding: 'utf-8' });
    const candidate = join(npmRoot.trim(), 'deepspider');
    return existsSync(candidate);
  } catch {
    return false;
  }
}
|