deepspider 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +3 -3
- package/.trellis/spec/backend/directory-structure.md +1 -1
- package/.trellis/spec/backend/index.md +1 -1
- package/.trellis/workspace/pony/index.md +4 -3
- package/.trellis/workspace/pony/journal-1.md +54 -0
- package/CLAUDE.md +3 -3
- package/README.md +45 -4
- package/agents/deepspider.md +3 -3
- package/docs/GUIDE.md +2 -2
- package/docs/PROMPT.md +1 -1
- package/docs/USAGE.md +1 -1
- package/package.json +2 -2
- package/src/agent/index.js +3 -3
- package/src/agent/prompts/system.js +1 -1
- package/src/agent/run.js +8 -1
- package/src/agent/setup.js +56 -0
- package/src/index.js +1 -1
package/.env.example
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# DeepSpider 配置
|
|
2
2
|
|
|
3
3
|
# LLM API 配置(兼容 OpenAI 格式的任意供应商)
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
4
|
+
DEEPSPIDER_API_KEY=your_api_key_here
|
|
5
|
+
DEEPSPIDER_BASE_URL=https://api.openai.com/v1
|
|
6
|
+
DEEPSPIDER_MODEL=gpt-4o
|
|
7
7
|
|
|
8
8
|
# LangSmith 追踪配置(可选)
|
|
9
9
|
LANGSMITH_TRACING=true
|
package/.trellis/workspace/pony/index.md
CHANGED
|
@@ -8,8 +8,8 @@
|
|
|
8
8
|
|
|
9
9
|
<!-- @@@auto:current-status -->
|
|
10
10
|
- **Active File**: `journal-1.md`
|
|
11
|
-
- **Total Sessions**:
|
|
12
|
-
- **Last Active**: -
|
|
11
|
+
- **Total Sessions**: 1
|
|
12
|
+
- **Last Active**: 2026-02-03
|
|
13
13
|
<!-- @@@/auto:current-status -->
|
|
14
14
|
|
|
15
15
|
---
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
<!-- @@@auto:active-documents -->
|
|
20
20
|
| File | Lines | Status |
|
|
21
21
|
|------|-------|--------|
|
|
22
|
-
| `journal-1.md` | ~
|
|
22
|
+
| `journal-1.md` | ~61 | Active |
|
|
23
23
|
<!-- @@@/auto:active-documents -->
|
|
24
24
|
|
|
25
25
|
---
|
|
@@ -29,6 +29,7 @@
|
|
|
29
29
|
<!-- @@@auto:session-history -->
|
|
30
30
|
| # | Date | Title | Commits |
|
|
31
31
|
|---|------|-------|---------|
|
|
32
|
+
| 1 | 2026-02-03 | 环境变量重命名与配置检测 | `4aa6cad` |
|
|
32
33
|
<!-- @@@/auto:session-history -->
|
|
33
34
|
|
|
34
35
|
---
|
package/.trellis/workspace/pony/journal-1.md
CHANGED
|
@@ -5,3 +5,57 @@
|
|
|
5
5
|
|
|
6
6
|
---
|
|
7
7
|
|
|
8
|
+
|
|
9
|
+
## Session 1: 环境变量重命名与配置检测
|
|
10
|
+
|
|
11
|
+
**Date**: 2026-02-03
|
|
12
|
+
**Task**: 环境变量重命名与配置检测
|
|
13
|
+
|
|
14
|
+
### Summary
|
|
15
|
+
|
|
16
|
+
(Add summary)
|
|
17
|
+
|
|
18
|
+
### Main Changes
|
|
19
|
+
|
|
20
|
+
## 完成内容
|
|
21
|
+
|
|
22
|
+
重命名环境变量为项目专属前缀,并添加启动时配置检测。
|
|
23
|
+
|
|
24
|
+
| 变更 | 说明 |
|
|
25
|
+
|------|------|
|
|
26
|
+
| 环境变量重命名 | LLM_* → DEEPSPIDER_API_KEY/BASE_URL/MODEL |
|
|
27
|
+
| 配置检测模块 | 新增 setup.js,启动时检测必要配置 |
|
|
28
|
+
| 文档更新 | README.md, CLAUDE.md 同步更新 |
|
|
29
|
+
|
|
30
|
+
## 设计决策
|
|
31
|
+
|
|
32
|
+
从第一性原理分析,采用简化方案:
|
|
33
|
+
- 移除交互式配置向导(200行→47行)
|
|
34
|
+
- 只做检测+提示,不做选择
|
|
35
|
+
- 符合 Unix 哲学
|
|
36
|
+
|
|
37
|
+
## 变更文件
|
|
38
|
+
|
|
39
|
+
- `.env.example` - 环境变量模板
|
|
40
|
+
- `src/agent/index.js` - 读取新变量名
|
|
41
|
+
- `src/agent/run.js` - 添加配置检测调用
|
|
42
|
+
- `src/agent/setup.js` - 新增配置检测模块
|
|
43
|
+
- `README.md`, `CLAUDE.md` - 文档更新
|
|
44
|
+
|
|
45
|
+
### Git Commits
|
|
46
|
+
|
|
47
|
+
| Hash | Message |
|
|
48
|
+
|------|---------|
|
|
49
|
+
| `4aa6cad` | (see git log) |
|
|
50
|
+
|
|
51
|
+
### Testing
|
|
52
|
+
|
|
53
|
+
- [OK] (Add test results)
|
|
54
|
+
|
|
55
|
+
### Status
|
|
56
|
+
|
|
57
|
+
[OK] **Completed**
|
|
58
|
+
|
|
59
|
+
### Next Steps
|
|
60
|
+
|
|
61
|
+
- None - task complete
|
package/CLAUDE.md
CHANGED
|
@@ -265,9 +265,9 @@ pnpm run setup:crypto
|
|
|
265
265
|
# 配置环境变量
|
|
266
266
|
cp .env.example .env
|
|
267
267
|
# 编辑 .env 填入:
|
|
268
|
-
#
|
|
269
|
-
#
|
|
270
|
-
#
|
|
268
|
+
# DEEPSPIDER_API_KEY=your-api-key
|
|
269
|
+
# DEEPSPIDER_BASE_URL=https://api.openai.com/v1
|
|
270
|
+
# DEEPSPIDER_MODEL=gpt-4o
|
|
271
271
|
|
|
272
272
|
# Agent 模式(推荐)- 指定目标网站
|
|
273
273
|
pnpm run agent https://example.com
|
package/README.md
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
# DeepSpider
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[](https://www.npmjs.com/package/deepspider)
|
|
4
|
+
[](https://opensource.org/licenses/MIT)
|
|
4
5
|
|
|
5
|
-
|
|
6
|
+
> 智能爬虫工程平台 - 基于 DeepAgents + Patchright 的 AI 爬虫 Agent
|
|
7
|
+
|
|
8
|
+
从 JS 逆向到完整爬虫脚本的一站式 AI Agent 解决方案。
|
|
6
9
|
|
|
7
10
|
## 特性
|
|
8
11
|
|
|
@@ -18,7 +21,12 @@
|
|
|
18
21
|
### 安装
|
|
19
22
|
|
|
20
23
|
```bash
|
|
21
|
-
#
|
|
24
|
+
# 方式一:npm 全局安装
|
|
25
|
+
npm install -g deepspider
|
|
26
|
+
|
|
27
|
+
# 方式二:克隆仓库
|
|
28
|
+
git clone https://github.com/ma-pony/deepspider.git
|
|
29
|
+
cd deepspider
|
|
22
30
|
pnpm install
|
|
23
31
|
|
|
24
32
|
# 安装 Python 加密库(用于运行生成的 Python 代码)
|
|
@@ -26,7 +34,36 @@ pnpm run setup:crypto
|
|
|
26
34
|
|
|
27
35
|
# 配置环境变量
|
|
28
36
|
cp .env.example .env
|
|
29
|
-
# 编辑 .env
|
|
37
|
+
# 编辑 .env 填入配置(见下方环境变量说明)
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### 环境变量配置
|
|
41
|
+
|
|
42
|
+
DeepSpider 需要配置 LLM API 才能运行。支持任何兼容 OpenAI 格式的供应商。
|
|
43
|
+
|
|
44
|
+
| 变量名 | 必填 | 说明 |
|
|
45
|
+
|--------|------|------|
|
|
46
|
+
| `DEEPSPIDER_API_KEY` | 是 | API 密钥 |
|
|
47
|
+
| `DEEPSPIDER_BASE_URL` | 是 | API 地址 |
|
|
48
|
+
| `DEEPSPIDER_MODEL` | 是 | 模型名称 |
|
|
49
|
+
|
|
50
|
+
**常用供应商配置示例**:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# OpenAI
|
|
54
|
+
DEEPSPIDER_API_KEY=sk-xxx
|
|
55
|
+
DEEPSPIDER_BASE_URL=https://api.openai.com/v1
|
|
56
|
+
DEEPSPIDER_MODEL=gpt-4o
|
|
57
|
+
|
|
58
|
+
# DeepSeek
|
|
59
|
+
DEEPSPIDER_API_KEY=sk-xxx
|
|
60
|
+
DEEPSPIDER_BASE_URL=https://api.deepseek.com/v1
|
|
61
|
+
DEEPSPIDER_MODEL=deepseek-chat
|
|
62
|
+
|
|
63
|
+
# 其他 OpenAI 兼容供应商
|
|
64
|
+
DEEPSPIDER_API_KEY=your-key
|
|
65
|
+
DEEPSPIDER_BASE_URL=https://your-provider.com/v1
|
|
66
|
+
DEEPSPIDER_MODEL=model-name
|
|
30
67
|
```
|
|
31
68
|
|
|
32
69
|
### 使用
|
|
@@ -129,6 +166,10 @@ deepspider/
|
|
|
129
166
|
- [开发使用指南](docs/GUIDE.md)
|
|
130
167
|
- [调试指南](docs/DEBUG.md)
|
|
131
168
|
|
|
169
|
+
## 贡献
|
|
170
|
+
|
|
171
|
+
欢迎提交 Issue 和 Pull Request!
|
|
172
|
+
|
|
132
173
|
## License
|
|
133
174
|
|
|
134
175
|
MIT
|
package/agents/deepspider.md
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
---
|
|
2
|
-
description:
|
|
3
|
-
capabilities: ["动态调试", "代码解包", "反混淆", "加密捕获", "
|
|
2
|
+
description: 智能爬虫 Agent。JS逆向、动态调试、代码解包、反混淆、加密捕获、爬虫生成。
|
|
3
|
+
capabilities: ["JS逆向", "动态调试", "代码解包", "反混淆", "加密捕获", "爬虫生成"]
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
你是 DeepSpider
|
|
6
|
+
你是 DeepSpider,一个智能爬虫 Agent。
|
|
7
7
|
|
|
8
8
|
## 核心能力
|
|
9
9
|
|
package/docs/GUIDE.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# DeepSpider 开发使用指南
|
|
2
2
|
|
|
3
|
-
>
|
|
3
|
+
> 智能爬虫 Agent - 基于 DeepAgents + Patchright
|
|
4
4
|
|
|
5
5
|
## 目录
|
|
6
6
|
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
## 项目概述
|
|
18
18
|
|
|
19
|
-
DeepSpider
|
|
19
|
+
DeepSpider 是一个智能爬虫 Agent,基于 DeepAgents + Patchright 构建。
|
|
20
20
|
|
|
21
21
|
### 核心能力
|
|
22
22
|
|
package/docs/PROMPT.md
CHANGED
package/docs/USAGE.md
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "deepspider",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "智能爬虫工程平台 - 基于 DeepAgents + Patchright 的 AI 爬虫 Agent",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
"scripts": {
|
|
11
11
|
"start": "node src/index.js",
|
|
12
12
|
"dev": "node --watch src/index.js",
|
|
13
|
-
"cli": "node
|
|
13
|
+
"cli": "node src/agent/run.js",
|
|
14
14
|
"mcp": "node src/mcp/server.js",
|
|
15
15
|
"agent": "node src/agent/run.js",
|
|
16
16
|
"test": "node --test test/",
|
package/src/agent/index.js
CHANGED
|
@@ -16,9 +16,9 @@ import { createFilterToolsMiddleware } from './middleware/filterTools.js';
|
|
|
16
16
|
|
|
17
17
|
// 从环境变量读取配置
|
|
18
18
|
const config = {
|
|
19
|
-
apiKey: process.env.
|
|
20
|
-
baseUrl: process.env.
|
|
21
|
-
model: process.env.
|
|
19
|
+
apiKey: process.env.DEEPSPIDER_API_KEY,
|
|
20
|
+
baseUrl: process.env.DEEPSPIDER_BASE_URL,
|
|
21
|
+
model: process.env.DEEPSPIDER_MODEL || 'gpt-4o',
|
|
22
22
|
};
|
|
23
23
|
|
|
24
24
|
/**
|
package/src/agent/run.js
CHANGED
|
@@ -14,6 +14,7 @@ import { getBrowser } from '../browser/index.js';
|
|
|
14
14
|
import { markHookInjected } from './tools/runtime.js';
|
|
15
15
|
import { createLogger } from './logger.js';
|
|
16
16
|
import { browserTools } from './tools/browser.js';
|
|
17
|
+
import { ensureConfig } from './setup.js';
|
|
17
18
|
|
|
18
19
|
const args = process.argv.slice(2);
|
|
19
20
|
const targetUrl = args.find(arg => arg.startsWith('http://') || arg.startsWith('https://'));
|
|
@@ -27,7 +28,7 @@ let browser = null;
|
|
|
27
28
|
let currentPage = null;
|
|
28
29
|
|
|
29
30
|
console.log('=== DeepSpider Agent ===');
|
|
30
|
-
console.log('
|
|
31
|
+
console.log('智能爬虫 Agent,输入 exit 退出\n');
|
|
31
32
|
|
|
32
33
|
// 调试模式
|
|
33
34
|
const DEBUG = process.env.DEBUG === 'true' || process.argv.includes('--debug');
|
|
@@ -548,6 +549,12 @@ function prompt() {
|
|
|
548
549
|
|
|
549
550
|
async function init() {
|
|
550
551
|
debug('init: 启动');
|
|
552
|
+
|
|
553
|
+
// 首次运行检测:确保环境变量已配置
|
|
554
|
+
if (!ensureConfig()) {
|
|
555
|
+
process.exit(1);
|
|
556
|
+
}
|
|
557
|
+
|
|
551
558
|
if (DEBUG) {
|
|
552
559
|
console.log('[DEBUG] 调试模式已启用');
|
|
553
560
|
}
|
package/src/agent/setup.js
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeepSpider 配置检测
|
|
3
|
+
* 简单检测 + 清晰提示,不做交互式向导
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* 检查环境变量是否已配置
|
|
8
|
+
*/
|
|
9
|
+
export function checkEnvConfig() {
|
|
10
|
+
return {
|
|
11
|
+
apiKey: process.env.DEEPSPIDER_API_KEY,
|
|
12
|
+
baseUrl: process.env.DEEPSPIDER_BASE_URL,
|
|
13
|
+
model: process.env.DEEPSPIDER_MODEL,
|
|
14
|
+
};
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* 检查配置是否完整
|
|
19
|
+
* @returns {boolean} 是否可以继续运行
|
|
20
|
+
*/
|
|
21
|
+
export function ensureConfig() {
|
|
22
|
+
const { apiKey, baseUrl, model } = checkEnvConfig();
|
|
23
|
+
const missing = [];
|
|
24
|
+
|
|
25
|
+
if (!apiKey) missing.push('DEEPSPIDER_API_KEY');
|
|
26
|
+
if (!baseUrl) missing.push('DEEPSPIDER_BASE_URL');
|
|
27
|
+
if (!model) missing.push('DEEPSPIDER_MODEL');
|
|
28
|
+
|
|
29
|
+
if (missing.length === 0) {
|
|
30
|
+
return true;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
console.error(`
|
|
34
|
+
错误:缺少必要配置 - ${missing.join(', ')}
|
|
35
|
+
|
|
36
|
+
配置方式(任选其一):
|
|
37
|
+
|
|
38
|
+
1. 环境变量:
|
|
39
|
+
export DEEPSPIDER_API_KEY=sk-xxx
|
|
40
|
+
export DEEPSPIDER_BASE_URL=https://api.openai.com/v1
|
|
41
|
+
export DEEPSPIDER_MODEL=gpt-4o
|
|
42
|
+
|
|
43
|
+
2. .env 文件(推荐):
|
|
44
|
+
cp .env.example .env
|
|
45
|
+
# 编辑 .env 填入配置
|
|
46
|
+
|
|
47
|
+
3. 一行命令:
|
|
48
|
+
DEEPSPIDER_API_KEY=sk-xxx DEEPSPIDER_BASE_URL=https://api.openai.com/v1 DEEPSPIDER_MODEL=gpt-4o npx deepspider <url>
|
|
49
|
+
|
|
50
|
+
常用供应商配置:
|
|
51
|
+
OpenAI: BASE_URL=https://api.openai.com/v1 MODEL=gpt-4o
|
|
52
|
+
DeepSeek: BASE_URL=https://api.deepseek.com/v1 MODEL=deepseek-chat
|
|
53
|
+
`);
|
|
54
|
+
|
|
55
|
+
return false;
|
|
56
|
+
}
|