dslighting 1.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsat/__init__.py +3 -0
- dsat/benchmark/__init__.py +1 -0
- dsat/benchmark/benchmark.py +168 -0
- dsat/benchmark/datasci.py +291 -0
- dsat/benchmark/mle.py +777 -0
- dsat/benchmark/sciencebench.py +304 -0
- dsat/common/__init__.py +0 -0
- dsat/common/constants.py +11 -0
- dsat/common/exceptions.py +48 -0
- dsat/common/typing.py +19 -0
- dsat/config.py +79 -0
- dsat/models/__init__.py +3 -0
- dsat/models/candidates.py +16 -0
- dsat/models/formats.py +52 -0
- dsat/models/task.py +64 -0
- dsat/operators/__init__.py +0 -0
- dsat/operators/aflow_ops.py +90 -0
- dsat/operators/autokaggle_ops.py +170 -0
- dsat/operators/automind_ops.py +38 -0
- dsat/operators/base.py +22 -0
- dsat/operators/code.py +45 -0
- dsat/operators/dsagent_ops.py +123 -0
- dsat/operators/llm_basic.py +84 -0
- dsat/prompts/__init__.py +0 -0
- dsat/prompts/aflow_prompt.py +76 -0
- dsat/prompts/aide_prompt.py +52 -0
- dsat/prompts/autokaggle_prompt.py +290 -0
- dsat/prompts/automind_prompt.py +29 -0
- dsat/prompts/common.py +51 -0
- dsat/prompts/data_interpreter_prompt.py +82 -0
- dsat/prompts/dsagent_prompt.py +88 -0
- dsat/runner.py +554 -0
- dsat/services/__init__.py +0 -0
- dsat/services/data_analyzer.py +387 -0
- dsat/services/llm.py +486 -0
- dsat/services/llm_single.py +421 -0
- dsat/services/sandbox.py +386 -0
- dsat/services/states/__init__.py +0 -0
- dsat/services/states/autokaggle_state.py +43 -0
- dsat/services/states/base.py +14 -0
- dsat/services/states/dsa_log.py +13 -0
- dsat/services/states/experience.py +237 -0
- dsat/services/states/journal.py +153 -0
- dsat/services/states/operator_library.py +290 -0
- dsat/services/vdb.py +76 -0
- dsat/services/workspace.py +178 -0
- dsat/tasks/__init__.py +3 -0
- dsat/tasks/handlers.py +376 -0
- dsat/templates/open_ended/grade_template.py +107 -0
- dsat/tools/__init__.py +4 -0
- dsat/utils/__init__.py +0 -0
- dsat/utils/context.py +172 -0
- dsat/utils/dynamic_import.py +71 -0
- dsat/utils/parsing.py +33 -0
- dsat/workflows/__init__.py +12 -0
- dsat/workflows/base.py +53 -0
- dsat/workflows/factory.py +439 -0
- dsat/workflows/manual/__init__.py +0 -0
- dsat/workflows/manual/autokaggle_workflow.py +148 -0
- dsat/workflows/manual/data_interpreter_workflow.py +153 -0
- dsat/workflows/manual/deepanalyze_workflow.py +484 -0
- dsat/workflows/manual/dsagent_workflow.py +76 -0
- dsat/workflows/search/__init__.py +0 -0
- dsat/workflows/search/aflow_workflow.py +344 -0
- dsat/workflows/search/aide_workflow.py +283 -0
- dsat/workflows/search/automind_workflow.py +237 -0
- dsat/workflows/templates/__init__.py +0 -0
- dsat/workflows/templates/basic_kaggle_loop.py +71 -0
- dslighting/__init__.py +170 -0
- dslighting/core/__init__.py +13 -0
- dslighting/core/agent.py +646 -0
- dslighting/core/config_builder.py +318 -0
- dslighting/core/data_loader.py +422 -0
- dslighting/core/task_detector.py +422 -0
- dslighting/utils/__init__.py +19 -0
- dslighting/utils/defaults.py +151 -0
- dslighting-1.3.9.dist-info/METADATA +554 -0
- dslighting-1.3.9.dist-info/RECORD +80 -0
- dslighting-1.3.9.dist-info/WHEEL +5 -0
- dslighting-1.3.9.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,554 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dslighting
|
|
3
|
+
Version: 1.3.9
|
|
4
|
+
Summary: Simplified API for Data Science Agent Automation
|
|
5
|
+
Author: DSLighting Team
|
|
6
|
+
License: AGPL-3.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/usail-hkust/dslighting
|
|
8
|
+
Project-URL: Documentation, https://luckyfan-cs.github.io/dslighting-web/
|
|
9
|
+
Project-URL: Repository, https://github.com/usail-hkust/dslighting
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/usail-hkust/dslighting/issues
|
|
11
|
+
Keywords: data-science,agent,automation,machine-learning,ai
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: GNU Affero General Public License v3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
Requires-Dist: pandas>=1.5.0
|
|
24
|
+
Requires-Dist: pydantic>=2.0
|
|
25
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
26
|
+
Requires-Dist: openai
|
|
27
|
+
Requires-Dist: anthropic
|
|
28
|
+
Requires-Dist: litellm>=1.80.0
|
|
29
|
+
Requires-Dist: rich
|
|
30
|
+
Requires-Dist: transformers
|
|
31
|
+
Requires-Dist: torch
|
|
32
|
+
Requires-Dist: scikit-learn
|
|
33
|
+
Requires-Dist: nbformat
|
|
34
|
+
Requires-Dist: nbclient
|
|
35
|
+
Provides-Extra: full
|
|
36
|
+
Requires-Dist: openai>=1.0.0; extra == "full"
|
|
37
|
+
Requires-Dist: anthropic>=0.34.0; extra == "full"
|
|
38
|
+
Requires-Dist: litellm>=1.0.0; extra == "full"
|
|
39
|
+
Requires-Dist: rich>=13.0.0; extra == "full"
|
|
40
|
+
Requires-Dist: transformers>=4.30.0; extra == "full"
|
|
41
|
+
Requires-Dist: torch>=2.0.0; extra == "full"
|
|
42
|
+
Requires-Dist: scikit-learn>=1.0.0; extra == "full"
|
|
43
|
+
Provides-Extra: dev
|
|
44
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
45
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
|
|
46
|
+
Requires-Dist: black>=23.0; extra == "dev"
|
|
47
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
48
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
49
|
+
Requires-Dist: build>=0.10.0; extra == "dev"
|
|
50
|
+
Requires-Dist: twine>=4.0.0; extra == "dev"
|
|
51
|
+
|
|
52
|
+
<div align="center">
|
|
53
|
+
|
|
54
|
+
<img src="assets/dslighting.png" alt="DSLIGHTING Logo" width="180" style="border-radius: 15px;">
|
|
55
|
+
|
|
56
|
+
# DSLIGHTING:全流程数据科学智能助手
|
|
57
|
+
|
|
58
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
59
|
+
[FastAPI](https://fastapi.tiangolo.com/)
|
|
60
|
+
[React](https://react.dev/)
|
|
61
|
+
[Next.js](https://nextjs.org/)
|
|
62
|
+
[License: AGPL-3.0](LICENSE)
|
|
63
|
+
|
|
64
|
+
<p align="center">
|
|
65
|
+
<a href="#-快速开始"><img src="https://img.shields.io/badge/🚀-快速开始-green?style=for-the-badge" alt="Quick Start"></a>
|
|
66
|
+
|
|
67
|
+
<a href="#核心功能"><img src="https://img.shields.io/badge/⚡-核心功能-blue?style=for-the-badge" alt="Core Features"></a>
|
|
68
|
+
|
|
69
|
+
<a href="https://luckyfan-cs.github.io/dslighting-web/"><img src="https://img.shields.io/badge/📚-文档-orange?style=for-the-badge" alt="Documentation"></a>
|
|
70
|
+
|
|
71
|
+
<a href="https://luckyfan-cs.github.io/dslighting-web/guide/getting-started.html"><img src="https://img.shields.io/badge/📖-使用指南-purple?style=for-the-badge" alt="User Guide"></a>
|
|
72
|
+
|
|
73
|
+
<a href="https://github.com/usail-hkust/dslighting/stargazers"><img src="https://img.shields.io/github/stars/usail-hkust/dslighting?style=for-the-badge" alt="Stars"></a>
|
|
74
|
+
|
|
75
|
+
<img src="https://komarev.com/ghpvc/?username=usail-hkust&repo=dslighting&style=for-the-badge" alt="Profile views">
|
|
76
|
+
</p>
|
|
77
|
+
|
|
78
|
+
[English](docs/README_EN.md) · [日本語](docs/README_JA.md) · [Français](docs/README_FR.md)
|
|
79
|
+
|
|
80
|
+
</div>
|
|
81
|
+
|
|
82
|
+
<div align="center">
|
|
83
|
+
|
|
84
|
+
🎯 **智能Agent工作流** • 📊 **交互式数据可视化**<br>
|
|
85
|
+
🤖 **自动化代码生成** • 📈 **全流程任务评估**
|
|
86
|
+
|
|
87
|
+
[💬 加入微信交流群](#-微信交流群) • [⭐ 给我们Star](https://github.com/usail-hkust/dslighting/stargazers)
|
|
88
|
+
|
|
89
|
+
</div>
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## 📸 Web界面预览
|
|
94
|
+
|
|
95
|
+
### 主页面
|
|
96
|
+

|
|
97
|
+
|
|
98
|
+
### 数据探索 (EDA)
|
|
99
|
+

|
|
100
|
+
|
|
101
|
+
### 自定义任务
|
|
102
|
+

|
|
103
|
+
|
|
104
|
+
### 模型训练
|
|
105
|
+

|
|
106
|
+
|
|
107
|
+
### 报告生成
|
|
108
|
+

|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## 📖 项目简介
|
|
113
|
+
|
|
114
|
+
DSLIGHTING 是一个全流程数据科学智能助手系统,采用Agent式工作流和可复用的数据布局,为数据科学任务提供端到端的执行、评估和迭代能力。
|
|
115
|
+
|
|
116
|
+
### ✨ 核心特性
|
|
117
|
+
|
|
118
|
+
- 🤖 **多种Agent工作流**:集成aide、automind、dsagent等多种智能体风格
|
|
119
|
+
- 🔄 **元优化框架**:支持AFlow元优化,自动选择最优工作流
|
|
120
|
+
- 📊 **Web可视化界面**:基于Next.js + FastAPI的交互式Dashboard
|
|
121
|
+
- 📝 **完整日志追踪**:记录每次运行的artifacts和摘要
|
|
122
|
+
- 🧩 **可扩展架构**:灵活的任务注册和数据准备流程
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## 🚀 快速开始
|
|
127
|
+
|
|
128
|
+
> 📖 **查看完整文档**:https://luckyfan-cs.github.io/dslighting-web/
|
|
129
|
+
>
|
|
130
|
+
> 💡 **需要详细配置步骤?** 查看 [完整配置指南](SETUP_GUIDE.md)
|
|
131
|
+
|
|
132
|
+
### 系统要求
|
|
133
|
+
|
|
134
|
+
- **Python**: 3.10 或更高版本
|
|
135
|
+
```bash
|
|
136
|
+
# 检查Python版本
|
|
137
|
+
python --version
|
|
138
|
+
# 或
|
|
139
|
+
python3 --version
|
|
140
|
+
```
|
|
141
|
+
- **Node.js**: 18.x 或更高版本
|
|
142
|
+
- **npm**: 9.x 或更高版本(随Node.js一起安装)
|
|
143
|
+
- **Git**: 用于版本控制
|
|
144
|
+
|
|
145
|
+
### 1. 环境准备
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
git clone https://github.com/usail-hkust/dslighting.git
|
|
149
|
+
cd dslighting
|
|
150
|
+
python3.10 -m venv dslighting
|
|
151
|
+
source dslighting/bin/activate # Windows: dslighting\Scripts\activate
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### 2. 安装依赖
|
|
155
|
+
|
|
156
|
+
**标准安装**(推荐):
|
|
157
|
+
```bash
|
|
158
|
+
pip install -r requirements.txt
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
**备选方案**(如果标准安装出错):
|
|
162
|
+
```bash
|
|
163
|
+
pip install -r requirements_local.txt
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
> 💡 **说明**:
|
|
167
|
+
> - `requirements.txt`:锁定具体版本,适合生产环境
|
|
168
|
+
> - `requirements_local.txt`:不锁定版本,依赖更灵活,适合开发环境
|
|
169
|
+
|
|
170
|
+
### 3. (可选) 安装 Python API 包
|
|
171
|
+
|
|
172
|
+
> 🎉 **新功能!** DSLighting 现在提供简化的 Python API,让使用像 scikit-learn 一样简单!
|
|
173
|
+
|
|
174
|
+
**快速安装 Python API 包**:
|
|
175
|
+
```bash
|
|
176
|
+
pip install -e .
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
**一行代码运行数据科学任务**:
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
import dslighting
|
|
183
|
+
|
|
184
|
+
# 快速运行
|
|
185
|
+
result = dslighting.run_agent("data/competitions/bike-sharing-demand")
|
|
186
|
+
|
|
187
|
+
# 查看结果
|
|
188
|
+
print(f"得分: {result.score}, 成本: ${result.cost:.4f}")
|
|
189
|
+
|
|
190
|
+
# 或者更多控制
|
|
191
|
+
data = dslighting.load_data("data/competitions/bike-sharing-demand")
|
|
192
|
+
agent = dslighting.Agent(workflow="aide", model="gpt-4o-mini", temperature=0.7)
|
|
193
|
+
result = agent.run(data)
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
📖 **详细文档**:[Python API 快速上手指南](docs/python-api-guide.md) | [API 完整文档](dslighting/README.md)
|
|
197
|
+
|
|
198
|
+
> ✨ **特点**:
|
|
199
|
+
> - 🚀 **超简单**:1-3 行代码完成复杂任务
|
|
200
|
+
> - 🤖 **智能检测**:自动识别任务类型和推荐工作流
|
|
201
|
+
> - 🔧 **完全兼容**:与现有 DSAT API 100% 兼容
|
|
202
|
+
> - 📊 **开箱即用**:合理的默认配置,环境变量驱动
|
|
203
|
+
|
|
204
|
+
### 4. 配置API密钥
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
cp .env.example .env
|
|
208
|
+
# 编辑.env文件,设置你的API密钥
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
DSLighting支持多种LLM提供商:
|
|
212
|
+
|
|
213
|
+
**国内API提供商**(推荐):
|
|
214
|
+
- **智谱AI** (https://bigmodel.cn/) - GLM系列模型
|
|
215
|
+
- API Base: `https://open.bigmodel.cn/api/paas/v4`
|
|
216
|
+
- 获取密钥: https://open.bigmodel.cn/usercenter/apikeys
|
|
217
|
+
- **硅基流动** (https://siliconflow.cn/) - DeepSeek、Qwen等多种模型
|
|
218
|
+
- API Base: `https://api.siliconflow.cn/v1`
|
|
219
|
+
- 获取密钥: https://siliconflow.cn/account/ak
|
|
220
|
+
|
|
221
|
+
**国际API提供商**:
|
|
222
|
+
- **OpenAI** (https://openai.com/) - GPT系列模型
|
|
223
|
+
- API Base: `https://api.openai.com/v1`
|
|
224
|
+
- 获取密钥: https://platform.openai.com/api-keys
|
|
225
|
+
|
|
226
|
+
支持通过 `API_KEY`/`API_BASE` 或 `LLM_MODEL_CONFIGS` 配置不同模型。
|
|
227
|
+
|
|
228
|
+
> 💡 **配置示例**: 查看 `.env.example` 文件获取详细的多模型配置示例,包括API密钥轮换、温度设置等。
|
|
229
|
+
|
|
230
|
+
### 5. 准备数据
|
|
231
|
+
|
|
232
|
+
DSLighting支持多种数据来源。目前支持以下数据准备方式:
|
|
233
|
+
|
|
234
|
+
#### 方式1:通过MLE-Bench下载(推荐)
|
|
235
|
+
|
|
236
|
+
[MLE-Bench](https://github.com/openai/mle-bench)是OpenAI提供的机器学习评估基准数据集。
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
# 1. 克隆MLE-Bench仓库
|
|
240
|
+
git clone https://github.com/openai/mle-bench.git
|
|
241
|
+
cd mle-bench
|
|
242
|
+
|
|
243
|
+
# 2. 安装依赖
|
|
244
|
+
pip install -e .
|
|
245
|
+
|
|
246
|
+
# 3. 下载所有数据集
|
|
247
|
+
python scripts/prepare.py --competition all
|
|
248
|
+
|
|
249
|
+
# 4. 将数据链接到DSLighting项目
|
|
250
|
+
# MLE-Bench数据默认在 ~/mle-bench/data/
|
|
251
|
+
# 可以创建符号链接或复制到 dslighting 项目
|
|
252
|
+
ln -s ~/mle-bench/data/competitions /path/to/dslighting/data/competitions
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
> 📖 **详细信息**: 查看 [MLE-Bench文档](https://github.com/openai/mle-bench) 了解更多数据集详情。
|
|
256
|
+
|
|
257
|
+
#### 方式2:自定义数据集
|
|
258
|
+
|
|
259
|
+
您也可以使用自己的数据集,只需按照DSLighting的数据布局结构组织:
|
|
260
|
+
|
|
261
|
+
```
|
|
262
|
+
data/competitions/
|
|
263
|
+
<竞赛ID>/
|
|
264
|
+
config.yaml # 竞赛配置文件
|
|
265
|
+
prepared/
|
|
266
|
+
public/ # 公开数据(训练集、样本提交)
|
|
267
|
+
private/ # 私有数据(测试标签,用于评分)
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
> 💡 **提示**: 更多数据类型和预训练模型支持正在陆续开放中,敬请期待!
|
|
271
|
+
|
|
272
|
+
> 📖 **详细数据准备指南**: 查看 [数据准备文档](docs/DATA_PREPARATION.md) 了解更多详情。
|
|
273
|
+
|
|
274
|
+
### 6. 运行单个任务
|
|
275
|
+
|
|
276
|
+
```bash
|
|
277
|
+
python run_benchmark.py \
|
|
278
|
+
--workflow aide \
|
|
279
|
+
--benchmark mle \
|
|
280
|
+
--data-dir data/competitions \
|
|
281
|
+
--task-id bike-sharing-demand \
|
|
282
|
+
--llm-model gpt-4
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
### 7. 使用Web UI(推荐)
|
|
286
|
+
|
|
287
|
+
我们提供了基于 Next.js + FastAPI 的Web界面,让数据上传和任务执行更加便捷。
|
|
288
|
+
|
|
289
|
+
#### 7.1 后端环境配置
|
|
290
|
+
|
|
291
|
+
后端依赖主项目的dslighting环境,只需额外安装Web框架依赖:
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
source dslighting/bin/activate
|
|
295
|
+
# 安装后端依赖
|
|
296
|
+
pip install -r web_ui/backend/requirements.txt
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
#### 7.2 启动后端服务
|
|
300
|
+
|
|
301
|
+
```bash
|
|
302
|
+
# 进入后端目录
|
|
303
|
+
cd web_ui/backend
|
|
304
|
+
|
|
305
|
+
# 启动后端(默认端口8003)
|
|
306
|
+
python main.py
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
或者使用uvicorn直接启动:
|
|
310
|
+
|
|
311
|
+
```bash
|
|
312
|
+
cd web_ui/backend
|
|
313
|
+
uvicorn app.main:app --reload --host 0.0.0.0 --port 8003
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
> 📖 **详细文档**:查看 [后端README](web_ui/backend/README.md) 了解API端点和配置说明
|
|
317
|
+
|
|
318
|
+
> 💡 **提示**:后端默认运行在 **8003端口**。如果端口被占用,修改 `main.py` 中的端口号。
|
|
319
|
+
|
|
320
|
+
#### 7.3 启动前端服务
|
|
321
|
+
|
|
322
|
+
```bash
|
|
323
|
+
cd web_ui/frontend
|
|
324
|
+
npm install # 首次运行时安装依赖
|
|
325
|
+
npm run dev # 启动开发服务器
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
> 📖 **详细文档**:查看 [前端README](web_ui/frontend/README.md) 了解更多前端开发细节
|
|
329
|
+
|
|
330
|
+
#### 7.4 访问Dashboard
|
|
331
|
+
|
|
332
|
+
打开浏览器访问:[http://localhost:3000](http://localhost:3000)
|
|
333
|
+
|
|
334
|
+
---
|
|
335
|
+
|
|
336
|
+
## 🏗️ 核心功能
|
|
337
|
+
|
|
338
|
+
### Agent工作流
|
|
339
|
+
|
|
340
|
+
- **`aide`**:迭代式代码生成和审查循环
|
|
341
|
+
- **`automind`**:带记忆和任务分解的规划+推理
|
|
342
|
+
- **`dsagent`**:结构化操作符流程的规划/执行循环
|
|
343
|
+
- **`data_interpreter`**:快速代码执行和调试循环
|
|
344
|
+
- **`autokaggle`**:SOP风格的Kaggle工作流
|
|
345
|
+
- **`aflow`**:工作流的元优化
|
|
346
|
+
- **`deepanalyze`**:专注分析型执行工作流
|
|
347
|
+
|
|
348
|
+
### 数据布局
|
|
349
|
+
|
|
350
|
+
```
|
|
351
|
+
data/competitions/
|
|
352
|
+
<竞赛ID>/
|
|
353
|
+
config.yaml # 竞赛配置文件
|
|
354
|
+
prepared/
|
|
355
|
+
public/ # 公开数据
|
|
356
|
+
private/ # 私有数据
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
### 配置说明
|
|
360
|
+
|
|
361
|
+
#### 基础配置
|
|
362
|
+
|
|
363
|
+
`config.yaml` 会被基准测试运行器和LLM服务读取:
|
|
364
|
+
|
|
365
|
+
- `competitions`:MLEBench的默认竞赛列表
|
|
366
|
+
- `sciencebench_competitions`(可选):ScienceBench的默认列表
|
|
367
|
+
- `custom_model_pricing`:LiteLLM的按模型token定价覆盖
|
|
368
|
+
- `run`:轨迹日志记录开关
|
|
369
|
+
|
|
370
|
+
#### 自定义模型价格配置
|
|
371
|
+
|
|
372
|
+
**默认行为**:
|
|
373
|
+
- DSLighting 使用 LiteLLM 的内置默认价格
|
|
374
|
+
- 如果没有 `config.yaml`,系统会正常工作,**不会报错**
|
|
375
|
+
- 价格配置是**可选的**,仅在需要覆盖默认价格时使用
|
|
376
|
+
|
|
377
|
+
**自定义价格配置**:
|
|
378
|
+
|
|
379
|
+
如果需要为自定义模型设置价格,可以在项目目录创建 `config.yaml` 文件:
|
|
380
|
+
|
|
381
|
+
**位置**:
|
|
382
|
+
```bash
|
|
383
|
+
# 对于 pip 安装
|
|
384
|
+
/path/to/your/project/config.yaml
|
|
385
|
+
|
|
386
|
+
# 示例:测试项目中
|
|
387
|
+
/home/user/projects/dslighting_test_project/config.yaml
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
> 📖 **参考示例**:查看 [config.yaml.example](config.yaml.example) 获取完整配置示例
|
|
391
|
+
|
|
392
|
+
**配置示例**:
|
|
393
|
+
```yaml
|
|
394
|
+
custom_model_pricing:
|
|
395
|
+
openai/Qwen/Qwen3-Coder-480B-A35B-Instruct:
|
|
396
|
+
input_cost_per_token: 6.0e-07
|
|
397
|
+
output_cost_per_token: 1.8e-06
|
|
398
|
+
openai/Qwen/Qwen3-Coder-30B-A3B-Instruct:
|
|
399
|
+
input_cost_per_token: 6.0e-07
|
|
400
|
+
output_cost_per_token: 1.8e-06
|
|
401
|
+
o4-mini-2025-04-16:
|
|
402
|
+
input_cost_per_token: 1.1e-06
|
|
403
|
+
output_cost_per_token: 4.4e-06
|
|
404
|
+
openai/deepseek-ai/DeepSeek-V3.1-Terminus:
|
|
405
|
+
input_cost_per_token: 5.55e-07
|
|
406
|
+
output_cost_per_token: 1.67e-06
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
**价格参数说明**:
|
|
410
|
+
- `input_cost_per_token`:每个输入 token(请求/提示部分)的价格
|
|
411
|
+
- `output_cost_per_token`:每个输出 token(响应/生成部分)的价格
|
|
412
|
+
- 单位:美元/token(通常为科学计数法)
|
|
413
|
+
|
|
414
|
+
**注意事项**:
|
|
415
|
+
- 💡 价格配置是**可选的**,没有 config.yaml 也不会报错
|
|
416
|
+
- 💡 只需要覆盖需要自定义的模型,其他模型使用默认价格
|
|
417
|
+
- 💡 价格配置会影响成本计算和预算控制
|
|
418
|
+
|
|
419
|
+
---
|
|
420
|
+
|
|
421
|
+
## 📂 日志和Artifacts
|
|
422
|
+
|
|
423
|
+
默认日志写入路径:
|
|
424
|
+
|
|
425
|
+
```
|
|
426
|
+
runs/benchmark_results/<workflow>_on_<benchmark>/<model_name>/
|
|
427
|
+
```
|
|
428
|
+
|
|
429
|
+
可以通过 `--log-path` 参数覆盖基础目录。
|
|
430
|
+
|
|
431
|
+
---
|
|
432
|
+
|
|
433
|
+
## ❓ 常见问题
|
|
434
|
+
|
|
435
|
+
查看 `docs/FAQ.md` 获取更多详细信息。
|
|
436
|
+
|
|
437
|
+
---
|
|
438
|
+
|
|
439
|
+
## ⭐ Star History
|
|
440
|
+
|
|
441
|
+
<div align="center">
|
|
442
|
+
|
|
443
|
+
<p>
|
|
444
|
+
<a href="https://github.com/usail-hkust/dslighting/stargazers"><img src="assets/roster/stargazers.svg" alt="Stargazers"/></a>
|
|
445
|
+
|
|
446
|
+
<a href="https://github.com/usail-hkust/dslighting/network/members"><img src="assets/roster/forkers.svg" alt="Forkers"/></a>
|
|
447
|
+
</p>
|
|
448
|
+
|
|
449
|
+
<a href="https://www.star-history.com/#usail-hkust/dslighting&type=timeline&legend=top-left">
|
|
450
|
+
<picture>
|
|
451
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=usail-hkust/dslighting&type=timeline&theme=dark&legend=top-left" />
|
|
452
|
+
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=usail-hkust/dslighting&type=timeline&legend=top-left" />
|
|
453
|
+
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=usail-hkust/dslighting&type=timeline&legend=top-left" />
|
|
454
|
+
</picture>
|
|
455
|
+
</a>
|
|
456
|
+
|
|
457
|
+
</div>
|
|
458
|
+
|
|
459
|
+
---
|
|
460
|
+
|
|
461
|
+
## 💬 微信交流群
|
|
462
|
+
|
|
463
|
+
欢迎加入我们的微信交流群,与其他用户和开发者交流经验!
|
|
464
|
+
|
|
465
|
+
<div align="center">
|
|
466
|
+
|
|
467
|
+
<img src="assets/wechat_group.jpg" alt="微信交流群" width="300" style="border-radius: 10px; border: 2px solid #e0e0e0;">
|
|
468
|
+
|
|
469
|
+
**扫描上方二维码加入DSLighting用户交流群**
|
|
470
|
+
|
|
471
|
+
</div>
|
|
472
|
+
|
|
473
|
+
在群内您可以:
|
|
474
|
+
- 🤝 与其他用户交流使用经验
|
|
475
|
+
- 💡 提出功能建议和反馈
|
|
476
|
+
- 🐛 报告Bug并获得帮助
|
|
477
|
+
- 📢 了解最新开发动态
|
|
478
|
+
|
|
479
|
+
---
|
|
480
|
+
|
|
481
|
+
## 🤝 贡献指南
|
|
482
|
+
|
|
483
|
+
<div align="center">
|
|
484
|
+
|
|
485
|
+
我们希望 DSLIGHTING 能成为社区的一份礼物。🎁
|
|
486
|
+
|
|
487
|
+
<a href="https://github.com/usail-hkust/dslighting/graphs/contributors">
|
|
488
|
+
<img src="https://contrib.rocks/image?repo=usail-hkust/dslighting" />
|
|
489
|
+
</a>
|
|
490
|
+
|
|
491
|
+
**核心贡献者**:
|
|
492
|
+
- [luckyfan-cs](https://github.com/luckyfan-cs)(项目负责人,前端和后端开发)
|
|
493
|
+
- [canchengliu](https://github.com/canchengliu)(工作流贡献)
|
|
494
|
+
|
|
495
|
+
查看 `docs/CONTRIBUTING.md` 了解如何参与贡献。
|
|
496
|
+
|
|
497
|
+
</div>
|
|
498
|
+
|
|
499
|
+
---
|
|
500
|
+
|
|
501
|
+
## 🔗 社区
|
|
502
|
+
|
|
503
|
+
<div align="center">
|
|
504
|
+
|
|
505
|
+
**[DSLIGHTING 社区](https://github.com/luckyfan-cs)**
|
|
506
|
+
|
|
507
|
+
[💬 微信交流群](#-微信交流群) · [⭐ 给我们Star](https://github.com/usail-hkust/dslighting/stargazers) · [🐛 报告Bug](https://github.com/usail-hkust/dslighting/issues) · [💬 参与讨论](https://github.com/usail-hkust/dslighting/discussions)
|
|
508
|
+
|
|
509
|
+
</div>
|
|
510
|
+
|
|
511
|
+
---
|
|
512
|
+
|
|
513
|
+
## 📄 许可证
|
|
514
|
+
|
|
515
|
+
本项目采用 AGPL-3.0 许可证。
|
|
516
|
+
|
|
517
|
+
---
|
|
518
|
+
|
|
519
|
+
## 🙏 致谢
|
|
520
|
+
|
|
521
|
+
感谢你关注 DSLIGHTING!
|
|
522
|
+
|
|
523
|
+
---
|
|
524
|
+
|
|
525
|
+
## 📊 项目统计
|
|
526
|
+
|
|
527
|
+

|
|
528
|
+

|
|
529
|
+

|
|
530
|
+

|
|
531
|
+
|
|
532
|
+
---
|
|
533
|
+
|
|
534
|
+
## 📚 引用 (Citation)
|
|
535
|
+
|
|
536
|
+
如果你在研究中使用了 DSLIGHTING,请使用以下 BibTeX 格式进行引用:
|
|
537
|
+
|
|
538
|
+
```bibtex
|
|
539
|
+
@software{dslighting2025,
|
|
540
|
+
title = {DSLIGHTING: An End-to-End Data Science Intelligent Assistant System},
|
|
541
|
+
author = {Liu, F. and Liu, C. and others},
|
|
542
|
+
year = {2025},
|
|
543
|
+
publisher = {GitHub},
|
|
544
|
+
url = {https://github.com/usail-hkust/dslighting},
|
|
545
|
+
version = {1.0.0}
|
|
546
|
+
}
|
|
547
|
+
```
|
|
548
|
+
|
|
549
|
+
或者使用 plain text 格式:
|
|
550
|
+
|
|
551
|
+
```
|
|
552
|
+
Liu, F., Liu, C., et al. (2025). DSLIGHTING: An End-to-End Data Science Intelligent Assistant System.
|
|
553
|
+
GitHub repository. https://github.com/usail-hkust/dslighting
|
|
554
|
+
```
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
dsat/__init__.py,sha256=wHgtKWECmb5KQyVGqxvQ5mAl_yPg8RkZY4n929SIhi4,105
|
|
2
|
+
dsat/config.py,sha256=revgOB4NHSl-MEvn8nwvaL0Nn3AaJI4uqsWtZzvJdn8,3062
|
|
3
|
+
dsat/runner.py,sha256=y10bjs8pM4TKQjR5oCH7YSgwOh4OG20qI7PJClWuTEM,26486
|
|
4
|
+
dsat/benchmark/__init__.py,sha256=p-wt40WdlZM_--ha5wuYVOEmsD820jFQB-GZnju1Fi4,24
|
|
5
|
+
dsat/benchmark/benchmark.py,sha256=8sVyovMKD2Zrazq-2-7jgfzOJNnWKjsUjOgYQIGeCBA,8002
|
|
6
|
+
dsat/benchmark/datasci.py,sha256=z8dzyXt_F8_0xRg2rfrIkN24rYw0o1yKdoutKlQgpmo,10864
|
|
7
|
+
dsat/benchmark/mle.py,sha256=ppRyaY8e2lZ2VU2fHcKTsavXhfvnu56yK9fDmcP22VU,33365
|
|
8
|
+
dsat/benchmark/sciencebench.py,sha256=Jgmw9nS35nMz86M_v8ZBCJxgzYLwlB5OhvyM-lKWW30,12467
|
|
9
|
+
dsat/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
dsat/common/constants.py,sha256=CQVYDiKWvAmVRG-TR8teI-C_7c407hmezhducsG69Fk,277
|
|
11
|
+
dsat/common/exceptions.py,sha256=1FYk8IXnfmk2JkUEjnR8IH0A6Cvq_FHmhnndJCoPScc,1957
|
|
12
|
+
dsat/common/typing.py,sha256=R6FipaTSxj2DBG0WaU7IOu2pvbXavDoL4b2rlcc2h90,1074
|
|
13
|
+
dsat/models/__init__.py,sha256=5GSGpTROhsSb8AQc9FMdXmWk8FpQyoNNzyiFjTeZOfg,91
|
|
14
|
+
dsat/models/candidates.py,sha256=OyKiGk-DfrdDB-h7oIyksWQQqao43j64_y2p4ks1oH0,754
|
|
15
|
+
dsat/models/formats.py,sha256=lUV3SlkO5Qe_xvNbtrrSvn__slGjzTB7YbSxbElr9Po,3536
|
|
16
|
+
dsat/models/task.py,sha256=3lroC5SOjMzjz-IkyHS-PogHFi79Ek1gVNxbnSf0hDY,2569
|
|
17
|
+
dsat/operators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
+
dsat/operators/aflow_ops.py,sha256=JrMb-FaSdTVKr6ClOeEHtRI2aUUkAA6fSi0bpHkHhLA,4166
|
|
19
|
+
dsat/operators/autokaggle_ops.py,sha256=eMPt3d66-EsFT5xBRyVrTLeLCH7E0dBH4IAuIGKTdck,7177
|
|
20
|
+
dsat/operators/automind_ops.py,sha256=aoNz2FE-8kmclUeNN3X92mxA7MPTc0O4PYKreJA4bjY,2127
|
|
21
|
+
dsat/operators/base.py,sha256=7ELfrnSkJPyoqiG8l9rDpHWQdjv_lqBOppx8jxAwCYA,746
|
|
22
|
+
dsat/operators/code.py,sha256=fy1wGYOLwbybHGvV1OGpk8N__pHr6hGisAiA5e7WXL0,1881
|
|
23
|
+
dsat/operators/dsagent_ops.py,sha256=XcIpcUHg5jCXkqLR6MImi3ZwMSQLKzenKk6QsHSGT_Q,6010
|
|
24
|
+
dsat/operators/llm_basic.py,sha256=lTgOsPyjCijycVK1Fb9ZGDM3JgNfmPB1jI6zq64Okb4,3755
|
|
25
|
+
dsat/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
|
+
dsat/prompts/aflow_prompt.py,sha256=AAMPbyHDWcgMpEHVrMGwYfzvh56rkCjTMfTEEzCJZJM,3940
|
|
27
|
+
dsat/prompts/aide_prompt.py,sha256=RK2B8jWThypyXjUYti2trlj_d57TaVX5SrfQvaGT0VA,3235
|
|
28
|
+
dsat/prompts/autokaggle_prompt.py,sha256=BIf6hgnwSPaIReyYaTTYyzwAlHNHIILXVuS2mlWC9Zs,10574
|
|
29
|
+
dsat/prompts/automind_prompt.py,sha256=zogMhiccjaVfTCYfz65DVmgs6iNCifAhdIP7tR8viEA,1938
|
|
30
|
+
dsat/prompts/common.py,sha256=3N9B8-eEaXV41m1U_h50ym0I4ryug5KPrXyHP8kA5PA,2743
|
|
31
|
+
dsat/prompts/data_interpreter_prompt.py,sha256=YB3mKHU61dAouPKJRNytQlZd8hoCL1SUtmZckV_IQc8,2384
|
|
32
|
+
dsat/prompts/dsagent_prompt.py,sha256=fNlGQQs6EKwfqlzhfbusv6f7OYPQiaFHxOR9_cYtke4,2295
|
|
33
|
+
dsat/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
+
dsat/services/data_analyzer.py,sha256=cSOnYnUDsU2D5sL3tYQ32RWryJ6dnJs7y2PNd3aWvO8,16059
|
|
35
|
+
dsat/services/llm.py,sha256=lDyxGmaP-pfz1vRc3beOjLWhgs1NS2fUOxNoI_0stcM,21412
|
|
36
|
+
dsat/services/llm_single.py,sha256=R6_FfpyPYnLXjecxEBbaQZaEAtamIGQ5RFrsLdFLK6Q,18137
|
|
37
|
+
dsat/services/sandbox.py,sha256=hq6jy2fXMzwa8Mh-4zYpLVl9X0dHcbRJamTOC0_gu2Q,16334
|
|
38
|
+
dsat/services/vdb.py,sha256=WEbuP7S0QRkEUQk9Wtn9cyUzOiF4pAIp9DdwzYrRpeI,3241
|
|
39
|
+
dsat/services/workspace.py,sha256=ly4uRC4BHcyU-Jf6ptZpjYrp79UaWYFlxKguU1gYATQ,7320
|
|
40
|
+
dsat/services/states/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
|
+
dsat/services/states/autokaggle_state.py,sha256=fGKPnQs-WGf0-UPfQ69aFV6OO_CqYVRTy7vh5GAbdMI,1724
|
|
42
|
+
dsat/services/states/base.py,sha256=9Kfy1IUOCmp2-4r5IQ334PTUchqWaaTkpZFH3Gb3Yy8,420
|
|
43
|
+
dsat/services/states/dsa_log.py,sha256=f0SpyKn-RzeuyVg1-G6ynow8PasYUWKz-nrMvixaMHk,411
|
|
44
|
+
dsat/services/states/experience.py,sha256=HEJ0z_QtzdDayomyM7jKs86YFwzNfNLwS3N4QDncqcQ,9908
|
|
45
|
+
dsat/services/states/journal.py,sha256=ZR_1LVaqQGPJYnIISN6mojRAAfrq66mFX1quL85o8Y4,5736
|
|
46
|
+
dsat/services/states/operator_library.py,sha256=D4FNKamO6jqZTpbl-Ptz-1BcwLWECaJHI4-0HIa8tW0,10166
|
|
47
|
+
dsat/tasks/__init__.py,sha256=CDHinKWr8SLctx6LGTtl7tL3o_VX5urcUWB2vUcgSF0,83
|
|
48
|
+
dsat/tasks/handlers.py,sha256=ncPxgrCPOyXAu8n2Nj0dd8EIi4_D1z9SthZXeNTt4v4,15598
|
|
49
|
+
dsat/templates/open_ended/grade_template.py,sha256=rb9xmEzJOJlgbX7MFiEfGuXoXJ8t4WjtSD6ZB0ZYyyc,4041
|
|
50
|
+
dsat/tools/__init__.py,sha256=c-x4m_NvU0q5nrz4YcbH3vBUcaxGUS7YIKsL_ni4BRI,59
|
|
51
|
+
dsat/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
|
+
dsat/utils/context.py,sha256=7PDSOBZzCGQ9N4qaOgcEM-DiAf4hbtq4RlQ_mK31k6s,6174
|
|
53
|
+
dsat/utils/dynamic_import.py,sha256=LhkKH6yRGUJyJn5oP4NfYCvEn1LnZs6ncgkQQv1AMSE,2980
|
|
54
|
+
dsat/utils/parsing.py,sha256=OYAihoZEYR-flGdLGUdExZM1EocE6VANijJUfbqGWaA,1174
|
|
55
|
+
dsat/workflows/__init__.py,sha256=vCq3ExTxnHpEHkYY4vFNBsIGMNKVA25O0_h2WPrRglY,318
|
|
56
|
+
dsat/workflows/base.py,sha256=P9LjPtJ5gXYQHComBsNAWuYxdMMkQy1u0b2FW1bUqAQ,2292
|
|
57
|
+
dsat/workflows/factory.py,sha256=JTx0lKwZsYe6gGv04zRQpu-NBaQ-K9MrpQ9AadKxDTk,18187
|
|
58
|
+
dsat/workflows/manual/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
59
|
+
dsat/workflows/manual/autokaggle_workflow.py,sha256=7In3YLqEVK35n6g_brNRFzJDKMV7PQgJA92XZi2tk1I,7891
|
|
60
|
+
dsat/workflows/manual/data_interpreter_workflow.py,sha256=TSnRpMgjOcoFDEyVHM-yRxO1UcdQTgLtz0Fqp1oLk4E,7666
|
|
61
|
+
dsat/workflows/manual/deepanalyze_workflow.py,sha256=DJFJnEQjYUL9eK-rKMCVS9v_7Mr1BJM0KnDGnP9tonk,21672
|
|
62
|
+
dsat/workflows/manual/dsagent_workflow.py,sha256=zn3y_14bkKRqSCzaV3iQNzbk2VknEBQ7fP32brqo2wg,3156
|
|
63
|
+
dsat/workflows/search/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
64
|
+
dsat/workflows/search/aflow_workflow.py,sha256=7l1q92z7S96EZwqq4Un8pk2-Nuu94_pjyfvWxVN4ffY,17137
|
|
65
|
+
dsat/workflows/search/aide_workflow.py,sha256=mxIGXcueZGXpv1RXsQJ0YPWtvzICaFQeJowcaZ2krYc,13321
|
|
66
|
+
dsat/workflows/search/automind_workflow.py,sha256=b2JzqUDnDOt_SQdtAvC0fBCJzgTadLylbpgmpaS63Ls,12573
|
|
67
|
+
dsat/workflows/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
68
|
+
dsat/workflows/templates/basic_kaggle_loop.py,sha256=e6YLEpCArgWfKViwoti7SdygHsHp43sqP6VyMqnOJaA,3128
|
|
69
|
+
dslighting/__init__.py,sha256=X2VBmzH5-yDSQMK6JJIa7KZqxMItCHDm5Gt0LRfvalE,5060
|
|
70
|
+
dslighting/core/__init__.py,sha256=T4yYs0RQoz6DBarjOk12PeZq9YoPYrfl3Os0CPlzcB0,252
|
|
71
|
+
dslighting/core/agent.py,sha256=8UGRhri9k2qXJyau3BduwQYSpj-DsW4EzaX2601dBzw,22941
|
|
72
|
+
dslighting/core/config_builder.py,sha256=O6pSYAAwlRoITFuLeKB2BaahK5Zq_0R9NdVGHWZTBXE,10288
|
|
73
|
+
dslighting/core/data_loader.py,sha256=sUVkGmZjQlCaTLoEBiJH2sc44vVoM4rP4ksrew7NjIY,13636
|
|
74
|
+
dslighting/core/task_detector.py,sha256=xOYAV9yiboC8lDeDIEtxvucaGi6fENfeycrowWs-kP0,16300
|
|
75
|
+
dslighting/utils/__init__.py,sha256=SjWP1V1ghRKI6bZ-CO7-eumCQbMfHGwIvrFPLCE_1OY,364
|
|
76
|
+
dslighting/utils/defaults.py,sha256=TMwQJgabA-OrHYhP6VAlYFCbDsP5i-BIFM-U_9mqq8M,4425
|
|
77
|
+
dslighting-1.3.9.dist-info/METADATA,sha256=syV2luI_p0-J6Jf90h_Cf_dgQztPU_Zz41mLifYX0tI,17273
|
|
78
|
+
dslighting-1.3.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
79
|
+
dslighting-1.3.9.dist-info/top_level.txt,sha256=okQQRxSSVfD73jJSZJ5PxsZV_z8rFLBoP5b80pjJ2FY,16
|
|
80
|
+
dslighting-1.3.9.dist-info/RECORD,,
|