astron-eval 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +119 -0
- package/bin/astron-eval.mjs +111 -0
- package/package.json +24 -0
- package/skills/astron-eval/SKILL.md +60 -0
- package/skills/model-evaluation/SKILL.md +180 -0
- package/skills/model-evaluation/assets/dimensions//345/206/205/345/256/271/347/233/270/345/205/263/346/200/247/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//345/206/205/345/256/271/347/262/276/347/241/256/347/273/264/345/272/246.json +19 -0
- package/skills/model-evaluation/assets/dimensions//345/207/206/347/241/256/346/200/247/347/273/264/345/272/246-/344/270/252/346/200/247/345/214/226/350/247/204/345/210/222.json +20 -0
- package/skills/model-evaluation/assets/dimensions//345/207/206/347/241/256/346/200/247/347/273/264/345/272/246-/344/277/241/346/201/257/345/210/206/346/236/220.json +20 -0
- package/skills/model-evaluation/assets/dimensions//345/207/206/347/241/256/346/200/247/347/273/264/345/272/246-/346/227/205/346/270/270/345/207/272/350/241/214.json +20 -0
- package/skills/model-evaluation/assets/dimensions//345/207/206/347/241/256/346/200/247/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//345/210/233/346/204/217/346/200/247-/345/220/270/345/274/225/346/200/247/347/273/264/345/272/246.json +21 -0
- package/skills/model-evaluation/assets/dimensions//345/210/233/346/226/260/346/200/247/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//345/256/214/346/225/264/346/200/247/347/273/264/345/272/246-/344/277/241/346/201/257/345/210/206/346/236/220.json +20 -0
- package/skills/model-evaluation/assets/dimensions//345/256/214/346/225/264/346/200/247/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//345/275/242/345/274/217/347/233/270/345/205/263/346/200/247/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//345/277/240/350/257/232/345/272/246/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//346/214/207/344/273/244/351/201/265/345/276/252/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//346/226/207/346/234/254/345/267/256/345/274/202/345/272/246-TER/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//346/234/211/346/225/210/346/200/247/347/273/264/345/272/246-/344/270/252/346/200/247/345/214/226/350/247/204/345/210/222.json +20 -0
- package/skills/model-evaluation/assets/dimensions//346/234/211/346/225/210/346/200/247/347/273/264/345/272/246-/344/277/241/346/201/257/345/210/206/346/236/220.json +20 -0
- package/skills/model-evaluation/assets/dimensions//346/234/211/346/225/210/346/200/247/347/273/264/345/272/246-/346/265/201/347/250/213/350/207/252/345/212/250/345/214/226.json +20 -0
- package/skills/model-evaluation/assets/dimensions//346/234/211/346/225/210/346/200/247/347/273/264/345/272/246.json +21 -0
- package/skills/model-evaluation/assets/dimensions//346/240/270/345/277/203/345/205/203/347/264/240/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//346/240/274/345/274/217/351/201/265/345/276/252/347/273/264/345/272/246.json +19 -0
- package/skills/model-evaluation/assets/dimensions//347/211/271/350/211/262/344/272/256/347/202/271/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//347/224/250/344/276/213/347/272/247/350/257/204/346/265/213/347/273/264/345/272/246/346/250/241/346/235/277.json +25 -0
- package/skills/model-evaluation/assets/dimensions//347/233/270/344/274/274/345/272/246-BERTScore/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//347/233/270/344/274/274/345/272/246-Cosine/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//347/233/270/344/274/274/345/272/246-ROUGE/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//347/233/270/345/205/263/346/200/247/347/273/264/345/272/246-/344/270/252/346/200/247/345/214/226/350/247/204/345/210/222.json +20 -0
- package/skills/model-evaluation/assets/dimensions//347/233/270/345/205/263/346/200/247/347/273/264/345/272/246.json +21 -0
- package/skills/model-evaluation/assets/dimensions//347/262/276/347/241/256/346/200/247-BLUE/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//347/262/276/347/241/256/346/200/247-COMET/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//351/200/273/350/276/221/345/220/210/347/220/206/346/200/247/347/273/264/345/272/246.json +20 -0
- package/skills/model-evaluation/assets/dimensions//351/200/273/350/276/221/350/277/236/350/264/257/346/200/247/347/273/264/345/272/246-/344/270/252/346/200/247/345/214/226/350/247/204/345/210/222.json +20 -0
- package/skills/model-evaluation/assets/dimensions//351/200/273/350/276/221/350/277/236/350/264/257/346/200/247/347/273/264/345/272/246-/344/277/241/346/201/257/345/210/206/346/236/220.json +20 -0
- package/skills/model-evaluation/assets/dimensions//351/200/273/350/276/221/350/277/236/350/264/257/346/200/247/347/273/264/345/272/246-/346/265/201/347/250/213/350/207/252/345/212/250/345/214/226.json +20 -0
- package/skills/model-evaluation/assets/dimensions//351/200/273/350/276/221/350/277/236/350/264/257/346/200/247/347/273/264/345/272/246.json +21 -0
- package/skills/model-evaluation/assets/eval-judge.json +11 -0
- package/skills/model-evaluation/assets/experts/business-process-automation.json +71 -0
- package/skills/model-evaluation/assets/experts/content-generation.json +75 -0
- package/skills/model-evaluation/assets/experts/content-match.json +37 -0
- package/skills/model-evaluation/assets/experts/information-analysis.json +87 -0
- package/skills/model-evaluation/assets/experts/marketing-digital-human.json +27 -0
- package/skills/model-evaluation/assets/experts/personalized-planning.json +87 -0
- package/skills/model-evaluation/assets/experts/text-translation.json +103 -0
- package/skills/model-evaluation/assets/experts/tourism-travel.json +119 -0
- package/skills/model-evaluation/assets/templates/custom-dimension.template.json +30 -0
- package/skills/model-evaluation/eval-build.md +281 -0
- package/skills/model-evaluation/eval-execute.md +196 -0
- package/skills/model-evaluation/eval-init.md +237 -0
- package/skills/model-evaluation/processes/dimension-process.md +207 -0
- package/skills/model-evaluation/processes/evalset-create-process.md +184 -0
- package/skills/model-evaluation/processes/evalset-parse-process.md +171 -0
- package/skills/model-evaluation/processes/evalset-supplement-process.md +136 -0
- package/skills/model-evaluation/processes/keypoint-process.md +148 -0
- package/skills/model-evaluation/processes/python-env-process.md +113 -0
- package/skills/model-evaluation/references//344/270/255/351/227/264/344/272/247/347/211/251/350/257/264/346/230/216.md +340 -0
- package/skills/model-evaluation/references//345/206/205/347/275/256/346/250/241/346/235/277/350/257/264/346/230/216.md +149 -0
- package/skills/model-evaluation/references//350/204/232/346/234/254/345/256/232/344/271/211.md +274 -0
- package/skills/model-evaluation/references//350/256/244/350/257/201/346/234/215/345/212/241/346/216/245/345/217/243/350/257/264/346/230/216.md +271 -0
- package/skills/model-evaluation/references//350/257/204/346/265/213/346/234/215/345/212/241/346/216/245/345/217/243/350/257/264/346/230/216.md +455 -0
- package/skills/model-evaluation/references//350/257/204/346/265/213/347/273/264/345/272/246/350/257/264/346/230/216.md +171 -0
- package/skills/model-evaluation/scripts/cfg/eval-auth.cfg +16 -0
- package/skills/model-evaluation/scripts/cfg/eval-server.cfg +1 -0
- package/skills/model-evaluation/scripts/clients/__init__.py +33 -0
- package/skills/model-evaluation/scripts/clients/api_client.py +97 -0
- package/skills/model-evaluation/scripts/clients/auth_client.py +96 -0
- package/skills/model-evaluation/scripts/clients/http_client.py +199 -0
- package/skills/model-evaluation/scripts/clients/oauth_callback.py +397 -0
- package/skills/model-evaluation/scripts/clients/token_manager.py +53 -0
- package/skills/model-evaluation/scripts/eval_auth.py +588 -0
- package/skills/model-evaluation/scripts/eval_dimension.py +240 -0
- package/skills/model-evaluation/scripts/eval_set.py +410 -0
- package/skills/model-evaluation/scripts/eval_task.py +324 -0
- package/skills/model-evaluation/scripts/files/__init__.py +38 -0
- package/skills/model-evaluation/scripts/files/file_utils.py +330 -0
- package/skills/model-evaluation/scripts/files/streaming.py +245 -0
- package/skills/model-evaluation/scripts/utils/__init__.py +128 -0
- package/skills/model-evaluation/scripts/utils/constants.py +101 -0
- package/skills/model-evaluation/scripts/utils/datetime_utils.py +60 -0
- package/skills/model-evaluation/scripts/utils/errors.py +244 -0
- package/skills/model-evaluation/scripts/utils/keypoint_prompts.py +73 -0
- package/skills/skill-driven-eval/SKILL.md +456 -0
- package/skills/skill-driven-eval/agents/grader.md +144 -0
- package/skills/skill-driven-eval/eval-viewer/__init__.py +1 -0
- package/skills/skill-driven-eval/eval-viewer/generate_report.py +485 -0
- package/skills/skill-driven-eval/eval-viewer/viewer.html +767 -0
- package/skills/skill-driven-eval/references/schemas.md +282 -0
- package/skills/skill-driven-eval/scripts/__init__.py +1 -0
- package/skills/skill-driven-eval/scripts/__main__.py +70 -0
- package/skills/skill-driven-eval/scripts/aggregate_results.py +681 -0
- package/skills/skill-driven-eval/scripts/extract_transcript.py +294 -0
- package/skills/skill-driven-eval/scripts/test_aggregate.py +244 -0
package/skills/model-evaluation/references//350/204/232/346/234/254/345/256/232/344/271/211.md
ADDED
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: script-definitions
|
|
3
|
+
description: Use when needing to understand or execute evaluation scripts (eval_auth.py, eval_set.py, eval_task.py, eval_dimension.py)
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# 脚本定义
|
|
7
|
+
|
|
8
|
+
本文档定义评测脚本的使用方法和参数说明。
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## 快速参考
|
|
13
|
+
|
|
14
|
+
| 脚本 | 功能 | 子命令 |
|
|
15
|
+
|------|------|--------|
|
|
16
|
+
| `eval_auth.py` | 鉴权Token管理 | detect, login, token, check |
|
|
17
|
+
| `eval_set.py` | 评测集管理 | analysis, normalize, submit |
|
|
18
|
+
| `eval_task.py` | 任务管理 | submit, status, summary |
|
|
19
|
+
| `eval_dimension.py` | 维度配置工具 | check, update |
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## 1. 鉴权Token管理
|
|
24
|
+
|
|
25
|
+
**脚本**:`eval_auth.py`
|
|
26
|
+
|
|
27
|
+
### 登录模式
|
|
28
|
+
|
|
29
|
+
脚本支持两种登录模式:
|
|
30
|
+
|
|
31
|
+
| 模式 | 说明 | 适用场景 |
|
|
32
|
+
|------|------|----------|
|
|
33
|
+
| **回调模式** | 自动启动本地HTTP服务器,浏览器回调自动接收授权码 | 本地桌面环境,支持浏览器 |
|
|
34
|
+
| **OOB模式** | 手动复制授权码,需要用户交互 | 服务器终端,无图形界面 |
|
|
35
|
+
|
|
36
|
+
### 子命令
|
|
37
|
+
|
|
38
|
+
| 子命令 | 说明 |
|
|
39
|
+
|--------|------|
|
|
40
|
+
| `detect` | 检测浏览器环境是否支持自动打开 |
|
|
41
|
+
| `login` | 智能登录授权,自动选择最佳模式 |
|
|
42
|
+
| `token` | 用授权码换取Token(OOB模式手动输入时使用) |
|
|
43
|
+
| `check` | 检查Token有效性 |
|
|
44
|
+
|
|
45
|
+
### 使用示例
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
# 检测浏览器环境
|
|
49
|
+
python eval_auth.py detect --output {work-dir}/.eval/auth.json
|
|
50
|
+
|
|
51
|
+
# 智能登录(自动选择最佳模式)
|
|
52
|
+
python eval_auth.py login \
|
|
53
|
+
--config {skill-dir}/scripts/cfg/eval-auth.cfg \
|
|
54
|
+
--output {work-dir}/.eval/auth.json
|
|
55
|
+
|
|
56
|
+
# 强制使用自动模式(回调)
|
|
57
|
+
python eval_auth.py login --mode auto \
|
|
58
|
+
--config {skill-dir}/scripts/cfg/eval-auth.cfg \
|
|
59
|
+
--output {work-dir}/.eval/auth.json
|
|
60
|
+
|
|
61
|
+
# 强制使用手动模式(OOB)
|
|
62
|
+
python eval_auth.py login --mode manual \
|
|
63
|
+
--config {skill-dir}/scripts/cfg/eval-auth.cfg \
|
|
64
|
+
--output {work-dir}/.eval/auth.json
|
|
65
|
+
|
|
66
|
+
# 指定回调端口
|
|
67
|
+
python eval_auth.py login --mode auto --port 8080 \
|
|
68
|
+
--config {skill-dir}/scripts/cfg/eval-auth.cfg \
|
|
69
|
+
--output {work-dir}/.eval/auth.json
|
|
70
|
+
|
|
71
|
+
# 授权码换取Token(OOB模式)
|
|
72
|
+
python eval_auth.py token \
|
|
73
|
+
--code {authorization_code} \
|
|
74
|
+
--state_token {state_token} \
|
|
75
|
+
--config {skill-dir}/scripts/cfg/eval-auth.cfg \
|
|
76
|
+
--output {work-dir}/.eval/auth.json
|
|
77
|
+
|
|
78
|
+
# 检查Token有效性
|
|
79
|
+
python eval_auth.py check --output {work-dir}/.eval/auth.json
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### 参数说明
|
|
83
|
+
|
|
84
|
+
| 参数 | 说明 |
|
|
85
|
+
|------|------|
|
|
86
|
+
| `--config` | 鉴权配置文件路径 |
|
|
87
|
+
| `--code` | 授权码(token 子命令必填) |
|
|
88
|
+
| `--state_token` | 授权状态标识(token 子命令使用,需与登录初始化时生成的 `state_token` 一致) |
|
|
89
|
+
| `--output` | Token缓存文件路径 |
|
|
90
|
+
| `--mode` | 登录模式:`auto` 强制使用回调模式,`manual` 强制使用手动模式,默认智能选择 |
|
|
91
|
+
| `--port` | 回调模式端口(默认 51943) |
|
|
92
|
+
|
|
93
|
+
### 回调模式流程
|
|
94
|
+
|
|
95
|
+
1. 启动本地HTTP服务器监听 `127.0.0.1:{port}/callback`
|
|
96
|
+
2. 请求登录URL,携带 loopback redirect_uri
|
|
97
|
+
3. 打开浏览器访问登录URL
|
|
98
|
+
4. 用户完成登录后,浏览器回调本地服务器
|
|
99
|
+
5. 本地服务器接收授权码,自动换取Token
|
|
100
|
+
6. 保存Token并完成认证
|
|
101
|
+
|
|
102
|
+
### 服务端配置要求
|
|
103
|
+
|
|
104
|
+
使用回调模式需要在服务端配置 loopback redirect_uri 白名单:
|
|
105
|
+
|
|
106
|
+
```toml
|
|
107
|
+
# 示例配置(端口需与 --port 保持一致,默认 51943)
|
|
108
|
+
redirect_uris = ["http://127.0.0.1:51943/callback"]
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## 2. 评测集管理
|
|
114
|
+
|
|
115
|
+
**脚本**:`eval_set.py`
|
|
116
|
+
|
|
117
|
+
### 子命令
|
|
118
|
+
|
|
119
|
+
| 子命令 | 说明 |
|
|
120
|
+
|--------|------|
|
|
121
|
+
| `analysis` | 解析评测集结构 |
|
|
122
|
+
| `normalize` | 标准化评测集 |
|
|
123
|
+
| `submit` | 提交评测集 |
|
|
124
|
+
|
|
125
|
+
### analysis - 解析评测集结构
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
python eval_set.py analysis \
|
|
129
|
+
--input {work-dir}/.eval/{session-id}/evalset/evalset-prepared.csv \
|
|
130
|
+
--output {work-dir}/.eval/{session-id}/evalset/evalset-fields-mapping.json
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
**输出**:
|
|
134
|
+
- 主要产物:字段映射文件(`--output` 指定)
|
|
135
|
+
- 中间产物:结构文件(自动生成到同目录 `evalset-structure.json`)
|
|
136
|
+
|
|
137
|
+
### normalize - 标准化评测集
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
python eval_set.py normalize \
|
|
141
|
+
--input {work-dir}/.eval/{session-id}/evalset/evalset-prepared.{ext} \
|
|
142
|
+
--mapping {work-dir}/.eval/{session-id}/evalset/evalset-fields-mapping.json \
|
|
143
|
+
--output {work-dir}/.eval/{session-id}/evalset/evalset-standard.jsonl
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
**支持格式**:Excel、JSON、JSONL、CSV
|
|
147
|
+
|
|
148
|
+
**注意**:`{ext}` 为流程产物的文件扩展名(流程1/2为 `jsonl`,流程3为原始扩展名)。
|
|
149
|
+
|
|
150
|
+
### submit - 提交评测集
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
python eval_set.py submit \
|
|
154
|
+
--auth {work-dir}/.eval/auth.json \
|
|
155
|
+
--config {skill-dir}/scripts/cfg/eval-server.cfg \
|
|
156
|
+
--evalset {work-dir}/.eval/{session-id}/evalset/evalset-standard.jsonl \
|
|
157
|
+
--output {work-dir}/.eval/{session-id}/evalset/evalset-meta.json
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### 字段映射规则
|
|
161
|
+
|
|
162
|
+
字段匹配关键词表见 [evalset-parse-process.md](../processes/evalset-parse-process.md#32-匹配字段)。
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## 3. 任务管理
|
|
167
|
+
|
|
168
|
+
**脚本**:`eval_task.py`
|
|
169
|
+
|
|
170
|
+
### 子命令
|
|
171
|
+
|
|
172
|
+
| 子命令 | 说明 |
|
|
173
|
+
|--------|------|
|
|
174
|
+
| `submit` | 提交评测任务 |
|
|
175
|
+
| `status` | 查询任务状态(支持轮询) |
|
|
176
|
+
| `summary` | 生成评测结果摘要 |
|
|
177
|
+
|
|
178
|
+
### submit - 提交评测任务
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
python eval_task.py submit \
|
|
182
|
+
--config {skill-dir}/scripts/cfg/eval-server.cfg \
|
|
183
|
+
--auth {work-dir}/.eval/auth.json \
|
|
184
|
+
--eval_set {work-dir}/.eval/{session-id}/evalset/evalset-meta.json \
|
|
185
|
+
--eval_dimension {work-dir}/.eval/{session-id}/eval-dimension.json \
|
|
186
|
+
--eval_judge {work-dir}/.eval/{session-id}/eval-judge.json \
|
|
187
|
+
--output {work-dir}/.eval/{session-id}/evaltask/evaltask-meta.json
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### status - 查询任务状态
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
# 轮询模式(推荐)
|
|
194
|
+
python eval_task.py status \
|
|
195
|
+
--config {skill-dir}/scripts/cfg/eval-server.cfg \
|
|
196
|
+
--auth {work-dir}/.eval/auth.json \
|
|
197
|
+
--evaltask {work-dir}/.eval/{session-id}/evaltask/evaltask-meta.json \
|
|
198
|
+
--output {work-dir}/.eval/{session-id}/evaltask/evaltask-result.json \
|
|
199
|
+
--poll --interval 30 --timeout 3600
|
|
200
|
+
|
|
201
|
+
# 单次查询模式:命令同上,移除 --poll 即可(--interval、--timeout 为轮询参数,可一并省略)
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### summary - 生成结果摘要
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
python eval_task.py summary \
|
|
208
|
+
--result {work-dir}/.eval/{session-id}/evaltask/evaltask-result.json \
|
|
209
|
+
--platform_url {在线报告链接}
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
### 参数说明
|
|
213
|
+
|
|
214
|
+
| 参数 | 说明 |
|
|
215
|
+
|------|------|
|
|
216
|
+
| `--config` | 服务配置文件(submit/status子命令参数) |
|
|
217
|
+
| `--auth` | 鉴权信息文件(submit/status子命令参数) |
|
|
218
|
+
| `--eval_set` | 评测集标识文件 |
|
|
219
|
+
| `--eval_dimension` | 评测维度配置文件 |
|
|
220
|
+
| `--eval_judge` | 评委模型配置文件 |
|
|
221
|
+
| `--evaltask` | 评测任务元信息文件 |
|
|
222
|
+
| `--output` | 输出文件路径 |
|
|
223
|
+
| `--poll` | 启用自动轮询模式 |
|
|
224
|
+
| `--interval` | 轮询间隔秒数(默认30) |
|
|
225
|
+
| `--timeout` | 轮询超时秒数(默认3600) |
|
|
226
|
+
| `--result` | 评测结果文件 |
|
|
227
|
+
| `--platform_url` | 在线报告链接 |
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## 4. 维度配置工具
|
|
232
|
+
|
|
233
|
+
**脚本**:`eval_dimension.py`
|
|
234
|
+
|
|
235
|
+
### 子命令
|
|
236
|
+
|
|
237
|
+
| 子命令 | 说明 |
|
|
238
|
+
|--------|------|
|
|
239
|
+
| `check` | 校验配置文件完整性和规范性 |
|
|
240
|
+
| `update` | 自动填充评委ID到主观评测维度 |
|
|
241
|
+
|
|
242
|
+
### check - 校验配置
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
python eval_dimension.py -a check \
|
|
246
|
+
-d {work-dir}/.eval/{session-id}/eval-dimension.json
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
**校验规则**:
|
|
250
|
+
|
|
251
|
+
| type | 必填字段 | 条件必填 |
|
|
252
|
+
|------|----------|----------|
|
|
253
|
+
| llm-score | name, type, weight | judge_id, prompt |
|
|
254
|
+
| llm-judge | name, type, weight | judge_id, prompt |
|
|
255
|
+
| builtin | name, type, weight | func |
|
|
256
|
+
|
|
257
|
+
### update - 更新评委ID
|
|
258
|
+
|
|
259
|
+
```bash
|
|
260
|
+
python eval_dimension.py -a update \
|
|
261
|
+
-d {work-dir}/.eval/{session-id}/eval-dimension.json \
|
|
262
|
+
-j {work-dir}/.eval/{session-id}/eval-judge.json
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
### 参数说明
|
|
266
|
+
|
|
267
|
+
| 参数 | 说明 |
|
|
268
|
+
|------|------|
|
|
269
|
+
| `-a, --action` | 操作类型:check/update |
|
|
270
|
+
| `-d, --dimension` | 评测维度配置文件 |
|
|
271
|
+
| `-j, --judge` | 评委配置文件(update时必填) |
|
|
272
|
+
| `-o, --output` | 输出文件路径(默认覆盖原文件) |
|
|
273
|
+
|
|
274
|
+
**更新规则**:仅更新 llm-score 和 llm-judge 类型的维度,builtin 类型不需要评委。
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: auth-service-api
|
|
3
|
+
description: Use when needing to understand authentication service API for obtaining access tokens
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# 认证服务接口说明
|
|
7
|
+
|
|
8
|
+
本文档定义认证服务的 API 接口,用于获取访问令牌(access_token)。
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## 快速参考
|
|
13
|
+
|
|
14
|
+
| 接口 | 方法 | 路径 | 说明 |
|
|
15
|
+
|------|------|------|------|
|
|
16
|
+
| 登录初始化 | POST | `/astron/bifrost-heimdallr/api/auth/init` | 获取登录地址 |
|
|
17
|
+
| 换取令牌 | POST | `/astron/bifrost-heimdallr/api/oauth/token` | 用授权码换取令牌 |
|
|
18
|
+
| 用户信息 | GET | `/astron/bifrost-heimdallr/oauth2/api/auth/userinfo` | 查询当前用户 |
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## 1. 认证流程概述
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
1. 客户端生成 state_token
|
|
26
|
+
↓
|
|
27
|
+
2. 调用登录初始化接口获取 login_url
|
|
28
|
+
↓
|
|
29
|
+
3. 用户浏览器访问 login_url 完成登录
|
|
30
|
+
↓
|
|
31
|
+
4. 用户获取授权码 code
|
|
32
|
+
↓
|
|
33
|
+
5. 客户端用 code + state_token 换取 access_token
|
|
34
|
+
↓
|
|
35
|
+
6. 后续请求携带 access_token 调用业务接口
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## 2. 统一响应格式
|
|
41
|
+
|
|
42
|
+
```json
|
|
43
|
+
{
|
|
44
|
+
"code": 0,
|
|
45
|
+
"message": "OK",
|
|
46
|
+
"data": {}
|
|
47
|
+
}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
| 字段 | 类型 | 说明 |
|
|
51
|
+
|------|------|------|
|
|
52
|
+
| `code` | `int` | 业务状态码(`0` 表示成功) |
|
|
53
|
+
| `message` | `string` | 状态描述信息 |
|
|
54
|
+
| `data` | `object` | 业务数据 |
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## 3. 关键参数说明
|
|
59
|
+
|
|
60
|
+
| 参数 | 类型 | 必填 | 说明 |
|
|
61
|
+
|------|------|------|------|
|
|
62
|
+
| `state_token` | `string` | 是 | 客户端生成的随机状态串(建议使用去除连字符的 UUID) |
|
|
63
|
+
| `redirect_uri` | `string` | 是 | OOB 模式固定值:`urn:ietf:wg:oauth:2.0:oob`;回调模式使用本地 loopback 地址(如 `http://127.0.0.1:51943/callback`) |
|
|
64
|
+
| `client_id` | `string` | 是 | 客户端标识(示例:`bifrost-dev-test`) |
|
|
65
|
+
| `code` | `string` | 是 | 用户登录授权后返回的授权码 |
|
|
66
|
+
| `access_token` | `string` | 是 | 访问令牌,用于后续接口调用 |
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## 4. 接口定义
|
|
71
|
+
|
|
72
|
+
### 4.1 生成状态令牌
|
|
73
|
+
|
|
74
|
+
`state_token` 由客户端本地生成,不通过接口获取。
|
|
75
|
+
|
|
76
|
+
**生成示例**:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
STATE_TOKEN=$(python3 -c "import uuid; print(uuid.uuid4().hex)")
|
|
80
|
+
# 示例输出: 6e62ff70c88448aabbe6a60337c4ce0c
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
### 4.2 登录初始化
|
|
86
|
+
|
|
87
|
+
**接口URL**:`POST /astron/bifrost-heimdallr/api/auth/init`
|
|
88
|
+
|
|
89
|
+
**完整地址**:`https://www.iflyaicloud.com/astron/bifrost-heimdallr/api/auth/init`
|
|
90
|
+
|
|
91
|
+
**请求参数**:
|
|
92
|
+
|
|
93
|
+
| 字段 | 类型 | 必填 | 说明 |
|
|
94
|
+
|------|------|------|------|
|
|
95
|
+
| `state_token` | `string` | 是 | 客户端生成的状态串 |
|
|
96
|
+
| `redirect_uri` | `string` | 是 | OOB 模式固定值:`urn:ietf:wg:oauth:2.0:oob`;回调模式使用本地 loopback 地址(如 `http://127.0.0.1:51943/callback`) |
|
|
97
|
+
| `client_id` | `string` | 是 | 客户端标识 |
|
|
98
|
+
|
|
99
|
+
**请求示例**:
|
|
100
|
+
|
|
101
|
+
```json
|
|
102
|
+
{
|
|
103
|
+
"state_token": "6e62ff70c88448aabbe6a60337c4ce0c",
|
|
104
|
+
"redirect_uri": "urn:ietf:wg:oauth:2.0:oob",
|
|
105
|
+
"client_id": "bifrost-dev-test"
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
**响应参数**:
|
|
110
|
+
|
|
111
|
+
| 字段 | 类型 | 说明 |
|
|
112
|
+
|------|------|------|
|
|
113
|
+
| `data.login_url` | `string` | 用户登录地址 |
|
|
114
|
+
|
|
115
|
+
**响应示例**:
|
|
116
|
+
|
|
117
|
+
```json
|
|
118
|
+
{
|
|
119
|
+
"code": 0,
|
|
120
|
+
"message": "OK",
|
|
121
|
+
"data": {
|
|
122
|
+
"login_url": "https://www.iflyaicloud.com/login?redirect=..."
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
### 4.3 获取授权码
|
|
130
|
+
|
|
131
|
+
用户在浏览器中访问 `login_url` 完成登录后,将获得授权码 `code`。
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
# 示例
|
|
135
|
+
CODE=4736ede401d4486fb1887e06a5cf7cb9
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
### 4.4 换取访问令牌
|
|
141
|
+
|
|
142
|
+
**接口URL**:`POST /astron/bifrost-heimdallr/api/oauth/token`
|
|
143
|
+
|
|
144
|
+
**完整地址**:`https://www.iflyaicloud.com/astron/bifrost-heimdallr/api/oauth/token`
|
|
145
|
+
|
|
146
|
+
**请求参数**:
|
|
147
|
+
|
|
148
|
+
| 字段 | 类型 | 必填 | 说明 |
|
|
149
|
+
|------|------|------|------|
|
|
150
|
+
| `grant_type` | `string` | 是 | 固定值:`authorization_code` |
|
|
151
|
+
| `code` | `string` | 是 | 用户登录后获得的授权码 |
|
|
152
|
+
| `state` | `string` | 是 | 必须与登录初始化时的 `state_token` 一致 |
|
|
153
|
+
| `client_id` | `string` | 是 | 客户端标识 |
|
|
154
|
+
|
|
155
|
+
**请求示例**:
|
|
156
|
+
|
|
157
|
+
```json
|
|
158
|
+
{
|
|
159
|
+
"grant_type": "authorization_code",
|
|
160
|
+
"code": "4736ede401d4486fb1887e06a5cf7cb9",
|
|
161
|
+
"state": "6e62ff70c88448aabbe6a60337c4ce0c",
|
|
162
|
+
"client_id": "bifrost-dev-test"
|
|
163
|
+
}
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**响应参数**:
|
|
167
|
+
|
|
168
|
+
| 字段 | 类型 | 说明 |
|
|
169
|
+
|------|------|------|
|
|
170
|
+
| `access_token` | `string` | 访问令牌 |
|
|
171
|
+
| `expires_in` | `int` | 有效期(秒) |
|
|
172
|
+
| `refresh_token` | `string \| null` | 刷新令牌(当前返回 `null`) |
|
|
173
|
+
| `token_type` | `string` | 令牌类型(`bearer`) |
|
|
174
|
+
|
|
175
|
+
**响应示例**:
|
|
176
|
+
|
|
177
|
+
```json
|
|
178
|
+
{
|
|
179
|
+
"access_token": "EcRC6XuYa1AbLyFh5k1Q1za8zL7MMD33",
|
|
180
|
+
"expires_in": 7200,
|
|
181
|
+
"refresh_token": null,
|
|
182
|
+
"token_type": "bearer"
|
|
183
|
+
}
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
### 4.5 查询当前用户信息
|
|
189
|
+
|
|
190
|
+
**接口URL**:`GET /astron/bifrost-heimdallr/oauth2/api/auth/userinfo`
|
|
191
|
+
|
|
192
|
+
**完整地址**:`https://www.iflyaicloud.com/astron/bifrost-heimdallr/oauth2/api/auth/userinfo`
|
|
193
|
+
|
|
194
|
+
**请求头**:
|
|
195
|
+
|
|
196
|
+
| Header | 必填 | 说明 |
|
|
197
|
+
|--------|------|------|
|
|
198
|
+
| `Authorization` | 是 | `Bearer <access_token>` |
|
|
199
|
+
|
|
200
|
+
**响应参数**:
|
|
201
|
+
|
|
202
|
+
| 字段 | 类型 | 说明 |
|
|
203
|
+
|------|------|------|
|
|
204
|
+
| `data.uid` | `int64` | 用户唯一标识 |
|
|
205
|
+
| `data.username` | `string` | 用户名 |
|
|
206
|
+
| `data.source` | `int` | 用户来源标识 |
|
|
207
|
+
| `data.appInfos` | `array[AppInfo]` | 关联应用信息 |
|
|
208
|
+
|
|
209
|
+
**AppInfo 定义**:
|
|
210
|
+
|
|
211
|
+
| 字段 | 类型 | 说明 |
|
|
212
|
+
|------|------|------|
|
|
213
|
+
| `appId` | `string` | 应用ID |
|
|
214
|
+
| `apiKey` | `string` | 应用访问Key |
|
|
215
|
+
| `apiSecret` | `string` | 应用访问Secret |
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
## 5. 错误码
|
|
220
|
+
|
|
221
|
+
| 错误场景 | HTTP状态码 | 说明 |
|
|
222
|
+
|----------|------------|------|
|
|
223
|
+
| `state mismatch` | 400 | 换取令牌时传入的 `state` 与登录初始化时的 `state_token` 不一致 |
|
|
224
|
+
| `unauthorized` | 401 | `access_token` 缺失、无效或已过期 |
|
|
225
|
+
| `invalid request` | 400 | 请求参数缺失或格式不正确 |
|
|
226
|
+
| `internal server error` | 500 | 服务内部错误 |
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## 6. 认证流程示例
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
# 步骤1:生成 state_token
|
|
234
|
+
STATE_TOKEN=$(python3 -c "import uuid; print(uuid.uuid4().hex)")
|
|
235
|
+
|
|
236
|
+
# 步骤2:登录初始化
|
|
237
|
+
curl -s 'https://www.iflyaicloud.com/astron/bifrost-heimdallr/api/auth/init' \
|
|
238
|
+
-H 'Content-Type: application/json' \
|
|
239
|
+
-d "{\"state_token\":\"$STATE_TOKEN\",\"redirect_uri\":\"urn:ietf:wg:oauth:2.0:oob\",\"client_id\":\"bifrost-dev-test\"}"
|
|
240
|
+
|
|
241
|
+
# 步骤3:用户在浏览器中完成登录,获取授权码
|
|
242
|
+
CODE=4736ede401d4486fb1887e06a5cf7cb9
|
|
243
|
+
|
|
244
|
+
# 步骤4:换取 access_token
|
|
245
|
+
curl -s -X POST 'https://www.iflyaicloud.com/astron/bifrost-heimdallr/api/oauth/token' \
|
|
246
|
+
-H 'Content-Type: application/json' \
|
|
247
|
+
-d "{\"grant_type\":\"authorization_code\",\"code\":\"$CODE\",\"state\":\"$STATE_TOKEN\",\"client_id\":\"bifrost-dev-test\"}"
|
|
248
|
+
|
|
249
|
+
# 步骤5:使用 access_token 访问受保护资源
|
|
250
|
+
ACCESS_TOKEN=EcRC6XuYa1AbLyFh5k1Q1za8zL7MMD33
|
|
251
|
+
curl -s 'https://www.iflyaicloud.com/astron/bifrost-heimdallr/oauth2/api/auth/userinfo' \
|
|
252
|
+
-H "Authorization: Bearer $ACCESS_TOKEN"
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## 7. 最佳实践
|
|
258
|
+
|
|
259
|
+
### 7.1 服务端集成
|
|
260
|
+
|
|
261
|
+
- 由集成方服务端(即本认证接口的调用方)统一生成并维护 `state_token`
|
|
262
|
+
- 登录初始化后,将 `login_url` 返回前端跳转
|
|
263
|
+
- `access_token` 应保存在服务端安全存储中
|
|
264
|
+
- 在令牌过期前进行有效性检查
|
|
265
|
+
|
|
266
|
+
### 7.2 安全建议
|
|
267
|
+
|
|
268
|
+
- 不要在前端明文持久化 `apiSecret`、`access_token`
|
|
269
|
+
- `state_token` 应一次一用,使用后立即失效
|
|
270
|
+
- `code` 应视为敏感凭证,避免打印到公共日志
|
|
271
|
+
- `access_token` 缓存过期时间不超过 `expires_in`
|