@archon-claw/cli 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +0 -99
- package/package.json +4 -5
- package/dist/scaffold.d.ts +0 -7
- package/dist/scaffold.js +0 -115
- package/dist/templates/agent/model.json +0 -6
- package/dist/templates/agent/system-prompt.md +0 -9
- package/dist/templates/agent/tool-impls/greeting.impl.js +0 -9
- package/dist/templates/agent/tools/greeting.json +0 -14
- package/dist/templates/workspace/.claude/skills/create-agent/SKILL.md +0 -90
- package/dist/templates/workspace/.claude/skills/create-dataset/SKILL.md +0 -57
- package/dist/templates/workspace/.claude/skills/create-eval-case/SKILL.md +0 -159
- package/dist/templates/workspace/.claude/skills/create-eval-judge/SKILL.md +0 -128
- package/dist/templates/workspace/.claude/skills/create-mcp-config/SKILL.md +0 -151
- package/dist/templates/workspace/.claude/skills/create-model-config/SKILL.md +0 -45
- package/dist/templates/workspace/.claude/skills/create-skill/SKILL.md +0 -63
- package/dist/templates/workspace/.claude/skills/create-system-prompt/SKILL.md +0 -168
- package/dist/templates/workspace/.claude/skills/create-tool/SKILL.md +0 -56
- package/dist/templates/workspace/.claude/skills/create-tool-impl/SKILL.md +0 -83
- package/dist/templates/workspace/.claude/skills/create-tool-test/SKILL.md +0 -117
- package/dist/templates/workspace/.claude/skills/create-tool-ui/SKILL.md +0 -218
- package/dist/templates/workspace/README.md +0 -13
- package/dist/templates/workspace/package.json +0 -19
package/dist/cli.js
CHANGED
|
@@ -11,7 +11,6 @@ import { SessionStore } from "./session.js";
|
|
|
11
11
|
import { startDev } from "./dev.js";
|
|
12
12
|
import { runToolTests, formatResults } from "./test-runner.js";
|
|
13
13
|
import { runEvals } from "./eval/runner.js";
|
|
14
|
-
import { scaffoldAgent, scaffoldWorkspace, exportSkills } from "./scaffold.js";
|
|
15
14
|
const pkg = JSON.parse(readFileSync(path.resolve(__dirname, "../package.json"), "utf-8"));
|
|
16
15
|
const program = new Command();
|
|
17
16
|
program
|
|
@@ -25,12 +24,6 @@ program
|
|
|
25
24
|
: path.resolve(process.cwd(), ".env");
|
|
26
25
|
dotenv.config({ path: envPath });
|
|
27
26
|
});
|
|
28
|
-
program
|
|
29
|
-
.command("chat")
|
|
30
|
-
.description("Start an agent chat session")
|
|
31
|
-
.action(() => {
|
|
32
|
-
console.log("Starting agent...");
|
|
33
|
-
});
|
|
34
27
|
/** Scan a directory for agent subdirectories and load them all */
|
|
35
28
|
async function loadAgentsFromDir(agentsDir) {
|
|
36
29
|
const absDir = path.resolve(agentsDir);
|
|
@@ -157,96 +150,4 @@ program
|
|
|
157
150
|
process.exit(1);
|
|
158
151
|
}
|
|
159
152
|
});
|
|
160
|
-
program
|
|
161
|
-
.command("init")
|
|
162
|
-
.description("Initialise an agent workspace project")
|
|
163
|
-
.argument("[dir]", "Directory to initialise")
|
|
164
|
-
.option("--install", "Automatically install npm dependencies")
|
|
165
|
-
.action(async (dir, opts) => {
|
|
166
|
-
try {
|
|
167
|
-
const { intro, text, confirm, isCancel, outro } = await import("@clack/prompts");
|
|
168
|
-
intro("archon-claw init");
|
|
169
|
-
if (!dir) {
|
|
170
|
-
const name = await text({
|
|
171
|
-
message: "Project name",
|
|
172
|
-
placeholder: "my-ai-agent",
|
|
173
|
-
validate: (v) => {
|
|
174
|
-
if (!v?.trim())
|
|
175
|
-
return "Project name is required";
|
|
176
|
-
if (!/^[a-z0-9._-]+$/i.test(v))
|
|
177
|
-
return "Invalid directory name";
|
|
178
|
-
},
|
|
179
|
-
});
|
|
180
|
-
if (isCancel(name)) {
|
|
181
|
-
outro("Cancelled");
|
|
182
|
-
return;
|
|
183
|
-
}
|
|
184
|
-
dir = name;
|
|
185
|
-
}
|
|
186
|
-
if (opts.install === undefined) {
|
|
187
|
-
const install = await confirm({ message: "Install dependencies?" });
|
|
188
|
-
if (isCancel(install)) {
|
|
189
|
-
outro("Cancelled");
|
|
190
|
-
return;
|
|
191
|
-
}
|
|
192
|
-
opts.install = install;
|
|
193
|
-
}
|
|
194
|
-
await scaffoldWorkspace(dir, { install: opts.install });
|
|
195
|
-
outro("Done!");
|
|
196
|
-
}
|
|
197
|
-
catch (err) {
|
|
198
|
-
console.error(err instanceof Error ? err.message : err);
|
|
199
|
-
process.exit(1);
|
|
200
|
-
}
|
|
201
|
-
});
|
|
202
|
-
program
|
|
203
|
-
.command("create-agent")
|
|
204
|
-
.description("Create a new agent project")
|
|
205
|
-
.argument("[agent-name]", "Name of the agent (used as directory name)")
|
|
206
|
-
.option("-d, --dir <path>", "Parent directory for the agent", "./agents")
|
|
207
|
-
.action(async (agentName, opts) => {
|
|
208
|
-
try {
|
|
209
|
-
if (!agentName) {
|
|
210
|
-
const { intro, text, isCancel, outro } = await import("@clack/prompts");
|
|
211
|
-
intro("archon-claw create");
|
|
212
|
-
const name = await text({
|
|
213
|
-
message: "Agent name",
|
|
214
|
-
placeholder: "my-agent",
|
|
215
|
-
validate: (v) => {
|
|
216
|
-
if (!v?.trim())
|
|
217
|
-
return "Agent name is required";
|
|
218
|
-
if (!/^[a-z0-9._-]+$/i.test(v))
|
|
219
|
-
return "Invalid directory name";
|
|
220
|
-
},
|
|
221
|
-
});
|
|
222
|
-
if (isCancel(name)) {
|
|
223
|
-
outro("Cancelled");
|
|
224
|
-
return;
|
|
225
|
-
}
|
|
226
|
-
agentName = name;
|
|
227
|
-
await scaffoldAgent(agentName, opts.dir);
|
|
228
|
-
outro("Done!");
|
|
229
|
-
}
|
|
230
|
-
else {
|
|
231
|
-
await scaffoldAgent(agentName, opts.dir);
|
|
232
|
-
}
|
|
233
|
-
}
|
|
234
|
-
catch (err) {
|
|
235
|
-
console.error(err instanceof Error ? err.message : err);
|
|
236
|
-
process.exit(1);
|
|
237
|
-
}
|
|
238
|
-
});
|
|
239
|
-
program
|
|
240
|
-
.command("export-skills")
|
|
241
|
-
.description("Export bundled skills to a target directory")
|
|
242
|
-
.argument("<target>", "Target directory to export skills into")
|
|
243
|
-
.action(async (target) => {
|
|
244
|
-
try {
|
|
245
|
-
await exportSkills(target);
|
|
246
|
-
}
|
|
247
|
-
catch (err) {
|
|
248
|
-
console.error(err instanceof Error ? err.message : err);
|
|
249
|
-
process.exit(1);
|
|
250
|
-
}
|
|
251
|
-
});
|
|
252
153
|
program.parse();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@archon-claw/cli",
|
|
3
|
-
"version": "0.4.0",
|
|
3
|
+
"version": "0.5.1",
|
|
4
4
|
"description": "AI Agent CLI",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -24,7 +24,6 @@
|
|
|
24
24
|
"@archon-claw/web": "0.1.1"
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@clack/prompts": "^1.1.0",
|
|
28
27
|
"@modelcontextprotocol/sdk": "^1.27.1",
|
|
29
28
|
"chokidar": "^5.0.0",
|
|
30
29
|
"commander": "^14.0.3",
|
|
@@ -34,11 +33,11 @@
|
|
|
34
33
|
"open": "^10.2.0",
|
|
35
34
|
"openai": "^6.25.0",
|
|
36
35
|
"picomatch": "^4.0.3",
|
|
37
|
-
"zod": "^3.25.76"
|
|
38
|
-
"@archon-claw/skills": "0.2.1"
|
|
36
|
+
"zod": "^3.25.76"
|
|
39
37
|
},
|
|
40
38
|
"scripts": {
|
|
41
|
-
"
|
|
39
|
+
"clean": "rm -rf dist",
|
|
40
|
+
"build": "tsc -p tsconfig.build.json && cp -r ../web/dist dist/public",
|
|
42
41
|
"dev": "tsx src/cli.ts start examples/my-agent",
|
|
43
42
|
"start": "node dist/cli.js",
|
|
44
43
|
"test": "vitest run --exclude dist --exclude examples",
|
package/dist/scaffold.d.ts
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
export declare function scaffoldAgent(name: string, targetDir: string, opts?: {
|
|
2
|
-
quiet?: boolean;
|
|
3
|
-
}): Promise<void>;
|
|
4
|
-
export declare function scaffoldWorkspace(targetDir: string, opts?: {
|
|
5
|
-
install?: boolean;
|
|
6
|
-
}): Promise<void>;
|
|
7
|
-
export declare function exportSkills(targetDir: string): Promise<void>;
|
package/dist/scaffold.js
DELETED
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
import fs from "fs/promises";
|
|
2
|
-
import path from "path";
|
|
3
|
-
import { fileURLToPath } from "url";
|
|
4
|
-
import { readFileSync } from "fs";
|
|
5
|
-
import { execSync } from "child_process";
|
|
6
|
-
import { skillsDir } from "@archon-claw/skills";
|
|
7
|
-
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
8
|
-
const cliVersion = JSON.parse(readFileSync(path.resolve(__dirname, "../package.json"), "utf-8")).version;
|
|
9
|
-
export async function scaffoldAgent(name, targetDir, opts = {}) {
|
|
10
|
-
const dest = path.resolve(targetDir, name);
|
|
11
|
-
// Check if destination already exists
|
|
12
|
-
try {
|
|
13
|
-
await fs.access(dest);
|
|
14
|
-
throw new Error(`Directory already exists: ${dest}`);
|
|
15
|
-
}
|
|
16
|
-
catch (err) {
|
|
17
|
-
if (err.code !== "ENOENT")
|
|
18
|
-
throw err;
|
|
19
|
-
}
|
|
20
|
-
// Resolve template directory (works for both src/ and dist/)
|
|
21
|
-
const templateDir = path.resolve(__dirname, "../templates/agent");
|
|
22
|
-
try {
|
|
23
|
-
await fs.access(templateDir);
|
|
24
|
-
}
|
|
25
|
-
catch {
|
|
26
|
-
throw new Error(`Template directory not found: ${templateDir}`);
|
|
27
|
-
}
|
|
28
|
-
// Recursively copy template to destination
|
|
29
|
-
await fs.cp(templateDir, dest, { recursive: true });
|
|
30
|
-
if (!opts.quiet) {
|
|
31
|
-
const files = await listFiles(dest, dest);
|
|
32
|
-
console.log(`\nCreated agent "${name}" at ${dest}\n`);
|
|
33
|
-
console.log("Files:");
|
|
34
|
-
for (const file of files) {
|
|
35
|
-
console.log(` ${file}`);
|
|
36
|
-
}
|
|
37
|
-
console.log(`\nNext steps:`);
|
|
38
|
-
console.log(` cd ${dest}`);
|
|
39
|
-
console.log(` archon-claw start .`);
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
export async function scaffoldWorkspace(targetDir, opts = {}) {
|
|
43
|
-
const dest = path.resolve(targetDir);
|
|
44
|
-
const name = path.basename(dest);
|
|
45
|
-
// Check if package.json already exists at the target
|
|
46
|
-
const pkgPath = path.join(dest, "package.json");
|
|
47
|
-
try {
|
|
48
|
-
await fs.access(pkgPath);
|
|
49
|
-
throw new Error(`package.json already exists in ${dest}`);
|
|
50
|
-
}
|
|
51
|
-
catch (err) {
|
|
52
|
-
if (err.code !== "ENOENT")
|
|
53
|
-
throw err;
|
|
54
|
-
}
|
|
55
|
-
await fs.mkdir(dest, { recursive: true });
|
|
56
|
-
// Copy and render workspace template files (package.json, README.md, .gitignore)
|
|
57
|
-
const templateDir = path.resolve(__dirname, "../templates/workspace");
|
|
58
|
-
const vars = { name, cliVersion };
|
|
59
|
-
const templateFiles = await listFiles(templateDir, templateDir);
|
|
60
|
-
for (const relPath of templateFiles) {
|
|
61
|
-
const src = path.join(templateDir, relPath);
|
|
62
|
-
const destFile = path.join(dest, relPath);
|
|
63
|
-
await fs.mkdir(path.dirname(destFile), { recursive: true });
|
|
64
|
-
let content = await fs.readFile(src, "utf-8");
|
|
65
|
-
content = content.replace(/\{\{(\w+)\}\}/g, (_, key) => vars[key] ?? _);
|
|
66
|
-
await fs.writeFile(destFile, content);
|
|
67
|
-
}
|
|
68
|
-
// Copy skills from @archon-claw/skills
|
|
69
|
-
const destSkillsDir = path.join(dest, ".claude", "skills");
|
|
70
|
-
await fs.mkdir(destSkillsDir, { recursive: true });
|
|
71
|
-
await fs.cp(skillsDir, destSkillsDir, { recursive: true });
|
|
72
|
-
// Create default agent
|
|
73
|
-
await scaffoldAgent("my-agent", path.join(dest, "agents"), { quiet: true });
|
|
74
|
-
const files = await listFiles(dest, dest);
|
|
75
|
-
console.log(`\nInitialised workspace at ${dest}\n`);
|
|
76
|
-
console.log("Files:");
|
|
77
|
-
for (const file of files) {
|
|
78
|
-
console.log(` ${file}`);
|
|
79
|
-
}
|
|
80
|
-
// Install dependencies
|
|
81
|
-
if (opts.install) {
|
|
82
|
-
console.log("\nInstalling dependencies...\n");
|
|
83
|
-
execSync("npm install", { cwd: dest, stdio: "inherit" });
|
|
84
|
-
}
|
|
85
|
-
console.log(`\nNext steps:`);
|
|
86
|
-
if (!opts.install) {
|
|
87
|
-
console.log(` cd ${name}`);
|
|
88
|
-
console.log(` npm install`);
|
|
89
|
-
}
|
|
90
|
-
console.log(` archon-claw dev agents/my-agent`);
|
|
91
|
-
}
|
|
92
|
-
export async function exportSkills(targetDir) {
|
|
93
|
-
const dest = path.resolve(targetDir);
|
|
94
|
-
await fs.mkdir(dest, { recursive: true });
|
|
95
|
-
await fs.cp(skillsDir, dest, { recursive: true });
|
|
96
|
-
const entries = await fs.readdir(dest);
|
|
97
|
-
console.log(`\nExported ${entries.length} skills to ${dest}\n`);
|
|
98
|
-
for (const entry of entries.sort()) {
|
|
99
|
-
console.log(` ${entry}/SKILL.md`);
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
async function listFiles(dir, root) {
|
|
103
|
-
const entries = await fs.readdir(dir, { withFileTypes: true });
|
|
104
|
-
const files = [];
|
|
105
|
-
for (const entry of entries) {
|
|
106
|
-
const full = path.join(dir, entry.name);
|
|
107
|
-
if (entry.isDirectory()) {
|
|
108
|
-
files.push(...(await listFiles(full, root)));
|
|
109
|
-
}
|
|
110
|
-
else {
|
|
111
|
-
files.push(path.relative(root, full));
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
return files.sort();
|
|
115
|
-
}
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Greeting tool implementation
|
|
3
|
-
* @param {object} params
|
|
4
|
-
* @param {string} params.name - The name of the person to greet
|
|
5
|
-
* @returns {Promise<object>} Greeting message
|
|
6
|
-
*/
|
|
7
|
-
export default async ({ name }) => {
|
|
8
|
-
return { message: `Hello, ${name}!` };
|
|
9
|
-
};
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "greeting",
|
|
3
|
-
"description": "Generate a greeting message for the given name",
|
|
4
|
-
"input_schema": {
|
|
5
|
-
"type": "object",
|
|
6
|
-
"properties": {
|
|
7
|
-
"name": {
|
|
8
|
-
"type": "string",
|
|
9
|
-
"description": "The name of the person to greet"
|
|
10
|
-
}
|
|
11
|
-
},
|
|
12
|
-
"required": ["name"]
|
|
13
|
-
}
|
|
14
|
-
}
|
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: create-agent
|
|
3
|
-
description: 创建完整的 agent 配置目录。当用户需要新建一个 AI Agent 时使用。
|
|
4
|
-
argument-hint: "[agent_name]"
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
在 `agents/` 目录下创建一个完整的 agent 配置文件夹。
|
|
8
|
-
|
|
9
|
-
## 目录结构
|
|
10
|
-
|
|
11
|
-
```
|
|
12
|
-
agents/<agent-name>/
|
|
13
|
-
├── system-prompt.md # 系统提示词(必填)
|
|
14
|
-
├── model.json # 模型配置(必填)
|
|
15
|
-
├── tools/ # 工具定义(必填,至少 1 个)
|
|
16
|
-
│ └── <tool-name>.json
|
|
17
|
-
├── tool-impls/ # 工具实现(必填,与 tools/ 一一对应)
|
|
18
|
-
│ └── <tool-name>.impl.js
|
|
19
|
-
├── datasets/ # 数据集(可选)
|
|
20
|
-
│ └── <name>.json
|
|
21
|
-
└── skills/ # 技能(可选)
|
|
22
|
-
└── <skill-name>/
|
|
23
|
-
└── SKILL.md
|
|
24
|
-
```
|
|
25
|
-
|
|
26
|
-
## 创建步骤
|
|
27
|
-
|
|
28
|
-
1. **创建 `system-prompt.md`** — 定义 agent 的角色和行为指令
|
|
29
|
-
- 可使用 Liquid 模板语法引用 datasets 中的数据
|
|
30
|
-
- 语法:`{% for item in dataset_name %}` / `{{ item.field }}`
|
|
31
|
-
- 内容不能为空
|
|
32
|
-
|
|
33
|
-
2. **创建 `model.json`** — 模型配置
|
|
34
|
-
- 必填:`provider`(`"anthropic"` 或 `"openai"`)、`model`(模型 ID)
|
|
35
|
-
- 可选:`maxTokens`、`temperature`(0~2)、`apiKey`
|
|
36
|
-
- 必须通过 `src/schemas/model.schema.json` 校验
|
|
37
|
-
|
|
38
|
-
3. **创建 `tools/` 目录** — 至少一个工具定义 JSON
|
|
39
|
-
- 每个文件定义一个工具:`name`、`description`、`input_schema`
|
|
40
|
-
- `name` 匹配 `^[a-z][a-z0-9_]*$`
|
|
41
|
-
- 必须通过 `src/schemas/tool.schema.json` 校验
|
|
42
|
-
|
|
43
|
-
4. **创建 `tool-impls/` 目录** — 每个工具对应一个实现文件
|
|
44
|
-
- 文件名格式:`<tool-name>.impl.js`
|
|
45
|
-
- ES6 模块,export default async function
|
|
46
|
-
- 必须与 tools/ 中的工具一一对应
|
|
47
|
-
|
|
48
|
-
5. **(可选)创建 `datasets/` 目录** — 数据集 JSON 文件
|
|
49
|
-
- 必须是非空数组(字符串数组或对象数组)
|
|
50
|
-
- 文件名去掉 `.json` 后作为 Liquid 模板变量名
|
|
51
|
-
|
|
52
|
-
6. **(可选)创建 `skills/` 目录** — agent 技能
|
|
53
|
-
- 每个技能一个子目录,包含 `SKILL.md`
|
|
54
|
-
- frontmatter 必须有 `name` 和 `description`
|
|
55
|
-
|
|
56
|
-
## 示例
|
|
57
|
-
|
|
58
|
-
创建一个名为 `my-assistant` 的 agent:
|
|
59
|
-
|
|
60
|
-
```
|
|
61
|
-
agents/my-assistant/
|
|
62
|
-
├── system-prompt.md
|
|
63
|
-
├── model.json
|
|
64
|
-
├── tools/
|
|
65
|
-
│ └── web_search.json
|
|
66
|
-
├── tool-impls/
|
|
67
|
-
│ └── web_search.impl.js
|
|
68
|
-
├── datasets/
|
|
69
|
-
│ └── rules.json
|
|
70
|
-
└── skills/
|
|
71
|
-
└── summarize/
|
|
72
|
-
└── SKILL.md
|
|
73
|
-
```
|
|
74
|
-
|
|
75
|
-
## 验证
|
|
76
|
-
|
|
77
|
-
创建完成后运行验证确保配置正确:
|
|
78
|
-
|
|
79
|
-
```typescript
|
|
80
|
-
import { validateDir } from "./src/validator/index.js";
|
|
81
|
-
const result = await validateDir("agent-dir", "agents/<agent-name>");
|
|
82
|
-
```
|
|
83
|
-
|
|
84
|
-
## 注意
|
|
85
|
-
|
|
86
|
-
- tools/ 和 tool-impls/ 必须一一对应(名称匹配)
|
|
87
|
-
- system-prompt.md 中的 Liquid 模板变量名要与 datasets/ 中的文件名对应
|
|
88
|
-
- 参考 `agents/my-agent/` 作为完整示例
|
|
89
|
-
|
|
90
|
-
请根据用户的需求创建 agent。$ARGUMENTS
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: create-dataset
|
|
3
|
-
description: 创建 dataset JSON 文件。当用户需要为 agent 添加数据集(few-shot 示例、规则列表等)时使用。
|
|
4
|
-
---
|
|
5
|
-
|
|
6
|
-
在 `agents/my-agent/datasets/` 目录下创建一个新的 dataset JSON 文件。
|
|
7
|
-
|
|
8
|
-
## 规范
|
|
9
|
-
|
|
10
|
-
- 文件名语义化,如 `examples.json`、`rules.json`
|
|
11
|
-
- 必须通过 `src/schemas/dataset.schema.json` 的校验
|
|
12
|
-
- 必须是 JSON 数组,至少包含一个元素
|
|
13
|
-
|
|
14
|
-
## 支持的格式
|
|
15
|
-
|
|
16
|
-
### 字符串数组(规则、提示等)
|
|
17
|
-
|
|
18
|
-
```json
|
|
19
|
-
[
|
|
20
|
-
"回答使用中文",
|
|
21
|
-
"代码示例使用 TypeScript"
|
|
22
|
-
]
|
|
23
|
-
```
|
|
24
|
-
|
|
25
|
-
### 对象数组(问答对、示例等)
|
|
26
|
-
|
|
27
|
-
```json
|
|
28
|
-
[
|
|
29
|
-
{
|
|
30
|
-
"input": "什么是 TypeScript?",
|
|
31
|
-
"output": "TypeScript 是 JavaScript 的超集,添加了静态类型系统。"
|
|
32
|
-
}
|
|
33
|
-
]
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
## 在 system-prompt.md 中引用
|
|
37
|
-
|
|
38
|
-
dataset 的文件名(去掉 .json)作为 Liquid 模板变量名:
|
|
39
|
-
|
|
40
|
-
```markdown
|
|
41
|
-
{% for rule in rules %}
|
|
42
|
-
- {{ rule }}
|
|
43
|
-
{% endfor %}
|
|
44
|
-
|
|
45
|
-
{% for item in examples %}
|
|
46
|
-
问:{{ item.input }}
|
|
47
|
-
答:{{ item.output }}
|
|
48
|
-
{% endfor %}
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
## 注意
|
|
52
|
-
|
|
53
|
-
- 数组不能为空(minItems: 1)
|
|
54
|
-
- 文件名会作为模板变量名,建议用小写字母和下划线
|
|
55
|
-
- 对象数组的字段名自由定义,但要和 system-prompt.md 模板对应
|
|
56
|
-
|
|
57
|
-
请根据用户需求创建 dataset 文件。$ARGUMENTS
|
|
@@ -1,159 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: create-eval-case
|
|
3
|
-
description: 创建评估用例文件。当用户需要为 agent 编写 eval case 时使用。
|
|
4
|
-
argument-hint: "[case_name]"
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
在 `agents/my-agent/eval-cases/` 目录下创建评估用例文件。
|
|
8
|
-
|
|
9
|
-
## 规范
|
|
10
|
-
|
|
11
|
-
- 文件名格式:`<name>.eval.json`
|
|
12
|
-
- 必须通过 `packages/cli/src/schemas/eval-case.schema.json` 的校验
|
|
13
|
-
- 通过 `archon-claw eval <agent-dir>` 命令运行
|
|
14
|
-
|
|
15
|
-
## 文件结构
|
|
16
|
-
|
|
17
|
-
```
|
|
18
|
-
agents/<agent-name>/
|
|
19
|
-
├── eval-cases/ # 评估用例
|
|
20
|
-
│ ├── basic.eval.json
|
|
21
|
-
│ └── tool-usage.eval.json
|
|
22
|
-
└── eval-judges/ # Judge 配置(可选)
|
|
23
|
-
├── default.json
|
|
24
|
-
└── strict.json
|
|
25
|
-
```
|
|
26
|
-
|
|
27
|
-
## 三种评估模式
|
|
28
|
-
|
|
29
|
-
### 1. `single` — 单轮评估
|
|
30
|
-
|
|
31
|
-
只有 1 个 user turn,Agent 生成 1 次回复,对最终回复运行断言。
|
|
32
|
-
|
|
33
|
-
```json
|
|
34
|
-
{
|
|
35
|
-
"name": "计算器测试",
|
|
36
|
-
"mode": "single",
|
|
37
|
-
"turns": [
|
|
38
|
-
{ "role": "user", "content": "帮我算一下 15 * 7" }
|
|
39
|
-
],
|
|
40
|
-
"assertions": [
|
|
41
|
-
{ "type": "contains", "value": "105" },
|
|
42
|
-
{ "type": "tool-called", "value": "calculator" }
|
|
43
|
-
]
|
|
44
|
-
}
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
### 2. `injected` — 历史注入模式
|
|
48
|
-
|
|
49
|
-
提供多轮历史,一次性注入上下文,只有最后一个 user turn 触发 LLM 生成。
|
|
50
|
-
|
|
51
|
-
```json
|
|
52
|
-
{
|
|
53
|
-
"name": "上下文理解",
|
|
54
|
-
"mode": "injected",
|
|
55
|
-
"turns": [
|
|
56
|
-
{ "role": "user", "content": "我叫小明" },
|
|
57
|
-
{ "role": "assistant", "content": "你好小明!" },
|
|
58
|
-
{ "role": "user", "content": "我叫什么名字?" }
|
|
59
|
-
],
|
|
60
|
-
"assertions": [
|
|
61
|
-
{ "type": "contains", "value": "小明" }
|
|
62
|
-
]
|
|
63
|
-
}
|
|
64
|
-
```
|
|
65
|
-
|
|
66
|
-
### 3. `sequential` — 多轮对话模式
|
|
67
|
-
|
|
68
|
-
每个 user turn 独立调用 LLM 生成回复,支持每轮独立断言。
|
|
69
|
-
|
|
70
|
-
```json
|
|
71
|
-
{
|
|
72
|
-
"name": "多步计算",
|
|
73
|
-
"mode": "sequential",
|
|
74
|
-
"turns": [
|
|
75
|
-
{
|
|
76
|
-
"role": "user",
|
|
77
|
-
"content": "帮我算 10 + 20",
|
|
78
|
-
"assertions": [{ "type": "contains", "value": "30" }]
|
|
79
|
-
},
|
|
80
|
-
{
|
|
81
|
-
"role": "user",
|
|
82
|
-
"content": "再乘以 3",
|
|
83
|
-
"assertions": [{ "type": "contains", "value": "90" }]
|
|
84
|
-
}
|
|
85
|
-
],
|
|
86
|
-
"assertions": [{ "type": "contains", "value": "90" }]
|
|
87
|
-
}
|
|
88
|
-
```
|
|
89
|
-
|
|
90
|
-
## 断言类型
|
|
91
|
-
|
|
92
|
-
**文本断言:**
|
|
93
|
-
|
|
94
|
-
| 类型 | 说明 | value 示例 |
|
|
95
|
-
|------|------|-----------|
|
|
96
|
-
| `contains` | 包含文本(不区分大小写) | `"105"` |
|
|
97
|
-
| `not-contains` | 不包含文本 | `"error"` |
|
|
98
|
-
| `regex` | 正则匹配 | `"\\d+\\.\\d+"` |
|
|
99
|
-
| `length-min` | 长度 >= N | `"50"` |
|
|
100
|
-
| `length-max` | 长度 <= N | `"500"` |
|
|
101
|
-
| `json-valid` | 合法 JSON | `""` |
|
|
102
|
-
|
|
103
|
-
**工具断言:**
|
|
104
|
-
|
|
105
|
-
| 类型 | 说明 | value 示例 |
|
|
106
|
-
|------|------|-----------|
|
|
107
|
-
| `tool-called` | 调用了指定工具 | `"calculator"` |
|
|
108
|
-
| `tool-not-called` | 未调用指定工具 | `"search"` |
|
|
109
|
-
| `tool-called-with` | 调用工具且参数匹配 | `{"tool":"calculator","args":{"expression":"15*7"}}` |
|
|
110
|
-
|
|
111
|
-
## Case 可选字段
|
|
112
|
-
|
|
113
|
-
| 字段 | 类型 | 说明 |
|
|
114
|
-
|------|------|------|
|
|
115
|
-
| `expectedOutput` | string | 期望输出描述(给 Judge 参考) |
|
|
116
|
-
| `tags` | string[] | 标签,用于 `--tag` 过滤 |
|
|
117
|
-
| `tools` | string[] | 限制可用工具子集 |
|
|
118
|
-
| `judge` | string | 指定 judge 配置名(如 `"strict"`),默认 `"default"` |
|
|
119
|
-
|
|
120
|
-
## 模板
|
|
121
|
-
|
|
122
|
-
```json
|
|
123
|
-
{
|
|
124
|
-
"$schema": "../../../packages/cli/src/schemas/eval-case.schema.json",
|
|
125
|
-
"name": "测试套件名称",
|
|
126
|
-
"description": "测试说明",
|
|
127
|
-
"cases": [
|
|
128
|
-
{
|
|
129
|
-
"name": "用例名称",
|
|
130
|
-
"mode": "single",
|
|
131
|
-
"turns": [
|
|
132
|
-
{ "role": "user", "content": "用户输入" }
|
|
133
|
-
],
|
|
134
|
-
"assertions": [
|
|
135
|
-
{ "type": "contains", "value": "期望包含的文本" }
|
|
136
|
-
],
|
|
137
|
-
"tags": ["tag1"]
|
|
138
|
-
}
|
|
139
|
-
]
|
|
140
|
-
}
|
|
141
|
-
```
|
|
142
|
-
|
|
143
|
-
## 运行
|
|
144
|
-
|
|
145
|
-
```bash
|
|
146
|
-
archon-claw eval agents/my-agent
|
|
147
|
-
archon-claw eval agents/my-agent --file basic.eval.json
|
|
148
|
-
archon-claw eval agents/my-agent --tag tool-usage
|
|
149
|
-
```
|
|
150
|
-
|
|
151
|
-
## 注意
|
|
152
|
-
|
|
153
|
-
- cases 数组至少包含 1 个用例
|
|
154
|
-
- 每个 case 的 turns 至少有 1 个 turn
|
|
155
|
-
- `injected` 模式最后一个 turn 必须是 `user`
|
|
156
|
-
- `sequential` 模式 turns 中只有 `user` role 的 turn 会触发 LLM 生成
|
|
157
|
-
- `tool-called-with` 的 value 是 JSON 字符串,需要转义
|
|
158
|
-
|
|
159
|
-
请根据用户的需求创建评估用例文件。$ARGUMENTS
|