@lythos/skill-arena 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/LICENSE +21 -0
  2. package/README.md +53 -0
  3. package/package.json +19 -0
  4. package/src/cli.ts +225 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 lythos-labs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,53 @@
1
+ # @lythos/skill-arena
2
+
3
+ > Skill comparison benchmark tool. Run control-variable decks against the same task to compare skill effectiveness.
4
+
5
+ Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) meta-skill ecosystem.
6
+
7
+ ## What it does
8
+
9
+ Creates an arena directory with isolated decks for each skill under test, generates task cards for subagent dispatch, and produces a structured output for judge evaluation. Core principle: **control variables** — only the tested skill differs between decks.
10
+
11
+ ## Install
12
+
13
+ ```bash
14
+ bun add -d @lythos/skill-arena
15
+ # or
16
+ bunx @lythos/skill-arena <args>
17
+ ```
18
+
19
+ ## Commands
20
+
21
+ ```bash
22
+ # Initialize an arena with 2-5 skills
23
+ bunx @lythos/skill-arena \
24
+ --task "Generate user auth flow diagram" \
25
+ --skills "design-doc-mermaid,mermaid-tools" \
26
+ --criteria "syntax,context,token"
27
+
28
+ # Options
29
+ # --task, -t Task description (required; positional arguments are used when the flag is omitted)
30
+ # --skills, -s Comma-separated skill list, min 2, max 5
31
+ # --criteria, -c Evaluation criteria (default: syntax,context,logic,token)
32
+ # --control Control variable skill (default: project-scribe)
33
+ # --dir, -d Arena parent directory (default: tmp)
34
+ # --project, -p Project root (default: .)
35
+ ```
36
+
37
+ ## Output
38
+
39
+ ```
40
+ tmp/arena-<timestamp>-<slug>/
41
+ ├── arena.json # metadata + config
42
+ ├── decks/ # one control-variable deck per skill
43
+ ├── runs/ # subagent output (you fill this)
44
+ └── TASK-arena.md # task card with subagent instructions
45
+ ```
46
+
47
+ ## Architecture
48
+
49
+ This is the **Starter** layer of the thin-skill pattern. The agent-visible **Skill** layer is in `packages/lythoskill-arena/skill/`.
50
+
51
+ ## License
52
+
53
+ MIT
package/package.json ADDED
@@ -0,0 +1,19 @@
1
+ {
2
+ "name": "@lythos/skill-arena",
3
+ "version": "0.1.0",
4
+ "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
5
+ "license": "MIT",
6
+ "type": "module",
7
+ "bin": {
8
+ "lythoskill-arena": "src/cli.ts"
9
+ },
10
+ "files": [
11
+ "src",
12
+ "README.md",
13
+ "LICENSE"
14
+ ],
15
+ "engines": {
16
+ "bun": ">=1.0.0"
17
+ }
18
+ }
package/src/cli.ts ADDED
@@ -0,0 +1,225 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * lythoskill-arena CLI — Skill Arena 编排器
4
+ *
5
+ * 创建标准化的 arena 目录结构,为每个被测 skill 生成控制变量 deck。
6
+ */
7
+
8
+ import {
9
+ existsSync, mkdirSync, writeFileSync,
10
+ } from 'node:fs'
11
+ import { join, resolve } from 'node:path'
12
+
13
+ // ── 简单的 slugify ──────────────────────────────────────────
14
/**
 * Converts free-form text into a lowercase, dash-separated slug.
 *
 * Every run of characters other than ASCII letters, digits and CJK ideographs
 * (U+4E00–U+9FA5) collapses into a single `-`. The result is capped at 40
 * characters and never starts or ends with a dash.
 *
 * @param input Text to slugify, e.g. a task description.
 * @returns The slug; an empty string when `input` has no slug-safe characters.
 */
function slugify(input: string): string {
  return input
    .toLowerCase()
    .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-')
    .replace(/^-+/, '')
    .slice(0, 40)
    // Trim the tail only after truncating: the 40-character cut can land
    // directly behind a separator and would otherwise leave a dangling dash.
    .replace(/-+$/, '')
}
21
+
22
/**
 * Formats a date as a compact 17-digit UTC stamp (`yyyyMMddHHmmssSSS`).
 *
 * @param date Moment to format; defaults to the current time.
 * @returns The ISO-8601 UTC representation of `date` with all separators removed.
 */
function timestamp(date: Date = new Date()): string {
  // "2026-01-02T03:04:05.678Z" -> "20260102030405678"
  return date.toISOString().replace(/[-:T.Z]/g, '').slice(0, 17)
}
26
+
27
+ // ── 解析参数(简单 slice 风格)──────────────────────────────
28
/**
 * Parses the CLI argument vector into named options and positionals.
 *
 * Recognized flags, each of which consumes the following token as its value:
 * `--task`/`-t`, `--skills`/`-s`, `--criteria`/`-c`, `--control`,
 * `--dir`/`-d` and `--project`/`-p`. Unrecognized tokens that start with `-`
 * are ignored; every other token is collected as a positional.
 *
 * @param argv Arguments without the runtime and script path.
 * @returns `options` (with defaults for `criteria`, `control`, `dir` and
 *   `project`; `task` and `skills` start as `undefined`) and `positionals`
 *   in input order.
 */
function parseArgs(argv: string[]) {
  const options: Record<string, string | undefined> = {
    task: undefined,
    skills: undefined,
    criteria: 'syntax,context,logic,token',
    control: 'project-scribe',
    dir: 'tmp',
    project: '.',
  }
  const positionals: string[] = []

  // Every accepted spelling of a value-taking flag, mapped to the option it fills.
  const flagTargets = new Map<string, string>([
    ['--task', 'task'],
    ['-t', 'task'],
    ['--skills', 'skills'],
    ['-s', 'skills'],
    ['--criteria', 'criteria'],
    ['-c', 'criteria'],
    ['--control', 'control'],
    ['--dir', 'dir'],
    ['-d', 'dir'],
    ['--project', 'project'],
    ['-p', 'project'],
  ])

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i]
    if (arg === undefined) continue

    const target = flagTargets.get(arg)
    if (target !== undefined) {
      // The token after a flag is always consumed as its value.
      const value = argv[++i]
      // A flag given as the very last token has no value. Keep the default
      // instead of storing `undefined`, which would later reach path.resolve().
      if (value !== undefined) options[target] = value
    } else if (!arg.startsWith('-')) {
      positionals.push(arg)
    }
  }
  return { options, positionals }
}
59
+
60
+ // ── 主流程 ──────────────────────────────────────────────────
61
/**
 * Creates a skill arena on disk from CLI arguments and prints a summary.
 *
 * After validating the task and the skills under test (2 to 5), it writes
 * the following under `<project>/<dir>/arena-<timestamp>-<slug>/`:
 *   - `decks/arena-run-NN.toml`: one deck per skill, listing that skill plus
 *     the shared control skills;
 *   - `runs/`: an empty directory for subagent output;
 *   - `arena.json`: arena metadata, participants, criteria and run paths;
 *   - `TASK-arena.md`: a task card with YAML front matter and shell
 *     instructions for each subagent and for the judge.
 *
 * The process exits with code 1 when no task is given or when the number of
 * skills is outside 2..5.
 *
 * @param argv CLI arguments without the runtime and script path.
 */
export function runArena(argv: string[]): void {
  const { options, positionals } = parseArgs(argv)

  // Splits a comma-separated value into trimmed, non-empty entries.
  const splitList = (raw: string): string[] =>
    raw.split(',').map(s => s.trim()).filter(Boolean)

  // ── Validate input ──────────────────────────────────────────
  // The task comes from --task, or from the positionals joined by spaces.
  const task = options.task || positionals.join(' ')
  if (!task) {
    console.error('❌ 请提供 --task 或位置参数')
    process.exit(1)
  }

  const skills = splitList(options.skills || '')
  if (skills.length < 2) {
    console.error('❌ 至少需要 2 个 skill 才能进行 arena')
    process.exit(1)
  }
  if (skills.length > 5) {
    console.error('❌ 一次 arena 最多 5 个 skill')
    process.exit(1)
  }

  const criteriaNames = splitList(options.criteria || 'syntax,context,logic,token')
  // Same fallback as the parseArgs default, so an empty --control value does
  // not silently switch to a different control skill.
  const controlSkills = splitList(options.control || 'project-scribe')

  // ── Resolve paths ───────────────────────────────────────────
  const projectDir = resolve(options.project ?? '.')
  const arenaSlug = slugify(task)
  // Cutting the slug to 30 characters can leave a trailing dash, and the slug
  // may be empty; neither case may produce an id that ends in '-'.
  const idSlug = arenaSlug.slice(0, 30).replace(/-+$/, '')
  const arenaId = ['arena', timestamp(), idSlug].filter(Boolean).join('-')
  const arenaDir = resolve(projectDir, options.dir ?? 'tmp', arenaId)

  // NOTE(review): plain substring replacement, kept from the original; it
  // yields "./..." only when the path actually starts with projectDir.
  const toProjectRelative = (p: string): string => p.replace(projectDir, '.')
  const runOutputPath = (id: string): string => join(arenaDir, 'runs', `${id}.md`)
  const parentDeckPath = join(projectDir, 'skill-deck.toml')

  // ── Create the directory layout ─────────────────────────────
  mkdirSync(join(arenaDir, 'decks'), { recursive: true })
  mkdirSync(join(arenaDir, 'runs'), { recursive: true })

  // ── Participants and criteria ───────────────────────────────
  const participants = skills.map((skill, index) => {
    const id = `run-${String(index + 1).padStart(2, '0')}`
    return {
      id,
      name: skill,
      skill_name: skill,
      deck_path: join(arenaDir, 'decks', `arena-${id}.toml`),
    }
  })

  const criteria = criteriaNames.map(name => ({
    name,
    label: name,
    weight: 1,
  }))

  // ── Write one deck per participant ──────────────────────────
  const rule = '# ============================================================'
  for (const p of participants) {
    const deckLines = [
      rule,
      `# Arena Deck: ${p.id} — ${p.name}`,
      rule,
      `# 变量:${p.name}`,
      `# 控制变量:${controlSkills.join(', ')}`,
      rule,
      '',
      '[deck]',
      'working_set = ".claude/skills"',
      'cold_pool = "~/.agents/skill-repos"',
      'max_cards = 10',
      '',
      '[tool]',
      'skills = [',
      `  "${p.skill_name}",`,
      ...controlSkills.map(s => `  "${s}",`),
      ']',
      '',
    ]
    writeFileSync(p.deck_path, deckLines.join('\n'))
  }

  // ── Write arena.json ────────────────────────────────────────
  const arenaJson = {
    version: '1.0.0',
    metadata: {
      id: arenaId,
      slug: arenaSlug,
      created_at: new Date().toISOString(),
      task_description: task,
      participants,
      criteria,
      working_dir: arenaDir,
    },
    status: 'setup',
    runs: participants.map(p => ({
      participant_id: p.id,
      output_path: runOutputPath(p.id),
    })),
  }

  writeFileSync(join(arenaDir, 'arena.json'), JSON.stringify(arenaJson, null, 2) + '\n')

  // ── Write the task card ─────────────────────────────────────
  const taskCardPath = join(arenaDir, 'TASK-arena.md')
  const relArenaDir = toProjectRelative(arenaDir)
  const taskCardLines = [
    '---',
    'type: arena',
    'objective: |',
    // Every line of a block scalar must be indented, not only the first one.
    ...task.split(/\r?\n/).map(line => `  ${line}`),
    'evaluation_criteria:',
    ...criteria.map(c => `  - ${c.label}`),
    'arena_decks:',
    ...participants.map(p => `  - ${toProjectRelative(p.deck_path)}`),
    'judge_persona: |',
    '  你是一个中立的技能评测员。对比所有 subagent 的输出,',
    '  按 evaluation_criteria 给出 1-5 分评分,最终给出 Winner 和选型建议。',
    'acceptance:',
    ...participants.map(
      p => `  - Subagent ${p.id} 使用 ${toProjectRelative(p.deck_path)} 完成任务并写入 runs/${p.id}.md`,
    ),
    '  - Judge 读取所有 run 文件并生成 report.md',
    '  - 所有 subagent 完成后恢复父 deck',
    'managed_dirs:',
    `  - ${relArenaDir}/`,
    '---',
    '',
    `# Arena Task: ${task}`,
    '',
    '## Subagent 指令',
    '',
    // Each subagent links its arena deck, does the work, then restores the parent deck.
    ...participants.flatMap(p => [
      `### ${p.id} (${p.name})`,
      '```bash',
      `cd "${projectDir}"`,
      `bunx @lythos/skill-deck link --deck "${p.deck_path}"`,
      `# 然后执行任务,输出写入 "${runOutputPath(p.id)}"`,
      `bunx @lythos/skill-deck link --deck "${parentDeckPath}"`,
      '```',
      '',
    ]),
    '',
    '### Judge',
    '```bash',
    `cd "${projectDir}"`,
    `bunx @lythos/skill-deck link --deck "${parentDeckPath}"`,
    `# 读取所有 run 文件,生成 "${join(arenaDir, 'report.md')}"`,
    '```',
    '',
  ]

  writeFileSync(taskCardPath, taskCardLines.join('\n'))

  // ── Report ──────────────────────────────────────────────────
  const reportLines = [
    '',
    '🎮 Skill Arena 初始化完成',
    '',
    `ID: ${arenaId}`,
    `任务: ${task}`,
    `目录: ${arenaDir}`,
    `参与者: ${skills.join(', ')}`,
    `控制变量: ${controlSkills.join(', ')}`,
    `评测维度: ${criteriaNames.join(', ')}`,
    '',
    '生成文件:',
    `📋 ${join(arenaDir, 'arena.json')}`,
    `🎴 ${participants.length} 个 arena deck → ${join(arenaDir, 'decks')}`,
    `📝 Task Card → ${taskCardPath}`,
    '',
    '下一步:',
    `1. 阅读 Task Card: cat "${taskCardPath}"`,
    '2. 按指令逐个/并行启动 subagent',
    '3. Judge 生成 report.md',
    '',
  ]
  console.log(reportLines.join('\n'))
}
222
+
223
// CLI entry point: run the arena setup only when `import.meta.main` is set,
// passing along everything after the runtime and script path.
if (import.meta.main) {
  const cliArgs = process.argv.slice(2)
  runArena(cliArgs)
}