@lythos/skill-arena 0.1.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +52 -23
  2. package/package.json +20 -3
  3. package/src/cli.ts +325 -30
package/README.md CHANGED
@@ -1,52 +1,81 @@
1
1
  # @lythos/skill-arena
2
2
 
3
- > Skill comparison benchmark tool. Run control-variable decks against the same task to compare skill effectiveness.
3
+ > Controlled-variable benchmark for AI agent skills. Compare skills, decks, or configurations on the same task — single-skill A/B or full-deck Pareto frontier analysis.
4
4
 
5
- Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) meta-skill ecosystem.
5
+ ## Why
6
6
 
7
- ## What it does
7
+ "Which skill is better?" is the wrong question. The right question is "which skill is better for what."
8
8
 
9
- Creates an arena directory with isolated decks for each skill under test, generates task cards for subagent dispatch, and produces a structured output for judge evaluation. Core principle: **control variables** — only the tested skill differs between decks.
9
+ `skill-arena` scaffolds isolated environments where subagents complete the same task under different decks. A judge agent scores outputs across multiple dimensions. Supports:
10
+
11
+ - **Mode 1**: Single-skill comparison (controlled variable — same helper skills, different test skill).
12
+ - **Mode 2**: Full-deck comparison (Pareto frontier — no single winner, only optimal trade-offs).
10
13
 
11
14
  ## Install
12
15
 
13
16
  ```bash
14
17
  bun add -d @lythos/skill-arena
15
- # or
16
- bunx @lythos/skill-arena <args>
18
+ # or use directly
19
+ bunx @lythos/skill-arena <command>
17
20
  ```
18
21
 
19
- ## Commands
22
+ ## Quick Start
20
23
 
21
24
  ```bash
22
- # Initialize an arena with 2-5 skills
25
+ # Mode 1: Compare two skills on the same task
23
26
  bunx @lythos/skill-arena \
24
- --task "Generate user auth flow diagram" \
27
+ --task "Generate auth flow diagram" \
25
28
  --skills "design-doc-mermaid,mermaid-tools" \
26
29
  --criteria "syntax,context,token"
27
30
 
28
- # Options
29
- # --task, -t Task description (required)
30
- # --skills, -s Comma-separated skill list, min 2, max 5
31
- # --criteria, -c Evaluation criteria (default: syntax,context,logic,token)
32
- # --control Control variable skill (default: project-scribe)
33
- # --dir, -d Arena parent directory (default: tmp)
34
- # --project, -p Project root (default: .)
31
+ # Mode 2: Compare full deck configurations
32
+ bunx @lythos/skill-arena \
33
+ --task "Generate auth flow diagram" \
34
+ --decks "./decks/minimal.toml,./decks/rich.toml" \
35
+ --criteria "quality,token,maintainability"
36
+
37
+ # Visualize results
38
+ bunx @lythos/skill-arena viz tmp/arena-<id>/
35
39
  ```
36
40
 
37
- ## Output
41
+ ## Commands
38
42
 
39
43
  ```
40
- tmp/arena-<timestamp>-<slug>/
41
- ├── arena.json # metadata + config
42
- ├── decks/ # one control-variable deck per skill
43
- ├── runs/ # subagent output (you fill this)
44
- └── TASK-arena.md # task card with subagent instructions
44
+ Usage: bunx @lythos/skill-arena <options> | bunx @lythos/skill-arena viz <dir>
45
+
46
+ Mode 1 — Single-Skill Comparison:
47
+ --task, -t <desc> Task description (required)
48
+ --skills, -s <list> Comma-separated skills, 2–5 (Mode 1)
49
+ --criteria, -c <list> Evaluation dimensions (default: syntax,context,logic,token)
50
+ --control <skill> Control skill (default: lythoskill-project-scribe)
51
+
52
+ Mode 2 — Full-Deck Comparison:
53
+ --decks <paths> Comma-separated deck toml paths, 2–5 (Mode 2)
54
+ --criteria, -c <list> Evaluation dimensions
55
+
56
+ Common:
57
+ --dir, -d <path> Arena parent directory (default: tmp)
58
+ --project, -p <path> Project root (default: .)
59
+
60
+ Viz:
61
+ viz <dir> Render ASCII charts from report.md
45
62
  ```
46
63
 
64
+ ## Skill Documentation
65
+
66
+ This package is the **Starter** layer (CLI implementation).
67
+ The agent-visible **Skill** layer documentation is here:
68
+ [packages/lythoskill-arena/skill/SKILL.md](../../packages/lythoskill-arena/skill/SKILL.md)
69
+
47
70
  ## Architecture
48
71
 
49
- This is the **Starter** layer of the thin-skill pattern. The agent-visible **Skill** layer is in `packages/lythoskill-arena/skill/`.
72
+ Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
73
+
74
+ ```
75
+ Starter (this package) → npm publish → bunx @lythos/skill-arena ...
76
+ Skill (packages/<name>/skill/) → build → SKILL.md + thin scripts
77
+ Output (skills/<name>/) → git commit → agent-visible skill
78
+ ```
50
79
 
51
80
  ## License
52
81
 
package/package.json CHANGED
@@ -1,7 +1,16 @@
1
1
  {
2
2
  "name": "@lythos/skill-arena",
3
- "version": "0.1.0",
3
+ "version": "0.4.0",
4
4
  "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
5
+ "keywords": [
6
+ "ai-agent",
7
+ "skill",
8
+ "claude-code",
9
+ "agent-skills",
10
+ "llm-tooling",
11
+ "lythoskill"
12
+ ],
13
+ "author": "lythos-labs",
5
14
  "license": "MIT",
6
15
  "type": "module",
7
16
  "bin": {
@@ -12,8 +21,16 @@
12
21
  "README.md",
13
22
  "LICENSE"
14
23
  ],
24
+ "repository": {
25
+ "type": "git",
26
+ "url": "git+https://github.com/lythos-labs/lythoskill.git",
27
+ "directory": "packages/lythoskill-arena"
28
+ },
29
+ "bugs": {
30
+ "url": "https://github.com/lythos-labs/lythoskill/issues"
31
+ },
32
+ "homepage": "https://github.com/lythos-labs/lythoskill/tree/main/packages/lythoskill-arena#readme",
15
33
  "engines": {
16
34
  "bun": ">=1.0.0"
17
- },
18
- "license": "MIT"
35
+ }
19
36
  }
package/src/cli.ts CHANGED
@@ -6,9 +6,9 @@
6
6
  */
7
7
 
8
8
  import {
9
- existsSync, mkdirSync, writeFileSync,
9
+ existsSync, mkdirSync, writeFileSync, readFileSync,
10
10
  } from 'node:fs'
11
- import { join, resolve } from 'node:path'
11
+ import { join, resolve, basename } from 'node:path'
12
12
 
13
13
  // ── 简单的 slugify ──────────────────────────────────────────
14
14
  function slugify(input: string): string {
@@ -29,8 +29,9 @@ function parseArgs(argv: string[]) {
29
29
  const options: Record<string, string | undefined> = {
30
30
  task: undefined,
31
31
  skills: undefined,
32
+ decks: undefined,
32
33
  criteria: 'syntax,context,logic,token',
33
- control: 'project-scribe',
34
+ control: 'lythoskill-project-scribe',
34
35
  dir: 'tmp',
35
36
  project: '.',
36
37
  }
@@ -42,6 +43,8 @@ function parseArgs(argv: string[]) {
42
43
  options.task = argv[++i]
43
44
  } else if (arg === '--skills' || arg === '-s') {
44
45
  options.skills = argv[++i]
46
+ } else if (arg === '--decks') {
47
+ options.decks = argv[++i]
45
48
  } else if (arg === '--criteria' || arg === '-c') {
46
49
  options.criteria = argv[++i]
47
50
  } else if (arg === '--control') {
@@ -67,20 +70,47 @@ export function runArena(argv: string[]) {
67
70
  process.exit(1)
68
71
  }
69
72
 
70
- const SKILLS = (options.skills || '').split(',').map(s => s.trim()).filter(Boolean)
71
- if (SKILLS.length < 2) {
73
+ const HAS_DECKS = !!options.decks
74
+ const HAS_SKILLS = !!options.skills
75
+
76
+ if (!HAS_DECKS && !HAS_SKILLS) {
77
+ console.error('❌ 请提供 --skills 或 --decks')
78
+ process.exit(1)
79
+ }
80
+ if (HAS_DECKS && HAS_SKILLS) {
81
+ console.error('❌ --skills 和 --decks 不能同时使用')
82
+ process.exit(1)
83
+ }
84
+
85
+ const DECK_PATHS = HAS_DECKS
86
+ ? (options.decks || '').split(',').map(s => s.trim()).filter(Boolean)
87
+ : []
88
+
89
+ const SKILLS = HAS_SKILLS
90
+ ? (options.skills || '').split(',').map(s => s.trim()).filter(Boolean)
91
+ : []
92
+
93
+ if (HAS_SKILLS && SKILLS.length < 2) {
72
94
  console.error('❌ 至少需要 2 个 skill 才能进行 arena')
73
95
  process.exit(1)
74
96
  }
75
- if (SKILLS.length > 5) {
97
+ if (HAS_SKILLS && SKILLS.length > 5) {
76
98
  console.error('❌ 一次 arena 最多 5 个 skill')
77
99
  process.exit(1)
78
100
  }
101
+ if (HAS_DECKS && DECK_PATHS.length < 2) {
102
+ console.error('❌ 至少需要 2 个 deck 才能进行 arena')
103
+ process.exit(1)
104
+ }
105
+ if (HAS_DECKS && DECK_PATHS.length > 5) {
106
+ console.error('❌ 一次 arena 最多 5 个 deck')
107
+ process.exit(1)
108
+ }
79
109
 
80
110
  const CRITERIA = (options.criteria || 'syntax,context,logic,token')
81
111
  .split(',').map(s => s.trim()).filter(Boolean)
82
112
 
83
- const CONTROL_SKILLS = (options.control || 'lythoskill-project-cortex')
113
+ const CONTROL_SKILLS = (options.control || 'lythoskill-project-scribe')
84
114
  .split(',').map(s => s.trim()).filter(Boolean)
85
115
 
86
116
  const PROJECT_DIR = resolve(options.project!)
@@ -93,15 +123,37 @@ export function runArena(argv: string[]) {
93
123
  mkdirSync(join(ARENA_DIR, 'runs'), { recursive: true })
94
124
 
95
125
  // ── 生成参与者与 deck ───────────────────────────────────────
96
- const participants = SKILLS.map((skill, i) => {
97
- const id = `run-${String(i + 1).padStart(2, '0')}`
98
- return {
99
- id,
100
- name: skill,
101
- skill_name: skill,
102
- deck_path: join(ARENA_DIR, 'decks', `arena-${id}.toml`),
103
- }
104
- })
126
+ let participants: { id: string; name: string; skill_name: string; deck_path: string }[]
127
+ let mode: 'single-skill' | 'full-deck'
128
+
129
+ if (HAS_DECKS) {
130
+ mode = 'full-deck'
131
+ participants = DECK_PATHS.map((deckPath, i) => {
132
+ const id = `run-${String(i + 1).padStart(2, '0')}`
133
+ const name = basename(deckPath).replace(/\.toml$/, '')
134
+ const destPath = join(ARENA_DIR, 'decks', `arena-${id}.toml`)
135
+ // Copy the provided deck to arena directory
136
+ if (existsSync(deckPath)) {
137
+ const content = readFileSync(deckPath, 'utf-8')
138
+ writeFileSync(destPath, content)
139
+ } else {
140
+ console.error(`❌ Deck 文件不存在: ${deckPath}`)
141
+ process.exit(1)
142
+ }
143
+ return { id, name, skill_name: name, deck_path: destPath }
144
+ })
145
+ } else {
146
+ mode = 'single-skill'
147
+ participants = SKILLS.map((skill, i) => {
148
+ const id = `run-${String(i + 1).padStart(2, '0')}`
149
+ return {
150
+ id,
151
+ name: skill,
152
+ skill_name: skill,
153
+ deck_path: join(ARENA_DIR, 'decks', `arena-${id}.toml`),
154
+ }
155
+ })
156
+ }
105
157
 
106
158
  const criteria = CRITERIA.map((c) => ({
107
159
  name: c,
@@ -109,8 +161,9 @@ export function runArena(argv: string[]) {
109
161
  weight: 1,
110
162
  }))
111
163
 
112
- for (const p of participants) {
113
- const deckContent = `# ============================================================
164
+ if (mode === 'single-skill') {
165
+ for (const p of participants) {
166
+ const deckContent = `# ============================================================
114
167
  # Arena Deck: ${p.id} — ${p.name}
115
168
  # ============================================================
116
169
  # 变量:${p.name}
@@ -124,11 +177,11 @@ max_cards = 10
124
177
 
125
178
  [tool]
126
179
  skills = [
127
- "${p.skill_name}",
128
- ${CONTROL_SKILLS.map(s => ` "${s}",`).join('\n')}
180
+ ${[...new Set([p.skill_name, ...CONTROL_SKILLS])].map(s => ` "${s}",`).join('\n')}
129
181
  ]
130
182
  `
131
- writeFileSync(p.deck_path, deckContent)
183
+ writeFileSync(p.deck_path, deckContent)
184
+ }
132
185
  }
133
186
 
134
187
  // ── 生成 arena.json ─────────────────────────────────────────
@@ -164,8 +217,14 @@ ${criteria.map(c => ` - ${c.label}`).join('\n')}
164
217
  arena_decks:
165
218
  ${participants.map(p => ` - ${p.deck_path.replace(PROJECT_DIR, '.')}`).join('\n')}
166
219
  judge_persona: |
167
- 你是一个中立的技能评测员。对比所有 subagent 的输出,
168
- evaluation_criteria 给出 1-5 分评分,最终给出 Winner 和选型建议。
220
+ ${mode === 'full-deck'
221
+ ? `你是一个多目标优化分析师。不要选 Winner
222
+ 对每个 deck 配置,按 evaluation_criteria 输出评分向量(1-5 分)。
223
+ 识别 Pareto 非支配解集——没有"最强",只有"在不同维度上的最优权衡"。
224
+ 对被支配的解,说明它被谁支配、在哪个维度上劣势。
225
+ 如果发现任何涌现 combo(多个 skill 组合产生 1+1>2 的效果),单独标注。`
226
+ : `你是一个中立的技能评测员。对比所有 subagent 的输出,
227
+ 按 evaluation_criteria 给出 1-5 分评分,最终给出 Winner 和选型建议。`}
169
228
  acceptance:
170
229
  ${participants.map(p => ` - Subagent ${p.id} 使用 ${p.deck_path.replace(PROJECT_DIR, '.')} 完成任务并写入 runs/${p.id}.md`).join('\n')}
171
230
  - Judge 读取所有 run 文件并生成 report.md
@@ -181,16 +240,16 @@ managed_dirs:
181
240
  ${participants.map(p => `### ${p.id} (${p.name})
182
241
  \`\`\`bash
183
242
  cd "${PROJECT_DIR}"
184
- bunx @lythos/skill-deck link --deck "${p.deck_path}"
243
+ bunx @lythos/skill-deck link --deck "${p.deck_path}" --workdir "${PROJECT_DIR}"
185
244
  # 然后执行任务,输出写入 "${join(ARENA_DIR, 'runs', `${p.id}.md`)}"
186
- bunx @lythos/skill-deck link --deck "${join(PROJECT_DIR, 'skill-deck.toml')}"
245
+ bunx @lythos/skill-deck link --deck "${join(PROJECT_DIR, 'skill-deck.toml')}" --workdir "${PROJECT_DIR}"
187
246
  \`\`\`
188
247
  `).join('')}
189
248
 
190
249
  ### Judge
191
250
  \`\`\`bash
192
251
  cd "${PROJECT_DIR}"
193
- bunx @lythos/skill-deck link --deck "${join(PROJECT_DIR, 'skill-deck.toml')}"
252
+ bunx @lythos/skill-deck link --deck "${join(PROJECT_DIR, 'skill-deck.toml')}" --workdir "${PROJECT_DIR}"
194
253
  # 读取所有 run 文件,生成 "${join(ARENA_DIR, 'report.md')}"
195
254
  \`\`\`
196
255
  `
@@ -204,9 +263,9 @@ bunx @lythos/skill-deck link --deck "${join(PROJECT_DIR, 'skill-deck.toml')}"
204
263
  ID: ${ARENA_ID}
205
264
  任务: ${TASK}
206
265
  目录: ${ARENA_DIR}
207
- 参与者: ${SKILLS.join(', ')}
208
- 控制变量: ${CONTROL_SKILLS.join(', ')}
209
- 评测维度: ${CRITERIA.join(', ')}
266
+ 模式: ${mode === 'full-deck' ? '完整 deck 配置对比' : '单 skill 对比'}
267
+ 参与者: ${participants.map(p => p.name).join(', ')}
268
+ ${mode === 'single-skill' ? `控制变量: ${CONTROL_SKILLS.join(', ')}\n` : ''}评测维度: ${CRITERIA.join(', ')}
210
269
 
211
270
  生成文件:
212
271
  📋 ${join(ARENA_DIR, 'arena.json')}
@@ -220,6 +279,242 @@ ID: ${ARENA_ID}
220
279
  `)
221
280
  }
222
281
 
282
+ // ── Viz: Report Visualizer ─────────────────────────────────
283
+
284
+ interface ScoreRow {
285
+ checkpoint: string
286
+ scores: Record<string, number>
287
+ notes: string
288
+ maxScore: number
289
+ }
290
+
291
+ function parseReportMd(reportPath: string): { title: string; rows: ScoreRow[]; summary?: Record<string, number> } | null {
292
+ if (!existsSync(reportPath)) return null
293
+ const text = readFileSync(reportPath, 'utf-8')
294
+
295
+ // Extract title
296
+ const titleMatch = text.match(/^#\s+(.+)$/m)
297
+ const title = titleMatch ? titleMatch[1].trim() : 'Arena Report'
298
+
299
+ const lines = text.split('\n')
300
+ const rows: ScoreRow[] = []
301
+ const summaries: Record<string, number> = {}
302
+
303
+ let currentSection = ''
304
+ let inTable = false
305
+ let headers: string[] = []
306
+
307
+ for (const line of lines) {
308
+ const trimmed = line.trim()
309
+
310
+ // Detect section headers like "### Memory Condition" or "### Control Condition"
311
+ const sectionMatch = trimmed.match(/^#{2,4}\s+(.*Condition.*|.*Variable.*|.*Group.*)/i)
312
+ if (sectionMatch) {
313
+ currentSection = sectionMatch[1].replace(/[()]/g, '').trim()
314
+ inTable = false
315
+ continue
316
+ }
317
+
318
+ // Table header row
319
+ if (trimmed.startsWith('|') && trimmed.includes('Checkpoint') && !trimmed.includes('---')) {
320
+ inTable = true
321
+ const parts = trimmed.split('|').map(s => s.trim()).filter(Boolean)
322
+ headers = parts.slice(1)
323
+ continue
324
+ }
325
+
326
+ // Table separator
327
+ if (inTable && trimmed.startsWith('|') && trimmed.includes('---')) continue
328
+
329
+ // Table data row
330
+ if (inTable && trimmed.startsWith('|')) {
331
+ const parts = trimmed.split('|').map(s => s.trim()).filter(Boolean)
332
+ if (parts.length >= 2) {
333
+ const firstCell = parts[0]
334
+ const checkpoint = firstCell.replace(/\*\*/g, '').trim()
335
+
336
+ // Skip "Total" rows — handle them as summary
337
+ if (/^total/i.test(checkpoint)) {
338
+ for (let i = 1; i < parts.length && i <= headers.length; i++) {
339
+ const num = parseFloat(parts[i])
340
+ if (!isNaN(num)) {
341
+ const key = currentSection
342
+ ? `${currentSection} ${headers[i - 1]}`.trim()
343
+ : headers[i - 1]
344
+ summaries[key] = num
345
+ }
346
+ }
347
+ continue
348
+ }
349
+
350
+ // Skip non-numeric rows (section headers inside table)
351
+ const secondCell = parts[1]
352
+ if (isNaN(parseFloat(secondCell))) continue
353
+
354
+ const scores: Record<string, number> = {}
355
+ let maxScore = 0
356
+ for (let i = 1; i < parts.length && i <= headers.length; i++) {
357
+ const header = headers[i - 1]
358
+ if (/notes?/i.test(header)) continue // Skip notes column
359
+ const val = parts[i]
360
+ const num = parseFloat(val)
361
+ if (!isNaN(num)) {
362
+ // Prefix with section name if multiple condition tables exist
363
+ const key = currentSection && headers.length <= 2
364
+ ? `${currentSection} Score`
365
+ : header
366
+ scores[key] = num
367
+ maxScore = Math.max(maxScore, num)
368
+ }
369
+ }
370
+
371
+ const notes = parts[parts.length - 1] || ''
372
+ if (Object.keys(scores).length > 0) {
373
+ rows.push({ checkpoint, scores, notes, maxScore })
374
+ }
375
+ }
376
+ continue
377
+ }
378
+
379
+ // End of table
380
+ if (inTable && !trimmed.startsWith('|') && trimmed !== '') {
381
+ inTable = false
382
+ currentSection = ''
383
+ }
384
+ }
385
+
386
+ return { title, rows, summary: Object.keys(summaries).length > 0 ? summaries : undefined }
387
+ }
388
+
389
+ function renderBar(value: number, max: number, width = 30): string {
390
+ const filled = Math.round((value / max) * width)
391
+ const empty = width - filled
392
+ return '█'.repeat(filled) + '░'.repeat(empty)
393
+ }
394
+
395
+ function renderAsciiChart(report: NonNullable<ReturnType<typeof parseReportMd>>): string {
396
+ const { title, rows, summary } = report
397
+ const participants = rows.length > 0 ? Object.keys(rows[0].scores) : []
398
+ const maxVal = rows.reduce((m, r) => Math.max(m, r.maxScore), 0) || 10
399
+
400
+ let out = `\n╔══════════════════════════════════════════════════════════════════════╗\n`
401
+ out += `║ 🏆 ${title.slice(0, 58).padEnd(58)} ║\n`
402
+ out += `╚══════════════════════════════════════════════════════════════════════╝\n\n`
403
+
404
+ // Per-checkpoint bars
405
+ for (const row of rows) {
406
+ out += `📋 ${row.checkpoint}\n`
407
+ for (const [name, score] of Object.entries(row.scores)) {
408
+ const bar = renderBar(score, maxVal)
409
+ out += ` ${name.padEnd(12)} ${bar} ${score}/${maxVal}\n`
410
+ }
411
+ if (row.notes) {
412
+ out += ` 💡 ${row.notes.slice(0, 80)}${row.notes.length > 80 ? '...' : ''}\n`
413
+ }
414
+ out += '\n'
415
+ }
416
+
417
+ // Summary totals
418
+ if (summary) {
419
+ out += `━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n`
420
+ out += `📊 TOTAL SCORES\n`
421
+ for (const [name, score] of Object.entries(summary)) {
422
+ const bar = renderBar(score, maxVal * rows.length)
423
+ out += ` ${name.padEnd(12)} ${bar} ${score}\n`
424
+ }
425
+ out += '\n'
426
+ }
427
+
428
+ return out
429
+ }
430
+
431
+ function renderRadarChart(report: NonNullable<ReturnType<typeof parseReportMd>>): string {
432
+ const { rows } = report
433
+ if (rows.length === 0) return ''
434
+
435
+ const participants = Object.keys(rows[0].scores)
436
+ if (participants.length < 2) return ''
437
+
438
+ // Use checkpoint names as axes
439
+ const axes = rows.map(r => r.checkpoint.slice(0, 12))
440
+ const maxVal = rows.reduce((m, r) => Math.max(m, ...Object.values(r.scores)), 0) || 10
441
+
442
+ // Simple ASCII radar: concentric circles with labels
443
+ const size = 16
444
+ const center = size / 2
445
+ let out = `\n🕸️ RADAR CHART (MOO Scoring)\n\n`
446
+
447
+ // For each participant, show a compact radar representation
448
+ const symbols = ['■', '●', '▲', '◆', '★']
449
+ for (let pi = 0; pi < participants.length; pi++) {
450
+ const p = participants[pi]
451
+ const sym = symbols[pi % symbols.length]
452
+ out += ` ${sym} ${p}\n`
453
+ }
454
+ out += '\n'
455
+
456
+ // Per-axis score table (more readable than pure ASCII art)
457
+ out += ` Axis `
458
+ for (const p of participants) out += `${p.slice(0, 8).padStart(8)} `
459
+ out += '\n'
460
+ out += ` ${'─'.repeat(14 + participants.length * 9)}\n`
461
+
462
+ for (let i = 0; i < rows.length; i++) {
463
+ const axis = axes[i].padEnd(12)
464
+ out += ` ${axis} `
465
+ for (const p of participants) {
466
+ const score = rows[i].scores[p] ?? 0
467
+ out += `${String(score).padStart(8)} `
468
+ }
469
+ out += '\n'
470
+ }
471
+
472
+ return out
473
+ }
474
+
475
+ function runViz(argv: string[]) {
476
+ const arenaDir = argv.find(a => !a.startsWith('-')) || '.'
477
+ const resolvedDir = resolve(arenaDir)
478
+
479
+ const arenaJsonPath = join(resolvedDir, 'arena.json')
480
+ const reportPath = join(resolvedDir, 'report.md')
481
+
482
+ if (!existsSync(arenaJsonPath)) {
483
+ console.error(`❌ 找不到 arena.json: ${arenaJsonPath}`)
484
+ process.exit(1)
485
+ }
486
+
487
+ const arenaJson = JSON.parse(readFileSync(arenaJsonPath, 'utf-8'))
488
+ const meta = arenaJson.metadata
489
+
490
+ console.log(`\n🎮 Arena Viz: ${meta.id}`)
491
+ console.log(` 任务: ${meta.task_description}`)
492
+ console.log(` 参与者: ${meta.participants.map((p: any) => p.name).join(', ')}`)
493
+
494
+ if (!existsSync(reportPath)) {
495
+ console.log(`\n⏳ report.md 尚未生成,请先运行 Judge`)
496
+ return
497
+ }
498
+
499
+ const report = parseReportMd(reportPath)
500
+ if (!report || report.rows.length === 0) {
501
+ console.log(`\n⚠️ 无法从 report.md 解析评分数据`)
502
+ return
503
+ }
504
+
505
+ console.log(renderAsciiChart(report))
506
+ console.log(renderRadarChart(report))
507
+ }
508
+
509
+ // ── Main Entry ───────────────────────────────────────────────
510
+
223
511
  if (import.meta.main) {
224
- runArena(process.argv.slice(2))
512
+ const args = process.argv.slice(2)
513
+ const cmd = args[0]
514
+
515
+ if (cmd === 'viz') {
516
+ runViz(args.slice(1))
517
+ } else {
518
+ runArena(args)
519
+ }
225
520
  }