@lythos/skill-arena 0.9.38 → 0.9.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. package/README.md +16 -28
  2. package/package.json +1 -1
  3. package/src/cli.ts +82 -147
package/README.md CHANGED
@@ -49,26 +49,23 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
49
49
  ```bash
50
50
  bun add -d @lythos/skill-arena
51
51
  # or use directly
52
- bunx @lythos/skill-arena@0.9.38 <command>
52
+ bunx @lythos/skill-arena@0.9.40 <command>
53
53
  ```
54
54
 
55
55
  ## Quick Start
56
56
 
57
57
  ```bash
58
- # Mode 1: Compare two skills on the same task
59
- bunx @lythos/skill-arena@0.9.38 \
60
- --task "Generate auth flow diagram" \
61
- --skills "design-doc-mermaid,mermaid-tools" \
62
- --criteria "syntax,context,token"
63
-
64
- # Mode 2: Compare full deck configurations
65
- bunx @lythos/skill-arena@0.9.38 \
66
- --task "Generate auth flow diagram" \
67
- --decks "./decks/minimal.toml,./decks/rich.toml" \
68
- --criteria "quality,token,maintainability"
58
+ # Single: test a deck with one agent (exec shortcut)
59
+ bunx @lythos/skill-arena@0.9.40 single \
60
+ --brief "Generate auth flow diagram" \
61
+ --deck ./examples/decks/documents.toml
62
+
63
+ # Vs: compare multiple decks side by side (declarative)
64
+ bunx @lythos/skill-arena@0.9.40 vs \
65
+ --config examples/arena/research-compare/arena.toml
69
66
 
70
67
  # Visualize results
71
- bunx @lythos/skill-arena@0.9.38 viz tmp/arena-<id>/
68
+ bunx @lythos/skill-arena@0.9.40 viz tmp/arena-<id>/
72
69
  ```
73
70
 
74
71
  ## Commands
@@ -77,16 +74,16 @@ bunx @lythos/skill-arena@0.9.38 viz tmp/arena-<id>/
77
74
 
78
75
  ```bash
79
76
  # Print execution plan without running
80
- bunx @lythos/skill-arena@0.9.38 run --config arena.toml --dry-run
77
+ bunx @lythos/skill-arena@0.9.40 vs --config arena.toml --dry-run
81
78
 
82
79
  # Execute with per-side runs_per_side and statistical aggregation
83
- bunx @lythos/skill-arena@0.9.38 run --config arena.toml
80
+ bunx @lythos/skill-arena@0.9.40 vs --config arena.toml
84
81
  ```
85
82
 
86
83
  ### CLI-flag mode (backward compat)
87
84
 
88
85
  ```
89
- bunx @lythos/skill-arena@0.9.38 run \
86
+ bunx @lythos/skill-arena@0.9.40 run \
90
87
  --task ./TASK-arena.md \
91
88
  --players ./players/claude.toml \
92
89
  --decks ./decks/run-01.toml,./decks/run-02.toml \
@@ -96,13 +93,13 @@ bunx @lythos/skill-arena@0.9.38 run \
96
93
  ### Scaffold mode (legacy, manual execution)
97
94
 
98
95
  ```
99
- bunx @lythos/skill-arena@0.9.38 scaffold --task "..." --skills a,b
96
+ bunx @lythos/skill-arena@0.9.40 scaffold --task "..." --decks a.toml,b.toml
100
97
  ```
101
98
 
102
99
  ### Viz
103
100
 
104
101
  ```bash
105
- bunx @lythos/skill-arena@0.9.38 viz runs/arena-<id>/
102
+ bunx @lythos/skill-arena@0.9.40 viz runs/arena-<id>/
106
103
  ```
107
104
 
108
105
  ## Skill Documentation
@@ -116,7 +113,7 @@ The agent-visible **Skill** layer documentation is here:
116
113
  Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
117
114
 
118
115
  ```
119
- Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.38 ...
116
+ Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.40 ...
120
117
  Skill (packages/<name>/skill/) → build → SKILL.md + thin scripts
121
118
  Output (skills/<name>/) → git commit → agent-visible skill
122
119
  ```
@@ -137,15 +134,6 @@ arena.toml → ArenaToml (Zod) → ExecutionPlan (pure) → per-cell agent
137
134
 
138
135
  Built on `@lythos/test-utils` shared infrastructure.
139
136
 
140
- ## Test Coverage
141
-
142
- | Layer | Count | CI | Notes |
143
- |-------|-------|----|-------|
144
- | Unit tests | 41 | ✅ | TOML parser, player resolution, Pareto, stats |
145
- | Agent BDD | — | ❌ | Requires `claude` CLI; run locally |
146
-
147
- Pareto frontier is a **deterministic algorithm** — never delegated to LLM. 8 unit tests cover dominance, cross-dominance, transitive chains, partial criteria, and empty scores.
148
-
149
137
  ## License
150
138
 
151
139
  MIT
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lythos/skill-arena",
3
- "version": "0.9.38",
3
+ "version": "0.9.40",
4
4
  "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
5
5
  "keywords": [
6
6
  "ai-agent",
package/src/cli.ts CHANGED
@@ -37,52 +37,49 @@ function printHelp(): void {
37
37
  console.log(`🎭 lythoskill-arena — Skill comparison runner
38
38
 
39
39
  Usage:
40
- lythoskill-arena agent-run --task <path> --deck <path> [--player kimi] [--out <dir>] [--timeout <ms>]
41
- lythoskill-arena agent-run --brief "<prompt>" --deck <path> [--out <dir>] [--timeout <ms>]
42
- lythoskill-arena run --task <path> --players <A.toml,B.toml> --decks <A.toml,B.toml> --criteria <c1,c2,...> [--out <dir>]
43
- lythoskill-arena scaffold --task "<description>" --skills <skill1,skill2,...>
40
+ lythoskill-arena single --task <path> --deck <path> [--player kimi] [--out <dir>] [--timeout <ms>]
41
+ lythoskill-arena single --brief "<prompt>" --deck <path> [--out <dir>] [--timeout <ms>]
42
+ lythoskill-arena vs --config arena.toml [--dry-run]
44
43
  lythoskill-arena scaffold --task "<description>" --decks <deck1,deck2,...>
45
44
  lythoskill-arena viz <arena-dir>
46
45
 
47
46
  Commands:
48
- run Run arena programmatically (declarative arena.toml or CLI flags)
47
+ single Single-player deck test (exec shortcut): test a deck with one player
48
+ vs Multi-side comparison: run arena from declarative arena.toml
49
49
  scaffold Create arena directory structure (legacy, manual subagent execution)
50
50
  viz Visualize arena report (ASCII charts)
51
51
 
52
52
  Options:
53
- -t, --task <path|desc> Task description or path to TASK-arena.md
54
- -s, --skills <list> Comma-separated skill names (scaffold only)
55
- --decks <list> Comma-separated deck paths
56
- -c, --criteria <list> Evaluation criteria (default: syntax,context,logic,token)
57
- --players <list> Comma-separated player.toml paths (CLI run only)
58
- --config <path> Path to arena.toml (declarative mode, k8s-style)
59
- --dry-run Print execution plan without running (with --config)
60
- --control <skill> Control skill for comparison (scaffold only)
61
- --out <dir> Output directory (run: defaults to runs/arena-<id>)
62
- -d, --dir <dir> Output directory (scaffold: defaults to tmp)
63
- -p, --project <dir> Project directory (default: .)
53
+ -t, --task <path|desc> Task description or path to TASK-arena.md / .agent.md
54
+ --deck <path> Deck path (single only)
55
+ --brief "<text>" Inline task description (single only, alternative to --task)
56
+ --player <name> Agent player (single only, default: kimi)
57
+ -c, --criteria <list> Evaluation criteria (scaffold only, default: syntax,context,logic,token)
58
+ --config <path> Path to arena.toml (vs only)
59
+ --dry-run Print execution plan without running (vs --config only)
60
+ --out <dir> Output directory
61
+ -d, --dir <dir> Parent dir (scaffold: defaults to tmp)
62
+ -p, --project <dir> Project root (default: .)
63
+ --timeout <ms> Subagent timeout (single only)
64
64
 
65
65
  Examples:
66
- # Single agent run (simplest path)
67
- lythoskill-arena agent-run --task ./TASK.md --deck ./deck.toml
68
- lythoskill-arena agent-run --task ./TASK.md --deck ./deck.toml --player kimi --out ./output
66
+ # Single-player deck test (exec shortcut)
67
+ lythoskill-arena single --task ./TASK.agent.md --deck ./deck.toml
68
+ lythoskill-arena single --brief "Generate auth flow diagram" --deck ./deck.toml --player kimi
69
69
 
70
- # Declarative mode (k8s-style)
71
- lythoskill-arena run --config ./arena.toml
72
- lythoskill-arena run --config ./arena.toml --dry-run
73
-
74
- # CLI-flag mode (backward compat)
75
- lythoskill-arena run --task ./TASK-arena.md --players ./players/claude.toml --decks ./decks/run-01.toml,./decks/run-02.toml --criteria coverage,relevance
70
+ # Multi-side comparison (declarative)
71
+ lythoskill-arena vs --config ./arena.toml
72
+ lythoskill-arena vs --config ./arena.toml --dry-run
76
73
 
77
74
  # Legacy scaffolding
78
- lythoskill-arena scaffold --task "Refactor auth module" --skills skill-a,skill-b
75
+ lythoskill-arena scaffold --task "Refactor auth module" --decks ./decks/a.toml,./decks/b.toml
79
76
  lythoskill-arena viz runs/arena-20260504
80
77
  `)
81
78
  }
82
79
 
83
- // ── agent-run: single agent execution (simplest path) ────────────────────
80
+ // ── single: single-player deck test (exec shortcut) ──────────────────────
84
81
 
85
- async function agentRun(args: string[]) {
82
+ async function singleRun(args: string[]) {
86
83
  const opts: Record<string, string | undefined> = {}
87
84
  for (let i = 0; i < args.length; i++) {
88
85
  if (args[i] === '--task' || args[i] === '-t') opts.task = args[++i]
@@ -94,11 +91,16 @@ async function agentRun(args: string[]) {
94
91
  }
95
92
 
96
93
  if (!opts.deck) {
97
- console.error('❌ --deck <path> is required')
94
+ console.error(`❌ --deck <path> is required.
95
+ Usage: lythoskill-arena single --deck ./deck.toml --task ./scenario.agent.md
96
+ lythoskill-arena single --deck ./deck.toml --brief "your task description"
97
+ Example decks: examples/decks/scout.toml, examples/decks/documents.toml`)
98
98
  process.exit(1)
99
99
  }
100
100
  if (!opts.task && (!opts.brief || !opts.brief.trim())) {
101
- console.error('❌ --task <path> or --brief "<prompt>" is required and cannot be empty')
101
+ console.error(`❌ --task <path> or --brief "<prompt>" is required.
102
+ Usage: lythoskill-arena single --deck ./deck.toml --task ./scenario.agent.md
103
+ lythoskill-arena single --deck ./deck.toml --brief "your task description"`)
102
104
  process.exit(1)
103
105
  }
104
106
 
@@ -122,7 +124,10 @@ async function agentRun(args: string[]) {
122
124
  deckPath = dest
123
125
  } else {
124
126
  deckPath = resolve(opts.deck)
125
- if (!deckExists(deckPath)) { console.error(`❌ Deck file not found: ${deckPath}`); process.exit(1) }
127
+ if (!deckExists(deckPath)) { console.error(`❌ Deck file not found: ${deckPath}
128
+ Create one: examples/decks/scout.toml (minimal), examples/decks/documents.toml (documents)
129
+ Or fetch: curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml > deck.toml
130
+ Or create: see https://github.com/lythos-labs/lythoskill/tree/main/examples/decks/`); process.exit(1) }
126
131
  }
127
132
 
128
133
  const { useAgent } = await import('@lythos/test-utils/agents')
@@ -142,7 +147,10 @@ async function agentRun(args: string[]) {
142
147
  const scenarioOpt: Record<string, unknown> = {}
143
148
  if (opts.task) {
144
149
  const taskPath = resolve(opts.task)
145
- if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}`); process.exit(1) }
150
+ if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}
151
+ Create a .agent.md scenario or use --brief for inline tasks.
152
+ Format: frontmatter (name, description, timeout) + Given/When/Then/Judge sections.
153
+ Example: see playground/arena-one-shot/TASK-arena.agent.md`); process.exit(1) }
146
154
  scenarioOpt.scenarioPath = taskPath
147
155
  } else {
148
156
  scenarioOpt.scenario = {
@@ -268,10 +276,8 @@ function parseArgs(argv: string[]) {
268
276
 
269
277
  const options: Record<string, string | undefined> = {
270
278
  task: undefined,
271
- skills: undefined,
272
279
  decks: undefined,
273
280
  criteria: 'syntax,context,logic,token',
274
- control: 'lythoskill-project-scribe',
275
281
  dir: 'tmp',
276
282
  project: '.',
277
283
  config: undefined,
@@ -284,13 +290,10 @@ function parseArgs(argv: string[]) {
284
290
  const arg = argv[i]
285
291
  if (arg === '--task' || arg === '-t') {
286
292
  options.task = argv[++i]
287
- } else if (arg === '--skills' || arg === '-s') {
288
- options.skills = argv[++i]
289
293
  } else if (arg === '--decks') {
290
294
  options.decks = argv[++i]
291
295
  } else if (arg === '--criteria' || arg === '-c') {
292
296
  options.criteria = argv[++i]
293
- } else if (arg === '--control') {
294
297
  options.control = argv[++i]
295
298
  } else if (arg === '--dir' || arg === '-d') {
296
299
  options.dir = argv[++i]
@@ -319,39 +322,13 @@ export function runArena(argv: string[]) {
319
322
  process.exit(1)
320
323
  }
321
324
 
322
- const HAS_DECKS = !!options.decks
323
- const HAS_SKILLS = !!options.skills
325
+ const DECK_PATHS = (options.decks || '').split(',').map(s => s.trim()).filter(Boolean)
324
326
 
325
- if (!HAS_DECKS && !HAS_SKILLS) {
326
- console.error('❌ 请提供 --skills 或 --decks')
327
- process.exit(1)
328
- }
329
- if (HAS_DECKS && HAS_SKILLS) {
330
- console.error('❌ --skills 和 --decks 不能同时使用')
331
- process.exit(1)
332
- }
333
-
334
- const DECK_PATHS = HAS_DECKS
335
- ? (options.decks || '').split(',').map(s => s.trim()).filter(Boolean)
336
- : []
337
-
338
- const SKILLS = HAS_SKILLS
339
- ? (options.skills || '').split(',').map(s => s.trim()).filter(Boolean)
340
- : []
341
-
342
- if (HAS_SKILLS && SKILLS.length < 2) {
343
- console.error('❌ 至少需要 2 个 skill 才能进行 arena')
344
- process.exit(1)
345
- }
346
- if (HAS_SKILLS && SKILLS.length > 5) {
347
- console.error('❌ 一次 arena 最多 5 个 skill')
348
- process.exit(1)
349
- }
350
- if (HAS_DECKS && DECK_PATHS.length < 2) {
327
+ if (DECK_PATHS.length < 2) {
351
328
  console.error('❌ 至少需要 2 个 deck 才能进行 arena')
352
329
  process.exit(1)
353
330
  }
354
- if (HAS_DECKS && DECK_PATHS.length > 5) {
331
+ if (DECK_PATHS.length > 5) {
355
332
  console.error('❌ 一次 arena 最多 5 个 deck')
356
333
  process.exit(1)
357
334
  }
@@ -359,9 +336,6 @@ export function runArena(argv: string[]) {
359
336
  const CRITERIA = (options.criteria || 'syntax,context,logic,token')
360
337
  .split(',').map(s => s.trim()).filter(Boolean)
361
338
 
362
- const CONTROL_SKILLS = (options.control || 'lythoskill-project-scribe')
363
- .split(',').map(s => s.trim()).filter(Boolean)
364
-
365
339
  const PROJECT_DIR = resolve(options.project!)
366
340
  const ARENA_SLUG = slugify(TASK)
367
341
  const ARENA_ID = `arena-${timestamp()}-${ARENA_SLUG.slice(0, 30)}`
@@ -373,37 +347,20 @@ export function runArena(argv: string[]) {
373
347
  mkdirSync(join(ARENA_DIR, 'sides'), { recursive: true })
374
348
 
375
349
  // ── 生成参与者与 deck ───────────────────────────────────────
376
- let participants: { id: string; name: string; skill_name: string; deck_path: string }[]
377
- let mode: 'single-skill' | 'full-deck'
378
-
379
- if (HAS_DECKS) {
380
- mode = 'full-deck'
381
- participants = DECK_PATHS.map((deckPath, i) => {
382
- const id = `run-${String(i + 1).padStart(2, '0')}`
383
- const name = basename(deckPath).replace(/\.toml$/, '')
384
- const destPath = join(ARENA_DIR, 'decks', `arena-${id}.toml`)
385
- // Copy the provided deck to arena directory
386
- if (existsSync(deckPath)) {
387
- const content = readFileSync(deckPath, 'utf-8')
388
- writeFileSync(destPath, content)
389
- } else {
390
- console.error(`❌ Deck 文件不存在: ${deckPath}`)
391
- process.exit(1)
392
- }
393
- return { id, name, skill_name: name, deck_path: destPath }
394
- })
395
- } else {
396
- mode = 'single-skill'
397
- participants = SKILLS.map((skill, i) => {
398
- const id = `run-${String(i + 1).padStart(2, '0')}`
399
- return {
400
- id,
401
- name: skill,
402
- skill_name: skill,
403
- deck_path: join(ARENA_DIR, 'decks', `arena-${id}.toml`),
404
- }
405
- })
406
- }
350
+ const participants = DECK_PATHS.map((deckPath, i) => {
351
+ const id = `run-${String(i + 1).padStart(2, '0')}`
352
+ const name = basename(deckPath).replace(/\.toml$/, '')
353
+ const destPath = join(ARENA_DIR, 'decks', `arena-${id}.toml`)
354
+ // Copy the provided deck to arena directory
355
+ if (existsSync(deckPath)) {
356
+ const content = readFileSync(deckPath, 'utf-8')
357
+ writeFileSync(destPath, content)
358
+ } else {
359
+ console.error(`❌ Deck 文件不存在: ${deckPath}`)
360
+ process.exit(1)
361
+ }
362
+ return { id, name, skill_name: name, deck_path: destPath }
363
+ })
407
364
 
408
365
  const criteria = CRITERIA.map((c) => ({
409
366
  name: c,
@@ -411,29 +368,6 @@ export function runArena(argv: string[]) {
411
368
  weight: 1,
412
369
  }))
413
370
 
414
- if (mode === 'single-skill') {
415
- for (const p of participants) {
416
- const deckContent = `# ============================================================
417
- # Arena Deck: ${p.id} — ${p.name}
418
- # ============================================================
419
- # 变量:${p.name}
420
- # 控制变量:${CONTROL_SKILLS.join(', ')}
421
- # ============================================================
422
-
423
- [deck]
424
- working_set = ".claude/skills"
425
- cold_pool = "~/.agents/skill-repos"
426
- max_cards = 10
427
-
428
- [tool]
429
- skills = [
430
- ${[...new Set([p.skill_name, ...CONTROL_SKILLS])].map(s => ` "${s}",`).join('\n')}
431
- ]
432
- `
433
- writeFileSync(p.deck_path, deckContent)
434
- }
435
- }
436
-
437
371
  // ── 为每个 side 创建隔离工作空间 ────────────────────────────
438
372
  for (const p of participants) {
439
373
  const sideDir = join(ARENA_DIR, 'sides', p.id)
@@ -481,14 +415,11 @@ ${criteria.map(c => ` - ${c.label}`).join('\n')}
481
415
  arena_decks:
482
416
  ${participants.map(p => ` - ${p.deck_path.replace(PROJECT_DIR, '.')}`).join('\n')}
483
417
  judge_persona: |
484
- ${mode === 'full-deck'
485
- ? `你是一个多目标优化分析师。不要选 Winner。
486
- 对每个 deck 配置,按 evaluation_criteria 输出评分向量(1-5 分)。
487
- 识别 Pareto 非支配解集——没有"最强",只有"在不同维度上的最优权衡"。
488
- 对被支配的解,说明它被谁支配、在哪个维度上劣势。
489
- 如果发现任何涌现 combo(多个 skill 组合产生 1+1>2 的效果),单独标注。`
490
- : `你是一个中立的技能评测员。对比所有 subagent 的输出,
491
- 按 evaluation_criteria 给出 1-5 分评分,最终给出 Winner 和选型建议。`}
418
+ 你是一个多目标优化分析师。不要选 Winner。
419
+ 对每个 deck 配置,按 evaluation_criteria 输出评分向量(1-5 分)。
420
+ 识别 Pareto 非支配解集——没有"最强",只有"在不同维度上的最优权衡"。
421
+ 对被支配的解,说明它被谁支配、在哪个维度上劣势。
422
+ 如果发现任何涌现 combo(多个 skill 组合产生 1+1>2 的效果),单独标注。
492
423
  acceptance:
493
424
  ${participants.map(p => ` - Subagent ${p.id} 在 sides/${p.id}/ 隔离环境完成任务并写入 runs/${p.id}.md`).join('\n')}
494
425
  - Judge 读取所有 run 文件并生成 report.md
@@ -527,9 +458,9 @@ cd "${ARENA_DIR}"
527
458
  ID: ${ARENA_ID}
528
459
  任务: ${TASK}
529
460
  目录: ${ARENA_DIR}
530
- 模式: ${mode === 'full-deck' ? '完整 deck 配置对比' : '单 skill 对比'}
461
+ 模式: deck 配置对比
531
462
  参与者: ${participants.map(p => p.name).join(', ')}
532
- ${mode === 'single-skill' ? `控制变量: ${CONTROL_SKILLS.join(', ')}\n` : ''}评测维度: ${CRITERIA.join(', ')}
463
+ 评测维度: ${CRITERIA.join(', ')}
533
464
 
534
465
  生成文件:
535
466
  📋 ${join(ARENA_DIR, 'arena.json')}
@@ -773,7 +704,7 @@ function runViz(argv: string[]) {
773
704
 
774
705
  // ── Run: programmatic arena execution ───────────────────────
775
706
 
776
- async function runProgrammaticArena(argv: string[]) {
707
+ async function vsRun(argv: string[]) {
777
708
  const { options } = parseArgs(argv)
778
709
  const { readFileSync } = await import('node:fs')
779
710
 
@@ -811,13 +742,15 @@ async function runProgrammaticArena(argv: string[]) {
811
742
  return
812
743
  }
813
744
 
814
- // CLI-flag mode (backward compat)
815
- if (!options.task || !options.decks) {
816
- console.error('❌ --task <path> and --decks <list> are required for "run" (or use --config <arena.toml>)')
817
- process.exit(1)
818
- }
819
-
820
- const { runArena: runArenaProgrammatic } = await import('./runner')
745
+ // --config was not provided
746
+ console.error(`❌ --config <arena.toml> is required.
747
+ Usage: lythoskill-arena vs --config ./arena.toml
748
+ lythoskill-arena vs --config ./arena.toml --dry-run
749
+ Example configs:
750
+ examples/arena/research-compare/arena.toml — two-side A/B
751
+ examples/arena/add-remove/arena.toml — three-side Pareto
752
+ Create one: cp examples/arena/research-compare/arena.toml ./arena.toml`)
753
+ process.exit(1)
821
754
 
822
755
  const result = await runArenaProgrammatic({
823
756
  taskPath: options.task,
@@ -838,18 +771,20 @@ if (import.meta.main) {
838
771
  const args = process.argv.slice(2)
839
772
  const cmd = args[0]
840
773
 
841
- if (cmd === 'agent-run') {
842
- agentRun(args.slice(1))
774
+ if (cmd === 'single') {
775
+ singleRun(args.slice(1))
843
776
  } else if (cmd === 'viz') {
844
777
  runViz(args.slice(1))
845
- } else if (cmd === 'run') {
846
- runProgrammaticArena(args.slice(1))
778
+ } else if (cmd === 'vs') {
779
+ vsRun(args.slice(1))
847
780
  } else if (cmd === 'scaffold' || !cmd || args[0]?.startsWith('-')) {
848
781
  // Legacy behavior: if no subcommand or starts with flags, treat as scaffold
849
782
  runArena(cmd === 'scaffold' ? args.slice(1) : args)
850
783
  } else {
851
- console.error(`❌ Unknown command: ${cmd}`)
852
- printHelp()
784
+ console.error(`❌ Unknown command: "${cmd}"
785
+ Available: single, vs, scaffold, viz
786
+ Usage: lythoskill-arena <command> [options]
787
+ Help: lythoskill-arena --help`)
853
788
  process.exit(1)
854
789
  }
855
790
  }