@lythos/skill-arena 0.9.38 → 0.9.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -28
- package/package.json +1 -1
- package/src/cli.ts +82 -147
package/README.md
CHANGED
|
@@ -49,26 +49,23 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
|
|
|
49
49
|
```bash
|
|
50
50
|
bun add -d @lythos/skill-arena
|
|
51
51
|
# or use directly
|
|
52
|
-
bunx @lythos/skill-arena@0.9.38 <command>
|
|
52
|
+
bunx @lythos/skill-arena@0.9.40 <command>
|
|
53
53
|
```
|
|
54
54
|
|
|
55
55
|
## Quick Start
|
|
56
56
|
|
|
57
57
|
```bash
|
|
58
|
-
#
|
|
59
|
-
bunx @lythos/skill-arena@0.9.
|
|
60
|
-
--
|
|
61
|
-
--
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
--task "Generate auth flow diagram" \
|
|
67
|
-
--decks "./decks/minimal.toml,./decks/rich.toml" \
|
|
68
|
-
--criteria "quality,token,maintainability"
|
|
58
|
+
# Single: test a deck with one agent (exec shortcut)
|
|
59
|
+
bunx @lythos/skill-arena@0.9.40 single \
|
|
60
|
+
--brief "Generate auth flow diagram" \
|
|
61
|
+
--deck ./examples/decks/documents.toml
|
|
62
|
+
|
|
63
|
+
# Vs: compare multiple decks side by side (declarative)
|
|
64
|
+
bunx @lythos/skill-arena@0.9.40 vs \
|
|
65
|
+
--config examples/arena/research-compare/arena.toml
|
|
69
66
|
|
|
70
67
|
# Visualize results
|
|
71
|
-
bunx @lythos/skill-arena@0.9.38 viz tmp/arena-<id>/
|
|
68
|
+
bunx @lythos/skill-arena@0.9.40 viz tmp/arena-<id>/
|
|
72
69
|
```
|
|
73
70
|
|
|
74
71
|
## Commands
|
|
@@ -77,16 +74,16 @@ bunx @lythos/skill-arena@0.9.38 viz tmp/arena-<id>/
|
|
|
77
74
|
|
|
78
75
|
```bash
|
|
79
76
|
# Print execution plan without running
|
|
80
|
-
bunx @lythos/skill-arena@0.9.
|
|
77
|
+
bunx @lythos/skill-arena@0.9.40 vs --config arena.toml --dry-run
|
|
81
78
|
|
|
82
79
|
# Execute with per-side runs_per_side and statistical aggregation
|
|
83
|
-
bunx @lythos/skill-arena@0.9.
|
|
80
|
+
bunx @lythos/skill-arena@0.9.40 vs --config arena.toml
|
|
84
81
|
```
|
|
85
82
|
|
|
86
83
|
### CLI-flag mode (backward compat)
|
|
87
84
|
|
|
88
85
|
```
|
|
89
|
-
bunx @lythos/skill-arena@0.9.38 run \
|
|
86
|
+
bunx @lythos/skill-arena@0.9.40 run \
|
|
90
87
|
--task ./TASK-arena.md \
|
|
91
88
|
--players ./players/claude.toml \
|
|
92
89
|
--decks ./decks/run-01.toml,./decks/run-02.toml \
|
|
@@ -96,13 +93,13 @@ bunx @lythos/skill-arena@0.9.38 run \
|
|
|
96
93
|
### Scaffold mode (legacy, manual execution)
|
|
97
94
|
|
|
98
95
|
```
|
|
99
|
-
bunx @lythos/skill-arena@0.9.
|
|
96
|
+
bunx @lythos/skill-arena@0.9.40 scaffold --task "..." --decks a.toml,b.toml
|
|
100
97
|
```
|
|
101
98
|
|
|
102
99
|
### Viz
|
|
103
100
|
|
|
104
101
|
```bash
|
|
105
|
-
bunx @lythos/skill-arena@0.9.
|
|
102
|
+
bunx @lythos/skill-arena@0.9.40 viz runs/arena-<id>/
|
|
106
103
|
```
|
|
107
104
|
|
|
108
105
|
## Skill Documentation
|
|
@@ -116,7 +113,7 @@ The agent-visible **Skill** layer documentation is here:
|
|
|
116
113
|
Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
|
|
117
114
|
|
|
118
115
|
```
|
|
119
|
-
Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.38 ...
|
|
116
|
+
Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.40 ...
|
|
120
117
|
Skill (packages/<name>/skill/) → build → SKILL.md + thin scripts
|
|
121
118
|
Output (skills/<name>/) → git commit → agent-visible skill
|
|
122
119
|
```
|
|
@@ -137,15 +134,6 @@ arena.toml → ArenaToml (Zod) → ExecutionPlan (pure) → per-cell agent
|
|
|
137
134
|
|
|
138
135
|
Built on `@lythos/test-utils` shared infrastructure.
|
|
139
136
|
|
|
140
|
-
## Test Coverage
|
|
141
|
-
|
|
142
|
-
| Layer | Count | CI | Notes |
|
|
143
|
-
|-------|-------|----|-------|
|
|
144
|
-
| Unit tests | 41 | ✅ | TOML parser, player resolution, Pareto, stats |
|
|
145
|
-
| Agent BDD | — | ❌ | Requires `claude` CLI; run locally |
|
|
146
|
-
|
|
147
|
-
Pareto frontier is a **deterministic algorithm** — never delegated to LLM. 8 unit tests cover dominance, cross-dominance, transitive chains, partial criteria, and empty scores.
|
|
148
|
-
|
|
149
137
|
## License
|
|
150
138
|
|
|
151
139
|
MIT
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -37,52 +37,49 @@ function printHelp(): void {
|
|
|
37
37
|
console.log(`🎭 lythoskill-arena — Skill comparison runner
|
|
38
38
|
|
|
39
39
|
Usage:
|
|
40
|
-
lythoskill-arena
|
|
41
|
-
lythoskill-arena
|
|
42
|
-
lythoskill-arena
|
|
43
|
-
lythoskill-arena scaffold --task "<description>" --skills <skill1,skill2,...>
|
|
40
|
+
lythoskill-arena single --task <path> --deck <path> [--player kimi] [--out <dir>] [--timeout <ms>]
|
|
41
|
+
lythoskill-arena single --brief "<prompt>" --deck <path> [--out <dir>] [--timeout <ms>]
|
|
42
|
+
lythoskill-arena vs --config arena.toml [--dry-run]
|
|
44
43
|
lythoskill-arena scaffold --task "<description>" --decks <deck1,deck2,...>
|
|
45
44
|
lythoskill-arena viz <arena-dir>
|
|
46
45
|
|
|
47
46
|
Commands:
|
|
48
|
-
|
|
47
|
+
single Single-player deck test (exec shortcut): test a deck with one player
|
|
48
|
+
vs Multi-side comparison: run arena from declarative arena.toml
|
|
49
49
|
scaffold Create arena directory structure (legacy, manual subagent execution)
|
|
50
50
|
viz Visualize arena report (ASCII charts)
|
|
51
51
|
|
|
52
52
|
Options:
|
|
53
|
-
-t, --task <path|desc> Task description or path to TASK-arena.md
|
|
54
|
-
|
|
55
|
-
--
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
--config <path> Path to arena.toml (
|
|
59
|
-
--dry-run Print execution plan without running (
|
|
60
|
-
--
|
|
61
|
-
|
|
62
|
-
-
|
|
63
|
-
|
|
53
|
+
-t, --task <path|desc> Task description or path to TASK-arena.md / .agent.md
|
|
54
|
+
--deck <path> Deck path (single only)
|
|
55
|
+
--brief "<text>" Inline task description (single only, alternative to --task)
|
|
56
|
+
--player <name> Agent player (single only, default: kimi)
|
|
57
|
+
-c, --criteria <list> Evaluation criteria (scaffold only, default: syntax,context,logic,token)
|
|
58
|
+
--config <path> Path to arena.toml (vs only)
|
|
59
|
+
--dry-run Print execution plan without running (vs --config only)
|
|
60
|
+
--out <dir> Output directory
|
|
61
|
+
-d, --dir <dir> Parent dir (scaffold: defaults to tmp)
|
|
62
|
+
-p, --project <dir> Project root (default: .)
|
|
63
|
+
--timeout <ms> Subagent timeout (single only)
|
|
64
64
|
|
|
65
65
|
Examples:
|
|
66
|
-
# Single
|
|
67
|
-
lythoskill-arena
|
|
68
|
-
lythoskill-arena
|
|
66
|
+
# Single-player deck test (exec shortcut)
|
|
67
|
+
lythoskill-arena single --task ./TASK.agent.md --deck ./deck.toml
|
|
68
|
+
lythoskill-arena single --brief "Generate auth flow diagram" --deck ./deck.toml --player kimi
|
|
69
69
|
|
|
70
|
-
#
|
|
71
|
-
lythoskill-arena
|
|
72
|
-
lythoskill-arena
|
|
73
|
-
|
|
74
|
-
# CLI-flag mode (backward compat)
|
|
75
|
-
lythoskill-arena run --task ./TASK-arena.md --players ./players/claude.toml --decks ./decks/run-01.toml,./decks/run-02.toml --criteria coverage,relevance
|
|
70
|
+
# Multi-side comparison (declarative)
|
|
71
|
+
lythoskill-arena vs --config ./arena.toml
|
|
72
|
+
lythoskill-arena vs --config ./arena.toml --dry-run
|
|
76
73
|
|
|
77
74
|
# Legacy scaffolding
|
|
78
|
-
lythoskill-arena scaffold --task "Refactor auth module" --
|
|
75
|
+
lythoskill-arena scaffold --task "Refactor auth module" --decks ./decks/a.toml,./decks/b.toml
|
|
79
76
|
lythoskill-arena viz runs/arena-20260504
|
|
80
77
|
`)
|
|
81
78
|
}
|
|
82
79
|
|
|
83
|
-
// ──
|
|
80
|
+
// ── single: single-player deck test (exec shortcut) ──────────────────────
|
|
84
81
|
|
|
85
|
-
async function agentRun(args: string[]) {
|
|
82
|
+
async function singleRun(args: string[]) {
|
|
86
83
|
const opts: Record<string, string | undefined> = {}
|
|
87
84
|
for (let i = 0; i < args.length; i++) {
|
|
88
85
|
if (args[i] === '--task' || args[i] === '-t') opts.task = args[++i]
|
|
@@ -94,11 +91,16 @@ async function agentRun(args: string[]) {
|
|
|
94
91
|
}
|
|
95
92
|
|
|
96
93
|
if (!opts.deck) {
|
|
97
|
-
console.error(
|
|
94
|
+
console.error(`❌ --deck <path> is required.
|
|
95
|
+
Usage: lythoskill-arena single --deck ./deck.toml --task ./scenario.agent.md
|
|
96
|
+
lythoskill-arena single --deck ./deck.toml --brief "your task description"
|
|
97
|
+
Example decks: examples/decks/scout.toml, examples/decks/documents.toml`)
|
|
98
98
|
process.exit(1)
|
|
99
99
|
}
|
|
100
100
|
if (!opts.task && (!opts.brief || !opts.brief.trim())) {
|
|
101
|
-
console.error(
|
|
101
|
+
console.error(`❌ --task <path> or --brief "<prompt>" is required.
|
|
102
|
+
Usage: lythoskill-arena single --deck ./deck.toml --task ./scenario.agent.md
|
|
103
|
+
lythoskill-arena single --deck ./deck.toml --brief "your task description"`)
|
|
102
104
|
process.exit(1)
|
|
103
105
|
}
|
|
104
106
|
|
|
@@ -122,7 +124,10 @@ async function agentRun(args: string[]) {
|
|
|
122
124
|
deckPath = dest
|
|
123
125
|
} else {
|
|
124
126
|
deckPath = resolve(opts.deck)
|
|
125
|
-
if (!deckExists(deckPath)) { console.error(`❌ Deck file not found: ${deckPath}
|
|
127
|
+
if (!deckExists(deckPath)) { console.error(`❌ Deck file not found: ${deckPath}
|
|
128
|
+
Create one: examples/decks/scout.toml (minimal), examples/decks/documents.toml (documents)
|
|
129
|
+
Or fetch: curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml > deck.toml
|
|
130
|
+
Or create: see https://github.com/lythos-labs/lythoskill/tree/main/examples/decks/`); process.exit(1) }
|
|
126
131
|
}
|
|
127
132
|
|
|
128
133
|
const { useAgent } = await import('@lythos/test-utils/agents')
|
|
@@ -142,7 +147,10 @@ async function agentRun(args: string[]) {
|
|
|
142
147
|
const scenarioOpt: Record<string, unknown> = {}
|
|
143
148
|
if (opts.task) {
|
|
144
149
|
const taskPath = resolve(opts.task)
|
|
145
|
-
if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}
|
|
150
|
+
if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}
|
|
151
|
+
Create a .agent.md scenario or use --brief for inline tasks.
|
|
152
|
+
Format: frontmatter (name, description, timeout) + Given/When/Then/Judge sections.
|
|
153
|
+
Example: see playground/arena-one-shot/TASK-arena.agent.md`); process.exit(1) }
|
|
146
154
|
scenarioOpt.scenarioPath = taskPath
|
|
147
155
|
} else {
|
|
148
156
|
scenarioOpt.scenario = {
|
|
@@ -268,10 +276,8 @@ function parseArgs(argv: string[]) {
|
|
|
268
276
|
|
|
269
277
|
const options: Record<string, string | undefined> = {
|
|
270
278
|
task: undefined,
|
|
271
|
-
skills: undefined,
|
|
272
279
|
decks: undefined,
|
|
273
280
|
criteria: 'syntax,context,logic,token',
|
|
274
|
-
control: 'lythoskill-project-scribe',
|
|
275
281
|
dir: 'tmp',
|
|
276
282
|
project: '.',
|
|
277
283
|
config: undefined,
|
|
@@ -284,13 +290,10 @@ function parseArgs(argv: string[]) {
|
|
|
284
290
|
const arg = argv[i]
|
|
285
291
|
if (arg === '--task' || arg === '-t') {
|
|
286
292
|
options.task = argv[++i]
|
|
287
|
-
} else if (arg === '--skills' || arg === '-s') {
|
|
288
|
-
options.skills = argv[++i]
|
|
289
293
|
} else if (arg === '--decks') {
|
|
290
294
|
options.decks = argv[++i]
|
|
291
295
|
} else if (arg === '--criteria' || arg === '-c') {
|
|
292
296
|
options.criteria = argv[++i]
|
|
293
|
-
} else if (arg === '--control') {
|
|
294
297
|
options.control = argv[++i]
|
|
295
298
|
} else if (arg === '--dir' || arg === '-d') {
|
|
296
299
|
options.dir = argv[++i]
|
|
@@ -319,39 +322,13 @@ export function runArena(argv: string[]) {
|
|
|
319
322
|
process.exit(1)
|
|
320
323
|
}
|
|
321
324
|
|
|
322
|
-
const HAS_DECKS = !!options.decks
|
|
323
|
-
const HAS_SKILLS = !!options.skills
|
|
325
|
+
const DECK_PATHS = (options.decks || '').split(',').map(s => s.trim()).filter(Boolean)
|
|
324
326
|
|
|
325
|
-
if (!HAS_DECKS && !HAS_SKILLS) {
|
|
326
|
-
console.error('❌ 请提供 --skills 或 --decks')
|
|
327
|
-
process.exit(1)
|
|
328
|
-
}
|
|
329
|
-
if (HAS_DECKS && HAS_SKILLS) {
|
|
330
|
-
console.error('❌ --skills 和 --decks 不能同时使用')
|
|
331
|
-
process.exit(1)
|
|
332
|
-
}
|
|
333
|
-
|
|
334
|
-
const DECK_PATHS = HAS_DECKS
|
|
335
|
-
? (options.decks || '').split(',').map(s => s.trim()).filter(Boolean)
|
|
336
|
-
: []
|
|
337
|
-
|
|
338
|
-
const SKILLS = HAS_SKILLS
|
|
339
|
-
? (options.skills || '').split(',').map(s => s.trim()).filter(Boolean)
|
|
340
|
-
: []
|
|
341
|
-
|
|
342
|
-
if (HAS_SKILLS && SKILLS.length < 2) {
|
|
343
|
-
console.error('❌ 至少需要 2 个 skill 才能进行 arena')
|
|
344
|
-
process.exit(1)
|
|
345
|
-
}
|
|
346
|
-
if (HAS_SKILLS && SKILLS.length > 5) {
|
|
347
|
-
console.error('❌ 一次 arena 最多 5 个 skill')
|
|
348
|
-
process.exit(1)
|
|
349
|
-
}
|
|
350
|
-
if (HAS_DECKS && DECK_PATHS.length < 2) {
|
|
327
|
+
if (DECK_PATHS.length < 2) {
|
|
351
328
|
console.error('❌ 至少需要 2 个 deck 才能进行 arena')
|
|
352
329
|
process.exit(1)
|
|
353
330
|
}
|
|
354
|
-
if (HAS_DECKS && DECK_PATHS.length > 5) {
|
|
331
|
+
if (DECK_PATHS.length > 5) {
|
|
355
332
|
console.error('❌ 一次 arena 最多 5 个 deck')
|
|
356
333
|
process.exit(1)
|
|
357
334
|
}
|
|
@@ -359,9 +336,6 @@ export function runArena(argv: string[]) {
|
|
|
359
336
|
const CRITERIA = (options.criteria || 'syntax,context,logic,token')
|
|
360
337
|
.split(',').map(s => s.trim()).filter(Boolean)
|
|
361
338
|
|
|
362
|
-
const CONTROL_SKILLS = (options.control || 'lythoskill-project-scribe')
|
|
363
|
-
.split(',').map(s => s.trim()).filter(Boolean)
|
|
364
|
-
|
|
365
339
|
const PROJECT_DIR = resolve(options.project!)
|
|
366
340
|
const ARENA_SLUG = slugify(TASK)
|
|
367
341
|
const ARENA_ID = `arena-${timestamp()}-${ARENA_SLUG.slice(0, 30)}`
|
|
@@ -373,37 +347,20 @@ export function runArena(argv: string[]) {
|
|
|
373
347
|
mkdirSync(join(ARENA_DIR, 'sides'), { recursive: true })
|
|
374
348
|
|
|
375
349
|
// ── 生成参与者与 deck ───────────────────────────────────────
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
const
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
console.error(`❌ Deck 文件不存在: ${deckPath}`)
|
|
391
|
-
process.exit(1)
|
|
392
|
-
}
|
|
393
|
-
return { id, name, skill_name: name, deck_path: destPath }
|
|
394
|
-
})
|
|
395
|
-
} else {
|
|
396
|
-
mode = 'single-skill'
|
|
397
|
-
participants = SKILLS.map((skill, i) => {
|
|
398
|
-
const id = `run-${String(i + 1).padStart(2, '0')}`
|
|
399
|
-
return {
|
|
400
|
-
id,
|
|
401
|
-
name: skill,
|
|
402
|
-
skill_name: skill,
|
|
403
|
-
deck_path: join(ARENA_DIR, 'decks', `arena-${id}.toml`),
|
|
404
|
-
}
|
|
405
|
-
})
|
|
406
|
-
}
|
|
350
|
+
const participants = DECK_PATHS.map((deckPath, i) => {
|
|
351
|
+
const id = `run-${String(i + 1).padStart(2, '0')}`
|
|
352
|
+
const name = basename(deckPath).replace(/\.toml$/, '')
|
|
353
|
+
const destPath = join(ARENA_DIR, 'decks', `arena-${id}.toml`)
|
|
354
|
+
// Copy the provided deck to arena directory
|
|
355
|
+
if (existsSync(deckPath)) {
|
|
356
|
+
const content = readFileSync(deckPath, 'utf-8')
|
|
357
|
+
writeFileSync(destPath, content)
|
|
358
|
+
} else {
|
|
359
|
+
console.error(`❌ Deck 文件不存在: ${deckPath}`)
|
|
360
|
+
process.exit(1)
|
|
361
|
+
}
|
|
362
|
+
return { id, name, skill_name: name, deck_path: destPath }
|
|
363
|
+
})
|
|
407
364
|
|
|
408
365
|
const criteria = CRITERIA.map((c) => ({
|
|
409
366
|
name: c,
|
|
@@ -411,29 +368,6 @@ export function runArena(argv: string[]) {
|
|
|
411
368
|
weight: 1,
|
|
412
369
|
}))
|
|
413
370
|
|
|
414
|
-
if (mode === 'single-skill') {
|
|
415
|
-
for (const p of participants) {
|
|
416
|
-
const deckContent = `# ============================================================
|
|
417
|
-
# Arena Deck: ${p.id} — ${p.name}
|
|
418
|
-
# ============================================================
|
|
419
|
-
# 变量:${p.name}
|
|
420
|
-
# 控制变量:${CONTROL_SKILLS.join(', ')}
|
|
421
|
-
# ============================================================
|
|
422
|
-
|
|
423
|
-
[deck]
|
|
424
|
-
working_set = ".claude/skills"
|
|
425
|
-
cold_pool = "~/.agents/skill-repos"
|
|
426
|
-
max_cards = 10
|
|
427
|
-
|
|
428
|
-
[tool]
|
|
429
|
-
skills = [
|
|
430
|
-
${[...new Set([p.skill_name, ...CONTROL_SKILLS])].map(s => ` "${s}",`).join('\n')}
|
|
431
|
-
]
|
|
432
|
-
`
|
|
433
|
-
writeFileSync(p.deck_path, deckContent)
|
|
434
|
-
}
|
|
435
|
-
}
|
|
436
|
-
|
|
437
371
|
// ── 为每个 side 创建隔离工作空间 ────────────────────────────
|
|
438
372
|
for (const p of participants) {
|
|
439
373
|
const sideDir = join(ARENA_DIR, 'sides', p.id)
|
|
@@ -481,14 +415,11 @@ ${criteria.map(c => ` - ${c.label}`).join('\n')}
|
|
|
481
415
|
arena_decks:
|
|
482
416
|
${participants.map(p => ` - ${p.deck_path.replace(PROJECT_DIR, '.')}`).join('\n')}
|
|
483
417
|
judge_persona: |
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
如果发现任何涌现 combo(多个 skill 组合产生 1+1>2 的效果),单独标注。`
|
|
490
|
-
: `你是一个中立的技能评测员。对比所有 subagent 的输出,
|
|
491
|
-
按 evaluation_criteria 给出 1-5 分评分,最终给出 Winner 和选型建议。`}
|
|
418
|
+
你是一个多目标优化分析师。不要选 Winner。
|
|
419
|
+
对每个 deck 配置,按 evaluation_criteria 输出评分向量(1-5 分)。
|
|
420
|
+
识别 Pareto 非支配解集——没有"最强",只有"在不同维度上的最优权衡"。
|
|
421
|
+
对被支配的解,说明它被谁支配、在哪个维度上劣势。
|
|
422
|
+
如果发现任何涌现 combo(多个 skill 组合产生 1+1>2 的效果),单独标注。
|
|
492
423
|
acceptance:
|
|
493
424
|
${participants.map(p => ` - Subagent ${p.id} 在 sides/${p.id}/ 隔离环境完成任务并写入 runs/${p.id}.md`).join('\n')}
|
|
494
425
|
- Judge 读取所有 run 文件并生成 report.md
|
|
@@ -527,9 +458,9 @@ cd "${ARENA_DIR}"
|
|
|
527
458
|
ID: ${ARENA_ID}
|
|
528
459
|
任务: ${TASK}
|
|
529
460
|
目录: ${ARENA_DIR}
|
|
530
|
-
模式:
|
|
461
|
+
模式: deck 配置对比
|
|
531
462
|
参与者: ${participants.map(p => p.name).join(', ')}
|
|
532
|
-
|
|
463
|
+
评测维度: ${CRITERIA.join(', ')}
|
|
533
464
|
|
|
534
465
|
生成文件:
|
|
535
466
|
📋 ${join(ARENA_DIR, 'arena.json')}
|
|
@@ -773,7 +704,7 @@ function runViz(argv: string[]) {
|
|
|
773
704
|
|
|
774
705
|
// ── Run: programmatic arena execution ───────────────────────
|
|
775
706
|
|
|
776
|
-
async function runProgrammaticArena(argv: string[]) {
|
|
707
|
+
async function vsRun(argv: string[]) {
|
|
777
708
|
const { options } = parseArgs(argv)
|
|
778
709
|
const { readFileSync } = await import('node:fs')
|
|
779
710
|
|
|
@@ -811,13 +742,15 @@ async function runProgrammaticArena(argv: string[]) {
|
|
|
811
742
|
return
|
|
812
743
|
}
|
|
813
744
|
|
|
814
|
-
//
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
745
|
+
// --config was not provided
|
|
746
|
+
console.error(`❌ --config <arena.toml> is required.
|
|
747
|
+
Usage: lythoskill-arena vs --config ./arena.toml
|
|
748
|
+
lythoskill-arena vs --config ./arena.toml --dry-run
|
|
749
|
+
Example configs:
|
|
750
|
+
examples/arena/research-compare/arena.toml — two-side A/B
|
|
751
|
+
examples/arena/add-remove/arena.toml — three-side Pareto
|
|
752
|
+
Create one: cp examples/arena/research-compare/arena.toml ./arena.toml`)
|
|
753
|
+
process.exit(1)
|
|
821
754
|
|
|
822
755
|
const result = await runArenaProgrammatic({
|
|
823
756
|
taskPath: options.task,
|
|
@@ -838,18 +771,20 @@ if (import.meta.main) {
|
|
|
838
771
|
const args = process.argv.slice(2)
|
|
839
772
|
const cmd = args[0]
|
|
840
773
|
|
|
841
|
-
if (cmd === '
|
|
842
|
-
|
|
774
|
+
if (cmd === 'single') {
|
|
775
|
+
singleRun(args.slice(1))
|
|
843
776
|
} else if (cmd === 'viz') {
|
|
844
777
|
runViz(args.slice(1))
|
|
845
|
-
} else if (cmd === '
|
|
846
|
-
|
|
778
|
+
} else if (cmd === 'vs') {
|
|
779
|
+
vsRun(args.slice(1))
|
|
847
780
|
} else if (cmd === 'scaffold' || !cmd || args[0]?.startsWith('-')) {
|
|
848
781
|
// Legacy behavior: if no subcommand or starts with flags, treat as scaffold
|
|
849
782
|
runArena(cmd === 'scaffold' ? args.slice(1) : args)
|
|
850
783
|
} else {
|
|
851
|
-
console.error(`❌ Unknown command: ${cmd}
|
|
852
|
-
|
|
784
|
+
console.error(`❌ Unknown command: "${cmd}"
|
|
785
|
+
Available: single, vs, scaffold, viz
|
|
786
|
+
Usage: lythoskill-arena <command> [options]
|
|
787
|
+
Help: lythoskill-arena --help`)
|
|
853
788
|
process.exit(1)
|
|
854
789
|
}
|
|
855
790
|
}
|