@lythos/skill-arena 0.11.2 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -8
- package/package.json +5 -5
- package/src/arena-toml.test.ts +44 -46
- package/src/arena-toml.ts +12 -13
- package/src/cli.ts +238 -667
- package/src/runner.ts +152 -183
package/src/cli.ts
CHANGED
|
@@ -1,85 +1,120 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
import {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
.
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
1
|
+
import { writeFileSync, readFileSync, mkdirSync, existsSync, realpathSync } from 'node:fs'
|
|
2
|
+
import { join, resolve } from 'node:path'
|
|
3
|
+
import { homedir } from 'node:os'
|
|
4
|
+
import { ZodError } from 'zod'
|
|
5
|
+
import { formatPlanOutput, type ArenaResult } from './runner'
|
|
6
|
+
import { parseArenaToml, buildExecutionPlan } from './arena-toml'
|
|
7
|
+
import { buildCopyPlan, parseDeckSkills } from './preflight'
|
|
8
|
+
import { checkSkillExistence, formatSkillWarnings, resolveColdPoolDir } from './preflight'
|
|
9
|
+
|
|
10
|
+
// ─── fetchWithProxy (infra dependency, no package boundary) ─────────────────
|
|
11
|
+
|
|
12
|
+
/**
 * `fetch` replacement that can route a request through the proxy named by the
 * `LYTHOS_SOCKS_PROXY` environment variable (`host[:port]`, port defaults to 1086).
 *
 * NOTE: despite the variable's name, the protocol spoken to the proxy is plain HTTP
 * proxying — `CONNECT host:port` for https targets and an absolute-form request line
 * for http targets — not SOCKS.
 *
 * When the variable is unset (or has an empty host) the call is delegated to the
 * global `fetch` unchanged. On the proxy path the implementation is deliberately
 * minimal: one request per connection (`Connection: close`), the whole response is
 * buffered in memory, and redirects are NOT followed.
 *
 * @param url  Absolute http(s) URL to request.
 * @param init Standard fetch options; `method`, `headers`, `body` and `signal` are honoured
 *             on the proxy path. A body is skipped when `duplex === 'half'` (streaming upload).
 * @returns A `Response` carrying the upstream status, headers and (de-chunked) body bytes.
 * @throws If the proxy port is invalid, the proxy refuses the tunnel, the TLS handshake or
 *         certificate check fails, the response is malformed, or `init.signal` aborts.
 */
async function fetchWithProxy(url: string, init?: RequestInit): Promise<Response> {
  const { LYTHOS_SOCKS_PROXY } = process.env
  if (!LYTHOS_SOCKS_PROXY) return fetch(url, init)
  const [host, portStr] = LYTHOS_SOCKS_PROXY.split(':')
  if (!host) return fetch(url, init)
  const port = parseInt(portStr || '1086', 10)
  if (!Number.isInteger(port) || port < 1 || port > 65535) {
    throw new Error(`Invalid LYTHOS_SOCKS_PROXY port: ${portStr}`)
  }

  const net = await import('node:net')
  const tls = await import('node:tls')
  const target = new URL(url)
  const isHttps = target.protocol === 'https:'
  const targetHost = target.hostname
  const targetPort = parseInt(target.port || (isHttps ? '443' : '80'), 10)
  const method = init?.method ?? 'GET'
  const signal = init?.signal ?? undefined
  // `duplex` is not part of every RequestInit typing, so read it structurally.
  const duplex = (init as { duplex?: string } | undefined)?.duplex

  const abortError = (): Error => {
    const err = new Error('The operation was aborted')
    err.name = 'AbortError'
    return err
  }

  // Reassemble a `Transfer-Encoding: chunked` payload; stops at the terminating 0-size chunk.
  const decodeChunked = (buf: Buffer): Buffer => {
    const parts: Buffer[] = []
    let pos = 0
    while (pos < buf.length) {
      const lineEnd = buf.indexOf('\r\n', pos)
      if (lineEnd === -1) break
      // parseInt stops at ';', which conveniently skips chunk extensions.
      const size = parseInt(buf.subarray(pos, lineEnd).toString('latin1'), 16)
      if (!Number.isFinite(size) || size <= 0) break
      const start = lineEnd + 2
      parts.push(buf.subarray(start, start + size))
      pos = start + size + 2
    }
    return Buffer.concat(parts)
  }

  if (signal?.aborted) throw abortError()

  const socket = net.connect({ host, port })
  let stream: import('node:stream').Duplex = socket
  // Failures are reported through the promises / stream iteration below; this listener
  // only keeps a late 'error' event from becoming an uncaught exception.
  socket.on('error', () => {})
  const onAbort = () => {
    stream.destroy(abortError())
    socket.destroy(abortError())
  }
  signal?.addEventListener('abort', onAbort, { once: true })

  try {
    await new Promise<void>((ok, fail) => {
      socket.once('connect', () => ok())
      socket.once('error', fail)
    })

    if (isHttps) {
      // Ask the proxy for a raw tunnel, then run TLS to the real host inside it.
      await new Promise<void>((ok, fail) => {
        const cleanup = () => {
          socket.off('data', onData)
          socket.off('error', onError)
          socket.off('close', onClose)
        }
        const onData = (d: Buffer) => {
          cleanup()
          const status = d.toString('latin1').split(' ')[1]
          if (status === '200') ok()
          else fail(new Error(`SOCKS CONNECT rejected: ${status}`))
        }
        const onError = (e: Error) => {
          cleanup()
          fail(e)
        }
        const onClose = () => {
          cleanup()
          fail(new Error('SOCKS CONNECT failed: proxy closed the connection'))
        }
        socket.once('data', onData)
        socket.once('error', onError)
        socket.once('close', onClose)
        socket.write(`CONNECT ${targetHost}:${targetPort} HTTP/1.1\r\nHost: ${targetHost}:${targetPort}\r\n\r\n`)
      })

      // tls.connect (unlike a bare `new TLSSocket`) starts the handshake and verifies the
      // certificate against `host`. SNI must not carry an IP literal.
      const secured = tls.connect({
        socket,
        host: targetHost,
        servername: net.isIP(targetHost) ? undefined : targetHost,
      })
      secured.on('error', () => {})
      stream = secured
      await new Promise<void>((ok, fail) => {
        secured.once('secureConnect', () => ok())
        secured.once('error', fail)
        secured.once('close', () => fail(new Error('TLS connection closed before the handshake completed')))
      })
    }
    // For plain http the socket is already connected — there is no further event to wait for.

    const headers = new Headers(init?.headers)
    headers.set('Host', target.host)
    headers.set('Connection', 'close')

    let payload: Buffer | undefined
    const body = init?.body
    if (body != null && duplex !== 'half') {
      // Response normalises every BodyInit flavour (string, Blob, ArrayBuffer, FormData, …).
      const wrapped = new Response(body)
      const inferredType = wrapped.headers.get('content-type')
      if (inferredType && !headers.has('Content-Type')) headers.set('Content-Type', inferredType)
      payload = Buffer.from(await wrapped.arrayBuffer())
      if (!headers.has('Content-Length')) headers.set('Content-Length', String(payload.length))
    }

    // A proxy needs the absolute URI for http; inside a CONNECT tunnel we talk to the origin.
    const requestTarget = isHttps
      ? `${target.pathname}${target.search}`
      : `${target.origin}${target.pathname}${target.search}`
    let head = `${method} ${requestTarget} HTTP/1.1\r\n`
    headers.forEach((value, name) => {
      head += `${name}: ${value}\r\n`
    })
    head += '\r\n'
    stream.write(head)
    if (payload) stream.write(payload)
    stream.end()

    const chunks: Buffer[] = []
    for await (const chunk of stream) chunks.push(chunk as Buffer)
    const raw = Buffer.concat(chunks)

    const headEnd = raw.indexOf('\r\n\r\n')
    if (headEnd === -1) throw new Error(`Malformed HTTP response from ${targetHost}`)
    const headLines = raw.subarray(0, headEnd).toString('latin1').split('\r\n')
    const statusParts = (headLines[0] ?? '').split(' ')
    const status = parseInt(statusParts[1] ?? '', 10)
    if (!(status >= 200 && status <= 599)) {
      throw new Error(`Unexpected HTTP status line from ${targetHost}: ${headLines[0] ?? ''}`)
    }

    const responseHeaders = new Headers()
    for (const line of headLines.slice(1)) {
      const sep = line.indexOf(':')
      if (sep <= 0) continue
      try {
        responseHeaders.append(line.slice(0, sep).trim(), line.slice(sep + 1).trim())
      } catch {
        // A header the runtime refuses to represent is dropped rather than failing the fetch.
      }
    }

    let content = raw.subarray(headEnd + 4)
    if (/\bchunked\b/i.test(responseHeaders.get('transfer-encoding') ?? '')) {
      content = decodeChunked(content)
      responseHeaders.delete('transfer-encoding')
    }

    // The Response constructor rejects a body on null-body statuses.
    const bodyless = status === 204 || status === 205 || status === 304 || method.toUpperCase() === 'HEAD'
    return new Response(bodyless ? null : new Uint8Array(content), {
      status,
      statusText: statusParts.slice(2).join(' '),
      headers: responseHeaders,
    })
  } finally {
    signal?.removeEventListener('abort', onAbort)
    stream.destroy()
    socket.destroy()
  }
}
|
|
30
69
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
70
|
+
// ── Link validation ────────────────────────────────────────────────────────
|
|
71
|
+
// "no skills found to symlink" is a warning, not an error — a deck may
|
|
72
|
+
// legitimately have only innate/innate-only cards.
|
|
73
|
+
|
|
74
|
+
/**
 * Classify the outcome of a `deck link` subprocess.
 *
 * @param exitCode Process exit code; `null` when the process ended without one
 *                 (e.g. it was killed by a signal).
 * @param stderr   Everything the process wrote to stderr.
 * @returns `{ ok: true }` on exit code 0, or when the only complaint is
 *          "no skills found to symlink" (treated as a warning, not a failure);
 *          otherwise `{ ok: false, error }` with a message suitable for the user.
 */
function validateLinkResult(exitCode: number | null, stderr: string): { ok: boolean; error?: string } {
  if (exitCode === 0) return { ok: true }

  // A missing-module failure means the deck CLI itself could not be resolved.
  if (stderr.includes('Cannot find module')) {
    return { ok: false, error: `deck link failed: @lythos/skill-deck not installed or not found. Run: bun install` }
  }

  // Nothing to link is not an error.
  if (stderr.includes('no skills found to symlink')) return { ok: true }

  // Only the head of stderr is echoed so a noisy failure cannot flood the terminal.
  const detail = stderr.slice(0, 200)
  if (exitCode === null) {
    // No exit code at all: report that instead of the misleading "code null".
    return { ok: false, error: `deck link terminated without an exit code (killed by a signal?): ${detail}` }
  }
  return { ok: false, error: `deck link exited with code ${exitCode}: ${detail}` }
}
|
|
35
82
|
|
|
36
|
-
//
|
|
37
|
-
function
|
|
38
|
-
|
|
83
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
84
|
+
export async function main(args: string[] = process.argv.slice(2)) {
|
|
85
|
+
if (args.length === 0 || args[0] === '--help' || args[0] === '-h') {
|
|
86
|
+
console.log(`lythoskill-arena — skill evaluation CLI
|
|
39
87
|
|
|
40
88
|
Usage:
|
|
41
|
-
lythoskill-arena single
|
|
42
|
-
lythoskill-arena single --brief "<prompt>" --deck <path> [--out <dir>] [--timeout <ms>]
|
|
43
|
-
lythoskill-arena vs --config arena.toml [--dry-run]
|
|
44
|
-
lythoskill-arena scaffold --task "<description>" --decks <deck1,deck2,...>
|
|
45
|
-
lythoskill-arena viz <arena-dir>
|
|
89
|
+
lythoskill-arena single|vs|viz <options>
|
|
46
90
|
|
|
47
91
|
Commands:
|
|
48
|
-
single
|
|
49
|
-
vs
|
|
50
|
-
|
|
51
|
-
viz Visualize arena report (ASCII charts)
|
|
52
|
-
|
|
53
|
-
Options:
|
|
54
|
-
-t, --task <path|desc> Task description or path to TASK-arena.md / .agent.md
|
|
55
|
-
--deck <path> Deck path (single only)
|
|
56
|
-
--brief "<text>" Inline task description (single only, alternative to --task)
|
|
57
|
-
--player <name> Agent player (single only, default: kimi)
|
|
58
|
-
--config <path> Path to arena.toml (vs only)
|
|
59
|
-
--dry-run Print execution plan without running (vs --config only)
|
|
60
|
-
--out <dir> Output directory
|
|
61
|
-
-p, --project <dir> Project root (default: .)
|
|
62
|
-
--timeout <ms> Subagent timeout (single only)
|
|
92
|
+
single Test one deck against a task (--deck + --brief or --task)
|
|
93
|
+
vs Compare decks via arena.toml (declarative, Pareto-optimal)
|
|
94
|
+
viz Visualize a completed arena run (HTML + chart)
|
|
63
95
|
|
|
64
96
|
Examples:
|
|
65
|
-
|
|
66
|
-
lythoskill-arena single
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
# If you already have a local deck file, point to it directly:
|
|
70
|
-
# lythoskill-arena single --deck ./examples/decks/scout.toml --brief "..."
|
|
71
|
-
|
|
72
|
-
# Multi-side comparison (declarative)
|
|
73
|
-
curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/arena/add-remove/arena.toml > arena.toml
|
|
74
|
-
lythoskill-arena vs --config ./arena.toml
|
|
75
|
-
lythoskill-arena vs --config ./arena.toml --dry-run
|
|
76
|
-
|
|
77
|
-
# Legacy scaffolding
|
|
78
|
-
# scaffold creates structure; decks via URL (auto-downloaded during link):
|
|
79
|
-
lythoskill-arena scaffold --task "Refactor auth module" \\
|
|
80
|
-
--decks https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml,https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/documents.toml
|
|
97
|
+
lythoskill-arena single --brief "find and research" --deck ./decks/scout.toml
|
|
98
|
+
lythoskill-arena single --brief "find and research" --deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml
|
|
99
|
+
lythoskill-arena vs --config arena.toml --dry-run
|
|
100
|
+
lythoskill-arena vs --config arena.toml
|
|
81
101
|
lythoskill-arena viz runs/arena-20260504
|
|
82
102
|
`)
|
|
103
|
+
process.exit(0)
|
|
104
|
+
}
|
|
105
|
+
return cli(args)
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/**
 * Route a parsed argv to its subcommand handler.
 *
 * The first element selects the command (`vs`/`compare`, `single`/`run`, `viz`);
 * the remaining elements are passed to the handler untouched. An unrecognised
 * command is reported on stderr and the process exits with status 1.
 */
function cli(args: string[]) {
  const [cmd, ...rest] = args

  switch (cmd) {
    case 'vs':
    case 'compare':
      return vsRun(rest)
    case 'single':
    case 'run':
      return singleRun(rest)
    case 'viz':
      return vizRun(rest)
    default:
      console.error(`Unknown command: ${cmd}`)
      process.exit(1)
  }
}
|
|
84
119
|
|
|
85
120
|
// ── single: single-player deck test (exec shortcut) ──────────────────────
|
|
@@ -122,14 +157,13 @@ async function singleRun(args: string[]) {
|
|
|
122
157
|
process.exit(1)
|
|
123
158
|
}
|
|
124
159
|
|
|
125
|
-
// Validate --task file early — before any URL fetch — so bad path fails fast without a wasted network call.
|
|
126
160
|
let resolvedTaskPath: string | undefined
|
|
127
161
|
if (opts.task) {
|
|
128
162
|
resolvedTaskPath = resolve(opts.task)
|
|
129
163
|
if (!existsSync(resolvedTaskPath)) {
|
|
130
164
|
console.error(`❌ Task file not found: ${resolvedTaskPath}
|
|
131
165
|
Use --brief for inline tasks, or point --task to an existing .agent.md file.
|
|
132
|
-
Format: name + description + Given/When/Then
|
|
166
|
+
Format: name + description + Given/When/Then sections.
|
|
133
167
|
|
|
134
168
|
Example (URL): lythoskill-arena single --brief "your task" --deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml
|
|
135
169
|
Or (local): lythoskill-arena single --brief "your task" --deck ./examples/decks/scout.toml`)
|
|
@@ -149,10 +183,7 @@ async function singleRun(args: string[]) {
|
|
|
149
183
|
## When
|
|
150
184
|
...
|
|
151
185
|
## Then
|
|
152
|
-
|
|
153
|
-
## Judge
|
|
154
|
-
...
|
|
155
|
-
Template: playground/arena-one-shot/TASK-arena.agent.md`)
|
|
186
|
+
...`)
|
|
156
187
|
process.exit(1)
|
|
157
188
|
}
|
|
158
189
|
if (!raw.includes('## When')) {
|
|
@@ -181,10 +212,8 @@ async function singleRun(args: string[]) {
|
|
|
181
212
|
let res: Response | undefined
|
|
182
213
|
let allFailed = true
|
|
183
214
|
|
|
184
|
-
// Try direct first
|
|
185
215
|
try { res = await fetchWithProxy(url, { signal: AbortSignal.timeout(30_000) }); if (res.ok) allFailed = false } catch {}
|
|
186
216
|
|
|
187
|
-
// Auto-fallback: try mirrors when direct fails
|
|
188
217
|
if (!res?.ok) {
|
|
189
218
|
for (const mirrorUrl of mirrorUrls(url)) {
|
|
190
219
|
try {
|
|
@@ -198,7 +227,7 @@ async function singleRun(args: string[]) {
|
|
|
198
227
|
if (!res?.ok) {
|
|
199
228
|
const errorDetail = res ? `HTTP ${res.status}` : 'unreachable'
|
|
200
229
|
console.error(`❌ Cannot reach ${url} (${errorDetail})`)
|
|
201
|
-
if (allFailed) console.error('
|
|
230
|
+
if (allFailed) console.error(' Set LYTHOSKILL_GH_MIRROR to use a custom mirror.')
|
|
202
231
|
console.error(' Or download manually and reference the local file.')
|
|
203
232
|
process.exit(1)
|
|
204
233
|
}
|
|
@@ -215,11 +244,9 @@ async function singleRun(args: string[]) {
|
|
|
215
244
|
}
|
|
216
245
|
|
|
217
246
|
const { useAgent } = await import('@lythos/test-utils/agents')
|
|
218
|
-
// Optional: register claude-sdk adapter if the package is installed
|
|
219
247
|
try { await import('@lythos/agent-adapter-claude-sdk') } catch { /* package not installed */ }
|
|
220
248
|
try { await import('@lythos/agent-adapter-deepseek-serve') } catch { /* package not installed */ }
|
|
221
249
|
try { await import('@lythos/agent-adapter-codex') } catch { /* package not installed */ }
|
|
222
|
-
const { runAgentScenario } = await import('@lythos/test-utils/agent-bdd')
|
|
223
250
|
const { resolvePlayer } = await import('./player')
|
|
224
251
|
|
|
225
252
|
const player = resolvePlayer(opts.player ?? 'kimi')
|
|
@@ -227,635 +254,179 @@ async function singleRun(args: string[]) {
|
|
|
227
254
|
const outDir = opts.out ? resolve(opts.out) : join(process.cwd(), `agent-output-${new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19)}`)
|
|
228
255
|
mkdirSync(outDir, { recursive: true })
|
|
229
256
|
|
|
230
|
-
//
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
scenarioOpt.scenarioPath = resolvedTaskPath
|
|
234
|
-
} else {
|
|
235
|
-
scenarioOpt.scenario = {
|
|
236
|
-
name: 'ad-hoc task',
|
|
237
|
-
description: opts.brief!.slice(0, 80),
|
|
238
|
-
timeout: Number(opts.timeout ?? 120000),
|
|
239
|
-
given: { deck: {} },
|
|
240
|
-
when: opts.brief!,
|
|
241
|
-
then: ['Write your output to output.md', 'The output should be complete and well-structured'],
|
|
242
|
-
judge: 'Evaluate whether the output is complete, accurate, and well-structured.',
|
|
243
|
-
}
|
|
244
|
-
}
|
|
257
|
+
// Direct agent.spawn — no parseAgentMd, no AgentScenario, no runAgentScenario.
|
|
258
|
+
// Markdown is for LLM agents; task text is read/stored as a raw string.
|
|
259
|
+
const taskText = resolvedTaskPath ? readFileSync(resolvedTaskPath, 'utf-8') : opts.brief!
|
|
245
260
|
|
|
246
261
|
console.log(`🤖 agent-run: ${player} × ${deckPath}`)
|
|
247
262
|
if (opts.task) console.log(`📋 task: ${resolve(opts.task!)}`)
|
|
248
263
|
else console.log(`📋 brief: ${opts.brief!.slice(0, 60)}...`)
|
|
249
264
|
|
|
250
|
-
|
|
251
|
-
const
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
await linkProc.exited
|
|
276
|
-
const linkStderr = await new Response(linkProc.stderr).text()
|
|
277
|
-
const linkResult = validateLinkResult(linkProc.exitCode, linkStderr)
|
|
278
|
-
if (!linkResult.ok) {
|
|
279
|
-
console.error(`❌ ${linkResult.error}`)
|
|
280
|
-
process.exit(1)
|
|
281
|
-
}
|
|
282
|
-
} else {
|
|
283
|
-
console.log('ℹ️ No skills declared in deck — skipping link')
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
// ── Pre-flight: skill existence check (reuses deckParsed from above) ─
|
|
287
|
-
const { existsSync: es } = await import('node:fs')
|
|
288
|
-
const { homedir: hd } = await import('node:os')
|
|
289
|
-
try {
|
|
290
|
-
const coldPoolDefault = join(hd(), '.agents', 'skill-repos')
|
|
291
|
-
const coldPoolDir = resolveColdPoolDir(
|
|
292
|
-
deckParsed?.deck?.cold_pool,
|
|
293
|
-
hd(),
|
|
294
|
-
coldPoolDefault
|
|
295
|
-
)
|
|
296
|
-
|
|
297
|
-
const skills = parseDeckSkills(deckParsed)
|
|
298
|
-
const checks = checkSkillExistence(skills, coldPoolDir, es)
|
|
299
|
-
for (const warning of formatSkillWarnings(checks)) {
|
|
300
|
-
console.warn(`⚠️ ${warning}`)
|
|
301
|
-
}
|
|
302
|
-
} catch (e) {
|
|
303
|
-
console.warn('⚠️ Could not check skill existence:', e instanceof Error ? e.message : e)
|
|
304
|
-
}
|
|
305
|
-
},
|
|
306
|
-
})
|
|
307
|
-
|
|
308
|
-
// ── Copy agent output to outDir ──────────────────────────────────
|
|
309
|
-
writeFileSync(join(outDir, 'agent-stdout.txt'), result.agentResult.stdout, 'utf-8')
|
|
310
|
-
if (result.agentResult.stderr) writeFileSync(join(outDir, 'agent-stderr.txt'), result.agentResult.stderr, 'utf-8')
|
|
311
|
-
if (result.verdict) writeFileSync(join(outDir, 'judge-verdict.json'), JSON.stringify(result.verdict, null, 2) + '\n', 'utf-8')
|
|
312
|
-
|
|
313
|
-
// Copy all agent-produced files from workdir (output.md, output.docx, etc.)
|
|
314
|
-
// Skip .claude/ (symlink dir) and deck artifacts. Recursive so docx/pdf work.
|
|
315
|
-
if (agentWorkdir) {
|
|
316
|
-
const { cpSync, readdirSync, existsSync: es2 } = await import('node:fs')
|
|
317
|
-
if (!es2(agentWorkdir)) {
|
|
318
|
-
console.warn(`⚠️ Agent workdir vanished before copy: ${agentWorkdir}`)
|
|
319
|
-
} else {
|
|
320
|
-
const skipSet = new Set(['.claude', 'skill-deck.toml', 'skill-deck.lock'])
|
|
321
|
-
try {
|
|
322
|
-
const entries = readdirSync(agentWorkdir)
|
|
323
|
-
const plan = buildCopyPlan(agentWorkdir, outDir, entries, skipSet)
|
|
324
|
-
for (const { src, dest, name } of plan) {
|
|
325
|
-
try {
|
|
326
|
-
cpSync(src, dest, { recursive: true })
|
|
327
|
-
} catch (e) {
|
|
328
|
-
console.warn(`⚠️ Failed to copy agent output: ${name} — ${e instanceof Error ? e.message : e}`)
|
|
329
|
-
}
|
|
330
|
-
}
|
|
331
|
-
} catch (e) {
|
|
332
|
-
console.warn(`⚠️ Failed to read agent workdir for copy: ${e instanceof Error ? e.message : e}`)
|
|
333
|
-
}
|
|
265
|
+
// Setup workdir
|
|
266
|
+
const agentWorkdir = join(process.cwd(), `arena-single-${Date.now()}`)
|
|
267
|
+
mkdirSync(agentWorkdir, { recursive: true })
|
|
268
|
+
writeFileSync(join(agentWorkdir, 'skill-deck.toml'), readFileSync(deckPath, 'utf-8'))
|
|
269
|
+
|
|
270
|
+
const deckRaw = readFileSync(join(agentWorkdir, 'skill-deck.toml'), 'utf-8')
|
|
271
|
+
let deckParsed: Record<string, any> = {}
|
|
272
|
+
try { deckParsed = Bun.TOML.parse(deckRaw) as Record<string, any> } catch {}
|
|
273
|
+
const hasSkills = parseDeckSkills(deckParsed).length > 0
|
|
274
|
+
|
|
275
|
+
if (hasSkills) {
|
|
276
|
+
const { existsSync: es2 } = await import('node:fs')
|
|
277
|
+
const localDeckCli = join(import.meta.dir, '..', '..', 'lythoskill-deck', 'src', 'cli.ts')
|
|
278
|
+
const linkCmd = es2(localDeckCli)
|
|
279
|
+
? ['bun', localDeckCli, 'link']
|
|
280
|
+
: ['bunx', '@lythos/skill-deck', 'link']
|
|
281
|
+
const linkProc = Bun.spawn(linkCmd,
|
|
282
|
+
{ cwd: agentWorkdir, env: { ...process.env, HOME: process.env.HOME! } },
|
|
283
|
+
)
|
|
284
|
+
await linkProc.exited
|
|
285
|
+
const linkStderr = await new Response(linkProc.stderr).text()
|
|
286
|
+
const linkResult = validateLinkResult(linkProc.exitCode, linkStderr)
|
|
287
|
+
if (!linkResult.ok) {
|
|
288
|
+
console.error(`❌ ${linkResult.error}`)
|
|
289
|
+
process.exit(1)
|
|
334
290
|
}
|
|
291
|
+
} else {
|
|
292
|
+
console.log('ℹ️ No skills declared in deck — skipping link')
|
|
335
293
|
}
|
|
336
294
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
console.log(`🏆 Verdict: ${result.verdict.verdict} — ${result.verdict.reason.slice(0, 120)}`)
|
|
347
|
-
}
|
|
348
|
-
}
|
|
349
|
-
|
|
350
|
-
function parseArgs(argv: string[]) {
|
|
351
|
-
if (argv.includes('--help') || argv.includes('-h')) {
|
|
352
|
-
printHelp()
|
|
353
|
-
process.exit(0)
|
|
354
|
-
}
|
|
355
|
-
|
|
356
|
-
const options: Record<string, string | undefined> = {
|
|
357
|
-
task: undefined,
|
|
358
|
-
dir: 'tmp',
|
|
359
|
-
project: '.',
|
|
360
|
-
config: undefined,
|
|
361
|
-
out: undefined,
|
|
362
|
-
players: undefined,
|
|
363
|
-
}
|
|
364
|
-
const positionals: string[] = []
|
|
365
|
-
|
|
366
|
-
for (let i = 0; i < argv.length; i++) {
|
|
367
|
-
const arg = argv[i]
|
|
368
|
-
if (arg === '--task' || arg === '-t') {
|
|
369
|
-
options.task = argv[++i]
|
|
370
|
-
} else if (arg === '--dir' || arg === '-d') {
|
|
371
|
-
options.dir = argv[++i]
|
|
372
|
-
} else if (arg === '--project' || arg === '-p') {
|
|
373
|
-
options.project = argv[++i]
|
|
374
|
-
} else if (arg === '--config') {
|
|
375
|
-
options.config = argv[++i]
|
|
376
|
-
} else if (arg === '--out') {
|
|
377
|
-
options.out = argv[++i]
|
|
378
|
-
} else if (arg === '--players') {
|
|
379
|
-
options.players = argv[++i]
|
|
380
|
-
} else if (!arg.startsWith('-')) {
|
|
381
|
-
positionals.push(arg)
|
|
295
|
+
const { existsSync: es } = await import('node:fs')
|
|
296
|
+
const { homedir: hd } = await import('node:os')
|
|
297
|
+
try {
|
|
298
|
+
const coldPoolDefault = join(hd(), '.agents', 'skill-repos')
|
|
299
|
+
const coldPoolDir = resolveColdPoolDir(deckParsed?.deck?.cold_pool, hd(), coldPoolDefault)
|
|
300
|
+
const skills = parseDeckSkills(deckParsed)
|
|
301
|
+
const checks = checkSkillExistence(skills, coldPoolDir, es)
|
|
302
|
+
for (const warning of formatSkillWarnings(checks)) {
|
|
303
|
+
console.warn(`⚠️ ${warning}`)
|
|
382
304
|
}
|
|
383
|
-
}
|
|
384
|
-
|
|
385
|
-
}
|
|
386
|
-
|
|
387
|
-
// ── 主流程 ──────────────────────────────────────────────────
|
|
388
|
-
export function runArena(argv: string[]) {
|
|
389
|
-
const { options, positionals } = parseArgs(argv)
|
|
390
|
-
|
|
391
|
-
const TASK = options.task || positionals.join(' ') || ''
|
|
392
|
-
if (!TASK) {
|
|
393
|
-
console.error('❌ 请提供 --task 或位置参数')
|
|
394
|
-
process.exit(1)
|
|
395
|
-
}
|
|
396
|
-
|
|
397
|
-
const DECK_PATHS = (options.decks || '').split(',').map(s => s.trim()).filter(Boolean)
|
|
398
|
-
|
|
399
|
-
if (DECK_PATHS.length < 2) {
|
|
400
|
-
console.error('❌ 至少需要 2 个 deck 才能进行 arena')
|
|
401
|
-
process.exit(1)
|
|
402
|
-
}
|
|
403
|
-
if (DECK_PATHS.length > 5) {
|
|
404
|
-
console.error('❌ 一次 arena 最多 5 个 deck')
|
|
405
|
-
process.exit(1)
|
|
305
|
+
} catch (e) {
|
|
306
|
+
console.warn('⚠️ Could not check skill existence:', e instanceof Error ? e.message : e)
|
|
406
307
|
}
|
|
407
308
|
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
const ARENA_ID = `arena-${timestamp()}-${ARENA_SLUG.slice(0, 30)}`
|
|
414
|
-
const ARENA_DIR = resolve(PROJECT_DIR, options.dir!, ARENA_ID)
|
|
415
|
-
|
|
416
|
-
// ── 创建目录结构 ────────────────────────────────────────────
|
|
417
|
-
mkdirSync(join(ARENA_DIR, 'decks'), { recursive: true })
|
|
418
|
-
mkdirSync(join(ARENA_DIR, 'runs'), { recursive: true })
|
|
419
|
-
mkdirSync(join(ARENA_DIR, 'sides'), { recursive: true })
|
|
420
|
-
|
|
421
|
-
// ── 生成参与者与 deck ───────────────────────────────────────
|
|
422
|
-
const participants = DECK_PATHS.map((deckPath, i) => {
|
|
423
|
-
const id = `run-${String(i + 1).padStart(2, '0')}`
|
|
424
|
-
const name = basename(deckPath).replace(/\.toml$/, '')
|
|
425
|
-
const destPath = join(ARENA_DIR, 'decks', `arena-${id}.toml`)
|
|
426
|
-
// Copy the provided deck to arena directory
|
|
427
|
-
if (existsSync(deckPath)) {
|
|
428
|
-
const content = readFileSync(deckPath, 'utf-8')
|
|
429
|
-
writeFileSync(destPath, content)
|
|
430
|
-
} else {
|
|
431
|
-
console.error(`❌ Deck 文件不存在: ${deckPath}`)
|
|
432
|
-
process.exit(1)
|
|
433
|
-
}
|
|
434
|
-
return { id, name, skill_name: name, deck_path: destPath }
|
|
309
|
+
// Direct agent.spawn — natural-language task text, no parsing
|
|
310
|
+
const agentResult = await agent.spawn({
|
|
311
|
+
cwd: agentWorkdir,
|
|
312
|
+
brief: taskText,
|
|
313
|
+
timeoutMs: Number(opts.timeout ?? 120000),
|
|
435
314
|
})
|
|
436
315
|
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
weight: 1,
|
|
441
|
-
}))
|
|
442
|
-
|
|
443
|
-
// ── 为每个 side 创建隔离工作空间 ────────────────────────────
|
|
444
|
-
for (const p of participants) {
|
|
445
|
-
const sideDir = join(ARENA_DIR, 'sides', p.id)
|
|
446
|
-
mkdirSync(sideDir, { recursive: true })
|
|
447
|
-
// 复制 deck 到 side 目录作为 skill-deck.toml
|
|
448
|
-
const sideDeckPath = join(sideDir, 'skill-deck.toml')
|
|
449
|
-
const deckContent = readFileSync(p.deck_path, 'utf-8')
|
|
450
|
-
writeFileSync(sideDeckPath, deckContent)
|
|
451
|
-
}
|
|
452
|
-
|
|
453
|
-
// ── 生成 arena.json ─────────────────────────────────────────
|
|
454
|
-
const arenaJson = {
|
|
455
|
-
version: '1.0.0',
|
|
456
|
-
metadata: {
|
|
457
|
-
id: ARENA_ID,
|
|
458
|
-
slug: ARENA_SLUG,
|
|
459
|
-
created_at: new Date().toISOString(),
|
|
460
|
-
task_description: TASK,
|
|
461
|
-
participants: participants.map(p => ({
|
|
462
|
-
...p,
|
|
463
|
-
side_dir: join(ARENA_DIR, 'sides', p.id),
|
|
464
|
-
})),
|
|
465
|
-
criteria,
|
|
466
|
-
working_dir: ARENA_DIR,
|
|
467
|
-
},
|
|
468
|
-
status: 'setup',
|
|
469
|
-
runs: participants.map(p => ({
|
|
470
|
-
participant_id: p.id,
|
|
471
|
-
side_dir: join(ARENA_DIR, 'sides', p.id),
|
|
472
|
-
output_path: join(ARENA_DIR, 'runs', `${p.id}.md`),
|
|
473
|
-
})),
|
|
474
|
-
}
|
|
475
|
-
|
|
476
|
-
writeFileSync(join(ARENA_DIR, 'arena.json'), JSON.stringify(arenaJson, null, 2) + '\n')
|
|
477
|
-
|
|
478
|
-
// ── 生成 Task Card 模板 ─────────────────────────────────────
|
|
479
|
-
const taskCardPath = join(ARENA_DIR, 'TASK-arena.md')
|
|
480
|
-
const relArenaDir = ARENA_DIR.replace(PROJECT_DIR, '.')
|
|
481
|
-
const taskCardContent = `---
|
|
482
|
-
type: arena
|
|
483
|
-
objective: |
|
|
484
|
-
${TASK}
|
|
485
|
-
evaluation_criteria:
|
|
486
|
-
${criteria.map(c => ` - ${c.label}`).join('\n')}
|
|
487
|
-
arena_decks:
|
|
488
|
-
${participants.map(p => ` - ${p.deck_path.replace(PROJECT_DIR, '.')}`).join('\n')}
|
|
489
|
-
judge_persona: |
|
|
490
|
-
你是一个多目标优化分析师。不要选 Winner。
|
|
491
|
-
对每个 deck 配置,按 evaluation_criteria 输出评分向量(1-5 分)。
|
|
492
|
-
识别 Pareto 非支配解集——没有"最强",只有"在不同维度上的最优权衡"。
|
|
493
|
-
对被支配的解,说明它被谁支配、在哪个维度上劣势。
|
|
494
|
-
如果发现任何涌现 combo(多个 skill 组合产生 1+1>2 的效果),单独标注。
|
|
495
|
-
acceptance:
|
|
496
|
-
${participants.map(p => ` - Subagent ${p.id} 在 sides/${p.id}/ 隔离环境完成任务并写入 runs/${p.id}.md`).join('\n')}
|
|
497
|
-
- Judge 读取所有 run 文件并生成 report.md
|
|
498
|
-
managed_dirs:
|
|
499
|
-
- ${relArenaDir}/
|
|
500
|
-
---
|
|
501
|
-
|
|
502
|
-
# Arena Task: ${TASK}
|
|
503
|
-
|
|
504
|
-
## Subagent 指令
|
|
505
|
-
|
|
506
|
-
${participants.map(p => `### ${p.id} (${p.name})
|
|
507
|
-
\`\`\`bash
|
|
508
|
-
# 进入隔离工作空间(已预装 deck)
|
|
509
|
-
cd "${join(ARENA_DIR, 'sides', p.id)}"
|
|
510
|
-
# 确认 skill-deck.toml 存在后 link(首次或 deck 更新时)
|
|
511
|
-
bunx @lythos/skill-deck link
|
|
512
|
-
# 然后执行任务,输出写入 "../../runs/${p.id}.md"
|
|
513
|
-
\`\`\`
|
|
514
|
-
`).join('')}
|
|
515
|
-
|
|
516
|
-
### Judge
|
|
517
|
-
\`\`\`bash
|
|
518
|
-
# 在 Host 侧读取所有 side 输出,生成报告
|
|
519
|
-
cd "${ARENA_DIR}"
|
|
520
|
-
# 读取 runs/*.md,按 evaluation_criteria 评分,生成 report.md
|
|
521
|
-
\`\`\`
|
|
522
|
-
`
|
|
523
|
-
|
|
524
|
-
writeFileSync(taskCardPath, taskCardContent)
|
|
525
|
-
|
|
526
|
-
// ── 报告 ────────────────────────────────────────────────────
|
|
527
|
-
console.log(`
|
|
528
|
-
🎮 Skill Arena 初始化完成
|
|
529
|
-
|
|
530
|
-
ID: ${ARENA_ID}
|
|
531
|
-
任务: ${TASK}
|
|
532
|
-
目录: ${ARENA_DIR}
|
|
533
|
-
模式: deck 配置对比
|
|
534
|
-
参与者: ${participants.map(p => p.name).join(', ')}
|
|
535
|
-
评测维度: ${CRITERIA.join(', ')}
|
|
536
|
-
|
|
537
|
-
生成文件:
|
|
538
|
-
📋 ${join(ARENA_DIR, 'arena.json')}
|
|
539
|
-
🎴 ${participants.length} 个 arena deck → ${join(ARENA_DIR, 'decks')}
|
|
540
|
-
🏟️ ${participants.length} 个 side 隔离工作空间 → ${join(ARENA_DIR, 'sides')}
|
|
541
|
-
📝 Task Card → ${taskCardPath}
|
|
542
|
-
|
|
543
|
-
下一步:
|
|
544
|
-
1. 阅读 Task Card: cat "${taskCardPath}"
|
|
545
|
-
2. 按指令逐个/并行启动 subagent(每个在独立的 side 目录)
|
|
546
|
-
3. Judge 生成 report.md
|
|
547
|
-
`)
|
|
548
|
-
}
|
|
549
|
-
|
|
550
|
-
// ── Viz: Report Visualizer ─────────────────────────────────
|
|
316
|
+
// Persist agent output to outDir
|
|
317
|
+
writeFileSync(join(outDir, 'agent-stdout.txt'), agentResult.stdout, 'utf-8')
|
|
318
|
+
if (agentResult.stderr) writeFileSync(join(outDir, 'agent-stderr.txt'), agentResult.stderr, 'utf-8')
|
|
551
319
|
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
// Extract title
|
|
564
|
-
const titleMatch = text.match(/^#\s+(.+)$/m)
|
|
565
|
-
const title = titleMatch ? titleMatch[1].trim() : 'Arena Report'
|
|
566
|
-
|
|
567
|
-
const lines = text.split('\n')
|
|
568
|
-
const rows: ScoreRow[] = []
|
|
569
|
-
const summaries: Record<string, number> = {}
|
|
570
|
-
|
|
571
|
-
let currentSection = ''
|
|
572
|
-
let inTable = false
|
|
573
|
-
let headers: string[] = []
|
|
574
|
-
|
|
575
|
-
for (const line of lines) {
|
|
576
|
-
const trimmed = line.trim()
|
|
577
|
-
|
|
578
|
-
// Detect section headers like "### Memory Condition" or "### Control Condition"
|
|
579
|
-
const sectionMatch = trimmed.match(/^#{2,4}\s+(.*Condition.*|.*Variable.*|.*Group.*)/i)
|
|
580
|
-
if (sectionMatch) {
|
|
581
|
-
currentSection = sectionMatch[1].replace(/[()]/g, '').trim()
|
|
582
|
-
inTable = false
|
|
583
|
-
continue
|
|
584
|
-
}
|
|
585
|
-
|
|
586
|
-
// Table header row
|
|
587
|
-
if (trimmed.startsWith('|') && trimmed.includes('Checkpoint') && !trimmed.includes('---')) {
|
|
588
|
-
inTable = true
|
|
589
|
-
const parts = trimmed.split('|').map(s => s.trim()).filter(Boolean)
|
|
590
|
-
headers = parts.slice(1)
|
|
591
|
-
continue
|
|
592
|
-
}
|
|
593
|
-
|
|
594
|
-
// Table separator
|
|
595
|
-
if (inTable && trimmed.startsWith('|') && trimmed.includes('---')) continue
|
|
596
|
-
|
|
597
|
-
// Table data row
|
|
598
|
-
if (inTable && trimmed.startsWith('|')) {
|
|
599
|
-
const parts = trimmed.split('|').map(s => s.trim()).filter(Boolean)
|
|
600
|
-
if (parts.length >= 2) {
|
|
601
|
-
const firstCell = parts[0]
|
|
602
|
-
const checkpoint = firstCell.replace(/\*\*/g, '').trim()
|
|
603
|
-
|
|
604
|
-
// Skip "Total" rows — handle them as summary
|
|
605
|
-
if (/^total/i.test(checkpoint)) {
|
|
606
|
-
for (let i = 1; i < parts.length && i <= headers.length; i++) {
|
|
607
|
-
const num = parseFloat(parts[i])
|
|
608
|
-
if (!isNaN(num)) {
|
|
609
|
-
const key = currentSection
|
|
610
|
-
? `${currentSection} ${headers[i - 1]}`.trim()
|
|
611
|
-
: headers[i - 1]
|
|
612
|
-
summaries[key] = num
|
|
613
|
-
}
|
|
614
|
-
}
|
|
615
|
-
continue
|
|
616
|
-
}
|
|
617
|
-
|
|
618
|
-
// Skip non-numeric rows (section headers inside table)
|
|
619
|
-
const secondCell = parts[1]
|
|
620
|
-
if (isNaN(parseFloat(secondCell))) continue
|
|
621
|
-
|
|
622
|
-
const scores: Record<string, number> = {}
|
|
623
|
-
let maxScore = 0
|
|
624
|
-
for (let i = 1; i < parts.length && i <= headers.length; i++) {
|
|
625
|
-
const header = headers[i - 1]
|
|
626
|
-
if (/notes?/i.test(header)) continue // Skip notes column
|
|
627
|
-
const val = parts[i]
|
|
628
|
-
const num = parseFloat(val)
|
|
629
|
-
if (!isNaN(num)) {
|
|
630
|
-
// Prefix with section name if multiple condition tables exist
|
|
631
|
-
const key = currentSection && headers.length <= 2
|
|
632
|
-
? `${currentSection} Score`
|
|
633
|
-
: header
|
|
634
|
-
scores[key] = num
|
|
635
|
-
maxScore = Math.max(maxScore, num)
|
|
636
|
-
}
|
|
637
|
-
}
|
|
638
|
-
|
|
639
|
-
const notes = parts[parts.length - 1] || ''
|
|
640
|
-
if (Object.keys(scores).length > 0) {
|
|
641
|
-
rows.push({ checkpoint, scores, notes, maxScore })
|
|
320
|
+
// Copy agent-produced files to outDir
|
|
321
|
+
const { cpSync, readdirSync, existsSync: es3 } = await import('node:fs')
|
|
322
|
+
if (es3(agentWorkdir)) {
|
|
323
|
+
const skipSet = new Set(['.claude', 'skill-deck.toml', 'skill-deck.lock'])
|
|
324
|
+
try {
|
|
325
|
+
const entries = readdirSync(agentWorkdir)
|
|
326
|
+
const plan = buildCopyPlan(agentWorkdir, outDir, entries, skipSet)
|
|
327
|
+
for (const { src, dest, name } of plan) {
|
|
328
|
+
try { cpSync(src, dest, { recursive: true }) } catch (e) {
|
|
329
|
+
console.warn(`⚠️ Failed to copy agent output: ${name} — ${e instanceof Error ? e.message : e}`)
|
|
642
330
|
}
|
|
643
331
|
}
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
// End of table
|
|
648
|
-
if (inTable && !trimmed.startsWith('|') && trimmed !== '') {
|
|
649
|
-
inTable = false
|
|
650
|
-
currentSection = ''
|
|
332
|
+
} catch (e) {
|
|
333
|
+
console.warn(`⚠️ Failed to copy agent output: ${e instanceof Error ? e.message : e}`)
|
|
651
334
|
}
|
|
652
335
|
}
|
|
653
336
|
|
|
654
|
-
|
|
337
|
+
// Summary (no judge — single mode is execution-only)
|
|
338
|
+
console.log(`\n✅ Agent run complete → ${outDir}`)
|
|
339
|
+
console.log(` deck: ${deckPath}`)
|
|
340
|
+
console.log(` player: ${player}`)
|
|
655
341
|
}
|
|
656
342
|
|
|
657
|
-
|
|
658
|
-
const filled = Math.round((value / max) * width)
|
|
659
|
-
const empty = width - filled
|
|
660
|
-
return '█'.repeat(filled) + '░'.repeat(empty)
|
|
661
|
-
}
|
|
343
|
+
// ── vs: arena.toml-driven comparison ──────────────────────────────────────
|
|
662
344
|
|
|
663
|
-
function
|
|
664
|
-
|
|
665
|
-
const
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
// Per-checkpoint bars
|
|
673
|
-
for (const row of rows) {
|
|
674
|
-
out += `📋 ${row.checkpoint}\n`
|
|
675
|
-
for (const [name, score] of Object.entries(row.scores)) {
|
|
676
|
-
const bar = renderBar(score, maxVal)
|
|
677
|
-
out += ` ${name.padEnd(12)} ${bar} ${score}/${maxVal}\n`
|
|
678
|
-
}
|
|
679
|
-
if (row.notes) {
|
|
680
|
-
out += ` 💡 ${row.notes.slice(0, 80)}${row.notes.length > 80 ? '...' : ''}\n`
|
|
681
|
-
}
|
|
682
|
-
out += '\n'
|
|
345
|
+
async function vsRun(args: string[]) {
|
|
346
|
+
// Native TOML parser is simpler than adding smol-toml dependency
|
|
347
|
+
const opts: Record<string, string | undefined> = {}
|
|
348
|
+
for (let i = 0; i < args.length; i++) {
|
|
349
|
+
if (args[i] === '--config' || args[i] === '-c') opts.config = args[++i]
|
|
350
|
+
else if (args[i] === '--out' || args[i] === '-o') opts.out = args[++i]
|
|
351
|
+
else if (args[i] === '--dry-run') opts.dryRun = 'true'
|
|
352
|
+
else if (args[i] === '--player' || args[i] === '-p') opts.player = args[++i]
|
|
683
353
|
}
|
|
684
354
|
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
out += `📊 TOTAL SCORES\n`
|
|
689
|
-
for (const [name, score] of Object.entries(summary)) {
|
|
690
|
-
const bar = renderBar(score, maxVal * rows.length)
|
|
691
|
-
out += ` ${name.padEnd(12)} ${bar} ${score}\n`
|
|
692
|
-
}
|
|
693
|
-
out += '\n'
|
|
355
|
+
if (!opts.config) {
|
|
356
|
+
console.error('❌ arena.toml path required: lythoskill-arena vs --config arena.toml')
|
|
357
|
+
process.exit(1)
|
|
694
358
|
}
|
|
695
359
|
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
if (rows.length === 0) return ''
|
|
702
|
-
|
|
703
|
-
const participants = Object.keys(rows[0].scores)
|
|
704
|
-
if (participants.length < 2) return ''
|
|
705
|
-
|
|
706
|
-
// Use checkpoint names as axes
|
|
707
|
-
const axes = rows.map(r => r.checkpoint.slice(0, 12))
|
|
708
|
-
const maxVal = rows.reduce((m, r) => Math.max(m, ...Object.values(r.scores)), 0) || 10
|
|
360
|
+
const configPath = resolve(opts.config)
|
|
361
|
+
if (!existsSync(configPath)) {
|
|
362
|
+
console.error(`❌ Config file not found: ${configPath}`)
|
|
363
|
+
process.exit(1)
|
|
364
|
+
}
|
|
709
365
|
|
|
710
|
-
|
|
711
|
-
const size = 16
|
|
712
|
-
const center = size / 2
|
|
713
|
-
let out = `\n🕸️ RADAR CHART (MOO Scoring)\n\n`
|
|
366
|
+
const toml = parseArenaToml(readFileSync(configPath, 'utf-8'))
|
|
714
367
|
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
const sym = symbols[pi % symbols.length]
|
|
720
|
-
out += ` ${sym} ${p}\n`
|
|
721
|
-
}
|
|
722
|
-
out += '\n'
|
|
723
|
-
|
|
724
|
-
// Per-axis score table (more readable than pure ASCII art)
|
|
725
|
-
out += ` Axis `
|
|
726
|
-
for (const p of participants) out += `${p.slice(0, 8).padStart(8)} `
|
|
727
|
-
out += '\n'
|
|
728
|
-
out += ` ${'─'.repeat(14 + participants.length * 9)}\n`
|
|
729
|
-
|
|
730
|
-
for (let i = 0; i < rows.length; i++) {
|
|
731
|
-
const axis = axes[i].padEnd(12)
|
|
732
|
-
out += ` ${axis} `
|
|
733
|
-
for (const p of participants) {
|
|
734
|
-
const score = rows[i].scores[p] ?? 0
|
|
735
|
-
out += `${String(score).padStart(8)} `
|
|
368
|
+
if (opts.player) {
|
|
369
|
+
// Override all sides' player for --player flag
|
|
370
|
+
for (const side of toml.side) {
|
|
371
|
+
;(side as Record<string, unknown>).player = opts.player
|
|
736
372
|
}
|
|
737
|
-
out += '\n'
|
|
738
373
|
}
|
|
739
374
|
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
function runViz(argv: string[]) {
|
|
744
|
-
const arenaDir = argv.find(a => !a.startsWith('-')) || '.'
|
|
745
|
-
const resolvedDir = resolve(arenaDir)
|
|
746
|
-
|
|
747
|
-
const arenaJsonPath = join(resolvedDir, 'arena.json')
|
|
748
|
-
const reportPath = join(resolvedDir, 'report.md')
|
|
375
|
+
const taskPath = toml.arena.task
|
|
376
|
+
const isDryRun = opts.dryRun === 'true'
|
|
749
377
|
|
|
750
|
-
if (
|
|
751
|
-
console.
|
|
752
|
-
|
|
378
|
+
if (isDryRun) {
|
|
379
|
+
console.log(`🔍 Scanning arena.toml: ${configPath}`)
|
|
380
|
+
} else {
|
|
381
|
+
console.log(`🏟 Arena VS: ${configPath}`)
|
|
382
|
+
console.log(` sides: ${toml.side.length}`)
|
|
383
|
+
console.log(` runs per side: ${toml.arena.runs_per_side}`)
|
|
753
384
|
}
|
|
754
385
|
|
|
755
|
-
const
|
|
756
|
-
const
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
return
|
|
765
|
-
}
|
|
386
|
+
const { runArenaFromToml } = await import('./runner')
|
|
387
|
+
const result = await runArenaFromToml({
|
|
388
|
+
toml,
|
|
389
|
+
taskPath,
|
|
390
|
+
outDir: opts.out ? resolve(opts.out) : undefined,
|
|
391
|
+
dryRun: isDryRun,
|
|
392
|
+
log: console.log,
|
|
393
|
+
configDir: resolve(configPath, '..'),
|
|
394
|
+
})
|
|
766
395
|
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
console.log(
|
|
770
|
-
|
|
396
|
+
if ('plan' in result) {
|
|
397
|
+
if (!isDryRun) console.log('📋 Execution plan (dry-run):')
|
|
398
|
+
for (const line of formatPlanOutput(result.plan)) console.log(line)
|
|
399
|
+
} else if ('manifest' in result) {
|
|
400
|
+
const r = result
|
|
401
|
+
console.log(`\n📊 Arena complete: ${r.manifest.id}`)
|
|
402
|
+
console.log(` report: ${r.artifactsDir}/report.md`)
|
|
403
|
+
console.log(` participants: ${r.manifest.participants.map(p => p.name).join(', ')}`)
|
|
771
404
|
}
|
|
772
|
-
|
|
773
|
-
console.log(renderAsciiChart(report))
|
|
774
|
-
console.log(renderRadarChart(report))
|
|
775
405
|
}
|
|
776
406
|
|
|
777
|
-
// ──
|
|
778
|
-
|
|
779
|
-
async function vsRun(argv: string[]) {
|
|
780
|
-
const { options } = parseArgs(argv)
|
|
781
|
-
const { readFileSync } = await import('node:fs')
|
|
782
|
-
|
|
783
|
-
const hasConfig = !!(options as Record<string, string | undefined>).config
|
|
784
|
-
const dryRun = argv.includes('--dry-run')
|
|
785
|
-
|
|
786
|
-
if (hasConfig) {
|
|
787
|
-
// arena.toml declarative mode
|
|
788
|
-
const { parseArenaToml } = await import('./arena-toml')
|
|
789
|
-
const { runArenaFromToml } = await import('./runner')
|
|
790
|
-
const configPath = (options as Record<string, string | undefined>).config!
|
|
791
|
-
|
|
792
|
-
const toml = parseArenaToml(readFileSync(configPath, 'utf-8'))
|
|
793
|
-
const { dirname } = await import('node:path')
|
|
794
|
-
const result = await runArenaFromToml({
|
|
795
|
-
toml,
|
|
796
|
-
taskPath: toml.arena.task,
|
|
797
|
-
configDir: dirname(configPath), // resolve relative paths against config file dir
|
|
798
|
-
outDir: (options as Record<string, string | undefined>).out,
|
|
799
|
-
dryRun,
|
|
800
|
-
})
|
|
801
|
-
|
|
802
|
-
if ('plan' in result) {
|
|
803
|
-
// dry-run
|
|
804
|
-
console.log(`\n📋 Dry-run: ${result.plan.total_runs} cells across ${result.plan.cells.length / Math.max(1, toml.arena.runs_per_side)} sides × ${toml.arena.runs_per_side} runs`)
|
|
805
|
-
for (const cell of result.plan.cells) {
|
|
806
|
-
console.log(` ${cell.side}/run-${cell.run}: ${cell.player} × ${cell.deck}${cell.control ? ' [control]' : ''}`)
|
|
807
|
-
}
|
|
808
|
-
return
|
|
809
|
-
}
|
|
810
|
-
|
|
811
|
-
console.log(`\n🎮 Arena complete: ${result.manifest.id}`)
|
|
812
|
-
console.log(`📁 Artifacts: ${result.artifactsDir}`)
|
|
813
|
-
console.log(`📊 Report: ${result.artifactsDir}/report.md`)
|
|
814
|
-
return
|
|
815
|
-
}
|
|
407
|
+
// ── viz: generate HTML report from arena.json ─────────────────────────────
|
|
816
408
|
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
lythoskill-arena vs --config ./arena.toml --dry-run
|
|
821
|
-
Fetch an example:
|
|
822
|
-
curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/arena/add-remove/arena.toml > arena.toml
|
|
823
|
-
Then edit arena.toml and run: lythoskill-arena vs --config ./arena.toml`)
|
|
824
|
-
process.exit(1)
|
|
409
|
+
async function vizRun(args: string[]) {
|
|
410
|
+
const runsDir = args.find(a => !a.startsWith('-'))
|
|
411
|
+
if (!runsDir) { console.error('❌ runs/<arena-id> path required: lythoskill-arena viz runs/arena-20260504'); process.exit(1) }
|
|
825
412
|
|
|
826
|
-
const
|
|
827
|
-
|
|
828
|
-
playerPaths: (options.players ?? 'players/claude-code.toml').split(',').map(s => s.trim()).filter(Boolean),
|
|
829
|
-
deckPaths: options.decks.split(',').map(s => s.trim()).filter(Boolean),
|
|
830
|
-
criteria: (options.criteria ?? 'syntax,context,logic,token').split(',').map(s => s.trim()).filter(Boolean),
|
|
831
|
-
outDir: options.out ?? `runs/arena-${timestamp()}`,
|
|
832
|
-
})
|
|
413
|
+
const arenaJsonPath = resolve(runsDir, 'arena.json')
|
|
414
|
+
if (!existsSync(arenaJsonPath)) { console.error(`❌ arena.json not found in: ${runsDir}`); process.exit(1) }
|
|
833
415
|
|
|
834
|
-
console.log(
|
|
835
|
-
console.log(`📁 Artifacts: ${result.artifactsDir}`)
|
|
836
|
-
console.log(`📊 Report: ${result.artifactsDir}/report.md`)
|
|
416
|
+
console.log(`📈 Arena HTML report not yet implemented. See report.md in ${runsDir}/`)
|
|
837
417
|
}
|
|
838
418
|
|
|
839
|
-
// ──
|
|
840
|
-
|
|
419
|
+
// ── Entry point ────────────────────────────────────────────────────────────
|
|
841
420
|
if (import.meta.main) {
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
} else if (cmd === 'scaffold' || !cmd || args[0]?.startsWith('-')) {
|
|
852
|
-
// Legacy behavior: if no subcommand or starts with flags, treat as scaffold
|
|
853
|
-
runArena(cmd === 'scaffold' ? args.slice(1) : args)
|
|
854
|
-
} else {
|
|
855
|
-
console.error(`❌ Unknown command: "${cmd}"
|
|
856
|
-
Available: single, vs, scaffold, viz
|
|
857
|
-
Usage: lythoskill-arena <command> [options]
|
|
858
|
-
Help: lythoskill-arena --help`)
|
|
421
|
+
main().catch(err => {
|
|
422
|
+
if (err instanceof ZodError) {
|
|
423
|
+
console.error('❌ Schema validation failed:')
|
|
424
|
+
for (const issue of err.issues) {
|
|
425
|
+
console.error(` - ${issue.path.join('.')}: ${issue.message}`)
|
|
426
|
+
}
|
|
427
|
+
} else {
|
|
428
|
+
console.error('❌', err instanceof Error ? err.message : err)
|
|
429
|
+
}
|
|
859
430
|
process.exit(1)
|
|
860
|
-
}
|
|
431
|
+
})
|
|
861
432
|
}
|