@lythos/skill-arena 0.9.22 → 0.9.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -10
- package/package.json +5 -1
- package/src/cli.ts +107 -53
- package/src/player.ts +1 -0
- package/src/preflight.test.ts +395 -0
- package/src/preflight.ts +208 -0
- package/src/runner.ts +2 -0
package/README.md
CHANGED
|
@@ -49,26 +49,26 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
|
|
|
49
49
|
```bash
|
|
50
50
|
bun add -d @lythos/skill-arena
|
|
51
51
|
# or use directly
|
|
52
|
-
bunx @lythos/skill-arena@0.9.22 <command>
|
|
52
|
+
bunx @lythos/skill-arena@0.9.24 <command>
|
|
53
53
|
```
|
|
54
54
|
|
|
55
55
|
## Quick Start
|
|
56
56
|
|
|
57
57
|
```bash
|
|
58
58
|
# Mode 1: Compare two skills on the same task
|
|
59
|
-
bunx @lythos/skill-arena@0.9.22 \
|
|
59
|
+
bunx @lythos/skill-arena@0.9.24 \
|
|
60
60
|
--task "Generate auth flow diagram" \
|
|
61
61
|
--skills "design-doc-mermaid,mermaid-tools" \
|
|
62
62
|
--criteria "syntax,context,token"
|
|
63
63
|
|
|
64
64
|
# Mode 2: Compare full deck configurations
|
|
65
|
-
bunx @lythos/skill-arena@0.9.22 \
|
|
65
|
+
bunx @lythos/skill-arena@0.9.24 \
|
|
66
66
|
--task "Generate auth flow diagram" \
|
|
67
67
|
--decks "./decks/minimal.toml,./decks/rich.toml" \
|
|
68
68
|
--criteria "quality,token,maintainability"
|
|
69
69
|
|
|
70
70
|
# Visualize results
|
|
71
|
-
bunx @lythos/skill-arena@0.9.22 viz tmp/arena-<id>/
|
|
71
|
+
bunx @lythos/skill-arena@0.9.24 viz tmp/arena-<id>/
|
|
72
72
|
```
|
|
73
73
|
|
|
74
74
|
## Commands
|
|
@@ -77,16 +77,16 @@ bunx @lythos/skill-arena@0.9.22 viz tmp/arena-<id>/
|
|
|
77
77
|
|
|
78
78
|
```bash
|
|
79
79
|
# Print execution plan without running
|
|
80
|
-
bunx @lythos/skill-arena@0.9.22 run --config arena.toml --dry-run
|
|
80
|
+
bunx @lythos/skill-arena@0.9.24 run --config arena.toml --dry-run
|
|
81
81
|
|
|
82
82
|
# Execute with per-side runs_per_side and statistical aggregation
|
|
83
|
-
bunx @lythos/skill-arena@0.9.22 run --config arena.toml
|
|
83
|
+
bunx @lythos/skill-arena@0.9.24 run --config arena.toml
|
|
84
84
|
```
|
|
85
85
|
|
|
86
86
|
### CLI-flag mode (backward compat)
|
|
87
87
|
|
|
88
88
|
```
|
|
89
|
-
bunx @lythos/skill-arena@0.9.22 run \
|
|
89
|
+
bunx @lythos/skill-arena@0.9.24 run \
|
|
90
90
|
--task ./TASK-arena.md \
|
|
91
91
|
--players ./players/claude.toml \
|
|
92
92
|
--decks ./decks/run-01.toml,./decks/run-02.toml \
|
|
@@ -96,13 +96,13 @@ bunx @lythos/skill-arena@0.9.22 run \
|
|
|
96
96
|
### Scaffold mode (legacy, manual execution)
|
|
97
97
|
|
|
98
98
|
```
|
|
99
|
-
bunx @lythos/skill-arena@0.9.22 scaffold --task "..." --skills a,b
|
|
99
|
+
bunx @lythos/skill-arena@0.9.24 scaffold --task "..." --skills a,b
|
|
100
100
|
```
|
|
101
101
|
|
|
102
102
|
### Viz
|
|
103
103
|
|
|
104
104
|
```bash
|
|
105
|
-
bunx @lythos/skill-arena@0.9.22 viz runs/arena-<id>/
|
|
105
|
+
bunx @lythos/skill-arena@0.9.24 viz runs/arena-<id>/
|
|
106
106
|
```
|
|
107
107
|
|
|
108
108
|
## Skill Documentation
|
|
@@ -116,7 +116,7 @@ The agent-visible **Skill** layer documentation is here:
|
|
|
116
116
|
Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
|
|
117
117
|
|
|
118
118
|
```
|
|
119
|
-
Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.22 ...
|
|
119
|
+
Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.24 ...
|
|
120
120
|
Skill (packages/<name>/skill/) → build → SKILL.md + thin scripts
|
|
121
121
|
Output (skills/<name>/) → git commit → agent-visible skill
|
|
122
122
|
```
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lythos/skill-arena",
|
|
3
|
-
"version": "0.9.22",
|
|
3
|
+
"version": "0.9.24",
|
|
4
4
|
"description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-agent",
|
|
@@ -37,8 +37,12 @@
|
|
|
37
37
|
"bun": ">=1.0.0"
|
|
38
38
|
},
|
|
39
39
|
"dependencies": {
|
|
40
|
+
"@lythos/cold-pool": "workspace:*",
|
|
40
41
|
"@lythos/test-utils": "^0.9.1",
|
|
41
42
|
"zod": "^3.24.0",
|
|
42
43
|
"zod-to-json-schema": "^3.25.2"
|
|
44
|
+
},
|
|
45
|
+
"optionalDependencies": {
|
|
46
|
+
"@lythos/agent-adapter-claude-sdk": "workspace:*"
|
|
43
47
|
}
|
|
44
48
|
}
|
package/src/cli.ts
CHANGED
|
@@ -9,6 +9,14 @@ import {
|
|
|
9
9
|
existsSync, mkdirSync, writeFileSync, readFileSync,
|
|
10
10
|
} from 'node:fs'
|
|
11
11
|
import { join, resolve, basename } from 'node:path'
|
|
12
|
+
import {
|
|
13
|
+
parseDeckSkills,
|
|
14
|
+
checkSkillExistence,
|
|
15
|
+
validateLinkResult,
|
|
16
|
+
buildCopyPlan,
|
|
17
|
+
resolveColdPoolDir,
|
|
18
|
+
formatSkillWarnings,
|
|
19
|
+
} from './preflight'
|
|
12
20
|
|
|
13
21
|
// ── 简单的 slugify ──────────────────────────────────────────
|
|
14
22
|
function slugify(input: string): string {
|
|
@@ -29,8 +37,8 @@ function printHelp(): void {
|
|
|
29
37
|
console.log(`🎭 lythoskill-arena — Skill comparison runner
|
|
30
38
|
|
|
31
39
|
Usage:
|
|
32
|
-
lythoskill-arena agent-run --task <path> --deck <path> [--player kimi] [--out <dir>]
|
|
33
|
-
lythoskill-arena agent-run --brief "<prompt>" --deck <path> [--out <dir>]
|
|
40
|
+
lythoskill-arena agent-run --task <path> --deck <path> [--player kimi] [--out <dir>] [--timeout <ms>]
|
|
41
|
+
lythoskill-arena agent-run --brief "<prompt>" --deck <path> [--out <dir>] [--timeout <ms>]
|
|
34
42
|
lythoskill-arena run --task <path> --players <A.toml,B.toml> --decks <A.toml,B.toml> --criteria <c1,c2,...> [--out <dir>]
|
|
35
43
|
lythoskill-arena scaffold --task "<description>" --skills <skill1,skill2,...>
|
|
36
44
|
lythoskill-arena scaffold --task "<description>" --decks <deck1,deck2,...>
|
|
@@ -82,14 +90,15 @@ async function agentRun(args: string[]) {
|
|
|
82
90
|
else if (args[i] === '--deck' || args[i] === '-d') opts.deck = args[++i]
|
|
83
91
|
else if (args[i] === '--player' || args[i] === '-p') opts.player = args[++i]
|
|
84
92
|
else if (args[i] === '--out' || args[i] === '-o') opts.out = args[++i]
|
|
93
|
+
else if (args[i] === '--timeout') opts.timeout = args[++i]
|
|
85
94
|
}
|
|
86
95
|
|
|
87
96
|
if (!opts.deck) {
|
|
88
97
|
console.error('❌ --deck <path> is required')
|
|
89
98
|
process.exit(1)
|
|
90
99
|
}
|
|
91
|
-
if (!opts.task && !opts.brief) {
|
|
92
|
-
console.error('❌ --task <path> or --brief "<prompt>" is required')
|
|
100
|
+
if (!opts.task && (!opts.brief || !opts.brief.trim())) {
|
|
101
|
+
console.error('❌ --task <path> or --brief "<prompt>" is required and cannot be empty')
|
|
93
102
|
process.exit(1)
|
|
94
103
|
}
|
|
95
104
|
|
|
@@ -97,39 +106,9 @@ async function agentRun(args: string[]) {
|
|
|
97
106
|
const deckPath = resolve(opts.deck)
|
|
98
107
|
if (!existsSync(deckPath)) { console.error(`❌ Deck file not found: ${deckPath}`); process.exit(1) }
|
|
99
108
|
|
|
100
|
-
// Resolve task: either from file, or create temp task from --brief
|
|
101
|
-
let taskPath: string
|
|
102
|
-
if (opts.task) {
|
|
103
|
-
taskPath = resolve(opts.task)
|
|
104
|
-
if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}`); process.exit(1) }
|
|
105
|
-
} else {
|
|
106
|
-
const { mkdtempSync, writeFileSync } = await import('node:fs')
|
|
107
|
-
const { tmpdir } = await import('node:os')
|
|
108
|
-
const tmpDir = mkdtempSync(join(tmpdir(), 'arena-brief-'))
|
|
109
|
-
taskPath = join(tmpDir, 'TASK.md')
|
|
110
|
-
const briefTask = `---
|
|
111
|
-
name: ad-hoc task
|
|
112
|
-
description: ${opts.brief!.replace(/"/g, '\\"').slice(0, 80)}
|
|
113
|
-
timeout: 120000
|
|
114
|
-
---
|
|
115
|
-
|
|
116
|
-
## Given
|
|
117
|
-
- You are an AI agent with the skills declared in the deck
|
|
118
|
-
|
|
119
|
-
## When
|
|
120
|
-
${opts.brief}
|
|
121
|
-
|
|
122
|
-
## Then
|
|
123
|
-
- Write your output to output.md
|
|
124
|
-
- The output should be complete and well-structured
|
|
125
|
-
|
|
126
|
-
## Judge
|
|
127
|
-
Evaluate whether the output is complete, accurate, and well-structured.
|
|
128
|
-
`
|
|
129
|
-
writeFileSync(taskPath, briefTask, 'utf-8')
|
|
130
|
-
}
|
|
131
|
-
|
|
132
109
|
const { useAgent } = await import('@lythos/test-utils/agents')
|
|
110
|
+
// Optional: register claude-sdk adapter if the package is installed
|
|
111
|
+
try { await import('@lythos/agent-adapter-claude-sdk') } catch { /* package not installed */ }
|
|
133
112
|
const { runAgentScenario } = await import('@lythos/test-utils/agent-bdd')
|
|
134
113
|
const { resolvePlayer } = await import('./player')
|
|
135
114
|
const { readFileSync, writeFileSync, mkdirSync } = await import('node:fs')
|
|
@@ -139,27 +118,87 @@ Evaluate whether the output is complete, accurate, and well-structured.
|
|
|
139
118
|
const outDir = opts.out ? resolve(opts.out) : join(process.cwd(), `agent-output-${new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19)}`)
|
|
140
119
|
mkdirSync(outDir, { recursive: true })
|
|
141
120
|
|
|
121
|
+
// Resolve task: --brief builds scenario directly, --task reads .agent.md file
|
|
122
|
+
const scenarioOpt: Record<string, unknown> = {}
|
|
123
|
+
if (opts.task) {
|
|
124
|
+
const taskPath = resolve(opts.task)
|
|
125
|
+
if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}`); process.exit(1) }
|
|
126
|
+
scenarioOpt.scenarioPath = taskPath
|
|
127
|
+
} else {
|
|
128
|
+
scenarioOpt.scenario = {
|
|
129
|
+
name: 'ad-hoc task',
|
|
130
|
+
description: opts.brief!.slice(0, 80),
|
|
131
|
+
timeout: Number(opts.timeout ?? 120000),
|
|
132
|
+
given: { deck: {} },
|
|
133
|
+
when: opts.brief!,
|
|
134
|
+
then: ['Write your output to output.md', 'The output should be complete and well-structured'],
|
|
135
|
+
judge: 'Evaluate whether the output is complete, accurate, and well-structured.',
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
|
|
142
139
|
console.log(`🤖 agent-run: ${player} × ${deckPath}`)
|
|
143
|
-
console.log(`📋 task: ${
|
|
140
|
+
if (opts.task) console.log(`📋 task: ${resolve(opts.task!)}`)
|
|
141
|
+
else console.log(`📋 brief: ${opts.brief!.slice(0, 60)}...`)
|
|
144
142
|
|
|
145
143
|
let agentWorkdir = ''
|
|
146
144
|
const result = await runAgentScenario({
|
|
147
|
-
|
|
145
|
+
...scenarioOpt,
|
|
148
146
|
agent,
|
|
149
147
|
async setupWorkdir(_scenario, workdir) {
|
|
150
148
|
agentWorkdir = workdir
|
|
151
149
|
mkdirSync(workdir, { recursive: true })
|
|
152
150
|
writeFileSync(join(workdir, 'skill-deck.toml'), readFileSync(deckPath, 'utf-8'))
|
|
153
151
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
)
|
|
158
|
-
|
|
152
|
+
// ── Pre-flight: deck link (skip if deck declares no skills) ──
|
|
153
|
+
const deckRaw = readFileSync(join(workdir, 'skill-deck.toml'), 'utf-8')
|
|
154
|
+
let deckParsed: Record<string, any> = {}
|
|
155
|
+
try { deckParsed = Bun.TOML.parse(deckRaw) as Record<string, any> } catch {}
|
|
156
|
+
const hasSkills = parseDeckSkills(deckParsed).length > 0
|
|
157
|
+
|
|
158
|
+
if (hasSkills) {
|
|
159
|
+
// Prefer local dev CLI over bunx (bunx needs tempdir write, blocked by some sandboxes)
|
|
160
|
+
const { existsSync: es2 } = await import('node:fs')
|
|
161
|
+
const localDeckCli = join(import.meta.dir, '..', '..', 'lythoskill-deck', 'src', 'cli.ts')
|
|
162
|
+
const linkCmd = es2(localDeckCli)
|
|
163
|
+
? ['bun', localDeckCli, 'link']
|
|
164
|
+
: ['bunx', '@lythos/skill-deck', 'link']
|
|
165
|
+
const linkProc = Bun.spawn(linkCmd,
|
|
166
|
+
{ cwd: workdir, env: { ...process.env, HOME: process.env.HOME! } },
|
|
167
|
+
)
|
|
168
|
+
await linkProc.exited
|
|
169
|
+
const linkStderr = await new Response(linkProc.stderr).text()
|
|
170
|
+
const linkResult = validateLinkResult(linkProc.exitCode, linkStderr)
|
|
171
|
+
if (!linkResult.ok) {
|
|
172
|
+
console.error(`❌ ${linkResult.error}`)
|
|
173
|
+
process.exit(1)
|
|
174
|
+
}
|
|
175
|
+
} else {
|
|
176
|
+
console.log('ℹ️ No skills declared in deck — skipping link')
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// ── Pre-flight: skill existence check (reuses deckParsed from above) ─
|
|
180
|
+
const { existsSync: es } = await import('node:fs')
|
|
181
|
+
const { homedir: hd } = await import('node:os')
|
|
182
|
+
try {
|
|
183
|
+
const coldPoolDefault = join(hd(), '.agents', 'skill-repos')
|
|
184
|
+
const coldPoolDir = resolveColdPoolDir(
|
|
185
|
+
deckParsed?.deck?.cold_pool,
|
|
186
|
+
hd(),
|
|
187
|
+
coldPoolDefault
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
const skills = parseDeckSkills(deckParsed)
|
|
191
|
+
const checks = checkSkillExistence(skills, coldPoolDir, es)
|
|
192
|
+
for (const warning of formatSkillWarnings(checks)) {
|
|
193
|
+
console.warn(`⚠️ ${warning}`)
|
|
194
|
+
}
|
|
195
|
+
} catch (e) {
|
|
196
|
+
console.warn('⚠️ Could not check skill existence:', e instanceof Error ? e.message : e)
|
|
197
|
+
}
|
|
159
198
|
},
|
|
160
199
|
})
|
|
161
200
|
|
|
162
|
-
// Copy agent output to outDir
|
|
201
|
+
// ── Copy agent output to outDir ──────────────────────────────────
|
|
163
202
|
writeFileSync(join(outDir, 'agent-stdout.txt'), result.agentResult.stdout, 'utf-8')
|
|
164
203
|
if (result.agentResult.stderr) writeFileSync(join(outDir, 'agent-stderr.txt'), result.agentResult.stderr, 'utf-8')
|
|
165
204
|
if (result.verdict) writeFileSync(join(outDir, 'judge-verdict.json'), JSON.stringify(result.verdict, null, 2) + '\n', 'utf-8')
|
|
@@ -167,16 +206,31 @@ Evaluate whether the output is complete, accurate, and well-structured.
|
|
|
167
206
|
// Copy all agent-produced files from workdir (output.md, output.docx, etc.)
|
|
168
207
|
// Skip .claude/ (symlink dir) and deck artifacts. Recursive so docx/pdf work.
|
|
169
208
|
if (agentWorkdir) {
|
|
170
|
-
const { cpSync, readdirSync } = await import('node:fs')
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
const
|
|
177
|
-
|
|
209
|
+
const { cpSync, readdirSync, existsSync: es2 } = await import('node:fs')
|
|
210
|
+
if (!es2(agentWorkdir)) {
|
|
211
|
+
console.warn(`⚠️ Agent workdir vanished before copy: ${agentWorkdir}`)
|
|
212
|
+
} else {
|
|
213
|
+
const skipSet = new Set(['.claude', 'skill-deck.toml', 'skill-deck.lock'])
|
|
214
|
+
try {
|
|
215
|
+
const entries = readdirSync(agentWorkdir)
|
|
216
|
+
const plan = buildCopyPlan(agentWorkdir, outDir, entries, skipSet)
|
|
217
|
+
for (const { src, dest, name } of plan) {
|
|
218
|
+
try {
|
|
219
|
+
cpSync(src, dest, { recursive: true })
|
|
220
|
+
} catch (e) {
|
|
221
|
+
console.warn(`⚠️ Failed to copy agent output: ${name} — ${e instanceof Error ? e.message : e}`)
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
} catch (e) {
|
|
225
|
+
console.warn(`⚠️ Failed to read agent workdir for copy: ${e instanceof Error ? e.message : e}`)
|
|
178
226
|
}
|
|
179
|
-
}
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
// ── Post-flight: output validation ──────────────────────────────
|
|
231
|
+
if (!result.agentResult.stdout || result.agentResult.stdout.trim().length === 0) {
|
|
232
|
+
console.warn('⚠️ Agent produced empty stdout — the task may have failed silently.')
|
|
233
|
+
console.warn(` Agent stderr: ${(result.agentResult.stderr || '(empty)').slice(0, 200)}`)
|
|
180
234
|
}
|
|
181
235
|
|
|
182
236
|
console.log(`\n✅ Agent complete (${result.agentResult.durationMs}ms)`)
|
package/src/player.ts
CHANGED
package/src/preflight.test.ts
ADDED
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* preflight.test.ts — TDD tests for arena agent-run pre-flight pure functions
|
|
3
|
+
*
|
|
4
|
+
* Coverage targets:
|
|
5
|
+
* parseDeckSkills — all TOML formats, edge cases
|
|
6
|
+
* checkSkillExistence — cold pool hit/miss, path resolution
|
|
7
|
+
* validateLinkResult — exit codes, error formatting
|
|
8
|
+
* buildCopyPlan — skip set, path mapping
|
|
9
|
+
* resolveColdPoolDir — tilde expansion, fallback
|
|
10
|
+
* formatSkillWarnings — warning string generation
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { describe, test, expect } from 'bun:test'
|
|
14
|
+
import {
|
|
15
|
+
parseDeckSkills,
|
|
16
|
+
checkSkillExistence,
|
|
17
|
+
validateLinkResult,
|
|
18
|
+
buildCopyPlan,
|
|
19
|
+
resolveColdPoolDir,
|
|
20
|
+
formatSkillWarnings,
|
|
21
|
+
} from './preflight'
|
|
22
|
+
|
|
23
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
24
|
+
// parseDeckSkills
|
|
25
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
26
|
+
|
|
27
|
+
describe('parseDeckSkills', () => {
|
|
28
|
+
|
|
29
|
+
test('empty deck → empty array', () => {
|
|
30
|
+
expect(parseDeckSkills({})).toEqual([])
|
|
31
|
+
})
|
|
32
|
+
|
|
33
|
+
test('deck with no skill sections → empty array', () => {
|
|
34
|
+
expect(parseDeckSkills({ deck: { max_cards: 10 } })).toEqual([])
|
|
35
|
+
})
|
|
36
|
+
|
|
37
|
+
test('inline-table format: single tool skill with path', () => {
|
|
38
|
+
const parsed = {
|
|
39
|
+
tool: {
|
|
40
|
+
skills: {
|
|
41
|
+
pdf: { path: 'github.com/anthropics/skills/skills/pdf' }
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
46
|
+
{ name: 'pdf', path: 'github.com/anthropics/skills/skills/pdf', section: 'tool' }
|
|
47
|
+
])
|
|
48
|
+
})
|
|
49
|
+
|
|
50
|
+
test('inline-table format: multiple skills', () => {
|
|
51
|
+
const parsed = {
|
|
52
|
+
tool: {
|
|
53
|
+
skills: {
|
|
54
|
+
pdf: { path: 'github.com/anthropics/skills/skills/pdf' },
|
|
55
|
+
docx: { path: 'github.com/anthropics/skills/skills/docx' },
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
60
|
+
{ name: 'pdf', path: 'github.com/anthropics/skills/skills/pdf', section: 'tool' },
|
|
61
|
+
{ name: 'docx', path: 'github.com/anthropics/skills/skills/docx', section: 'tool' },
|
|
62
|
+
])
|
|
63
|
+
})
|
|
64
|
+
|
|
65
|
+
test('array format: skills = ["a", "b"]', () => {
|
|
66
|
+
const parsed = {
|
|
67
|
+
tool: {
|
|
68
|
+
skills: ['web-search', 'docx']
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
72
|
+
{ name: 'web-search', path: null, section: 'tool' },
|
|
73
|
+
{ name: 'docx', path: null, section: 'tool' },
|
|
74
|
+
])
|
|
75
|
+
})
|
|
76
|
+
|
|
77
|
+
test('innate section parsed separately', () => {
|
|
78
|
+
const parsed = {
|
|
79
|
+
innate: {
|
|
80
|
+
skills: {
|
|
81
|
+
deck: { path: 'github.com/lythos-labs/lythoskill/skills/lythoskill-deck' }
|
|
82
|
+
}
|
|
83
|
+
},
|
|
84
|
+
tool: {
|
|
85
|
+
skills: {
|
|
86
|
+
pdf: { path: 'github.com/anthropics/skills/skills/pdf' }
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
91
|
+
{ name: 'deck', path: 'github.com/lythos-labs/lythoskill/skills/lythoskill-deck', section: 'innate' },
|
|
92
|
+
{ name: 'pdf', path: 'github.com/anthropics/skills/skills/pdf', section: 'tool' },
|
|
93
|
+
])
|
|
94
|
+
})
|
|
95
|
+
|
|
96
|
+
test('transient section parsed', () => {
|
|
97
|
+
const parsed = {
|
|
98
|
+
transient: {
|
|
99
|
+
skills: {
|
|
100
|
+
experiment: { path: 'localhost/my-experiment' }
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
105
|
+
{ name: 'experiment', path: 'localhost/my-experiment', section: 'transient' }
|
|
106
|
+
])
|
|
107
|
+
})
|
|
108
|
+
|
|
109
|
+
test('object entry without path → path=null', () => {
|
|
110
|
+
const parsed = {
|
|
111
|
+
tool: {
|
|
112
|
+
skills: {
|
|
113
|
+
bare: {} // no path field
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
118
|
+
{ name: 'bare', path: null, section: 'tool' }
|
|
119
|
+
])
|
|
120
|
+
})
|
|
121
|
+
|
|
122
|
+
test('object entry with non-string path → path=null', () => {
|
|
123
|
+
const parsed = {
|
|
124
|
+
tool: {
|
|
125
|
+
skills: {
|
|
126
|
+
weird: { path: 42 } // number, not string
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
131
|
+
{ name: 'weird', path: null, section: 'tool' }
|
|
132
|
+
])
|
|
133
|
+
})
|
|
134
|
+
|
|
135
|
+
test('array entry that is not a string → skipped', () => {
|
|
136
|
+
const parsed = {
|
|
137
|
+
tool: { skills: ['valid', 123, null, 'also-valid'] }
|
|
138
|
+
}
|
|
139
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
140
|
+
{ name: 'valid', path: null, section: 'tool' },
|
|
141
|
+
{ name: 'also-valid', path: null, section: 'tool' },
|
|
142
|
+
])
|
|
143
|
+
})
|
|
144
|
+
|
|
145
|
+
test('all three sections populated → ordered innate, tool, transient', () => {
|
|
146
|
+
const parsed = {
|
|
147
|
+
innate: { skills: { a: { path: '/a' } } },
|
|
148
|
+
tool: { skills: { b: { path: '/b' } } },
|
|
149
|
+
transient: { skills: { c: { path: '/c' } } },
|
|
150
|
+
}
|
|
151
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
152
|
+
{ name: 'a', path: '/a', section: 'innate' },
|
|
153
|
+
{ name: 'b', path: '/b', section: 'tool' },
|
|
154
|
+
{ name: 'c', path: '/c', section: 'transient' },
|
|
155
|
+
])
|
|
156
|
+
})
|
|
157
|
+
})
|
|
158
|
+
|
|
159
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
160
|
+
// checkSkillExistence
|
|
161
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
162
|
+
|
|
163
|
+
describe('checkSkillExistence', () => {
|
|
164
|
+
|
|
165
|
+
test('empty skills → empty array', () => {
|
|
166
|
+
const exists = (_: string) => true
|
|
167
|
+
expect(checkSkillExistence([], '/cold', exists)).toEqual([])
|
|
168
|
+
})
|
|
169
|
+
|
|
170
|
+
test('skill with explicit path → resolves <coldPool>/<path>/SKILL.md', () => {
|
|
171
|
+
const exists = (p: string) => p === '/cold/github.com/owner/repo/skills/my-skill/SKILL.md'
|
|
172
|
+
const skills = [{ name: 'my-skill', path: 'github.com/owner/repo/skills/my-skill', section: 'tool' }]
|
|
173
|
+
const result = checkSkillExistence(skills, '/cold', exists)
|
|
174
|
+
expect(result).toEqual([
|
|
175
|
+
{ name: 'my-skill', expectedPath: '/cold/github.com/owner/repo/skills/my-skill/SKILL.md', found: true, section: 'tool' }
|
|
176
|
+
])
|
|
177
|
+
})
|
|
178
|
+
|
|
179
|
+
test('skill without path (array format) → resolves <coldPool>/<name>/SKILL.md', () => {
|
|
180
|
+
const exists = (p: string) => p === '/cold/web-search/SKILL.md'
|
|
181
|
+
const skills = [{ name: 'web-search', path: null, section: 'tool' }]
|
|
182
|
+
const result = checkSkillExistence(skills, '/cold', exists)
|
|
183
|
+
expect(result).toEqual([
|
|
184
|
+
{ name: 'web-search', expectedPath: '/cold/web-search/SKILL.md', found: true, section: 'tool' }
|
|
185
|
+
])
|
|
186
|
+
})
|
|
187
|
+
|
|
188
|
+
test('HTTP path → uses name as fallback for path resolution', () => {
|
|
189
|
+
const exists = (p: string) => p === '/cold/my-skill/SKILL.md'
|
|
190
|
+
const skills = [{ name: 'my-skill', path: 'https://example.com/deck.toml', section: 'tool' }]
|
|
191
|
+
const result = checkSkillExistence(skills, '/cold', exists)
|
|
192
|
+
expect(result).toEqual([
|
|
193
|
+
{ name: 'my-skill', expectedPath: '/cold/my-skill/SKILL.md', found: true, section: 'tool' }
|
|
194
|
+
])
|
|
195
|
+
})
|
|
196
|
+
|
|
197
|
+
test('all found → all found=true', () => {
|
|
198
|
+
const exists = (_: string) => true
|
|
199
|
+
const skills = [
|
|
200
|
+
{ name: 'a', path: '/a', section: 'tool' },
|
|
201
|
+
{ name: 'b', path: '/b', section: 'tool' },
|
|
202
|
+
]
|
|
203
|
+
expect(checkSkillExistence(skills, '/cold', exists)).toEqual([
|
|
204
|
+
{ name: 'a', expectedPath: '/cold//a/SKILL.md', found: true, section: 'tool' },
|
|
205
|
+
{ name: 'b', expectedPath: '/cold//b/SKILL.md', found: true, section: 'tool' },
|
|
206
|
+
])
|
|
207
|
+
})
|
|
208
|
+
|
|
209
|
+
test('some missing → mixed found/not-found', () => {
|
|
210
|
+
const exists = (p: string) => p.includes('a')
|
|
211
|
+
const skills = [
|
|
212
|
+
{ name: 'a', path: '/a', section: 'tool' },
|
|
213
|
+
{ name: 'b', path: '/b', section: 'tool' },
|
|
214
|
+
]
|
|
215
|
+
expect(checkSkillExistence(skills, '/cold', exists)).toEqual([
|
|
216
|
+
{ name: 'a', expectedPath: '/cold//a/SKILL.md', found: true, section: 'tool' },
|
|
217
|
+
{ name: 'b', expectedPath: '/cold//b/SKILL.md', found: false, section: 'tool' },
|
|
218
|
+
])
|
|
219
|
+
})
|
|
220
|
+
|
|
221
|
+
test('different coldPoolDir → different expectedPath prefix', () => {
|
|
222
|
+
const exists = (_: string) => true
|
|
223
|
+
const skills = [{ name: 'x', path: 'github.com/x', section: 'tool' }]
|
|
224
|
+
const a = checkSkillExistence(skills, '/home/user/.agents/skill-repos', exists)
|
|
225
|
+
const b = checkSkillExistence(skills, '/opt/cold', exists)
|
|
226
|
+
expect(a[0].expectedPath).toStartWith('/home/user/.agents/skill-repos/')
|
|
227
|
+
expect(b[0].expectedPath).toStartWith('/opt/cold/')
|
|
228
|
+
})
|
|
229
|
+
})
|
|
230
|
+
|
|
231
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
232
|
+
// validateLinkResult
|
|
233
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
234
|
+
|
|
235
|
+
describe('validateLinkResult', () => {
|
|
236
|
+
|
|
237
|
+
test('exitCode 0 → ok', () => {
|
|
238
|
+
expect(validateLinkResult(0, '')).toEqual({ ok: true })
|
|
239
|
+
})
|
|
240
|
+
|
|
241
|
+
test('exitCode 0 with stderr → still ok (stderr is not always errors)', () => {
|
|
242
|
+
expect(validateLinkResult(0, 'some warning output')).toEqual({ ok: true })
|
|
243
|
+
})
|
|
244
|
+
|
|
245
|
+
test('exitCode 1 → not ok, error contains snippet', () => {
|
|
246
|
+
const result = validateLinkResult(1, 'something went wrong')
|
|
247
|
+
expect(result.ok).toBe(false)
|
|
248
|
+
expect(result.error).toContain('exit 1')
|
|
249
|
+
expect(result.error).toContain('something went wrong')
|
|
250
|
+
})
|
|
251
|
+
|
|
252
|
+
test('exitCode null → not ok (null !== 0)', () => {
|
|
253
|
+
const result = validateLinkResult(null, 'process killed')
|
|
254
|
+
expect(result.ok).toBe(false)
|
|
255
|
+
expect(result.error).toContain('exit null')
|
|
256
|
+
})
|
|
257
|
+
|
|
258
|
+
test('stderr truncated to 300 chars in error message', () => {
|
|
259
|
+
const longStderr = 'x'.repeat(500)
|
|
260
|
+
const result = validateLinkResult(1, longStderr)
|
|
261
|
+
expect(result.ok).toBe(false)
|
|
262
|
+
expect(result.error!.length).toBeLessThan(350) // "Deck link failed (exit 1): " + 300 chars
|
|
263
|
+
})
|
|
264
|
+
|
|
265
|
+
test('exitCode 0, empty stderr → ok with no error field', () => {
|
|
266
|
+
const result = validateLinkResult(0, '')
|
|
267
|
+
expect(result.ok).toBe(true)
|
|
268
|
+
expect(result.error).toBeUndefined()
|
|
269
|
+
})
|
|
270
|
+
})
|
|
271
|
+
|
|
272
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
273
|
+
// buildCopyPlan
|
|
274
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
275
|
+
|
|
276
|
+
describe('buildCopyPlan', () => {
|
|
277
|
+
|
|
278
|
+
test('empty entries → empty plan', () => {
|
|
279
|
+
expect(buildCopyPlan('/work', '/out', [], new Set())).toEqual([])
|
|
280
|
+
})
|
|
281
|
+
|
|
282
|
+
test('all skipped → empty plan', () => {
|
|
283
|
+
const skip = new Set(['.claude', 'skill-deck.toml'])
|
|
284
|
+
expect(buildCopyPlan('/work', '/out', ['.claude', 'skill-deck.toml'], skip)).toEqual([])
|
|
285
|
+
})
|
|
286
|
+
|
|
287
|
+
test('normal entries → mapped to outDir', () => {
|
|
288
|
+
const skip = new Set<string>()
|
|
289
|
+
expect(buildCopyPlan('/work', '/out', ['output.md', 'report.docx'], skip)).toEqual([
|
|
290
|
+
{ src: '/work/output.md', dest: '/out/output.md', name: 'output.md' },
|
|
291
|
+
{ src: '/work/report.docx', dest: '/out/report.docx', name: 'report.docx' },
|
|
292
|
+
])
|
|
293
|
+
})
|
|
294
|
+
|
|
295
|
+
test('mixed skip and non-skip → only non-skipped', () => {
|
|
296
|
+
const skip = new Set(['.claude', 'skill-deck.toml', 'skill-deck.lock'])
|
|
297
|
+
const entries = ['.claude', 'output.md', 'skill-deck.toml', 'report.docx', 'skill-deck.lock']
|
|
298
|
+
expect(buildCopyPlan('/work', '/out', entries, skip)).toEqual([
|
|
299
|
+
{ src: '/work/output.md', dest: '/out/output.md', name: 'output.md' },
|
|
300
|
+
{ src: '/work/report.docx', dest: '/out/report.docx', name: 'report.docx' },
|
|
301
|
+
])
|
|
302
|
+
})
|
|
303
|
+
|
|
304
|
+
test('preserves entry order', () => {
|
|
305
|
+
const skip = new Set<string>()
|
|
306
|
+
const entries = ['c', 'a', 'b']
|
|
307
|
+
expect(buildCopyPlan('/w', '/o', entries, skip).map(e => e.name)).toEqual(['c', 'a', 'b'])
|
|
308
|
+
})
|
|
309
|
+
|
|
310
|
+
test('nested paths work (agent-produced subdirectories)', () => {
|
|
311
|
+
const skip = new Set<string>()
|
|
312
|
+
expect(buildCopyPlan('/work', '/out', ['subdir/output.pdf'], skip)).toEqual([
|
|
313
|
+
{ src: '/work/subdir/output.pdf', dest: '/out/subdir/output.pdf', name: 'subdir/output.pdf' },
|
|
314
|
+
])
|
|
315
|
+
})
|
|
316
|
+
})
|
|
317
|
+
|
|
318
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
319
|
+
// resolveColdPoolDir
|
|
320
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
321
|
+
|
|
322
|
+
describe('resolveColdPoolDir', () => {
|
|
323
|
+
|
|
324
|
+
test('explicit absolute path → returned as-is', () => {
|
|
325
|
+
expect(resolveColdPoolDir('/opt/cold', '/home/user', '/fallback')).toBe('/opt/cold')
|
|
326
|
+
})
|
|
327
|
+
|
|
328
|
+
test('explicit relative path → returned as-is', () => {
|
|
329
|
+
expect(resolveColdPoolDir('my-cold-pool', '/home/user', '/fallback')).toBe('my-cold-pool')
|
|
330
|
+
})
|
|
331
|
+
|
|
332
|
+
test('tilde path → expanded with homeDir', () => {
|
|
333
|
+
expect(resolveColdPoolDir('~/.agents/skill-repos', '/home/user', '/fallback'))
|
|
334
|
+
.toBe('/home/user/.agents/skill-repos')
|
|
335
|
+
})
|
|
336
|
+
|
|
337
|
+
test('tilde at start only → expanded; tilde elsewhere not expanded', () => {
|
|
338
|
+
expect(resolveColdPoolDir('path/with~/tilde', '/home/user', '/fallback'))
|
|
339
|
+
.toBe('path/with~/tilde')
|
|
340
|
+
})
|
|
341
|
+
|
|
342
|
+
test('undefined → uses fallback', () => {
|
|
343
|
+
expect(resolveColdPoolDir(undefined, '/home/user', '/default/cold'))
|
|
344
|
+
.toBe('/default/cold')
|
|
345
|
+
})
|
|
346
|
+
|
|
347
|
+
test('empty string → uses fallback (|| operator)', () => {
|
|
348
|
+
expect(resolveColdPoolDir('', '/home/user', '/default/cold'))
|
|
349
|
+
.toBe('/default/cold')
|
|
350
|
+
})
|
|
351
|
+
})
|
|
352
|
+
|
|
353
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
354
|
+
// formatSkillWarnings
|
|
355
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
356
|
+
|
|
357
|
+
// Unit tests for formatSkillWarnings: only checks with `found: false` produce
// a warning, one per missing skill, in input order.
describe('formatSkillWarnings', () => {

  test('all found → empty array', () => {
    const checks = [
      { name: 'a', expectedPath: '/p/a/SKILL.md', found: true, section: 'tool' },
      { name: 'b', expectedPath: '/p/b/SKILL.md', found: true, section: 'tool' },
    ]
    expect(formatSkillWarnings(checks)).toEqual([])
  })

  test('some missing → one warning per missing skill', () => {
    const checks = [
      { name: 'pdf', expectedPath: '/cold/pdf/SKILL.md', found: false, section: 'tool' },
      { name: 'docx', expectedPath: '/cold/docx/SKILL.md', found: true, section: 'tool' },
    ]
    expect(formatSkillWarnings(checks)).toEqual([
      'Skill "pdf" declared in deck [tool] but SKILL.md not found at: /cold/pdf/SKILL.md',
    ])
  })

  test('all missing → warning for each', () => {
    const checks = [
      { name: 'a', expectedPath: '/p/a/SKILL.md', found: false, section: 'innate' },
      { name: 'b', expectedPath: '/p/b/SKILL.md', found: false, section: 'tool' },
    ]
    const warnings = formatSkillWarnings(checks)
    expect(warnings).toHaveLength(2)
    // Pin content and order too: a length-only check would accept two
    // identical or mislabelled warnings.
    expect(warnings).toEqual([
      'Skill "a" declared in deck [innate] but SKILL.md not found at: /p/a/SKILL.md',
      'Skill "b" declared in deck [tool] but SKILL.md not found at: /p/b/SKILL.md',
    ])
  })

  test('empty array → empty array', () => {
    expect(formatSkillWarnings([])).toEqual([])
  })

  test('section name appears in warning string', () => {
    const checks = [
      { name: 'x', expectedPath: '/p/x', found: false, section: 'transient' },
    ]
    expect(formatSkillWarnings(checks)[0]).toContain('[transient]')
  })
})
|
package/src/preflight.ts
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* preflight.ts — Arena agent-run pre-flight pure functions
|
|
3
|
+
*
|
|
4
|
+
* Extracted from cli.ts agentRun to enable unit testing.
|
|
5
|
+
* All functions are pure: no filesystem IO, no spawn, no console.
|
|
6
|
+
* IO is injected via function parameters (e.g., existsFn, readdirFn).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { ColdPool, parseLocator } from '@lythos/cold-pool'
|
|
10
|
+
|
|
11
|
+
// ── Types ─────────────────────────────────────────────────────────────────
|
|
12
|
+
|
|
13
|
+
/**
 * One skill entry as declared in skill-deck.toml.
 */
export interface SkillDecl {
  /** TOML key of the skill (e.g., "pdf"). */
  name: string
  /** Explicit path from the inline-table format; `null` for the array format. */
  path: string | null
  /** Deck section the skill was declared in: "innate" | "tool" | "transient". */
  section: string
}
|
|
19
|
+
|
|
20
|
+
/**
 * Outcome of checking a single declared skill against the cold pool.
 */
export interface SkillCheck {
  /** Skill name, copied from the declaration. */
  name: string
  /** Resolved cold pool path that was checked. */
  expectedPath: string
  /** Whether the existence check reported `expectedPath` as present. */
  found: boolean
  /** Deck section, copied from the declaration. */
  section: string
}
|
|
27
|
+
|
|
28
|
+
/**
 * Outcome of validating a deck link run.
 */
export interface LinkResult {
  /** `true` when the link step is considered successful. */
  ok: boolean
  /** Human-readable failure description; optional. */
  error?: string
}
|
|
33
|
+
|
|
34
|
+
/**
 * One planned file copy operation.
 */
export interface CopyEntry {
  /** Source path of the copy. */
  src: string
  /** Destination path of the copy. */
  dest: string
  /** Entry basename, kept for error reporting. */
  name: string
}
|
|
40
|
+
|
|
41
|
+
// ── parseDeckSkills ──────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
/**
 * Collect every skill declared in an already-parsed skill-deck.toml object.
 *
 * The `innate`, `tool` and `transient` sections are visited in that order.
 * Each section's `skills` value may take one of two shapes:
 *
 *   [tool.skills.pdf]         → { name: "pdf", path: "github.com/...", section: "tool" }
 *   path = "github.com/..."
 *
 *   skills = ["a", "b"]       → { name: "a", path: null, section: "tool" }
 *
 * Array items that are not strings are ignored. Table entries without a
 * string `path` produce `path: null`. A falsy or non-object `skills` value
 * contributes nothing.
 *
 * Pure: parsed object → SkillDecl[]. No IO and no TOML parsing happens here;
 * the caller parses the file first.
 *
 * @param deckParsed - The parsed deck (e.g., the result of a TOML parser).
 * @returns Declarations in section order, then declaration order.
 */
export function parseDeckSkills(
  deckParsed: Record<string, any>
): SkillDecl[] {
  const sectionNames = ['innate', 'tool', 'transient'] as const

  return sectionNames.flatMap((section): SkillDecl[] => {
    const declared = deckParsed?.[section]?.skills
    if (!declared) return []

    // Array format: skills = ["name1", "name2"]
    if (Array.isArray(declared)) {
      return declared
        .filter((item): item is string => typeof item === 'string')
        .map(name => ({ name, path: null, section }))
    }

    if (typeof declared !== 'object') return []

    // Table format: [tool.skills.name] with an optional `path = "..."`
    return Object.entries(declared as Record<string, any>).map(([name, entry]) => ({
      name,
      path: typeof entry?.path === 'string' ? entry.path : null,
      section,
    }))
  })
}
|
|
82
|
+
|
|
83
|
+
// ── checkSkillExistence ──────────────────────────────────────────────────
|
|
84
|
+
|
|
85
|
+
/**
 * Check each declared skill against the cold pool and report whether its
 * SKILL.md is present.
 *
 * Path resolution delegates to @lythos/cold-pool's `parseLocator` and
 * `ColdPool.resolveDir` so localhost / FQ / standalone forms all map to
 * the right physical layout (per ADR-20260507021957847). When `parseLocator`
 * yields nothing (legacy non-FQ names such as bare `pdf`), the path falls
 * back to `<coldPoolDir>/<name>/SKILL.md`.
 *
 * The string that gets resolved is the skill's `path`, unless it is missing
 * or starts with `http`; in that case the skill's `name` is resolved instead.
 * Such skills are still checked, not skipped.
 *
 * @param skills - Declarations to check.
 * @param coldPoolDir - Cold pool root directory.
 * @param existsFn - IO injection point: swap for real fs or a mock.
 * @returns One SkillCheck per input declaration, in input order.
 */
export function checkSkillExistence(
  skills: SkillDecl[],
  coldPoolDir: string,
  existsFn: (path: string) => boolean
): SkillCheck[] {
  const pool = new ColdPool(coldPoolDir)

  // Map a locator-ish string to the SKILL.md path expected inside the pool.
  const expectedSkillFile = (candidate: string): string => {
    const locator = parseLocator(candidate)
    if (!locator) {
      // Legacy bare-name fallback. Per ADR-20260502012643244 this should
      // be removed in 0.10.x once arena.toml authors switch to FQ.
      return `${coldPoolDir}/${candidate}/SKILL.md`
    }
    const baseDir = pool.resolveDir(locator)
    // Only a non-localhost locator that names a skill nests one level deeper.
    // The localhost layout and standalone repos keep SKILL.md at the resolved
    // directory itself.
    if (!locator.isLocalhost && locator.skill) {
      return `${baseDir}/${locator.skill}/SKILL.md`
    }
    return `${baseDir}/SKILL.md`
  }

  return skills.map(skill => {
    const usePath = skill.path && !skill.path.startsWith('http')
    const expectedPath = expectedSkillFile(usePath ? skill.path : skill.name)
    return {
      name: skill.name,
      expectedPath,
      found: existsFn(expectedPath),
      section: skill.section,
    }
  })
}
|
|
132
|
+
|
|
133
|
+
// ── validateLinkResult ───────────────────────────────────────────────────
|
|
134
|
+
|
|
135
|
+
/**
 * Turn the outcome of `bunx @lythos/skill-deck link` into a LinkResult.
 *
 * Pure: (exitCode, stderr) → LinkResult.
 * Exit code 0 is success, whatever stderr contains. Any other exit code,
 * including `null`, is a failure whose message carries at most the first
 * 300 characters of stderr.
 *
 * @param exitCode - Exit code of the link process.
 * @param stderr - Captured standard error of the link process.
 * @returns `{ ok: true }` on success, `{ ok: false, error }` otherwise.
 */
export function validateLinkResult(
  exitCode: number | null,
  stderr: string
): LinkResult {
  if (exitCode === 0) return { ok: true }

  // Cap the excerpt so a noisy stderr cannot flood the error message.
  const excerpt = (stderr || '').slice(0, 300)
  return {
    ok: false,
    error: `Deck link failed (exit ${exitCode}): ${excerpt}`,
  }
}
|
|
154
|
+
|
|
155
|
+
// ── buildCopyPlan ────────────────────────────────────────────────────────
|
|
156
|
+
|
|
157
|
+
/**
 * Plan the copy of workdir entries into outDir.
 *
 * Entries listed in `skipSet` are dropped. Every remaining entry is mapped
 * from `<workdir>/<name>` to `<outDir>/<name>`, keeping the input order.
 * Pure: strings + set → CopyEntry[]. No filesystem access.
 *
 * @param workdir - Directory the entries are copied from.
 * @param outDir - Directory the entries are copied to.
 * @param entries - Entry names found in `workdir`.
 * @param skipSet - Entry names to leave out of the plan.
 * @returns One CopyEntry per entry that was not skipped.
 */
export function buildCopyPlan(
  workdir: string,
  outDir: string,
  entries: string[],
  skipSet: Set<string>
): CopyEntry[] {
  return entries
    .filter(entry => !skipSet.has(entry))
    .map(entry => ({
      src: `${workdir}/${entry}`,
      dest: `${outDir}/${entry}`,
      name: entry,
    }))
}
|
|
180
|
+
|
|
181
|
+
// ── resolveColdPoolDir ───────────────────────────────────────────────────
|
|
182
|
+
|
|
183
|
+
/**
 * Resolve the cold_pool root from deck config, expanding a leading `~`.
 *
 * `coldPoolRoot` is used when it is a non-empty string; otherwise
 * `fallbackDir` is used (the `||` operator treats `''` like `undefined`).
 * The chosen value is expanded only when it is exactly `~` or starts with
 * `~/` (or `~\`). Other leading tildes such as `~other/pool` are returned
 * untouched: blindly gluing `homeDir` onto them would produce a wrong path
 * like `/home/userother/pool`.
 *
 * Pure: string → string. No filesystem access.
 *
 * @param coldPoolRoot - Configured cold pool root, if any.
 * @param homeDir - Home directory substituted for the leading `~`.
 * @param fallbackDir - Root used when `coldPoolRoot` is empty or undefined.
 * @returns The resolved cold pool directory.
 */
export function resolveColdPoolDir(
  coldPoolRoot: string | undefined,
  homeDir: string,
  fallbackDir: string
): string {
  const raw = coldPoolRoot || fallbackDir
  if (raw === '~') return homeDir
  // Expand only "~<separator>…"; "~name" refers to something else entirely.
  if (raw.startsWith('~/') || raw.startsWith('~\\')) {
    return `${homeDir}${raw.slice(1)}`
  }
  return raw
}
|
|
196
|
+
|
|
197
|
+
// ── formatSkillWarnings ──────────────────────────────────────────────────
|
|
198
|
+
|
|
199
|
+
/**
 * Render one human-readable warning for every skill check that was not found.
 *
 * Checks with `found: true` produce nothing; the order of the remaining
 * warnings follows the input order.
 * Pure: SkillCheck[] → string[].
 *
 * @param checks - Results of the skill existence checks.
 * @returns Warning strings, one per missing skill.
 */
export function formatSkillWarnings(checks: SkillCheck[]): string[] {
  const warnings: string[] = []
  for (const { found, name, section, expectedPath } of checks) {
    if (found) continue
    warnings.push(`Skill "${name}" declared in deck [${section}] but SKILL.md not found at: ${expectedPath}`)
  }
  return warnings
}
|
package/src/runner.ts
CHANGED
|
@@ -3,6 +3,8 @@ import { join, resolve } from 'node:path'
|
|
|
3
3
|
import { tmpdir } from 'node:os'
|
|
4
4
|
import { runAgentScenario, type AgentScenario } from '@lythos/test-utils/agent-bdd'
|
|
5
5
|
import { useAgent } from '@lythos/test-utils/agents'
|
|
6
|
+
// Optional: register claude-sdk adapter if the package is installed
|
|
7
|
+
try { await import('@lythos/agent-adapter-claude-sdk') } catch { /* package not installed */ }
|
|
6
8
|
import { ArenaManifest, Player } from '@lythos/test-utils/schema'
|
|
7
9
|
import type { ArenaManifest as ArenaManifestType, JudgeVerdict } from '@lythos/test-utils/schema'
|
|
8
10
|
import { runComparativeJudge } from './comparative-judge'
|