@lythos/skill-arena 0.9.21 → 0.9.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -10
- package/package.json +4 -1
- package/src/cli.ts +104 -51
- package/src/player.ts +1 -0
- package/src/preflight.test.ts +395 -0
- package/src/preflight.ts +186 -0
- package/src/runner.ts +2 -0
package/README.md
CHANGED
|
@@ -49,26 +49,26 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
|
|
|
49
49
|
```bash
|
|
50
50
|
bun add -d @lythos/skill-arena
|
|
51
51
|
# or use directly
|
|
52
|
-
bunx @lythos/skill-arena@0.9.21 <command>
|
|
52
|
+
bunx @lythos/skill-arena@0.9.23 <command>
|
|
53
53
|
```
|
|
54
54
|
|
|
55
55
|
## Quick Start
|
|
56
56
|
|
|
57
57
|
```bash
|
|
58
58
|
# Mode 1: Compare two skills on the same task
|
|
59
|
-
bunx @lythos/skill-arena@0.9.21 \
|
|
59
|
+
bunx @lythos/skill-arena@0.9.23 \
|
|
60
60
|
--task "Generate auth flow diagram" \
|
|
61
61
|
--skills "design-doc-mermaid,mermaid-tools" \
|
|
62
62
|
--criteria "syntax,context,token"
|
|
63
63
|
|
|
64
64
|
# Mode 2: Compare full deck configurations
|
|
65
|
-
bunx @lythos/skill-arena@0.9.21 \
|
|
65
|
+
bunx @lythos/skill-arena@0.9.23 \
|
|
66
66
|
--task "Generate auth flow diagram" \
|
|
67
67
|
--decks "./decks/minimal.toml,./decks/rich.toml" \
|
|
68
68
|
--criteria "quality,token,maintainability"
|
|
69
69
|
|
|
70
70
|
# Visualize results
|
|
71
|
-
bunx @lythos/skill-arena@0.9.21 viz tmp/arena-<id>/
|
|
71
|
+
bunx @lythos/skill-arena@0.9.23 viz tmp/arena-<id>/
|
|
72
72
|
```
|
|
73
73
|
|
|
74
74
|
## Commands
|
|
@@ -77,16 +77,16 @@ bunx @lythos/skill-arena@0.9.21 viz tmp/arena-<id>/
|
|
|
77
77
|
|
|
78
78
|
```bash
|
|
79
79
|
# Print execution plan without running
|
|
80
|
-
bunx @lythos/skill-arena@0.9.21 run --config arena.toml --dry-run
|
|
80
|
+
bunx @lythos/skill-arena@0.9.23 run --config arena.toml --dry-run
|
|
81
81
|
|
|
82
82
|
# Execute with per-side runs_per_side and statistical aggregation
|
|
83
|
-
bunx @lythos/skill-arena@0.9.21 run --config arena.toml
|
|
83
|
+
bunx @lythos/skill-arena@0.9.23 run --config arena.toml
|
|
84
84
|
```
|
|
85
85
|
|
|
86
86
|
### CLI-flag mode (backward compat)
|
|
87
87
|
|
|
88
88
|
```
|
|
89
|
-
bunx @lythos/skill-arena@0.9.21 run \
|
|
89
|
+
bunx @lythos/skill-arena@0.9.23 run \
|
|
90
90
|
--task ./TASK-arena.md \
|
|
91
91
|
--players ./players/claude.toml \
|
|
92
92
|
--decks ./decks/run-01.toml,./decks/run-02.toml \
|
|
@@ -96,13 +96,13 @@ bunx @lythos/skill-arena@0.9.21 run \
|
|
|
96
96
|
### Scaffold mode (legacy, manual execution)
|
|
97
97
|
|
|
98
98
|
```
|
|
99
|
-
bunx @lythos/skill-arena@0.9.21 scaffold --task "..." --skills a,b
|
|
99
|
+
bunx @lythos/skill-arena@0.9.23 scaffold --task "..." --skills a,b
|
|
100
100
|
```
|
|
101
101
|
|
|
102
102
|
### Viz
|
|
103
103
|
|
|
104
104
|
```bash
|
|
105
|
-
bunx @lythos/skill-arena@0.9.21 viz runs/arena-<id>/
|
|
105
|
+
bunx @lythos/skill-arena@0.9.23 viz runs/arena-<id>/
|
|
106
106
|
```
|
|
107
107
|
|
|
108
108
|
## Skill Documentation
|
|
@@ -116,7 +116,7 @@ The agent-visible **Skill** layer documentation is here:
|
|
|
116
116
|
Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
|
|
117
117
|
|
|
118
118
|
```
|
|
119
|
-
Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.21 ...
|
|
119
|
+
Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.23 ...
|
|
120
120
|
Skill (packages/<name>/skill/) → build → SKILL.md + thin scripts
|
|
121
121
|
Output (skills/<name>/) → git commit → agent-visible skill
|
|
122
122
|
```
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lythos/skill-arena",
|
|
3
|
-
"version": "0.9.21",
|
|
3
|
+
"version": "0.9.23",
|
|
4
4
|
"description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-agent",
|
|
@@ -40,5 +40,8 @@
|
|
|
40
40
|
"@lythos/test-utils": "^0.9.1",
|
|
41
41
|
"zod": "^3.24.0",
|
|
42
42
|
"zod-to-json-schema": "^3.25.2"
|
|
43
|
+
},
|
|
44
|
+
"optionalDependencies": {
|
|
45
|
+
"@lythos/agent-adapter-claude-sdk": "workspace:*"
|
|
43
46
|
}
|
|
44
47
|
}
|
package/src/cli.ts
CHANGED
|
@@ -9,6 +9,14 @@ import {
|
|
|
9
9
|
existsSync, mkdirSync, writeFileSync, readFileSync,
|
|
10
10
|
} from 'node:fs'
|
|
11
11
|
import { join, resolve, basename } from 'node:path'
|
|
12
|
+
import {
|
|
13
|
+
parseDeckSkills,
|
|
14
|
+
checkSkillExistence,
|
|
15
|
+
validateLinkResult,
|
|
16
|
+
buildCopyPlan,
|
|
17
|
+
resolveColdPoolDir,
|
|
18
|
+
formatSkillWarnings,
|
|
19
|
+
} from './preflight'
|
|
12
20
|
|
|
13
21
|
// ── 简单的 slugify ──────────────────────────────────────────
|
|
14
22
|
function slugify(input: string): string {
|
|
@@ -88,8 +96,8 @@ async function agentRun(args: string[]) {
|
|
|
88
96
|
console.error('❌ --deck <path> is required')
|
|
89
97
|
process.exit(1)
|
|
90
98
|
}
|
|
91
|
-
if (!opts.task && !opts.brief) {
|
|
92
|
-
console.error('❌ --task <path> or --brief "<prompt>" is required')
|
|
99
|
+
if (!opts.task && (!opts.brief || !opts.brief.trim())) {
|
|
100
|
+
console.error('❌ --task <path> or --brief "<prompt>" is required and cannot be empty')
|
|
93
101
|
process.exit(1)
|
|
94
102
|
}
|
|
95
103
|
|
|
@@ -97,39 +105,9 @@ async function agentRun(args: string[]) {
|
|
|
97
105
|
const deckPath = resolve(opts.deck)
|
|
98
106
|
if (!existsSync(deckPath)) { console.error(`❌ Deck file not found: ${deckPath}`); process.exit(1) }
|
|
99
107
|
|
|
100
|
-
// Resolve task: either from file, or create temp task from --brief
|
|
101
|
-
let taskPath: string
|
|
102
|
-
if (opts.task) {
|
|
103
|
-
taskPath = resolve(opts.task)
|
|
104
|
-
if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}`); process.exit(1) }
|
|
105
|
-
} else {
|
|
106
|
-
const { mkdtempSync, writeFileSync } = await import('node:fs')
|
|
107
|
-
const { tmpdir } = await import('node:os')
|
|
108
|
-
const tmpDir = mkdtempSync(join(tmpdir(), 'arena-brief-'))
|
|
109
|
-
taskPath = join(tmpDir, 'TASK.md')
|
|
110
|
-
const briefTask = `---
|
|
111
|
-
name: ad-hoc task
|
|
112
|
-
description: ${opts.brief!.slice(0, 80)}
|
|
113
|
-
timeout: 120000
|
|
114
|
-
---
|
|
115
|
-
|
|
116
|
-
## Given
|
|
117
|
-
- You are an AI agent with the skills declared in the deck
|
|
118
|
-
|
|
119
|
-
## When
|
|
120
|
-
${opts.brief}
|
|
121
|
-
|
|
122
|
-
## Then
|
|
123
|
-
- Write your output to output.md
|
|
124
|
-
- The output should be complete and well-structured
|
|
125
|
-
|
|
126
|
-
## Judge
|
|
127
|
-
Evaluate whether the output is complete, accurate, and well-structured.
|
|
128
|
-
`
|
|
129
|
-
writeFileSync(taskPath, briefTask, 'utf-8')
|
|
130
|
-
}
|
|
131
|
-
|
|
132
108
|
const { useAgent } = await import('@lythos/test-utils/agents')
|
|
109
|
+
// Optional: register claude-sdk adapter if the package is installed
|
|
110
|
+
try { await import('@lythos/agent-adapter-claude-sdk') } catch { /* package not installed */ }
|
|
133
111
|
const { runAgentScenario } = await import('@lythos/test-utils/agent-bdd')
|
|
134
112
|
const { resolvePlayer } = await import('./player')
|
|
135
113
|
const { readFileSync, writeFileSync, mkdirSync } = await import('node:fs')
|
|
@@ -139,27 +117,87 @@ Evaluate whether the output is complete, accurate, and well-structured.
|
|
|
139
117
|
const outDir = opts.out ? resolve(opts.out) : join(process.cwd(), `agent-output-${new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19)}`)
|
|
140
118
|
mkdirSync(outDir, { recursive: true })
|
|
141
119
|
|
|
120
|
+
// Resolve task: --brief builds scenario directly, --task reads .agent.md file
|
|
121
|
+
const scenarioOpt: Record<string, unknown> = {}
|
|
122
|
+
if (opts.task) {
|
|
123
|
+
const taskPath = resolve(opts.task)
|
|
124
|
+
if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}`); process.exit(1) }
|
|
125
|
+
scenarioOpt.scenarioPath = taskPath
|
|
126
|
+
} else {
|
|
127
|
+
scenarioOpt.scenario = {
|
|
128
|
+
name: 'ad-hoc task',
|
|
129
|
+
description: opts.brief!.slice(0, 80),
|
|
130
|
+
timeout: 120000,
|
|
131
|
+
given: { deck: {} },
|
|
132
|
+
when: opts.brief!,
|
|
133
|
+
then: ['Write your output to output.md', 'The output should be complete and well-structured'],
|
|
134
|
+
judge: 'Evaluate whether the output is complete, accurate, and well-structured.',
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
|
|
142
138
|
console.log(`🤖 agent-run: ${player} × ${deckPath}`)
|
|
143
|
-
console.log(`📋 task: ${taskPath}`)
|
|
139
|
+
if (opts.task) console.log(`📋 task: ${resolve(opts.task!)}`)
|
|
140
|
+
else console.log(`📋 brief: ${opts.brief!.slice(0, 60)}...`)
|
|
144
141
|
|
|
145
142
|
let agentWorkdir = ''
|
|
146
143
|
const result = await runAgentScenario({
|
|
147
|
-
|
|
144
|
+
...scenarioOpt,
|
|
148
145
|
agent,
|
|
149
146
|
async setupWorkdir(_scenario, workdir) {
|
|
150
147
|
agentWorkdir = workdir
|
|
151
148
|
mkdirSync(workdir, { recursive: true })
|
|
152
149
|
writeFileSync(join(workdir, 'skill-deck.toml'), readFileSync(deckPath, 'utf-8'))
|
|
153
150
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
)
|
|
158
|
-
|
|
151
|
+
// ── Pre-flight: deck link (skip if deck declares no skills) ──
|
|
152
|
+
const deckRaw = readFileSync(join(workdir, 'skill-deck.toml'), 'utf-8')
|
|
153
|
+
let deckParsed: Record<string, any> = {}
|
|
154
|
+
try { deckParsed = Bun.TOML.parse(deckRaw) as Record<string, any> } catch {}
|
|
155
|
+
const hasSkills = parseDeckSkills(deckParsed).length > 0
|
|
156
|
+
|
|
157
|
+
if (hasSkills) {
|
|
158
|
+
// Prefer local dev CLI over bunx (bunx needs tempdir write, blocked by some sandboxes)
|
|
159
|
+
const { existsSync: es2 } = await import('node:fs')
|
|
160
|
+
const localDeckCli = join(import.meta.dir, '..', '..', 'lythoskill-deck', 'src', 'cli.ts')
|
|
161
|
+
const linkCmd = es2(localDeckCli)
|
|
162
|
+
? ['bun', localDeckCli, 'link']
|
|
163
|
+
: ['bunx', '@lythos/skill-deck', 'link']
|
|
164
|
+
const linkProc = Bun.spawn(linkCmd,
|
|
165
|
+
{ cwd: workdir, env: { ...process.env, HOME: process.env.HOME! } },
|
|
166
|
+
)
|
|
167
|
+
await linkProc.exited
|
|
168
|
+
const linkStderr = await new Response(linkProc.stderr).text()
|
|
169
|
+
const linkResult = validateLinkResult(linkProc.exitCode, linkStderr)
|
|
170
|
+
if (!linkResult.ok) {
|
|
171
|
+
console.error(`❌ ${linkResult.error}`)
|
|
172
|
+
process.exit(1)
|
|
173
|
+
}
|
|
174
|
+
} else {
|
|
175
|
+
console.log('ℹ️ No skills declared in deck — skipping link')
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
// ── Pre-flight: skill existence check (reuses deckParsed from above) ─
|
|
179
|
+
const { existsSync: es } = await import('node:fs')
|
|
180
|
+
const { homedir: hd } = await import('node:os')
|
|
181
|
+
try {
|
|
182
|
+
const coldPoolDefault = join(hd(), '.agents', 'skill-repos')
|
|
183
|
+
const coldPoolDir = resolveColdPoolDir(
|
|
184
|
+
deckParsed?.deck?.cold_pool,
|
|
185
|
+
hd(),
|
|
186
|
+
coldPoolDefault
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
const skills = parseDeckSkills(deckParsed)
|
|
190
|
+
const checks = checkSkillExistence(skills, coldPoolDir, es)
|
|
191
|
+
for (const warning of formatSkillWarnings(checks)) {
|
|
192
|
+
console.warn(`⚠️ ${warning}`)
|
|
193
|
+
}
|
|
194
|
+
} catch (e) {
|
|
195
|
+
console.warn('⚠️ Could not check skill existence:', e instanceof Error ? e.message : e)
|
|
196
|
+
}
|
|
159
197
|
},
|
|
160
198
|
})
|
|
161
199
|
|
|
162
|
-
// Copy agent output to outDir
|
|
200
|
+
// ── Copy agent output to outDir ──────────────────────────────────
|
|
163
201
|
writeFileSync(join(outDir, 'agent-stdout.txt'), result.agentResult.stdout, 'utf-8')
|
|
164
202
|
if (result.agentResult.stderr) writeFileSync(join(outDir, 'agent-stderr.txt'), result.agentResult.stderr, 'utf-8')
|
|
165
203
|
if (result.verdict) writeFileSync(join(outDir, 'judge-verdict.json'), JSON.stringify(result.verdict, null, 2) + '\n', 'utf-8')
|
|
@@ -167,16 +205,31 @@ Evaluate whether the output is complete, accurate, and well-structured.
|
|
|
167
205
|
// Copy all agent-produced files from workdir (output.md, output.docx, etc.)
|
|
168
206
|
// Skip .claude/ (symlink dir) and deck artifacts. Recursive so docx/pdf work.
|
|
169
207
|
if (agentWorkdir) {
|
|
170
|
-
const { cpSync, readdirSync } = await import('node:fs')
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
const
|
|
177
|
-
|
|
208
|
+
const { cpSync, readdirSync, existsSync: es2 } = await import('node:fs')
|
|
209
|
+
if (!es2(agentWorkdir)) {
|
|
210
|
+
console.warn(`⚠️ Agent workdir vanished before copy: ${agentWorkdir}`)
|
|
211
|
+
} else {
|
|
212
|
+
const skipSet = new Set(['.claude', 'skill-deck.toml', 'skill-deck.lock'])
|
|
213
|
+
try {
|
|
214
|
+
const entries = readdirSync(agentWorkdir)
|
|
215
|
+
const plan = buildCopyPlan(agentWorkdir, outDir, entries, skipSet)
|
|
216
|
+
for (const { src, dest, name } of plan) {
|
|
217
|
+
try {
|
|
218
|
+
cpSync(src, dest, { recursive: true })
|
|
219
|
+
} catch (e) {
|
|
220
|
+
console.warn(`⚠️ Failed to copy agent output: ${name} — ${e instanceof Error ? e.message : e}`)
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
} catch (e) {
|
|
224
|
+
console.warn(`⚠️ Failed to read agent workdir for copy: ${e instanceof Error ? e.message : e}`)
|
|
178
225
|
}
|
|
179
|
-
}
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
// ── Post-flight: output validation ──────────────────────────────
|
|
230
|
+
if (!result.agentResult.stdout || result.agentResult.stdout.trim().length === 0) {
|
|
231
|
+
console.warn('⚠️ Agent produced empty stdout — the task may have failed silently.')
|
|
232
|
+
console.warn(` Agent stderr: ${(result.agentResult.stderr || '(empty)').slice(0, 200)}`)
|
|
180
233
|
}
|
|
181
234
|
|
|
182
235
|
console.log(`\n✅ Agent complete (${result.agentResult.durationMs}ms)`)
|
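package/src/player.ts
CHANGED
[hunk body for package/src/player.ts (+1 -0) is missing from this capture]
package/src/preflight.test.ts
ADDED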
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* preflight.test.ts — TDD tests for arena agent-run pre-flight pure functions
|
|
3
|
+
*
|
|
4
|
+
* Coverage targets:
|
|
5
|
+
* parseDeckSkills — all TOML formats, edge cases
|
|
6
|
+
* checkSkillExistence — cold pool hit/miss, path resolution
|
|
7
|
+
* validateLinkResult — exit codes, error formatting
|
|
8
|
+
* buildCopyPlan — skip set, path mapping
|
|
9
|
+
* resolveColdPoolDir — tilde expansion, fallback
|
|
10
|
+
* formatSkillWarnings — warning string generation
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { describe, test, expect } from 'bun:test'
|
|
14
|
+
import {
|
|
15
|
+
parseDeckSkills,
|
|
16
|
+
checkSkillExistence,
|
|
17
|
+
validateLinkResult,
|
|
18
|
+
buildCopyPlan,
|
|
19
|
+
resolveColdPoolDir,
|
|
20
|
+
formatSkillWarnings,
|
|
21
|
+
} from './preflight'
|
|
22
|
+
|
|
23
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
24
|
+
// parseDeckSkills
|
|
25
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
26
|
+
|
|
27
|
+
describe('parseDeckSkills', () => {
|
|
28
|
+
|
|
29
|
+
test('empty deck → empty array', () => {
|
|
30
|
+
expect(parseDeckSkills({})).toEqual([])
|
|
31
|
+
})
|
|
32
|
+
|
|
33
|
+
test('deck with no skill sections → empty array', () => {
|
|
34
|
+
expect(parseDeckSkills({ deck: { max_cards: 10 } })).toEqual([])
|
|
35
|
+
})
|
|
36
|
+
|
|
37
|
+
test('inline-table format: single tool skill with path', () => {
|
|
38
|
+
const parsed = {
|
|
39
|
+
tool: {
|
|
40
|
+
skills: {
|
|
41
|
+
pdf: { path: 'github.com/anthropics/skills/skills/pdf' }
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
46
|
+
{ name: 'pdf', path: 'github.com/anthropics/skills/skills/pdf', section: 'tool' }
|
|
47
|
+
])
|
|
48
|
+
})
|
|
49
|
+
|
|
50
|
+
test('inline-table format: multiple skills', () => {
|
|
51
|
+
const parsed = {
|
|
52
|
+
tool: {
|
|
53
|
+
skills: {
|
|
54
|
+
pdf: { path: 'github.com/anthropics/skills/skills/pdf' },
|
|
55
|
+
docx: { path: 'github.com/anthropics/skills/skills/docx' },
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
60
|
+
{ name: 'pdf', path: 'github.com/anthropics/skills/skills/pdf', section: 'tool' },
|
|
61
|
+
{ name: 'docx', path: 'github.com/anthropics/skills/skills/docx', section: 'tool' },
|
|
62
|
+
])
|
|
63
|
+
})
|
|
64
|
+
|
|
65
|
+
test('array format: skills = ["a", "b"]', () => {
|
|
66
|
+
const parsed = {
|
|
67
|
+
tool: {
|
|
68
|
+
skills: ['web-search', 'docx']
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
72
|
+
{ name: 'web-search', path: null, section: 'tool' },
|
|
73
|
+
{ name: 'docx', path: null, section: 'tool' },
|
|
74
|
+
])
|
|
75
|
+
})
|
|
76
|
+
|
|
77
|
+
test('innate section parsed separately', () => {
|
|
78
|
+
const parsed = {
|
|
79
|
+
innate: {
|
|
80
|
+
skills: {
|
|
81
|
+
deck: { path: 'github.com/lythos-labs/lythoskill/skills/lythoskill-deck' }
|
|
82
|
+
}
|
|
83
|
+
},
|
|
84
|
+
tool: {
|
|
85
|
+
skills: {
|
|
86
|
+
pdf: { path: 'github.com/anthropics/skills/skills/pdf' }
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
91
|
+
{ name: 'deck', path: 'github.com/lythos-labs/lythoskill/skills/lythoskill-deck', section: 'innate' },
|
|
92
|
+
{ name: 'pdf', path: 'github.com/anthropics/skills/skills/pdf', section: 'tool' },
|
|
93
|
+
])
|
|
94
|
+
})
|
|
95
|
+
|
|
96
|
+
test('transient section parsed', () => {
|
|
97
|
+
const parsed = {
|
|
98
|
+
transient: {
|
|
99
|
+
skills: {
|
|
100
|
+
experiment: { path: 'localhost/my-experiment' }
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
105
|
+
{ name: 'experiment', path: 'localhost/my-experiment', section: 'transient' }
|
|
106
|
+
])
|
|
107
|
+
})
|
|
108
|
+
|
|
109
|
+
test('object entry without path → path=null', () => {
|
|
110
|
+
const parsed = {
|
|
111
|
+
tool: {
|
|
112
|
+
skills: {
|
|
113
|
+
bare: {} // no path field
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
118
|
+
{ name: 'bare', path: null, section: 'tool' }
|
|
119
|
+
])
|
|
120
|
+
})
|
|
121
|
+
|
|
122
|
+
test('object entry with non-string path → path=null', () => {
|
|
123
|
+
const parsed = {
|
|
124
|
+
tool: {
|
|
125
|
+
skills: {
|
|
126
|
+
weird: { path: 42 } // number, not string
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
131
|
+
{ name: 'weird', path: null, section: 'tool' }
|
|
132
|
+
])
|
|
133
|
+
})
|
|
134
|
+
|
|
135
|
+
test('array entry that is not a string → skipped', () => {
|
|
136
|
+
const parsed = {
|
|
137
|
+
tool: { skills: ['valid', 123, null, 'also-valid'] }
|
|
138
|
+
}
|
|
139
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
140
|
+
{ name: 'valid', path: null, section: 'tool' },
|
|
141
|
+
{ name: 'also-valid', path: null, section: 'tool' },
|
|
142
|
+
])
|
|
143
|
+
})
|
|
144
|
+
|
|
145
|
+
test('all three sections populated → ordered innate, tool, transient', () => {
|
|
146
|
+
const parsed = {
|
|
147
|
+
innate: { skills: { a: { path: '/a' } } },
|
|
148
|
+
tool: { skills: { b: { path: '/b' } } },
|
|
149
|
+
transient: { skills: { c: { path: '/c' } } },
|
|
150
|
+
}
|
|
151
|
+
expect(parseDeckSkills(parsed)).toEqual([
|
|
152
|
+
{ name: 'a', path: '/a', section: 'innate' },
|
|
153
|
+
{ name: 'b', path: '/b', section: 'tool' },
|
|
154
|
+
{ name: 'c', path: '/c', section: 'transient' },
|
|
155
|
+
])
|
|
156
|
+
})
|
|
157
|
+
})
|
|
158
|
+
|
|
159
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
160
|
+
// checkSkillExistence
|
|
161
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
162
|
+
|
|
163
|
+
describe('checkSkillExistence', () => {
|
|
164
|
+
|
|
165
|
+
test('empty skills → empty array', () => {
|
|
166
|
+
const exists = (_: string) => true
|
|
167
|
+
expect(checkSkillExistence([], '/cold', exists)).toEqual([])
|
|
168
|
+
})
|
|
169
|
+
|
|
170
|
+
test('skill with explicit path → resolves <coldPool>/<path>/SKILL.md', () => {
|
|
171
|
+
const exists = (p: string) => p === '/cold/github.com/owner/repo/skills/my-skill/SKILL.md'
|
|
172
|
+
const skills = [{ name: 'my-skill', path: 'github.com/owner/repo/skills/my-skill', section: 'tool' }]
|
|
173
|
+
const result = checkSkillExistence(skills, '/cold', exists)
|
|
174
|
+
expect(result).toEqual([
|
|
175
|
+
{ name: 'my-skill', expectedPath: '/cold/github.com/owner/repo/skills/my-skill/SKILL.md', found: true, section: 'tool' }
|
|
176
|
+
])
|
|
177
|
+
})
|
|
178
|
+
|
|
179
|
+
test('skill without path (array format) → resolves <coldPool>/<name>/SKILL.md', () => {
|
|
180
|
+
const exists = (p: string) => p === '/cold/web-search/SKILL.md'
|
|
181
|
+
const skills = [{ name: 'web-search', path: null, section: 'tool' }]
|
|
182
|
+
const result = checkSkillExistence(skills, '/cold', exists)
|
|
183
|
+
expect(result).toEqual([
|
|
184
|
+
{ name: 'web-search', expectedPath: '/cold/web-search/SKILL.md', found: true, section: 'tool' }
|
|
185
|
+
])
|
|
186
|
+
})
|
|
187
|
+
|
|
188
|
+
test('HTTP path → uses name as fallback for path resolution', () => {
|
|
189
|
+
const exists = (p: string) => p === '/cold/my-skill/SKILL.md'
|
|
190
|
+
const skills = [{ name: 'my-skill', path: 'https://example.com/deck.toml', section: 'tool' }]
|
|
191
|
+
const result = checkSkillExistence(skills, '/cold', exists)
|
|
192
|
+
expect(result).toEqual([
|
|
193
|
+
{ name: 'my-skill', expectedPath: '/cold/my-skill/SKILL.md', found: true, section: 'tool' }
|
|
194
|
+
])
|
|
195
|
+
})
|
|
196
|
+
|
|
197
|
+
test('all found → all found=true', () => {
|
|
198
|
+
const exists = (_: string) => true
|
|
199
|
+
const skills = [
|
|
200
|
+
{ name: 'a', path: '/a', section: 'tool' },
|
|
201
|
+
{ name: 'b', path: '/b', section: 'tool' },
|
|
202
|
+
]
|
|
203
|
+
expect(checkSkillExistence(skills, '/cold', exists)).toEqual([
|
|
204
|
+
{ name: 'a', expectedPath: '/cold//a/SKILL.md', found: true, section: 'tool' },
|
|
205
|
+
{ name: 'b', expectedPath: '/cold//b/SKILL.md', found: true, section: 'tool' },
|
|
206
|
+
])
|
|
207
|
+
})
|
|
208
|
+
|
|
209
|
+
test('some missing → mixed found/not-found', () => {
|
|
210
|
+
const exists = (p: string) => p.includes('a')
|
|
211
|
+
const skills = [
|
|
212
|
+
{ name: 'a', path: '/a', section: 'tool' },
|
|
213
|
+
{ name: 'b', path: '/b', section: 'tool' },
|
|
214
|
+
]
|
|
215
|
+
expect(checkSkillExistence(skills, '/cold', exists)).toEqual([
|
|
216
|
+
{ name: 'a', expectedPath: '/cold//a/SKILL.md', found: true, section: 'tool' },
|
|
217
|
+
{ name: 'b', expectedPath: '/cold//b/SKILL.md', found: false, section: 'tool' },
|
|
218
|
+
])
|
|
219
|
+
})
|
|
220
|
+
|
|
221
|
+
test('different coldPoolDir → different expectedPath prefix', () => {
|
|
222
|
+
const exists = (_: string) => true
|
|
223
|
+
const skills = [{ name: 'x', path: 'github.com/x', section: 'tool' }]
|
|
224
|
+
const a = checkSkillExistence(skills, '/home/user/.agents/skill-repos', exists)
|
|
225
|
+
const b = checkSkillExistence(skills, '/opt/cold', exists)
|
|
226
|
+
expect(a[0].expectedPath).toStartWith('/home/user/.agents/skill-repos/')
|
|
227
|
+
expect(b[0].expectedPath).toStartWith('/opt/cold/')
|
|
228
|
+
})
|
|
229
|
+
})
|
|
230
|
+
|
|
231
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
232
|
+
// validateLinkResult
|
|
233
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
234
|
+
|
|
235
|
+
describe('validateLinkResult', () => {
|
|
236
|
+
|
|
237
|
+
test('exitCode 0 → ok', () => {
|
|
238
|
+
expect(validateLinkResult(0, '')).toEqual({ ok: true })
|
|
239
|
+
})
|
|
240
|
+
|
|
241
|
+
test('exitCode 0 with stderr → still ok (stderr is not always errors)', () => {
|
|
242
|
+
expect(validateLinkResult(0, 'some warning output')).toEqual({ ok: true })
|
|
243
|
+
})
|
|
244
|
+
|
|
245
|
+
test('exitCode 1 → not ok, error contains snippet', () => {
|
|
246
|
+
const result = validateLinkResult(1, 'something went wrong')
|
|
247
|
+
expect(result.ok).toBe(false)
|
|
248
|
+
expect(result.error).toContain('exit 1')
|
|
249
|
+
expect(result.error).toContain('something went wrong')
|
|
250
|
+
})
|
|
251
|
+
|
|
252
|
+
test('exitCode null → not ok (null !== 0)', () => {
|
|
253
|
+
const result = validateLinkResult(null, 'process killed')
|
|
254
|
+
expect(result.ok).toBe(false)
|
|
255
|
+
expect(result.error).toContain('exit null')
|
|
256
|
+
})
|
|
257
|
+
|
|
258
|
+
test('stderr truncated to 300 chars in error message', () => {
|
|
259
|
+
const longStderr = 'x'.repeat(500)
|
|
260
|
+
const result = validateLinkResult(1, longStderr)
|
|
261
|
+
expect(result.ok).toBe(false)
|
|
262
|
+
expect(result.error!.length).toBeLessThan(350) // "Deck link failed (exit 1): " + 300 chars
|
|
263
|
+
})
|
|
264
|
+
|
|
265
|
+
test('exitCode 0, empty stderr → ok with no error field', () => {
|
|
266
|
+
const result = validateLinkResult(0, '')
|
|
267
|
+
expect(result.ok).toBe(true)
|
|
268
|
+
expect(result.error).toBeUndefined()
|
|
269
|
+
})
|
|
270
|
+
})
|
|
271
|
+
|
|
272
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
273
|
+
// buildCopyPlan
|
|
274
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
275
|
+
|
|
276
|
+
describe('buildCopyPlan', () => {
|
|
277
|
+
|
|
278
|
+
test('empty entries → empty plan', () => {
|
|
279
|
+
expect(buildCopyPlan('/work', '/out', [], new Set())).toEqual([])
|
|
280
|
+
})
|
|
281
|
+
|
|
282
|
+
test('all skipped → empty plan', () => {
|
|
283
|
+
const skip = new Set(['.claude', 'skill-deck.toml'])
|
|
284
|
+
expect(buildCopyPlan('/work', '/out', ['.claude', 'skill-deck.toml'], skip)).toEqual([])
|
|
285
|
+
})
|
|
286
|
+
|
|
287
|
+
test('normal entries → mapped to outDir', () => {
|
|
288
|
+
const skip = new Set<string>()
|
|
289
|
+
expect(buildCopyPlan('/work', '/out', ['output.md', 'report.docx'], skip)).toEqual([
|
|
290
|
+
{ src: '/work/output.md', dest: '/out/output.md', name: 'output.md' },
|
|
291
|
+
{ src: '/work/report.docx', dest: '/out/report.docx', name: 'report.docx' },
|
|
292
|
+
])
|
|
293
|
+
})
|
|
294
|
+
|
|
295
|
+
test('mixed skip and non-skip → only non-skipped', () => {
|
|
296
|
+
const skip = new Set(['.claude', 'skill-deck.toml', 'skill-deck.lock'])
|
|
297
|
+
const entries = ['.claude', 'output.md', 'skill-deck.toml', 'report.docx', 'skill-deck.lock']
|
|
298
|
+
expect(buildCopyPlan('/work', '/out', entries, skip)).toEqual([
|
|
299
|
+
{ src: '/work/output.md', dest: '/out/output.md', name: 'output.md' },
|
|
300
|
+
{ src: '/work/report.docx', dest: '/out/report.docx', name: 'report.docx' },
|
|
301
|
+
])
|
|
302
|
+
})
|
|
303
|
+
|
|
304
|
+
test('preserves entry order', () => {
|
|
305
|
+
const skip = new Set<string>()
|
|
306
|
+
const entries = ['c', 'a', 'b']
|
|
307
|
+
expect(buildCopyPlan('/w', '/o', entries, skip).map(e => e.name)).toEqual(['c', 'a', 'b'])
|
|
308
|
+
})
|
|
309
|
+
|
|
310
|
+
test('nested paths work (agent-produced subdirectories)', () => {
|
|
311
|
+
const skip = new Set<string>()
|
|
312
|
+
expect(buildCopyPlan('/work', '/out', ['subdir/output.pdf'], skip)).toEqual([
|
|
313
|
+
{ src: '/work/subdir/output.pdf', dest: '/out/subdir/output.pdf', name: 'subdir/output.pdf' },
|
|
314
|
+
])
|
|
315
|
+
})
|
|
316
|
+
})
|
|
317
|
+
|
|
318
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
319
|
+
// resolveColdPoolDir
|
|
320
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
321
|
+
|
|
322
|
+
describe('resolveColdPoolDir', () => {
|
|
323
|
+
|
|
324
|
+
test('explicit absolute path → returned as-is', () => {
|
|
325
|
+
expect(resolveColdPoolDir('/opt/cold', '/home/user', '/fallback')).toBe('/opt/cold')
|
|
326
|
+
})
|
|
327
|
+
|
|
328
|
+
test('explicit relative path → returned as-is', () => {
|
|
329
|
+
expect(resolveColdPoolDir('my-cold-pool', '/home/user', '/fallback')).toBe('my-cold-pool')
|
|
330
|
+
})
|
|
331
|
+
|
|
332
|
+
test('tilde path → expanded with homeDir', () => {
|
|
333
|
+
expect(resolveColdPoolDir('~/.agents/skill-repos', '/home/user', '/fallback'))
|
|
334
|
+
.toBe('/home/user/.agents/skill-repos')
|
|
335
|
+
})
|
|
336
|
+
|
|
337
|
+
test('tilde at start only → expanded; tilde elsewhere not expanded', () => {
|
|
338
|
+
expect(resolveColdPoolDir('path/with~/tilde', '/home/user', '/fallback'))
|
|
339
|
+
.toBe('path/with~/tilde')
|
|
340
|
+
})
|
|
341
|
+
|
|
342
|
+
test('undefined → uses fallback', () => {
|
|
343
|
+
expect(resolveColdPoolDir(undefined, '/home/user', '/default/cold'))
|
|
344
|
+
.toBe('/default/cold')
|
|
345
|
+
})
|
|
346
|
+
|
|
347
|
+
test('empty string → uses fallback (|| operator)', () => {
|
|
348
|
+
expect(resolveColdPoolDir('', '/home/user', '/default/cold'))
|
|
349
|
+
.toBe('/default/cold')
|
|
350
|
+
})
|
|
351
|
+
})
|
|
352
|
+
|
|
353
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
354
|
+
// formatSkillWarnings
|
|
355
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
356
|
+
|
|
357
|
+
describe('formatSkillWarnings', () => {
|
|
358
|
+
|
|
359
|
+
test('all found → empty array', () => {
|
|
360
|
+
const checks = [
|
|
361
|
+
{ name: 'a', expectedPath: '/p/a/SKILL.md', found: true, section: 'tool' },
|
|
362
|
+
{ name: 'b', expectedPath: '/p/b/SKILL.md', found: true, section: 'tool' },
|
|
363
|
+
]
|
|
364
|
+
expect(formatSkillWarnings(checks)).toEqual([])
|
|
365
|
+
})
|
|
366
|
+
|
|
367
|
+
test('some missing → one warning per missing skill', () => {
|
|
368
|
+
const checks = [
|
|
369
|
+
{ name: 'pdf', expectedPath: '/cold/pdf/SKILL.md', found: false, section: 'tool' },
|
|
370
|
+
{ name: 'docx', expectedPath: '/cold/docx/SKILL.md', found: true, section: 'tool' },
|
|
371
|
+
]
|
|
372
|
+
expect(formatSkillWarnings(checks)).toEqual([
|
|
373
|
+
'Skill "pdf" declared in deck [tool] but SKILL.md not found at: /cold/pdf/SKILL.md',
|
|
374
|
+
])
|
|
375
|
+
})
|
|
376
|
+
|
|
377
|
+
test('all missing → warning for each', () => {
|
|
378
|
+
const checks = [
|
|
379
|
+
{ name: 'a', expectedPath: '/p/a/SKILL.md', found: false, section: 'innate' },
|
|
380
|
+
{ name: 'b', expectedPath: '/p/b/SKILL.md', found: false, section: 'tool' },
|
|
381
|
+
]
|
|
382
|
+
expect(formatSkillWarnings(checks)).toHaveLength(2)
|
|
383
|
+
})
|
|
384
|
+
|
|
385
|
+
test('empty array → empty array', () => {
|
|
386
|
+
expect(formatSkillWarnings([])).toEqual([])
|
|
387
|
+
})
|
|
388
|
+
|
|
389
|
+
test('section name appears in warning string', () => {
|
|
390
|
+
const checks = [
|
|
391
|
+
{ name: 'x', expectedPath: '/p/x', found: false, section: 'transient' },
|
|
392
|
+
]
|
|
393
|
+
expect(formatSkillWarnings(checks)[0]).toContain('[transient]')
|
|
394
|
+
})
|
|
395
|
+
})
|
package/src/preflight.ts
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* preflight.ts — Arena agent-run pre-flight pure functions
|
|
3
|
+
*
|
|
4
|
+
* Extracted from cli.ts agentRun to enable unit testing.
|
|
5
|
+
* All functions are pure: no filesystem IO, no spawn, no console.
|
|
6
|
+
* IO is injected via function parameters (e.g., existsFn, readdirFn).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// ── Types ─────────────────────────────────────────────────────────────────
|
|
10
|
+
|
|
11
|
+
/**
 * One skill entry as declared in skill-deck.toml.
 */
export interface SkillDecl {
  /** TOML key (e.g., "pdf"). */
  name: string
  /** Explicit path from the inline-table format; null for the array format. */
  path: string | null
  /** Deck section holding the declaration: "innate" | "tool" | "transient". */
  section: string
}
|
|
17
|
+
|
|
18
|
+
/**
 * Outcome of checking a single declared skill against the cold pool.
 */
export interface SkillCheck {
  /** Skill name as declared in the deck. */
  name: string
  /** Resolved cold pool path that was checked. */
  expectedPath: string
  /** Whether the existence check reported the path as present. */
  found: boolean
  /** Deck section the skill was declared in. */
  section: string
}
|
|
25
|
+
|
|
26
|
+
/**
 * Outcome of validating a deck link run.
 */
export interface LinkResult {
  /** True when the link step is considered successful. */
  ok: boolean
  /** Human-readable failure description; may be absent. */
  error?: string
}
|
|
31
|
+
|
|
32
|
+
/**
 * One planned file copy operation.
 */
export interface CopyEntry {
  /** Source path of the entry. */
  src: string
  /** Destination path of the entry. */
  dest: string
  /** Entry basename, kept for error reporting. */
  name: string
}
|
|
38
|
+
|
|
39
|
+
// ── parseDeckSkills ──────────────────────────────────────────────────────
|
|
40
|
+
|
|
41
|
+
/**
 * Extract every skill declared in an already-parsed skill-deck.toml object.
 *
 * The `innate`, `tool` and `transient` sections are visited in that order.
 * Each section's `skills` value may use either TOML shape:
 *
 *   [tool.skills.pdf]          → { name: "pdf", path: "github.com/...", section: "tool" }
 *   path = "github.com/..."
 *
 *   skills = ["a", "b"]        → { name: "a", path: null, section: "tool" }
 *
 * Pure: parsed object → SkillDecl[]. No IO, no Bun.TOML dependency (caller parses first).
 *
 * @param deckParsed - The parsed deck configuration.
 * @returns Declared skills in section order. Non-string array items are
 *          ignored; table entries without a string `path` get `path: null`.
 */
export function parseDeckSkills(
  deckParsed: Record<string, any>
): SkillDecl[] {
  const declared: SkillDecl[] = []

  for (const section of ['innate', 'tool', 'transient']) {
    const raw: unknown = deckParsed?.[section]?.skills
    if (!raw) continue

    // Array format: skills = ["name1", "name2"]
    if (Array.isArray(raw)) {
      raw.forEach(item => {
        if (typeof item === 'string') {
          declared.push({ name: item, path: null, section })
        }
      })
      continue
    }

    // Anything that is neither an array nor a table declares no skills.
    if (typeof raw !== 'object') continue

    // Inline-table format: [tool.skills.name], path = "..."
    const table = raw as Record<string, any>
    for (const key of Object.keys(table)) {
      const declaredPath = table[key]?.path
      declared.push({
        name: key,
        path: typeof declaredPath === 'string' ? declaredPath : null,
        section,
      })
    }
  }

  return declared
}
|
|
80
|
+
|
|
81
|
+
// ── checkSkillExistence ──────────────────────────────────────────────────
|
|
82
|
+
|
|
83
|
+
/**
 * Check each declared skill against the cold pool filesystem.
 *
 * Path resolution per skill:
 *   - explicit local path                     → `<coldPoolDir>/<path>/SKILL.md`
 *   - no path (array format), an empty path,
 *     or an `http://` / `https://` URL path   → `<coldPoolDir>/<name>/SKILL.md`
 *
 * Every input skill yields exactly one result, in input order; URL-pathed
 * skills are not dropped, they are checked under their name instead.
 *
 * @param skills - Skills declared in the deck.
 * @param coldPoolDir - Cold pool root directory, joined with `/`.
 * @param existsFn - IO injection point — swap for real fs or mock. Called once
 *                   per skill with the resolved SKILL.md path.
 * @returns One check result per input skill.
 */
export function checkSkillExistence(
  skills: SkillDecl[],
  coldPoolDir: string,
  existsFn: (path: string) => boolean
): SkillCheck[] {
  // Only a real http(s) scheme marks a path as remote. A bare "http" prefix
  // test would misclassify local directories such as "httpd-tools/skill".
  const remoteUrl = /^https?:\/\//i

  return skills.map(skill => {
    const localPath = skill.path && !remoteUrl.test(skill.path) ? skill.path : null
    const resolvedName = localPath ?? skill.name
    const expectedPath = `${coldPoolDir}/${resolvedName}/SKILL.md`
    return {
      name: skill.name,
      expectedPath,
      found: existsFn(expectedPath),
      section: skill.section,
    }
  })
}
|
|
110
|
+
|
|
111
|
+
// ── validateLinkResult ───────────────────────────────────────────────────
|
|
112
|
+
|
|
113
|
+
/**
 * Classify the outcome of `bunx @lythos/skill-deck link`.
 *
 * Pure: (exitCode, stderr) → LinkResult.
 * An exit code of exactly 0 is success; stderr is not consulted in that case.
 * Any other exit code, including null, is a failure whose message carries the
 * first 300 characters of stderr.
 *
 * @param exitCode - Exit code of the link process, or null when none is available.
 * @param stderr - Captured standard error output of the link process.
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, error }`.
 */
export function validateLinkResult(
  exitCode: number | null,
  stderr: string
): LinkResult {
  if (exitCode === 0) {
    return { ok: true }
  }

  // Cap the stderr excerpt so the error message stays short.
  const excerpt = (stderr || '').slice(0, 300)
  return {
    ok: false,
    error: `Deck link failed (exit ${exitCode}): ${excerpt}`,
  }
}
|
|
132
|
+
|
|
133
|
+
// ── buildCopyPlan ────────────────────────────────────────────────────────
|
|
134
|
+
|
|
135
|
+
/**
 * Turn a list of workdir entry names into a copy plan targeting outDir.
 *
 * Names present in `skipSet` are left out. Every remaining name maps
 * `<workdir>/<name>` to `<outDir>/<name>`, preserving input order.
 * Pure: strings + set → CopyEntry[]. No filesystem access.
 *
 * @param workdir - Directory the entries come from.
 * @param outDir - Directory the entries are planned to be copied into.
 * @param entries - Entry names inside `workdir`.
 * @param skipSet - Entry names to exclude from the plan.
 * @returns One plan entry per non-skipped name.
 */
export function buildCopyPlan(
  workdir: string,
  outDir: string,
  entries: string[],
  skipSet: Set<string>
): CopyEntry[] {
  return entries
    .filter(entryName => !skipSet.has(entryName))
    .map(entryName => ({
      src: `${workdir}/${entryName}`,
      dest: `${outDir}/${entryName}`,
      name: entryName,
    }))
}
|
|
158
|
+
|
|
159
|
+
// ── resolveColdPoolDir ───────────────────────────────────────────────────
|
|
160
|
+
|
|
161
|
+
/**
 * Resolve the cold_pool root from deck config, expanding a leading `~`.
 *
 * The configured root is used when it is a non-empty string; otherwise
 * `fallbackDir` is used. Tilde expansion then applies only to a bare `~` or a
 * `~/` (or `~\`) prefix. Values such as `~user/pool` or `~backup` are returned
 * unchanged, because splicing `homeDir` into them would produce a wrong path.
 *
 * Pure: string → string. No filesystem access.
 *
 * @param coldPoolRoot - Root from the deck config; may be undefined or empty.
 * @param homeDir - Home directory substituted for the leading `~`.
 * @param fallbackDir - Root used when `coldPoolRoot` is undefined or empty.
 * @returns The resolved cold pool directory.
 */
export function resolveColdPoolDir(
  coldPoolRoot: string | undefined,
  homeDir: string,
  fallbackDir: string
): string {
  // `||` rather than `??`: an empty configured root also falls back.
  const raw = coldPoolRoot || fallbackDir

  if (raw === '~') return homeDir
  if (raw.startsWith('~/') || raw.startsWith('~\\')) {
    return `${homeDir}${raw.slice(1)}`
  }
  return raw
}
|
|
174
|
+
|
|
175
|
+
// ── formatSkillWarnings ──────────────────────────────────────────────────
|
|
176
|
+
|
|
177
|
+
/**
 * Turn skill check results into human-readable warning lines.
 *
 * Only checks whose `found` flag is false produce a line; input order is kept.
 * Pure: SkillCheck[] → string[].
 *
 * @param checks - Results of the skill existence checks.
 * @returns One warning string per missing skill; empty when nothing is missing.
 */
export function formatSkillWarnings(checks: SkillCheck[]): string[] {
  const warnings: string[] = []
  for (const check of checks) {
    if (check.found) continue
    warnings.push(
      `Skill "${check.name}" declared in deck [${check.section}] but SKILL.md not found at: ${check.expectedPath}`
    )
  }
  return warnings
}
|
package/src/runner.ts
CHANGED
|
@@ -3,6 +3,8 @@ import { join, resolve } from 'node:path'
|
|
|
3
3
|
import { tmpdir } from 'node:os'
|
|
4
4
|
import { runAgentScenario, type AgentScenario } from '@lythos/test-utils/agent-bdd'
|
|
5
5
|
import { useAgent } from '@lythos/test-utils/agents'
|
|
6
|
+
// Optional: register claude-sdk adapter if the package is installed
|
|
7
|
+
try { await import('@lythos/agent-adapter-claude-sdk') } catch { /* package not installed */ }
|
|
6
8
|
import { ArenaManifest, Player } from '@lythos/test-utils/schema'
|
|
7
9
|
import type { ArenaManifest as ArenaManifestType, JudgeVerdict } from '@lythos/test-utils/schema'
|
|
8
10
|
import { runComparativeJudge } from './comparative-judge'
|