@lythos/skill-arena 0.9.22 → 0.9.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -49,26 +49,26 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
49
49
  ```bash
50
50
  bun add -d @lythos/skill-arena
51
51
  # or use directly
52
- bunx @lythos/skill-arena@0.9.22 <command>
52
+ bunx @lythos/skill-arena@0.9.24 <command>
53
53
  ```
54
54
 
55
55
  ## Quick Start
56
56
 
57
57
  ```bash
58
58
  # Mode 1: Compare two skills on the same task
59
- bunx @lythos/skill-arena@0.9.22 \
59
+ bunx @lythos/skill-arena@0.9.24 \
60
60
  --task "Generate auth flow diagram" \
61
61
  --skills "design-doc-mermaid,mermaid-tools" \
62
62
  --criteria "syntax,context,token"
63
63
 
64
64
  # Mode 2: Compare full deck configurations
65
- bunx @lythos/skill-arena@0.9.22 \
65
+ bunx @lythos/skill-arena@0.9.24 \
66
66
  --task "Generate auth flow diagram" \
67
67
  --decks "./decks/minimal.toml,./decks/rich.toml" \
68
68
  --criteria "quality,token,maintainability"
69
69
 
70
70
  # Visualize results
71
- bunx @lythos/skill-arena@0.9.22 viz tmp/arena-<id>/
71
+ bunx @lythos/skill-arena@0.9.24 viz tmp/arena-<id>/
72
72
  ```
73
73
 
74
74
  ## Commands
@@ -77,16 +77,16 @@ bunx @lythos/skill-arena@0.9.22 viz tmp/arena-<id>/
77
77
 
78
78
  ```bash
79
79
  # Print execution plan without running
80
- bunx @lythos/skill-arena@0.9.22 run --config arena.toml --dry-run
80
+ bunx @lythos/skill-arena@0.9.24 run --config arena.toml --dry-run
81
81
 
82
82
  # Execute with per-side runs_per_side and statistical aggregation
83
- bunx @lythos/skill-arena@0.9.22 run --config arena.toml
83
+ bunx @lythos/skill-arena@0.9.24 run --config arena.toml
84
84
  ```
85
85
 
86
86
  ### CLI-flag mode (backward compat)
87
87
 
88
88
  ```
89
- bunx @lythos/skill-arena@0.9.22 run \
89
+ bunx @lythos/skill-arena@0.9.24 run \
90
90
  --task ./TASK-arena.md \
91
91
  --players ./players/claude.toml \
92
92
  --decks ./decks/run-01.toml,./decks/run-02.toml \
@@ -96,13 +96,13 @@ bunx @lythos/skill-arena@0.9.22 run \
96
96
  ### Scaffold mode (legacy, manual execution)
97
97
 
98
98
  ```
99
- bunx @lythos/skill-arena@0.9.22 scaffold --task "..." --skills a,b
99
+ bunx @lythos/skill-arena@0.9.24 scaffold --task "..." --skills a,b
100
100
  ```
101
101
 
102
102
  ### Viz
103
103
 
104
104
  ```bash
105
- bunx @lythos/skill-arena@0.9.22 viz runs/arena-<id>/
105
+ bunx @lythos/skill-arena@0.9.24 viz runs/arena-<id>/
106
106
  ```
107
107
 
108
108
  ## Skill Documentation
@@ -116,7 +116,7 @@ The agent-visible **Skill** layer documentation is here:
116
116
  Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
117
117
 
118
118
  ```
119
- Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.22 ...
119
+ Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.24 ...
120
120
  Skill (packages/<name>/skill/) → build → SKILL.md + thin scripts
121
121
  Output (skills/<name>/) → git commit → agent-visible skill
122
122
  ```
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lythos/skill-arena",
3
- "version": "0.9.22",
3
+ "version": "0.9.24",
4
4
  "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
5
5
  "keywords": [
6
6
  "ai-agent",
@@ -37,8 +37,12 @@
37
37
  "bun": ">=1.0.0"
38
38
  },
39
39
  "dependencies": {
40
+ "@lythos/cold-pool": "workspace:*",
40
41
  "@lythos/test-utils": "^0.9.1",
41
42
  "zod": "^3.24.0",
42
43
  "zod-to-json-schema": "^3.25.2"
44
+ },
45
+ "optionalDependencies": {
46
+ "@lythos/agent-adapter-claude-sdk": "workspace:*"
43
47
  }
44
48
  }
package/src/cli.ts CHANGED
@@ -9,6 +9,14 @@ import {
9
9
  existsSync, mkdirSync, writeFileSync, readFileSync,
10
10
  } from 'node:fs'
11
11
  import { join, resolve, basename } from 'node:path'
12
+ import {
13
+ parseDeckSkills,
14
+ checkSkillExistence,
15
+ validateLinkResult,
16
+ buildCopyPlan,
17
+ resolveColdPoolDir,
18
+ formatSkillWarnings,
19
+ } from './preflight'
12
20
 
13
21
  // ── 简单的 slugify ──────────────────────────────────────────
14
22
  function slugify(input: string): string {
@@ -29,8 +37,8 @@ function printHelp(): void {
29
37
  console.log(`🎭 lythoskill-arena — Skill comparison runner
30
38
 
31
39
  Usage:
32
- lythoskill-arena agent-run --task <path> --deck <path> [--player kimi] [--out <dir>]
33
- lythoskill-arena agent-run --brief "<prompt>" --deck <path> [--out <dir>]
40
+ lythoskill-arena agent-run --task <path> --deck <path> [--player kimi] [--out <dir>] [--timeout <ms>]
41
+ lythoskill-arena agent-run --brief "<prompt>" --deck <path> [--out <dir>] [--timeout <ms>]
34
42
  lythoskill-arena run --task <path> --players <A.toml,B.toml> --decks <A.toml,B.toml> --criteria <c1,c2,...> [--out <dir>]
35
43
  lythoskill-arena scaffold --task "<description>" --skills <skill1,skill2,...>
36
44
  lythoskill-arena scaffold --task "<description>" --decks <deck1,deck2,...>
@@ -82,14 +90,15 @@ async function agentRun(args: string[]) {
82
90
  else if (args[i] === '--deck' || args[i] === '-d') opts.deck = args[++i]
83
91
  else if (args[i] === '--player' || args[i] === '-p') opts.player = args[++i]
84
92
  else if (args[i] === '--out' || args[i] === '-o') opts.out = args[++i]
93
+ else if (args[i] === '--timeout') opts.timeout = args[++i]
85
94
  }
86
95
 
87
96
  if (!opts.deck) {
88
97
  console.error('❌ --deck <path> is required')
89
98
  process.exit(1)
90
99
  }
91
- if (!opts.task && !opts.brief) {
92
- console.error('❌ --task <path> or --brief "<prompt>" is required')
100
+ if (!opts.task && (!opts.brief || !opts.brief.trim())) {
101
+ console.error('❌ --task <path> or --brief "<prompt>" is required and cannot be empty')
93
102
  process.exit(1)
94
103
  }
95
104
 
@@ -97,39 +106,9 @@ async function agentRun(args: string[]) {
97
106
  const deckPath = resolve(opts.deck)
98
107
  if (!existsSync(deckPath)) { console.error(`❌ Deck file not found: ${deckPath}`); process.exit(1) }
99
108
 
100
- // Resolve task: either from file, or create temp task from --brief
101
- let taskPath: string
102
- if (opts.task) {
103
- taskPath = resolve(opts.task)
104
- if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}`); process.exit(1) }
105
- } else {
106
- const { mkdtempSync, writeFileSync } = await import('node:fs')
107
- const { tmpdir } = await import('node:os')
108
- const tmpDir = mkdtempSync(join(tmpdir(), 'arena-brief-'))
109
- taskPath = join(tmpDir, 'TASK.md')
110
- const briefTask = `---
111
- name: ad-hoc task
112
- description: ${opts.brief!.replace(/"/g, '\\"').slice(0, 80)}
113
- timeout: 120000
114
- ---
115
-
116
- ## Given
117
- - You are an AI agent with the skills declared in the deck
118
-
119
- ## When
120
- ${opts.brief}
121
-
122
- ## Then
123
- - Write your output to output.md
124
- - The output should be complete and well-structured
125
-
126
- ## Judge
127
- Evaluate whether the output is complete, accurate, and well-structured.
128
- `
129
- writeFileSync(taskPath, briefTask, 'utf-8')
130
- }
131
-
132
109
  const { useAgent } = await import('@lythos/test-utils/agents')
110
+ // Optional: register claude-sdk adapter if the package is installed
111
+ try { await import('@lythos/agent-adapter-claude-sdk') } catch { /* package not installed */ }
133
112
  const { runAgentScenario } = await import('@lythos/test-utils/agent-bdd')
134
113
  const { resolvePlayer } = await import('./player')
135
114
  const { readFileSync, writeFileSync, mkdirSync } = await import('node:fs')
@@ -139,27 +118,87 @@ Evaluate whether the output is complete, accurate, and well-structured.
139
118
  const outDir = opts.out ? resolve(opts.out) : join(process.cwd(), `agent-output-${new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19)}`)
140
119
  mkdirSync(outDir, { recursive: true })
141
120
 
121
+ // Resolve task: --brief builds scenario directly, --task reads .agent.md file
122
+ const scenarioOpt: Record<string, unknown> = {}
123
+ if (opts.task) {
124
+ const taskPath = resolve(opts.task)
125
+ if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}`); process.exit(1) }
126
+ scenarioOpt.scenarioPath = taskPath
127
+ } else {
128
+ scenarioOpt.scenario = {
129
+ name: 'ad-hoc task',
130
+ description: opts.brief!.slice(0, 80),
131
+ timeout: Number(opts.timeout ?? 120000),
132
+ given: { deck: {} },
133
+ when: opts.brief!,
134
+ then: ['Write your output to output.md', 'The output should be complete and well-structured'],
135
+ judge: 'Evaluate whether the output is complete, accurate, and well-structured.',
136
+ }
137
+ }
138
+
142
139
  console.log(`🤖 agent-run: ${player} × ${deckPath}`)
143
- console.log(`📋 task: ${taskPath}`)
140
+ if (opts.task) console.log(`📋 task: ${resolve(opts.task!)}`)
141
+ else console.log(`📋 brief: ${opts.brief!.slice(0, 60)}...`)
144
142
 
145
143
  let agentWorkdir = ''
146
144
  const result = await runAgentScenario({
147
- scenarioPath: taskPath,
145
+ ...scenarioOpt,
148
146
  agent,
149
147
  async setupWorkdir(_scenario, workdir) {
150
148
  agentWorkdir = workdir
151
149
  mkdirSync(workdir, { recursive: true })
152
150
  writeFileSync(join(workdir, 'skill-deck.toml'), readFileSync(deckPath, 'utf-8'))
153
151
 
154
- const linkProc = Bun.spawn(
155
- ['bunx', '@lythos/skill-deck', 'link'],
156
- { cwd: workdir, env: { ...process.env, HOME: process.env.HOME! } },
157
- )
158
- await linkProc.exited
152
+ // ── Pre-flight: deck link (skip if deck declares no skills) ──
153
+ const deckRaw = readFileSync(join(workdir, 'skill-deck.toml'), 'utf-8')
154
+ let deckParsed: Record<string, any> = {}
155
+ try { deckParsed = Bun.TOML.parse(deckRaw) as Record<string, any> } catch {}
156
+ const hasSkills = parseDeckSkills(deckParsed).length > 0
157
+
158
+ if (hasSkills) {
159
+ // Prefer local dev CLI over bunx (bunx needs tempdir write, blocked by some sandboxes)
160
+ const { existsSync: es2 } = await import('node:fs')
161
+ const localDeckCli = join(import.meta.dir, '..', '..', 'lythoskill-deck', 'src', 'cli.ts')
162
+ const linkCmd = es2(localDeckCli)
163
+ ? ['bun', localDeckCli, 'link']
164
+ : ['bunx', '@lythos/skill-deck', 'link']
165
+ const linkProc = Bun.spawn(linkCmd,
166
+ { cwd: workdir, env: { ...process.env, HOME: process.env.HOME! } },
167
+ )
168
+ await linkProc.exited
169
+ const linkStderr = await new Response(linkProc.stderr).text()
170
+ const linkResult = validateLinkResult(linkProc.exitCode, linkStderr)
171
+ if (!linkResult.ok) {
172
+ console.error(`❌ ${linkResult.error}`)
173
+ process.exit(1)
174
+ }
175
+ } else {
176
+ console.log('ℹ️ No skills declared in deck — skipping link')
177
+ }
178
+
179
+ // ── Pre-flight: skill existence check (reuses deckParsed from above) ─
180
+ const { existsSync: es } = await import('node:fs')
181
+ const { homedir: hd } = await import('node:os')
182
+ try {
183
+ const coldPoolDefault = join(hd(), '.agents', 'skill-repos')
184
+ const coldPoolDir = resolveColdPoolDir(
185
+ deckParsed?.deck?.cold_pool,
186
+ hd(),
187
+ coldPoolDefault
188
+ )
189
+
190
+ const skills = parseDeckSkills(deckParsed)
191
+ const checks = checkSkillExistence(skills, coldPoolDir, es)
192
+ for (const warning of formatSkillWarnings(checks)) {
193
+ console.warn(`⚠️ ${warning}`)
194
+ }
195
+ } catch (e) {
196
+ console.warn('⚠️ Could not check skill existence:', e instanceof Error ? e.message : e)
197
+ }
159
198
  },
160
199
  })
161
200
 
162
- // Copy agent output to outDir
201
+ // ── Copy agent output to outDir ──────────────────────────────────
163
202
  writeFileSync(join(outDir, 'agent-stdout.txt'), result.agentResult.stdout, 'utf-8')
164
203
  if (result.agentResult.stderr) writeFileSync(join(outDir, 'agent-stderr.txt'), result.agentResult.stderr, 'utf-8')
165
204
  if (result.verdict) writeFileSync(join(outDir, 'judge-verdict.json'), JSON.stringify(result.verdict, null, 2) + '\n', 'utf-8')
@@ -167,16 +206,31 @@ Evaluate whether the output is complete, accurate, and well-structured.
167
206
  // Copy all agent-produced files from workdir (output.md, output.docx, etc.)
168
207
  // Skip .claude/ (symlink dir) and deck artifacts. Recursive so docx/pdf work.
169
208
  if (agentWorkdir) {
170
- const { cpSync, readdirSync } = await import('node:fs')
171
- const skipSet = new Set(['.claude', 'skill-deck.toml', 'skill-deck.lock'])
172
- try {
173
- for (const entry of readdirSync(agentWorkdir)) {
174
- if (skipSet.has(entry)) continue
175
- const src = join(agentWorkdir, entry)
176
- const dest = join(outDir, entry)
177
- try { cpSync(src, dest, { recursive: true }) } catch {}
209
+ const { cpSync, readdirSync, existsSync: es2 } = await import('node:fs')
210
+ if (!es2(agentWorkdir)) {
211
+ console.warn(`⚠️ Agent workdir vanished before copy: ${agentWorkdir}`)
212
+ } else {
213
+ const skipSet = new Set(['.claude', 'skill-deck.toml', 'skill-deck.lock'])
214
+ try {
215
+ const entries = readdirSync(agentWorkdir)
216
+ const plan = buildCopyPlan(agentWorkdir, outDir, entries, skipSet)
217
+ for (const { src, dest, name } of plan) {
218
+ try {
219
+ cpSync(src, dest, { recursive: true })
220
+ } catch (e) {
221
+ console.warn(`⚠️ Failed to copy agent output: ${name} — ${e instanceof Error ? e.message : e}`)
222
+ }
223
+ }
224
+ } catch (e) {
225
+ console.warn(`⚠️ Failed to read agent workdir for copy: ${e instanceof Error ? e.message : e}`)
178
226
  }
179
- } catch {}
227
+ }
228
+ }
229
+
230
+ // ── Post-flight: output validation ──────────────────────────────
231
+ if (!result.agentResult.stdout || result.agentResult.stdout.trim().length === 0) {
232
+ console.warn('⚠️ Agent produced empty stdout — the task may have failed silently.')
233
+ console.warn(` Agent stderr: ${(result.agentResult.stderr || '(empty)').slice(0, 200)}`)
180
234
  }
181
235
 
182
236
  console.log(`\n✅ Agent complete (${result.agentResult.durationMs}ms)`)
package/src/player.ts CHANGED
@@ -15,6 +15,7 @@ const BUILTIN_PLAYERS: Record<string, string> = {
15
15
  'claude': 'claude',
16
16
  'claude-code': 'claude',
17
17
  'kimi': 'kimi',
18
+ 'deepseek': 'deepseek',
18
19
  'cursor': 'cursor',
19
20
  'gemini': 'gemini',
20
21
  }
@@ -0,0 +1,395 @@
1
+ /**
2
+ * preflight.test.ts — TDD tests for arena agent-run pre-flight pure functions
3
+ *
4
+ * Coverage targets:
5
+ * parseDeckSkills — all TOML formats, edge cases
6
+ * checkSkillExistence — cold pool hit/miss, path resolution
7
+ * validateLinkResult — exit codes, error formatting
8
+ * buildCopyPlan — skip set, path mapping
9
+ * resolveColdPoolDir — tilde expansion, fallback
10
+ * formatSkillWarnings — warning string generation
11
+ */
12
+
13
+ import { describe, test, expect } from 'bun:test'
14
+ import {
15
+ parseDeckSkills,
16
+ checkSkillExistence,
17
+ validateLinkResult,
18
+ buildCopyPlan,
19
+ resolveColdPoolDir,
20
+ formatSkillWarnings,
21
+ } from './preflight'
22
+
23
+ // ═══════════════════════════════════════════════════════════════════════════
24
+ // parseDeckSkills
25
+ // ═══════════════════════════════════════════════════════════════════════════
26
+
27
+ describe('parseDeckSkills', () => {
28
+
29
+ test('empty deck → empty array', () => {
30
+ expect(parseDeckSkills({})).toEqual([])
31
+ })
32
+
33
+ test('deck with no skill sections → empty array', () => {
34
+ expect(parseDeckSkills({ deck: { max_cards: 10 } })).toEqual([])
35
+ })
36
+
37
+ test('inline-table format: single tool skill with path', () => {
38
+ const parsed = {
39
+ tool: {
40
+ skills: {
41
+ pdf: { path: 'github.com/anthropics/skills/skills/pdf' }
42
+ }
43
+ }
44
+ }
45
+ expect(parseDeckSkills(parsed)).toEqual([
46
+ { name: 'pdf', path: 'github.com/anthropics/skills/skills/pdf', section: 'tool' }
47
+ ])
48
+ })
49
+
50
+ test('inline-table format: multiple skills', () => {
51
+ const parsed = {
52
+ tool: {
53
+ skills: {
54
+ pdf: { path: 'github.com/anthropics/skills/skills/pdf' },
55
+ docx: { path: 'github.com/anthropics/skills/skills/docx' },
56
+ }
57
+ }
58
+ }
59
+ expect(parseDeckSkills(parsed)).toEqual([
60
+ { name: 'pdf', path: 'github.com/anthropics/skills/skills/pdf', section: 'tool' },
61
+ { name: 'docx', path: 'github.com/anthropics/skills/skills/docx', section: 'tool' },
62
+ ])
63
+ })
64
+
65
+ test('array format: skills = ["a", "b"]', () => {
66
+ const parsed = {
67
+ tool: {
68
+ skills: ['web-search', 'docx']
69
+ }
70
+ }
71
+ expect(parseDeckSkills(parsed)).toEqual([
72
+ { name: 'web-search', path: null, section: 'tool' },
73
+ { name: 'docx', path: null, section: 'tool' },
74
+ ])
75
+ })
76
+
77
+ test('innate section parsed separately', () => {
78
+ const parsed = {
79
+ innate: {
80
+ skills: {
81
+ deck: { path: 'github.com/lythos-labs/lythoskill/skills/lythoskill-deck' }
82
+ }
83
+ },
84
+ tool: {
85
+ skills: {
86
+ pdf: { path: 'github.com/anthropics/skills/skills/pdf' }
87
+ }
88
+ }
89
+ }
90
+ expect(parseDeckSkills(parsed)).toEqual([
91
+ { name: 'deck', path: 'github.com/lythos-labs/lythoskill/skills/lythoskill-deck', section: 'innate' },
92
+ { name: 'pdf', path: 'github.com/anthropics/skills/skills/pdf', section: 'tool' },
93
+ ])
94
+ })
95
+
96
+ test('transient section parsed', () => {
97
+ const parsed = {
98
+ transient: {
99
+ skills: {
100
+ experiment: { path: 'localhost/my-experiment' }
101
+ }
102
+ }
103
+ }
104
+ expect(parseDeckSkills(parsed)).toEqual([
105
+ { name: 'experiment', path: 'localhost/my-experiment', section: 'transient' }
106
+ ])
107
+ })
108
+
109
+ test('object entry without path → path=null', () => {
110
+ const parsed = {
111
+ tool: {
112
+ skills: {
113
+ bare: {} // no path field
114
+ }
115
+ }
116
+ }
117
+ expect(parseDeckSkills(parsed)).toEqual([
118
+ { name: 'bare', path: null, section: 'tool' }
119
+ ])
120
+ })
121
+
122
+ test('object entry with non-string path → path=null', () => {
123
+ const parsed = {
124
+ tool: {
125
+ skills: {
126
+ weird: { path: 42 } // number, not string
127
+ }
128
+ }
129
+ }
130
+ expect(parseDeckSkills(parsed)).toEqual([
131
+ { name: 'weird', path: null, section: 'tool' }
132
+ ])
133
+ })
134
+
135
+ test('array entry that is not a string → skipped', () => {
136
+ const parsed = {
137
+ tool: { skills: ['valid', 123, null, 'also-valid'] }
138
+ }
139
+ expect(parseDeckSkills(parsed)).toEqual([
140
+ { name: 'valid', path: null, section: 'tool' },
141
+ { name: 'also-valid', path: null, section: 'tool' },
142
+ ])
143
+ })
144
+
145
+ test('all three sections populated → ordered innate, tool, transient', () => {
146
+ const parsed = {
147
+ innate: { skills: { a: { path: '/a' } } },
148
+ tool: { skills: { b: { path: '/b' } } },
149
+ transient: { skills: { c: { path: '/c' } } },
150
+ }
151
+ expect(parseDeckSkills(parsed)).toEqual([
152
+ { name: 'a', path: '/a', section: 'innate' },
153
+ { name: 'b', path: '/b', section: 'tool' },
154
+ { name: 'c', path: '/c', section: 'transient' },
155
+ ])
156
+ })
157
+ })
158
+
159
+ // ═══════════════════════════════════════════════════════════════════════════
160
+ // checkSkillExistence
161
+ // ═══════════════════════════════════════════════════════════════════════════
162
+
163
+ describe('checkSkillExistence', () => {
164
+
165
+ test('empty skills → empty array', () => {
166
+ const exists = (_: string) => true
167
+ expect(checkSkillExistence([], '/cold', exists)).toEqual([])
168
+ })
169
+
170
+ test('skill with explicit path → resolves <coldPool>/<path>/SKILL.md', () => {
171
+ const exists = (p: string) => p === '/cold/github.com/owner/repo/skills/my-skill/SKILL.md'
172
+ const skills = [{ name: 'my-skill', path: 'github.com/owner/repo/skills/my-skill', section: 'tool' }]
173
+ const result = checkSkillExistence(skills, '/cold', exists)
174
+ expect(result).toEqual([
175
+ { name: 'my-skill', expectedPath: '/cold/github.com/owner/repo/skills/my-skill/SKILL.md', found: true, section: 'tool' }
176
+ ])
177
+ })
178
+
179
+ test('skill without path (array format) → resolves <coldPool>/<name>/SKILL.md', () => {
180
+ const exists = (p: string) => p === '/cold/web-search/SKILL.md'
181
+ const skills = [{ name: 'web-search', path: null, section: 'tool' }]
182
+ const result = checkSkillExistence(skills, '/cold', exists)
183
+ expect(result).toEqual([
184
+ { name: 'web-search', expectedPath: '/cold/web-search/SKILL.md', found: true, section: 'tool' }
185
+ ])
186
+ })
187
+
188
+ test('HTTP path → uses name as fallback for path resolution', () => {
189
+ const exists = (p: string) => p === '/cold/my-skill/SKILL.md'
190
+ const skills = [{ name: 'my-skill', path: 'https://example.com/deck.toml', section: 'tool' }]
191
+ const result = checkSkillExistence(skills, '/cold', exists)
192
+ expect(result).toEqual([
193
+ { name: 'my-skill', expectedPath: '/cold/my-skill/SKILL.md', found: true, section: 'tool' }
194
+ ])
195
+ })
196
+
197
+ test('all found → all found=true', () => {
198
+ const exists = (_: string) => true
199
+ const skills = [
200
+ { name: 'a', path: '/a', section: 'tool' },
201
+ { name: 'b', path: '/b', section: 'tool' },
202
+ ]
203
+ expect(checkSkillExistence(skills, '/cold', exists)).toEqual([
204
+ { name: 'a', expectedPath: '/cold//a/SKILL.md', found: true, section: 'tool' },
205
+ { name: 'b', expectedPath: '/cold//b/SKILL.md', found: true, section: 'tool' },
206
+ ])
207
+ })
208
+
209
+ test('some missing → mixed found/not-found', () => {
210
+ const exists = (p: string) => p.includes('a')
211
+ const skills = [
212
+ { name: 'a', path: '/a', section: 'tool' },
213
+ { name: 'b', path: '/b', section: 'tool' },
214
+ ]
215
+ expect(checkSkillExistence(skills, '/cold', exists)).toEqual([
216
+ { name: 'a', expectedPath: '/cold//a/SKILL.md', found: true, section: 'tool' },
217
+ { name: 'b', expectedPath: '/cold//b/SKILL.md', found: false, section: 'tool' },
218
+ ])
219
+ })
220
+
221
+ test('different coldPoolDir → different expectedPath prefix', () => {
222
+ const exists = (_: string) => true
223
+ const skills = [{ name: 'x', path: 'github.com/x', section: 'tool' }]
224
+ const a = checkSkillExistence(skills, '/home/user/.agents/skill-repos', exists)
225
+ const b = checkSkillExistence(skills, '/opt/cold', exists)
226
+ expect(a[0].expectedPath).toStartWith('/home/user/.agents/skill-repos/')
227
+ expect(b[0].expectedPath).toStartWith('/opt/cold/')
228
+ })
229
+ })
230
+
231
+ // ═══════════════════════════════════════════════════════════════════════════
232
+ // validateLinkResult
233
+ // ═══════════════════════════════════════════════════════════════════════════
234
+
235
+ describe('validateLinkResult', () => {
236
+
237
+ test('exitCode 0 → ok', () => {
238
+ expect(validateLinkResult(0, '')).toEqual({ ok: true })
239
+ })
240
+
241
+ test('exitCode 0 with stderr → still ok (stderr is not always errors)', () => {
242
+ expect(validateLinkResult(0, 'some warning output')).toEqual({ ok: true })
243
+ })
244
+
245
+ test('exitCode 1 → not ok, error contains snippet', () => {
246
+ const result = validateLinkResult(1, 'something went wrong')
247
+ expect(result.ok).toBe(false)
248
+ expect(result.error).toContain('exit 1')
249
+ expect(result.error).toContain('something went wrong')
250
+ })
251
+
252
+ test('exitCode null → not ok (null !== 0)', () => {
253
+ const result = validateLinkResult(null, 'process killed')
254
+ expect(result.ok).toBe(false)
255
+ expect(result.error).toContain('exit null')
256
+ })
257
+
258
+ test('stderr truncated to 300 chars in error message', () => {
259
+ const longStderr = 'x'.repeat(500)
260
+ const result = validateLinkResult(1, longStderr)
261
+ expect(result.ok).toBe(false)
262
+ expect(result.error!.length).toBeLessThan(350) // "Deck link failed (exit 1): " + 300 chars
263
+ })
264
+
265
+ test('exitCode 0, empty stderr → ok with no error field', () => {
266
+ const result = validateLinkResult(0, '')
267
+ expect(result.ok).toBe(true)
268
+ expect(result.error).toBeUndefined()
269
+ })
270
+ })
271
+
272
+ // ═══════════════════════════════════════════════════════════════════════════
273
+ // buildCopyPlan
274
+ // ═══════════════════════════════════════════════════════════════════════════
275
+
276
+ describe('buildCopyPlan', () => {
277
+
278
+ test('empty entries → empty plan', () => {
279
+ expect(buildCopyPlan('/work', '/out', [], new Set())).toEqual([])
280
+ })
281
+
282
+ test('all skipped → empty plan', () => {
283
+ const skip = new Set(['.claude', 'skill-deck.toml'])
284
+ expect(buildCopyPlan('/work', '/out', ['.claude', 'skill-deck.toml'], skip)).toEqual([])
285
+ })
286
+
287
+ test('normal entries → mapped to outDir', () => {
288
+ const skip = new Set<string>()
289
+ expect(buildCopyPlan('/work', '/out', ['output.md', 'report.docx'], skip)).toEqual([
290
+ { src: '/work/output.md', dest: '/out/output.md', name: 'output.md' },
291
+ { src: '/work/report.docx', dest: '/out/report.docx', name: 'report.docx' },
292
+ ])
293
+ })
294
+
295
+ test('mixed skip and non-skip → only non-skipped', () => {
296
+ const skip = new Set(['.claude', 'skill-deck.toml', 'skill-deck.lock'])
297
+ const entries = ['.claude', 'output.md', 'skill-deck.toml', 'report.docx', 'skill-deck.lock']
298
+ expect(buildCopyPlan('/work', '/out', entries, skip)).toEqual([
299
+ { src: '/work/output.md', dest: '/out/output.md', name: 'output.md' },
300
+ { src: '/work/report.docx', dest: '/out/report.docx', name: 'report.docx' },
301
+ ])
302
+ })
303
+
304
+ test('preserves entry order', () => {
305
+ const skip = new Set<string>()
306
+ const entries = ['c', 'a', 'b']
307
+ expect(buildCopyPlan('/w', '/o', entries, skip).map(e => e.name)).toEqual(['c', 'a', 'b'])
308
+ })
309
+
310
+ test('nested paths work (agent-produced subdirectories)', () => {
311
+ const skip = new Set<string>()
312
+ expect(buildCopyPlan('/work', '/out', ['subdir/output.pdf'], skip)).toEqual([
313
+ { src: '/work/subdir/output.pdf', dest: '/out/subdir/output.pdf', name: 'subdir/output.pdf' },
314
+ ])
315
+ })
316
+ })
317
+
318
+ // ═══════════════════════════════════════════════════════════════════════════
319
+ // resolveColdPoolDir
320
+ // ═══════════════════════════════════════════════════════════════════════════
321
+
322
+ describe('resolveColdPoolDir', () => {
323
+
324
+ test('explicit absolute path → returned as-is', () => {
325
+ expect(resolveColdPoolDir('/opt/cold', '/home/user', '/fallback')).toBe('/opt/cold')
326
+ })
327
+
328
+ test('explicit relative path → returned as-is', () => {
329
+ expect(resolveColdPoolDir('my-cold-pool', '/home/user', '/fallback')).toBe('my-cold-pool')
330
+ })
331
+
332
+ test('tilde path → expanded with homeDir', () => {
333
+ expect(resolveColdPoolDir('~/.agents/skill-repos', '/home/user', '/fallback'))
334
+ .toBe('/home/user/.agents/skill-repos')
335
+ })
336
+
337
+ test('tilde at start only → expanded; tilde elsewhere not expanded', () => {
338
+ expect(resolveColdPoolDir('path/with~/tilde', '/home/user', '/fallback'))
339
+ .toBe('path/with~/tilde')
340
+ })
341
+
342
+ test('undefined → uses fallback', () => {
343
+ expect(resolveColdPoolDir(undefined, '/home/user', '/default/cold'))
344
+ .toBe('/default/cold')
345
+ })
346
+
347
+ test('empty string → uses fallback (|| operator)', () => {
348
+ expect(resolveColdPoolDir('', '/home/user', '/default/cold'))
349
+ .toBe('/default/cold')
350
+ })
351
+ })
352
+
353
+ // ═══════════════════════════════════════════════════════════════════════════
354
+ // formatSkillWarnings
355
+ // ═══════════════════════════════════════════════════════════════════════════
356
+
357
+ describe('formatSkillWarnings', () => {
358
+
359
+ test('all found → empty array', () => {
360
+ const checks = [
361
+ { name: 'a', expectedPath: '/p/a/SKILL.md', found: true, section: 'tool' },
362
+ { name: 'b', expectedPath: '/p/b/SKILL.md', found: true, section: 'tool' },
363
+ ]
364
+ expect(formatSkillWarnings(checks)).toEqual([])
365
+ })
366
+
367
+ test('some missing → one warning per missing skill', () => {
368
+ const checks = [
369
+ { name: 'pdf', expectedPath: '/cold/pdf/SKILL.md', found: false, section: 'tool' },
370
+ { name: 'docx', expectedPath: '/cold/docx/SKILL.md', found: true, section: 'tool' },
371
+ ]
372
+ expect(formatSkillWarnings(checks)).toEqual([
373
+ 'Skill "pdf" declared in deck [tool] but SKILL.md not found at: /cold/pdf/SKILL.md',
374
+ ])
375
+ })
376
+
377
+ test('all missing → warning for each', () => {
378
+ const checks = [
379
+ { name: 'a', expectedPath: '/p/a/SKILL.md', found: false, section: 'innate' },
380
+ { name: 'b', expectedPath: '/p/b/SKILL.md', found: false, section: 'tool' },
381
+ ]
382
+ expect(formatSkillWarnings(checks)).toHaveLength(2)
383
+ })
384
+
385
+ test('empty array → empty array', () => {
386
+ expect(formatSkillWarnings([])).toEqual([])
387
+ })
388
+
389
+ test('section name appears in warning string', () => {
390
+ const checks = [
391
+ { name: 'x', expectedPath: '/p/x', found: false, section: 'transient' },
392
+ ]
393
+ expect(formatSkillWarnings(checks)[0]).toContain('[transient]')
394
+ })
395
+ })
@@ -0,0 +1,208 @@
1
+ /**
2
+ * preflight.ts — Arena agent-run pre-flight pure functions
3
+ *
4
+ * Extracted from cli.ts agentRun to enable unit testing.
5
+ * All functions are pure: no filesystem IO, no spawn, no console.
6
+ * IO is injected via function parameters (e.g., existsFn, readdirFn).
7
+ */
8
+
9
+ import { ColdPool, parseLocator } from '@lythos/cold-pool'
10
+
11
+ // ── Types ─────────────────────────────────────────────────────────────────
12
+
13
/**
 * One skill declaration as it appears in skill-deck.toml.
 */
export interface SkillDecl {
  /** TOML key of the skill (e.g., "pdf"). */
  name: string
  /** Explicit path from the inline-table format; `null` for the array format. */
  path: string | null
  /** Deck section the skill was declared in: "innate" | "tool" | "transient". */
  section: string
}
19
+
20
/**
 * Outcome of probing the cold pool for a single declared skill.
 */
export interface SkillCheck {
  /** Name of the skill that was checked. */
  name: string
  /** Resolved cold pool path that was checked. */
  expectedPath: string
  /** Whether the existence probe reported `expectedPath` as present. */
  found: boolean
  /** Deck section the skill was declared in. */
  section: string
}
27
+
28
/**
 * Outcome of validating a deck link run.
 */
export interface LinkResult {
  /** `true` when the link step is considered successful. */
  ok: boolean
  /** Human-readable failure description; optional. */
  error?: string
}
33
+
34
/**
 * One planned copy operation (source → destination).
 */
export interface CopyEntry {
  /** Path to copy from. */
  src: string
  /** Path to copy to. */
  dest: string
  /** Entry basename, kept for error reporting. */
  name: string
}
40
+
41
+ // ── parseDeckSkills ──────────────────────────────────────────────────────
42
+
43
/**
 * Collect every skill declared in an already-parsed skill-deck.toml object.
 *
 * The `innate`, `tool` and `transient` sections are visited in that order.
 * Each section's `skills` value may use either TOML shape:
 *
 *   [tool.skills.pdf]          → { name: "pdf", path: "github.com/...", section: "tool" }
 *   path = "github.com/..."
 *
 *   skills = ["a", "b"]        → { name: "a", path: null, section: "tool" }
 *
 * Non-string items in the array shape are ignored. In the table shape an
 * entry without a string `path` yields `path: null`.
 *
 * Pure: parsed object → SkillDecl[]. No IO and no TOML parsing here — the
 * caller parses the file first.
 *
 * @param deckParsed - Parsed deck configuration object.
 * @returns Declared skills in section order, then declaration order.
 */
export function parseDeckSkills(
  deckParsed: Record<string, any>
): SkillDecl[] {
  const declared: SkillDecl[] = []

  for (const section of ['innate', 'tool', 'transient'] as const) {
    const raw = deckParsed?.[section]?.skills
    if (!raw) continue

    // Array shape: skills = ["name1", "name2"]
    if (Array.isArray(raw)) {
      raw.forEach(item => {
        if (typeof item === 'string') {
          declared.push({ name: item, path: null, section })
        }
      })
      continue
    }

    // Anything that is neither an array nor a table carries no declarations.
    if (typeof raw !== 'object') continue

    // Table shape: [tool.skills.name] with an optional `path = "..."`
    for (const key of Object.keys(raw)) {
      const explicit = raw[key]?.path
      declared.push({
        name: key,
        path: typeof explicit === 'string' ? explicit : null,
        section,
      })
    }
  }

  return declared
}
82
+
83
+ // ── checkSkillExistence ──────────────────────────────────────────────────
84
+
85
/**
 * Check each declared skill against the cold pool filesystem.
 *
 * For every skill exactly one candidate `SKILL.md` path is computed and
 * probed with `existsFn`. The locator used is the skill's explicit `path`,
 * unless that path is missing/empty or is an `http://` / `https://` URL
 * (not a local layout), in which case the skill's `name` is used instead.
 *
 * Path resolution delegates to @lythos/cold-pool's `parseLocator` and
 * `ColdPool.resolveDir` so localhost / FQ / standalone forms all map to
 * the right physical layout (per ADR-20260507021957847). Non-FQ legacy
 * names (e.g., bare `pdf`) fall back to `<coldPoolDir>/<name>/SKILL.md`.
 *
 * @param skills - Skill declarations to check.
 * @param coldPoolDir - Root directory of the cold pool.
 * @param existsFn - IO injection point — swap for real fs or a mock.
 * @returns One SkillCheck per input skill, in input order.
 */
export function checkSkillExistence(
  skills: SkillDecl[],
  coldPoolDir: string,
  existsFn: (path: string) => boolean
): SkillCheck[] {
  const pool = new ColdPool(coldPoolDir)
  // Match a real URL scheme rather than any string that merely begins with
  // "http": a local skill path such as `httpie` must still be checked at its
  // own location instead of silently falling back to the skill name.
  const remoteUrl = /^https?:\/\//i

  return skills.map(skill => {
    const candidatePath = skill.path && !remoteUrl.test(skill.path)
      ? skill.path
      : skill.name

    let expectedPath: string
    const locator = parseLocator(candidatePath)
    if (!locator) {
      // Legacy bare-name fallback. Per ADR-20260502012643244 this should
      // be removed in 0.10.x once arena.toml authors switch to FQ.
      expectedPath = `${coldPoolDir}/${candidatePath}/SKILL.md`
    } else if (locator.isLocalhost) {
      // localhost layout: top-level dir under coldPool, no `localhost/` prefix
      expectedPath = `${pool.resolveDir(locator)}/SKILL.md`
    } else if (locator.skill) {
      // Skill inside a repo: SKILL.md lives in the skill's sub-directory
      expectedPath = `${pool.resolveDir(locator)}/${locator.skill}/SKILL.md`
    } else {
      // Standalone repo: SKILL.md at repo root
      expectedPath = `${pool.resolveDir(locator)}/SKILL.md`
    }

    return {
      name: skill.name,
      expectedPath,
      found: existsFn(expectedPath),
      section: skill.section,
    }
  })
}
132
+
133
+ // ── validateLinkResult ───────────────────────────────────────────────────
134
+
135
/**
 * Turn the outcome of `bunx @lythos/skill-deck link` into a LinkResult.
 *
 * Pure: (exitCode, stderr) → LinkResult.
 * Only the exit code decides success: exactly `0` is success, anything else
 * (including `null`) is a failure. On failure the first 300 characters of
 * `stderr` are embedded in the error message.
 *
 * @param exitCode - Exit code of the link process, or `null` if none.
 * @param stderr - Captured standard error output of the link process.
 * @returns `{ ok: true }` on success, `{ ok: false, error }` otherwise.
 */
export function validateLinkResult(
  exitCode: number | null,
  stderr: string
): LinkResult {
  if (exitCode === 0) {
    return { ok: true }
  }

  // Cap the excerpt so a noisy stderr cannot flood the error message.
  const excerpt = (stderr || '').slice(0, 300)
  const error = `Deck link failed (exit ${exitCode}): ${excerpt}`
  return { ok: false, error }
}
154
+
155
+ // ── buildCopyPlan ────────────────────────────────────────────────────────
156
+
157
/**
 * Plan which workdir entries get copied into outDir.
 *
 * Entries listed in `skipSet` are dropped; every remaining entry `name`
 * becomes `{ src: <workdir>/<name>, dest: <outDir>/<name>, name }`, in the
 * order given by `entries`.
 * Pure: strings + set → CopyEntry[]. No filesystem access.
 *
 * @param workdir - Directory the entries are copied from.
 * @param outDir - Directory the entries are copied to.
 * @param entries - Entry names found in `workdir`.
 * @param skipSet - Entry names that must not be copied.
 * @returns The copy plan for all non-skipped entries.
 */
export function buildCopyPlan(
  workdir: string,
  outDir: string,
  entries: string[],
  skipSet: Set<string>
): CopyEntry[] {
  return entries
    .filter(entry => !skipSet.has(entry))
    .map(entry => ({
      src: `${workdir}/${entry}`,
      dest: `${outDir}/${entry}`,
      name: entry,
    }))
}
180
+
181
+ // ── resolveColdPoolDir ───────────────────────────────────────────────────
182
+
183
/**
 * Resolve cold_pool root from deck config, expanding a leading `~`.
 *
 * Pure: string → string. No filesystem access.
 *
 * Only a bare `~` or a `~/…` (or `~\…`) prefix is replaced with `homeDir`.
 * Other tilde forms — `~alice/pool` or a directory literally named
 * `~cache` — are returned unchanged rather than being glued onto `homeDir`.
 *
 * @param coldPoolRoot - Configured cold pool root; `undefined` or `''` means unset.
 * @param homeDir - Home directory substituted for the leading `~`.
 * @param fallbackDir - Directory used when `coldPoolRoot` is unset.
 * @returns The resolved cold pool directory.
 */
export function resolveColdPoolDir(
  coldPoolRoot: string | undefined,
  homeDir: string,
  fallbackDir: string
): string {
  // `||` rather than `??` on purpose: an empty string counts as "not configured".
  const raw = coldPoolRoot || fallbackDir

  if (raw === '~') return homeDir
  if (raw.startsWith('~/') || raw.startsWith('~\\')) {
    // Keep the separator so the result is `<homeDir>/<rest>`.
    return `${homeDir}${raw.slice(1)}`
  }
  return raw
}
196
+
197
+ // ── formatSkillWarnings ──────────────────────────────────────────────────
198
+
199
/**
 * Render one human-readable warning per skill check that was not found.
 *
 * Checks with a truthy `found` produce no output; input order is preserved.
 * Pure: SkillCheck[] → string[].
 *
 * @param checks - Results of checking declared skills.
 * @returns Warning strings for the checks whose `found` is false.
 */
export function formatSkillWarnings(checks: SkillCheck[]): string[] {
  const warnings: string[] = []
  for (const { name, section, expectedPath, found } of checks) {
    if (found) continue
    warnings.push(
      `Skill "${name}" declared in deck [${section}] but SKILL.md not found at: ${expectedPath}`
    )
  }
  return warnings
}
package/src/runner.ts CHANGED
@@ -3,6 +3,8 @@ import { join, resolve } from 'node:path'
3
3
  import { tmpdir } from 'node:os'
4
4
  import { runAgentScenario, type AgentScenario } from '@lythos/test-utils/agent-bdd'
5
5
  import { useAgent } from '@lythos/test-utils/agents'
6
+ // Optional: register claude-sdk adapter if the package is installed
7
+ try { await import('@lythos/agent-adapter-claude-sdk') } catch { /* package not installed */ }
6
8
  import { ArenaManifest, Player } from '@lythos/test-utils/schema'
7
9
  import type { ArenaManifest as ArenaManifestType, JudgeVerdict } from '@lythos/test-utils/schema'
8
10
  import { runComparativeJudge } from './comparative-judge'