npm - @lythos/skill-arena - Versions diffs - 0.9.21 → 0.9.23 - Mend

@lythos/skill-arena 0.9.21 → 0.9.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md CHANGED Viewed

@@ -49,26 +49,26 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
 ```bash
 bun add -d @lythos/skill-arena
 # or use directly
-bunx @lythos/skill-arena@0.9.21 <command>
+bunx @lythos/skill-arena@0.9.23 <command>
 ```
 ## Quick Start
 ```bash
 # Mode 1: Compare two skills on the same task
-bunx @lythos/skill-arena@0.9.21 \
+bunx @lythos/skill-arena@0.9.23 \
   --task "Generate auth flow diagram" \
   --skills "design-doc-mermaid,mermaid-tools" \
   --criteria "syntax,context,token"
 # Mode 2: Compare full deck configurations
-bunx @lythos/skill-arena@0.9.21 \
+bunx @lythos/skill-arena@0.9.23 \
   --task "Generate auth flow diagram" \
   --decks "./decks/minimal.toml,./decks/rich.toml" \
   --criteria "quality,token,maintainability"
 # Visualize results
-bunx @lythos/skill-arena@0.9.21 viz tmp/arena-<id>/
+bunx @lythos/skill-arena@0.9.23 viz tmp/arena-<id>/
 ```
 ## Commands
@@ -77,16 +77,16 @@ bunx @lythos/skill-arena@0.9.21 viz tmp/arena-<id>/
 ```bash
 # Print execution plan without running
-bunx @lythos/skill-arena@0.9.21 run --config arena.toml --dry-run
+bunx @lythos/skill-arena@0.9.23 run --config arena.toml --dry-run
 # Execute with per-side runs_per_side and statistical aggregation
-bunx @lythos/skill-arena@0.9.21 run --config arena.toml
+bunx @lythos/skill-arena@0.9.23 run --config arena.toml
 ```
 ### CLI-flag mode (backward compat)
 ```
-bunx @lythos/skill-arena@0.9.21 run \
+bunx @lythos/skill-arena@0.9.23 run \
   --task ./TASK-arena.md \
   --players ./players/claude.toml \
   --decks ./decks/run-01.toml,./decks/run-02.toml \
@@ -96,13 +96,13 @@ bunx @lythos/skill-arena@0.9.21 run \
 ### Scaffold mode (legacy, manual execution)
 ```
-bunx @lythos/skill-arena@0.9.21 scaffold --task "..." --skills a,b
+bunx @lythos/skill-arena@0.9.23 scaffold --task "..." --skills a,b
 ```
 ### Viz
 ```bash
-bunx @lythos/skill-arena@0.9.21 viz runs/arena-<id>/
+bunx @lythos/skill-arena@0.9.23 viz runs/arena-<id>/
 ```
 ## Skill Documentation
@@ -116,7 +116,7 @@ The agent-visible **Skill** layer documentation is here:
 Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
 ```
-Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.21 ...
+Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.23 ...
 Skill   (packages/<name>/skill/)     → build → SKILL.md + thin scripts
 Output  (skills/<name>/)             → git commit → agent-visible skill
 ```

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lythos/skill-arena",
-  "version": "0.9.21",
+  "version": "0.9.23",
   "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
   "keywords": [
     "ai-agent",
@@ -40,5 +40,8 @@
     "@lythos/test-utils": "^0.9.1",
     "zod": "^3.24.0",
     "zod-to-json-schema": "^3.25.2"
+  },
+  "optionalDependencies": {
+    "@lythos/agent-adapter-claude-sdk": "workspace:*"
   }
 }

package/src/cli.ts CHANGED Viewed

@@ -9,6 +9,14 @@ import {
   existsSync, mkdirSync, writeFileSync, readFileSync,
 } from 'node:fs'
 import { join, resolve, basename } from 'node:path'
+import {
+  parseDeckSkills,
+  checkSkillExistence,
+  validateLinkResult,
+  buildCopyPlan,
+  resolveColdPoolDir,
+  formatSkillWarnings,
+} from './preflight'
 // ── 简单的 slugify ──────────────────────────────────────────
 function slugify(input: string): string {
@@ -88,8 +96,8 @@ async function agentRun(args: string[]) {
     console.error('❌ --deck <path> is required')
     process.exit(1)
   }
-  if (!opts.task && !opts.brief) {
-    console.error('❌ --task <path> or --brief "<prompt>" is required')
+  if (!opts.task && (!opts.brief || !opts.brief.trim())) {
+    console.error('❌ --task <path> or --brief "<prompt>" is required and cannot be empty')
     process.exit(1)
   }
@@ -97,39 +105,9 @@ async function agentRun(args: string[]) {
   const deckPath = resolve(opts.deck)
   if (!existsSync(deckPath)) { console.error(`❌ Deck file not found: ${deckPath}`); process.exit(1) }
-  // Resolve task: either from file, or create temp task from --brief
-  let taskPath: string
-  if (opts.task) {
-    taskPath = resolve(opts.task)
-    if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}`); process.exit(1) }
-  } else {
-    const { mkdtempSync, writeFileSync } = await import('node:fs')
-    const { tmpdir } = await import('node:os')
-    const tmpDir = mkdtempSync(join(tmpdir(), 'arena-brief-'))
-    taskPath = join(tmpDir, 'TASK.md')
-    const briefTask = `---
-name: ad-hoc task
-description: ${opts.brief!.slice(0, 80)}
-timeout: 120000
----
-## Given
-- You are an AI agent with the skills declared in the deck
-## When
-${opts.brief}
-## Then
-- Write your output to output.md
-- The output should be complete and well-structured
-## Judge
-Evaluate whether the output is complete, accurate, and well-structured.
-`
-    writeFileSync(taskPath, briefTask, 'utf-8')
-  }
   const { useAgent } = await import('@lythos/test-utils/agents')
+  // Optional: register claude-sdk adapter if the package is installed
+  try { await import('@lythos/agent-adapter-claude-sdk') } catch { /* package not installed */ }
   const { runAgentScenario } = await import('@lythos/test-utils/agent-bdd')
   const { resolvePlayer } = await import('./player')
   const { readFileSync, writeFileSync, mkdirSync } = await import('node:fs')
@@ -139,27 +117,87 @@ Evaluate whether the output is complete, accurate, and well-structured.
   const outDir = opts.out ? resolve(opts.out) : join(process.cwd(), `agent-output-${new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19)}`)
   mkdirSync(outDir, { recursive: true })
+  // Resolve task: --brief builds scenario directly, --task reads .agent.md file
+  const scenarioOpt: Record<string, unknown> = {}
+  if (opts.task) {
+    const taskPath = resolve(opts.task)
+    if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}`); process.exit(1) }
+    scenarioOpt.scenarioPath = taskPath
+  } else {
+    scenarioOpt.scenario = {
+      name: 'ad-hoc task',
+      description: opts.brief!.slice(0, 80),
+      timeout: 120000,
+      given: { deck: {} },
+      when: opts.brief!,
+      then: ['Write your output to output.md', 'The output should be complete and well-structured'],
+      judge: 'Evaluate whether the output is complete, accurate, and well-structured.',
+    }
+  }
   console.log(`🤖 agent-run: ${player} × ${deckPath}`)
-  console.log(`📋 task: ${taskPath}`)
+  if (opts.task) console.log(`📋 task: ${resolve(opts.task!)}`)
+  else console.log(`📋 brief: ${opts.brief!.slice(0, 60)}...`)
   let agentWorkdir = ''
   const result = await runAgentScenario({
-    scenarioPath: taskPath,
+    ...scenarioOpt,
     agent,
     async setupWorkdir(_scenario, workdir) {
       agentWorkdir = workdir
       mkdirSync(workdir, { recursive: true })
       writeFileSync(join(workdir, 'skill-deck.toml'), readFileSync(deckPath, 'utf-8'))
-      const linkProc = Bun.spawn(
-        ['bunx', '@lythos/skill-deck', 'link'],
-        { cwd: workdir, env: { ...process.env, HOME: process.env.HOME! } },
-      )
-      await linkProc.exited
+      // ── Pre-flight: deck link (skip if deck declares no skills) ──
+      const deckRaw = readFileSync(join(workdir, 'skill-deck.toml'), 'utf-8')
+      let deckParsed: Record<string, any> = {}
+      try { deckParsed = Bun.TOML.parse(deckRaw) as Record<string, any> } catch {}
+      const hasSkills = parseDeckSkills(deckParsed).length > 0
+      if (hasSkills) {
+        // Prefer local dev CLI over bunx (bunx needs tempdir write, blocked by some sandboxes)
+        const { existsSync: es2 } = await import('node:fs')
+        const localDeckCli = join(import.meta.dir, '..', '..', 'lythoskill-deck', 'src', 'cli.ts')
+        const linkCmd = es2(localDeckCli)
+          ? ['bun', localDeckCli, 'link']
+          : ['bunx', '@lythos/skill-deck', 'link']
+        const linkProc = Bun.spawn(linkCmd,
+          { cwd: workdir, env: { ...process.env, HOME: process.env.HOME! } },
+        )
+        await linkProc.exited
+        const linkStderr = await new Response(linkProc.stderr).text()
+        const linkResult = validateLinkResult(linkProc.exitCode, linkStderr)
+        if (!linkResult.ok) {
+          console.error(`❌ ${linkResult.error}`)
+          process.exit(1)
+        }
+      } else {
+        console.log('ℹ️  No skills declared in deck — skipping link')
+      }
+      // ── Pre-flight: skill existence check (reuses deckParsed from above) ─
+      const { existsSync: es } = await import('node:fs')
+      const { homedir: hd } = await import('node:os')
+      try {
+        const coldPoolDefault = join(hd(), '.agents', 'skill-repos')
+        const coldPoolDir = resolveColdPoolDir(
+          deckParsed?.deck?.cold_pool,
+          hd(),
+          coldPoolDefault
+        )
+        const skills = parseDeckSkills(deckParsed)
+        const checks = checkSkillExistence(skills, coldPoolDir, es)
+        for (const warning of formatSkillWarnings(checks)) {
+          console.warn(`⚠️  ${warning}`)
+        }
+      } catch (e) {
+        console.warn('⚠️  Could not check skill existence:', e instanceof Error ? e.message : e)
+      }
     },
   })
-  // Copy agent output to outDir
+  // ── Copy agent output to outDir ──────────────────────────────────
   writeFileSync(join(outDir, 'agent-stdout.txt'), result.agentResult.stdout, 'utf-8')
   if (result.agentResult.stderr) writeFileSync(join(outDir, 'agent-stderr.txt'), result.agentResult.stderr, 'utf-8')
   if (result.verdict) writeFileSync(join(outDir, 'judge-verdict.json'), JSON.stringify(result.verdict, null, 2) + '\n', 'utf-8')
@@ -167,16 +205,31 @@ Evaluate whether the output is complete, accurate, and well-structured.
   // Copy all agent-produced files from workdir (output.md, output.docx, etc.)
   // Skip .claude/ (symlink dir) and deck artifacts. Recursive so docx/pdf work.
   if (agentWorkdir) {
-    const { cpSync, readdirSync } = await import('node:fs')
-    const skipSet = new Set(['.claude', 'skill-deck.toml', 'skill-deck.lock'])
-    try {
-      for (const entry of readdirSync(agentWorkdir)) {
-        if (skipSet.has(entry)) continue
-        const src = join(agentWorkdir, entry)
-        const dest = join(outDir, entry)
-        try { cpSync(src, dest, { recursive: true }) } catch {}
+    const { cpSync, readdirSync, existsSync: es2 } = await import('node:fs')
+    if (!es2(agentWorkdir)) {
+      console.warn(`⚠️  Agent workdir vanished before copy: ${agentWorkdir}`)
+    } else {
+      const skipSet = new Set(['.claude', 'skill-deck.toml', 'skill-deck.lock'])
+      try {
+        const entries = readdirSync(agentWorkdir)
+        const plan = buildCopyPlan(agentWorkdir, outDir, entries, skipSet)
+        for (const { src, dest, name } of plan) {
+          try {
+            cpSync(src, dest, { recursive: true })
+          } catch (e) {
+            console.warn(`⚠️  Failed to copy agent output: ${name} — ${e instanceof Error ? e.message : e}`)
+          }
+        }
+      } catch (e) {
+        console.warn(`⚠️  Failed to read agent workdir for copy: ${e instanceof Error ? e.message : e}`)
       }
-    } catch {}
+    }
+  }
+  // ── Post-flight: output validation ──────────────────────────────
+  if (!result.agentResult.stdout || result.agentResult.stdout.trim().length === 0) {
+    console.warn('⚠️  Agent produced empty stdout — the task may have failed silently.')
+    console.warn(`   Agent stderr: ${(result.agentResult.stderr || '(empty)').slice(0, 200)}`)
   }
   console.log(`\n✅ Agent complete (${result.agentResult.durationMs}ms)`)

package/src/player.ts CHANGED Viewed

@@ -15,6 +15,7 @@ const BUILTIN_PLAYERS: Record<string, string> = {
   'claude': 'claude',
   'claude-code': 'claude',
   'kimi': 'kimi',
+  'deepseek': 'deepseek',
   'cursor': 'cursor',
   'gemini': 'gemini',
 }

package/src/preflight.test.ts ADDED Viewed

@@ -0,0 +1,395 @@
+/**
+ * preflight.test.ts — TDD tests for arena agent-run pre-flight pure functions
+ *
+ * Coverage targets:
+ *   parseDeckSkills      — all TOML formats, edge cases
+ *   checkSkillExistence  — cold pool hit/miss, path resolution
+ *   validateLinkResult   — exit codes, error formatting
+ *   buildCopyPlan        — skip set, path mapping
+ *   resolveColdPoolDir   — tilde expansion, fallback
+ *   formatSkillWarnings  — warning string generation
+ */
+import { describe, test, expect } from 'bun:test'
+import {
+  parseDeckSkills,
+  checkSkillExistence,
+  validateLinkResult,
+  buildCopyPlan,
+  resolveColdPoolDir,
+  formatSkillWarnings,
+} from './preflight'
+// ═══════════════════════════════════════════════════════════════════════════
+// parseDeckSkills
+// ═══════════════════════════════════════════════════════════════════════════
+describe('parseDeckSkills', () => {
+  test('empty deck → empty array', () => {
+    expect(parseDeckSkills({})).toEqual([])
+  })
+  test('deck with no skill sections → empty array', () => {
+    expect(parseDeckSkills({ deck: { max_cards: 10 } })).toEqual([])
+  })
+  test('inline-table format: single tool skill with path', () => {
+    const parsed = {
+      tool: {
+        skills: {
+          pdf: { path: 'github.com/anthropics/skills/skills/pdf' }
+        }
+      }
+    }
+    expect(parseDeckSkills(parsed)).toEqual([
+      { name: 'pdf', path: 'github.com/anthropics/skills/skills/pdf', section: 'tool' }
+    ])
+  })
+  test('inline-table format: multiple skills', () => {
+    const parsed = {
+      tool: {
+        skills: {
+          pdf: { path: 'github.com/anthropics/skills/skills/pdf' },
+          docx: { path: 'github.com/anthropics/skills/skills/docx' },
+        }
+      }
+    }
+    expect(parseDeckSkills(parsed)).toEqual([
+      { name: 'pdf', path: 'github.com/anthropics/skills/skills/pdf', section: 'tool' },
+      { name: 'docx', path: 'github.com/anthropics/skills/skills/docx', section: 'tool' },
+    ])
+  })
+  test('array format: skills = ["a", "b"]', () => {
+    const parsed = {
+      tool: {
+        skills: ['web-search', 'docx']
+      }
+    }
+    expect(parseDeckSkills(parsed)).toEqual([
+      { name: 'web-search', path: null, section: 'tool' },
+      { name: 'docx', path: null, section: 'tool' },
+    ])
+  })
+  test('innate section parsed separately', () => {
+    const parsed = {
+      innate: {
+        skills: {
+          deck: { path: 'github.com/lythos-labs/lythoskill/skills/lythoskill-deck' }
+        }
+      },
+      tool: {
+        skills: {
+          pdf: { path: 'github.com/anthropics/skills/skills/pdf' }
+        }
+      }
+    }
+    expect(parseDeckSkills(parsed)).toEqual([
+      { name: 'deck', path: 'github.com/lythos-labs/lythoskill/skills/lythoskill-deck', section: 'innate' },
+      { name: 'pdf', path: 'github.com/anthropics/skills/skills/pdf', section: 'tool' },
+    ])
+  })
+  test('transient section parsed', () => {
+    const parsed = {
+      transient: {
+        skills: {
+          experiment: { path: 'localhost/my-experiment' }
+        }
+      }
+    }
+    expect(parseDeckSkills(parsed)).toEqual([
+      { name: 'experiment', path: 'localhost/my-experiment', section: 'transient' }
+    ])
+  })
+  test('object entry without path → path=null', () => {
+    const parsed = {
+      tool: {
+        skills: {
+          bare: {}  // no path field
+        }
+      }
+    }
+    expect(parseDeckSkills(parsed)).toEqual([
+      { name: 'bare', path: null, section: 'tool' }
+    ])
+  })
+  test('object entry with non-string path → path=null', () => {
+    const parsed = {
+      tool: {
+        skills: {
+          weird: { path: 42 }  // number, not string
+        }
+      }
+    }
+    expect(parseDeckSkills(parsed)).toEqual([
+      { name: 'weird', path: null, section: 'tool' }
+    ])
+  })
+  test('array entry that is not a string → skipped', () => {
+    const parsed = {
+      tool: { skills: ['valid', 123, null, 'also-valid'] }
+    }
+    expect(parseDeckSkills(parsed)).toEqual([
+      { name: 'valid', path: null, section: 'tool' },
+      { name: 'also-valid', path: null, section: 'tool' },
+    ])
+  })
+  test('all three sections populated → ordered innate, tool, transient', () => {
+    const parsed = {
+      innate: { skills: { a: { path: '/a' } } },
+      tool: { skills: { b: { path: '/b' } } },
+      transient: { skills: { c: { path: '/c' } } },
+    }
+    expect(parseDeckSkills(parsed)).toEqual([
+      { name: 'a', path: '/a', section: 'innate' },
+      { name: 'b', path: '/b', section: 'tool' },
+      { name: 'c', path: '/c', section: 'transient' },
+    ])
+  })
+})
+// ═══════════════════════════════════════════════════════════════════════════
+// checkSkillExistence
+// ═══════════════════════════════════════════════════════════════════════════
+describe('checkSkillExistence', () => {
+  test('empty skills → empty array', () => {
+    const exists = (_: string) => true
+    expect(checkSkillExistence([], '/cold', exists)).toEqual([])
+  })
+  test('skill with explicit path → resolves <coldPool>/<path>/SKILL.md', () => {
+    const exists = (p: string) => p === '/cold/github.com/owner/repo/skills/my-skill/SKILL.md'
+    const skills = [{ name: 'my-skill', path: 'github.com/owner/repo/skills/my-skill', section: 'tool' }]
+    const result = checkSkillExistence(skills, '/cold', exists)
+    expect(result).toEqual([
+      { name: 'my-skill', expectedPath: '/cold/github.com/owner/repo/skills/my-skill/SKILL.md', found: true, section: 'tool' }
+    ])
+  })
+  test('skill without path (array format) → resolves <coldPool>/<name>/SKILL.md', () => {
+    const exists = (p: string) => p === '/cold/web-search/SKILL.md'
+    const skills = [{ name: 'web-search', path: null, section: 'tool' }]
+    const result = checkSkillExistence(skills, '/cold', exists)
+    expect(result).toEqual([
+      { name: 'web-search', expectedPath: '/cold/web-search/SKILL.md', found: true, section: 'tool' }
+    ])
+  })
+  test('HTTP path → uses name as fallback for path resolution', () => {
+    const exists = (p: string) => p === '/cold/my-skill/SKILL.md'
+    const skills = [{ name: 'my-skill', path: 'https://example.com/deck.toml', section: 'tool' }]
+    const result = checkSkillExistence(skills, '/cold', exists)
+    expect(result).toEqual([
+      { name: 'my-skill', expectedPath: '/cold/my-skill/SKILL.md', found: true, section: 'tool' }
+    ])
+  })
+  test('all found → all found=true', () => {
+    const exists = (_: string) => true
+    const skills = [
+      { name: 'a', path: '/a', section: 'tool' },
+      { name: 'b', path: '/b', section: 'tool' },
+    ]
+    expect(checkSkillExistence(skills, '/cold', exists)).toEqual([
+      { name: 'a', expectedPath: '/cold//a/SKILL.md', found: true, section: 'tool' },
+      { name: 'b', expectedPath: '/cold//b/SKILL.md', found: true, section: 'tool' },
+    ])
+  })
+  test('some missing → mixed found/not-found', () => {
+    const exists = (p: string) => p.includes('a')
+    const skills = [
+      { name: 'a', path: '/a', section: 'tool' },
+      { name: 'b', path: '/b', section: 'tool' },
+    ]
+    expect(checkSkillExistence(skills, '/cold', exists)).toEqual([
+      { name: 'a', expectedPath: '/cold//a/SKILL.md', found: true, section: 'tool' },
+      { name: 'b', expectedPath: '/cold//b/SKILL.md', found: false, section: 'tool' },
+    ])
+  })
+  test('different coldPoolDir → different expectedPath prefix', () => {
+    const exists = (_: string) => true
+    const skills = [{ name: 'x', path: 'github.com/x', section: 'tool' }]
+    const a = checkSkillExistence(skills, '/home/user/.agents/skill-repos', exists)
+    const b = checkSkillExistence(skills, '/opt/cold', exists)
+    expect(a[0].expectedPath).toStartWith('/home/user/.agents/skill-repos/')
+    expect(b[0].expectedPath).toStartWith('/opt/cold/')
+  })
+})
+// ═══════════════════════════════════════════════════════════════════════════
+// validateLinkResult
+// ═══════════════════════════════════════════════════════════════════════════
+describe('validateLinkResult', () => {
+  test('exitCode 0 → ok', () => {
+    expect(validateLinkResult(0, '')).toEqual({ ok: true })
+  })
+  test('exitCode 0 with stderr → still ok (stderr is not always errors)', () => {
+    expect(validateLinkResult(0, 'some warning output')).toEqual({ ok: true })
+  })
+  test('exitCode 1 → not ok, error contains snippet', () => {
+    const result = validateLinkResult(1, 'something went wrong')
+    expect(result.ok).toBe(false)
+    expect(result.error).toContain('exit 1')
+    expect(result.error).toContain('something went wrong')
+  })
+  test('exitCode null → not ok (null !== 0)', () => {
+    const result = validateLinkResult(null, 'process killed')
+    expect(result.ok).toBe(false)
+    expect(result.error).toContain('exit null')
+  })
+  test('stderr truncated to 300 chars in error message', () => {
+    const longStderr = 'x'.repeat(500)
+    const result = validateLinkResult(1, longStderr)
+    expect(result.ok).toBe(false)
+    expect(result.error!.length).toBeLessThan(350) // "Deck link failed (exit 1): " + 300 chars
+  })
+  test('exitCode 0, empty stderr → ok with no error field', () => {
+    const result = validateLinkResult(0, '')
+    expect(result.ok).toBe(true)
+    expect(result.error).toBeUndefined()
+  })
+})
+// ═══════════════════════════════════════════════════════════════════════════
+// buildCopyPlan
+// ═══════════════════════════════════════════════════════════════════════════
+describe('buildCopyPlan', () => {
+  test('empty entries → empty plan', () => {
+    expect(buildCopyPlan('/work', '/out', [], new Set())).toEqual([])
+  })
+  test('all skipped → empty plan', () => {
+    const skip = new Set(['.claude', 'skill-deck.toml'])
+    expect(buildCopyPlan('/work', '/out', ['.claude', 'skill-deck.toml'], skip)).toEqual([])
+  })
+  test('normal entries → mapped to outDir', () => {
+    const skip = new Set<string>()
+    expect(buildCopyPlan('/work', '/out', ['output.md', 'report.docx'], skip)).toEqual([
+      { src: '/work/output.md', dest: '/out/output.md', name: 'output.md' },
+      { src: '/work/report.docx', dest: '/out/report.docx', name: 'report.docx' },
+    ])
+  })
+  test('mixed skip and non-skip → only non-skipped', () => {
+    const skip = new Set(['.claude', 'skill-deck.toml', 'skill-deck.lock'])
+    const entries = ['.claude', 'output.md', 'skill-deck.toml', 'report.docx', 'skill-deck.lock']
+    expect(buildCopyPlan('/work', '/out', entries, skip)).toEqual([
+      { src: '/work/output.md', dest: '/out/output.md', name: 'output.md' },
+      { src: '/work/report.docx', dest: '/out/report.docx', name: 'report.docx' },
+    ])
+  })
+  test('preserves entry order', () => {
+    const skip = new Set<string>()
+    const entries = ['c', 'a', 'b']
+    expect(buildCopyPlan('/w', '/o', entries, skip).map(e => e.name)).toEqual(['c', 'a', 'b'])
+  })
+  test('nested paths work (agent-produced subdirectories)', () => {
+    const skip = new Set<string>()
+    expect(buildCopyPlan('/work', '/out', ['subdir/output.pdf'], skip)).toEqual([
+      { src: '/work/subdir/output.pdf', dest: '/out/subdir/output.pdf', name: 'subdir/output.pdf' },
+    ])
+  })
+})
+// ═══════════════════════════════════════════════════════════════════════════
+// resolveColdPoolDir
+// ═══════════════════════════════════════════════════════════════════════════
+describe('resolveColdPoolDir', () => {
+  test('explicit absolute path → returned as-is', () => {
+    expect(resolveColdPoolDir('/opt/cold', '/home/user', '/fallback')).toBe('/opt/cold')
+  })
+  test('explicit relative path → returned as-is', () => {
+    expect(resolveColdPoolDir('my-cold-pool', '/home/user', '/fallback')).toBe('my-cold-pool')
+  })
+  test('tilde path → expanded with homeDir', () => {
+    expect(resolveColdPoolDir('~/.agents/skill-repos', '/home/user', '/fallback'))
+      .toBe('/home/user/.agents/skill-repos')
+  })
+  test('tilde at start only → expanded; tilde elsewhere not expanded', () => {
+    expect(resolveColdPoolDir('path/with~/tilde', '/home/user', '/fallback'))
+      .toBe('path/with~/tilde')
+  })
+  test('undefined → uses fallback', () => {
+    expect(resolveColdPoolDir(undefined, '/home/user', '/default/cold'))
+      .toBe('/default/cold')
+  })
+  test('empty string → uses fallback (|| operator)', () => {
+    expect(resolveColdPoolDir('', '/home/user', '/default/cold'))
+      .toBe('/default/cold')
+  })
+})
+// ═══════════════════════════════════════════════════════════════════════════
+// formatSkillWarnings
+// ═══════════════════════════════════════════════════════════════════════════
+describe('formatSkillWarnings', () => {
+  test('all found → empty array', () => {
+    const checks = [
+      { name: 'a', expectedPath: '/p/a/SKILL.md', found: true, section: 'tool' },
+      { name: 'b', expectedPath: '/p/b/SKILL.md', found: true, section: 'tool' },
+    ]
+    expect(formatSkillWarnings(checks)).toEqual([])
+  })
+  test('some missing → one warning per missing skill', () => {
+    const checks = [
+      { name: 'pdf', expectedPath: '/cold/pdf/SKILL.md', found: false, section: 'tool' },
+      { name: 'docx', expectedPath: '/cold/docx/SKILL.md', found: true, section: 'tool' },
+    ]
+    expect(formatSkillWarnings(checks)).toEqual([
+      'Skill "pdf" declared in deck [tool] but SKILL.md not found at: /cold/pdf/SKILL.md',
+    ])
+  })
+  test('all missing → warning for each', () => {
+    const checks = [
+      { name: 'a', expectedPath: '/p/a/SKILL.md', found: false, section: 'innate' },
+      { name: 'b', expectedPath: '/p/b/SKILL.md', found: false, section: 'tool' },
+    ]
+    expect(formatSkillWarnings(checks)).toHaveLength(2)
+  })
+  test('empty array → empty array', () => {
+    expect(formatSkillWarnings([])).toEqual([])
+  })
+  test('section name appears in warning string', () => {
+    const checks = [
+      { name: 'x', expectedPath: '/p/x', found: false, section: 'transient' },
+    ]
+    expect(formatSkillWarnings(checks)[0]).toContain('[transient]')
+  })
+})

package/src/preflight.ts ADDED Viewed

@@ -0,0 +1,186 @@
+/**
+ * preflight.ts — Arena agent-run pre-flight pure functions
+ *
+ * Extracted from cli.ts agentRun to enable unit testing.
+ * All functions are pure: no filesystem IO, no spawn, no console.
+ * IO is injected via function parameters (e.g., existsFn, readdirFn).
+ */
+// ── Types ─────────────────────────────────────────────────────────────────
+/**
+ * A skill as declared in skill-deck.toml, in the shape produced by
+ * parseDeckSkills.
+ */
+export interface SkillDecl {
+  name: string       // table key (e.g., "pdf") or the string element of an array-format list
+  path: string | null // `path` string of a table entry; null for array entries and for table entries whose `path` is missing or not a string
+  section: string    // deck section the skill was found in: "innate" | "tool" | "transient"
+}
+/**
+ * Result of checking one declared skill against the cold pool.
+ * One SkillCheck is produced per SkillDecl by checkSkillExistence.
+ */
+export interface SkillCheck {
+  name: string          // copied from SkillDecl.name
+  expectedPath: string  // the `<coldPoolDir>/<path or name>/SKILL.md` string that was passed to the exists function
+  found: boolean        // return value of the exists function for expectedPath
+  section: string       // copied from SkillDecl.section
+}
+/**
+ * Result of deck link validation, as returned by validateLinkResult.
+ */
+export interface LinkResult {
+  ok: boolean     // true only when the link process exit code was exactly 0
+  error?: string  // set only when ok is false: "Deck link failed (exit <code>): <stderr snippet>"
+}
+/**
+ * A single file copy operation plan entry, as produced by buildCopyPlan.
+ */
+export interface CopyEntry {
+  src: string        // `<workdir>/<name>`
+  dest: string       // `<outDir>/<name>`
+  name: string       // the entry string exactly as passed in (not reduced to a basename); used for error reporting
+}
+// ── parseDeckSkills ──────────────────────────────────────────────────────
+/**
+ * Extract all declared skills from an already-parsed skill-deck.toml object.
+ *
+ * The caller parses the TOML text first; this function only walks the
+ * resulting object. The sections `innate`, `tool` and `transient` are
+ * scanned in that fixed order, and results are appended in scan order.
+ *
+ * Handles both TOML formats for `<section>.skills`:
+ *   [tool.skills.pdf]          → { name: "pdf", path: "github.com/...", section: "tool" }
+ *   path = "github.com/..."
+ *
+ *   skills = ["a", "b"]        → { name: "a", path: null, section: "tool" }
+ *
+ * Edge cases:
+ *   - A section with no (or falsy) `skills` value contributes nothing.
+ *   - Non-string elements of an array-format list are silently skipped.
+ *   - A table entry whose `path` is missing or not a string gets path = null.
+ *
+ * Pure: parsed object → SkillDecl[]. No IO, no Bun.TOML dependency (caller parses first).
+ *
+ * @param deckParsed  The parsed deck object (e.g. the result of a TOML parse).
+ * @returns One SkillDecl per declared skill; an empty array when none are declared.
+ */
+export function parseDeckSkills(
+  deckParsed: Record<string, any>
+): SkillDecl[] {
+  const results: SkillDecl[] = []
+  const sections = ['innate', 'tool', 'transient'] as const
+  for (const section of sections) {
+    const skills = deckParsed?.[section]?.skills
+    if (!skills) continue
+    if (Array.isArray(skills)) {
+      // Array format: skills = ["name1", "name2"]; only string elements are kept
+      for (const name of skills) {
+        if (typeof name === 'string') {
+          results.push({ name, path: null, section })
+        }
+      }
+    } else if (typeof skills === 'object') {
+      // Table format: [tool.skills.name] with path = "..."; the table key becomes the skill name
+      for (const [name, entry] of Object.entries(skills as Record<string, any>)) {
+        const skillPath = typeof entry?.path === 'string' ? entry.path : null
+        results.push({ name, path: skillPath, section })
+      }
+    }
+  }
+  return results
+}
+// ── checkSkillExistence ──────────────────────────────────────────────────
+/**
+ * Check each declared skill against the cold pool filesystem.
+ *
+ * Every skill yields exactly one SkillCheck; none are skipped.
+ *
+ * For skills with an explicit path: resolve `<coldPoolDir>/<path>/SKILL.md`
+ * For skills without a path (array format): resolve `<coldPoolDir>/<name>/SKILL.md`
+ * For skills whose path starts with "http": the path is ignored and the
+ * skill name is used instead, i.e. `<coldPoolDir>/<name>/SKILL.md`.
+ *
+ * The expected path is built by plain string concatenation with "/" and is
+ * not normalized, so a path with a leading slash produces a double slash
+ * (e.g. "/cold" + "/a" gives "/cold//a/SKILL.md").
+ *
+ * `existsFn` is the IO injection point — swap for real fs or mock. It is
+ * called once per skill with the expected path.
+ *
+ * @param skills       Declared skills, e.g. the output of parseDeckSkills.
+ * @param coldPoolDir  Cold pool root directory used as the path prefix.
+ * @param existsFn     Returns whether the given path exists.
+ * @returns One SkillCheck per input skill, in input order.
+ */
+export function checkSkillExistence(
+  skills: SkillDecl[],
+  coldPoolDir: string,
+  existsFn: (path: string) => boolean
+): SkillCheck[] {
+  return skills.map(skill => {
+    const resolvedName = skill.path && !skill.path.startsWith('http')
+      ? skill.path
+      : skill.name
+    const expectedPath = `${coldPoolDir}/${resolvedName}/SKILL.md`
+    return {
+      name: skill.name,
+      expectedPath,
+      found: existsFn(expectedPath),
+      section: skill.section,
+    }
+  })
+}
+// ── validateLinkResult ───────────────────────────────────────────────────
+/**
+ * Validate the outcome of the deck `link` subprocess
+ * (e.g. `bunx @lythos/skill-deck link`).
+ *
+ * Pure: (exitCode, stderr) → LinkResult.
+ * Only the exit code decides the outcome: exit code 0 is success regardless
+ * of what was written to stderr. Any other value, including null, is a
+ * failure whose error message carries the exit code and at most the first
+ * 300 characters of stderr.
+ *
+ * @param exitCode  Exit code of the link process; null is treated as failure.
+ * @param stderr    Captured stderr text; used only to build the error message.
+ * @returns `{ ok: true }` on success, otherwise `{ ok: false, error }`.
+ */
+export function validateLinkResult(
+  exitCode: number | null,
+  stderr: string
+): LinkResult {
+  if (exitCode !== 0) {
+    const snippet = (stderr || '').slice(0, 300)
+    return {
+      ok: false,
+      error: `Deck link failed (exit ${exitCode}): ${snippet}`,
+    }
+  }
+  return { ok: true }
+}
+// ── buildCopyPlan ────────────────────────────────────────────────────────
+/**
+ * Build a copy plan from workdir entries → outDir.
+ *
+ * Skips entries whose exact string is in `skipSet`. Each surviving entry
+ * maps `<workdir>/<name>` to `<outDir>/<name>`, preserving the input order.
+ * Paths are joined by plain string concatenation with "/"; entries are not
+ * normalized or split, so an entry such as "subdir/file" is carried through
+ * unchanged.
+ * Pure: strings + set → CopyEntry[]. No filesystem access.
+ *
+ * @param workdir  Source directory prefix.
+ * @param outDir   Destination directory prefix.
+ * @param entries  Entry names to consider (e.g. a directory listing of workdir).
+ * @param skipSet  Entry names to leave out of the plan.
+ * @returns One CopyEntry per non-skipped entry, in input order.
+ */
+export function buildCopyPlan(
+  workdir: string,
+  outDir: string,
+  entries: string[],
+  skipSet: Set<string>
+): CopyEntry[] {
+  const plan: CopyEntry[] = []
+  for (const name of entries) {
+    if (skipSet.has(name)) continue
+    plan.push({
+      src: `${workdir}/${name}`,
+      dest: `${outDir}/${name}`,
+      name,
+    })
+  }
+  return plan
+}
+// ── resolveColdPoolDir ───────────────────────────────────────────────────
+/**
+ * Resolve cold_pool root from deck config, expanding a leading ~.
+ *
+ * `fallbackDir` is used when `coldPoolRoot` is undefined or an empty string
+ * (the fallback uses `||`). If the chosen value starts with "~", that first
+ * character is replaced by `homeDir`; a "~" anywhere else is left alone.
+ * Relative and absolute paths without a leading "~" are returned unchanged.
+ *
+ * NOTE(review): any leading "~" is replaced, not just "~" or "~/...", so a
+ * value like "~name/pool" becomes `<homeDir>name/pool` — confirm whether
+ * "~user" forms can occur in deck configs.
+ *
+ * Pure: string → string. No filesystem access.
+ *
+ * @param coldPoolRoot  The cold_pool value from the deck, if any.
+ * @param homeDir       Home directory substituted for a leading "~".
+ * @param fallbackDir   Value used when coldPoolRoot is undefined or empty.
+ * @returns The resolved cold pool directory string.
+ */
+export function resolveColdPoolDir(
+  coldPoolRoot: string | undefined,
+  homeDir: string,
+  fallbackDir: string
+): string {
+  const raw = coldPoolRoot || fallbackDir
+  return raw.startsWith('~') ? `${homeDir}${raw.slice(1)}` : raw
+}
+// ── formatSkillWarnings ──────────────────────────────────────────────────
+/**
+ * Format skill check results into human-readable warning strings.
+ *
+ * Only checks with `found === false` produce output: one warning per
+ * missing skill, in input order, naming the skill, its deck section and the
+ * path that was checked. Returns an empty array when nothing is missing.
+ *
+ * Pure: SkillCheck[] → string[].
+ *
+ * @param checks  Results from checkSkillExistence.
+ * @returns Warning messages for the skills that were not found.
+ */
+export function formatSkillWarnings(checks: SkillCheck[]): string[] {
+  return checks
+    .filter(c => !c.found)
+    .map(c => `Skill "${c.name}" declared in deck [${c.section}] but SKILL.md not found at: ${c.expectedPath}`)
+}

package/src/runner.ts CHANGED Viewed

@@ -3,6 +3,8 @@ import { join, resolve } from 'node:path'
 import { tmpdir } from 'node:os'
 import { runAgentScenario, type AgentScenario } from '@lythos/test-utils/agent-bdd'
 import { useAgent } from '@lythos/test-utils/agents'
+// Optional: register claude-sdk adapter if the package is installed
+try { await import('@lythos/agent-adapter-claude-sdk') } catch { /* package not installed */ }
 import { ArenaManifest, Player } from '@lythos/test-utils/schema'
 import type { ArenaManifest as ArenaManifestType, JudgeVerdict } from '@lythos/test-utils/schema'
 import { runComparativeJudge } from './comparative-judge'