npm - @lythos/skill-arena - Versions diffs - 0.9.1 → 0.9.3 - Mend

@lythos/skill-arena 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/package.json +5 -1
package/src/arena-toml.test.ts +164 -0
package/src/arena-toml.ts +172 -0
package/src/cli.ts +95 -10
package/src/comparative-judge.test.ts +92 -0
package/src/comparative-judge.ts +166 -0
package/src/player.test.ts +95 -0
package/src/player.ts +71 -0
package/src/runner.ts +250 -0
package/src/stats.test.ts +111 -0
package/src/stats.ts +117 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lythos/skill-arena",
-  "version": "0.9.1",
+  "version": "0.9.3",
   "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
   "keywords": [
     "ai-agent",
@@ -35,5 +35,9 @@
   "homepage": "https://github.com/lythos-labs/lythoskill/tree/main/packages/lythoskill-arena#readme",
   "engines": {
     "bun": ">=1.0.0"
+  },
+  "dependencies": {
+    "@lythos/test-utils": "^0.9.1",
+    "zod-to-json-schema": "^3.25.2"
   }
 }

package/src/arena-toml.test.ts ADDED Viewed

@@ -0,0 +1,164 @@
+import { describe, test, expect } from 'bun:test'
+import { parseArenaToml, buildExecutionPlan, ArenaToml } from './arena-toml'
+// Smallest manifest the tests treat as valid: an [arena] table with task and
+// criteria, and two [[side]] entries. runs_per_side and control are omitted.
+const minimalToml = `
+[arena]
+task = "Test task"
+criteria = ["a", "b"]
+[[side]]
+name = "runner-a"
+player = "claude-code"
+deck = "./decks/a.toml"
+[[side]]
+name = "runner-b"
+player = "claude-code"
+deck = "./decks/b.toml"
+`
+// Three-side manifest with runs_per_side = 3. The last side ("baseline") is the
+// control and is the only one followed by a [side.env] sub-table; the tests
+// below read that env block from side[2].
+const fullToml = `
+[arena]
+task = "Generate auth flow diagram"
+criteria = ["syntax", "context", "logic", "token"]
+runs_per_side = 3
+[[side]]
+name = "minimal"
+player = "standard-coder"
+deck = "./decks/minimal.toml"
+[[side]]
+name = "rich"
+player = "expert-architect"
+deck = "./decks/rich.toml"
+[[side]]
+name = "baseline"
+player = "standard-coder"
+deck = "./decks/baseline.toml"
+control = true
+[side.env]
+container = "node:20-alpine"
+pre_run = ["npm ci", "npm run build"]
+working_dir = "/workspace"
+`
+// ── Schema + Parser ────────────────────────────────────────────────────────
+// Exercises parseArenaToml end to end (TOML text → validated object): default
+// values, the [side.env] sub-table, and the inputs that must be rejected.
+describe('parseArenaToml', () => {
+  // Omitted optional fields come back with their defaults filled in.
+  test('parses minimal two-side arena', () => {
+    const result = parseArenaToml(minimalToml)
+    expect(result.arena.task).toBe('Test task')
+    expect(result.arena.criteria).toEqual(['a', 'b'])
+    expect(result.arena.runs_per_side).toBe(1)       // default
+    expect(result.side).toHaveLength(2)
+    expect(result.side[0].name).toBe('runner-a')
+    expect(result.side[0].player).toBe('claude-code')
+    expect(result.side[0].deck).toBe('./decks/a.toml')
+    expect(result.side[0].control).toBe(false)         // default
+  })
+  test('parses full arena with runs_per_side and control', () => {
+    const result = parseArenaToml(fullToml)
+    expect(result.arena.runs_per_side).toBe(3)
+    expect(result.side).toHaveLength(3)
+    expect(result.side[2].name).toBe('baseline')
+    expect(result.side[2].control).toBe(true)
+  })
+  // The [side.env] table in fullToml follows the third [[side]], so it is read from side[2].
+  test('parses side env block', () => {
+    const result = parseArenaToml(fullToml)
+    const env = result.side[2].env
+    expect(env.container).toBe('node:20-alpine')
+    expect(env.pre_run).toEqual(['npm ci', 'npm run build'])
+    expect(env.working_dir).toBe('/workspace')
+    expect(env.env_vars).toEqual({})
+  })
+  // Only one [[side]] entry is supplied here.
+  test('rejects fewer than 2 sides', () => {
+    const bad = `[arena]\ntask = "x"\ncriteria = ["a"]\n\n[[side]]\nname = "only"\nplayer = "c"\ndeck = "x.toml"`
+    expect(() => parseArenaToml(bad)).toThrow()
+  })
+  test('rejects empty criteria', () => {
+    const bad = `[arena]\ntask = "x"\ncriteria = []\n\n[[side]]\nname = "a"\nplayer = "c"\ndeck = "a.toml"\n\n[[side]]\nname = "b"\nplayer = "c"\ndeck = "b.toml"`
+    expect(() => parseArenaToml(bad)).toThrow()
+  })
+  // Calls the schema directly with a string, bypassing the TOML parser.
+  test('rejects non-object input', () => {
+    expect(() => ArenaToml.parse('not valid')).toThrow()
+  })
+  test('rejects missing arena section', () => {
+    expect(() => parseArenaToml('[[side]]\nname = "a"')).toThrow()
+  })
+  test('rejects runs_per_side = 0', () => {
+    const bad = `[arena]\ntask = "x"\ncriteria = ["a"]\nruns_per_side = 0\n\n[[side]]\nname = "a"\nplayer = "c"\ndeck = "a.toml"\n\n[[side]]\nname = "b"\nplayer = "c"\ndeck = "b.toml"`
+    expect(() => parseArenaToml(bad)).toThrow()
+  })
+  // NOTE(review): despite the title, only integer fields are asserted here.
+  test('parses integer and boolean values correctly', () => {
+    const toml = `[arena]\ntask = "x"\ncriteria = ["a"]\nruns_per_side = 2\nmax_participants = 5\n\n[[side]]\nname = "a"\nplayer = "c"\ndeck = "a.toml"\n\n[[side]]\nname = "b"\nplayer = "c"\ndeck = "b.toml"`
+    const result = parseArenaToml(toml)
+    expect(result.arena.runs_per_side).toBe(2)
+    expect(result.arena.max_participants).toBe(5)
+  })
+  // Covers a full-line comment only; trailing comments after a value are not exercised.
+  test('comments are stripped', () => {
+    const toml = `[arena]\n# this is a comment\ntask = "x"\ncriteria = ["a"]\n\n[[side]]\nname = "a"\nplayer = "c"\ndeck = "a.toml"\n\n[[side]]\nname = "b"\nplayer = "c"\ndeck = "b.toml"`
+    const result = parseArenaToml(toml)
+    expect(result.arena.task).toBe('x')
+  })
+})
+// ── Execution Plan ─────────────────────────────────────────────────────────
+// Checks that buildExecutionPlan expands sides × runs_per_side into ordered
+// cells and carries each side's control flag into every one of its cells.
+describe('buildExecutionPlan', () => {
+  test('generates plan: 2 sides × 1 run = 2 cells', () => {
+    const toml = parseArenaToml(minimalToml)
+    const plan = buildExecutionPlan(toml)
+    expect(plan.task).toBe('Test task')
+    expect(plan.criteria).toEqual(['a', 'b'])
+    expect(plan.cells).toHaveLength(2)
+    expect(plan.total_runs).toBe(2)
+    expect(plan.cells[0]).toEqual({ side: 'runner-a', player: 'claude-code', deck: './decks/a.toml', run: 1, control: false })
+    expect(plan.cells[1]).toEqual({ side: 'runner-b', player: 'claude-code', deck: './decks/b.toml', run: 1, control: false })
+  })
+  test('generates plan: 3 sides × 3 runs = 9 cells', () => {
+    const toml = parseArenaToml(fullToml)
+    const plan = buildExecutionPlan(toml)
+    expect(plan.cells).toHaveLength(9)
+    expect(plan.total_runs).toBe(9)
+    // Cells are ordered: side 0 run 1, side 0 run 2, side 0 run 3, side 1 run 1, ...
+    expect(plan.cells[0]).toEqual({ side: 'minimal', player: 'standard-coder', deck: './decks/minimal.toml', run: 1, control: false })
+    expect(plan.cells[1]).toEqual({ side: 'minimal', player: 'standard-coder', deck: './decks/minimal.toml', run: 2, control: false })
+    expect(plan.cells[2]).toEqual({ side: 'minimal', player: 'standard-coder', deck: './decks/minimal.toml', run: 3, control: false })
+    expect(plan.cells[3]).toEqual({ side: 'rich', player: 'expert-architect', deck: './decks/rich.toml', run: 1, control: false })
+    expect(plan.cells[8]).toEqual({ side: 'baseline', player: 'standard-coder', deck: './decks/baseline.toml', run: 3, control: true })
+  })
+  test('control flag preserved in plan cells', () => {
+    const toml = parseArenaToml(fullToml)
+    const plan = buildExecutionPlan(toml)
+    const baselineCells = plan.cells.filter(c => c.side === 'baseline')
+    expect(baselineCells).toHaveLength(3)
+    expect(baselineCells.every(c => c.control)).toBe(true)
+  })
+  // Shape check only: every cell exposes the fields a --dry-run listing prints.
+  test('dry-run: plan is pure data, no side effects', () => {
+    // The entire plan generation is a pure function — dry-run is just printing it
+    const toml = parseArenaToml(fullToml)
+    const plan = buildExecutionPlan(toml)
+    // Verify plan is self-describing for a --dry-run output
+    expect(plan.total_runs).toBeGreaterThan(0)
+    expect(plan.cells.every(c => typeof c.side === 'string')).toBe(true)
+    expect(plan.cells.every(c => typeof c.player === 'string')).toBe(true)
+    expect(plan.cells.every(c => typeof c.deck === 'string')).toBe(true)
+    expect(plan.cells.every(c => typeof c.run === 'number')).toBe(true)
+  })
+})

package/src/arena-toml.ts ADDED Viewed

@@ -0,0 +1,172 @@
+import { z } from 'zod'
+import type { ArenaManifest } from '@lythos/test-utils/schema'
+// ── arena.toml Zod schema (declarative input, k8s-manifest style) ──────────
+// Anchored on: ADR-20260502110308316
+/** Schema for the optional environment table attached to a side (`[side.env]`). */
+export const SideEnv = z.object({
+  container: z.string().optional(),
+  pre_run: z.array(z.string()).default([]),       // omitted → []
+  working_dir: z.string().optional(),
+  env_vars: z.record(z.string()).default({}),     // omitted → {}
+})
+/** Type inferred from the {@link SideEnv} schema. */
+export type SideEnv = z.infer<typeof SideEnv>
+/** Schema for one `[[side]]` entry: a named player/deck pairing, optionally flagged as the control. */
+export const Side = z.object({
+  name: z.string(),
+  player: z.string(),              // reference to player config (useAgent resolves)
+  deck: z.string(),                // path to deck.toml
+  control: z.boolean().default(false),
+  env: SideEnv.default({}),        // a side without [side.env] still gets an env object
+})
+/** Type inferred from the {@link Side} schema. */
+export type Side = z.infer<typeof Side>
+/**
+ * Top-level arena.toml schema: one `[arena]` table plus 2–5 `[[side]]` entries.
+ * NOTE(review): `max_participants` is validated on its own (2–5) but nothing in
+ * this schema compares it with the number of sides — confirm whether a
+ * cross-check is intended elsewhere.
+ */
+export const ArenaToml = z.object({
+  arena: z.object({
+    task: z.string(),              // task description or path to TASK-arena.md
+    criteria: z.array(z.string()).min(1),
+    runs_per_side: z.number().int().positive().default(1),
+    max_participants: z.number().int().min(2).max(5).default(5),
+  }),
+  side: z.array(Side).min(2).max(5),
+})
+/** Type inferred from the {@link ArenaToml} schema. */
+export type ArenaToml = z.infer<typeof ArenaToml>
+// ── Parser ─────────────────────────────────────────────────────────────────
+/**
+ * Turn the text of an arena.toml file into a validated manifest.
+ *
+ * @param content Raw arena.toml text.
+ * @returns The manifest as accepted by the `ArenaToml` schema.
+ * @throws When the parsed data does not satisfy the `ArenaToml` schema.
+ */
+export function parseArenaToml(content: string): ArenaToml {
+  // The in-file subset parser covers arena.toml, so no TOML dependency is needed.
+  return ArenaToml.parse(parseToml(content))
+}
+// ── Plan generation (pure function, dry-run visible) ───────────────────────
+/** One scheduled run: a single (side, run number) pair of the execution plan. */
+export interface ExecutionCell {
+  side: string                     // side name
+  player: string                   // player reference
+  deck: string                     // deck path
+  run: number                      // 1-indexed run number
+  control: boolean                 // copied from the side's control flag
+}
+/** Flat, serializable description of everything an arena run will execute. */
+export interface ExecutionPlan {
+  task: string                     // copied from arena.task
+  criteria: string[]               // copied from arena.criteria
+  cells: ExecutionCell[]           // one entry per side per run
+  total_runs: number               // always cells.length
+}
+/**
+ * Expand a validated manifest into the flat list of runs to execute.
+ *
+ * Sides are visited in declaration order and each contributes runs
+ * 1..runs_per_side, so the cells come out grouped by side.
+ *
+ * @param toml Manifest produced by the `ArenaToml` schema.
+ * @returns The plan; `total_runs` equals the number of cells.
+ */
+export function buildExecutionPlan(toml: ArenaToml): ExecutionPlan {
+  const { task, criteria, runs_per_side: runsPerSide } = toml.arena
+  const cells = toml.side.flatMap(({ name, player, deck, control }): ExecutionCell[] =>
+    Array.from({ length: runsPerSide }, (_, index) => ({
+      side: name,
+      player,
+      deck,
+      run: index + 1, // run numbers are 1-indexed
+      control,
+    })),
+  )
+  return { task, criteria, cells, total_runs: cells.length }
+}
+// ── Minimal TOML parser (handles the arena.toml subset without external dep) ──
+/**
+ * Parse the TOML subset used by arena.toml into a plain object tree.
+ *
+ * Supported: `[table]` and dotted `[a.b]` headers, `[[array-of-tables]]`
+ * headers, and single-line `key = value` pairs whose value is a quoted string,
+ * a boolean, a number, or a flat array. Multi-line values and inline tables are
+ * not handled, escape sequences inside strings are not decoded, and lines that
+ * match none of the supported forms are ignored.
+ *
+ * Quoting is respected throughout: a `#` or `,` inside a quoted string is
+ * content, and a quoted value always stays a string (`"true"`, `"42"` and
+ * `"[x]"` are not coerced).
+ *
+ * @param text Raw TOML document.
+ * @returns The parsed tree; each array-of-tables key maps to an array of tables.
+ */
+function parseToml(text: string): Record<string, unknown> {
+  const result: Record<string, unknown> = {}
+  let currentTable: Record<string, unknown> = result
+  const arrayTables = new Map<string, Record<string, unknown>[]>()
+  for (const rawLine of text.split('\n')) {
+    const line = stripTomlComment(rawLine).trim()
+    if (!line) continue
+    // [[array]] — start a new entry; following keys are written into it
+    const arrayMatch = line.match(/^\[\[(.+?)\]\]$/)
+    if (arrayMatch) {
+      const key = arrayMatch[1] // e.g. "side"
+      const entries = arrayTables.get(key) ?? []
+      arrayTables.set(key, entries)
+      currentTable = {}
+      entries.push(currentTable)
+      continue
+    }
+    // [section] or dotted [parent.child]
+    const sectionMatch = line.match(/^\[(.+?)\]$/)
+    if (sectionMatch) {
+      currentTable = openTomlTable(result, arrayTables, sectionMatch[1])
+      continue
+    }
+    // key = value
+    const eqIdx = line.indexOf('=')
+    if (eqIdx === -1) continue
+    currentTable[line.slice(0, eqIdx).trim()] = parseTomlValue(line.slice(eqIdx + 1).trim())
+  }
+  // Materialize array tables into result
+  for (const [key, arr] of arrayTables) {
+    result[key] = arr
+  }
+  return result
+}
+/** Drop a trailing `# comment`, ignoring any `#` that sits inside a quoted string. */
+function stripTomlComment(line: string): string {
+  let quote = ''
+  let escaped = false
+  for (let i = 0; i < line.length; i++) {
+    const ch = line.charAt(i)
+    if (quote) {
+      // Backslash escapes only exist in double-quoted (basic) strings.
+      if (escaped) escaped = false
+      else if (ch === '\\' && quote === '"') escaped = true
+      else if (ch === quote) quote = ''
+    } else if (ch === '"' || ch === "'") {
+      quote = ch
+    } else if (ch === '#') {
+      return line.slice(0, i)
+    }
+  }
+  return line
+}
+/** True when `value` both starts and ends with the same quote character. */
+function isQuotedToml(value: string): boolean {
+  return (value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))
+}
+/** Split the inside of `[...]` on commas that are not inside a quoted string. */
+function splitTomlArray(inner: string): string[] {
+  const items: string[] = []
+  let current = ''
+  let quote = ''
+  let escaped = false
+  for (let i = 0; i < inner.length; i++) {
+    const ch = inner.charAt(i)
+    if (quote) {
+      current += ch
+      if (escaped) escaped = false
+      else if (ch === '\\' && quote === '"') escaped = true
+      else if (ch === quote) quote = ''
+    } else if (ch === ',') {
+      items.push(current.trim())
+      current = ''
+    } else {
+      if (ch === '"' || ch === "'") quote = ch
+      current += ch
+    }
+  }
+  // An empty tail means the array was empty or ended with a trailing comma.
+  const tail = current.trim()
+  if (tail) items.push(tail)
+  return items
+}
+/** Convert the text right of `=` into a string, boolean, number, or string array. */
+function parseTomlValue(raw: string): unknown {
+  // Quoted values are always strings, whatever their content looks like.
+  if (isQuotedToml(raw)) return raw.slice(1, -1)
+  if (raw.startsWith('[') && raw.endsWith(']')) {
+    // Bare (unquoted) items are kept as strings, as they were before.
+    return splitTomlArray(raw.slice(1, -1)).map(item => (isQuotedToml(item) ? item.slice(1, -1) : item))
+  }
+  if (raw === 'true') return true
+  if (raw === 'false') return false
+  if (/^-?\d+(\.\d+)?$/.test(raw)) return Number(raw)
+  return raw
+}
+/**
+ * Create the table named by a `[header]` path and return it so following keys
+ * land inside it. A first segment that names an array of tables resolves to
+ * that array's latest entry (`[side.env]` → last `[[side]]`); any other
+ * missing intermediate table is created on the way down.
+ */
+function openTomlTable(
+  root: Record<string, unknown>,
+  arrayTables: Map<string, Record<string, unknown>[]>,
+  path: string,
+): Record<string, unknown> {
+  const segments = path.split('.').map(segment => segment.trim())
+  let node = root
+  for (let depth = 0; depth < segments.length - 1; depth++) {
+    const segment = segments[depth]
+    const entries = depth === 0 ? arrayTables.get(segment) : undefined
+    if (entries && entries.length > 0) {
+      node = entries[entries.length - 1]
+      continue
+    }
+    const existing = node[segment]
+    if (typeof existing === 'object' && existing !== null && !Array.isArray(existing)) {
+      node = existing as Record<string, unknown>
+    } else {
+      const created: Record<string, unknown> = {}
+      node[segment] = created
+      node = created
+    }
+  }
+  // The final segment always gets a fresh table (a repeated header replaces the earlier one).
+  const table: Record<string, unknown> = {}
+  node[segments[segments.length - 1]] = table
+  return table
+}

package/src/cli.ts CHANGED Viewed

@@ -29,23 +29,40 @@ function printHelp(): void {
   console.log(`🎭 lythoskill-arena — Skill comparison runner
 Usage:
-  lythoskill-arena --task "<task description>" --skills <skill1,skill2,...>
-  lythoskill-arena --task "<task description>" --decks <deck1,deck2,...>
+  lythoskill-arena run --task <path> --players <A.toml,B.toml> --decks <A.toml,B.toml> --criteria <c1,c2,...> [--out <dir>]
+  lythoskill-arena scaffold --task "<description>" --skills <skill1,skill2,...>
+  lythoskill-arena scaffold --task "<description>" --decks <deck1,deck2,...>
   lythoskill-arena viz <arena-dir>
+Commands:
+  run       Run arena programmatically (declarative arena.toml or CLI flags)
+  scaffold  Create arena directory structure (legacy, manual subagent execution)
+  viz       Visualize arena report (ASCII charts)
 Options:
-  -t, --task <desc>      Task description (required)
-  -s, --skills <list>    Comma-separated skill names
+  -t, --task <path|desc> Task description or path to TASK-arena.md
+  -s, --skills <list>    Comma-separated skill names (scaffold only)
       --decks <list>     Comma-separated deck paths
   -c, --criteria <list>  Evaluation criteria (default: syntax,context,logic,token)
-      --control <skill>  Control skill for comparison (default: lythoskill-project-scribe)
-  -d, --dir <dir>        Output directory (default: tmp)
+      --players <list>   Comma-separated player.toml paths (CLI run only)
+      --config <path>    Path to arena.toml (declarative mode, k8s-style)
+      --dry-run          Print execution plan without running (with --config)
+      --control <skill>  Control skill for comparison (scaffold only)
+      --out <dir>        Output directory (run: defaults to runs/arena-<id>)
+  -d, --dir <dir>        Output directory (scaffold: defaults to tmp)
   -p, --project <dir>    Project directory (default: .)
 Examples:
-  lythoskill-arena --task "Refactor auth module" --skills skill-a,skill-b
-  lythoskill-arena --task "Write tests" --decks ./decks/minimal.toml,./decks/full.toml
-  lythoskill-arena viz tmp/arena-20260430
+  # Declarative mode (k8s-style)
+  lythoskill-arena run --config ./arena.toml
+  lythoskill-arena run --config ./arena.toml --dry-run
+  # CLI-flag mode (backward compat)
+  lythoskill-arena run --task ./TASK-arena.md --players ./players/claude.toml --decks ./decks/run-01.toml,./decks/run-02.toml --criteria coverage,relevance
+  # Legacy scaffolding
+  lythoskill-arena scaffold --task "Refactor auth module" --skills skill-a,skill-b
+  lythoskill-arena viz runs/arena-20260504
 `)
 }
@@ -551,6 +568,67 @@ function runViz(argv: string[]) {
   console.log(renderRadarChart(report))
 }
+// ── Run: programmatic arena execution ───────────────────────
+/**
+ * Handler for the `run` subcommand.
+ *
+ * With `--config`, the arena.toml at that path is parsed and handed to
+ * `runArenaFromToml`; a result carrying a `plan` (dry-run) is printed cell by
+ * cell instead of a completion summary. Without `--config`, `--task` and
+ * `--decks` are required (the process exits with code 1 otherwise) and the
+ * remaining flags fall back to the defaults spelled out below.
+ *
+ * @param argv Arguments following the `run` subcommand.
+ */
+async function runProgrammaticArena(argv: string[]) {
+  const { options } = parseArgs(argv)
+  const { readFileSync } = await import('node:fs')
+  // Loosely-typed view of the parsed options for flags that are read by key.
+  const flags = options as Record<string, string | undefined>
+  const dryRun = argv.includes('--dry-run')
+  if (!flags.config) {
+    // CLI-flag mode (backward compat)
+    if (!options.task || !options.decks) {
+      console.error('❌ --task <path> and --decks <list> are required for "run" (or use --config <arena.toml>)')
+      process.exit(1)
+    }
+    const { runArena: runArenaProgrammatic } = await import('./runner')
+    const toList = (raw: string) => raw.split(',').map(s => s.trim()).filter(Boolean)
+    const outcome = await runArenaProgrammatic({
+      taskPath: options.task,
+      playerPaths: toList(options.players ?? 'players/claude-code.toml'),
+      deckPaths: toList(options.decks),
+      criteria: toList(options.criteria ?? 'syntax,context,logic,token'),
+      outDir: options.out ?? `runs/arena-${timestamp()}`,
+    })
+    console.log(`\n🎮 Arena complete: ${outcome.manifest.id}`)
+    console.log(`📁 Artifacts: ${outcome.artifactsDir}`)
+    console.log(`📊 Report: ${outcome.artifactsDir}/report.md`)
+    return
+  }
+  // arena.toml declarative mode
+  const { parseArenaToml } = await import('./arena-toml')
+  const { runArenaFromToml } = await import('./runner')
+  const toml = parseArenaToml(readFileSync(flags.config, 'utf-8'))
+  // A task starting with "/" or "./" is used as the path as-is; otherwise --task wins when given.
+  const declaredTask = toml.arena.task
+  const taskIsPath = declaredTask.startsWith('/') || declaredTask.startsWith('./')
+  const result = await runArenaFromToml({
+    toml,
+    taskPath: taskIsPath ? declaredTask : flags.task ?? declaredTask,
+    outDir: flags.out,
+    dryRun,
+  })
+  if ('plan' in result) {
+    // dry-run
+    const { plan } = result
+    console.log(`\n📋 Dry-run: ${plan.total_runs} cells across ${plan.cells.length / Math.max(1, toml.arena.runs_per_side)} sides × ${toml.arena.runs_per_side} runs`)
+    for (const cell of plan.cells) {
+      const controlTag = cell.control ? ' [control]' : ''
+      console.log(`   ${cell.side}/run-${cell.run}: ${cell.player} × ${cell.deck}${controlTag}`)
+    }
+    return
+  }
+  console.log(`\n🎮 Arena complete: ${result.manifest.id}`)
+  console.log(`📁 Artifacts: ${result.artifactsDir}`)
+  console.log(`📊 Report: ${result.artifactsDir}/report.md`)
+}
 // ── Main Entry ───────────────────────────────────────────────
 if (import.meta.main) {
@@ -559,7 +637,14 @@ if (import.meta.main) {
   if (cmd === 'viz') {
     runViz(args.slice(1))
+  } else if (cmd === 'run') {
+    runProgrammaticArena(args.slice(1))
+  } else if (cmd === 'scaffold' || !cmd || args[0]?.startsWith('-')) {
+    // Legacy behavior: if no subcommand or starts with flags, treat as scaffold
+    runArena(cmd === 'scaffold' ? args.slice(1) : args)
   } else {
-    runArena(args)
+    console.error(`❌ Unknown command: ${cmd}`)
+    printHelp()
+    process.exit(1)
   }
 }

package/src/comparative-judge.test.ts ADDED Viewed

@@ -0,0 +1,92 @@
+import { describe, test, expect } from 'bun:test'
+import { computePareto } from './comparative-judge'
+// Pareto-dominance checks for computePareto. Results are read by position, so
+// these tests rely on the output keeping the same order as the input.
+describe('computePareto', () => {
+  test('single participant is always non-dominated', () => {
+    const result = computePareto([
+      { participant_id: 'run-01', scores: { a: 5, b: 3 } },
+    ])
+    expect(result).toHaveLength(1)
+    expect(result[0].dominated).toBe(false)
+    expect(result[0].dominated_by).toEqual([])
+  })
+  test('clear dominance: run-01 dominates run-02 on all criteria', () => {
+    const result = computePareto([
+      { participant_id: 'run-01', scores: { coverage: 5, relevance: 5 } },
+      { participant_id: 'run-02', scores: { coverage: 3, relevance: 2 } },
+    ])
+    expect(result[0].dominated).toBe(false)
+    expect(result[1].dominated).toBe(true)
+    expect(result[1].dominated_by).toEqual(['run-01'])
+  })
+  // Identical scores: neither side is strictly better anywhere, so neither is dominated.
+  test('equal scores: no one dominates', () => {
+    const result = computePareto([
+      { participant_id: 'run-01', scores: { a: 4, b: 4 } },
+      { participant_id: 'run-02', scores: { a: 4, b: 4 } },
+    ])
+    expect(result[0].dominated).toBe(false)
+    expect(result[1].dominated).toBe(false)
+  })
+  test('cross dominance: each wins on different criteria', () => {
+    const result = computePareto([
+      { participant_id: 'run-01', scores: { speed: 5, accuracy: 2 } },
+      { participant_id: 'run-02', scores: { speed: 2, accuracy: 5 } },
+    ])
+    // Neither dominates: run-01 better on speed but worse on accuracy
+    expect(result[0].dominated).toBe(false)
+    expect(result[1].dominated).toBe(false)
+  })
+  test('multi-participant: transitive dominance chain', () => {
+    const result = computePareto([
+      { participant_id: 'best', scores: { a: 5, b: 5, c: 5 } },
+      { participant_id: 'mid', scores: { a: 4, b: 4, c: 4 } },
+      { participant_id: 'worst', scores: { a: 2, b: 2, c: 2 } },
+    ])
+    // best dominates both, mid dominates worst
+    expect(result[0].dominated).toBe(false) // best
+    expect(result[1].dominated).toBe(true)  // mid (by best)
+    expect(result[1].dominated_by).toEqual(['best'])
+    expect(result[2].dominated).toBe(true)  // worst (by both)
+    // Both lists are sorted so the assertion does not depend on dominated_by ordering.
+    expect(result[2].dominated_by.sort()).toEqual(['best', 'mid'].sort())
+  })
+  test('Pareto frontier from playground BDD-research: run-01 dominates run-02', () => {
+    // From playground/arena-bdd-research/report.md:
+    // Run-01: coverage=5, relevance=5, actionability=5, depth=5
+    // Run-02: coverage=3, relevance=2, actionability=2, depth=1
+    const result = computePareto([
+      { participant_id: 'run-01', scores: { coverage: 5, relevance: 5, actionability: 5, depth: 5 } },
+      { participant_id: 'run-02', scores: { coverage: 3, relevance: 2, actionability: 2, depth: 1 } },
+    ])
+    expect(result[0].dominated).toBe(false) // run-01: Pareto-optimal
+    expect(result[1].dominated).toBe(true)  // run-02: dominated by run-01
+    expect(result[1].dominated_by).toEqual(['run-01'])
+  })
+  // With no criteria at all there is nothing to compare, so nobody is dominated.
+  test('empty scores object', () => {
+    const result = computePareto([
+      { participant_id: 'a', scores: {} },
+      { participant_id: 'b', scores: {} },
+    ])
+    expect(result).toHaveLength(2)
+    expect(result[0].dominated).toBe(false)
+    expect(result[1].dominated).toBe(false)
+  })
+  test('partial criteria overlap', () => {
+    const result = computePareto([
+      { participant_id: 'run-01', scores: { a: 5, b: 3 } },
+      { participant_id: 'run-02', scores: { a: 3, c: 5 } },
+    ])
+    // Presumably a criterion missing from one participant counts as 0 (verify in computePareto).
+    // Under that reading: run-01 = { a: 5, b: 3, c: 0 }, run-02 = { a: 3, b: 0, c: 5 }.
+    // run-01 is ahead on a and b, run-02 is ahead on c.
+    // Cross-dominance → neither dominates
+    expect(result[0].dominated).toBe(false)
+    expect(result[1].dominated).toBe(false)
+  })
+})