@lythos/skill-arena 0.9.23 → 0.9.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -49,26 +49,26 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
49
49
  ```bash
50
50
  bun add -d @lythos/skill-arena
51
51
  # or use directly
52
- bunx @lythos/skill-arena@0.9.23 <command>
52
+ bunx @lythos/skill-arena@0.9.25 <command>
53
53
  ```
54
54
 
55
55
  ## Quick Start
56
56
 
57
57
  ```bash
58
58
  # Mode 1: Compare two skills on the same task
59
- bunx @lythos/skill-arena@0.9.23 \
59
+ bunx @lythos/skill-arena@0.9.25 \
60
60
  --task "Generate auth flow diagram" \
61
61
  --skills "design-doc-mermaid,mermaid-tools" \
62
62
  --criteria "syntax,context,token"
63
63
 
64
64
  # Mode 2: Compare full deck configurations
65
- bunx @lythos/skill-arena@0.9.23 \
65
+ bunx @lythos/skill-arena@0.9.25 \
66
66
  --task "Generate auth flow diagram" \
67
67
  --decks "./decks/minimal.toml,./decks/rich.toml" \
68
68
  --criteria "quality,token,maintainability"
69
69
 
70
70
  # Visualize results
71
- bunx @lythos/skill-arena@0.9.23 viz tmp/arena-<id>/
71
+ bunx @lythos/skill-arena@0.9.25 viz tmp/arena-<id>/
72
72
  ```
73
73
 
74
74
  ## Commands
@@ -77,16 +77,16 @@ bunx @lythos/skill-arena@0.9.23 viz tmp/arena-<id>/
77
77
 
78
78
  ```bash
79
79
  # Print execution plan without running
80
- bunx @lythos/skill-arena@0.9.23 run --config arena.toml --dry-run
80
+ bunx @lythos/skill-arena@0.9.25 run --config arena.toml --dry-run
81
81
 
82
82
  # Execute with per-side runs_per_side and statistical aggregation
83
- bunx @lythos/skill-arena@0.9.23 run --config arena.toml
83
+ bunx @lythos/skill-arena@0.9.25 run --config arena.toml
84
84
  ```
85
85
 
86
86
  ### CLI-flag mode (backward compat)
87
87
 
88
88
  ```
89
- bunx @lythos/skill-arena@0.9.23 run \
89
+ bunx @lythos/skill-arena@0.9.25 run \
90
90
  --task ./TASK-arena.md \
91
91
  --players ./players/claude.toml \
92
92
  --decks ./decks/run-01.toml,./decks/run-02.toml \
@@ -96,13 +96,13 @@ bunx @lythos/skill-arena@0.9.23 run \
96
96
  ### Scaffold mode (legacy, manual execution)
97
97
 
98
98
  ```
99
- bunx @lythos/skill-arena@0.9.23 scaffold --task "..." --skills a,b
99
+ bunx @lythos/skill-arena@0.9.25 scaffold --task "..." --skills a,b
100
100
  ```
101
101
 
102
102
  ### Viz
103
103
 
104
104
  ```bash
105
- bunx @lythos/skill-arena@0.9.23 viz runs/arena-<id>/
105
+ bunx @lythos/skill-arena@0.9.25 viz runs/arena-<id>/
106
106
  ```
107
107
 
108
108
  ## Skill Documentation
@@ -116,7 +116,7 @@ The agent-visible **Skill** layer documentation is here:
116
116
  Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
117
117
 
118
118
  ```
119
- Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.23 ...
119
+ Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.25 ...
120
120
  Skill (packages/<name>/skill/) → build → SKILL.md + thin scripts
121
121
  Output (skills/<name>/) → git commit → agent-visible skill
122
122
  ```
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lythos/skill-arena",
3
- "version": "0.9.23",
3
+ "version": "0.9.25",
4
4
  "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
5
5
  "keywords": [
6
6
  "ai-agent",
@@ -37,11 +37,12 @@
37
37
  "bun": ">=1.0.0"
38
38
  },
39
39
  "dependencies": {
40
+ "@lythos/cold-pool": "^0.9.25",
40
41
  "@lythos/test-utils": "^0.9.1",
41
42
  "zod": "^3.24.0",
42
43
  "zod-to-json-schema": "^3.25.2"
43
44
  },
44
45
  "optionalDependencies": {
45
- "@lythos/agent-adapter-claude-sdk": "workspace:*"
46
+ "@lythos/agent-adapter-claude-sdk": "^0.9.25"
46
47
  }
47
48
  }
package/src/cli.ts CHANGED
@@ -37,8 +37,8 @@ function printHelp(): void {
37
37
  console.log(`🎭 lythoskill-arena — Skill comparison runner
38
38
 
39
39
  Usage:
40
- lythoskill-arena agent-run --task <path> --deck <path> [--player kimi] [--out <dir>]
41
- lythoskill-arena agent-run --brief "<prompt>" --deck <path> [--out <dir>]
40
+ lythoskill-arena agent-run --task <path> --deck <path> [--player kimi] [--out <dir>] [--timeout <ms>]
41
+ lythoskill-arena agent-run --brief "<prompt>" --deck <path> [--out <dir>] [--timeout <ms>]
42
42
  lythoskill-arena run --task <path> --players <A.toml,B.toml> --decks <A.toml,B.toml> --criteria <c1,c2,...> [--out <dir>]
43
43
  lythoskill-arena scaffold --task "<description>" --skills <skill1,skill2,...>
44
44
  lythoskill-arena scaffold --task "<description>" --decks <deck1,deck2,...>
@@ -90,6 +90,7 @@ async function agentRun(args: string[]) {
90
90
  else if (args[i] === '--deck' || args[i] === '-d') opts.deck = args[++i]
91
91
  else if (args[i] === '--player' || args[i] === '-p') opts.player = args[++i]
92
92
  else if (args[i] === '--out' || args[i] === '-o') opts.out = args[++i]
93
+ else if (args[i] === '--timeout') opts.timeout = args[++i]
93
94
  }
94
95
 
95
96
  if (!opts.deck) {
@@ -127,7 +128,7 @@ async function agentRun(args: string[]) {
127
128
  scenarioOpt.scenario = {
128
129
  name: 'ad-hoc task',
129
130
  description: opts.brief!.slice(0, 80),
130
- timeout: 120000,
131
+ timeout: Number(opts.timeout ?? 120000),
131
132
  given: { deck: {} },
132
133
  when: opts.brief!,
133
134
  then: ['Write your output to output.md', 'The output should be complete and well-structured'],
package/src/preflight.ts CHANGED
@@ -6,6 +6,8 @@
6
6
  * IO is injected via function parameters (e.g., existsFn, readdirFn).
7
7
  */
8
8
 
9
+ import { ColdPool, parseLocator } from '@lythos/cold-pool'
10
+
9
11
  // ── Types ─────────────────────────────────────────────────────────────────
10
12
 
11
13
  /** A skill as declared in skill-deck.toml */
@@ -83,8 +85,11 @@ export function parseDeckSkills(
83
85
  /**
84
86
  * Check each declared skill against the cold pool filesystem.
85
87
  *
86
- * For skills with explicit paths: resolve `<coldPoolDir>/<path>/SKILL.md`
87
- * For skills without paths (array format): resolve `<coldPoolDir>/<name>/SKILL.md`
88
+ * Path resolution delegates to @lythos/cold-pool's `parseLocator` and
89
+ * `ColdPool.resolveDir` so localhost / fully-qualified (FQ) / standalone forms all map to
90
+ * the right physical layout (per ADR-20260507021957847). Non-FQ legacy
91
+ * names (e.g., bare `pdf`) fall back to `<coldPoolDir>/<name>/SKILL.md`.
92
+ *
88
93
  * Skills with HTTP/URL paths are skipped (not local).
89
94
  *
90
95
  * `existsFn` is the IO injection point — swap for real fs or mock.
@@ -94,11 +99,28 @@ export function checkSkillExistence(
94
99
  coldPoolDir: string,
95
100
  existsFn: (path: string) => boolean
96
101
  ): SkillCheck[] {
102
+ const pool = new ColdPool(coldPoolDir)
97
103
  return skills.map(skill => {
98
- const resolvedName = skill.path && !skill.path.startsWith('http')
104
+ const candidatePath = skill.path && !skill.path.startsWith('http')
99
105
  ? skill.path
100
106
  : skill.name
101
- const expectedPath = `${coldPoolDir}/${resolvedName}/SKILL.md`
107
+
108
+ let expectedPath: string
109
+ const locator = parseLocator(candidatePath)
110
+ if (!locator) {
111
+ // Legacy bare-name fallback. Per ADR-20260502012643244 this should
112
+ // be removed in 0.10.x once arena.toml authors switch to fully-qualified (FQ) locators.
113
+ expectedPath = `${coldPoolDir}/${candidatePath}/SKILL.md`
114
+ } else if (locator.isLocalhost) {
115
+ // localhost layout: top-level dir under coldPool, no `localhost/` prefix
116
+ expectedPath = `${pool.resolveDir(locator)}/SKILL.md`
117
+ } else if (locator.skill) {
118
+ expectedPath = `${pool.resolveDir(locator)}/${locator.skill}/SKILL.md`
119
+ } else {
120
+ // Standalone repo: SKILL.md at repo root
121
+ expectedPath = `${pool.resolveDir(locator)}/SKILL.md`
122
+ }
123
+
102
124
  return {
103
125
  name: skill.name,
104
126
  expectedPath,