@lythos/skill-arena 0.15.3 → 0.15.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -16,20 +16,20 @@
16
16
  ```bash
17
17
  bun add -d @lythos/skill-arena
18
18
  # or use directly
19
- bunx @lythos/skill-arena@0.15.3 <command>
19
+ bunx @lythos/skill-arena@0.15.5 <command>
20
20
  ```
21
21
 
22
22
  ## Quick Start
23
23
 
24
24
  ```bash
25
25
  # single — test one deck (most common)
26
- bunx @lythos/skill-arena@0.15.3 single \
26
+ bunx @lythos/skill-arena@0.15.5 single \
27
27
  --deck ./examples/decks/scout.toml \
28
28
  --brief "Generate auth flow diagram" \
29
29
  --out ./output
30
30
 
31
31
  # single with explicit player
32
- bunx @lythos/skill-arena@0.15.3 single \
32
+ bunx @lythos/skill-arena@0.15.5 single \
33
33
  --deck ./examples/decks/scout.toml \
34
34
  --brief "Generate auth flow diagram" \
35
35
  --player kimi \
@@ -37,10 +37,10 @@ bunx @lythos/skill-arena@0.15.3 single \
37
37
 
38
38
  # cross-deck vs — compare two decks (agent-orchestrated)
39
39
  # Create arena.toml declaring sides with different decks, then:
40
- bunx @lythos/skill-arena@0.15.3 vs --config ./arena.toml
40
+ bunx @lythos/skill-arena@0.15.5 vs --config ./arena.toml
41
41
 
42
42
  # cross-player vs — compare kimi vs codex (CLI only)
43
- bunx @lythos/skill-arena@0.15.3 vs --config ./arena.toml --player kimi
43
+ bunx @lythos/skill-arena@0.15.5 vs --config ./arena.toml --player kimi
44
44
  ```
45
45
 
46
46
  **What happens**: The agent creates an isolated `/tmp` workdir per side, runs `deck link` to link the skills, spawns parallel subagents, and collects their artifacts; a judge then scores the outputs. The parent deck is restored afterwards.
@@ -50,14 +50,14 @@ bunx @lythos/skill-arena@0.15.3 vs --config ./arena.toml --player kimi
50
50
  ### `single` — one deck, one task
51
51
 
52
52
  ```bash
53
- bunx @lythos/skill-arena@0.15.3 single \
53
+ bunx @lythos/skill-arena@0.15.5 single \
54
54
  --deck ./deck.toml \
55
55
  --brief "Produce a .docx report with radar chart" \
56
56
  --timeout 600000 \
57
57
  --out ./output
58
58
 
59
59
  # with explicit player
60
- bunx @lythos/skill-arena@0.15.3 single \
60
+ bunx @lythos/skill-arena@0.15.5 single \
61
61
  --deck ./deck.toml \
62
62
  --brief "Produce a .docx report with radar chart" \
63
63
  --player kimi \
@@ -67,14 +67,14 @@ bunx @lythos/skill-arena@0.15.3 single \
67
67
  ### `vs` — multi-deck comparison
68
68
 
69
69
  ```bash
70
- bunx @lythos/skill-arena@0.15.3 vs --config ./arena.toml
71
- bunx @lythos/skill-arena@0.15.3 vs --config ./arena.toml --dry-run
70
+ bunx @lythos/skill-arena@0.15.5 vs --config ./arena.toml
71
+ bunx @lythos/skill-arena@0.15.5 vs --config ./arena.toml --dry-run
72
72
  ```
73
73
 
74
74
  ### `prepare-workdir` — isolate + link skills (agent-orchestrated)
75
75
 
76
76
  ```bash
77
- bunx @lythos/skill-arena@0.15.3 prepare-workdir \
77
+ bunx @lythos/skill-arena@0.15.5 prepare-workdir \
78
78
  --deck ./skill-deck.toml \
79
79
  --out /tmp/arena-side-a \
80
80
  --brief "task description"
@@ -85,7 +85,7 @@ Creates `/tmp`-isolated workdir with deck copied, AGENTS.md written, and `deck l
85
85
  ### `archive` — collect agent outputs (agent-orchestrated)
86
86
 
87
87
  ```bash
88
- bunx @lythos/skill-arena@0.15.3 archive \
88
+ bunx @lythos/skill-arena@0.15.5 archive \
89
89
  --from /tmp/arena-side-a \
90
90
  --to ./playground/output \
91
91
  --sides side-a
@@ -96,7 +96,7 @@ Copies agent artifacts from workdir(s) to output, skipping internal files (`.cla
96
96
  ### `viz` — render results (WIP — HTML report generation pending)
97
97
 
98
98
  ```bash
99
- bunx @lythos/skill-arena@0.15.3 viz runs/arena-<id>/
99
+ bunx @lythos/skill-arena@0.15.5 viz runs/arena-<id>/
100
100
  ```
101
101
 
102
102
  ## Parameters
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lythos/skill-arena",
3
- "version": "0.15.3",
3
+ "version": "0.15.5",
4
4
  "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
5
5
  "keywords": [
6
6
  "ai-agent",
@@ -42,15 +42,15 @@
42
42
  "bun": ">=1.0.0"
43
43
  },
44
44
  "dependencies": {
45
- "@lythos/cold-pool": "^0.15.3",
46
- "@lythos/infra": "^0.15.3",
47
- "@lythos/test-utils": "^0.15.3",
45
+ "@lythos/cold-pool": "^0.15.5",
46
+ "@lythos/infra": "^0.15.5",
47
+ "@lythos/test-utils": "^0.15.5",
48
48
  "zod": "^3.24.0",
49
49
  "zod-to-json-schema": "^3.25.2"
50
50
  },
51
51
  "optionalDependencies": {
52
- "@lythos/agent-adapter-claude-sdk": "^0.15.3",
53
- "@lythos/agent-adapter-deepseek-serve": "^0.15.3",
54
- "@lythos/agent-adapter-codex": "^0.15.3"
52
+ "@lythos/agent-adapter-claude-sdk": "^0.15.5",
53
+ "@lythos/agent-adapter-deepseek-serve": "^0.15.5",
54
+ "@lythos/agent-adapter-codex": "^0.15.5"
55
55
  }
56
56
  }
package/src/preflight.ts CHANGED
@@ -276,14 +276,14 @@ export function buildPreparePlan(params: {
276
276
  '',
277
277
  '## Setup Order (why this sequence)',
278
278
  '1. `skill-deck.toml` copied here → declares which skills you can use',
279
- '2. `deck link` runs → cold pool skills become visible in `.claude/skills/`',
279
+ '2. `deck link` runs → cold pool skills become visible in the working set',
280
280
  '3. Skill existence checked → warns if any declared skill is missing from cold pool',
281
281
  '4. `AGENTS.md` written last → confirms setup succeeded before agent starts',
282
282
  'If setup fails mid-sequence, the workdir is incomplete and nothing runs.',
283
283
  '',
284
284
  '## How This Works',
285
285
  '- Write ALL output files to this directory (CWD).',
286
- '- Use available skills — check `ls .claude/skills/`.',
286
+ '- Use available skills — check the working set directory (e.g. `ls .claude/skills/`).',
287
287
  '',
288
288
  '## Output Contract',
289
289
  '- MANDATORY: `decision-log.jsonl` — one JSON line per decision:',
package/src/runner.ts CHANGED
@@ -84,7 +84,7 @@ export function buildArenaPrompt(opts: {
84
84
  'ROBUSTNESS — If any command or script fails, read the error output, fix the issue, and retry.',
85
85
  'Do not stop on the first error. Ensure all required output files exist before finishing.',
86
86
  '',
87
- 'TOOLS — Use the skills already linked in .claude/skills/ (check with `ls .claude/skills/`).',
87
+ 'TOOLS — Use the skills already linked in your working set (check with `ls .claude/skills/` or your configured path).',
88
88
  'They are available and tested. Only write alternative scripts if the linked skills explicitly',
89
89
  'cannot handle the task.',
90
90
  ]