@lythos/skill-arena 0.14.5 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +26 -34
  2. package/package.json +7 -7
  3. package/src/cli.ts +1 -1
package/README.md CHANGED
@@ -16,24 +16,31 @@
16
16
  ```bash
17
17
  bun add -d @lythos/skill-arena
18
18
  # or use directly
19
- bunx @lythos/skill-arena@0.14.5 <command>
19
+ bunx @lythos/skill-arena@0.15.0 <command>
20
20
  ```
21
21
 
22
22
  ## Quick Start
23
23
 
24
24
  ```bash
25
25
  # single — test one deck (most common)
26
- bunx @lythos/skill-arena@latest single \
26
+ bunx @lythos/skill-arena@0.15.0 single \
27
27
  --deck ./examples/decks/scout.toml \
28
28
  --brief "Generate auth flow diagram" \
29
29
  --out ./output
30
30
 
31
+ # single with explicit player
32
+ bunx @lythos/skill-arena@0.15.0 single \
33
+ --deck ./examples/decks/scout.toml \
34
+ --brief "Generate auth flow diagram" \
35
+ --player kimi \
36
+ --out ./output
37
+
31
38
  # cross-deck vs — compare two decks (agent-orchestrated)
32
39
  # Create arena.toml declaring sides with different decks, then:
33
- bunx @lythos/skill-arena@latest vs --config ./arena.toml
40
+ bunx @lythos/skill-arena@0.15.0 vs --config ./arena.toml
34
41
 
35
42
  # cross-player vs — compare kimi vs codex (CLI only)
36
- bunx @lythos/skill-arena@latest vs --config ./arena.toml --player kimi
43
+ bunx @lythos/skill-arena@0.15.0 vs --config ./arena.toml --player kimi
37
44
  ```
38
45
 
39
46
  **What happens**: The agent creates an isolated `/tmp` workdir per side, links skills with `deck link`, spawns parallel subagents, and collects artifacts; a judge then scores the outputs. The parent deck is restored afterwards.
@@ -43,42 +50,34 @@ bunx @lythos/skill-arena@latest vs --config ./arena.toml --player kimi
43
50
  ### `single` — one deck, one task
44
51
 
45
52
  ```bash
46
- bunx @lythos/skill-arena@latest single \
53
+ bunx @lythos/skill-arena@0.15.0 single \
47
54
  --deck ./deck.toml \
48
55
  --brief "Produce a .docx report with radar chart" \
49
56
  --timeout 600000 \
50
57
  --out ./output
51
- ```
52
58
 
53
- ### `vs` multi-deck comparison
54
-
55
- ```bash
56
- bunx @lythos/skill-arena@latest vs --config ./arena.toml
57
- bunx @lythos/skill-arena@latest vs --config ./arena.toml --dry-run
59
+ # with explicit player
60
+ bunx @lythos/skill-arena@0.15.0 single \
61
+ --deck ./deck.toml \
62
+ --brief "Produce a .docx report with radar chart" \
63
+ --player kimi \
64
+ --out ./output
58
65
  ```
59
66
 
60
- ### `scaffold` — legacy directory setup
67
+ ### `vs` — multi-deck comparison
61
68
 
62
69
  ```bash
63
- bunx @lythos/skill-arena@latest scaffold \
64
- --task "Generate auth flow diagram" \
65
- --decks "./decks/minimal.toml,./decks/rich.toml"
70
+ bunx @lythos/skill-arena@0.15.0 vs --config ./arena.toml
71
+ bunx @lythos/skill-arena@0.15.0 vs --config ./arena.toml --dry-run
66
72
  ```
67
73
 
68
74
  ### `prepare-workdir` — isolate + link skills (agent-orchestrated)
69
75
 
70
76
  ```bash
71
- bunx @lythos/skill-arena@latest prepare-workdir \
77
+ bunx @lythos/skill-arena@0.15.0 prepare-workdir \
72
78
  --deck ./skill-deck.toml \
73
79
  --out /tmp/arena-side-a \
74
80
  --brief "task description"
75
-
76
- # Plan-first: review before executing
77
- bunx @lythos/skill-arena@latest prepare-workdir \
78
- --deck ./skill-deck.toml \
79
- --out /tmp/arena-side-a \
80
- --brief "task" \
81
- --dry-run
82
81
  ```
83
82
 
84
83
  Creates a `/tmp`-isolated workdir with the deck copied, `AGENTS.md` written, and `deck link` run. `--dry-run` prints the plan (skills, workdir path, whether linking is needed) without creating anything.
@@ -86,25 +85,18 @@ Creates `/tmp`-isolated workdir with deck copied, AGENTS.md written, and `deck l
86
85
  ### `archive` — collect agent outputs (agent-orchestrated)
87
86
 
88
87
  ```bash
89
- bunx @lythos/skill-arena@latest archive \
88
+ bunx @lythos/skill-arena@0.15.0 archive \
90
89
  --from /tmp/arena-side-a \
91
90
  --to ./playground/output \
92
91
  --sides side-a
93
-
94
- # Plan-first: review what would be copied
95
- bunx @lythos/skill-arena@latest archive \
96
- --from /tmp/arena-side-a \
97
- --to ./playground/output \
98
- --sides side-a \
99
- --dry-run
100
92
  ```
101
93
 
102
94
  Copies agent artifacts from workdir(s) to output, skipping internal files (`.claude`, `skill-deck.toml`, `skill-deck.lock`, `AGENTS.md`). Single-side archives fall back to workdir root when the named side subdirectory doesn't exist. `--dry-run` shows the per-side plan before copying.
103
95
 
104
- ### `viz` — render results
96
+ ### `viz` — render results (WIP — HTML report generation pending)
105
97
 
106
98
  ```bash
107
- bunx @lythos/skill-arena@latest viz runs/arena-<id>/
99
+ bunx @lythos/skill-arena@0.15.0 viz runs/arena-<id>/
108
100
  ```
109
101
 
110
102
  ## Parameters
@@ -113,7 +105,7 @@ bunx @lythos/skill-arena@latest viz runs/arena-<id>/
113
105
  |------|---------|-------------|
114
106
  | `--brief "<text>"` | single | Inline task brief |
115
107
  | `--deck <path\|url>` | single | Deck file (URL auto-fetched) |
116
- | `--player <name>` | single, vs | Only for cross-player: kimi\|codex\|deepseek\|claude |
108
+ | `--player <name>` | single, vs | Agent player: kimi\|codex\|deepseek\|claude |
117
109
  | `--timeout <ms>` | single | Subagent timeout (300000–600000 for complex tasks) |
118
110
  | `--from <dir>` | archive | Source workdir |
119
111
  | `--to <dir>` | archive | Output directory |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lythos/skill-arena",
3
- "version": "0.14.5",
3
+ "version": "0.15.0",
4
4
  "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
5
5
  "keywords": [
6
6
  "ai-agent",
@@ -42,15 +42,15 @@
42
42
  "bun": ">=1.0.0"
43
43
  },
44
44
  "dependencies": {
45
- "@lythos/cold-pool": "^0.14.5",
46
- "@lythos/infra": "^0.14.5",
47
- "@lythos/test-utils": "^0.14.5",
45
+ "@lythos/cold-pool": "^0.15.0",
46
+ "@lythos/infra": "^0.15.0",
47
+ "@lythos/test-utils": "^0.15.0",
48
48
  "zod": "^3.24.0",
49
49
  "zod-to-json-schema": "^3.25.2"
50
50
  },
51
51
  "optionalDependencies": {
52
- "@lythos/agent-adapter-claude-sdk": "^0.14.5",
53
- "@lythos/agent-adapter-deepseek-serve": "^0.14.5",
54
- "@lythos/agent-adapter-codex": "^0.14.5"
52
+ "@lythos/agent-adapter-claude-sdk": "^0.15.0",
53
+ "@lythos/agent-adapter-deepseek-serve": "^0.15.0",
54
+ "@lythos/agent-adapter-codex": "^0.15.0"
55
55
  }
56
56
  }
package/src/cli.ts CHANGED
@@ -232,7 +232,7 @@ async function singleRun(args: string[]) {
232
232
  if (!res?.ok) {
233
233
  const errorDetail = res ? `HTTP ${res.status}` : 'unreachable'
234
234
  console.error(`❌ Cannot reach ${url} (${errorDetail})`)
235
- if (allFailed) console.error(' Set LYTHOSKILL_GH_MIRROR to use a custom mirror.')
235
+ if (allFailed) console.error(' Set LYTHOS_GH_MIRROR to use a custom mirror.')
236
236
  console.error(' Or download manually and reference the local file.')
237
237
  process.exit(1)
238
238
  }