@lythos/skill-arena 0.14.5 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -34
- package/package.json +7 -7
- package/src/cli.ts +1 -1
package/README.md
CHANGED
|
@@ -16,24 +16,31 @@
|
|
|
16
16
|
```bash
|
|
17
17
|
bun add -d @lythos/skill-arena
|
|
18
18
|
# or use directly
|
|
19
|
-
bunx @lythos/skill-arena@0.
|
|
19
|
+
bunx @lythos/skill-arena@0.15.0 <command>
|
|
20
20
|
```
|
|
21
21
|
|
|
22
22
|
## Quick Start
|
|
23
23
|
|
|
24
24
|
```bash
|
|
25
25
|
# single — test one deck (most common)
|
|
26
|
-
bunx @lythos/skill-arena@
|
|
26
|
+
bunx @lythos/skill-arena@0.15.0 single \
|
|
27
27
|
--deck ./examples/decks/scout.toml \
|
|
28
28
|
--brief "Generate auth flow diagram" \
|
|
29
29
|
--out ./output
|
|
30
30
|
|
|
31
|
+
# single with explicit player
|
|
32
|
+
bunx @lythos/skill-arena@0.15.0 single \
|
|
33
|
+
--deck ./examples/decks/scout.toml \
|
|
34
|
+
--brief "Generate auth flow diagram" \
|
|
35
|
+
--player kimi \
|
|
36
|
+
--out ./output
|
|
37
|
+
|
|
31
38
|
# cross-deck vs — compare two decks (agent-orchestrated)
|
|
32
39
|
# Create arena.toml declaring sides with different decks, then:
|
|
33
|
-
bunx @lythos/skill-arena@
|
|
40
|
+
bunx @lythos/skill-arena@0.15.0 vs --config ./arena.toml
|
|
34
41
|
|
|
35
42
|
# cross-player vs — compare kimi vs codex (CLI only)
|
|
36
|
-
bunx @lythos/skill-arena@latest vs --config ./arena.toml --player kimi
|
|
43
|
+
bunx @lythos/skill-arena@0.15.0 vs --config ./arena.toml --player kimi
|
|
37
44
|
```
|
|
38
45
|
|
|
39
46
|
**What happens**: The agent creates an isolated `/tmp` workdir per side, runs `deck link` to link the skills, spawns parallel subagents, and collects their artifacts; a judge then scores the outputs. The parent deck is restored afterwards.
|
|
@@ -43,42 +50,34 @@ bunx @lythos/skill-arena@latest vs --config ./arena.toml --player kimi
|
|
|
43
50
|
### `single` — one deck, one task
|
|
44
51
|
|
|
45
52
|
```bash
|
|
46
|
-
bunx @lythos/skill-arena@
|
|
53
|
+
bunx @lythos/skill-arena@0.15.0 single \
|
|
47
54
|
--deck ./deck.toml \
|
|
48
55
|
--brief "Produce a .docx report with radar chart" \
|
|
49
56
|
--timeout 600000 \
|
|
50
57
|
--out ./output
|
|
51
|
-
```
|
|
52
58
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
59
|
+
# with explicit player
|
|
60
|
+
bunx @lythos/skill-arena@0.15.0 single \
|
|
61
|
+
--deck ./deck.toml \
|
|
62
|
+
--brief "Produce a .docx report with radar chart" \
|
|
63
|
+
--player kimi \
|
|
64
|
+
--out ./output
|
|
58
65
|
```
|
|
59
66
|
|
|
60
|
-
### `
|
|
67
|
+
### `vs` — multi-deck comparison
|
|
61
68
|
|
|
62
69
|
```bash
|
|
63
|
-
bunx @lythos/skill-arena@
|
|
64
|
-
|
|
65
|
-
--decks "./decks/minimal.toml,./decks/rich.toml"
|
|
70
|
+
bunx @lythos/skill-arena@0.15.0 vs --config ./arena.toml
|
|
71
|
+
bunx @lythos/skill-arena@0.15.0 vs --config ./arena.toml --dry-run
|
|
66
72
|
```
|
|
67
73
|
|
|
68
74
|
### `prepare-workdir` — isolate + link skills (agent-orchestrated)
|
|
69
75
|
|
|
70
76
|
```bash
|
|
71
|
-
bunx @lythos/skill-arena@
|
|
77
|
+
bunx @lythos/skill-arena@0.15.0 prepare-workdir \
|
|
72
78
|
--deck ./skill-deck.toml \
|
|
73
79
|
--out /tmp/arena-side-a \
|
|
74
80
|
--brief "task description"
|
|
75
|
-
|
|
76
|
-
# Plan-first: review before executing
|
|
77
|
-
bunx @lythos/skill-arena@latest prepare-workdir \
|
|
78
|
-
--deck ./skill-deck.toml \
|
|
79
|
-
--out /tmp/arena-side-a \
|
|
80
|
-
--brief "task" \
|
|
81
|
-
--dry-run
|
|
82
81
|
```
|
|
83
82
|
|
|
84
83
|
Creates a `/tmp`-isolated workdir with the deck copied, `AGENTS.md` written, and `deck link` run. `--dry-run` prints the plan (skills, workdir path, whether linking is needed) without creating anything.
|
|
@@ -86,25 +85,18 @@ Creates `/tmp`-isolated workdir with deck copied, AGENTS.md written, and `deck l
|
|
|
86
85
|
### `archive` — collect agent outputs (agent-orchestrated)
|
|
87
86
|
|
|
88
87
|
```bash
|
|
89
|
-
bunx @lythos/skill-arena@
|
|
88
|
+
bunx @lythos/skill-arena@0.15.0 archive \
|
|
90
89
|
--from /tmp/arena-side-a \
|
|
91
90
|
--to ./playground/output \
|
|
92
91
|
--sides side-a
|
|
93
|
-
|
|
94
|
-
# Plan-first: review what would be copied
|
|
95
|
-
bunx @lythos/skill-arena@latest archive \
|
|
96
|
-
--from /tmp/arena-side-a \
|
|
97
|
-
--to ./playground/output \
|
|
98
|
-
--sides side-a \
|
|
99
|
-
--dry-run
|
|
100
92
|
```
|
|
101
93
|
|
|
102
94
|
Copies agent artifacts from workdir(s) to output, skipping internal files (`.claude`, `skill-deck.toml`, `skill-deck.lock`, `AGENTS.md`). Single-side archives fall back to workdir root when the named side subdirectory doesn't exist. `--dry-run` shows the per-side plan before copying.
|
|
103
95
|
|
|
104
|
-
### `viz` — render results
|
|
96
|
+
### `viz` — render results (WIP — HTML report generation pending)
|
|
105
97
|
|
|
106
98
|
```bash
|
|
107
|
-
bunx @lythos/skill-arena@latest viz runs/arena-<id>/
|
|
99
|
+
bunx @lythos/skill-arena@0.15.0 viz runs/arena-<id>/
|
|
108
100
|
```
|
|
109
101
|
|
|
110
102
|
## Parameters
|
|
@@ -113,7 +105,7 @@ bunx @lythos/skill-arena@latest viz runs/arena-<id>/
|
|
|
113
105
|
|------|---------|-------------|
|
|
114
106
|
| `--brief "<text>"` | single | Inline task brief |
|
|
115
107
|
| `--deck <path\|url>` | single | Deck file (URL auto-fetched) |
|
|
116
|
-
| `--player <name>` | single, vs |
|
|
108
|
+
| `--player <name>` | single, vs | Agent player: kimi\|codex\|deepseek\|claude |
|
|
117
109
|
| `--timeout <ms>` | single | Subagent timeout (300000–600000 for complex tasks) |
|
|
118
110
|
| `--from <dir>` | archive | Source workdir |
|
|
119
111
|
| `--to <dir>` | archive | Output directory |
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lythos/skill-arena",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.15.0",
|
|
4
4
|
"description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-agent",
|
|
@@ -42,15 +42,15 @@
|
|
|
42
42
|
"bun": ">=1.0.0"
|
|
43
43
|
},
|
|
44
44
|
"dependencies": {
|
|
45
|
-
"@lythos/cold-pool": "^0.
|
|
46
|
-
"@lythos/infra": "^0.
|
|
47
|
-
"@lythos/test-utils": "^0.
|
|
45
|
+
"@lythos/cold-pool": "^0.15.0",
|
|
46
|
+
"@lythos/infra": "^0.15.0",
|
|
47
|
+
"@lythos/test-utils": "^0.15.0",
|
|
48
48
|
"zod": "^3.24.0",
|
|
49
49
|
"zod-to-json-schema": "^3.25.2"
|
|
50
50
|
},
|
|
51
51
|
"optionalDependencies": {
|
|
52
|
-
"@lythos/agent-adapter-claude-sdk": "^0.
|
|
53
|
-
"@lythos/agent-adapter-deepseek-serve": "^0.
|
|
54
|
-
"@lythos/agent-adapter-codex": "^0.
|
|
52
|
+
"@lythos/agent-adapter-claude-sdk": "^0.15.0",
|
|
53
|
+
"@lythos/agent-adapter-deepseek-serve": "^0.15.0",
|
|
54
|
+
"@lythos/agent-adapter-codex": "^0.15.0"
|
|
55
55
|
}
|
|
56
56
|
}
|
package/src/cli.ts
CHANGED
|
@@ -232,7 +232,7 @@ async function singleRun(args: string[]) {
|
|
|
232
232
|
if (!res?.ok) {
|
|
233
233
|
const errorDetail = res ? `HTTP ${res.status}` : 'unreachable'
|
|
234
234
|
console.error(`❌ Cannot reach ${url} (${errorDetail})`)
|
|
235
|
-
if (allFailed) console.error(' Set
|
|
235
|
+
if (allFailed) console.error(' Set LYTHOS_GH_MIRROR to use a custom mirror.')
|
|
236
236
|
console.error(' Or download manually and reference the local file.')
|
|
237
237
|
process.exit(1)
|
|
238
238
|
}
|