@lythos/skill-arena 0.15.3 → 0.15.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -12
- package/package.json +7 -7
- package/src/preflight.ts +2 -2
- package/src/runner.ts +1 -1
package/README.md
CHANGED
|
@@ -16,20 +16,20 @@
|
|
|
16
16
|
```bash
|
|
17
17
|
bun add -d @lythos/skill-arena
|
|
18
18
|
# or use directly
|
|
19
|
-
bunx @lythos/skill-arena@0.15.3 <command>
|
|
19
|
+
bunx @lythos/skill-arena@0.15.6 <command>
|
|
20
20
|
```
|
|
21
21
|
|
|
22
22
|
## Quick Start
|
|
23
23
|
|
|
24
24
|
```bash
|
|
25
25
|
# single — test one deck (most common)
|
|
26
|
-
bunx @lythos/skill-arena@0.15.3 single \
|
|
26
|
+
bunx @lythos/skill-arena@0.15.6 single \
|
|
27
27
|
--deck ./examples/decks/scout.toml \
|
|
28
28
|
--brief "Generate auth flow diagram" \
|
|
29
29
|
--out ./output
|
|
30
30
|
|
|
31
31
|
# single with explicit player
|
|
32
|
-
bunx @lythos/skill-arena@0.15.3 single \
|
|
32
|
+
bunx @lythos/skill-arena@0.15.6 single \
|
|
33
33
|
--deck ./examples/decks/scout.toml \
|
|
34
34
|
--brief "Generate auth flow diagram" \
|
|
35
35
|
--player kimi \
|
|
@@ -37,10 +37,10 @@ bunx @lythos/skill-arena@0.15.3 single \
|
|
|
37
37
|
|
|
38
38
|
# cross-deck vs — compare two decks (agent-orchestrated)
|
|
39
39
|
# Create arena.toml declaring sides with different decks, then:
|
|
40
|
-
bunx @lythos/skill-arena@0.15.3 vs --config ./arena.toml
|
|
40
|
+
bunx @lythos/skill-arena@0.15.6 vs --config ./arena.toml
|
|
41
41
|
|
|
42
42
|
# cross-player vs — compare kimi vs codex (CLI only)
|
|
43
|
-
bunx @lythos/skill-arena@0.15.3 vs --config ./arena.toml --player kimi
|
|
43
|
+
bunx @lythos/skill-arena@0.15.6 vs --config ./arena.toml --player kimi
|
|
44
44
|
```
|
|
45
45
|
|
|
46
46
|
**What happens**: Agent creates isolated `/tmp` workdir per side, `deck link` skills, spawns parallel subagents, collects artifacts, judge scores outputs. Parent deck restored after.
|
|
@@ -50,14 +50,14 @@ bunx @lythos/skill-arena@0.15.3 vs --config ./arena.toml --player kimi
|
|
|
50
50
|
### `single` — one deck, one task
|
|
51
51
|
|
|
52
52
|
```bash
|
|
53
|
-
bunx @lythos/skill-arena@0.15.3 single \
|
|
53
|
+
bunx @lythos/skill-arena@0.15.6 single \
|
|
54
54
|
--deck ./deck.toml \
|
|
55
55
|
--brief "Produce a .docx report with radar chart" \
|
|
56
56
|
--timeout 600000 \
|
|
57
57
|
--out ./output
|
|
58
58
|
|
|
59
59
|
# with explicit player
|
|
60
|
-
bunx @lythos/skill-arena@0.15.3 single \
|
|
60
|
+
bunx @lythos/skill-arena@0.15.6 single \
|
|
61
61
|
--deck ./deck.toml \
|
|
62
62
|
--brief "Produce a .docx report with radar chart" \
|
|
63
63
|
--player kimi \
|
|
@@ -67,14 +67,14 @@ bunx @lythos/skill-arena@0.15.3 single \
|
|
|
67
67
|
### `vs` — multi-deck comparison
|
|
68
68
|
|
|
69
69
|
```bash
|
|
70
|
-
bunx @lythos/skill-arena@0.15.3 vs --config ./arena.toml
|
|
71
|
-
bunx @lythos/skill-arena@0.15.3 vs --config ./arena.toml --dry-run
|
|
70
|
+
bunx @lythos/skill-arena@0.15.6 vs --config ./arena.toml
|
|
71
|
+
bunx @lythos/skill-arena@0.15.6 vs --config ./arena.toml --dry-run
|
|
72
72
|
```
|
|
73
73
|
|
|
74
74
|
### `prepare-workdir` — isolate + link skills (agent-orchestrated)
|
|
75
75
|
|
|
76
76
|
```bash
|
|
77
|
-
bunx @lythos/skill-arena@0.15.3 prepare-workdir \
|
|
77
|
+
bunx @lythos/skill-arena@0.15.6 prepare-workdir \
|
|
78
78
|
--deck ./skill-deck.toml \
|
|
79
79
|
--out /tmp/arena-side-a \
|
|
80
80
|
--brief "task description"
|
|
@@ -85,7 +85,7 @@ Creates `/tmp`-isolated workdir with deck copied, AGENTS.md written, and `deck l
|
|
|
85
85
|
### `archive` — collect agent outputs (agent-orchestrated)
|
|
86
86
|
|
|
87
87
|
```bash
|
|
88
|
-
bunx @lythos/skill-arena@0.15.3 archive \
|
|
88
|
+
bunx @lythos/skill-arena@0.15.6 archive \
|
|
89
89
|
--from /tmp/arena-side-a \
|
|
90
90
|
--to ./playground/output \
|
|
91
91
|
--sides side-a
|
|
@@ -96,7 +96,7 @@ Copies agent artifacts from workdir(s) to output, skipping internal files (`.cla
|
|
|
96
96
|
### `viz` — render results (WIP — HTML report generation pending)
|
|
97
97
|
|
|
98
98
|
```bash
|
|
99
|
-
bunx @lythos/skill-arena@0.15.3 viz runs/arena-<id>/
|
|
99
|
+
bunx @lythos/skill-arena@0.15.6 viz runs/arena-<id>/
|
|
100
100
|
```
|
|
101
101
|
|
|
102
102
|
## Parameters
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lythos/skill-arena",
|
|
3
|
-
"version": "0.15.3",
|
|
3
|
+
"version": "0.15.6",
|
|
4
4
|
"description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-agent",
|
|
@@ -42,15 +42,15 @@
|
|
|
42
42
|
"bun": ">=1.0.0"
|
|
43
43
|
},
|
|
44
44
|
"dependencies": {
|
|
45
|
-
"@lythos/cold-pool": "^0.15.
|
|
46
|
-
"@lythos/infra": "^0.15.
|
|
47
|
-
"@lythos/test-utils": "^0.15.
|
|
45
|
+
"@lythos/cold-pool": "^0.15.6",
|
|
46
|
+
"@lythos/infra": "^0.15.6",
|
|
47
|
+
"@lythos/test-utils": "^0.15.6",
|
|
48
48
|
"zod": "^3.24.0",
|
|
49
49
|
"zod-to-json-schema": "^3.25.2"
|
|
50
50
|
},
|
|
51
51
|
"optionalDependencies": {
|
|
52
|
-
"@lythos/agent-adapter-claude-sdk": "^0.15.
|
|
53
|
-
"@lythos/agent-adapter-deepseek-serve": "^0.15.
|
|
54
|
-
"@lythos/agent-adapter-codex": "^0.15.
|
|
52
|
+
"@lythos/agent-adapter-claude-sdk": "^0.15.6",
|
|
53
|
+
"@lythos/agent-adapter-deepseek-serve": "^0.15.6",
|
|
54
|
+
"@lythos/agent-adapter-codex": "^0.15.6"
|
|
55
55
|
}
|
|
56
56
|
}
|
package/src/preflight.ts
CHANGED
|
@@ -276,14 +276,14 @@ export function buildPreparePlan(params: {
|
|
|
276
276
|
'',
|
|
277
277
|
'## Setup Order (why this sequence)',
|
|
278
278
|
'1. `skill-deck.toml` copied here → declares which skills you can use',
|
|
279
|
-
'2. `deck link` runs → cold pool skills become visible in
|
|
279
|
+
'2. `deck link` runs → cold pool skills become visible in the working set',
|
|
280
280
|
'3. Skill existence checked → warns if any declared skill is missing from cold pool',
|
|
281
281
|
'4. `AGENTS.md` written last → confirms setup succeeded before agent starts',
|
|
282
282
|
'If setup fails mid-sequence, the workdir is incomplete and nothing runs.',
|
|
283
283
|
'',
|
|
284
284
|
'## How This Works',
|
|
285
285
|
'- Write ALL output files to this directory (CWD).',
|
|
286
|
-
'- Use available skills — check `ls .claude/skills
|
|
286
|
+
'- Use available skills — check the working set directory (e.g. `ls .claude/skills/`).',
|
|
287
287
|
'',
|
|
288
288
|
'## Output Contract',
|
|
289
289
|
'- MANDATORY: `decision-log.jsonl` — one JSON line per decision:',
|
package/src/runner.ts
CHANGED
|
@@ -84,7 +84,7 @@ export function buildArenaPrompt(opts: {
|
|
|
84
84
|
'ROBUSTNESS — If any command or script fails, read the error output, fix the issue, and retry.',
|
|
85
85
|
'Do not stop on the first error. Ensure all required output files exist before finishing.',
|
|
86
86
|
'',
|
|
87
|
-
'TOOLS — Use the skills already linked in
|
|
87
|
+
'TOOLS — Use the skills already linked in your working set (check with `ls .claude/skills/` or your configured path).',
|
|
88
88
|
'They are available and tested. Only write alternative scripts if the linked skills explicitly',
|
|
89
89
|
'cannot handle the task.',
|
|
90
90
|
]
|