@lythos/skill-arena 0.15.2 → 0.15.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -12
- package/package.json +7 -7
package/README.md
CHANGED
|
@@ -16,20 +16,20 @@
|
|
|
16
16
|
```bash
|
|
17
17
|
bun add -d @lythos/skill-arena
|
|
18
18
|
# or use directly
|
|
19
|
-
bunx @lythos/skill-arena@0.15.2 <command>
|
|
19
|
+
bunx @lythos/skill-arena@0.15.3 <command>
|
|
20
20
|
```
|
|
21
21
|
|
|
22
22
|
## Quick Start
|
|
23
23
|
|
|
24
24
|
```bash
|
|
25
25
|
# single — test one deck (most common)
|
|
26
|
-
bunx @lythos/skill-arena@0.15.2 single \
|
|
26
|
+
bunx @lythos/skill-arena@0.15.3 single \
|
|
27
27
|
--deck ./examples/decks/scout.toml \
|
|
28
28
|
--brief "Generate auth flow diagram" \
|
|
29
29
|
--out ./output
|
|
30
30
|
|
|
31
31
|
# single with explicit player
|
|
32
|
-
bunx @lythos/skill-arena@0.15.2 single \
|
|
32
|
+
bunx @lythos/skill-arena@0.15.3 single \
|
|
33
33
|
--deck ./examples/decks/scout.toml \
|
|
34
34
|
--brief "Generate auth flow diagram" \
|
|
35
35
|
--player kimi \
|
|
@@ -37,10 +37,10 @@ bunx @lythos/skill-arena@0.15.2 single \
|
|
|
37
37
|
|
|
38
38
|
# cross-deck vs — compare two decks (agent-orchestrated)
|
|
39
39
|
# Create arena.toml declaring sides with different decks, then:
|
|
40
|
-
bunx @lythos/skill-arena@0.15.2 vs --config ./arena.toml
|
|
40
|
+
bunx @lythos/skill-arena@0.15.3 vs --config ./arena.toml
|
|
41
41
|
|
|
42
42
|
# cross-player vs — compare kimi vs codex (CLI only)
|
|
43
|
-
bunx @lythos/skill-arena@0.15.2 vs --config ./arena.toml --player kimi
|
|
43
|
+
bunx @lythos/skill-arena@0.15.3 vs --config ./arena.toml --player kimi
|
|
44
44
|
```
|
|
45
45
|
|
|
46
46
|
**What happens**: Agent creates isolated `/tmp` workdir per side, `deck link` skills, spawns parallel subagents, collects artifacts, judge scores outputs. Parent deck restored after.
|
|
@@ -50,14 +50,14 @@ bunx @lythos/skill-arena@0.15.2 vs --config ./arena.toml --player kimi
|
|
|
50
50
|
### `single` — one deck, one task
|
|
51
51
|
|
|
52
52
|
```bash
|
|
53
|
-
bunx @lythos/skill-arena@0.15.2 single \
|
|
53
|
+
bunx @lythos/skill-arena@0.15.3 single \
|
|
54
54
|
--deck ./deck.toml \
|
|
55
55
|
--brief "Produce a .docx report with radar chart" \
|
|
56
56
|
--timeout 600000 \
|
|
57
57
|
--out ./output
|
|
58
58
|
|
|
59
59
|
# with explicit player
|
|
60
|
-
bunx @lythos/skill-arena@0.15.2 single \
|
|
60
|
+
bunx @lythos/skill-arena@0.15.3 single \
|
|
61
61
|
--deck ./deck.toml \
|
|
62
62
|
--brief "Produce a .docx report with radar chart" \
|
|
63
63
|
--player kimi \
|
|
@@ -67,14 +67,14 @@ bunx @lythos/skill-arena@0.15.2 single \
|
|
|
67
67
|
### `vs` — multi-deck comparison
|
|
68
68
|
|
|
69
69
|
```bash
|
|
70
|
-
bunx @lythos/skill-arena@0.15.2 vs --config ./arena.toml
|
|
71
|
-
bunx @lythos/skill-arena@0.15.2 vs --config ./arena.toml --dry-run
|
|
70
|
+
bunx @lythos/skill-arena@0.15.3 vs --config ./arena.toml
|
|
71
|
+
bunx @lythos/skill-arena@0.15.3 vs --config ./arena.toml --dry-run
|
|
72
72
|
```
|
|
73
73
|
|
|
74
74
|
### `prepare-workdir` — isolate + link skills (agent-orchestrated)
|
|
75
75
|
|
|
76
76
|
```bash
|
|
77
|
-
bunx @lythos/skill-arena@0.15.2 prepare-workdir \
|
|
77
|
+
bunx @lythos/skill-arena@0.15.3 prepare-workdir \
|
|
78
78
|
--deck ./skill-deck.toml \
|
|
79
79
|
--out /tmp/arena-side-a \
|
|
80
80
|
--brief "task description"
|
|
@@ -85,7 +85,7 @@ Creates `/tmp`-isolated workdir with deck copied, AGENTS.md written, and `deck l
|
|
|
85
85
|
### `archive` — collect agent outputs (agent-orchestrated)
|
|
86
86
|
|
|
87
87
|
```bash
|
|
88
|
-
bunx @lythos/skill-arena@0.15.2 archive \
|
|
88
|
+
bunx @lythos/skill-arena@0.15.3 archive \
|
|
89
89
|
--from /tmp/arena-side-a \
|
|
90
90
|
--to ./playground/output \
|
|
91
91
|
--sides side-a
|
|
@@ -96,7 +96,7 @@ Copies agent artifacts from workdir(s) to output, skipping internal files (`.cla
|
|
|
96
96
|
### `viz` — render results (WIP — HTML report generation pending)
|
|
97
97
|
|
|
98
98
|
```bash
|
|
99
|
-
bunx @lythos/skill-arena@0.15.2 viz runs/arena-<id>/
|
|
99
|
+
bunx @lythos/skill-arena@0.15.3 viz runs/arena-<id>/
|
|
100
100
|
```
|
|
101
101
|
|
|
102
102
|
## Parameters
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lythos/skill-arena",
|
|
3
|
-
"version": "0.15.2",
|
|
3
|
+
"version": "0.15.3",
|
|
4
4
|
"description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-agent",
|
|
@@ -42,15 +42,15 @@
|
|
|
42
42
|
"bun": ">=1.0.0"
|
|
43
43
|
},
|
|
44
44
|
"dependencies": {
|
|
45
|
-
"@lythos/cold-pool": "^0.15.2",
|
|
46
|
-
"@lythos/infra": "^0.15.2",
|
|
47
|
-
"@lythos/test-utils": "^0.15.2",
|
|
45
|
+
"@lythos/cold-pool": "^0.15.3",
|
|
46
|
+
"@lythos/infra": "^0.15.3",
|
|
47
|
+
"@lythos/test-utils": "^0.15.3",
|
|
48
48
|
"zod": "^3.24.0",
|
|
49
49
|
"zod-to-json-schema": "^3.25.2"
|
|
50
50
|
},
|
|
51
51
|
"optionalDependencies": {
|
|
52
|
-
"@lythos/agent-adapter-claude-sdk": "^0.15.2",
|
|
53
|
-
"@lythos/agent-adapter-deepseek-serve": "^0.15.2",
|
|
54
|
-
"@lythos/agent-adapter-codex": "^0.15.2"
|
|
52
|
+
"@lythos/agent-adapter-claude-sdk": "^0.15.3",
|
|
53
|
+
"@lythos/agent-adapter-deepseek-serve": "^0.15.3",
|
|
54
|
+
"@lythos/agent-adapter-codex": "^0.15.3"
|
|
55
55
|
}
|
|
56
56
|
}
|