@lythos/skill-arena 0.9.41 → 0.9.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -27
- package/package.json +1 -1
- package/src/cli.ts +27 -21
package/README.md
CHANGED
|
@@ -49,23 +49,20 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
|
|
|
49
49
|
```bash
|
|
50
50
|
bun add -d @lythos/skill-arena
|
|
51
51
|
# or use directly
|
|
52
|
-
bunx @lythos/skill-arena@0.9.
|
|
52
|
+
bunx @lythos/skill-arena@0.9.42 <command>
|
|
53
53
|
```
|
|
54
54
|
|
|
55
55
|
## Quick Start
|
|
56
56
|
|
|
57
57
|
```bash
|
|
58
|
-
# Single: test a deck with one agent
|
|
59
|
-
bunx @lythos/skill-arena@0.9.
|
|
60
|
-
--
|
|
61
|
-
--
|
|
62
|
-
|
|
63
|
-
# Vs: compare multiple decks side by side
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
# Visualize results
|
|
68
|
-
bunx @lythos/skill-arena@0.9.41 viz tmp/arena-<id>/
|
|
58
|
+
# Single: test a deck with one agent
|
|
59
|
+
bunx @lythos/skill-arena@0.9.42 single \
|
|
60
|
+
--deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml \
|
|
61
|
+
--brief "Generate auth flow diagram"
|
|
62
|
+
|
|
63
|
+
# Vs: compare multiple decks side by side
|
|
64
|
+
curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/arena/research-compare/arena.toml > arena.toml
|
|
65
|
+
bunx @lythos/skill-arena@0.9.42 vs --config ./arena.toml
|
|
69
66
|
```
|
|
70
67
|
|
|
71
68
|
## Commands
|
|
@@ -74,32 +71,23 @@ bunx @lythos/skill-arena@0.9.41 viz tmp/arena-<id>/
|
|
|
74
71
|
|
|
75
72
|
```bash
|
|
76
73
|
# Print execution plan without running
|
|
77
|
-
bunx @lythos/skill-arena@0.9.
|
|
74
|
+
bunx @lythos/skill-arena@0.9.42 vs --config arena.toml --dry-run
|
|
78
75
|
|
|
79
76
|
# Execute with per-side runs_per_side and statistical aggregation
|
|
80
|
-
bunx @lythos/skill-arena@0.9.
|
|
81
|
-
```
|
|
82
|
-
|
|
83
|
-
### CLI-flag mode (backward compat)
|
|
84
|
-
|
|
85
|
-
```
|
|
86
|
-
bunx @lythos/skill-arena@0.9.41 run \
|
|
87
|
-
--task ./TASK-arena.md \
|
|
88
|
-
--players ./players/claude.toml \
|
|
89
|
-
--decks ./decks/run-01.toml,./decks/run-02.toml \
|
|
90
|
-
--criteria coverage,relevance,actionability,depth
|
|
77
|
+
bunx @lythos/skill-arena@0.9.42 vs --config arena.toml
|
|
91
78
|
```
|
|
92
79
|
|
|
93
80
|
### Scaffold mode (legacy, manual execution)
|
|
94
81
|
|
|
95
82
|
```
|
|
96
|
-
bunx @lythos/skill-arena@0.9.
|
|
83
|
+
bunx @lythos/skill-arena@0.9.42 scaffold --task "Generate auth flow diagram" \
|
|
84
|
+
--decks https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml,https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/documents.toml
|
|
97
85
|
```
|
|
98
86
|
|
|
99
87
|
### Viz
|
|
100
88
|
|
|
101
89
|
```bash
|
|
102
|
-
bunx @lythos/skill-arena@0.9.
|
|
90
|
+
bunx @lythos/skill-arena@0.9.42 viz runs/arena-<id>/
|
|
103
91
|
```
|
|
104
92
|
|
|
105
93
|
## Skill Documentation
|
|
@@ -113,7 +101,7 @@ The agent-visible **Skill** layer documentation is here:
|
|
|
113
101
|
Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
|
|
114
102
|
|
|
115
103
|
```
|
|
116
|
-
Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.
|
|
104
|
+
Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.42 ...
|
|
117
105
|
Skill (packages/<name>/skill/) → build → SKILL.md + thin scripts
|
|
118
106
|
Output (skills/<name>/) → git commit → agent-visible skill
|
|
119
107
|
```
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -63,16 +63,20 @@ Options:
|
|
|
63
63
|
--timeout <ms> Subagent timeout (single only)
|
|
64
64
|
|
|
65
65
|
Examples:
|
|
66
|
-
# Single-player deck test (
|
|
67
|
-
lythoskill-arena single
|
|
68
|
-
|
|
66
|
+
# Single-player deck test (--deck accepts local paths and http/https URLs)
|
|
67
|
+
lythoskill-arena single \\
|
|
68
|
+
--deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml \\
|
|
69
|
+
--brief "Generate auth flow diagram" --player kimi
|
|
69
70
|
|
|
70
71
|
# Multi-side comparison (declarative)
|
|
72
|
+
curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/arena/add-remove/arena.toml > arena.toml
|
|
71
73
|
lythoskill-arena vs --config ./arena.toml
|
|
72
74
|
lythoskill-arena vs --config ./arena.toml --dry-run
|
|
73
75
|
|
|
74
76
|
# Legacy scaffolding
|
|
75
|
-
|
|
77
|
+
# scaffold creates structure; decks via URL (auto-downloaded during link):
|
|
78
|
+
lythoskill-arena scaffold --task "Refactor auth module" \\
|
|
79
|
+
--decks https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml,https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/documents.toml
|
|
76
80
|
lythoskill-arena viz runs/arena-20260504
|
|
77
81
|
`)
|
|
78
82
|
}
|
|
@@ -91,16 +95,19 @@ async function singleRun(args: string[]) {
|
|
|
91
95
|
}
|
|
92
96
|
|
|
93
97
|
if (!opts.deck) {
|
|
94
|
-
console.error(`❌ --deck <path> is required.
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
+
console.error(`❌ --deck <path|url> is required.
|
|
99
|
+
--deck accepts local paths and http/https URLs (auto-fetched).
|
|
100
|
+
Example: lythoskill-arena single \\
|
|
101
|
+
--deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml \\
|
|
102
|
+
--brief "your task"`)
|
|
98
103
|
process.exit(1)
|
|
99
104
|
}
|
|
100
105
|
if (!opts.task && (!opts.brief || !opts.brief.trim())) {
|
|
101
|
-
console.error(`❌ --task <path> or --brief "<
|
|
102
|
-
|
|
103
|
-
|
|
106
|
+
console.error(`❌ --task <path> or --brief "<text>" is required.
|
|
107
|
+
--task reads a .agent.md scenario file; --brief takes inline text.
|
|
108
|
+
Example: lythoskill-arena single \\
|
|
109
|
+
--deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml \\
|
|
110
|
+
--brief "your task"`)
|
|
104
111
|
process.exit(1)
|
|
105
112
|
}
|
|
106
113
|
|
|
@@ -125,9 +132,9 @@ async function singleRun(args: string[]) {
|
|
|
125
132
|
} else {
|
|
126
133
|
deckPath = resolve(opts.deck)
|
|
127
134
|
if (!deckExists(deckPath)) { console.error(`❌ Deck file not found: ${deckPath}
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
135
|
+
Make sure the path is correct, or use a URL:
|
|
136
|
+
--deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml
|
|
137
|
+
(URLs are auto-fetched — no local file needed)`); process.exit(1) }
|
|
131
138
|
}
|
|
132
139
|
|
|
133
140
|
const { useAgent } = await import('@lythos/test-utils/agents')
|
|
@@ -148,9 +155,9 @@ async function singleRun(args: string[]) {
|
|
|
148
155
|
if (opts.task) {
|
|
149
156
|
const taskPath = resolve(opts.task)
|
|
150
157
|
if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}
|
|
151
|
-
|
|
152
|
-
Format:
|
|
153
|
-
Example:
|
|
158
|
+
Use --brief for inline tasks, or point --task to an existing .agent.md file.
|
|
159
|
+
Format: name + description + Given/When/Then/Judge sections.
|
|
160
|
+
Example: lythoskill-arena single --brief "your task" --deck <url>`); process.exit(1) }
|
|
154
161
|
scenarioOpt.scenarioPath = taskPath
|
|
155
162
|
// Quick validation: check frontmatter presence
|
|
156
163
|
const raw = readFileSync(taskPath, 'utf-8')
|
|
@@ -767,10 +774,9 @@ async function vsRun(argv: string[]) {
|
|
|
767
774
|
console.error(`❌ --config <arena.toml> is required.
|
|
768
775
|
Usage: lythoskill-arena vs --config ./arena.toml
|
|
769
776
|
lythoskill-arena vs --config ./arena.toml --dry-run
|
|
770
|
-
|
|
771
|
-
examples/arena/
|
|
772
|
-
|
|
773
|
-
Create one: cp examples/arena/research-compare/arena.toml ./arena.toml`)
|
|
777
|
+
Fetch an example:
|
|
778
|
+
curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/arena/add-remove/arena.toml > arena.toml
|
|
779
|
+
Then edit arena.toml and run: lythoskill-arena vs --config ./arena.toml`)
|
|
774
780
|
process.exit(1)
|
|
775
781
|
|
|
776
782
|
const result = await runArenaProgrammatic({
|