@lythos/skill-arena 0.9.41 → 0.9.43
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/README.md +15 -27
- package/package.json +1 -1
- package/src/cli.ts +38 -23
package/README.md
CHANGED
|
@@ -49,23 +49,20 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
|
|
|
49
49
|
```bash
|
|
50
50
|
bun add -d @lythos/skill-arena
|
|
51
51
|
# or use directly
|
|
52
|
-
bunx @lythos/skill-arena@0.9.
|
|
52
|
+
bunx @lythos/skill-arena@0.9.43 <command>
|
|
53
53
|
```
|
|
54
54
|
|
|
55
55
|
## Quick Start
|
|
56
56
|
|
|
57
57
|
```bash
|
|
58
|
-
# Single: test a deck with one agent
|
|
59
|
-
bunx @lythos/skill-arena@0.9.
|
|
60
|
-
--
|
|
61
|
-
--
|
|
62
|
-
|
|
63
|
-
# Vs: compare multiple decks side by side
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
# Visualize results
|
|
68
|
-
bunx @lythos/skill-arena@0.9.41 viz tmp/arena-<id>/
|
|
58
|
+
# Single: test a deck with one agent
|
|
59
|
+
bunx @lythos/skill-arena@0.9.43 single \
|
|
60
|
+
--deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml \
|
|
61
|
+
--brief "Generate auth flow diagram"
|
|
62
|
+
|
|
63
|
+
# Vs: compare multiple decks side by side
|
|
64
|
+
curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/arena/research-compare/arena.toml > arena.toml
|
|
65
|
+
bunx @lythos/skill-arena@0.9.43 vs --config ./arena.toml
|
|
69
66
|
```
|
|
70
67
|
|
|
71
68
|
## Commands
|
|
@@ -74,32 +71,23 @@ bunx @lythos/skill-arena@0.9.41 viz tmp/arena-<id>/
|
|
|
74
71
|
|
|
75
72
|
```bash
|
|
76
73
|
# Print execution plan without running
|
|
77
|
-
bunx @lythos/skill-arena@0.9.
|
|
74
|
+
bunx @lythos/skill-arena@0.9.43 vs --config arena.toml --dry-run
|
|
78
75
|
|
|
79
76
|
# Execute with per-side runs_per_side and statistical aggregation
|
|
80
|
-
bunx @lythos/skill-arena@0.9.
|
|
81
|
-
```
|
|
82
|
-
|
|
83
|
-
### CLI-flag mode (backward compat)
|
|
84
|
-
|
|
85
|
-
```
|
|
86
|
-
bunx @lythos/skill-arena@0.9.41 run \
|
|
87
|
-
--task ./TASK-arena.md \
|
|
88
|
-
--players ./players/claude.toml \
|
|
89
|
-
--decks ./decks/run-01.toml,./decks/run-02.toml \
|
|
90
|
-
--criteria coverage,relevance,actionability,depth
|
|
77
|
+
bunx @lythos/skill-arena@0.9.43 vs --config arena.toml
|
|
91
78
|
```
|
|
92
79
|
|
|
93
80
|
### Scaffold mode (legacy, manual execution)
|
|
94
81
|
|
|
95
82
|
```
|
|
96
|
-
bunx @lythos/skill-arena@0.9.
|
|
83
|
+
bunx @lythos/skill-arena@0.9.43 scaffold --task "Generate auth flow diagram" \
|
|
84
|
+
--decks https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml,https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/documents.toml
|
|
97
85
|
```
|
|
98
86
|
|
|
99
87
|
### Viz
|
|
100
88
|
|
|
101
89
|
```bash
|
|
102
|
-
bunx @lythos/skill-arena@0.9.
|
|
90
|
+
bunx @lythos/skill-arena@0.9.43 viz runs/arena-<id>/
|
|
103
91
|
```
|
|
104
92
|
|
|
105
93
|
## Skill Documentation
|
|
@@ -113,7 +101,7 @@ The agent-visible **Skill** layer documentation is here:
|
|
|
113
101
|
Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
|
|
114
102
|
|
|
115
103
|
```
|
|
116
|
-
Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.
|
|
104
|
+
Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.43 ...
|
|
117
105
|
Skill (packages/<name>/skill/) → build → SKILL.md + thin scripts
|
|
118
106
|
Output (skills/<name>/) → git commit → agent-visible skill
|
|
119
107
|
```
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -63,16 +63,20 @@ Options:
|
|
|
63
63
|
--timeout <ms> Subagent timeout (single only)
|
|
64
64
|
|
|
65
65
|
Examples:
|
|
66
|
-
# Single-player deck test (
|
|
67
|
-
lythoskill-arena single
|
|
68
|
-
|
|
66
|
+
# Single-player deck test (--deck accepts local paths and http/https URLs)
|
|
67
|
+
lythoskill-arena single \\
|
|
68
|
+
--deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml \\
|
|
69
|
+
--brief "Generate auth flow diagram" --player kimi
|
|
69
70
|
|
|
70
71
|
# Multi-side comparison (declarative)
|
|
72
|
+
curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/arena/add-remove/arena.toml > arena.toml
|
|
71
73
|
lythoskill-arena vs --config ./arena.toml
|
|
72
74
|
lythoskill-arena vs --config ./arena.toml --dry-run
|
|
73
75
|
|
|
74
76
|
# Legacy scaffolding
|
|
75
|
-
|
|
77
|
+
# scaffold creates structure; decks via URL (auto-downloaded during link):
|
|
78
|
+
lythoskill-arena scaffold --task "Refactor auth module" \\
|
|
79
|
+
--decks https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml,https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/documents.toml
|
|
76
80
|
lythoskill-arena viz runs/arena-20260504
|
|
77
81
|
`)
|
|
78
82
|
}
|
|
@@ -91,16 +95,19 @@ async function singleRun(args: string[]) {
|
|
|
91
95
|
}
|
|
92
96
|
|
|
93
97
|
if (!opts.deck) {
|
|
94
|
-
console.error(`❌ --deck <path> is required.
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
+
console.error(`❌ --deck <path|url> is required.
|
|
99
|
+
--deck accepts local paths and http/https URLs (auto-fetched).
|
|
100
|
+
Example: lythoskill-arena single \\
|
|
101
|
+
--deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml \\
|
|
102
|
+
--brief "your task"`)
|
|
98
103
|
process.exit(1)
|
|
99
104
|
}
|
|
100
105
|
if (!opts.task && (!opts.brief || !opts.brief.trim())) {
|
|
101
|
-
console.error(`❌ --task <path> or --brief "<
|
|
102
|
-
|
|
103
|
-
|
|
106
|
+
console.error(`❌ --task <path> or --brief "<text>" is required.
|
|
107
|
+
--task reads a .agent.md scenario file; --brief takes inline text.
|
|
108
|
+
Example: lythoskill-arena single \\
|
|
109
|
+
--deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml \\
|
|
110
|
+
--brief "your task"`)
|
|
104
111
|
process.exit(1)
|
|
105
112
|
}
|
|
106
113
|
|
|
@@ -117,17 +124,26 @@ async function singleRun(args: string[]) {
|
|
|
117
124
|
} catch { /* keep original url */ }
|
|
118
125
|
const dest = resolve(process.cwd(), 'arena-deck.toml')
|
|
119
126
|
console.log(`📥 Fetching arena deck: ${url}`)
|
|
120
|
-
|
|
121
|
-
|
|
127
|
+
let res: Response
|
|
128
|
+
try { res = await fetch(url, { signal: AbortSignal.timeout(30_000) }) } catch (e: any) {
|
|
129
|
+
console.error(`❌ Cannot reach ${url}
|
|
130
|
+
Network issue? Try a GitHub proxy mirror:
|
|
131
|
+
${url.replace('https://raw.githubusercontent.com/', 'https://ghfast.top/https://raw.githubusercontent.com/')}
|
|
132
|
+
Or download manually and reference the local file.`)
|
|
133
|
+
process.exit(1)
|
|
134
|
+
}
|
|
135
|
+
if (!res.ok) { console.error(`❌ Failed to fetch deck (HTTP ${res.status}): ${url}
|
|
136
|
+
Try a GitHub proxy mirror:
|
|
137
|
+
${url.replace('https://raw.githubusercontent.com/', 'https://ghfast.top/https://raw.githubusercontent.com/')}`); process.exit(1) }
|
|
122
138
|
deckWrite(dest, await res.text())
|
|
123
139
|
console.log(` → saved to ${dest}`)
|
|
124
140
|
deckPath = dest
|
|
125
141
|
} else {
|
|
126
142
|
deckPath = resolve(opts.deck)
|
|
127
143
|
if (!deckExists(deckPath)) { console.error(`❌ Deck file not found: ${deckPath}
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
144
|
+
Make sure the path is correct, or use a URL:
|
|
145
|
+
--deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml
|
|
146
|
+
(URLs are auto-fetched — no local file needed)`); process.exit(1) }
|
|
131
147
|
}
|
|
132
148
|
|
|
133
149
|
const { useAgent } = await import('@lythos/test-utils/agents')
|
|
@@ -148,9 +164,9 @@ async function singleRun(args: string[]) {
|
|
|
148
164
|
if (opts.task) {
|
|
149
165
|
const taskPath = resolve(opts.task)
|
|
150
166
|
if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}
|
|
151
|
-
|
|
152
|
-
Format:
|
|
153
|
-
Example:
|
|
167
|
+
Use --brief for inline tasks, or point --task to an existing .agent.md file.
|
|
168
|
+
Format: name + description + Given/When/Then/Judge sections.
|
|
169
|
+
Example: lythoskill-arena single --brief "your task" --deck <url>`); process.exit(1) }
|
|
154
170
|
scenarioOpt.scenarioPath = taskPath
|
|
155
171
|
// Quick validation: check frontmatter presence
|
|
156
172
|
const raw = readFileSync(taskPath, 'utf-8')
|
|
@@ -767,10 +783,9 @@ async function vsRun(argv: string[]) {
|
|
|
767
783
|
console.error(`❌ --config <arena.toml> is required.
|
|
768
784
|
Usage: lythoskill-arena vs --config ./arena.toml
|
|
769
785
|
lythoskill-arena vs --config ./arena.toml --dry-run
|
|
770
|
-
|
|
771
|
-
examples/arena/
|
|
772
|
-
|
|
773
|
-
Create one: cp examples/arena/research-compare/arena.toml ./arena.toml`)
|
|
786
|
+
Fetch an example:
|
|
787
|
+
curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/arena/add-remove/arena.toml > arena.toml
|
|
788
|
+
Then edit arena.toml and run: lythoskill-arena vs --config ./arena.toml`)
|
|
774
789
|
process.exit(1)
|
|
775
790
|
|
|
776
791
|
const result = await runArenaProgrammatic({
|