@lythos/skill-arena 0.9.20 → 0.9.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -10
- package/package.json +1 -1
- package/src/cli.ts +8 -7
package/README.md
CHANGED
|
@@ -49,26 +49,26 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
|
|
|
49
49
|
```bash
|
|
50
50
|
bun add -d @lythos/skill-arena
|
|
51
51
|
# or use directly
|
|
52
|
-
bunx @lythos/skill-arena@0.9.20 <command>
|
|
52
|
+
bunx @lythos/skill-arena@0.9.21 <command>
|
|
53
53
|
```
|
|
54
54
|
|
|
55
55
|
## Quick Start
|
|
56
56
|
|
|
57
57
|
```bash
|
|
58
58
|
# Mode 1: Compare two skills on the same task
|
|
59
|
-
bunx @lythos/skill-arena@0.9.20 \
|
|
59
|
+
bunx @lythos/skill-arena@0.9.21 \
|
|
60
60
|
--task "Generate auth flow diagram" \
|
|
61
61
|
--skills "design-doc-mermaid,mermaid-tools" \
|
|
62
62
|
--criteria "syntax,context,token"
|
|
63
63
|
|
|
64
64
|
# Mode 2: Compare full deck configurations
|
|
65
|
-
bunx @lythos/skill-arena@0.9.20 \
|
|
65
|
+
bunx @lythos/skill-arena@0.9.21 \
|
|
66
66
|
--task "Generate auth flow diagram" \
|
|
67
67
|
--decks "./decks/minimal.toml,./decks/rich.toml" \
|
|
68
68
|
--criteria "quality,token,maintainability"
|
|
69
69
|
|
|
70
70
|
# Visualize results
|
|
71
|
-
bunx @lythos/skill-arena@0.9.20 viz tmp/arena-<id>/
|
|
71
|
+
bunx @lythos/skill-arena@0.9.21 viz tmp/arena-<id>/
|
|
72
72
|
```
|
|
73
73
|
|
|
74
74
|
## Commands
|
|
@@ -77,16 +77,16 @@ bunx @lythos/skill-arena@0.9.20 viz tmp/arena-<id>/
|
|
|
77
77
|
|
|
78
78
|
```bash
|
|
79
79
|
# Print execution plan without running
|
|
80
|
-
bunx @lythos/skill-arena@0.9.20 run --config arena.toml --dry-run
|
|
80
|
+
bunx @lythos/skill-arena@0.9.21 run --config arena.toml --dry-run
|
|
81
81
|
|
|
82
82
|
# Execute with per-side runs_per_side and statistical aggregation
|
|
83
|
-
bunx @lythos/skill-arena@0.9.20 run --config arena.toml
|
|
83
|
+
bunx @lythos/skill-arena@0.9.21 run --config arena.toml
|
|
84
84
|
```
|
|
85
85
|
|
|
86
86
|
### CLI-flag mode (backward compat)
|
|
87
87
|
|
|
88
88
|
```
|
|
89
|
-
bunx @lythos/skill-arena@0.9.20 run \
|
|
89
|
+
bunx @lythos/skill-arena@0.9.21 run \
|
|
90
90
|
--task ./TASK-arena.md \
|
|
91
91
|
--players ./players/claude.toml \
|
|
92
92
|
--decks ./decks/run-01.toml,./decks/run-02.toml \
|
|
@@ -96,13 +96,13 @@ bunx @lythos/skill-arena@0.9.20 run \
|
|
|
96
96
|
### Scaffold mode (legacy, manual execution)
|
|
97
97
|
|
|
98
98
|
```
|
|
99
|
-
bunx @lythos/skill-arena@0.9.20 scaffold --task "..." --skills a,b
|
|
99
|
+
bunx @lythos/skill-arena@0.9.21 scaffold --task "..." --skills a,b
|
|
100
100
|
```
|
|
101
101
|
|
|
102
102
|
### Viz
|
|
103
103
|
|
|
104
104
|
```bash
|
|
105
|
-
bunx @lythos/skill-arena@0.9.20 viz runs/arena-<id>/
|
|
105
|
+
bunx @lythos/skill-arena@0.9.21 viz runs/arena-<id>/
|
|
106
106
|
```
|
|
107
107
|
|
|
108
108
|
## Skill Documentation
|
|
@@ -116,7 +116,7 @@ The agent-visible **Skill** layer documentation is here:
|
|
|
116
116
|
Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
|
|
117
117
|
|
|
118
118
|
```
|
|
119
|
-
Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.20 ...
|
|
119
|
+
Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.21 ...
|
|
120
120
|
Skill (packages/<name>/skill/) → build → SKILL.md + thin scripts
|
|
121
121
|
Output (skills/<name>/) → git commit → agent-visible skill
|
|
122
122
|
```
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -136,7 +136,7 @@ Evaluate whether the output is complete, accurate, and well-structured.
|
|
|
136
136
|
|
|
137
137
|
const player = resolvePlayer(opts.player ?? 'kimi')
|
|
138
138
|
const agent = useAgent(player)
|
|
139
|
-
const outDir = opts.out ? resolve(opts.out) : join(process.cwd(),
|
|
139
|
+
const outDir = opts.out ? resolve(opts.out) : join(process.cwd(), `agent-output-${new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19)}`)
|
|
140
140
|
mkdirSync(outDir, { recursive: true })
|
|
141
141
|
|
|
142
142
|
console.log(`🤖 agent-run: ${player} × ${deckPath}`)
|
|
@@ -164,16 +164,17 @@ Evaluate whether the output is complete, accurate, and well-structured.
|
|
|
164
164
|
if (result.agentResult.stderr) writeFileSync(join(outDir, 'agent-stderr.txt'), result.agentResult.stderr, 'utf-8')
|
|
165
165
|
if (result.verdict) writeFileSync(join(outDir, 'judge-verdict.json'), JSON.stringify(result.verdict, null, 2) + '\n', 'utf-8')
|
|
166
166
|
|
|
167
|
-
// Copy agent-produced files from workdir (output.md, output.docx, etc.)
|
|
167
|
+
// Copy all agent-produced files from workdir (output.md, output.docx, etc.)
|
|
168
|
+
// Skip .claude/ (symlink dir) and deck artifacts. Recursive so docx/pdf work.
|
|
168
169
|
if (agentWorkdir) {
|
|
169
|
-
const {
|
|
170
|
+
const { cpSync, readdirSync } = await import('node:fs')
|
|
171
|
+
const skipSet = new Set(['.claude', 'skill-deck.toml', 'skill-deck.lock'])
|
|
170
172
|
try {
|
|
171
173
|
for (const entry of readdirSync(agentWorkdir)) {
|
|
172
|
-
if (
|
|
174
|
+
if (skipSet.has(entry)) continue
|
|
173
175
|
const src = join(agentWorkdir, entry)
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
} catch {}
|
|
176
|
+
const dest = join(outDir, entry)
|
|
177
|
+
try { cpSync(src, dest, { recursive: true }) } catch {}
|
|
177
178
|
}
|
|
178
179
|
} catch {}
|
|
179
180
|
}
|