@lythos/skill-arena 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +52 -23
  2. package/package.json +20 -3
  3. package/src/cli.ts +1 -1
package/README.md CHANGED
@@ -1,52 +1,81 @@
1
1
  # @lythos/skill-arena
2
2
 
3
- > Skill comparison benchmark tool. Run control-variable decks against the same task to compare skill effectiveness.
3
+ > Controlled-variable benchmark for AI agent skills. Compare skills, decks, or configurations on the same task — single-skill A/B or full-deck Pareto frontier analysis.
4
4
 
5
- Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) meta-skill ecosystem.
5
+ ## Why
6
6
 
7
- ## What it does
7
+ "Which skill is better?" is the wrong question. The right question is "which skill is better for what."
8
8
 
9
- Creates an arena directory with isolated decks for each skill under test, generates task cards for subagent dispatch, and produces a structured output for judge evaluation. Core principle: **control variables** — only the tested skill differs between decks.
9
+ `skill-arena` scaffolds isolated environments where subagents complete the same task under different decks. A judge agent scores outputs across multiple dimensions. Supports:
10
+
11
+ - **Mode 1**: Single-skill comparison (controlled variable — same helper skills, different test skill).
12
+ - **Mode 2**: Full-deck comparison (Pareto frontier — no single winner, only optimal trade-offs).
10
13
 
11
14
  ## Install
12
15
 
13
16
  ```bash
14
17
  bun add -d @lythos/skill-arena
15
- # or
16
- bunx @lythos/skill-arena <args>
18
+ # or use directly
19
+ bunx @lythos/skill-arena <command>
17
20
  ```
18
21
 
19
- ## Commands
22
+ ## Quick Start
20
23
 
21
24
  ```bash
22
- # Initialize an arena with 2-5 skills
25
+ # Mode 1: Compare two skills on the same task
23
26
  bunx @lythos/skill-arena \
24
- --task "Generate user auth flow diagram" \
27
+ --task "Generate auth flow diagram" \
25
28
  --skills "design-doc-mermaid,mermaid-tools" \
26
29
  --criteria "syntax,context,token"
27
30
 
28
- # Options
29
- # --task, -t Task description (required)
30
- # --skills, -s Comma-separated skill list, min 2, max 5
31
- # --criteria, -c Evaluation criteria (default: syntax,context,logic,token)
32
- # --control Control variable skill (default: project-scribe)
33
- # --dir, -d Arena parent directory (default: tmp)
34
- # --project, -p Project root (default: .)
31
+ # Mode 2: Compare full deck configurations
32
+ bunx @lythos/skill-arena \
33
+ --task "Generate auth flow diagram" \
34
+ --decks "./decks/minimal.toml,./decks/rich.toml" \
35
+ --criteria "quality,token,maintainability"
36
+
37
+ # Visualize results
38
+ bunx @lythos/skill-arena viz tmp/arena-<id>/
35
39
  ```
36
40
 
37
- ## Output
41
+ ## Commands
38
42
 
39
43
  ```
40
- tmp/arena-<timestamp>-<slug>/
41
- ├── arena.json # metadata + config
42
- ├── decks/ # one control-variable deck per skill
43
- ├── runs/ # subagent output (you fill this)
44
- └── TASK-arena.md # task card with subagent instructions
44
+ Usage: bunx @lythos/skill-arena <options> | bunx @lythos/skill-arena viz <dir>
45
+
46
+ Mode 1 — Single-Skill Comparison:
47
+ --task, -t <desc> Task description (required)
48
+ --skills, -s <list> Comma-separated skills, 2–5 (Mode 1)
49
+ --criteria, -c <list> Evaluation dimensions (default: syntax,context,logic,token)
50
+ --control <skill> Control skill (default: lythoskill-project-scribe)
51
+
52
+ Mode 2 — Full-Deck Comparison:
53
+ --decks <paths> Comma-separated deck toml paths, 2–5 (Mode 2)
54
+ --criteria, -c <list> Evaluation dimensions
55
+
56
+ Common:
57
+ --dir, -d <path> Arena parent directory (default: tmp)
58
+ --project, -p <path> Project root (default: .)
59
+
60
+ Viz:
61
+ viz <dir> Render ASCII charts from report.md
45
62
  ```
46
63
 
64
+ ## Skill Documentation
65
+
66
+ This package is the **Starter** layer (CLI implementation).
67
+ The agent-visible **Skill** layer documentation is here:
68
+ [packages/lythoskill-arena/skill/SKILL.md](../../packages/lythoskill-arena/skill/SKILL.md)
69
+
47
70
  ## Architecture
48
71
 
49
- This is the **Starter** layer of the thin-skill pattern. The agent-visible **Skill** layer is in `packages/lythoskill-arena/skill/`.
72
+ Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
73
+
74
+ ```
75
+ Starter (this package) → npm publish → bunx @lythos/skill-arena ...
76
+ Skill (packages/<name>/skill/) → build → SKILL.md + thin scripts
77
+ Output (skills/<name>/) → git commit → agent-visible skill
78
+ ```
50
79
 
51
80
  ## License
52
81
 
package/package.json CHANGED
@@ -1,7 +1,16 @@
1
1
  {
2
2
  "name": "@lythos/skill-arena",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
5
+ "keywords": [
6
+ "ai-agent",
7
+ "skill",
8
+ "claude-code",
9
+ "agent-skills",
10
+ "llm-tooling",
11
+ "lythoskill"
12
+ ],
13
+ "author": "lythos-labs",
5
14
  "license": "MIT",
6
15
  "type": "module",
7
16
  "bin": {
@@ -12,8 +21,16 @@
12
21
  "README.md",
13
22
  "LICENSE"
14
23
  ],
24
+ "repository": {
25
+ "type": "git",
26
+ "url": "git+https://github.com/lythos-labs/lythoskill.git",
27
+ "directory": "packages/lythoskill-arena"
28
+ },
29
+ "bugs": {
30
+ "url": "https://github.com/lythos-labs/lythoskill/issues"
31
+ },
32
+ "homepage": "https://github.com/lythos-labs/lythoskill/tree/main/packages/lythoskill-arena#readme",
15
33
  "engines": {
16
34
  "bun": ">=1.0.0"
17
- },
18
- "license": "MIT"
35
+ }
19
36
  }
package/src/cli.ts CHANGED
@@ -110,7 +110,7 @@ export function runArena(argv: string[]) {
110
110
  const CRITERIA = (options.criteria || 'syntax,context,logic,token')
111
111
  .split(',').map(s => s.trim()).filter(Boolean)
112
112
 
113
- const CONTROL_SKILLS = (options.control || 'lythoskill-project-cortex')
113
+ const CONTROL_SKILLS = (options.control || 'lythoskill-project-scribe')
114
114
  .split(',').map(s => s.trim()).filter(Boolean)
115
115
 
116
116
  const PROJECT_DIR = resolve(options.project!)