npm - @lythos/skill-arena - Versions diffs - 0.3.0 → 0.5.0 - Mend

@lythos/skill-arena 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/README.md CHANGED Viewed

@@ -1,52 +1,81 @@
 # @lythos/skill-arena
-> Skill comparison benchmark tool. Run control-variable decks against the same task to compare skill effectiveness.
+> Controlled-variable benchmark for AI agent skills. Compare skills, decks, or configurations on the same task — single-skill A/B or full-deck Pareto frontier analysis.
-Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) meta-skill ecosystem.
+## Why
-## What it does
+"Which skill is better?" is the wrong question. The right question is "which skill is better for what."
-Creates an arena directory with isolated decks for each skill under test, generates task cards for subagent dispatch, and produces a structured output for judge evaluation. Core principle: **control variables** — only the tested skill differs between decks.
+`skill-arena` scaffolds isolated environments where subagents complete the same task under different decks. A judge agent scores outputs across multiple dimensions. Supports:
+- **Mode 1**: Single-skill comparison (controlled variable — same helper skills, different test skill).
+- **Mode 2**: Full-deck comparison (Pareto frontier — no single winner, only optimal trade-offs).
 ## Install
 ```bash
 bun add -d @lythos/skill-arena
-# or
-bunx @lythos/skill-arena <args>
+# or use directly
+bunx @lythos/skill-arena <command>
 ```
-## Commands
+## Quick Start
 ```bash
-# Initialize an arena with 2-5 skills
+# Mode 1: Compare two skills on the same task
 bunx @lythos/skill-arena \
-  --task "Generate user auth flow diagram" \
+  --task "Generate auth flow diagram" \
   --skills "design-doc-mermaid,mermaid-tools" \
   --criteria "syntax,context,token"
-# Options
-# --task, -t     Task description (required)
-# --skills, -s   Comma-separated skill list, min 2, max 5
-# --criteria, -c Evaluation criteria (default: syntax,context,logic,token)
-# --control      Control variable skill (default: project-scribe)
-# --dir, -d      Arena parent directory (default: tmp)
-# --project, -p  Project root (default: .)
+# Mode 2: Compare full deck configurations
+bunx @lythos/skill-arena \
+  --task "Generate auth flow diagram" \
+  --decks "./decks/minimal.toml,./decks/rich.toml" \
+  --criteria "quality,token,maintainability"
+# Visualize results
+bunx @lythos/skill-arena viz tmp/arena-<id>/
 ```
-## Output
+## Commands
 ```
-tmp/arena-<timestamp>-<slug>/
-├── arena.json       # metadata + config
-├── decks/           # one control-variable deck per skill
-├── runs/            # subagent output (you fill this)
-└── TASK-arena.md    # task card with subagent instructions
+Usage: bunx @lythos/skill-arena <options> | bunx @lythos/skill-arena viz <dir>
+Mode 1 — Single-Skill Comparison:
+  --task, -t <desc>       Task description (required)
+  --skills, -s <list>     Comma-separated skills, 2–5 (Mode 1)
+  --criteria, -c <list>   Evaluation dimensions (default: syntax,context,logic,token)
+  --control <skill>      Control skill (default: lythoskill-project-scribe)
+Mode 2 — Full-Deck Comparison:
+  --decks <paths>        Comma-separated deck toml paths, 2–5 (Mode 2)
+  --criteria, -c <list>   Evaluation dimensions
+Common:
+  --dir, -d <path>       Arena parent directory (default: tmp)
+  --project, -p <path>   Project root (default: .)
+Viz:
+  viz <dir>               Render ASCII charts from report.md
 ```
+## Skill Documentation
+This package is the **Starter** layer (CLI implementation).
+The agent-visible **Skill** layer documentation is here:
+[packages/lythoskill-arena/skill/SKILL.md](../../packages/lythoskill-arena/skill/SKILL.md)
 ## Architecture
-This is the **Starter** layer of the thin-skill pattern. The agent-visible **Skill** layer is in `packages/lythoskill-arena/skill/`.
+Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
+```
+Starter (this package) → npm publish → bunx @lythos/skill-arena ...
+Skill   (packages/<name>/skill/)     → build → SKILL.md + thin scripts
+Output  (skills/<name>/)             → git commit → agent-visible skill
+```
 ## License

package/package.json CHANGED Viewed

@@ -1,7 +1,16 @@
 {
   "name": "@lythos/skill-arena",
-  "version": "0.3.0",
+  "version": "0.5.0",
   "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
+  "keywords": [
+    "ai-agent",
+    "skill",
+    "claude-code",
+    "agent-skills",
+    "llm-tooling",
+    "lythoskill"
+  ],
+  "author": "lythos-labs",
   "license": "MIT",
   "type": "module",
   "bin": {
@@ -12,8 +21,16 @@
     "README.md",
     "LICENSE"
   ],
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/lythos-labs/lythoskill.git",
+    "directory": "packages/lythoskill-arena"
+  },
+  "bugs": {
+    "url": "https://github.com/lythos-labs/lythoskill/issues"
+  },
+  "homepage": "https://github.com/lythos-labs/lythoskill/tree/main/packages/lythoskill-arena#readme",
   "engines": {
     "bun": ">=1.0.0"
-  },
-  "license": "MIT"
+  }
 }

package/src/cli.ts CHANGED Viewed

@@ -25,7 +25,36 @@ function timestamp(): string {
 }
 // ── Parse arguments (simple slice style) ──────────────────────────────
+function printHelp(): void { // Writes the CLI help text (usage synopsis, options, examples) to stdout with a single console.log; takes no input and returns nothing. parseArgs calls it when --help or -h is present.
+  console.log(`🎭 lythoskill-arena — Skill comparison runner
+Usage:
+  lythoskill-arena --task "<task description>" --skills <skill1,skill2,...>
+  lythoskill-arena --task "<task description>" --decks <deck1,deck2,...>
+  lythoskill-arena viz <arena-dir>
+Options:
+  -t, --task <desc>      Task description (required)
+  -s, --skills <list>    Comma-separated skill names
+      --decks <list>     Comma-separated deck paths
+  -c, --criteria <list>  Evaluation criteria (default: syntax,context,logic,token)
+      --control <skill>  Control skill for comparison (default: lythoskill-project-scribe)
+  -d, --dir <dir>        Output directory (default: tmp)
+  -p, --project <dir>    Project directory (default: .)
+Examples:
+  lythoskill-arena --task "Refactor auth module" --skills skill-a,skill-b
+  lythoskill-arena --task "Write tests" --decks ./decks/minimal.toml,./decks/full.toml
+  lythoskill-arena viz tmp/arena-20260430
+`)
+}
 function parseArgs(argv: string[]) {
+  if (argv.includes('--help') || argv.includes('-h')) {
+    printHelp()
+    process.exit(0)
+  }
   const options: Record<string, string | undefined> = {
     task: undefined,
     skills: undefined,
@@ -110,7 +139,7 @@ export function runArena(argv: string[]) {
   const CRITERIA = (options.criteria || 'syntax,context,logic,token')
     .split(',').map(s => s.trim()).filter(Boolean)
-  const CONTROL_SKILLS = (options.control || 'lythoskill-project-cortex')
+  const CONTROL_SKILLS = (options.control || 'lythoskill-project-scribe')
     .split(',').map(s => s.trim()).filter(Boolean)
   const PROJECT_DIR = resolve(options.project!)