@lythos/skill-arena 0.9.41 → 0.9.43

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +15 -27
  2. package/package.json +1 -1
  3. package/src/cli.ts +38 -23
package/README.md CHANGED
@@ -49,23 +49,20 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
49
49
  ```bash
50
50
  bun add -d @lythos/skill-arena
51
51
  # or use directly
52
- bunx @lythos/skill-arena@0.9.41 <command>
52
+ bunx @lythos/skill-arena@0.9.43 <command>
53
53
  ```
54
54
 
55
55
  ## Quick Start
56
56
 
57
57
  ```bash
58
- # Single: test a deck with one agent (exec shortcut)
59
- bunx @lythos/skill-arena@0.9.41 single \
60
- --brief "Generate auth flow diagram" \
61
- --deck ./examples/decks/documents.toml
62
-
63
- # Vs: compare multiple decks side by side (declarative)
64
- bunx @lythos/skill-arena@0.9.41 vs \
65
- --config examples/arena/research-compare/arena.toml
66
-
67
- # Visualize results
68
- bunx @lythos/skill-arena@0.9.41 viz tmp/arena-<id>/
58
+ # Single: test a deck with one agent
59
+ bunx @lythos/skill-arena@0.9.43 single \
60
+ --deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml \
61
+ --brief "Generate auth flow diagram"
62
+
63
+ # Vs: compare multiple decks side by side
64
+ curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/arena/research-compare/arena.toml > arena.toml
65
+ bunx @lythos/skill-arena@0.9.43 vs --config ./arena.toml
69
66
  ```
70
67
 
71
68
  ## Commands
@@ -74,32 +71,23 @@ bunx @lythos/skill-arena@0.9.41 viz tmp/arena-<id>/
74
71
 
75
72
  ```bash
76
73
  # Print execution plan without running
77
- bunx @lythos/skill-arena@0.9.41 vs --config arena.toml --dry-run
74
+ bunx @lythos/skill-arena@0.9.43 vs --config arena.toml --dry-run
78
75
 
79
76
  # Execute with per-side runs_per_side and statistical aggregation
80
- bunx @lythos/skill-arena@0.9.41 vs --config arena.toml
81
- ```
82
-
83
- ### CLI-flag mode (backward compat)
84
-
85
- ```
86
- bunx @lythos/skill-arena@0.9.41 run \
87
- --task ./TASK-arena.md \
88
- --players ./players/claude.toml \
89
- --decks ./decks/run-01.toml,./decks/run-02.toml \
90
- --criteria coverage,relevance,actionability,depth
77
+ bunx @lythos/skill-arena@0.9.43 vs --config arena.toml
91
78
  ```
92
79
 
93
80
  ### Scaffold mode (legacy, manual execution)
94
81
 
95
82
  ```
96
- bunx @lythos/skill-arena@0.9.41 scaffold --task "..." --decks a.toml,b.toml
83
+ bunx @lythos/skill-arena@0.9.43 scaffold --task "Generate auth flow diagram" \
84
+ --decks https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml,https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/documents.toml
97
85
  ```
98
86
 
99
87
  ### Viz
100
88
 
101
89
  ```bash
102
- bunx @lythos/skill-arena@0.9.41 viz runs/arena-<id>/
90
+ bunx @lythos/skill-arena@0.9.43 viz runs/arena-<id>/
103
91
  ```
104
92
 
105
93
  ## Skill Documentation
@@ -113,7 +101,7 @@ The agent-visible **Skill** layer documentation is here:
113
101
  Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
114
102
 
115
103
  ```
116
- Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.41 ...
104
+ Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.43 ...
117
105
  Skill (packages/<name>/skill/) → build → SKILL.md + thin scripts
118
106
  Output (skills/<name>/) → git commit → agent-visible skill
119
107
  ```
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lythos/skill-arena",
3
- "version": "0.9.41",
3
+ "version": "0.9.43",
4
4
  "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
5
5
  "keywords": [
6
6
  "ai-agent",
package/src/cli.ts CHANGED
@@ -63,16 +63,20 @@ Options:
63
63
  --timeout <ms> Subagent timeout (single only)
64
64
 
65
65
  Examples:
66
- # Single-player deck test (exec shortcut)
67
- lythoskill-arena single --task ./TASK.agent.md --deck ./deck.toml
68
- lythoskill-arena single --brief "Generate auth flow diagram" --deck ./deck.toml --player kimi
66
+ # Single-player deck test (--deck accepts local paths and http/https URLs)
67
+ lythoskill-arena single \\
68
+ --deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml \\
69
+ --brief "Generate auth flow diagram" --player kimi
69
70
 
70
71
  # Multi-side comparison (declarative)
72
+ curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/arena/add-remove/arena.toml > arena.toml
71
73
  lythoskill-arena vs --config ./arena.toml
72
74
  lythoskill-arena vs --config ./arena.toml --dry-run
73
75
 
74
76
  # Legacy scaffolding
75
- lythoskill-arena scaffold --task "Refactor auth module" --decks ./decks/a.toml,./decks/b.toml
77
+ # scaffold creates structure; decks via URL (auto-downloaded during link):
78
+ lythoskill-arena scaffold --task "Refactor auth module" \\
79
+ --decks https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml,https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/documents.toml
76
80
  lythoskill-arena viz runs/arena-20260504
77
81
  `)
78
82
  }
@@ -91,16 +95,19 @@ async function singleRun(args: string[]) {
91
95
  }
92
96
 
93
97
  if (!opts.deck) {
94
- console.error(`❌ --deck <path> is required.
95
- Usage: lythoskill-arena single --deck ./deck.toml --task ./scenario.agent.md
96
- lythoskill-arena single --deck ./deck.toml --brief "your task description"
97
- Example decks: examples/decks/scout.toml, examples/decks/documents.toml`)
98
+ console.error(`❌ --deck <path|url> is required.
99
+ --deck accepts local paths and http/https URLs (auto-fetched).
100
+ Example: lythoskill-arena single \\
101
+ --deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml \\
102
+ --brief "your task"`)
98
103
  process.exit(1)
99
104
  }
100
105
  if (!opts.task && (!opts.brief || !opts.brief.trim())) {
101
- console.error(`❌ --task <path> or --brief "<prompt>" is required.
102
- Usage: lythoskill-arena single --deck ./deck.toml --task ./scenario.agent.md
103
- lythoskill-arena single --deck ./deck.toml --brief "your task description"`)
106
+ console.error(`❌ --task <path> or --brief "<text>" is required.
107
+ --task reads a .agent.md scenario file; --brief takes inline text.
108
+ Example: lythoskill-arena single \\
109
+ --deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml \\
110
+ --brief "your task"`)
104
111
  process.exit(1)
105
112
  }
106
113
 
@@ -117,17 +124,26 @@ async function singleRun(args: string[]) {
117
124
  } catch { /* keep original url */ }
118
125
  const dest = resolve(process.cwd(), 'arena-deck.toml')
119
126
  console.log(`📥 Fetching arena deck: ${url}`)
120
- const res = await fetch(url, { signal: AbortSignal.timeout(30_000) })
121
- if (!res.ok) { console.error(`❌ Failed to fetch deck (HTTP ${res.status}): ${url}`); process.exit(1) }
127
+ let res: Response
128
+ try { res = await fetch(url, { signal: AbortSignal.timeout(30_000) }) } catch (e: any) {
129
+ console.error(`❌ Cannot reach ${url}
130
+ Network issue? Try a GitHub proxy mirror:
131
+ ${url.replace('https://raw.githubusercontent.com/', 'https://ghfast.top/https://raw.githubusercontent.com/')}
132
+ Or download manually and reference the local file.`)
133
+ process.exit(1)
134
+ }
135
+ if (!res.ok) { console.error(`❌ Failed to fetch deck (HTTP ${res.status}): ${url}
136
+ Try a GitHub proxy mirror:
137
+ ${url.replace('https://raw.githubusercontent.com/', 'https://ghfast.top/https://raw.githubusercontent.com/')}`); process.exit(1) }
122
138
  deckWrite(dest, await res.text())
123
139
  console.log(` → saved to ${dest}`)
124
140
  deckPath = dest
125
141
  } else {
126
142
  deckPath = resolve(opts.deck)
127
143
  if (!deckExists(deckPath)) { console.error(`❌ Deck file not found: ${deckPath}
128
- Create one: examples/decks/scout.toml (minimal), examples/decks/documents.toml (documents)
129
- Or fetch: curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml > deck.toml
130
- Or create: see https://github.com/lythos-labs/lythoskill/tree/main/examples/decks/`); process.exit(1) }
144
+ Make sure the path is correct, or use a URL:
145
+ --deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml
146
+ (URLs are auto-fetched — no local file needed)`); process.exit(1) }
131
147
  }
132
148
 
133
149
  const { useAgent } = await import('@lythos/test-utils/agents')
@@ -148,9 +164,9 @@ async function singleRun(args: string[]) {
148
164
  if (opts.task) {
149
165
  const taskPath = resolve(opts.task)
150
166
  if (!existsSync(taskPath)) { console.error(`❌ Task file not found: ${taskPath}
151
- Create a .agent.md scenario or use --brief for inline tasks.
152
- Format: frontmatter (name, description, timeout) + Given/When/Then/Judge sections.
153
- Example: see playground/arena-one-shot/TASK-arena.agent.md`); process.exit(1) }
167
+ Use --brief for inline tasks, or point --task to an existing .agent.md file.
168
+ Format: name + description + Given/When/Then/Judge sections.
169
+ Example: lythoskill-arena single --brief "your task" --deck <url>`); process.exit(1) }
154
170
  scenarioOpt.scenarioPath = taskPath
155
171
  // Quick validation: check frontmatter presence
156
172
  const raw = readFileSync(taskPath, 'utf-8')
@@ -767,10 +783,9 @@ async function vsRun(argv: string[]) {
767
783
  console.error(`❌ --config <arena.toml> is required.
768
784
  Usage: lythoskill-arena vs --config ./arena.toml
769
785
  lythoskill-arena vs --config ./arena.toml --dry-run
770
- Example configs:
771
- examples/arena/research-compare/arena.toml two-side A/B
772
- examples/arena/add-remove/arena.toml three-side Pareto
773
- Create one: cp examples/arena/research-compare/arena.toml ./arena.toml`)
786
+ Fetch an example:
787
+ curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/arena/add-remove/arena.toml > arena.toml
788
+ Then edit arena.toml and run: lythoskill-arena vs --config ./arena.toml`)
774
789
  process.exit(1)
775
790
 
776
791
  const result = await runArenaProgrammatic({