@lythos/skill-arena 0.9.48 → 0.9.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -8
- package/package.json +1 -1
- package/src/cli.ts +1 -9
- package/src/path-guard.ts +92 -0
- package/src/preflight.ts +3 -2
package/README.md
CHANGED
|
@@ -49,20 +49,20 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
|
|
|
49
49
|
```bash
|
|
50
50
|
bun add -d @lythos/skill-arena
|
|
51
51
|
# or use directly
|
|
52
|
-
bunx @lythos/skill-arena@0.9.48 <command>
|
|
52
|
+
bunx @lythos/skill-arena@0.9.49 <command>
|
|
53
53
|
```
|
|
54
54
|
|
|
55
55
|
## Quick Start
|
|
56
56
|
|
|
57
57
|
```bash
|
|
58
58
|
# Single: test a deck with one agent
|
|
59
|
-
bunx @lythos/skill-arena@0.9.48 single \
|
|
59
|
+
bunx @lythos/skill-arena@0.9.49 single \
|
|
60
60
|
--deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml \
|
|
61
61
|
--brief "Generate auth flow diagram"
|
|
62
62
|
|
|
63
63
|
# Vs: compare multiple decks side by side
|
|
64
64
|
curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/arena/research-compare/arena.toml > arena.toml
|
|
65
|
-
bunx @lythos/skill-arena@0.9.48 vs --config ./arena.toml
|
|
65
|
+
bunx @lythos/skill-arena@0.9.49 vs --config ./arena.toml
|
|
66
66
|
```
|
|
67
67
|
|
|
68
68
|
## Commands
|
|
@@ -71,23 +71,23 @@ bunx @lythos/skill-arena@0.9.48 vs --config ./arena.toml
|
|
|
71
71
|
|
|
72
72
|
```bash
|
|
73
73
|
# Print execution plan without running
|
|
74
|
-
bunx @lythos/skill-arena@0.9.48 vs --config arena.toml --dry-run
|
|
74
|
+
bunx @lythos/skill-arena@0.9.49 vs --config arena.toml --dry-run
|
|
75
75
|
|
|
76
76
|
# Execute with per-side runs_per_side and statistical aggregation
|
|
77
|
-
bunx @lythos/skill-arena@0.9.48 vs --config arena.toml
|
|
77
|
+
bunx @lythos/skill-arena@0.9.49 vs --config arena.toml
|
|
78
78
|
```
|
|
79
79
|
|
|
80
80
|
### Scaffold mode (legacy, manual execution)
|
|
81
81
|
|
|
82
82
|
```
|
|
83
|
-
bunx @lythos/skill-arena@0.9.48 scaffold --task "Generate auth flow diagram" \
|
|
83
|
+
bunx @lythos/skill-arena@0.9.49 scaffold --task "Generate auth flow diagram" \
|
|
84
84
|
--decks https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml,https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/documents.toml
|
|
85
85
|
```
|
|
86
86
|
|
|
87
87
|
### Viz
|
|
88
88
|
|
|
89
89
|
```bash
|
|
90
|
-
bunx @lythos/skill-arena@0.9.48 viz runs/arena-<id>/
|
|
90
|
+
bunx @lythos/skill-arena@0.9.49 viz runs/arena-<id>/
|
|
91
91
|
```
|
|
92
92
|
|
|
93
93
|
## Skill Documentation
|
|
@@ -101,7 +101,7 @@ The agent-visible **Skill** layer documentation is here:
|
|
|
101
101
|
Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
|
|
102
102
|
|
|
103
103
|
```
|
|
104
|
-
Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.48 ...
|
|
104
|
+
Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.49 ...
|
|
105
105
|
Skill (packages/<name>/skill/) → build → SKILL.md + thin scripts
|
|
106
106
|
Output (skills/<name>/) → git commit → agent-visible skill
|
|
107
107
|
```
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -54,11 +54,9 @@ Options:
|
|
|
54
54
|
--deck <path> Deck path (single only)
|
|
55
55
|
--brief "<text>" Inline task description (single only, alternative to --task)
|
|
56
56
|
--player <name> Agent player (single only, default: kimi)
|
|
57
|
-
-c, --criteria <list> Evaluation criteria (scaffold only, default: syntax,context,logic,token)
|
|
58
57
|
--config <path> Path to arena.toml (vs only)
|
|
59
58
|
--dry-run Print execution plan without running (vs --config only)
|
|
60
59
|
--out <dir> Output directory
|
|
61
|
-
-d, --dir <dir> Parent dir (scaffold: defaults to tmp)
|
|
62
60
|
-p, --project <dir> Project root (default: .)
|
|
63
61
|
--timeout <ms> Subagent timeout (single only)
|
|
64
62
|
|
|
@@ -202,6 +200,7 @@ async function singleRun(args: string[]) {
|
|
|
202
200
|
// Optional: register claude-sdk adapter if the package is installed
|
|
203
201
|
try { await import('@lythos/agent-adapter-claude-sdk') } catch { /* package not installed */ }
|
|
204
202
|
try { await import('@lythos/agent-adapter-deepseek-serve') } catch { /* package not installed */ }
|
|
203
|
+
try { await import('@lythos/agent-adapter-codex') } catch { /* package not installed */ }
|
|
205
204
|
const { runAgentScenario } = await import('@lythos/test-utils/agent-bdd')
|
|
206
205
|
const { resolvePlayer } = await import('./player')
|
|
207
206
|
|
|
@@ -338,8 +337,6 @@ function parseArgs(argv: string[]) {
|
|
|
338
337
|
|
|
339
338
|
const options: Record<string, string | undefined> = {
|
|
340
339
|
task: undefined,
|
|
341
|
-
decks: undefined,
|
|
342
|
-
criteria: 'syntax,context,logic,token',
|
|
343
340
|
dir: 'tmp',
|
|
344
341
|
project: '.',
|
|
345
342
|
config: undefined,
|
|
@@ -352,11 +349,6 @@ function parseArgs(argv: string[]) {
|
|
|
352
349
|
const arg = argv[i]
|
|
353
350
|
if (arg === '--task' || arg === '-t') {
|
|
354
351
|
options.task = argv[++i]
|
|
355
|
-
} else if (arg === '--decks') {
|
|
356
|
-
options.decks = argv[++i]
|
|
357
|
-
} else if (arg === '--criteria' || arg === '-c') {
|
|
358
|
-
options.criteria = argv[++i]
|
|
359
|
-
options.control = argv[++i]
|
|
360
352
|
} else if (arg === '--dir' || arg === '-d') {
|
|
361
353
|
options.dir = argv[++i]
|
|
362
354
|
} else if (arg === '--project' || arg === '-p') {
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* path-guard — Path validation for arena CLI.
|
|
3
|
+
*
|
|
4
|
+
* Arena accepts deck/task/player paths from CLI arguments. These must
|
|
5
|
+
* be validated to prevent arbitrary file reads and directory traversal.
|
|
6
|
+
*
|
|
7
|
+
* Reference: arena sweep 2026-05-10 P1 path-trust findings
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { existsSync } from "node:fs"
import { isAbsolute, resolve, sep } from "node:path"
|
|
12
|
+
|
|
13
|
+
/**
 * Validate a deck path supplied on the command line.
 *
 * Relative paths are resolved against `projectDir` and must land strictly
 * inside it. Absolute paths are normalised and accepted as-is; they are NOT
 * confined to the project. Existence of the file is not checked here.
 *
 * @param raw - Deck path exactly as given by the user.
 * @param projectDir - Project root that relative paths are resolved against.
 * @returns The resolved absolute path.
 * @throws Error if `raw` has a ".." path segment, contains a null byte, or is
 *   a relative path that does not resolve to a location inside `projectDir`.
 */
export function validateDeckPath(raw: string, projectDir: string): string {
  // Reject ".." only as a whole path segment, so names such as
  // "deck..v2.toml" are not mistaken for traversal. Both "/" and "\" are
  // treated as segment boundaries to stay conservative across platforms.
  if (raw.split(/[\\/]/).includes("..")) {
    throw new Error(`Deck path contains parent traversal (..): ${raw}`)
  }
  if (raw.includes("\0")) {
    throw new Error(`Deck path contains null byte`)
  }

  const root = resolve(projectDir)
  const absolute = isAbsolute(raw)
  const resolved = absolute ? resolve(raw) : resolve(root, raw)

  // Compare with the platform separator (resolve() emits "\" on Windows) and
  // avoid a doubled separator when the project directory is a filesystem root.
  const rootPrefix = root.endsWith(sep) ? root : root + sep
  const insideProject = resolved !== root && resolved.startsWith(rootPrefix)

  if (!absolute && !insideProject) {
    throw new Error(
      `Deck path "${raw}" resolves outside the project directory.\n` +
        `  Resolved: ${resolved}\n` +
        `  Project:  ${root}`
    )
  }

  return resolved
}
|
|
37
|
+
|
|
38
|
+
/**
 * Validate a task path supplied on the command line.
 *
 * The path (relative or absolute) must resolve to a location strictly inside
 * `projectDir`, and something must exist at that location.
 *
 * NOTE(review): the file extension is not checked. The earlier description
 * called for ".md" / ".agent.md" files, but no such restriction is enforced
 * here — confirm whether callers expect one before adding it.
 *
 * @param raw - Task path exactly as given by the user.
 * @param projectDir - Project root the task must live under.
 * @returns The resolved absolute path.
 * @throws Error if `raw` has a ".." path segment, contains a null byte,
 *   resolves outside `projectDir`, or does not exist.
 */
export function validateTaskPath(raw: string, projectDir: string): string {
  // Reject ".." only as a whole path segment, so names such as
  // "task..draft.md" are not mistaken for traversal. Both "/" and "\" are
  // treated as segment boundaries to stay conservative across platforms.
  if (raw.split(/[\\/]/).includes("..")) {
    throw new Error(`Task path contains parent traversal (..): ${raw}`)
  }
  if (raw.includes("\0")) {
    throw new Error(`Task path contains null byte`)
  }

  const root = resolve(projectDir)
  const resolved = isAbsolute(raw) ? resolve(raw) : resolve(root, raw)

  // Compare with the platform separator (resolve() emits "\" on Windows) and
  // avoid a doubled separator when the project directory is a filesystem root.
  const rootPrefix = root.endsWith(sep) ? root : root + sep

  if (resolved === root || !resolved.startsWith(rootPrefix)) {
    throw new Error(
      `Task path "${raw}" resolves outside the project directory.\n` +
        `  Resolved: ${resolved}\n` +
        `  Project:  ${root}`
    )
  }

  if (!existsSync(resolved)) {
    throw new Error(`Task file not found: ${raw}\n  Resolved: ${resolved}`)
  }

  return resolved
}
|
|
66
|
+
|
|
67
|
+
/**
 * Validate the output directory supplied on the command line.
 *
 * The resolved directory must be either `/tmp` (or anything beneath it) or a
 * location strictly inside `projectDir`. The directory is not created and its
 * existence is not checked here.
 *
 * @param raw - Output directory exactly as given by the user.
 * @param projectDir - Project root that relative paths are resolved against.
 * @returns The resolved absolute path.
 * @throws Error if `raw` has a ".." path segment, contains a null byte, or
 *   resolves to a location that is neither under `/tmp` nor inside
 *   `projectDir`.
 */
export function validateOutDir(raw: string, projectDir: string): string {
  // Reject ".." only as a whole path segment, so names such as "run..1" are
  // not mistaken for traversal. Both "/" and "\" are treated as segment
  // boundaries to stay conservative across platforms.
  if (raw.split(/[\\/]/).includes("..")) {
    throw new Error(`Output directory contains parent traversal (..): ${raw}`)
  }
  // Same null-byte rejection as the deck and task validators.
  if (raw.includes("\0")) {
    throw new Error(`Output directory contains null byte`)
  }

  const resolvedProject = resolve(projectDir)
  const resolved = isAbsolute(raw) ? resolve(raw) : resolve(resolvedProject, raw)

  // Allow /tmp as a valid output target
  if (resolved === "/tmp" || resolved.startsWith("/tmp/")) {
    return resolved
  }

  // Compare with the platform separator (resolve() emits "\" on Windows) and
  // avoid a doubled separator when the project directory is a filesystem root.
  const projectPrefix = resolvedProject.endsWith(sep) ? resolvedProject : resolvedProject + sep

  if (resolved === resolvedProject || !resolved.startsWith(projectPrefix)) {
    throw new Error(
      `Output directory "${raw}" is outside the project.\n` +
        `  Use --out with a path under the project or in /tmp.`
    )
  }

  return resolved
}
|
package/src/preflight.ts
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
* IO is injected via function parameters (e.g., existsFn, readdirFn).
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
+
import { join } from 'node:path'
|
|
9
10
|
import { ColdPool, parseLocator } from '@lythos/cold-pool'
|
|
10
11
|
|
|
11
12
|
// ── Types ─────────────────────────────────────────────────────────────────
|
|
@@ -170,8 +171,8 @@ export function buildCopyPlan(
|
|
|
170
171
|
for (const name of entries) {
|
|
171
172
|
if (skipSet.has(name)) continue
|
|
172
173
|
plan.push({
|
|
173
|
-
src:
|
|
174
|
-
dest:
|
|
174
|
+
src: join(workdir, name),
|
|
175
|
+
dest: join(outDir, name),
|
|
175
176
|
name,
|
|
176
177
|
})
|
|
177
178
|
}
|