@lythos/skill-arena 0.9.48 → 0.9.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -49,20 +49,20 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
49
49
  ```bash
50
50
  bun add -d @lythos/skill-arena
51
51
  # or use directly
52
- bunx @lythos/skill-arena@0.9.48 <command>
52
+ bunx @lythos/skill-arena@0.9.50 <command>
53
53
  ```
54
54
 
55
55
  ## Quick Start
56
56
 
57
57
  ```bash
58
58
  # Single: test a deck with one agent
59
- bunx @lythos/skill-arena@0.9.48 single \
59
+ bunx @lythos/skill-arena@0.9.50 single \
60
60
  --deck https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml \
61
61
  --brief "Generate auth flow diagram"
62
62
 
63
63
  # Vs: compare multiple decks side by side
64
64
  curl -fsSL https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/arena/research-compare/arena.toml > arena.toml
65
- bunx @lythos/skill-arena@0.9.48 vs --config ./arena.toml
65
+ bunx @lythos/skill-arena@0.9.50 vs --config ./arena.toml
66
66
  ```
67
67
 
68
68
  ## Commands
@@ -71,23 +71,23 @@ bunx @lythos/skill-arena@0.9.48 vs --config ./arena.toml
71
71
 
72
72
  ```bash
73
73
  # Print execution plan without running
74
- bunx @lythos/skill-arena@0.9.48 vs --config arena.toml --dry-run
74
+ bunx @lythos/skill-arena@0.9.50 vs --config arena.toml --dry-run
75
75
 
76
76
  # Execute with per-side runs_per_side and statistical aggregation
77
- bunx @lythos/skill-arena@0.9.48 vs --config arena.toml
77
+ bunx @lythos/skill-arena@0.9.50 vs --config arena.toml
78
78
  ```
79
79
 
80
80
  ### Scaffold mode (legacy, manual execution)
81
81
 
82
82
  ```
83
- bunx @lythos/skill-arena@0.9.48 scaffold --task "Generate auth flow diagram" \
83
+ bunx @lythos/skill-arena@0.9.50 scaffold --task "Generate auth flow diagram" \
84
84
  --decks https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/scout.toml,https://raw.githubusercontent.com/lythos-labs/lythoskill/main/examples/decks/documents.toml
85
85
  ```
86
86
 
87
87
  ### Viz
88
88
 
89
89
  ```bash
90
- bunx @lythos/skill-arena@0.9.48 viz runs/arena-<id>/
90
+ bunx @lythos/skill-arena@0.9.50 viz runs/arena-<id>/
91
91
  ```
92
92
 
93
93
  ## Skill Documentation
@@ -101,7 +101,7 @@ The agent-visible **Skill** layer documentation is here:
101
101
  Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
102
102
 
103
103
  ```
104
- Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.48 ...
104
+ Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.50 ...
105
105
  Skill (packages/<name>/skill/) → build → SKILL.md + thin scripts
106
106
  Output (skills/<name>/) → git commit → agent-visible skill
107
107
  ```
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lythos/skill-arena",
3
- "version": "0.9.48",
3
+ "version": "0.9.50",
4
4
  "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
5
5
  "keywords": [
6
6
  "ai-agent",
package/src/cli.ts CHANGED
@@ -54,11 +54,9 @@ Options:
54
54
  --deck <path> Deck path (single only)
55
55
  --brief "<text>" Inline task description (single only, alternative to --task)
56
56
  --player <name> Agent player (single only, default: kimi)
57
- -c, --criteria <list> Evaluation criteria (scaffold only, default: syntax,context,logic,token)
58
57
  --config <path> Path to arena.toml (vs only)
59
58
  --dry-run Print execution plan without running (vs --config only)
60
59
  --out <dir> Output directory
61
- -d, --dir <dir> Parent dir (scaffold: defaults to tmp)
62
60
  -p, --project <dir> Project root (default: .)
63
61
  --timeout <ms> Subagent timeout (single only)
64
62
 
@@ -202,6 +200,7 @@ async function singleRun(args: string[]) {
202
200
  // Optional: register claude-sdk adapter if the package is installed
203
201
  try { await import('@lythos/agent-adapter-claude-sdk') } catch { /* package not installed */ }
204
202
  try { await import('@lythos/agent-adapter-deepseek-serve') } catch { /* package not installed */ }
203
+ try { await import('@lythos/agent-adapter-codex') } catch { /* package not installed */ }
205
204
  const { runAgentScenario } = await import('@lythos/test-utils/agent-bdd')
206
205
  const { resolvePlayer } = await import('./player')
207
206
 
@@ -338,8 +337,6 @@ function parseArgs(argv: string[]) {
338
337
 
339
338
  const options: Record<string, string | undefined> = {
340
339
  task: undefined,
341
- decks: undefined,
342
- criteria: 'syntax,context,logic,token',
343
340
  dir: 'tmp',
344
341
  project: '.',
345
342
  config: undefined,
@@ -352,11 +349,6 @@ function parseArgs(argv: string[]) {
352
349
  const arg = argv[i]
353
350
  if (arg === '--task' || arg === '-t') {
354
351
  options.task = argv[++i]
355
- } else if (arg === '--decks') {
356
- options.decks = argv[++i]
357
- } else if (arg === '--criteria' || arg === '-c') {
358
- options.criteria = argv[++i]
359
- options.control = argv[++i]
360
352
  } else if (arg === '--dir' || arg === '-d') {
361
353
  options.dir = argv[++i]
362
354
  } else if (arg === '--project' || arg === '-p') {
@@ -0,0 +1,92 @@
1
+ /**
2
+ * path-guard — Path validation for arena CLI.
3
+ *
4
+ * Arena accepts deck/task/player paths from CLI arguments. These must
5
+ * be validated to prevent arbitrary file reads and directory traversal.
6
+ *
7
+ * Reference: arena sweep 2026-05-10 P1 path-trust findings
8
+ */
9
+
10
import { resolve, isAbsolute, sep } from "node:path"
import { existsSync } from "node:fs"
12
+
13
/**
 * Validate a deck path supplied on the command line.
 *
 * Relative paths are resolved against `projectDir` and must land strictly
 * inside it. Absolute paths are normalised and returned as-is: they are NOT
 * confined to the project directory. The file's existence is not checked.
 *
 * @param raw - Deck path exactly as received from the CLI.
 * @param projectDir - Project root used to resolve relative paths.
 * @returns The normalised absolute path.
 * @throws Error if `raw` contains a `..` path segment or a null byte, or if a
 *   relative `raw` does not resolve to a location inside `projectDir`.
 */
export function validateDeckPath(raw: string, projectDir: string): string {
  // Match ".." only as a whole path segment so names such as "v1..2.toml" are
  // not mistaken for traversal. Both "/" and "\" count as segment boundaries.
  if (raw.split(/[\\/]/).includes("..")) {
    throw new Error(`Deck path contains parent traversal (..): ${raw}`)
  }
  if (raw.includes("\0")) {
    throw new Error(`Deck path contains null byte`)
  }

  const root = resolve(projectDir)
  // Use the platform separator, and avoid doubling it when the project is a
  // filesystem root (where `root` already ends with the separator).
  const rootPrefix = root.endsWith(sep) ? root : root + sep

  const absolute = isAbsolute(raw)
  const resolved = absolute ? resolve(raw) : resolve(root, raw)

  // Only relative inputs are confined; "." or "" resolve to the project
  // directory itself, which is not a deck file, so that is refused as well.
  if (!absolute && (resolved === root || !resolved.startsWith(rootPrefix))) {
    throw new Error(
      `Deck path "${raw}" resolves outside the project directory.\n` +
      `  Resolved: ${resolved}\n` +
      `  Project:  ${root}`
    )
  }

  return resolved
}
37
+
38
/**
 * Validate a task path supplied on the command line.
 *
 * The path (relative or absolute) must resolve to an existing entry strictly
 * inside `projectDir`.
 *
 * NOTE(review): task files are expected to be `.md` / `.agent.md`, but the
 * extension is not checked here — confirm whether callers enforce it.
 * NOTE(review): the existence check uses `existsSync`, which does not tell
 * files from directories — confirm whether a directory should be refused.
 *
 * @param raw - Task path exactly as received from the CLI.
 * @param projectDir - Project root; relative paths are resolved against it.
 * @returns The normalised absolute path.
 * @throws Error if `raw` contains a `..` path segment or a null byte, if it
 *   resolves outside `projectDir`, or if nothing exists at the resolved path.
 */
export function validateTaskPath(raw: string, projectDir: string): string {
  // Match ".." only as a whole path segment so names such as "task..v2.md"
  // are not mistaken for traversal. Both "/" and "\" count as boundaries.
  if (raw.split(/[\\/]/).includes("..")) {
    throw new Error(`Task path contains parent traversal (..): ${raw}`)
  }
  if (raw.includes("\0")) {
    throw new Error(`Task path contains null byte`)
  }

  const root = resolve(projectDir)
  // Use the platform separator, and avoid doubling it when the project is a
  // filesystem root (where `root` already ends with the separator).
  const rootPrefix = root.endsWith(sep) ? root : root + sep

  const resolved = isAbsolute(raw) ? resolve(raw) : resolve(root, raw)

  // Unlike deck paths, absolute task paths are confined to the project too.
  if (resolved === root || !resolved.startsWith(rootPrefix)) {
    throw new Error(
      `Task path "${raw}" resolves outside the project directory.\n` +
      `  Resolved: ${resolved}\n` +
      `  Project:  ${root}`
    )
  }

  if (!existsSync(resolved)) {
    throw new Error(`Task file not found: ${raw}\n  Resolved: ${resolved}`)
  }

  return resolved
}
66
+
67
/**
 * Validate the output directory that arena writes agent output and judge
 * verdicts to.
 *
 * Accepted targets are `/tmp` (or anything beneath it) and any location
 * strictly inside `projectDir`. The directory is not created or checked for
 * existence here.
 *
 * @param raw - Output directory exactly as received from the CLI.
 * @param projectDir - Project root; relative paths are resolved against it.
 * @returns The normalised absolute path.
 * @throws Error if `raw` contains a `..` path segment or a null byte, or if it
 *   resolves to a location that is neither under `/tmp` nor inside
 *   `projectDir`.
 */
export function validateOutDir(raw: string, projectDir: string): string {
  // Match ".." only as a whole path segment so names such as "run-0.9..50"
  // are not mistaken for traversal. Both "/" and "\" count as boundaries.
  if (raw.split(/[\\/]/).includes("..")) {
    throw new Error(`Output directory contains parent traversal (..): ${raw}`)
  }
  // Same null-byte guard the deck and task validators apply.
  if (raw.includes("\0")) {
    throw new Error(`Output directory contains null byte`)
  }

  const resolvedProject = resolve(projectDir)
  const resolved = isAbsolute(raw) ? resolve(raw) : resolve(resolvedProject, raw)

  // Allow /tmp as a valid output target
  if (resolved === "/tmp" || resolved.startsWith("/tmp/")) {
    return resolved
  }

  // Use the platform separator, and avoid doubling it when the project is a
  // filesystem root (where the resolved root already ends with the separator).
  const projectPrefix = resolvedProject.endsWith(sep)
    ? resolvedProject
    : resolvedProject + sep

  // The project directory itself is refused; only paths beneath it pass.
  if (resolved === resolvedProject || !resolved.startsWith(projectPrefix)) {
    throw new Error(
      `Output directory "${raw}" is outside the project.\n` +
      `  Use --out with a path under the project or in /tmp.`
    )
  }

  return resolved
}
package/src/preflight.ts CHANGED
@@ -6,6 +6,7 @@
6
6
  * IO is injected via function parameters (e.g., existsFn, readdirFn).
7
7
  */
8
8
 
9
+ import { join } from 'node:path'
9
10
  import { ColdPool, parseLocator } from '@lythos/cold-pool'
10
11
 
11
12
  // ── Types ─────────────────────────────────────────────────────────────────
@@ -170,8 +171,8 @@ export function buildCopyPlan(
170
171
  for (const name of entries) {
171
172
  if (skipSet.has(name)) continue
172
173
  plan.push({
173
- src: `${workdir}/${name}`,
174
- dest: `${outDir}/${name}`,
174
+ src: join(workdir, name),
175
+ dest: join(outDir, name),
175
176
  name,
176
177
  })
177
178
  }