npm - @lythos/skill-arena - Versions diffs - 0.9.23 → 0.9.24 - Mend

@lythos/skill-arena 0.9.23 → 0.9.24

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4)

package/README.md CHANGED Viewed

@@ -49,26 +49,26 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
 ```bash
 bun add -d @lythos/skill-arena
 # or use directly
-bunx @lythos/skill-arena@0.9.23 <command>
+bunx @lythos/skill-arena@0.9.24 <command>
 ```
 ## Quick Start
 ```bash
 # Mode 1: Compare two skills on the same task
-bunx @lythos/skill-arena@0.9.23 \
+bunx @lythos/skill-arena@0.9.24 \
   --task "Generate auth flow diagram" \
   --skills "design-doc-mermaid,mermaid-tools" \
   --criteria "syntax,context,token"
 # Mode 2: Compare full deck configurations
-bunx @lythos/skill-arena@0.9.23 \
+bunx @lythos/skill-arena@0.9.24 \
   --task "Generate auth flow diagram" \
   --decks "./decks/minimal.toml,./decks/rich.toml" \
   --criteria "quality,token,maintainability"
 # Visualize results
-bunx @lythos/skill-arena@0.9.23 viz tmp/arena-<id>/
+bunx @lythos/skill-arena@0.9.24 viz tmp/arena-<id>/
 ```
 ## Commands
@@ -77,16 +77,16 @@ bunx @lythos/skill-arena@0.9.23 viz tmp/arena-<id>/
 ```bash
 # Print execution plan without running
-bunx @lythos/skill-arena@0.9.23 run --config arena.toml --dry-run
+bunx @lythos/skill-arena@0.9.24 run --config arena.toml --dry-run
 # Execute with per-side runs_per_side and statistical aggregation
-bunx @lythos/skill-arena@0.9.23 run --config arena.toml
+bunx @lythos/skill-arena@0.9.24 run --config arena.toml
 ```
 ### CLI-flag mode (backward compat)
 ```
-bunx @lythos/skill-arena@0.9.23 run \
+bunx @lythos/skill-arena@0.9.24 run \
   --task ./TASK-arena.md \
   --players ./players/claude.toml \
   --decks ./decks/run-01.toml,./decks/run-02.toml \
@@ -96,13 +96,13 @@ bunx @lythos/skill-arena@0.9.23 run \
 ### Scaffold mode (legacy, manual execution)
 ```
-bunx @lythos/skill-arena@0.9.23 scaffold --task "..." --skills a,b
+bunx @lythos/skill-arena@0.9.24 scaffold --task "..." --skills a,b
 ```
 ### Viz
 ```bash
-bunx @lythos/skill-arena@0.9.23 viz runs/arena-<id>/
+bunx @lythos/skill-arena@0.9.24 viz runs/arena-<id>/
 ```
 ## Skill Documentation
@@ -116,7 +116,7 @@ The agent-visible **Skill** layer documentation is here:
 Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
 ```
-Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.23 ...
+Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.24 ...
 Skill   (packages/<name>/skill/)     → build → SKILL.md + thin scripts
 Output  (skills/<name>/)             → git commit → agent-visible skill
 ```

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lythos/skill-arena",
-  "version": "0.9.23",
+  "version": "0.9.24",
   "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
   "keywords": [
     "ai-agent",
@@ -37,6 +37,7 @@
     "bun": ">=1.0.0"
   },
   "dependencies": {
+    "@lythos/cold-pool": "workspace:*",
     "@lythos/test-utils": "^0.9.1",
     "zod": "^3.24.0",
     "zod-to-json-schema": "^3.25.2"

package/src/cli.ts CHANGED Viewed

@@ -37,8 +37,8 @@ function printHelp(): void {
   console.log(`🎭 lythoskill-arena — Skill comparison runner
 Usage:
-  lythoskill-arena agent-run --task <path> --deck <path> [--player kimi] [--out <dir>]
-  lythoskill-arena agent-run --brief "<prompt>" --deck <path> [--out <dir>]
+  lythoskill-arena agent-run --task <path> --deck <path> [--player kimi] [--out <dir>] [--timeout <ms>]
+  lythoskill-arena agent-run --brief "<prompt>" --deck <path> [--out <dir>] [--timeout <ms>]
   lythoskill-arena run --task <path> --players <A.toml,B.toml> --decks <A.toml,B.toml> --criteria <c1,c2,...> [--out <dir>]
   lythoskill-arena scaffold --task "<description>" --skills <skill1,skill2,...>
   lythoskill-arena scaffold --task "<description>" --decks <deck1,deck2,...>
@@ -90,6 +90,7 @@ async function agentRun(args: string[]) {
     else if (args[i] === '--deck' || args[i] === '-d') opts.deck = args[++i]
     else if (args[i] === '--player' || args[i] === '-p') opts.player = args[++i]
     else if (args[i] === '--out' || args[i] === '-o') opts.out = args[++i]
+    else if (args[i] === '--timeout') opts.timeout = args[++i]
   }
   if (!opts.deck) {
@@ -127,7 +128,7 @@ async function agentRun(args: string[]) {
     scenarioOpt.scenario = {
       name: 'ad-hoc task',
       description: opts.brief!.slice(0, 80),
-      timeout: 120000,
+      timeout: Number(opts.timeout ?? 120000),
       given: { deck: {} },
       when: opts.brief!,
       then: ['Write your output to output.md', 'The output should be complete and well-structured'],

package/src/preflight.ts CHANGED Viewed

@@ -6,6 +6,8 @@
  * IO is injected via function parameters (e.g., existsFn, readdirFn).
  */
+import { ColdPool, parseLocator } from '@lythos/cold-pool'
 // ── Types ─────────────────────────────────────────────────────────────────
 /** A skill as declared in skill-deck.toml */
@@ -83,8 +85,11 @@ export function parseDeckSkills(
 /**
  * Check each declared skill against the cold pool filesystem.
  *
- * For skills with explicit paths: resolve `<coldPoolDir>/<path>/SKILL.md`
- * For skills without paths (array format): resolve `<coldPoolDir>/<name>/SKILL.md`
+ * Path resolution delegates to @lythos/cold-pool's `parseLocator` and
+ * `ColdPool.resolveDir` so localhost / FQ / standalone forms all map to
+ * the right physical layout (per ADR-20260507021957847). Non-FQ legacy
+ * names (e.g., bare `pdf`) fall back to `<coldPoolDir>/<name>/SKILL.md`.
+ *
  * Skills with HTTP/URL paths are skipped (not local).
  *
  * `existsFn` is the IO injection point — swap for real fs or mock.
@@ -94,11 +99,28 @@ export function checkSkillExistence(
   coldPoolDir: string,
   existsFn: (path: string) => boolean
 ): SkillCheck[] {
+  const pool = new ColdPool(coldPoolDir)
   return skills.map(skill => {
-    const resolvedName = skill.path && !skill.path.startsWith('http')
+    const candidatePath = skill.path && !skill.path.startsWith('http')
       ? skill.path
       : skill.name
-    const expectedPath = `${coldPoolDir}/${resolvedName}/SKILL.md`
+    let expectedPath: string
+    const locator = parseLocator(candidatePath)
+    if (!locator) {
+      // Legacy bare-name fallback. Per ADR-20260502012643244 this should
+      // be removed in 0.10.x once arena.toml authors switch to FQ.
+      expectedPath = `${coldPoolDir}/${candidatePath}/SKILL.md`
+    } else if (locator.isLocalhost) {
+      // localhost layout: top-level dir under coldPool, no `localhost/` prefix
+      expectedPath = `${pool.resolveDir(locator)}/SKILL.md`
+    } else if (locator.skill) {
+      expectedPath = `${pool.resolveDir(locator)}/${locator.skill}/SKILL.md`
+    } else {
+      // Standalone repo: SKILL.md at repo root
+      expectedPath = `${pool.resolveDir(locator)}/SKILL.md`
+    }
     return {
       name: skill.name,
       expectedPath,