npm - @lythos/skill-arena - Versions diffs - 0.9.20 → 0.9.22 - Mend

@lythos/skill-arena 0.9.20 → 0.9.22

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (3)

package/README.md CHANGED Viewed

@@ -49,26 +49,26 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
 ```bash
 bun add -d @lythos/skill-arena
 # or use directly
-bunx @lythos/skill-arena@0.9.20 <command>
+bunx @lythos/skill-arena@0.9.22 <command>
 ```
 ## Quick Start
 ```bash
 # Mode 1: Compare two skills on the same task
-bunx @lythos/skill-arena@0.9.20 \
+bunx @lythos/skill-arena@0.9.22 \
   --task "Generate auth flow diagram" \
   --skills "design-doc-mermaid,mermaid-tools" \
   --criteria "syntax,context,token"
 # Mode 2: Compare full deck configurations
-bunx @lythos/skill-arena@0.9.20 \
+bunx @lythos/skill-arena@0.9.22 \
   --task "Generate auth flow diagram" \
   --decks "./decks/minimal.toml,./decks/rich.toml" \
   --criteria "quality,token,maintainability"
 # Visualize results
-bunx @lythos/skill-arena@0.9.20 viz tmp/arena-<id>/
+bunx @lythos/skill-arena@0.9.22 viz tmp/arena-<id>/
 ```
 ## Commands
@@ -77,16 +77,16 @@ bunx @lythos/skill-arena@0.9.20 viz tmp/arena-<id>/
 ```bash
 # Print execution plan without running
-bunx @lythos/skill-arena@0.9.20 run --config arena.toml --dry-run
+bunx @lythos/skill-arena@0.9.22 run --config arena.toml --dry-run
 # Execute with per-side runs_per_side and statistical aggregation
-bunx @lythos/skill-arena@0.9.20 run --config arena.toml
+bunx @lythos/skill-arena@0.9.22 run --config arena.toml
 ```
 ### CLI-flag mode (backward compat)
 ```
-bunx @lythos/skill-arena@0.9.20 run \
+bunx @lythos/skill-arena@0.9.22 run \
   --task ./TASK-arena.md \
   --players ./players/claude.toml \
   --decks ./decks/run-01.toml,./decks/run-02.toml \
@@ -96,13 +96,13 @@ bunx @lythos/skill-arena@0.9.20 run \
 ### Scaffold mode (legacy, manual execution)
 ```
-bunx @lythos/skill-arena@0.9.20 scaffold --task "..." --skills a,b
+bunx @lythos/skill-arena@0.9.22 scaffold --task "..." --skills a,b
 ```
 ### Viz
 ```bash
-bunx @lythos/skill-arena@0.9.20 viz runs/arena-<id>/
+bunx @lythos/skill-arena@0.9.22 viz runs/arena-<id>/
 ```
 ## Skill Documentation
@@ -116,7 +116,7 @@ The agent-visible **Skill** layer documentation is here:
 Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
 ```
-Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.20 ...
+Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.22 ...
 Skill   (packages/<name>/skill/)     → build → SKILL.md + thin scripts
 Output  (skills/<name>/)             → git commit → agent-visible skill
 ```

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lythos/skill-arena",
-  "version": "0.9.20",
+  "version": "0.9.22",
   "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
   "keywords": [
     "ai-agent",

package/src/cli.ts CHANGED Viewed

@@ -109,7 +109,7 @@ async function agentRun(args: string[]) {
     taskPath = join(tmpDir, 'TASK.md')
     const briefTask = `---
 name: ad-hoc task
-description: ${opts.brief!.slice(0, 80)}
+description: ${opts.brief!.replace(/"/g, '\\"').slice(0, 80)}
 timeout: 120000
 ---
@@ -136,7 +136,7 @@ Evaluate whether the output is complete, accurate, and well-structured.
   const player = resolvePlayer(opts.player ?? 'kimi')
   const agent = useAgent(player)
-  const outDir = opts.out ? resolve(opts.out) : join(process.cwd(), 'agent-run-output')
+  const outDir = opts.out ? resolve(opts.out) : join(process.cwd(), `agent-output-${new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19)}`)
   mkdirSync(outDir, { recursive: true })
   console.log(`🤖 agent-run: ${player} × ${deckPath}`)
@@ -164,16 +164,17 @@ Evaluate whether the output is complete, accurate, and well-structured.
   if (result.agentResult.stderr) writeFileSync(join(outDir, 'agent-stderr.txt'), result.agentResult.stderr, 'utf-8')
   if (result.verdict) writeFileSync(join(outDir, 'judge-verdict.json'), JSON.stringify(result.verdict, null, 2) + '\n', 'utf-8')
-  // Copy agent-produced files from workdir (output.md, output.docx, etc.)
+  // Copy all agent-produced files from workdir (output.md, output.docx, etc.)
+  // Skip .claude/ (symlink dir) and deck artifacts. Recursive so docx/pdf work.
   if (agentWorkdir) {
-    const { readdirSync, statSync, copyFileSync } = await import('node:fs')
+    const { cpSync, readdirSync } = await import('node:fs')
+    const skipSet = new Set(['.claude', 'skill-deck.toml', 'skill-deck.lock'])
     try {
       for (const entry of readdirSync(agentWorkdir)) {
-        if (entry.startsWith('.') || entry === 'skill-deck.toml' || entry === 'skill-deck.lock') continue
+        if (skipSet.has(entry)) continue
         const src = join(agentWorkdir, entry)
-        try {
-          if (statSync(src).isFile()) copyFileSync(src, join(outDir, entry))
-        } catch {}
+        const dest = join(outDir, entry)
+        try { cpSync(src, dest, { recursive: true }) } catch {}
       }
     } catch {}
   }