npm - @lythos/skill-arena - Versions diffs - 0.9.36 → 0.9.38 - Mend

@lythos/skill-arena 0.9.36 → 0.9.38

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (4)

package/README.md CHANGED Viewed

@@ -49,26 +49,26 @@ Note: Claude `-p` mode has known issues with web tools in Bun.spawn (deferred to
 ```bash
 bun add -d @lythos/skill-arena
 # or use directly
-bunx @lythos/skill-arena@0.9.36 <command>
+bunx @lythos/skill-arena@0.9.38 <command>
 ```
 ## Quick Start
 ```bash
 # Mode 1: Compare two skills on the same task
-bunx @lythos/skill-arena@0.9.36 \
+bunx @lythos/skill-arena@0.9.38 \
   --task "Generate auth flow diagram" \
   --skills "design-doc-mermaid,mermaid-tools" \
   --criteria "syntax,context,token"
 # Mode 2: Compare full deck configurations
-bunx @lythos/skill-arena@0.9.36 \
+bunx @lythos/skill-arena@0.9.38 \
   --task "Generate auth flow diagram" \
   --decks "./decks/minimal.toml,./decks/rich.toml" \
   --criteria "quality,token,maintainability"
 # Visualize results
-bunx @lythos/skill-arena@0.9.36 viz tmp/arena-<id>/
+bunx @lythos/skill-arena@0.9.38 viz tmp/arena-<id>/
 ```
 ## Commands
@@ -77,16 +77,16 @@ bunx @lythos/skill-arena@0.9.36 viz tmp/arena-<id>/
 ```bash
 # Print execution plan without running
-bunx @lythos/skill-arena@0.9.36 run --config arena.toml --dry-run
+bunx @lythos/skill-arena@0.9.38 run --config arena.toml --dry-run
 # Execute with per-side runs_per_side and statistical aggregation
-bunx @lythos/skill-arena@0.9.36 run --config arena.toml
+bunx @lythos/skill-arena@0.9.38 run --config arena.toml
 ```
 ### CLI-flag mode (backward compat)
 ```
-bunx @lythos/skill-arena@0.9.36 run \
+bunx @lythos/skill-arena@0.9.38 run \
   --task ./TASK-arena.md \
   --players ./players/claude.toml \
   --decks ./decks/run-01.toml,./decks/run-02.toml \
@@ -96,13 +96,13 @@ bunx @lythos/skill-arena@0.9.36 run \
 ### Scaffold mode (legacy, manual execution)
 ```
-bunx @lythos/skill-arena@0.9.36 scaffold --task "..." --skills a,b
+bunx @lythos/skill-arena@0.9.38 scaffold --task "..." --skills a,b
 ```
 ### Viz
 ```bash
-bunx @lythos/skill-arena@0.9.36 viz runs/arena-<id>/
+bunx @lythos/skill-arena@0.9.38 viz runs/arena-<id>/
 ```
 ## Skill Documentation
@@ -116,7 +116,7 @@ The agent-visible **Skill** layer documentation is here:
 Part of the [lythoskill](https://github.com/lythos-labs/lythoskill) ecosystem — the thin-skill pattern separates heavy logic (this npm package) from lightweight agent instructions (SKILL.md).
 ```
-Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.36 ...
+Starter (this package) → npm publish → bunx @lythos/skill-arena@0.9.38 ...
 Skill   (packages/<name>/skill/)     → build → SKILL.md + thin scripts
 Output  (skills/<name>/)             → git commit → agent-visible skill
 ```

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lythos/skill-arena",
-  "version": "0.9.36",
+  "version": "0.9.38",
   "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
   "keywords": [
     "ai-agent",
@@ -16,6 +16,11 @@
     "access": "public"
   },
   "type": "module",
+  "scripts": {
+    "test": "bun test src/ --pass-with-no-tests",
+    "test:coverage": "bun test src/ --coverage --coverage-reporter=lcov --coverage-dir=coverage --pass-with-no-tests",
+    "test:watch": "bun test src/ --watch"
+  },
   "bin": {
     "lythoskill-arena": "src/cli.ts"
   },

package/src/cli.ts CHANGED Viewed

@@ -107,9 +107,12 @@ async function agentRun(args: string[]) {
   let deckPath: string
   if (opts.deck.startsWith('http://') || opts.deck.startsWith('https://')) {
     let url = opts.deck
-    if (url.includes('github.com/') && url.includes('/blob/')) {
-      url = url.replace('github.com/', 'raw.githubusercontent.com/').replace('/blob/', '/')
-    }
+    try {
+      const u = new URL(url)
+      if (u.hostname === 'github.com' && u.pathname.includes('/blob/')) {
+        url = `https://raw.githubusercontent.com${u.pathname.replace('/blob/', '/')}`
+      }
+    } catch { /* keep original url */ }
     const dest = resolve(process.cwd(), 'arena-deck.toml')
     console.log(`📥 Fetching arena deck: ${url}`)
     const res = await fetch(url, { signal: AbortSignal.timeout(30_000) })
@@ -125,6 +128,7 @@ async function agentRun(args: string[]) {
   const { useAgent } = await import('@lythos/test-utils/agents')
   // Optional: register claude-sdk adapter if the package is installed
   try { await import('@lythos/agent-adapter-claude-sdk') } catch { /* package not installed */ }
+  try { await import('@lythos/agent-adapter-deepseek-serve') } catch { /* package not installed */ }
   const { runAgentScenario } = await import('@lythos/test-utils/agent-bdd')
   const { resolvePlayer } = await import('./player')
   const { readFileSync, writeFileSync, mkdirSync } = await import('node:fs')

package/src/runner.ts CHANGED Viewed

@@ -109,11 +109,35 @@ export async function runArenaFromToml(opts: {
       const result = await runAgentScenario({
         scenarioPath: taskAbs,
         agent,
-        async setupWorkdir(_scenario: AgentScenario, workdir: string) {
+        async setupWorkdir(scenario: AgentScenario, workdir: string) {
           mkdirSync(workdir, { recursive: true })
           const deckContent = readFileSync(cell.deck, 'utf-8')
           writeFileSync(join(workdir, 'skill-deck.toml'), deckContent)
+          // Write AGENTS.md bootloader — agents read this on entry
+          writeFileSync(join(workdir, 'AGENTS.md'), [
+            '# Arena Test Environment',
+            '',
+            `**Side**: ${cell.side}`,
+            `**Player**: ${cell.player}`,
+            `**Run**: ${cell.run}`,
+            '',
+            '## Task',
+            '',
+            scenario.it ?? scenario.description ?? '(no task description)',
+            '',
+            '## How This Works',
+            '',
+            '- This is an isolated arena test directory. No parent `.claude/skills/` exists.',
+            '- Skills are configured in `skill-deck.toml` and symlinked by `deck link`.',
+            '- Complete the task above using the available skills.',
+            '- Output your work to this directory (or `output/` if specified).',
+            '',
+            '## Expected Output',
+            '',
+            'After completing the task, write a brief summary of what you did.',
+          ].join('\n'))
           // Link skills via bunx (works both locally and when installed via bunx)
           const linkProc = Bun.spawn(
             ['bunx', '@lythos/skill-deck', 'link'],