@lythos/skill-arena 0.6.2 → 0.7.2

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (2)
  1. package/package.json +2 -2
  2. package/src/cli.ts +27 -11
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@lythos/skill-arena",
3
- "version": "0.6.2",
4
- "description": "Skill Arena \u2014 benchmark skill effectiveness with controlled-variable comparison",
3
+ "version": "0.7.2",
4
+ "description": "Skill Arena — benchmark skill effectiveness with controlled-variable comparison",
5
5
  "keywords": [
6
6
  "ai-agent",
7
7
  "skill",
package/src/cli.ts CHANGED
@@ -150,6 +150,7 @@ export function runArena(argv: string[]) {
150
150
  // ── 创建目录结构 ────────────────────────────────────────────
151
151
  mkdirSync(join(ARENA_DIR, 'decks'), { recursive: true })
152
152
  mkdirSync(join(ARENA_DIR, 'runs'), { recursive: true })
153
+ mkdirSync(join(ARENA_DIR, 'sides'), { recursive: true })
153
154
 
154
155
  // ── 生成参与者与 deck ───────────────────────────────────────
155
156
  let participants: { id: string; name: string; skill_name: string; deck_path: string }[]
@@ -213,6 +214,16 @@ ${[...new Set([p.skill_name, ...CONTROL_SKILLS])].map(s => ` "${s}",`).join('\n
213
214
  }
214
215
  }
215
216
 
217
+ // ── 为每个 side 创建隔离工作空间 ────────────────────────────
218
+ for (const p of participants) {
219
+ const sideDir = join(ARENA_DIR, 'sides', p.id)
220
+ mkdirSync(sideDir, { recursive: true })
221
+ // 复制 deck 到 side 目录作为 skill-deck.toml
222
+ const sideDeckPath = join(sideDir, 'skill-deck.toml')
223
+ const deckContent = readFileSync(p.deck_path, 'utf-8')
224
+ writeFileSync(sideDeckPath, deckContent)
225
+ }
226
+
216
227
  // ── 生成 arena.json ─────────────────────────────────────────
217
228
  const arenaJson = {
218
229
  version: '1.0.0',
@@ -221,13 +232,17 @@ ${[...new Set([p.skill_name, ...CONTROL_SKILLS])].map(s => ` "${s}",`).join('\n
221
232
  slug: ARENA_SLUG,
222
233
  created_at: new Date().toISOString(),
223
234
  task_description: TASK,
224
- participants,
235
+ participants: participants.map(p => ({
236
+ ...p,
237
+ side_dir: join(ARENA_DIR, 'sides', p.id),
238
+ })),
225
239
  criteria,
226
240
  working_dir: ARENA_DIR,
227
241
  },
228
242
  status: 'setup',
229
243
  runs: participants.map(p => ({
230
244
  participant_id: p.id,
245
+ side_dir: join(ARENA_DIR, 'sides', p.id),
231
246
  output_path: join(ARENA_DIR, 'runs', `${p.id}.md`),
232
247
  })),
233
248
  }
@@ -255,9 +270,8 @@ judge_persona: |
255
270
  : `你是一个中立的技能评测员。对比所有 subagent 的输出,
256
271
  按 evaluation_criteria 给出 1-5 分评分,最终给出 Winner 和选型建议。`}
257
272
  acceptance:
258
- ${participants.map(p => ` - Subagent ${p.id} 使用 ${p.deck_path.replace(PROJECT_DIR, '.')} 完成任务并写入 runs/${p.id}.md`).join('\n')}
273
+ ${participants.map(p => ` - Subagent ${p.id} sides/${p.id}/ 隔离环境完成任务并写入 runs/${p.id}.md`).join('\n')}
259
274
  - Judge 读取所有 run 文件并生成 report.md
260
- - 所有 subagent 完成后恢复父 deck
261
275
  managed_dirs:
262
276
  - ${relArenaDir}/
263
277
  ---
@@ -268,18 +282,19 @@ managed_dirs:
268
282
 
269
283
  ${participants.map(p => `### ${p.id} (${p.name})
270
284
  \`\`\`bash
271
- cd "${PROJECT_DIR}"
272
- bunx @lythos/skill-deck link --deck "${p.deck_path}" --workdir "${PROJECT_DIR}"
273
- # 然后执行任务,输出写入 "${join(ARENA_DIR, 'runs', `${p.id}.md`)}"
274
- bunx @lythos/skill-deck link --deck "${join(PROJECT_DIR, 'skill-deck.toml')}" --workdir "${PROJECT_DIR}"
285
+ # 进入隔离工作空间(已预装 deck)
286
+ cd "${join(ARENA_DIR, 'sides', p.id)}"
287
+ # 确认 skill-deck.toml 存在后 link(首次或 deck 更新时)
288
+ bunx @lythos/skill-deck link
289
+ # 然后执行任务,输出写入 "../../runs/${p.id}.md"
275
290
  \`\`\`
276
291
  `).join('')}
277
292
 
278
293
  ### Judge
279
294
  \`\`\`bash
280
- cd "${PROJECT_DIR}"
281
- bunx @lythos/skill-deck link --deck "${join(PROJECT_DIR, 'skill-deck.toml')}" --workdir "${PROJECT_DIR}"
282
- # 读取所有 run 文件,生成 "${join(ARENA_DIR, 'report.md')}"
295
+ # 在 Host 侧读取所有 side 输出,生成报告
296
+ cd "${ARENA_DIR}"
297
+ # 读取 runs/*.md,按 evaluation_criteria 评分,生成 report.md
283
298
  \`\`\`
284
299
  `
285
300
 
@@ -299,11 +314,12 @@ ${mode === 'single-skill' ? `控制变量: ${CONTROL_SKILLS.join(', ')}\n` : ''
299
314
  生成文件:
300
315
  📋 ${join(ARENA_DIR, 'arena.json')}
301
316
  🎴 ${participants.length} 个 arena deck → ${join(ARENA_DIR, 'decks')}
317
+ 🏟️ ${participants.length} 个 side 隔离工作空间 → ${join(ARENA_DIR, 'sides')}
302
318
  📝 Task Card → ${taskCardPath}
303
319
 
304
320
  下一步:
305
321
  1. 阅读 Task Card: cat "${taskCardPath}"
306
- 2. 按指令逐个/并行启动 subagent
322
+ 2. 按指令逐个/并行启动 subagent(每个在独立的 side 目录)
307
323
  3. Judge 生成 report.md
308
324
  `)
309
325
  }