@lythos/skill-arena 0.6.2 → 0.7.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.ts +27 -11
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -150,6 +150,7 @@ export function runArena(argv: string[]) {
|
|
|
150
150
|
// ── 创建目录结构 ────────────────────────────────────────────
|
|
151
151
|
mkdirSync(join(ARENA_DIR, 'decks'), { recursive: true })
|
|
152
152
|
mkdirSync(join(ARENA_DIR, 'runs'), { recursive: true })
|
|
153
|
+
mkdirSync(join(ARENA_DIR, 'sides'), { recursive: true })
|
|
153
154
|
|
|
154
155
|
// ── 生成参与者与 deck ───────────────────────────────────────
|
|
155
156
|
let participants: { id: string; name: string; skill_name: string; deck_path: string }[]
|
|
@@ -213,6 +214,16 @@ ${[...new Set([p.skill_name, ...CONTROL_SKILLS])].map(s => ` "${s}",`).join('\n
|
|
|
213
214
|
}
|
|
214
215
|
}
|
|
215
216
|
|
|
217
|
+
// ── 为每个 side 创建隔离工作空间 ────────────────────────────
|
|
218
|
+
for (const p of participants) {
|
|
219
|
+
const sideDir = join(ARENA_DIR, 'sides', p.id)
|
|
220
|
+
mkdirSync(sideDir, { recursive: true })
|
|
221
|
+
// 复制 deck 到 side 目录作为 skill-deck.toml
|
|
222
|
+
const sideDeckPath = join(sideDir, 'skill-deck.toml')
|
|
223
|
+
const deckContent = readFileSync(p.deck_path, 'utf-8')
|
|
224
|
+
writeFileSync(sideDeckPath, deckContent)
|
|
225
|
+
}
|
|
226
|
+
|
|
216
227
|
// ── 生成 arena.json ─────────────────────────────────────────
|
|
217
228
|
const arenaJson = {
|
|
218
229
|
version: '1.0.0',
|
|
@@ -221,13 +232,17 @@ ${[...new Set([p.skill_name, ...CONTROL_SKILLS])].map(s => ` "${s}",`).join('\n
|
|
|
221
232
|
slug: ARENA_SLUG,
|
|
222
233
|
created_at: new Date().toISOString(),
|
|
223
234
|
task_description: TASK,
|
|
224
|
-
participants
|
|
235
|
+
participants: participants.map(p => ({
|
|
236
|
+
...p,
|
|
237
|
+
side_dir: join(ARENA_DIR, 'sides', p.id),
|
|
238
|
+
})),
|
|
225
239
|
criteria,
|
|
226
240
|
working_dir: ARENA_DIR,
|
|
227
241
|
},
|
|
228
242
|
status: 'setup',
|
|
229
243
|
runs: participants.map(p => ({
|
|
230
244
|
participant_id: p.id,
|
|
245
|
+
side_dir: join(ARENA_DIR, 'sides', p.id),
|
|
231
246
|
output_path: join(ARENA_DIR, 'runs', `${p.id}.md`),
|
|
232
247
|
})),
|
|
233
248
|
}
|
|
@@ -255,9 +270,8 @@ judge_persona: |
|
|
|
255
270
|
: `你是一个中立的技能评测员。对比所有 subagent 的输出,
|
|
256
271
|
按 evaluation_criteria 给出 1-5 分评分,最终给出 Winner 和选型建议。`}
|
|
257
272
|
acceptance:
|
|
258
|
-
${participants.map(p => ` - Subagent ${p.id}
|
|
273
|
+
${participants.map(p => ` - Subagent ${p.id} 在 sides/${p.id}/ 隔离环境完成任务并写入 runs/${p.id}.md`).join('\n')}
|
|
259
274
|
- Judge 读取所有 run 文件并生成 report.md
|
|
260
|
-
- 所有 subagent 完成后恢复父 deck
|
|
261
275
|
managed_dirs:
|
|
262
276
|
- ${relArenaDir}/
|
|
263
277
|
---
|
|
@@ -268,18 +282,19 @@ managed_dirs:
|
|
|
268
282
|
|
|
269
283
|
${participants.map(p => `### ${p.id} (${p.name})
|
|
270
284
|
\`\`\`bash
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
#
|
|
274
|
-
bunx @lythos/skill-deck link
|
|
285
|
+
# 进入隔离工作空间(已预装 deck)
|
|
286
|
+
cd "${join(ARENA_DIR, 'sides', p.id)}"
|
|
287
|
+
# 确认 skill-deck.toml 存在后 link(首次或 deck 更新时)
|
|
288
|
+
bunx @lythos/skill-deck link
|
|
289
|
+
# 然后执行任务,输出写入 "../../runs/${p.id}.md"
|
|
275
290
|
\`\`\`
|
|
276
291
|
`).join('')}
|
|
277
292
|
|
|
278
293
|
### Judge
|
|
279
294
|
\`\`\`bash
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
#
|
|
295
|
+
# 在 Host 侧读取所有 side 输出,生成报告
|
|
296
|
+
cd "${ARENA_DIR}"
|
|
297
|
+
# 读取 runs/*.md,按 evaluation_criteria 评分,生成 report.md
|
|
283
298
|
\`\`\`
|
|
284
299
|
`
|
|
285
300
|
|
|
@@ -299,11 +314,12 @@ ${mode === 'single-skill' ? `控制变量: ${CONTROL_SKILLS.join(', ')}\n` : ''
|
|
|
299
314
|
生成文件:
|
|
300
315
|
📋 ${join(ARENA_DIR, 'arena.json')}
|
|
301
316
|
🎴 ${participants.length} 个 arena deck → ${join(ARENA_DIR, 'decks')}
|
|
317
|
+
🏟️ ${participants.length} 个 side 隔离工作空间 → ${join(ARENA_DIR, 'sides')}
|
|
302
318
|
📝 Task Card → ${taskCardPath}
|
|
303
319
|
|
|
304
320
|
下一步:
|
|
305
321
|
1. 阅读 Task Card: cat "${taskCardPath}"
|
|
306
|
-
2. 按指令逐个/并行启动 subagent
|
|
322
|
+
2. 按指令逐个/并行启动 subagent(每个在独立的 side 目录)
|
|
307
323
|
3. Judge 生成 report.md
|
|
308
324
|
`)
|
|
309
325
|
}
|