kairn-cli 2.2.2 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -221,7 +221,7 @@ var ui = {
221
221
  // Key-value pairs
222
222
  kv: (key, value) => ` ${chalk.cyan(key.padEnd(14))} ${value}`,
223
223
  // File list
224
- file: (path24) => chalk.dim(` ${path24}`),
224
+ file: (path25) => chalk.dim(` ${path25}`),
225
225
  // Tool display
226
226
  tool: (name, reason) => ` ${warmStone("\u25CF")} ${chalk.bold(name)}
227
227
  ${chalk.dim(reason)}`,
@@ -3702,8 +3702,8 @@ var keysCommand = new Command10("keys").description("Add or update API keys for
3702
3702
  import { Command as Command11 } from "commander";
3703
3703
  import chalk14 from "chalk";
3704
3704
  import ora2 from "ora";
3705
- import fs23 from "fs/promises";
3706
- import path23 from "path";
3705
+ import fs24 from "fs/promises";
3706
+ import path24 from "path";
3707
3707
  import { parse as yamlParse2 } from "yaml";
3708
3708
  import { confirm as confirm3, select as select4 } from "@inquirer/prompts";
3709
3709
 
@@ -4031,6 +4031,13 @@ async function snapshotBaseline(projectRoot, workspacePath) {
4031
4031
  }
4032
4032
  await copyDir(claudeDir, baselineDir);
4033
4033
  await copyDir(claudeDir, iter0Dir);
4034
+ const mcpJsonPath = path16.join(projectRoot, ".mcp.json");
4035
+ try {
4036
+ await fs16.access(mcpJsonPath);
4037
+ await fs16.copyFile(mcpJsonPath, path16.join(baselineDir, ".mcp.json"));
4038
+ await fs16.copyFile(mcpJsonPath, path16.join(iter0Dir, ".mcp.json"));
4039
+ } catch {
4040
+ }
4034
4041
  }
4035
4042
  async function copyDir(src, dest) {
4036
4043
  await fs16.mkdir(dest, { recursive: true });
@@ -4353,6 +4360,11 @@ async function scoreTask(task, workspacePath, stdout, stderr, config) {
4353
4360
  // src/evolve/runner.ts
4354
4361
  var execAsync2 = promisify2(exec2);
4355
4362
  var COPY_SKIP_DIRS = /* @__PURE__ */ new Set([".git", "node_modules", ".kairn-evolve", ".claude"]);
4363
/**
 * Best-effort copy of the harness's `.mcp.json` into the root of a working
 * directory.
 *
 * Any failure of the copy itself (most commonly: the harness has no
 * `.mcp.json`) is ignored on purpose, so the function resolves either way.
 *
 * @param {string} harnessPath - Directory that may contain `.mcp.json`.
 * @param {string} workDir - Directory that receives the copy.
 * @returns {Promise<void>}
 */
async function deployMcpJson(harnessPath, workDir) {
  const source = path18.join(harnessPath, ".mcp.json");
  const target = path18.join(workDir, ".mcp.json");
  try {
    await fs18.copyFile(source, target);
  } catch {
    // Intentionally ignored: the MCP config is optional.
  }
}
4356
4368
  async function createIsolatedWorkspace(projectRoot, harnessPath) {
4357
4369
  const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
4358
4370
  try {
@@ -4367,6 +4379,7 @@ async function createIsolatedWorkspace(projectRoot, harnessPath) {
4367
4379
  });
4368
4380
  await fs18.rm(path18.join(tmpDir2, ".claude"), { recursive: true, force: true });
4369
4381
  await copyDir(harnessPath, path18.join(tmpDir2, ".claude"));
4382
+ await deployMcpJson(harnessPath, tmpDir2);
4370
4383
  return { workDir: tmpDir2, isWorktree: true };
4371
4384
  } catch {
4372
4385
  }
@@ -4374,6 +4387,7 @@ async function createIsolatedWorkspace(projectRoot, harnessPath) {
4374
4387
  await copyProjectDir(projectRoot, tmpDir);
4375
4388
  await fs18.rm(path18.join(tmpDir, ".claude"), { recursive: true, force: true });
4376
4389
  await copyDir(harnessPath, path18.join(tmpDir, ".claude"));
4390
+ await deployMcpJson(harnessPath, tmpDir);
4377
4391
  return { workDir: tmpDir, isWorktree: false };
4378
4392
  }
4379
4393
  async function copyProjectDir(src, dest) {
@@ -4555,31 +4569,77 @@ function parseToolCalls(stdout) {
4555
4569
  return [];
4556
4570
  }
4557
4571
  }
4558
- async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config, onProgress) {
4572
/**
 * Population standard deviation of a list of numbers (divides by N, not N - 1).
 *
 * @param {number[]} values - Samples to measure.
 * @param {number} [mean] - Precomputed arithmetic mean of `values`. When
 *   omitted (or null) it is derived from `values`, so callers that do not
 *   already have the mean no longer get NaN back.
 * @returns {number} The standard deviation, or 0 when there are fewer than
 *   two samples.
 */
function computeStddev(values, mean) {
  if (values.length <= 1) return 0;
  const center = mean ?? values.reduce((sum, v) => sum + v, 0) / values.length;
  const sumSqDiffs = values.reduce((sum, v) => sum + (v - center) ** 2, 0);
  return Math.sqrt(sumSqDiffs / values.length);
}
4577
+ async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config, onProgress, runsPerTask = 1) {
4559
4578
  const results = {};
4560
4579
  const projectRoot = path18.resolve(workspacePath, "..");
4580
+ const effectiveRuns = Math.max(1, runsPerTask);
4561
4581
  for (const task of tasks) {
4562
- const traceDir = path18.join(
4563
- workspacePath,
4564
- "traces",
4565
- iteration.toString(),
4566
- task.id
4567
- );
4568
4582
  onProgress?.({ type: "task-start", iteration, taskId: task.id });
4569
- const taskResult = await runTask(task, harnessPath, traceDir, iteration, projectRoot);
4570
- let score = taskResult.score;
4571
- if (config) {
4572
- const stdout = await fs18.readFile(path18.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
4573
- const stderr = await fs18.readFile(path18.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
4574
- score = await scoreTask(task, traceDir, stdout, stderr, config);
4575
- await writeScore(traceDir, score);
4576
- }
4577
- results[task.id] = score;
4583
+ if (effectiveRuns > 1 && config) {
4584
+ const runScores = [];
4585
+ let passCount = 0;
4586
+ for (let run = 0; run < effectiveRuns; run++) {
4587
+ const traceDir = path18.join(
4588
+ workspacePath,
4589
+ "traces",
4590
+ iteration.toString(),
4591
+ `${task.id}_run${run}`
4592
+ );
4593
+ onProgress?.({
4594
+ type: "task-run",
4595
+ iteration,
4596
+ taskId: task.id,
4597
+ message: `Run ${run + 1}/${effectiveRuns} of ${task.id}`
4598
+ });
4599
+ await runTask(task, harnessPath, traceDir, iteration, projectRoot);
4600
+ const stdout = await fs18.readFile(path18.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
4601
+ const stderr = await fs18.readFile(path18.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
4602
+ const score = await scoreTask(task, traceDir, stdout, stderr, config);
4603
+ await writeScore(traceDir, score);
4604
+ runScores.push(score.score ?? (score.pass ? 100 : 0));
4605
+ if (score.pass) passCount++;
4606
+ }
4607
+ const mean = runScores.reduce((a, b) => a + b, 0) / runScores.length;
4608
+ const stddev = computeStddev(runScores, mean);
4609
+ results[task.id] = {
4610
+ pass: passCount > effectiveRuns / 2,
4611
+ score: mean,
4612
+ details: `Mean of ${effectiveRuns} runs`,
4613
+ variance: {
4614
+ runs: effectiveRuns,
4615
+ scores: runScores,
4616
+ mean,
4617
+ stddev
4618
+ }
4619
+ };
4620
+ } else {
4621
+ const traceDir = path18.join(
4622
+ workspacePath,
4623
+ "traces",
4624
+ iteration.toString(),
4625
+ task.id
4626
+ );
4627
+ const taskResult = await runTask(task, harnessPath, traceDir, iteration, projectRoot);
4628
+ let score = taskResult.score;
4629
+ if (config) {
4630
+ const stdout = await fs18.readFile(path18.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
4631
+ const stderr = await fs18.readFile(path18.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
4632
+ score = await scoreTask(task, traceDir, stdout, stderr, config);
4633
+ await writeScore(traceDir, score);
4634
+ }
4635
+ results[task.id] = score;
4636
+ }
4637
+ const finalScore = results[task.id];
4578
4638
  onProgress?.({
4579
4639
  type: "task-scored",
4580
4640
  iteration,
4581
4641
  taskId: task.id,
4582
- score: score.score ?? (score.pass ? 100 : 0)
4642
+ score: finalScore.score ?? (finalScore.pass ? 100 : 0)
4583
4643
  });
4584
4644
  }
4585
4645
  const scores = Object.values(results);
@@ -4623,23 +4683,37 @@ minimal changes to the harness files that will fix those failures.
4623
4683
 
4624
4684
  3. Check history for counterfactual evidence
4625
4685
 
4686
+ ## Available Mutation Actions
4687
+ 1. **replace** \u2014 Replace old_text with new_text in a file: { "file": "...", "action": "replace", "old_text": "...", "new_text": "...", "rationale": "..." }
4688
+ 2. **add_section** \u2014 Append new content to a file (or create it): { "file": "...", "action": "add_section", "new_text": "...", "rationale": "..." }
4689
+ 3. **create_file** \u2014 Create a new file: { "file": "...", "action": "create_file", "new_text": "...", "rationale": "..." }
4690
+ 4. **delete_section** \u2014 Remove specific text from a file: { "file": "...", "action": "delete_section", "old_text": "...", "rationale": "..." }
4691
+ 5. **delete_file** \u2014 Delete an entire file: { "file": "...", "action": "delete_file", "rationale": "..." }
4692
+
4626
4693
  ## Output Format
4627
4694
  Return a JSON object:
4628
4695
  {
4629
4696
  "reasoning": "Your full causal analysis...",
4630
4697
  "mutations": [
4631
4698
  { "file": "CLAUDE.md", "action": "replace", "old_text": "...", "new_text": "...", "rationale": "..." },
4632
- { "file": "commands/develop.md", "action": "add_section", "new_text": "...", "rationale": "..." }
4699
+ { "file": "commands/develop.md", "action": "add_section", "new_text": "...", "rationale": "..." },
4700
+ { "file": "rules/obsolete.md", "action": "delete_file", "rationale": "..." }
4633
4701
  ],
4634
4702
  "expected_impact": { "task-id": "+15% \u2014 explanation" }
4635
4703
  }
4636
4704
 
4705
+ ## MCP Configuration
4706
+ You can also mutate .mcp.json to add, remove, or reconfigure MCP servers.
4707
+ Treat .mcp.json like any other harness file \u2014 propose changes when traces show
4708
+ the agent lacks a tool it needs, or has tools that add noise without benefit.
4709
+
4637
4710
  ## Rules
4638
4711
  - MINIMAL changes only. Don't rewrite the entire CLAUDE.md.
4639
4712
  - Each mutation must have a clear rationale tied to a specific trace observation.
4640
4713
  - Never remove something that's working for another task.
4641
4714
  - If a previous iteration's change caused a regression, REVERT it.
4642
- - Prefer ADDITIVE changes over replacements when possible.
4715
+ - Consider both additions AND removals. Remove sections that add noise without improving task performance.
4716
+ - Bloated harnesses hurt performance \u2014 trim what isn't earning its keep.
4643
4717
 
4644
4718
  Return ONLY valid JSON.`;
4645
4719
  var STDOUT_TRUNCATION_LIMIT = 1e3;
@@ -4830,10 +4904,11 @@ function parseProposerResponse(raw) {
4830
4904
  if (file.includes("..")) {
4831
4905
  continue;
4832
4906
  }
4833
- if (action !== "replace" && action !== "add_section" && action !== "create_file") {
4907
+ const validActions = /* @__PURE__ */ new Set(["replace", "add_section", "create_file", "delete_section", "delete_file"]);
4908
+ if (!validActions.has(action)) {
4834
4909
  continue;
4835
4910
  }
4836
- if (action === "replace" && !oldText) {
4911
+ if ((action === "replace" || action === "delete_section") && !oldText) {
4837
4912
  continue;
4838
4913
  }
4839
4914
  const mutation = {
@@ -4912,6 +4987,23 @@ async function applyMutations(currentHarnessPath, nextIterationDir, mutations) {
4912
4987
  } else if (mutation.action === "create_file") {
4913
4988
  await fs20.mkdir(path20.dirname(filePath), { recursive: true });
4914
4989
  await fs20.writeFile(filePath, mutation.newText, "utf-8");
4990
+ } else if (mutation.action === "delete_section") {
4991
+ if (!mutation.oldText) {
4992
+ continue;
4993
+ }
4994
+ let sectionContent;
4995
+ try {
4996
+ sectionContent = await fs20.readFile(filePath, "utf-8");
4997
+ } catch {
4998
+ continue;
4999
+ }
5000
+ if (!sectionContent.includes(mutation.oldText)) {
5001
+ continue;
5002
+ }
5003
+ await fs20.writeFile(filePath, sectionContent.replace(mutation.oldText, ""), "utf-8");
5004
+ } else if (mutation.action === "delete_file") {
5005
+ await fs20.unlink(filePath).catch(() => {
5006
+ });
4915
5007
  }
4916
5008
  }
4917
5009
  const diffPatch = await generateDiff2(currentHarnessPath, newHarnessPath);
@@ -5006,7 +5098,8 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5006
5098
  workspacePath,
5007
5099
  iter,
5008
5100
  kairnConfig,
5009
- onProgress
5101
+ onProgress,
5102
+ evolveConfig.runsPerTask
5010
5103
  );
5011
5104
  onProgress?.({ type: "iteration-scored", iteration: iter, score: aggregate });
5012
5105
  if (iter === 0) baselineScore = aggregate;
@@ -5363,24 +5456,128 @@ async function generateJsonReport(workspacePath) {
5363
5456
  };
5364
5457
  }
5365
5458
 
5459
+ // src/evolve/apply.ts
5460
+ import fs23 from "fs/promises";
5461
+ import path23 from "path";
5462
/**
 * List the iteration numbers in an evolve workspace that have a harness.
 *
 * An entry of `<workspace>/iterations` counts only when its name is a
 * canonical non-negative integer ("0", "7", "12") and it contains a
 * `harness` entry. Names such as "3abc", "04" or "2.bak" are rejected:
 * a lenient parseInt would map them to 3, 4 and 2, which callers then turn
 * back into `iterations/<n>/harness` paths that may not exist (and which can
 * duplicate a real iteration).
 *
 * @param {string} workspacePath - Path to the evolve workspace.
 * @returns {Promise<number[]>} Iteration numbers in ascending order; empty
 *   when the iterations directory cannot be read.
 */
async function listIterations(workspacePath) {
  const iterationsDir = path23.join(workspacePath, "iterations");
  let entries;
  try {
    entries = await fs23.readdir(iterationsDir);
  } catch {
    // No iterations directory yet: nothing has been run.
    return [];
  }
  const canonicalInteger = /^(?:0|[1-9]\d*)$/;
  const nums = [];
  for (const entry of entries) {
    if (!canonicalInteger.test(entry)) {
      continue;
    }
    try {
      await fs23.access(path23.join(iterationsDir, entry, "harness"));
      nums.push(Number.parseInt(entry, 10));
    } catch {
      // Iteration directory without a harness: not usable, skip it.
    }
  }
  return nums.sort((a, b) => a - b);
}
5483
/**
 * Pick the iteration whose log carries the highest score.
 *
 * Logs are loaded one at a time, in the order given. A missing log or a
 * missing score counts as 0. On ties the earliest iteration in `iterations`
 * wins, because only a strictly greater score replaces the current leader.
 *
 * @param {string} workspacePath - Path to the evolve workspace.
 * @param {number[]} iterations - Candidate iteration numbers.
 * @returns {Promise<number|undefined>} The winning iteration; `undefined`
 *   when `iterations` is empty.
 */
async function findBestIteration(workspacePath, iterations) {
  let leader = { iteration: iterations[0], score: -Infinity };
  for (const candidate of iterations) {
    const entry = await loadIterationLog(workspacePath, candidate);
    const candidateScore = entry?.score ?? 0;
    if (candidateScore > leader.score) {
      leader = { iteration: candidate, score: candidateScore };
    }
  }
  return leader.iteration;
}
5496
/**
 * Collect every non-directory entry beneath `dir`, depth-first.
 *
 * Directories that cannot be read (including a missing `dir`) contribute
 * nothing instead of raising.
 *
 * @param {string} dir - Root directory to scan.
 * @returns {Promise<string[]>} Paths relative to `dir`, in traversal order.
 */
async function listFilesRecursive(dir) {
  const collected = [];
  const visit = async (folder) => {
    const children = await fs23.readdir(folder, { withFileTypes: true }).catch(() => null);
    if (children === null) {
      return;
    }
    for (const child of children) {
      const childPath = path23.join(folder, child.name);
      if (child.isDirectory()) {
        await visit(childPath);
      } else {
        collected.push(path23.relative(dir, childPath));
      }
    }
  };
  await visit(dir);
  return collected;
}
5517
/**
 * Replace the project's `.claude` directory with the harness of an evolved
 * iteration.
 *
 * @param {string} workspacePath - Path to the evolve workspace.
 * @param {string} projectRoot - Project directory that owns `.claude`.
 * @param {number} [targetIteration] - Iteration to apply. When omitted, the
 *   iteration chosen by `findBestIteration` is used.
 * @returns {Promise<{iteration: number, filesChanged: string[], diffPreview: *}>}
 *   The applied iteration, the harness-relative paths whose content differs
 *   between the previous `.claude` and the harness, and the value returned
 *   by `generateDiff2` for the two directories.
 * @throws {Error} When the workspace has no iterations, or when
 *   `targetIteration` is not among them. File-system errors from staging,
 *   swapping or deploying `.mcp.json` are propagated.
 */
async function applyEvolution(workspacePath, projectRoot, targetIteration) {
  const iterations = await listIterations(workspacePath);
  if (iterations.length === 0) {
    throw new Error("No iterations found in workspace. Run `kairn evolve run` first.");
  }
  let iter;
  if (targetIteration !== void 0) {
    if (!iterations.includes(targetIteration)) {
      throw new Error(
        `Iteration ${targetIteration} not found. Available: ${iterations.join(", ")}`
      );
    }
    iter = targetIteration;
  } else {
    iter = await findBestIteration(workspacePath, iterations);
  }
  const harnessPath = path23.join(
    workspacePath,
    "iterations",
    iter.toString(),
    "harness"
  );
  const claudeDir = path23.join(projectRoot, ".claude");

  // Everything below this point up to the staging step is read-only: the
  // preview and change list must describe the state *before* the swap.
  const diffPreview = await generateDiff2(claudeDir, harnessPath);
  const currentFiles = await listFilesRecursive(claudeDir);
  const targetFiles = await listFilesRecursive(harnessPath);
  const allPaths = /* @__PURE__ */ new Set([...currentFiles, ...targetFiles]);
  const readOrNull = (file) => fs23.readFile(file, "utf-8").catch(() => null);
  const filesChanged = [];
  for (const filePath of allPaths) {
    const [currentContent, targetContent] = await Promise.all([
      readOrNull(path23.join(claudeDir, filePath)),
      readOrNull(path23.join(harnessPath, filePath))
    ]);
    if (currentContent !== targetContent) {
      filesChanged.push(filePath);
    }
  }

  // Copy into a sibling staging directory first. The existing .claude is
  // only removed once the full copy has succeeded, so a failed copy can no
  // longer leave the project without a harness.
  const stagingDir = `${claudeDir}.kairn-apply-${Date.now()}`;
  try {
    await copyDir(harnessPath, stagingDir);
  } catch (err) {
    await fs23.rm(stagingDir, { recursive: true, force: true });
    throw err;
  }
  await fs23.rm(claudeDir, { recursive: true, force: true });
  await fs23.rename(stagingDir, claudeDir);

  // During evaluation the harness's .mcp.json is deployed to the workspace
  // root (see deployMcpJson), not inside .claude. Mirror that here so an
  // evolved MCP configuration actually takes effect in the project.
  try {
    await fs23.copyFile(
      path23.join(harnessPath, ".mcp.json"),
      path23.join(projectRoot, ".mcp.json")
    );
  } catch (err) {
    // A harness without .mcp.json is normal; anything else is a real failure.
    if (err?.code !== "ENOENT") {
      throw err;
    }
  }

  return {
    iteration: iter,
    filesChanged,
    diffPreview
  };
}
5560
+
5366
5561
  // src/commands/evolve.ts
5367
5562
  var DEFAULT_CONFIG = {
5368
5563
  model: "claude-sonnet-4-6",
5369
5564
  proposerModel: "claude-opus-4-6",
5370
5565
  scorer: "pass-fail",
5371
5566
  maxIterations: 5,
5372
- parallelTasks: 1
5567
+ parallelTasks: 1,
5568
+ runsPerTask: 1
5373
5569
  };
5374
5570
  async function loadEvolveConfigFromWorkspace(workspacePath) {
5375
5571
  try {
5376
- const configStr = await fs23.readFile(path23.join(workspacePath, "config.yaml"), "utf-8");
5572
+ const configStr = await fs24.readFile(path24.join(workspacePath, "config.yaml"), "utf-8");
5377
5573
  const parsed = yamlParse2(configStr);
5378
5574
  return {
5379
5575
  model: parsed.model ?? DEFAULT_CONFIG.model,
5380
5576
  proposerModel: parsed.proposer_model ?? DEFAULT_CONFIG.proposerModel,
5381
5577
  scorer: parsed.scorer ?? DEFAULT_CONFIG.scorer,
5382
5578
  maxIterations: parsed.max_iterations ?? DEFAULT_CONFIG.maxIterations,
5383
- parallelTasks: parsed.parallel_tasks ?? DEFAULT_CONFIG.parallelTasks
5579
+ parallelTasks: parsed.parallel_tasks ?? DEFAULT_CONFIG.parallelTasks,
5580
+ runsPerTask: parsed.runs_per_task ?? DEFAULT_CONFIG.runsPerTask
5384
5581
  };
5385
5582
  } catch {
5386
5583
  return { ...DEFAULT_CONFIG };
@@ -5391,9 +5588,9 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
5391
5588
  try {
5392
5589
  const projectRoot = process.cwd();
5393
5590
  console.log(ui.section("Evolve Init"));
5394
- const claudeDir = path23.join(projectRoot, ".claude");
5591
+ const claudeDir = path24.join(projectRoot, ".claude");
5395
5592
  try {
5396
- await fs23.access(claudeDir);
5593
+ await fs24.access(claudeDir);
5397
5594
  } catch {
5398
5595
  console.log(ui.error("No .claude/ directory found. Run kairn describe first."));
5399
5596
  process.exit(1);
@@ -5443,7 +5640,7 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
5443
5640
  if (config) {
5444
5641
  let claudeMd = "";
5445
5642
  try {
5446
- claudeMd = await fs23.readFile(path23.join(claudeDir, "CLAUDE.md"), "utf-8");
5643
+ claudeMd = await fs24.readFile(path24.join(claudeDir, "CLAUDE.md"), "utf-8");
5447
5644
  } catch {
5448
5645
  }
5449
5646
  const profile = await buildProjectProfile(projectRoot);
@@ -5474,16 +5671,16 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
5474
5671
  evolveCommand.command("baseline").description("Snapshot current .claude/ directory as baseline").action(async () => {
5475
5672
  try {
5476
5673
  const projectRoot = process.cwd();
5477
- const workspace = path23.join(projectRoot, ".kairn-evolve");
5674
+ const workspace = path24.join(projectRoot, ".kairn-evolve");
5478
5675
  console.log(ui.section("Evolve Baseline"));
5479
5676
  try {
5480
- await fs23.access(workspace);
5677
+ await fs24.access(workspace);
5481
5678
  } catch {
5482
5679
  console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
5483
5680
  process.exit(1);
5484
5681
  }
5485
5682
  await snapshotBaseline(projectRoot, workspace);
5486
- const baselineDir = path23.join(workspace, "baseline");
5683
+ const baselineDir = path24.join(workspace, "baseline");
5487
5684
  const fileCount = await countFiles(baselineDir);
5488
5685
  console.log(ui.success(`Baseline snapshot created (${fileCount} files)`));
5489
5686
  } catch (err) {
@@ -5492,21 +5689,21 @@ evolveCommand.command("baseline").description("Snapshot current .claude/ directo
5492
5689
  process.exit(1);
5493
5690
  }
5494
5691
  });
5495
- evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").action(async (options) => {
5692
+ evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").action(async (options) => {
5496
5693
  try {
5497
5694
  const projectRoot = process.cwd();
5498
- const workspace = path23.join(projectRoot, ".kairn-evolve");
5695
+ const workspace = path24.join(projectRoot, ".kairn-evolve");
5499
5696
  console.log(ui.section("Evolve Run"));
5500
5697
  try {
5501
- await fs23.access(workspace);
5698
+ await fs24.access(workspace);
5502
5699
  } catch {
5503
5700
  console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
5504
5701
  process.exit(1);
5505
5702
  }
5506
- const tasksPath = path23.join(workspace, "tasks.yaml");
5703
+ const tasksPath = path24.join(workspace, "tasks.yaml");
5507
5704
  let tasksContent;
5508
5705
  try {
5509
- tasksContent = await fs23.readFile(tasksPath, "utf-8");
5706
+ tasksContent = await fs24.readFile(tasksPath, "utf-8");
5510
5707
  } catch {
5511
5708
  console.log(ui.error("No tasks.yaml found. Run kairn evolve init first."));
5512
5709
  process.exit(1);
@@ -5525,15 +5722,15 @@ evolveCommand.command("run").description("Run tasks against the current harness"
5525
5722
  console.log(ui.info(`Running ${tasksToRun.length} task(s)...`));
5526
5723
  console.log("");
5527
5724
  const config = await loadConfig();
5528
- const harnessPath = path23.join(projectRoot, ".claude");
5725
+ const harnessPath = path24.join(projectRoot, ".claude");
5529
5726
  const results = [];
5530
5727
  for (const task of tasksToRun) {
5531
- const traceDir = path23.join(workspace, "traces", "0", task.id);
5728
+ const traceDir = path24.join(workspace, "traces", "0", task.id);
5532
5729
  const spinner = ora2(`Running: ${task.id}`).start();
5533
5730
  const result = await runTask(task, harnessPath, traceDir, 0);
5534
5731
  if (config) {
5535
- const stdout = await fs23.readFile(path23.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
5536
- const stderr = await fs23.readFile(path23.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
5732
+ const stdout = await fs24.readFile(path24.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
5733
+ const stderr = await fs24.readFile(path24.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
5537
5734
  const score = await scoreTask(task, traceDir, stdout, stderr, config);
5538
5735
  result.score = score;
5539
5736
  await writeScore(traceDir, score);
@@ -5561,8 +5758,14 @@ evolveCommand.command("run").description("Run tasks against the current harness"
5561
5758
  process.exit(1);
5562
5759
  }
5563
5760
  evolveConfig.maxIterations = iterations;
5761
+ const runs = parseInt(options.runs ?? "1", 10);
5762
+ if (isNaN(runs) || runs < 1) {
5763
+ console.log(ui.error("--runs must be a positive integer"));
5764
+ process.exit(1);
5765
+ }
5766
+ evolveConfig.runsPerTask = runs;
5564
5767
  try {
5565
- await fs23.access(path23.join(workspace, "iterations", "0", "harness"));
5768
+ await fs24.access(path24.join(workspace, "iterations", "0", "harness"));
5566
5769
  } catch {
5567
5770
  console.log(ui.error("No baseline harness found. Run kairn evolve baseline first."));
5568
5771
  process.exit(1);
@@ -5595,6 +5798,9 @@ evolveCommand.command("run").description("Run tasks against the current harness"
5595
5798
  case "task-start":
5596
5799
  console.log(chalk14.dim(` Running: ${event.taskId ?? "unknown"}...`));
5597
5800
  break;
5801
+ case "task-run":
5802
+ console.log(chalk14.dim(` ${event.message ?? ""}`));
5803
+ break;
5598
5804
  case "task-scored": {
5599
5805
  const taskScore = event.score ?? 0;
5600
5806
  const taskStatus = taskScore >= 100 ? chalk14.green("PASS") : taskScore >= 60 ? chalk14.yellow("PARTIAL") : chalk14.red("FAIL");
@@ -5616,9 +5822,18 @@ evolveCommand.command("run").description("Run tasks against the current harness"
5616
5822
  console.log(` Improvement: ${improvement.toFixed(1)} points`);
5617
5823
  }
5618
5824
  console.log("");
5619
- console.log(" Iter Score Mutations Status");
5825
+ const showVariance = runs > 1;
5826
+ console.log(showVariance ? " Iter Score Mutations Status" : " Iter Score Mutations Status");
5620
5827
  for (const iter of result.iterations) {
5621
- const scoreStr = iter.score.toFixed(1).padStart(6) + "%";
5828
+ let scoreDisplay;
5829
+ if (showVariance) {
5830
+ const taskScores = Object.values(iter.taskResults);
5831
+ const stddevs = taskScores.map((s) => s.variance?.stddev).filter((v) => v !== void 0);
5832
+ const avgStddev = stddevs.length > 0 ? stddevs.reduce((a, b) => a + b, 0) / stddevs.length : 0;
5833
+ scoreDisplay = `${iter.score.toFixed(1).padStart(6)}% \xB1${avgStddev.toFixed(1)}`;
5834
+ } else {
5835
+ scoreDisplay = iter.score.toFixed(1).padStart(6) + "%";
5836
+ }
5622
5837
  const mutations = iter.proposal?.mutations.length ?? 0;
5623
5838
  const mutStr = mutations > 0 ? mutations.toString() : "-";
5624
5839
  let status = "evaluated";
@@ -5626,7 +5841,7 @@ evolveCommand.command("run").description("Run tasks against the current harness"
5626
5841
  else if (!iter.proposal && !iter.diffPatch) status = "rollback";
5627
5842
  else if (iter.score >= 100) status = "perfect";
5628
5843
  else if (iter.iteration === result.bestIteration) status = "best";
5629
- console.log(` ${iter.iteration.toString().padStart(4)} ${scoreStr} ${mutStr.padStart(9)} ${status}`);
5844
+ console.log(` ${iter.iteration.toString().padStart(4)} ${scoreDisplay} ${mutStr.padStart(9)} ${status}`);
5630
5845
  }
5631
5846
  }
5632
5847
  } catch (err) {
@@ -5635,12 +5850,56 @@ evolveCommand.command("run").description("Run tasks against the current harness"
5635
5850
  process.exit(1);
5636
5851
  }
5637
5852
  });
5853
// `kairn evolve apply`: replaces the project's .claude directory with an
// evolved harness (the best-scoring iteration unless --iter is given), then
// prints the diff and a summary line.
//
// NOTE(review): --force and --no-commit are registered below, but this
// handler never reads options.force or options.commit — confirm whether the
// git cleanliness check and auto-commit are implemented elsewhere.
evolveCommand.command("apply").description("Apply the best evolved harness to your project").option("--iter <n>", "Apply a specific iteration instead of the best").option("--force", "Apply even if git working tree is dirty").option("--no-commit", "Skip automatic git commit after applying").action(async (options) => {
  try {
    const projectRoot = process.cwd();
    const workspace = path24.join(projectRoot, ".kairn-evolve");
    console.log(ui.section("Evolve Apply"));
    try {
      await fs24.access(workspace);
    } catch {
      console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
      process.exit(1);
    }
    let targetIteration;
    if (options.iter !== void 0) {
      // Applying is destructive, so the iteration must be spelled exactly:
      // a lenient parseInt would turn "2abc" or "2.7" into iteration 2.
      const rawIter = String(options.iter).trim();
      if (!/^\d+$/.test(rawIter)) {
        console.log(ui.error("--iter must be a number"));
        process.exit(1);
      }
      targetIteration = Number.parseInt(rawIter, 10);
    }
    const result = await applyEvolution(workspace, projectRoot, targetIteration);
    if (result.diffPreview) {
      console.log(ui.section("Changes"));
      for (const line of result.diffPreview.split("\n")) {
        // File headers are checked first: they also start with "+" / "-".
        if (line.startsWith("---") || line.startsWith("+++")) {
          console.log(chalk14.bold(line));
        } else if (line.startsWith("+")) {
          console.log(chalk14.green(line));
        } else if (line.startsWith("-")) {
          console.log(chalk14.red(line));
        } else {
          console.log(line);
        }
      }
    }
    console.log("");
    console.log(ui.success(
      `Applied iteration ${result.iteration} harness (${result.filesChanged.length} files)`
    ));
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    console.log(ui.error(msg));
    process.exit(1);
  }
});
5638
5897
  evolveCommand.command("report").description("Generate a summary report of the evolution run").option("--json", "Output machine-readable JSON instead of Markdown").action(async (options) => {
5639
5898
  try {
5640
5899
  const projectRoot = process.cwd();
5641
- const workspace = path23.join(projectRoot, ".kairn-evolve");
5900
+ const workspace = path24.join(projectRoot, ".kairn-evolve");
5642
5901
  try {
5643
- await fs23.access(workspace);
5902
+ await fs24.access(workspace);
5644
5903
  } catch {
5645
5904
  console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
5646
5905
  process.exit(1);
@@ -5661,23 +5920,23 @@ evolveCommand.command("report").description("Generate a summary report of the ev
5661
5920
  evolveCommand.command("diff <iter1> <iter2>").description("Show harness changes between two iterations").action(async (iter1Str, iter2Str) => {
5662
5921
  try {
5663
5922
  const projectRoot = process.cwd();
5664
- const workspace = path23.join(projectRoot, ".kairn-evolve");
5923
+ const workspace = path24.join(projectRoot, ".kairn-evolve");
5665
5924
  const iter1 = parseInt(iter1Str, 10);
5666
5925
  const iter2 = parseInt(iter2Str, 10);
5667
5926
  if (isNaN(iter1) || isNaN(iter2)) {
5668
5927
  console.log(ui.error("Both arguments must be integers (iteration numbers)"));
5669
5928
  process.exit(1);
5670
5929
  }
5671
- const harness1 = path23.join(workspace, "iterations", iter1.toString(), "harness");
5672
- const harness2 = path23.join(workspace, "iterations", iter2.toString(), "harness");
5930
+ const harness1 = path24.join(workspace, "iterations", iter1.toString(), "harness");
5931
+ const harness2 = path24.join(workspace, "iterations", iter2.toString(), "harness");
5673
5932
  try {
5674
- await fs23.access(harness1);
5933
+ await fs24.access(harness1);
5675
5934
  } catch {
5676
5935
  console.log(ui.error(`Iteration ${iter1} harness not found at ${harness1}`));
5677
5936
  process.exit(1);
5678
5937
  }
5679
5938
  try {
5680
- await fs23.access(harness2);
5939
+ await fs24.access(harness2);
5681
5940
  } catch {
5682
5941
  console.log(ui.error(`Iteration ${iter2} harness not found at ${harness2}`));
5683
5942
  process.exit(1);
@@ -5732,10 +5991,10 @@ evolveCommand.command("diff <iter1> <iter2>").description("Show harness changes
5732
5991
  async function countFiles(dir) {
5733
5992
  let count = 0;
5734
5993
  try {
5735
- const entries = await fs23.readdir(dir, { withFileTypes: true });
5994
+ const entries = await fs24.readdir(dir, { withFileTypes: true });
5736
5995
  for (const entry of entries) {
5737
5996
  if (entry.isDirectory()) {
5738
- count += await countFiles(path23.join(dir, entry.name));
5997
+ count += await countFiles(path24.join(dir, entry.name));
5739
5998
  } else {
5740
5999
  count++;
5741
6000
  }