kairn-cli 2.3.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,3 +1,99 @@
1
// Bundler runtime helpers used by the lazily-initialised modules below.
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;

/**
 * Wrap a module-initializer object in a run-once thunk.
 *
 * @param {object} fn - Object whose first own property is the initializer function.
 * @param {*} [res] - Slot that caches the initializer's return value.
 * @returns {Function} `__init`, which runs the initializer on its first call
 *   and returns the cached result on every later call.
 */
var __esm = (fn, res) =>
  function __init() {
    if (fn) {
      const initializer = fn[__getOwnPropNames(fn)[0]];
      // Cleared before the call, so the initializer is never run a second time
      // even if it throws or re-enters.
      fn = 0;
      res = initializer(0);
    }
    return res;
  };

/**
 * Install every enumerable key of `all` on `target` as an enumerable getter.
 *
 * @param {object} target - Namespace object receiving the properties.
 * @param {object} all - Map of export name to a zero-argument getter function.
 */
var __export = (target, all) => {
  for (const exportName in all) {
    const getter = all[exportName];
    __defProp(target, exportName, { enumerable: true, get: getter });
  }
};
10
+
11
+ // src/evolve/memory.ts
12
// Namespace object for src/evolve/memory.ts. __export installs each entry as
// an enumerable getter, so reading a property resolves the function binding
// at access time rather than when this registration runs.
var memory_exports = {};
__export(memory_exports, {
  buildRunSummary: () => buildRunSummary,
  formatMemoryForProposer: () => formatMemoryForProposer,
  loadProposerMemory: () => loadProposerMemory,
  saveRunSummary: () => saveRunSummary
});
19
+ import fs19 from "fs/promises";
20
+ import path19 from "path";
21
/**
 * Read the persisted proposer memory for a workspace.
 *
 * @param {string} workspacePath - Directory that holds the memory file.
 * @returns {Promise<Array>} The parsed file contents when they are a JSON
 *   array; an empty array when the file cannot be read, cannot be parsed, or
 *   holds anything other than an array.
 */
async function loadProposerMemory(workspacePath) {
  const target = path19.join(workspacePath, MEMORY_FILE);
  let entries = [];
  try {
    const contents = await fs19.readFile(target, "utf-8");
    const decoded = JSON.parse(contents);
    if (Array.isArray(decoded)) {
      entries = decoded;
    }
  } catch {
    // Best effort: an unreadable or corrupt file is treated as "no memory".
  }
  return entries;
}
32
/**
 * Condense one evolve run into a summary record for the proposer memory.
 *
 * Walks consecutive pairs of history entries and attributes the score change
 * between them to the mutations recorded on the later entry. Gains of any size
 * count as effective; only drops of more than 5 points count as regressive.
 *
 * @param {Array<{score: number, proposal?: {mutations?: Array<{action: string, file: string, rationale: string}>}}>} history
 *   Iteration logs in execution order.
 * @param {number} baselineScore - Score the run started from.
 * @param {number} bestScore - Best score reached during the run.
 * @returns {{timestamp: string, baselineScore: number, bestScore: number,
 *   improvement: number, effectiveMutations: string[],
 *   regressiveMutations: string[], insights: string}}
 */
function buildRunSummary(history, baselineScore, bestScore) {
  const effectiveMutations = [];
  const regressiveMutations = [];
  for (let i = 1; i < history.length; i++) {
    const prev = history[i - 1];
    const curr = history[i];
    // Guard the whole chain: an entry may carry a proposal object without a
    // mutations array, and that must be skipped rather than throw.
    const mutations = curr.proposal?.mutations;
    if (!Array.isArray(mutations) || mutations.length === 0) continue;
    const delta = curr.score - prev.score;
    const summary = mutations.map((m) => `${m.action} ${m.file}: ${m.rationale}`).join("; ");
    if (delta > 0) {
      effectiveMutations.push(`+${delta.toFixed(1)}: ${summary}`);
    } else if (delta < -5) {
      regressiveMutations.push(`${delta.toFixed(1)}: ${summary}`);
    }
  }
  const improvement = bestScore - baselineScore;
  const insights = improvement > 0
    ? `Improved ${improvement.toFixed(1)} points. ${effectiveMutations.length} helpful mutations, ${regressiveMutations.length} regressions.`
    : `No improvement. ${regressiveMutations.length} regressions observed.`;
  return {
    timestamp: (/* @__PURE__ */ new Date()).toISOString(),
    baselineScore,
    bestScore,
    improvement,
    effectiveMutations,
    regressiveMutations,
    insights
  };
}
59
/**
 * Append a run summary to the workspace's proposer memory file.
 *
 * Loads the current entries, appends `summary`, keeps only the most recent
 * MAX_ENTRIES, and rewrites the file as pretty-printed JSON.
 *
 * @param {string} workspacePath - Directory that holds the memory file.
 * @param {object} summary - Summary record to append.
 * @returns {Promise<void>}
 */
async function saveRunSummary(workspacePath, summary) {
  const entries = await loadProposerMemory(workspacePath);
  entries.push(summary);
  const retained = entries.slice(-MAX_ENTRIES);
  const target = path19.join(workspacePath, MEMORY_FILE);
  const serialized = JSON.stringify(retained, null, 2);
  await fs19.writeFile(target, serialized, "utf-8");
}
66
/**
 * Render persisted run summaries as a Markdown section for the proposer prompt.
 *
 * The entries come from a JSON file that is read back without validation, so
 * this function tolerates damaged data: non-object entries are skipped,
 * non-numeric scores render as "n/a", and missing mutation lists are treated
 * as empty. Output for well-formed entries is unchanged.
 *
 * @param {Array<object>} memory - Run summaries, oldest first.
 * @returns {string} Markdown text, or "" when there is nothing usable to show.
 */
function formatMemoryForProposer(memory) {
  if (!Array.isArray(memory)) return "";
  const entries = memory.filter((entry) => entry !== null && typeof entry === "object");
  if (entries.length === 0) return "";

  const fixed = (value) =>
    typeof value === "number" && Number.isFinite(value) ? value.toFixed(1) : "n/a";
  // Only the first three mutations of each kind are shown per run.
  const firstThree = (value) => (Array.isArray(value) ? value.slice(0, 3) : []);

  const lines = ["## Prior Run History\n"];
  for (const entry of entries) {
    const sign = typeof entry.improvement === "number" && entry.improvement >= 0 ? "+" : "";
    lines.push(`### Run at ${entry.timestamp}`);
    lines.push(`- Baseline: ${fixed(entry.baselineScore)}%, Best: ${fixed(entry.bestScore)}%, Improvement: ${sign}${fixed(entry.improvement)}`);
    const effective = firstThree(entry.effectiveMutations);
    if (effective.length > 0) {
      lines.push("- Effective mutations:");
      for (const m of effective) {
        lines.push(` - ${m}`);
      }
    }
    const regressive = firstThree(entry.regressiveMutations);
    if (regressive.length > 0) {
      lines.push("- Regressive mutations (AVOID these):");
      for (const m of regressive) {
        lines.push(` - ${m}`);
      }
    }
    lines.push("");
  }
  return lines.join("\n");
}
88
// Module-level constants for src/evolve/memory.ts. Both stay undefined until
// init_memory() has run, which is why callers invoke init_memory() before
// touching memory_exports.
var MEMORY_FILE, MAX_ENTRIES;
// Run-once initializer built by __esm: the first call assigns the constants,
// later calls do nothing.
var init_memory = __esm({
  "src/evolve/memory.ts"() {
    "use strict";
    // File name joined onto the workspace path by the load/save helpers.
    MEMORY_FILE = "proposer-memory.json";
    // saveRunSummary keeps only the last MAX_ENTRIES summaries.
    MAX_ENTRIES = 10;
  }
});
96
+
1
97
  // src/cli.ts
2
98
  import { Command as Command12 } from "commander";
3
99
  import chalk15 from "chalk";
@@ -275,7 +371,7 @@ var ui = {
275
371
  // Key-value pairs
276
372
  kv: (key, value) => ` ${chalk.cyan(key.padEnd(14))} ${value}`,
277
373
  // File list
278
- file: (path25) => chalk.dim(` ${path25}`),
374
+ file: (path26) => chalk.dim(` ${path26}`),
279
375
  // Tool display
280
376
  tool: (name, reason) => ` ${warmStone("\u25CF")} ${chalk.bold(name)}
281
377
  ${chalk.dim(reason)}`,
@@ -3782,8 +3878,8 @@ var keysCommand = new Command10("keys").description("Add or update API keys for
3782
3878
  import { Command as Command11 } from "commander";
3783
3879
  import chalk14 from "chalk";
3784
3880
  import ora2 from "ora";
3785
- import fs24 from "fs/promises";
3786
- import path24 from "path";
3881
+ import fs25 from "fs/promises";
3882
+ import path25 from "path";
3787
3883
  import { parse as yamlParse2 } from "yaml";
3788
3884
  import { confirm as confirm4, select as select4 } from "@inquirer/prompts";
3789
3885
 
@@ -4794,12 +4890,12 @@ async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config,
4794
4890
  }
4795
4891
 
4796
4892
  // src/evolve/loop.ts
4797
- import fs21 from "fs/promises";
4798
- import path21 from "path";
4893
+ import fs22 from "fs/promises";
4894
+ import path22 from "path";
4799
4895
 
4800
4896
  // src/evolve/proposer.ts
4801
- import fs19 from "fs/promises";
4802
- import path19 from "path";
4897
+ import fs20 from "fs/promises";
4898
+ import path20 from "path";
4803
4899
  var PROPOSER_SYSTEM_PROMPT = `You are an expert agent environment optimizer. Your job is to improve a Claude Code
4804
4900
  agent environment (.claude/ directory) based on execution traces from real tasks.
4805
4901
 
@@ -4872,18 +4968,18 @@ async function readHarnessFiles(harnessPath) {
4872
4968
  async function walk(dir, prefix) {
4873
4969
  let entries;
4874
4970
  try {
4875
- entries = await fs19.readdir(dir, { withFileTypes: true });
4971
+ entries = await fs20.readdir(dir, { withFileTypes: true });
4876
4972
  } catch {
4877
4973
  return;
4878
4974
  }
4879
4975
  for (const entry of entries) {
4880
- const relativePath = prefix ? path19.join(prefix, entry.name) : entry.name;
4881
- const fullPath = path19.join(dir, entry.name);
4976
+ const relativePath = prefix ? path20.join(prefix, entry.name) : entry.name;
4977
+ const fullPath = path20.join(dir, entry.name);
4882
4978
  if (entry.isDirectory()) {
4883
4979
  await walk(fullPath, relativePath);
4884
4980
  } else if (entry.isFile()) {
4885
4981
  try {
4886
- result[relativePath] = await fs19.readFile(fullPath, "utf-8");
4982
+ result[relativePath] = await fs20.readFile(fullPath, "utf-8");
4887
4983
  } catch {
4888
4984
  }
4889
4985
  }
@@ -4899,7 +4995,7 @@ function truncateStdout(stdout, limit) {
4899
4995
  return `[...truncated, showing last ${limit} chars...]
4900
4996
  ${stdout.slice(-limit)}`;
4901
4997
  }
4902
- function buildProposerUserMessage(harnessFiles, traces, tasks, history) {
4998
+ function buildProposerUserMessage(harnessFiles, traces, tasks, history, memorySection) {
4903
4999
  const harnessSection = ["## Current Harness Files\n"];
4904
5000
  const fileEntries = Object.entries(harnessFiles);
4905
5001
  if (fileEntries.length === 0) {
@@ -4935,7 +5031,8 @@ ${content}
4935
5031
  const historyBudget = remainingBudget - traceBudget;
4936
5032
  const traceSection = buildTraceSection(traces, traceBudget);
4937
5033
  const historySection = buildHistorySection(history, historyBudget);
4938
- return fixedContent + "\n" + traceSection + "\n" + historySection;
5034
+ const memoryPart = memorySection ? "\n" + memorySection : "";
5035
+ return fixedContent + "\n" + traceSection + "\n" + historySection + memoryPart;
4939
5036
  }
4940
5037
  function buildTraceSection(traces, budget) {
4941
5038
  if (traces.length === 0) return "## Execution Traces\n\n(No traces available)\n";
@@ -5090,7 +5187,10 @@ function parseProposerResponse(raw) {
5090
5187
  async function propose(iteration, workspacePath, harnessPath, history, tasks, config, proposerModel) {
5091
5188
  const harnessFiles = await readHarnessFiles(harnessPath);
5092
5189
  const traces = await loadIterationTraces(workspacePath, iteration);
5093
- const userMessage = buildProposerUserMessage(harnessFiles, traces, tasks, history);
5190
+ const { loadProposerMemory: loadProposerMemory2, formatMemoryForProposer: formatMemoryForProposer2 } = await Promise.resolve().then(() => (init_memory(), memory_exports));
5191
+ const memory = await loadProposerMemory2(workspacePath);
5192
+ const memorySection = formatMemoryForProposer2(memory);
5193
+ const userMessage = buildProposerUserMessage(harnessFiles, traces, tasks, history, memorySection);
5094
5194
  const proposerConfig = { ...config, model: proposerModel };
5095
5195
  const response = await callLLM(proposerConfig, userMessage, {
5096
5196
  systemPrompt: PROPOSER_SYSTEM_PROMPT,
@@ -5101,60 +5201,60 @@ async function propose(iteration, workspacePath, harnessPath, history, tasks, co
5101
5201
  }
5102
5202
 
5103
5203
  // src/evolve/mutator.ts
5104
- import fs20 from "fs/promises";
5105
- import path20 from "path";
5204
+ import fs21 from "fs/promises";
5205
+ import path21 from "path";
5106
5206
  async function applyMutations(currentHarnessPath, nextIterationDir, mutations) {
5107
- const newHarnessPath = path20.join(nextIterationDir, "harness");
5207
+ const newHarnessPath = path21.join(nextIterationDir, "harness");
5108
5208
  await copyDir(currentHarnessPath, newHarnessPath);
5109
5209
  for (const mutation of mutations) {
5110
5210
  if (mutation.file.includes("..")) {
5111
5211
  continue;
5112
5212
  }
5113
- const filePath = path20.join(newHarnessPath, mutation.file);
5213
+ const filePath = path21.join(newHarnessPath, mutation.file);
5114
5214
  if (mutation.action === "replace") {
5115
5215
  if (!mutation.oldText) {
5116
5216
  continue;
5117
5217
  }
5118
- const content = await fs20.readFile(filePath, "utf-8");
5218
+ const content = await fs21.readFile(filePath, "utf-8");
5119
5219
  if (!content.includes(mutation.oldText)) {
5120
5220
  continue;
5121
5221
  }
5122
- await fs20.writeFile(
5222
+ await fs21.writeFile(
5123
5223
  filePath,
5124
5224
  content.replace(mutation.oldText, mutation.newText),
5125
5225
  "utf-8"
5126
5226
  );
5127
5227
  } else if (mutation.action === "add_section") {
5128
5228
  try {
5129
- const content = await fs20.readFile(filePath, "utf-8");
5130
- await fs20.writeFile(
5229
+ const content = await fs21.readFile(filePath, "utf-8");
5230
+ await fs21.writeFile(
5131
5231
  filePath,
5132
5232
  content + "\n\n" + mutation.newText,
5133
5233
  "utf-8"
5134
5234
  );
5135
5235
  } catch {
5136
- await fs20.mkdir(path20.dirname(filePath), { recursive: true });
5137
- await fs20.writeFile(filePath, mutation.newText, "utf-8");
5236
+ await fs21.mkdir(path21.dirname(filePath), { recursive: true });
5237
+ await fs21.writeFile(filePath, mutation.newText, "utf-8");
5138
5238
  }
5139
5239
  } else if (mutation.action === "create_file") {
5140
- await fs20.mkdir(path20.dirname(filePath), { recursive: true });
5141
- await fs20.writeFile(filePath, mutation.newText, "utf-8");
5240
+ await fs21.mkdir(path21.dirname(filePath), { recursive: true });
5241
+ await fs21.writeFile(filePath, mutation.newText, "utf-8");
5142
5242
  } else if (mutation.action === "delete_section") {
5143
5243
  if (!mutation.oldText) {
5144
5244
  continue;
5145
5245
  }
5146
5246
  let sectionContent;
5147
5247
  try {
5148
- sectionContent = await fs20.readFile(filePath, "utf-8");
5248
+ sectionContent = await fs21.readFile(filePath, "utf-8");
5149
5249
  } catch {
5150
5250
  continue;
5151
5251
  }
5152
5252
  if (!sectionContent.includes(mutation.oldText)) {
5153
5253
  continue;
5154
5254
  }
5155
- await fs20.writeFile(filePath, sectionContent.replace(mutation.oldText, ""), "utf-8");
5255
+ await fs21.writeFile(filePath, sectionContent.replace(mutation.oldText, ""), "utf-8");
5156
5256
  } else if (mutation.action === "delete_file") {
5157
- await fs20.unlink(filePath).catch(() => {
5257
+ await fs21.unlink(filePath).catch(() => {
5158
5258
  });
5159
5259
  }
5160
5260
  }
@@ -5202,17 +5302,17 @@ async function readAllFiles(dir) {
5202
5302
  async function walk(current) {
5203
5303
  let entries;
5204
5304
  try {
5205
- entries = await fs20.readdir(current, { withFileTypes: true });
5305
+ entries = await fs21.readdir(current, { withFileTypes: true });
5206
5306
  } catch {
5207
5307
  return;
5208
5308
  }
5209
5309
  for (const entry of entries) {
5210
- const fullPath = path20.join(current, entry.name);
5211
- const relativePath = path20.relative(dir, fullPath);
5310
+ const fullPath = path21.join(current, entry.name);
5311
+ const relativePath = path21.relative(dir, fullPath);
5212
5312
  if (entry.isDirectory()) {
5213
5313
  await walk(fullPath);
5214
5314
  } else {
5215
- result[relativePath] = await fs20.readFile(fullPath, "utf-8");
5315
+ result[relativePath] = await fs21.readFile(fullPath, "utf-8");
5216
5316
  }
5217
5317
  }
5218
5318
  }
@@ -5221,20 +5321,27 @@ async function readAllFiles(dir) {
5221
5321
  }
5222
5322
 
5223
5323
  // src/evolve/loop.ts
5324
/**
 * Compute how many mutations a given iteration may apply.
 *
 * The cap stays at `maxMutations` for the first 40% of the run, then decays
 * linearly so that it reaches 1 on the final iteration.
 *
 * @param {number} iter - Zero-based iteration index.
 * @param {number} maxIterations - Total number of iterations in the run.
 * @param {number} maxMutations - Configured per-iteration mutation limit.
 * @returns {number} The mutation limit for this iteration.
 */
function computeMutationCap(iter, maxIterations, maxMutations) {
  // Nothing to decay for a single-iteration run or a limit already at or below
  // one. Returning early also avoids 1 / maxMutations when the limit is 0: that
  // path produced NaN, and a NaN cap makes the caller's `length > cap` check
  // always false, silently disabling the limit.
  if (maxIterations <= 1 || maxMutations <= 1) return maxMutations;
  const progress = iter / (maxIterations - 1);
  if (progress <= 0.4) return maxMutations;
  // Rescale the remaining 60% of the run onto 0..1.
  const decayProgress = (progress - 0.4) / 0.6;
  return Math.max(1, Math.round(maxMutations * (1 - decayProgress * (1 - 1 / maxMutations))));
}
5224
5331
  async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgress) {
5225
5332
  const history = [];
5226
5333
  let bestScore = -1;
5227
5334
  let bestIteration = 0;
5228
5335
  let baselineScore = 0;
5229
5336
  for (let iter = 0; iter < evolveConfig.maxIterations; iter++) {
5230
- const harnessPath = path21.join(
5337
+ const harnessPath = path22.join(
5231
5338
  workspacePath,
5232
5339
  "iterations",
5233
5340
  iter.toString(),
5234
5341
  "harness"
5235
5342
  );
5236
5343
  try {
5237
- await fs21.access(harnessPath);
5344
+ await fs22.access(harnessPath);
5238
5345
  } catch {
5239
5346
  if (iter === 0) {
5240
5347
  throw new Error(
@@ -5267,6 +5374,29 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5267
5374
  tasksToRun.push(task);
5268
5375
  }
5269
5376
  }
5377
+ const sampleSize = evolveConfig.evalSampleSize;
5378
+ if (sampleSize > 0 && sampleSize < tasksToRun.length) {
5379
+ const shuffled = [...tasksToRun].sort((a, b) => {
5380
+ const hashA = (iter * 31 + a.id.charCodeAt(0)) % 1e3;
5381
+ const hashB = (iter * 31 + b.id.charCodeAt(0)) % 1e3;
5382
+ return hashA - hashB;
5383
+ });
5384
+ const sampled = new Set(shuffled.slice(0, sampleSize).map((t) => t.id));
5385
+ for (const task of tasksToRun) {
5386
+ if (!sampled.has(task.id)) {
5387
+ const prev = prevLog.taskResults[task.id];
5388
+ const prevVal = prev ? prev.score ?? (prev.pass ? 100 : 0) : 0;
5389
+ carriedScores[task.id] = { pass: prevVal >= 50, score: prevVal };
5390
+ onProgress?.({
5391
+ type: "task-skipped",
5392
+ iteration: iter,
5393
+ taskId: task.id,
5394
+ message: `Sampled out ${task.id} (mini-batch ${sampleSize}/${tasksToRun.length})`
5395
+ });
5396
+ }
5397
+ }
5398
+ tasksToRun = tasksToRun.filter((t) => sampled.has(t.id));
5399
+ }
5270
5400
  }
5271
5401
  const { results: evalResults, aggregate: evalAggregate } = await evaluateAll(
5272
5402
  tasksToRun,
@@ -5327,7 +5457,7 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5327
5457
  };
5328
5458
  await writeIterationLog(workspacePath, rollbackLog);
5329
5459
  history.push(rollbackLog);
5330
- const bestHarnessPath = path21.join(
5460
+ const bestHarnessPath = path22.join(
5331
5461
  workspacePath,
5332
5462
  "iterations",
5333
5463
  bestIteration.toString(),
@@ -5345,13 +5475,14 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5345
5475
  kairnConfig,
5346
5476
  evolveConfig.proposerModel
5347
5477
  );
5348
- if (rollbackProposal.mutations.length > evolveConfig.maxMutationsPerIteration) {
5478
+ const rollbackCap = computeMutationCap(iter, evolveConfig.maxIterations, evolveConfig.maxMutationsPerIteration);
5479
+ if (rollbackProposal.mutations.length > rollbackCap) {
5349
5480
  rollbackProposal = {
5350
5481
  ...rollbackProposal,
5351
- mutations: rollbackProposal.mutations.slice(0, evolveConfig.maxMutationsPerIteration)
5482
+ mutations: rollbackProposal.mutations.slice(0, rollbackCap)
5352
5483
  };
5353
5484
  }
5354
- const nextIterDir2 = path21.join(workspacePath, "iterations", (iter + 1).toString());
5485
+ const nextIterDir2 = path22.join(workspacePath, "iterations", (iter + 1).toString());
5355
5486
  await applyMutations(bestHarnessPath, nextIterDir2, rollbackProposal.mutations);
5356
5487
  onProgress?.({
5357
5488
  type: "mutations-applied",
@@ -5359,8 +5490,8 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5359
5490
  mutationCount: rollbackProposal.mutations.length
5360
5491
  });
5361
5492
  } catch {
5362
- const nextIterDir2 = path21.join(workspacePath, "iterations", (iter + 1).toString());
5363
- await copyDir(bestHarnessPath, path21.join(nextIterDir2, "harness"));
5493
+ const nextIterDir2 = path22.join(workspacePath, "iterations", (iter + 1).toString());
5494
+ await copyDir(bestHarnessPath, path22.join(nextIterDir2, "harness"));
5364
5495
  }
5365
5496
  }
5366
5497
  continue;
@@ -5406,10 +5537,11 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5406
5537
  kairnConfig,
5407
5538
  evolveConfig.proposerModel
5408
5539
  );
5409
- if (proposal.mutations.length > evolveConfig.maxMutationsPerIteration) {
5540
+ const iterCap = computeMutationCap(iter, evolveConfig.maxIterations, evolveConfig.maxMutationsPerIteration);
5541
+ if (proposal.mutations.length > iterCap) {
5410
5542
  proposal = {
5411
5543
  ...proposal,
5412
- mutations: proposal.mutations.slice(0, evolveConfig.maxMutationsPerIteration)
5544
+ mutations: proposal.mutations.slice(0, iterCap)
5413
5545
  };
5414
5546
  }
5415
5547
  } catch (err) {
@@ -5419,12 +5551,12 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5419
5551
  iteration: iter,
5420
5552
  message: `Proposer failed: ${errMsg}`
5421
5553
  });
5422
- const nextIterDir2 = path21.join(
5554
+ const nextIterDir2 = path22.join(
5423
5555
  workspacePath,
5424
5556
  "iterations",
5425
5557
  (iter + 1).toString()
5426
5558
  );
5427
- await copyDir(harnessPath, path21.join(nextIterDir2, "harness"));
5559
+ await copyDir(harnessPath, path22.join(nextIterDir2, "harness"));
5428
5560
  const skipLog = {
5429
5561
  iteration: iter,
5430
5562
  score: aggregate,
@@ -5437,7 +5569,7 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5437
5569
  history.push(skipLog);
5438
5570
  continue;
5439
5571
  }
5440
- const nextIterDir = path21.join(
5572
+ const nextIterDir = path22.join(
5441
5573
  workspacePath,
5442
5574
  "iterations",
5443
5575
  (iter + 1).toString()
@@ -5451,7 +5583,7 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5451
5583
  );
5452
5584
  diffPatch = mutationResult.diffPatch;
5453
5585
  } catch {
5454
- await copyDir(harnessPath, path21.join(nextIterDir, "harness"));
5586
+ await copyDir(harnessPath, path22.join(nextIterDir, "harness"));
5455
5587
  }
5456
5588
  onProgress?.({
5457
5589
  type: "mutations-applied",
@@ -5469,6 +5601,62 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5469
5601
  await writeIterationLog(workspacePath, iterLog);
5470
5602
  history.push(iterLog);
5471
5603
  }
5604
+ if (evolveConfig.usePrincipal && history.length >= 2) {
5605
+ onProgress?.({ type: "proposing", iteration: history.length, message: "Principal Proposer synthesizing final harness" });
5606
+ const baselineHarnessPath = path22.join(workspacePath, "iterations", "0", "harness");
5607
+ try {
5608
+ const principalProposal = await propose(
5609
+ history.length,
5610
+ workspacePath,
5611
+ baselineHarnessPath,
5612
+ history,
5613
+ tasks,
5614
+ kairnConfig,
5615
+ evolveConfig.proposerModel
5616
+ );
5617
+ if (principalProposal.mutations.length > evolveConfig.maxMutationsPerIteration) {
5618
+ principalProposal.mutations = principalProposal.mutations.slice(0, evolveConfig.maxMutationsPerIteration);
5619
+ }
5620
+ const principalIterNum = history.length;
5621
+ const principalIterDir = path22.join(workspacePath, "iterations", principalIterNum.toString());
5622
+ const mutResult = await applyMutations(baselineHarnessPath, principalIterDir, principalProposal.mutations);
5623
+ onProgress?.({ type: "iteration-start", iteration: principalIterNum });
5624
+ const { results: principalResults, aggregate: principalAggregate } = await evaluateAll(
5625
+ tasks,
5626
+ mutResult.newHarnessPath,
5627
+ workspacePath,
5628
+ principalIterNum,
5629
+ kairnConfig,
5630
+ onProgress,
5631
+ evolveConfig.runsPerTask,
5632
+ evolveConfig.parallelTasks
5633
+ );
5634
+ onProgress?.({ type: "iteration-scored", iteration: principalIterNum, score: principalAggregate });
5635
+ const principalLog = {
5636
+ iteration: principalIterNum,
5637
+ score: principalAggregate,
5638
+ taskResults: principalResults,
5639
+ proposal: principalProposal,
5640
+ diffPatch: mutResult.diffPatch,
5641
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
5642
+ };
5643
+ await writeIterationLog(workspacePath, principalLog);
5644
+ history.push(principalLog);
5645
+ if (principalAggregate > bestScore) {
5646
+ bestScore = principalAggregate;
5647
+ bestIteration = principalIterNum;
5648
+ }
5649
+ } catch (err) {
5650
+ const errMsg = err instanceof Error ? err.message : String(err);
5651
+ onProgress?.({ type: "proposer-error", iteration: history.length, message: `Principal failed: ${errMsg}` });
5652
+ }
5653
+ }
5654
+ try {
5655
+ const { buildRunSummary: buildRunSummary2, saveRunSummary: saveRunSummary2 } = await Promise.resolve().then(() => (init_memory(), memory_exports));
5656
+ const summary = buildRunSummary2(history, baselineScore, bestScore);
5657
+ await saveRunSummary2(workspacePath, summary);
5658
+ } catch {
5659
+ }
5472
5660
  onProgress?.({
5473
5661
  type: "complete",
5474
5662
  iteration: history.length > 0 ? history.length - 1 : 0,
@@ -5483,8 +5671,8 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5483
5671
  }
5484
5672
 
5485
5673
  // src/evolve/report.ts
5486
- import fs22 from "fs/promises";
5487
- import path22 from "path";
5674
+ import fs23 from "fs/promises";
5675
+ import path23 from "path";
5488
5676
 
5489
5677
  // src/evolve/diagnosis.ts
5490
5678
  function numericScore(s) {
@@ -5534,10 +5722,10 @@ function numericScore2(s) {
5534
5722
  return s.score ?? (s.pass ? 100 : 0);
5535
5723
  }
5536
5724
  async function loadAllIterations(workspacePath) {
5537
- const iterDir = path22.join(workspacePath, "iterations");
5725
+ const iterDir = path23.join(workspacePath, "iterations");
5538
5726
  let entries;
5539
5727
  try {
5540
- entries = await fs22.readdir(iterDir);
5728
+ entries = await fs23.readdir(iterDir);
5541
5729
  } catch {
5542
5730
  return [];
5543
5731
  }
@@ -5551,7 +5739,7 @@ async function loadAllIterations(workspacePath) {
5551
5739
  }
5552
5740
  async function loadTasks(workspacePath) {
5553
5741
  try {
5554
- const content = await fs22.readFile(path22.join(workspacePath, "tasks.yaml"), "utf-8");
5742
+ const content = await fs23.readFile(path23.join(workspacePath, "tasks.yaml"), "utf-8");
5555
5743
  const parsed = yamlParse(content);
5556
5744
  return parsed?.tasks ?? [];
5557
5745
  } catch {
@@ -5725,13 +5913,13 @@ async function generateJsonReport(workspacePath) {
5725
5913
  }
5726
5914
 
5727
5915
  // src/evolve/apply.ts
5728
- import fs23 from "fs/promises";
5729
- import path23 from "path";
5916
+ import fs24 from "fs/promises";
5917
+ import path24 from "path";
5730
5918
  async function listIterations(workspacePath) {
5731
- const iterationsDir = path23.join(workspacePath, "iterations");
5919
+ const iterationsDir = path24.join(workspacePath, "iterations");
5732
5920
  let entries;
5733
5921
  try {
5734
- entries = await fs23.readdir(iterationsDir);
5922
+ entries = await fs24.readdir(iterationsDir);
5735
5923
  } catch {
5736
5924
  return [];
5737
5925
  }
@@ -5740,7 +5928,7 @@ async function listIterations(workspacePath) {
5740
5928
  const n = parseInt(entry, 10);
5741
5929
  if (!isNaN(n)) {
5742
5930
  try {
5743
- await fs23.access(path23.join(iterationsDir, entry, "harness"));
5931
+ await fs24.access(path24.join(iterationsDir, entry, "harness"));
5744
5932
  nums.push(n);
5745
5933
  } catch {
5746
5934
  }
@@ -5766,16 +5954,16 @@ async function listFilesRecursive(dir) {
5766
5954
  async function walk(current) {
5767
5955
  let entries;
5768
5956
  try {
5769
- entries = await fs23.readdir(current, { withFileTypes: true });
5957
+ entries = await fs24.readdir(current, { withFileTypes: true });
5770
5958
  } catch {
5771
5959
  return;
5772
5960
  }
5773
5961
  for (const entry of entries) {
5774
- const fullPath = path23.join(current, entry.name);
5962
+ const fullPath = path24.join(current, entry.name);
5775
5963
  if (entry.isDirectory()) {
5776
5964
  await walk(fullPath);
5777
5965
  } else {
5778
- results.push(path23.relative(dir, fullPath));
5966
+ results.push(path24.relative(dir, fullPath));
5779
5967
  }
5780
5968
  }
5781
5969
  }
@@ -5798,37 +5986,37 @@ async function applyEvolution(workspacePath, projectRoot, targetIteration) {
5798
5986
  } else {
5799
5987
  iter = await findBestIteration(workspacePath, iterations);
5800
5988
  }
5801
- const harnessPath = path23.join(
5989
+ const harnessPath = path24.join(
5802
5990
  workspacePath,
5803
5991
  "iterations",
5804
5992
  iter.toString(),
5805
5993
  "harness"
5806
5994
  );
5807
- const claudeDir = path23.join(projectRoot, ".claude");
5995
+ const claudeDir = path24.join(projectRoot, ".claude");
5808
5996
  const diffPreview = await generateDiff2(claudeDir, harnessPath);
5809
5997
  const currentFiles = await listFilesRecursive(claudeDir);
5810
5998
  const targetFiles = await listFilesRecursive(harnessPath);
5811
5999
  const allPaths = /* @__PURE__ */ new Set([...currentFiles, ...targetFiles]);
5812
6000
  const filesChanged = [];
5813
6001
  for (const filePath of allPaths) {
5814
- const currentContent = await fs23.readFile(path23.join(claudeDir, filePath), "utf-8").catch(() => null);
5815
- const targetContent = await fs23.readFile(path23.join(harnessPath, filePath), "utf-8").catch(() => null);
6002
+ const currentContent = await fs24.readFile(path24.join(claudeDir, filePath), "utf-8").catch(() => null);
6003
+ const targetContent = await fs24.readFile(path24.join(harnessPath, filePath), "utf-8").catch(() => null);
5816
6004
  if (currentContent !== targetContent) {
5817
6005
  filesChanged.push(filePath);
5818
6006
  }
5819
6007
  }
5820
- await fs23.rm(claudeDir, { recursive: true, force: true });
6008
+ await fs24.rm(claudeDir, { recursive: true, force: true });
5821
6009
  await copyDir(harnessPath, claudeDir);
5822
- const harnessMcpJson = path23.join(harnessPath, ".mcp.json");
5823
- const projectMcpJson = path23.join(projectRoot, ".mcp.json");
6010
+ const harnessMcpJson = path24.join(harnessPath, ".mcp.json");
6011
+ const projectMcpJson = path24.join(projectRoot, ".mcp.json");
5824
6012
  try {
5825
- await fs23.access(harnessMcpJson);
5826
- const currentMcp = await fs23.readFile(projectMcpJson, "utf-8").catch(() => null);
5827
- const targetMcp = await fs23.readFile(harnessMcpJson, "utf-8").catch(() => null);
6013
+ await fs24.access(harnessMcpJson);
6014
+ const currentMcp = await fs24.readFile(projectMcpJson, "utf-8").catch(() => null);
6015
+ const targetMcp = await fs24.readFile(harnessMcpJson, "utf-8").catch(() => null);
5828
6016
  if (currentMcp !== targetMcp) {
5829
6017
  filesChanged.push(".mcp.json");
5830
6018
  }
5831
- await fs23.copyFile(harnessMcpJson, projectMcpJson);
6019
+ await fs24.copyFile(harnessMcpJson, projectMcpJson);
5832
6020
  } catch {
5833
6021
  }
5834
6022
  return {
@@ -5848,11 +6036,13 @@ var DEFAULT_CONFIG = {
5848
6036
  runsPerTask: 1,
5849
6037
  maxMutationsPerIteration: 3,
5850
6038
  pruneThreshold: 95,
5851
- maxTaskDrop: 20
6039
+ maxTaskDrop: 20,
6040
+ usePrincipal: false,
6041
+ evalSampleSize: 0
5852
6042
  };
5853
6043
  async function loadEvolveConfigFromWorkspace(workspacePath) {
5854
6044
  try {
5855
- const configStr = await fs24.readFile(path24.join(workspacePath, "config.yaml"), "utf-8");
6045
+ const configStr = await fs25.readFile(path25.join(workspacePath, "config.yaml"), "utf-8");
5856
6046
  const parsed = yamlParse2(configStr);
5857
6047
  return {
5858
6048
  model: parsed.model ?? DEFAULT_CONFIG.model,
@@ -5863,7 +6053,9 @@ async function loadEvolveConfigFromWorkspace(workspacePath) {
5863
6053
  runsPerTask: parsed.runs_per_task ?? DEFAULT_CONFIG.runsPerTask,
5864
6054
  maxMutationsPerIteration: parsed.max_mutations_per_iteration ?? DEFAULT_CONFIG.maxMutationsPerIteration,
5865
6055
  pruneThreshold: parsed.prune_threshold ?? DEFAULT_CONFIG.pruneThreshold,
5866
- maxTaskDrop: parsed.max_task_drop ?? DEFAULT_CONFIG.maxTaskDrop
6056
+ maxTaskDrop: parsed.max_task_drop ?? DEFAULT_CONFIG.maxTaskDrop,
6057
+ usePrincipal: parsed.use_principal ?? DEFAULT_CONFIG.usePrincipal,
6058
+ evalSampleSize: parsed.eval_sample_size ?? DEFAULT_CONFIG.evalSampleSize
5867
6059
  };
5868
6060
  } catch {
5869
6061
  return { ...DEFAULT_CONFIG };
@@ -5874,9 +6066,9 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
5874
6066
  try {
5875
6067
  const projectRoot = process.cwd();
5876
6068
  console.log(ui.section("Evolve Init"));
5877
- const claudeDir = path24.join(projectRoot, ".claude");
6069
+ const claudeDir = path25.join(projectRoot, ".claude");
5878
6070
  try {
5879
- await fs24.access(claudeDir);
6071
+ await fs25.access(claudeDir);
5880
6072
  } catch {
5881
6073
  console.log(ui.error("No .claude/ directory found. Run kairn describe first."));
5882
6074
  process.exit(1);
@@ -5926,7 +6118,7 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
5926
6118
  if (config) {
5927
6119
  let claudeMd = "";
5928
6120
  try {
5929
- claudeMd = await fs24.readFile(path24.join(claudeDir, "CLAUDE.md"), "utf-8");
6121
+ claudeMd = await fs25.readFile(path25.join(claudeDir, "CLAUDE.md"), "utf-8");
5930
6122
  } catch {
5931
6123
  }
5932
6124
  const profile = await buildProjectProfile(projectRoot);
@@ -5957,16 +6149,16 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
5957
6149
  evolveCommand.command("baseline").description("Snapshot current .claude/ directory as baseline").action(async () => {
5958
6150
  try {
5959
6151
  const projectRoot = process.cwd();
5960
- const workspace = path24.join(projectRoot, ".kairn-evolve");
6152
+ const workspace = path25.join(projectRoot, ".kairn-evolve");
5961
6153
  console.log(ui.section("Evolve Baseline"));
5962
6154
  try {
5963
- await fs24.access(workspace);
6155
+ await fs25.access(workspace);
5964
6156
  } catch {
5965
6157
  console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
5966
6158
  process.exit(1);
5967
6159
  }
5968
6160
  await snapshotBaseline(projectRoot, workspace);
5969
- const baselineDir = path24.join(workspace, "baseline");
6161
+ const baselineDir = path25.join(workspace, "baseline");
5970
6162
  const fileCount = await countFiles(baselineDir);
5971
6163
  console.log(ui.success(`Baseline snapshot created (${fileCount} files)`));
5972
6164
  } catch (err) {
@@ -5975,21 +6167,21 @@ evolveCommand.command("baseline").description("Snapshot current .claude/ directo
5975
6167
  process.exit(1);
5976
6168
  }
5977
6169
  });
5978
- evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").option("--parallel <n>", "Run up to N tasks concurrently", "1").option("--max-mutations <n>", "Max mutations per iteration", "3").option("--prune-threshold <n>", "Skip tasks scoring above this on middle iterations", "95").option("--max-task-drop <n>", "Roll back if any task drops more than N points", "20").action(async (options) => {
6170
+ evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").option("--parallel <n>", "Run up to N tasks concurrently", "1").option("--max-mutations <n>", "Max mutations per iteration", "3").option("--prune-threshold <n>", "Skip tasks scoring above this on middle iterations", "95").option("--max-task-drop <n>", "Roll back if any task drops more than N points", "20").option("--principal", "Run Principal Proposer as final iteration").option("--eval-sample <n>", "Sample N tasks per middle iteration (0 = all)", "0").action(async (options) => {
5979
6171
  try {
5980
6172
  const projectRoot = process.cwd();
5981
- const workspace = path24.join(projectRoot, ".kairn-evolve");
6173
+ const workspace = path25.join(projectRoot, ".kairn-evolve");
5982
6174
  console.log(ui.section("Evolve Run"));
5983
6175
  try {
5984
- await fs24.access(workspace);
6176
+ await fs25.access(workspace);
5985
6177
  } catch {
5986
6178
  console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
5987
6179
  process.exit(1);
5988
6180
  }
5989
- const tasksPath = path24.join(workspace, "tasks.yaml");
6181
+ const tasksPath = path25.join(workspace, "tasks.yaml");
5990
6182
  let tasksContent;
5991
6183
  try {
5992
- tasksContent = await fs24.readFile(tasksPath, "utf-8");
6184
+ tasksContent = await fs25.readFile(tasksPath, "utf-8");
5993
6185
  } catch {
5994
6186
  console.log(ui.error("No tasks.yaml found. Run kairn evolve init first."));
5995
6187
  process.exit(1);
@@ -6008,15 +6200,15 @@ evolveCommand.command("run").description("Run tasks against the current harness"
6008
6200
  console.log(ui.info(`Running ${tasksToRun.length} task(s)...`));
6009
6201
  console.log("");
6010
6202
  const config = await loadConfig();
6011
- const harnessPath = path24.join(projectRoot, ".claude");
6203
+ const harnessPath = path25.join(projectRoot, ".claude");
6012
6204
  const results = [];
6013
6205
  for (const task of tasksToRun) {
6014
- const traceDir = path24.join(workspace, "traces", "0", task.id);
6206
+ const traceDir = path25.join(workspace, "traces", "0", task.id);
6015
6207
  const spinner = ora2(`Running: ${task.id}`).start();
6016
6208
  const result = await runTask(task, harnessPath, traceDir, 0);
6017
6209
  if (config) {
6018
- const stdout = await fs24.readFile(path24.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
6019
- const stderr = await fs24.readFile(path24.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
6210
+ const stdout = await fs25.readFile(path25.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
6211
+ const stderr = await fs25.readFile(path25.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
6020
6212
  const score = await scoreTask(task, traceDir, stdout, stderr, config);
6021
6213
  result.score = score;
6022
6214
  await writeScore(traceDir, score);
@@ -6074,8 +6266,17 @@ evolveCommand.command("run").description("Run tasks against the current harness"
6074
6266
  process.exit(1);
6075
6267
  }
6076
6268
  evolveConfig.maxTaskDrop = maxTaskDrop;
6269
+ if (options.principal) {
6270
+ evolveConfig.usePrincipal = true;
6271
+ }
6272
+ const evalSample = parseInt(options.evalSample ?? "0", 10);
6273
+ if (isNaN(evalSample) || evalSample < 0) {
6274
+ console.log(ui.error("--eval-sample must be a non-negative integer"));
6275
+ process.exit(1);
6276
+ }
6277
+ evolveConfig.evalSampleSize = evalSample;
6077
6278
  try {
6078
- await fs24.access(path24.join(workspace, "iterations", "0", "harness"));
6279
+ await fs25.access(path25.join(workspace, "iterations", "0", "harness"));
6079
6280
  } catch {
6080
6281
  console.log(ui.error("No baseline harness found. Run kairn evolve baseline first."));
6081
6282
  process.exit(1);
@@ -6169,10 +6370,10 @@ evolveCommand.command("run").description("Run tasks against the current harness"
6169
6370
  evolveCommand.command("apply").description("Apply the best evolved harness to your project").option("--iter <n>", "Apply a specific iteration instead of the best").option("--force", "Apply even if git working tree is dirty").option("--no-commit", "Skip automatic git commit after applying").action(async (options) => {
6170
6371
  try {
6171
6372
  const projectRoot = process.cwd();
6172
- const workspace = path24.join(projectRoot, ".kairn-evolve");
6373
+ const workspace = path25.join(projectRoot, ".kairn-evolve");
6173
6374
  console.log(ui.section("Evolve Apply"));
6174
6375
  try {
6175
- await fs24.access(workspace);
6376
+ await fs25.access(workspace);
6176
6377
  } catch {
6177
6378
  console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
6178
6379
  process.exit(1);
@@ -6213,9 +6414,9 @@ evolveCommand.command("apply").description("Apply the best evolved harness to yo
6213
6414
  evolveCommand.command("report").description("Generate a summary report of the evolution run").option("--json", "Output machine-readable JSON instead of Markdown").action(async (options) => {
6214
6415
  try {
6215
6416
  const projectRoot = process.cwd();
6216
- const workspace = path24.join(projectRoot, ".kairn-evolve");
6417
+ const workspace = path25.join(projectRoot, ".kairn-evolve");
6217
6418
  try {
6218
- await fs24.access(workspace);
6419
+ await fs25.access(workspace);
6219
6420
  } catch {
6220
6421
  console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
6221
6422
  process.exit(1);
@@ -6236,23 +6437,23 @@ evolveCommand.command("report").description("Generate a summary report of the ev
6236
6437
  evolveCommand.command("diff <iter1> <iter2>").description("Show harness changes between two iterations").action(async (iter1Str, iter2Str) => {
6237
6438
  try {
6238
6439
  const projectRoot = process.cwd();
6239
- const workspace = path24.join(projectRoot, ".kairn-evolve");
6440
+ const workspace = path25.join(projectRoot, ".kairn-evolve");
6240
6441
  const iter1 = parseInt(iter1Str, 10);
6241
6442
  const iter2 = parseInt(iter2Str, 10);
6242
6443
  if (isNaN(iter1) || isNaN(iter2)) {
6243
6444
  console.log(ui.error("Both arguments must be integers (iteration numbers)"));
6244
6445
  process.exit(1);
6245
6446
  }
6246
- const harness1 = path24.join(workspace, "iterations", iter1.toString(), "harness");
6247
- const harness2 = path24.join(workspace, "iterations", iter2.toString(), "harness");
6447
+ const harness1 = path25.join(workspace, "iterations", iter1.toString(), "harness");
6448
+ const harness2 = path25.join(workspace, "iterations", iter2.toString(), "harness");
6248
6449
  try {
6249
- await fs24.access(harness1);
6450
+ await fs25.access(harness1);
6250
6451
  } catch {
6251
6452
  console.log(ui.error(`Iteration ${iter1} harness not found at ${harness1}`));
6252
6453
  process.exit(1);
6253
6454
  }
6254
6455
  try {
6255
- await fs24.access(harness2);
6456
+ await fs25.access(harness2);
6256
6457
  } catch {
6257
6458
  console.log(ui.error(`Iteration ${iter2} harness not found at ${harness2}`));
6258
6459
  process.exit(1);
@@ -6307,10 +6508,10 @@ evolveCommand.command("diff <iter1> <iter2>").description("Show harness changes
6307
6508
  async function countFiles(dir) {
6308
6509
  let count = 0;
6309
6510
  try {
6310
- const entries = await fs24.readdir(dir, { withFileTypes: true });
6511
+ const entries = await fs25.readdir(dir, { withFileTypes: true });
6311
6512
  for (const entry of entries) {
6312
6513
  if (entry.isDirectory()) {
6313
- count += await countFiles(path24.join(dir, entry.name));
6514
+ count += await countFiles(path25.join(dir, entry.name));
6314
6515
  } else {
6315
6516
  count++;
6316
6517
  }