kairn-cli 2.2.10 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,10 +1,107 @@
1
+ var __defProp = Object.defineProperty;
2
+ var __getOwnPropNames = Object.getOwnPropertyNames;
3
+ var __esm = (fn, res) => function __init() {
4
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
5
+ };
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+
11
+ // src/evolve/memory.ts
12
+ var memory_exports = {};
13
+ __export(memory_exports, {
14
+ buildRunSummary: () => buildRunSummary,
15
+ formatMemoryForProposer: () => formatMemoryForProposer,
16
+ loadProposerMemory: () => loadProposerMemory,
17
+ saveRunSummary: () => saveRunSummary
18
+ });
19
+ import fs19 from "fs/promises";
20
+ import path19 from "path";
21
/**
 * Load the proposer memory entries stored in a workspace.
 *
 * Reads the memory file (named by MEMORY_FILE) inside `workspacePath` and
 * parses it as JSON. Any read or parse failure, and any parsed value that is
 * not an array, yields an empty list. Individual entries are not validated.
 *
 * @param {string} workspacePath - Directory that holds the memory file.
 * @returns {Promise<Array>} The parsed entries, or an empty array.
 */
async function loadProposerMemory(workspacePath) {
  const file = path19.join(workspacePath, MEMORY_FILE);
  let entries;
  try {
    const text = await fs19.readFile(file, "utf-8");
    entries = JSON.parse(text);
  } catch {
    // Missing or corrupt memory is treated the same as "no prior runs".
    return [];
  }
  return Array.isArray(entries) ? entries : [];
}
32
/**
 * Condense an evolve run into a summary record.
 *
 * Walks consecutive pairs of history entries. For each entry (after the
 * first) that carries a proposal with at least one mutation, the score change
 * relative to the previous entry decides where the mutation summary goes:
 * any gain is recorded as effective, a drop of more than 5 points as
 * regressive, and anything in between is ignored.
 *
 * @param {Array} history - Iteration entries with `score` and optional `proposal.mutations`.
 * @param {number} baselineScore - Score before any mutation.
 * @param {number} bestScore - Best score reached during the run.
 * @returns {object} Summary with timestamp, scores, improvement, mutation lists and an insights sentence.
 */
function buildRunSummary(history, baselineScore, bestScore) {
  const effectiveMutations = [];
  const regressiveMutations = [];
  for (const [index, entry] of history.entries()) {
    if (index === 0) continue;
    if (!entry.proposal?.mutations.length) continue;
    const previous = history[index - 1];
    const delta = entry.score - previous.score;
    const described = entry.proposal.mutations
      .map(({ action, file, rationale }) => `${action} ${file}: ${rationale}`)
      .join("; ");
    if (delta > 0) {
      effectiveMutations.push(`+${delta.toFixed(1)}: ${described}`);
      continue;
    }
    if (delta < -5) {
      regressiveMutations.push(`${delta.toFixed(1)}: ${described}`);
    }
  }
  const improvement = bestScore - baselineScore;
  let insights;
  if (improvement > 0) {
    insights = `Improved ${improvement.toFixed(1)} points. ${effectiveMutations.length} helpful mutations, ${regressiveMutations.length} regressions.`;
  } else {
    insights = `No improvement. ${regressiveMutations.length} regressions observed.`;
  }
  const timestamp = (/* @__PURE__ */ new Date()).toISOString();
  return {
    timestamp,
    baselineScore,
    bestScore,
    improvement,
    effectiveMutations,
    regressiveMutations,
    insights
  };
}
59
/**
 * Append a run summary to the workspace's proposer memory file.
 *
 * Loads the existing entries, adds `summary` at the end, keeps only the most
 * recent MAX_ENTRIES entries and rewrites the file as pretty-printed JSON.
 *
 * @param {string} workspacePath - Directory that holds the memory file.
 * @param {object} summary - Summary record to append.
 * @returns {Promise<void>}
 */
async function saveRunSummary(workspacePath, summary) {
  const entries = await loadProposerMemory(workspacePath);
  entries.push(summary);
  // A negative slice start keeps the tail, i.e. the newest entries.
  const recent = entries.slice(-MAX_ENTRIES);
  const serialized = JSON.stringify(recent, null, 2);
  const target = path19.join(workspacePath, MEMORY_FILE);
  await fs19.writeFile(target, serialized, "utf-8");
}
66
/**
 * Render prior-run memory as a markdown section for the proposer prompt.
 *
 * Memory entries originate from a JSON file whose contents are only checked
 * for being an array, so individual entries may be malformed. Entries that
 * are not objects, or whose three score fields are not numbers, are skipped
 * instead of throwing; a missing or non-array mutation list is treated as
 * empty. Well-formed input produces the same text as before.
 *
 * @param {Array} memory - Entries as produced by buildRunSummary.
 * @returns {string} Markdown text, or "" when there is nothing usable to report.
 */
function formatMemoryForProposer(memory) {
  if (!Array.isArray(memory)) return "";
  const isNumber = (value) => typeof value === "number";
  const usable = memory.filter(
    (entry) => entry !== null && typeof entry === "object" && isNumber(entry.baselineScore) && isNumber(entry.bestScore) && isNumber(entry.improvement)
  );
  if (usable.length === 0) return "";
  const lines = ["## Prior Run History\n"];
  // Emit a heading followed by at most three bullet items; skip empty or missing lists.
  const pushList = (heading, items) => {
    if (!Array.isArray(items) || items.length === 0) return;
    lines.push(heading);
    for (const item of items.slice(0, 3)) {
      lines.push(` - ${item}`);
    }
  };
  for (const entry of usable) {
    const sign = entry.improvement >= 0 ? "+" : "";
    lines.push(`### Run at ${entry.timestamp}`);
    lines.push(`- Baseline: ${entry.baselineScore.toFixed(1)}%, Best: ${entry.bestScore.toFixed(1)}%, Improvement: ${sign}${entry.improvement.toFixed(1)}`);
    pushList("- Effective mutations:", entry.effectiveMutations);
    pushList("- Regressive mutations (AVOID these):", entry.regressiveMutations);
    lines.push("");
  }
  return lines.join("\n");
}
88
+ var MEMORY_FILE, MAX_ENTRIES;
89
+ var init_memory = __esm({
90
+ "src/evolve/memory.ts"() {
91
+ "use strict";
92
+ MEMORY_FILE = "proposer-memory.json";
93
+ MAX_ENTRIES = 10;
94
+ }
95
+ });
96
+
1
97
  // src/cli.ts
2
98
  import { Command as Command12 } from "commander";
3
99
  import chalk15 from "chalk";
100
+ import { createRequire } from "module";
4
101
 
5
102
  // src/commands/init.ts
6
103
  import { Command } from "commander";
7
- import { input, password, select } from "@inquirer/prompts";
104
+ import { confirm, input, password, select } from "@inquirer/prompts";
8
105
  import chalk3 from "chalk";
9
106
  import Anthropic from "@anthropic-ai/sdk";
10
107
  import OpenAI from "openai";
@@ -62,6 +159,59 @@ async function saveConfig(config) {
62
159
  await fs.writeFile(CONFIG_PATH, JSON.stringify(config, null, 2), "utf-8");
63
160
  }
64
161
 
162
+ // src/auth/keychain.ts
163
+ import { exec } from "child_process";
164
+ import { promisify } from "util";
165
+ var execAsync = promisify(exec);
166
+ var KEYCHAIN_SERVICE = "Claude Code-credentials";
167
+ var TOKEN_EXPIRY_BUFFER_MS = 6e4;
168
/**
 * Parse the raw keychain payload into a credentials record.
 *
 * The payload must be JSON containing a `claudeAiOauth` object with a
 * non-empty string `accessToken`, a string `refreshToken` and a numeric
 * `expiresAt`. Anything else yields null. A missing or non-string
 * `subscriptionType` is reported as "unknown".
 *
 * @param {string} raw - JSON text read from the keychain.
 * @returns {{accessToken: string, refreshToken: string, expiresAt: number, subscriptionType: string} | null}
 */
function parseKeychainCredentials(raw) {
  let payload;
  try {
    payload = JSON.parse(raw);
  } catch {
    return null;
  }
  const isObject = (value) => value !== null && typeof value === "object";
  if (!isObject(payload)) return null;
  const oauth = payload["claudeAiOauth"];
  if (!isObject(oauth)) return null;
  const { accessToken, refreshToken, expiresAt, subscriptionType } = oauth;
  const tokenOk = typeof accessToken === "string" && accessToken.length > 0;
  const refreshOk = typeof refreshToken === "string";
  const expiryOk = typeof expiresAt === "number";
  if (!(tokenOk && refreshOk && expiryOk)) return null;
  let plan = "unknown";
  if (typeof subscriptionType === "string") {
    plan = subscriptionType;
  }
  return {
    accessToken,
    refreshToken,
    expiresAt,
    subscriptionType: plan
  };
}
194
/**
 * Tell whether a credentials record should be treated as expired.
 *
 * The current time is advanced by TOKEN_EXPIRY_BUFFER_MS before comparing,
 * so a token that expires within the buffer window already counts as expired.
 *
 * @param {{expiresAt: number}} credentials - Record carrying the expiry timestamp.
 * @returns {boolean} True when the buffered current time has reached `expiresAt`.
 */
function isTokenExpired(credentials) {
  const bufferedNow = Date.now() + TOKEN_EXPIRY_BUFFER_MS;
  return credentials.expiresAt <= bufferedNow;
}
197
/**
 * Read Claude Code OAuth credentials from the macOS keychain.
 *
 * Runs `security find-generic-password` for the KEYCHAIN_SERVICE entry
 * (optionally narrowed to `account`) and parses its output. Returns null on
 * any platform other than darwin, when the command fails or times out (5 s),
 * or when the output cannot be parsed.
 *
 * The command is executed through a shell, so every interpolated value is
 * wrapped in single quotes with embedded single quotes escaped. This prevents
 * an account name containing quotes, `$`, backticks or `;` from altering the
 * command.
 *
 * @param {string} [account] - Optional keychain account name to match.
 * @returns {Promise<object|null>} Parsed credentials, or null.
 */
async function readClaudeCodeCredentials(account) {
  if (process.platform !== "darwin") return null;
  // POSIX shell quoting: close the quote, emit an escaped quote, reopen.
  const shellQuote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;
  try {
    const args = ["-s", shellQuote(KEYCHAIN_SERVICE)];
    if (account) {
      args.push("-a", shellQuote(account));
    }
    args.push("-w");
    const cmd = `security find-generic-password ${args.join(" ")}`;
    const { stdout } = await execAsync(cmd, { timeout: 5e3 });
    return parseKeychainCredentials(stdout.trim());
  } catch {
    // Best-effort lookup: a missing entry or denied access means "no credentials".
    return null;
  }
}
208
/**
 * Fetch a usable Claude Code OAuth access token.
 *
 * @param {string} [account] - Optional keychain account name, forwarded to the credential lookup.
 * @returns {Promise<string|null>} The access token, or null when no credentials
 *   were found or the stored token is expired.
 */
async function getAccessToken(account) {
  const credentials = await readClaudeCodeCredentials(account);
  if (!credentials || isTokenExpired(credentials)) {
    return null;
  }
  return credentials.accessToken;
}
214
+
65
215
  // src/providers.ts
66
216
  var PROVIDER_CONFIGS = {
67
217
  anthropic: {
@@ -221,7 +371,7 @@ var ui = {
221
371
  // Key-value pairs
222
372
  kv: (key, value) => ` ${chalk.cyan(key.padEnd(14))} ${value}`,
223
373
  // File list
224
- file: (path25) => chalk.dim(` ${path25}`),
374
+ file: (path26) => chalk.dim(` ${path26}`),
225
375
  // Tool display
226
376
  tool: (name, reason) => ` ${warmStone("\u25CF")} ${chalk.bold(name)}
227
377
  ${chalk.dim(reason)}`,
@@ -508,30 +658,49 @@ var initCommand = new Command("init").description("Set up Kairn with your API ke
508
658
  choices: PROVIDER_MODELS[provider]
509
659
  });
510
660
  }
511
- const apiKey = await password({
512
- message: `${providerDisplayName} API key${provider === "other" ? " (Enter to skip)" : ""}`,
513
- mask: "*"
514
- });
515
- if (!apiKey && provider !== "other") {
516
- console.log(ui.error("No API key provided. Aborting."));
517
- process.exit(1);
661
+ let apiKey = "";
662
+ let authType = "api-key";
663
+ if (provider === "anthropic") {
664
+ const oauthToken = await getAccessToken();
665
+ if (oauthToken) {
666
+ const useOAuth = await confirm({
667
+ message: "Claude Code subscription detected. Use it instead of an API key? (experimental \u2014 may break)",
668
+ default: true
669
+ });
670
+ if (useOAuth) {
671
+ authType = "claude-code-oauth";
672
+ console.log(ui.warn("Using Claude Code OAuth token. This is undocumented and may break at any time."));
673
+ console.log(ui.success("OAuth token validated"));
674
+ }
675
+ }
518
676
  }
519
- if (apiKey) {
520
- console.log(chalk3.dim("\n Verifying API key..."));
521
- const valid = await verifyKey(provider, apiKey, baseURL, model);
522
- if (!valid) {
523
- console.log(ui.error("Invalid API key. Check your key and try again."));
677
+ if (authType === "api-key") {
678
+ apiKey = await password({
679
+ message: `${providerDisplayName} API key${provider === "other" ? " (Enter to skip)" : ""}`,
680
+ mask: "*"
681
+ });
682
+ if (!apiKey && provider !== "other") {
683
+ console.log(ui.error("No API key provided. Aborting."));
524
684
  process.exit(1);
525
685
  }
526
- console.log(ui.success("API key verified"));
527
- } else {
528
- console.log(ui.warn("No API key \u2014 skipping verification"));
686
+ if (apiKey) {
687
+ console.log(chalk3.dim("\n Verifying API key..."));
688
+ const valid = await verifyKey(provider, apiKey, baseURL, model);
689
+ if (!valid) {
690
+ console.log(ui.error("Invalid API key. Check your key and try again."));
691
+ process.exit(1);
692
+ }
693
+ console.log(ui.success("API key verified"));
694
+ } else {
695
+ console.log(ui.warn("No API key \u2014 skipping verification"));
696
+ }
529
697
  }
530
698
  const config = {
531
699
  provider,
532
- api_key: apiKey || "",
700
+ api_key: apiKey,
533
701
  model,
534
702
  ...baseURL ? { base_url: baseURL } : {},
703
+ ...authType !== "api-key" ? { auth_type: authType } : {},
535
704
  default_runtime: "claude-code",
536
705
  created_at: (/* @__PURE__ */ new Date()).toISOString()
537
706
  };
@@ -555,7 +724,7 @@ var initCommand = new Command("init").description("Set up Kairn with your API ke
555
724
 
556
725
  // src/commands/describe.ts
557
726
  import { Command as Command2 } from "commander";
558
- import { input as input2, confirm, select as select2 } from "@inquirer/prompts";
727
+ import { input as input2, confirm as confirm2, select as select2 } from "@inquirer/prompts";
559
728
  import chalk5 from "chalk";
560
729
 
561
730
  // src/compiler/compile.ts
@@ -1240,8 +1409,18 @@ async function callLLM(config, userMessage, options) {
1240
1409
  const { systemPrompt } = options;
1241
1410
  const jsonMode = options.jsonMode ?? false;
1242
1411
  const providerName = getProviderName(config.provider);
1412
+ let apiKey = config.api_key;
1413
+ if (config.auth_type === "claude-code-oauth") {
1414
+ const oauthToken = await getAccessToken();
1415
+ if (!oauthToken) {
1416
+ throw new Error(
1417
+ "Claude Code OAuth token unavailable or expired. Run `kairn init` to reconfigure, or launch Claude Code to refresh the token."
1418
+ );
1419
+ }
1420
+ apiKey = oauthToken;
1421
+ }
1243
1422
  if (config.provider === "anthropic") {
1244
- const client2 = new Anthropic2({ apiKey: config.api_key });
1423
+ const client2 = new Anthropic2({ apiKey });
1245
1424
  const messages = [
1246
1425
  { role: "user", content: userMessage }
1247
1426
  ];
@@ -1262,7 +1441,7 @@ async function callLLM(config, userMessage, options) {
1262
1441
  }
1263
1442
  }
1264
1443
  const resolvedBaseURL = getBaseURL(config.provider, config.base_url);
1265
- const clientOptions = { apiKey: config.api_key };
1444
+ const clientOptions = { apiKey };
1266
1445
  if (resolvedBaseURL) clientOptions.baseURL = resolvedBaseURL;
1267
1446
  const client = new OpenAI2(clientOptions);
1268
1447
  try {
@@ -2441,7 +2620,7 @@ Autonomy level: ${autonomyLevel} (${autonomyLabel(autonomyLevel)})`;
2441
2620
  console.log("");
2442
2621
  }
2443
2622
  }
2444
- const proceed = options.yes || await confirm({
2623
+ const proceed = options.yes || await confirm2({
2445
2624
  message: "Generate environment in current directory?",
2446
2625
  default: true
2447
2626
  });
@@ -2656,7 +2835,7 @@ var updateRegistryCommand = new Command5("update-registry").description("Fetch t
2656
2835
 
2657
2836
  // src/commands/optimize.ts
2658
2837
  import { Command as Command6 } from "commander";
2659
- import { confirm as confirm2 } from "@inquirer/prompts";
2838
+ import { confirm as confirm3 } from "@inquirer/prompts";
2660
2839
  import chalk9 from "chalk";
2661
2840
  import ora from "ora";
2662
2841
  import fs12 from "fs/promises";
@@ -2718,7 +2897,7 @@ function detectFramework(deps) {
2718
2897
  ];
2719
2898
  const detected = [];
2720
2899
  for (const [packages, name] of frameworks) {
2721
- if (packages.some((pkg) => deps.includes(pkg))) {
2900
+ if (packages.some((pkg2) => deps.includes(pkg2))) {
2722
2901
  detected.push(name);
2723
2902
  }
2724
2903
  }
@@ -2742,11 +2921,11 @@ function extractEnvKeys(content) {
2742
2921
  return keys;
2743
2922
  }
2744
2923
  async function scanProject(dir) {
2745
- const pkg = await readJsonSafe(path11.join(dir, "package.json"));
2746
- const deps = pkg?.dependencies ? Object.keys(pkg.dependencies) : [];
2747
- const devDeps = pkg?.devDependencies ? Object.keys(pkg.devDependencies) : [];
2924
+ const pkg2 = await readJsonSafe(path11.join(dir, "package.json"));
2925
+ const deps = pkg2?.dependencies ? Object.keys(pkg2.dependencies) : [];
2926
+ const devDeps = pkg2?.devDependencies ? Object.keys(pkg2.devDependencies) : [];
2748
2927
  const allDeps = [...deps, ...devDeps];
2749
- const scripts = pkg?.scripts || {};
2928
+ const scripts = pkg2?.scripts || {};
2750
2929
  const rootFiles = await listDirSafe(dir);
2751
2930
  const keyFiles = rootFiles.filter(
2752
2931
  (f) => [
@@ -2808,8 +2987,8 @@ async function scanProject(dir) {
2808
2987
  existingSkills = await listDirSafe(path11.join(claudeDir, "skills"));
2809
2988
  existingAgents = (await listDirSafe(path11.join(claudeDir, "agents"))).filter((f) => f.endsWith(".md")).map((f) => f.replace(".md", ""));
2810
2989
  }
2811
- const name = pkg?.name || path11.basename(dir);
2812
- const description = pkg?.description || "";
2990
+ const name = pkg2?.name || path11.basename(dir);
2991
+ const description = pkg2?.description || "";
2813
2992
  return {
2814
2993
  name,
2815
2994
  description,
@@ -3028,7 +3207,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
3028
3207
  }
3029
3208
  if (!options.yes) {
3030
3209
  console.log("");
3031
- const proceed = await confirm2({
3210
+ const proceed = await confirm3({
3032
3211
  message: "Generate optimized environment? This will overwrite existing .claude/ files.",
3033
3212
  default: false
3034
3213
  });
@@ -3040,7 +3219,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
3040
3219
  } else {
3041
3220
  console.log(chalk9.dim("\n No existing .claude/ directory found \u2014 generating from scratch.\n"));
3042
3221
  if (!options.yes) {
3043
- const proceed = await confirm2({
3222
+ const proceed = await confirm3({
3044
3223
  message: "Generate Claude Code environment for this project?",
3045
3224
  default: true
3046
3225
  });
@@ -3103,7 +3282,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
3103
3282
  }
3104
3283
  }
3105
3284
  console.log("");
3106
- const apply = await confirm2({
3285
+ const apply = await confirm3({
3107
3286
  message: "Apply these changes?",
3108
3287
  default: true
3109
3288
  });
@@ -3699,10 +3878,10 @@ var keysCommand = new Command10("keys").description("Add or update API keys for
3699
3878
  import { Command as Command11 } from "commander";
3700
3879
  import chalk14 from "chalk";
3701
3880
  import ora2 from "ora";
3702
- import fs24 from "fs/promises";
3703
- import path24 from "path";
3881
+ import fs25 from "fs/promises";
3882
+ import path25 from "path";
3704
3883
  import { parse as yamlParse2 } from "yaml";
3705
- import { confirm as confirm3, select as select4 } from "@inquirer/prompts";
3884
+ import { confirm as confirm4, select as select4 } from "@inquirer/prompts";
3706
3885
 
3707
3886
  // src/evolve/init.ts
3708
3887
  import fs15 from "fs/promises";
@@ -3945,14 +4124,14 @@ async function buildProjectProfile(projectRoot) {
3945
4124
  path15.join(projectRoot, "package.json"),
3946
4125
  "utf-8"
3947
4126
  );
3948
- const pkg = JSON.parse(pkgStr);
4127
+ const pkg2 = JSON.parse(pkgStr);
3949
4128
  profile.language = "typescript";
3950
- if (pkg.scripts && typeof pkg.scripts === "object") {
3951
- profile.scripts = pkg.scripts;
4129
+ if (pkg2.scripts && typeof pkg2.scripts === "object") {
4130
+ profile.scripts = pkg2.scripts;
3952
4131
  }
3953
4132
  const deps = {
3954
- ...pkg.dependencies ?? {},
3955
- ...pkg.devDependencies ?? {}
4133
+ ...pkg2.dependencies ?? {},
4134
+ ...pkg2.devDependencies ?? {}
3956
4135
  };
3957
4136
  if (deps.next) {
3958
4137
  profile.framework = "Next.js";
@@ -4051,8 +4230,8 @@ async function copyDir(src, dest) {
4051
4230
  }
4052
4231
 
4053
4232
  // src/evolve/runner.ts
4054
- import { exec as exec2, spawn } from "child_process";
4055
- import { promisify as promisify2 } from "util";
4233
+ import { exec as exec3, spawn } from "child_process";
4234
+ import { promisify as promisify3 } from "util";
4056
4235
  import fs18 from "fs/promises";
4057
4236
  import os3 from "os";
4058
4237
  import path18 from "path";
@@ -4177,11 +4356,11 @@ async function loadIterationLog(workspacePath, iteration) {
4177
4356
  }
4178
4357
 
4179
4358
  // src/evolve/exec.ts
4180
- import { exec } from "child_process";
4181
- import { promisify } from "util";
4182
- var execAsync = promisify(exec);
4359
+ import { exec as exec2 } from "child_process";
4360
+ import { promisify as promisify2 } from "util";
4361
+ var execAsync2 = promisify2(exec2);
4183
4362
  async function execCommand(cmd, cwd, timeoutMs = 3e4) {
4184
- return execAsync(cmd, { cwd, timeout: timeoutMs });
4363
+ return execAsync2(cmd, { cwd, timeout: timeoutMs });
4185
4364
  }
4186
4365
 
4187
4366
  // src/evolve/scorers.ts
@@ -4341,21 +4520,47 @@ async function rubricScorer(task, workspacePath, stdout, stderr, config) {
4341
4520
  breakdown
4342
4521
  };
4343
4522
  }
4523
/**
 * Attach a heuristic failure category to a failing score.
 *
 * Passing scores are returned untouched. Otherwise stdout and stderr are
 * scanned (case-insensitively, except for the `[setup]`/`Error` check on
 * stderr) and the first matching rule wins:
 *   1. "task"    - setup errors, missing commands or files
 *   2. "model"   - token/context/rate limits, API errors, HTTP 429, overload
 *   3. "repo"    - pre-existing build failures, merge conflicts, dirty tree
 *   4. "harness" - no keyword matched and the score is in [20, 80)
 *   5. "unknown" - everything else (empty reason)
 *
 * The 429 check requires the digits to stand alone as a number, so values
 * such as "1429ms" or "4290" are not mistaken for an HTTP 429 status.
 *
 * @param {object} score - Score record with `pass` and optional numeric `score`.
 * @param {string} stdout - Captured standard output of the task.
 * @param {string} stderr - Captured standard error of the task.
 * @returns {object} The original score when passing, else a copy with
 *   `failureCategory` and `failureReason` added.
 */
function classifyFailure(score, stdout, stderr) {
  if (score.pass) return score;
  const combined = `${stdout}\n${stderr}`.toLowerCase();
  const scoreValue = score.score ?? 0;
  const mentions = (...needles) => needles.some((needle) => combined.includes(needle));
  // 429 must not be embedded in a longer digit run (durations, byte counts, ids).
  const hasStatus429 = /(?<!\d)429(?!\d)/.test(combined);
  const setupErrored = stderr.includes("[setup]") && stderr.includes("Error");
  let failureCategory = "unknown";
  let failureReason = "";
  if (setupErrored || mentions("command not found", "no such file or directory")) {
    failureCategory = "task";
    failureReason = "Task setup failed or references missing resources";
  } else if (mentions("token limit", "context length", "rate limit", "api error", "overloaded") || hasStatus429) {
    failureCategory = "model";
    failureReason = "Model API error, token limit, or rate limit";
  } else if (mentions("build failed") && mentions("before") || mentions("merge conflict", "git dirty", "uncommitted changes")) {
    failureCategory = "repo";
    failureReason = "Pre-existing repo issues (build failure, dirty state)";
  } else if (scoreValue >= 20 && scoreValue < 80) {
    failureCategory = "harness";
    failureReason = "Agent attempted the task but did not follow harness conventions";
  }
  return { ...score, failureCategory, failureReason };
}
4344
4545
  async function scoreTask(task, workspacePath, stdout, stderr, config) {
4546
+ let score;
4345
4547
  if (task.scoring === "pass-fail") {
4346
- return passFailScorer(task, workspacePath, stdout, stderr);
4347
- }
4348
- if (task.scoring === "llm-judge" && config) {
4349
- return llmJudgeScorer(task, workspacePath, stdout, stderr, config);
4548
+ score = await passFailScorer(task, workspacePath, stdout, stderr);
4549
+ } else if (task.scoring === "llm-judge" && config) {
4550
+ score = await llmJudgeScorer(task, workspacePath, stdout, stderr, config);
4551
+ } else if (task.scoring === "rubric" && config) {
4552
+ score = await rubricScorer(task, workspacePath, stdout, stderr, config);
4553
+ } else {
4554
+ score = await passFailScorer(task, workspacePath, stdout, stderr);
4350
4555
  }
4351
- if (task.scoring === "rubric" && config) {
4352
- return rubricScorer(task, workspacePath, stdout, stderr, config);
4556
+ if (!score.pass) {
4557
+ score = classifyFailure(score, stdout, stderr);
4353
4558
  }
4354
- return passFailScorer(task, workspacePath, stdout, stderr);
4559
+ return score;
4355
4560
  }
4356
4561
 
4357
4562
  // src/evolve/runner.ts
4358
- var execAsync2 = promisify2(exec2);
4563
+ var execAsync3 = promisify3(exec3);
4359
4564
  var COPY_SKIP_DIRS = /* @__PURE__ */ new Set([".git", "node_modules", ".kairn-evolve", ".claude"]);
4360
4565
  async function deployMcpJson(harnessPath, workDir) {
4361
4566
  const src = path18.join(harnessPath, ".mcp.json");
@@ -4365,12 +4570,12 @@ async function deployMcpJson(harnessPath, workDir) {
4365
4570
  async function createIsolatedWorkspace(projectRoot, harnessPath) {
4366
4571
  const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
4367
4572
  try {
4368
- await execAsync2("git rev-parse --is-inside-work-tree", {
4573
+ await execAsync3("git rev-parse --is-inside-work-tree", {
4369
4574
  cwd: projectRoot,
4370
4575
  timeout: 5e3
4371
4576
  });
4372
4577
  const tmpDir2 = path18.join(os3.tmpdir(), `kairn-evolve-wt-${suffix}`);
4373
- await execAsync2(`git worktree add --detach "${tmpDir2}" HEAD`, {
4578
+ await execAsync3(`git worktree add --detach "${tmpDir2}" HEAD`, {
4374
4579
  cwd: projectRoot,
4375
4580
  timeout: 3e4
4376
4581
  });
@@ -4409,14 +4614,14 @@ async function copyProjectDir(src, dest) {
4409
4614
  async function cleanupIsolatedWorkspace(workDir, isWorktree, projectRoot) {
4410
4615
  if (isWorktree) {
4411
4616
  try {
4412
- await execAsync2(`git worktree remove "${workDir}" --force`, {
4617
+ await execAsync3(`git worktree remove "${workDir}" --force`, {
4413
4618
  cwd: projectRoot,
4414
4619
  timeout: 1e4
4415
4620
  });
4416
4621
  } catch {
4417
4622
  await fs18.rm(workDir, { recursive: true, force: true }).catch(() => {
4418
4623
  });
4419
- await execAsync2("git worktree prune", {
4624
+ await execAsync3("git worktree prune", {
4420
4625
  cwd: projectRoot,
4421
4626
  timeout: 5e3
4422
4627
  }).catch(() => {
@@ -4437,7 +4642,7 @@ async function runTask(task, harnessPath, traceDir, iteration, projectRoot) {
4437
4642
  let setupStderr = "";
4438
4643
  if (task.setup.trim()) {
4439
4644
  try {
4440
- await execAsync2(task.setup, { cwd: workDir, timeout: 6e4 });
4645
+ await execAsync3(task.setup, { cwd: workDir, timeout: 6e4 });
4441
4646
  } catch (err) {
4442
4647
  setupStderr = err instanceof Error ? err.message : String(err);
4443
4648
  }
@@ -4685,12 +4890,12 @@ async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config,
4685
4890
  }
4686
4891
 
4687
4892
  // src/evolve/loop.ts
4688
- import fs21 from "fs/promises";
4689
- import path21 from "path";
4893
+ import fs22 from "fs/promises";
4894
+ import path22 from "path";
4690
4895
 
4691
4896
  // src/evolve/proposer.ts
4692
- import fs19 from "fs/promises";
4693
- import path19 from "path";
4897
+ import fs20 from "fs/promises";
4898
+ import path20 from "path";
4694
4899
  var PROPOSER_SYSTEM_PROMPT = `You are an expert agent environment optimizer. Your job is to improve a Claude Code
4695
4900
  agent environment (.claude/ directory) based on execution traces from real tasks.
4696
4901
 
@@ -4763,18 +4968,18 @@ async function readHarnessFiles(harnessPath) {
4763
4968
  async function walk(dir, prefix) {
4764
4969
  let entries;
4765
4970
  try {
4766
- entries = await fs19.readdir(dir, { withFileTypes: true });
4971
+ entries = await fs20.readdir(dir, { withFileTypes: true });
4767
4972
  } catch {
4768
4973
  return;
4769
4974
  }
4770
4975
  for (const entry of entries) {
4771
- const relativePath = prefix ? path19.join(prefix, entry.name) : entry.name;
4772
- const fullPath = path19.join(dir, entry.name);
4976
+ const relativePath = prefix ? path20.join(prefix, entry.name) : entry.name;
4977
+ const fullPath = path20.join(dir, entry.name);
4773
4978
  if (entry.isDirectory()) {
4774
4979
  await walk(fullPath, relativePath);
4775
4980
  } else if (entry.isFile()) {
4776
4981
  try {
4777
- result[relativePath] = await fs19.readFile(fullPath, "utf-8");
4982
+ result[relativePath] = await fs20.readFile(fullPath, "utf-8");
4778
4983
  } catch {
4779
4984
  }
4780
4985
  }
@@ -4790,7 +4995,7 @@ function truncateStdout(stdout, limit) {
4790
4995
  return `[...truncated, showing last ${limit} chars...]
4791
4996
  ${stdout.slice(-limit)}`;
4792
4997
  }
4793
- function buildProposerUserMessage(harnessFiles, traces, tasks, history) {
4998
+ function buildProposerUserMessage(harnessFiles, traces, tasks, history, memorySection) {
4794
4999
  const harnessSection = ["## Current Harness Files\n"];
4795
5000
  const fileEntries = Object.entries(harnessFiles);
4796
5001
  if (fileEntries.length === 0) {
@@ -4826,7 +5031,8 @@ ${content}
4826
5031
  const historyBudget = remainingBudget - traceBudget;
4827
5032
  const traceSection = buildTraceSection(traces, traceBudget);
4828
5033
  const historySection = buildHistorySection(history, historyBudget);
4829
- return fixedContent + "\n" + traceSection + "\n" + historySection;
5034
+ const memoryPart = memorySection ? "\n" + memorySection : "";
5035
+ return fixedContent + "\n" + traceSection + "\n" + historySection + memoryPart;
4830
5036
  }
4831
5037
  function buildTraceSection(traces, budget) {
4832
5038
  if (traces.length === 0) return "## Execution Traces\n\n(No traces available)\n";
@@ -4981,7 +5187,10 @@ function parseProposerResponse(raw) {
4981
5187
  async function propose(iteration, workspacePath, harnessPath, history, tasks, config, proposerModel) {
4982
5188
  const harnessFiles = await readHarnessFiles(harnessPath);
4983
5189
  const traces = await loadIterationTraces(workspacePath, iteration);
4984
- const userMessage = buildProposerUserMessage(harnessFiles, traces, tasks, history);
5190
+ const { loadProposerMemory: loadProposerMemory2, formatMemoryForProposer: formatMemoryForProposer2 } = await Promise.resolve().then(() => (init_memory(), memory_exports));
5191
+ const memory = await loadProposerMemory2(workspacePath);
5192
+ const memorySection = formatMemoryForProposer2(memory);
5193
+ const userMessage = buildProposerUserMessage(harnessFiles, traces, tasks, history, memorySection);
4985
5194
  const proposerConfig = { ...config, model: proposerModel };
4986
5195
  const response = await callLLM(proposerConfig, userMessage, {
4987
5196
  systemPrompt: PROPOSER_SYSTEM_PROMPT,
@@ -4992,60 +5201,60 @@ async function propose(iteration, workspacePath, harnessPath, history, tasks, co
4992
5201
  }
4993
5202
 
4994
5203
  // src/evolve/mutator.ts
4995
- import fs20 from "fs/promises";
4996
- import path20 from "path";
5204
+ import fs21 from "fs/promises";
5205
+ import path21 from "path";
4997
5206
  async function applyMutations(currentHarnessPath, nextIterationDir, mutations) {
4998
- const newHarnessPath = path20.join(nextIterationDir, "harness");
5207
+ const newHarnessPath = path21.join(nextIterationDir, "harness");
4999
5208
  await copyDir(currentHarnessPath, newHarnessPath);
5000
5209
  for (const mutation of mutations) {
5001
5210
  if (mutation.file.includes("..")) {
5002
5211
  continue;
5003
5212
  }
5004
- const filePath = path20.join(newHarnessPath, mutation.file);
5213
+ const filePath = path21.join(newHarnessPath, mutation.file);
5005
5214
  if (mutation.action === "replace") {
5006
5215
  if (!mutation.oldText) {
5007
5216
  continue;
5008
5217
  }
5009
- const content = await fs20.readFile(filePath, "utf-8");
5218
+ const content = await fs21.readFile(filePath, "utf-8");
5010
5219
  if (!content.includes(mutation.oldText)) {
5011
5220
  continue;
5012
5221
  }
5013
- await fs20.writeFile(
5222
+ await fs21.writeFile(
5014
5223
  filePath,
5015
5224
  content.replace(mutation.oldText, mutation.newText),
5016
5225
  "utf-8"
5017
5226
  );
5018
5227
  } else if (mutation.action === "add_section") {
5019
5228
  try {
5020
- const content = await fs20.readFile(filePath, "utf-8");
5021
- await fs20.writeFile(
5229
+ const content = await fs21.readFile(filePath, "utf-8");
5230
+ await fs21.writeFile(
5022
5231
  filePath,
5023
5232
  content + "\n\n" + mutation.newText,
5024
5233
  "utf-8"
5025
5234
  );
5026
5235
  } catch {
5027
- await fs20.mkdir(path20.dirname(filePath), { recursive: true });
5028
- await fs20.writeFile(filePath, mutation.newText, "utf-8");
5236
+ await fs21.mkdir(path21.dirname(filePath), { recursive: true });
5237
+ await fs21.writeFile(filePath, mutation.newText, "utf-8");
5029
5238
  }
5030
5239
  } else if (mutation.action === "create_file") {
5031
- await fs20.mkdir(path20.dirname(filePath), { recursive: true });
5032
- await fs20.writeFile(filePath, mutation.newText, "utf-8");
5240
+ await fs21.mkdir(path21.dirname(filePath), { recursive: true });
5241
+ await fs21.writeFile(filePath, mutation.newText, "utf-8");
5033
5242
  } else if (mutation.action === "delete_section") {
5034
5243
  if (!mutation.oldText) {
5035
5244
  continue;
5036
5245
  }
5037
5246
  let sectionContent;
5038
5247
  try {
5039
- sectionContent = await fs20.readFile(filePath, "utf-8");
5248
+ sectionContent = await fs21.readFile(filePath, "utf-8");
5040
5249
  } catch {
5041
5250
  continue;
5042
5251
  }
5043
5252
  if (!sectionContent.includes(mutation.oldText)) {
5044
5253
  continue;
5045
5254
  }
5046
- await fs20.writeFile(filePath, sectionContent.replace(mutation.oldText, ""), "utf-8");
5255
+ await fs21.writeFile(filePath, sectionContent.replace(mutation.oldText, ""), "utf-8");
5047
5256
  } else if (mutation.action === "delete_file") {
5048
- await fs20.unlink(filePath).catch(() => {
5257
+ await fs21.unlink(filePath).catch(() => {
5049
5258
  });
5050
5259
  }
5051
5260
  }
@@ -5093,17 +5302,17 @@ async function readAllFiles(dir) {
5093
5302
  async function walk(current) {
5094
5303
  let entries;
5095
5304
  try {
5096
- entries = await fs20.readdir(current, { withFileTypes: true });
5305
+ entries = await fs21.readdir(current, { withFileTypes: true });
5097
5306
  } catch {
5098
5307
  return;
5099
5308
  }
5100
5309
  for (const entry of entries) {
5101
- const fullPath = path20.join(current, entry.name);
5102
- const relativePath = path20.relative(dir, fullPath);
5310
+ const fullPath = path21.join(current, entry.name);
5311
+ const relativePath = path21.relative(dir, fullPath);
5103
5312
  if (entry.isDirectory()) {
5104
5313
  await walk(fullPath);
5105
5314
  } else {
5106
- result[relativePath] = await fs20.readFile(fullPath, "utf-8");
5315
+ result[relativePath] = await fs21.readFile(fullPath, "utf-8");
5107
5316
  }
5108
5317
  }
5109
5318
  }
@@ -5112,20 +5321,27 @@ async function readAllFiles(dir) {
5112
5321
  }
5113
5322
 
5114
5323
  // src/evolve/loop.ts
5324
/**
 * Compute how many mutations may be applied in a given iteration.
 *
 * The full `maxMutations` budget applies for the first 40% of the run
 * (measured as iter / (maxIterations - 1)). After that the cap decays
 * linearly towards 1 at the final iteration and is never lower than 1.
 * A run with at most one iteration always gets the full budget.
 *
 * @param {number} iter - Zero-based iteration index.
 * @param {number} maxIterations - Total number of iterations in the run.
 * @param {number} maxMutations - Mutation budget for the early iterations.
 * @returns {number} The mutation cap for this iteration.
 */
function computeMutationCap(iter, maxIterations, maxMutations) {
  if (maxIterations <= 1) {
    return maxMutations;
  }
  const progress = iter / (maxIterations - 1);
  if (progress <= 0.4) {
    return maxMutations;
  }
  // Fraction of the decay phase already elapsed: 0 at 40% progress, 1 at the end.
  const decayProgress = (progress - 0.4) / 0.6;
  const scale = 1 - decayProgress * (1 - 1 / maxMutations);
  const cap = Math.round(maxMutations * scale);
  return Math.max(1, cap);
}
5115
5331
  async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgress) {
5116
5332
  const history = [];
5117
5333
  let bestScore = -1;
5118
5334
  let bestIteration = 0;
5119
5335
  let baselineScore = 0;
5120
5336
  for (let iter = 0; iter < evolveConfig.maxIterations; iter++) {
5121
- const harnessPath = path21.join(
5337
+ const harnessPath = path22.join(
5122
5338
  workspacePath,
5123
5339
  "iterations",
5124
5340
  iter.toString(),
5125
5341
  "harness"
5126
5342
  );
5127
5343
  try {
5128
- await fs21.access(harnessPath);
5344
+ await fs22.access(harnessPath);
5129
5345
  } catch {
5130
5346
  if (iter === 0) {
5131
5347
  throw new Error(
@@ -5158,6 +5374,29 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5158
5374
  tasksToRun.push(task);
5159
5375
  }
5160
5376
  }
5377
+ const sampleSize = evolveConfig.evalSampleSize;
5378
+ if (sampleSize > 0 && sampleSize < tasksToRun.length) {
5379
+ const shuffled = [...tasksToRun].sort((a, b) => {
5380
+ const hashA = (iter * 31 + a.id.charCodeAt(0)) % 1e3;
5381
+ const hashB = (iter * 31 + b.id.charCodeAt(0)) % 1e3;
5382
+ return hashA - hashB;
5383
+ });
5384
+ const sampled = new Set(shuffled.slice(0, sampleSize).map((t) => t.id));
5385
+ for (const task of tasksToRun) {
5386
+ if (!sampled.has(task.id)) {
5387
+ const prev = prevLog.taskResults[task.id];
5388
+ const prevVal = prev ? prev.score ?? (prev.pass ? 100 : 0) : 0;
5389
+ carriedScores[task.id] = { pass: prevVal >= 50, score: prevVal };
5390
+ onProgress?.({
5391
+ type: "task-skipped",
5392
+ iteration: iter,
5393
+ taskId: task.id,
5394
+ message: `Sampled out ${task.id} (mini-batch ${sampleSize}/${tasksToRun.length})`
5395
+ });
5396
+ }
5397
+ }
5398
+ tasksToRun = tasksToRun.filter((t) => sampled.has(t.id));
5399
+ }
5161
5400
  }
5162
5401
  const { results: evalResults, aggregate: evalAggregate } = await evaluateAll(
5163
5402
  tasksToRun,
@@ -5218,7 +5457,7 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5218
5457
  };
5219
5458
  await writeIterationLog(workspacePath, rollbackLog);
5220
5459
  history.push(rollbackLog);
5221
- const bestHarnessPath = path21.join(
5460
+ const bestHarnessPath = path22.join(
5222
5461
  workspacePath,
5223
5462
  "iterations",
5224
5463
  bestIteration.toString(),
@@ -5236,13 +5475,14 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5236
5475
  kairnConfig,
5237
5476
  evolveConfig.proposerModel
5238
5477
  );
5239
- if (rollbackProposal.mutations.length > evolveConfig.maxMutationsPerIteration) {
5478
+ const rollbackCap = computeMutationCap(iter, evolveConfig.maxIterations, evolveConfig.maxMutationsPerIteration);
5479
+ if (rollbackProposal.mutations.length > rollbackCap) {
5240
5480
  rollbackProposal = {
5241
5481
  ...rollbackProposal,
5242
- mutations: rollbackProposal.mutations.slice(0, evolveConfig.maxMutationsPerIteration)
5482
+ mutations: rollbackProposal.mutations.slice(0, rollbackCap)
5243
5483
  };
5244
5484
  }
5245
- const nextIterDir2 = path21.join(workspacePath, "iterations", (iter + 1).toString());
5485
+ const nextIterDir2 = path22.join(workspacePath, "iterations", (iter + 1).toString());
5246
5486
  await applyMutations(bestHarnessPath, nextIterDir2, rollbackProposal.mutations);
5247
5487
  onProgress?.({
5248
5488
  type: "mutations-applied",
@@ -5250,8 +5490,8 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5250
5490
  mutationCount: rollbackProposal.mutations.length
5251
5491
  });
5252
5492
  } catch {
5253
- const nextIterDir2 = path21.join(workspacePath, "iterations", (iter + 1).toString());
5254
- await copyDir(bestHarnessPath, path21.join(nextIterDir2, "harness"));
5493
+ const nextIterDir2 = path22.join(workspacePath, "iterations", (iter + 1).toString());
5494
+ await copyDir(bestHarnessPath, path22.join(nextIterDir2, "harness"));
5255
5495
  }
5256
5496
  }
5257
5497
  continue;
@@ -5297,10 +5537,11 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5297
5537
  kairnConfig,
5298
5538
  evolveConfig.proposerModel
5299
5539
  );
5300
- if (proposal.mutations.length > evolveConfig.maxMutationsPerIteration) {
5540
+ const iterCap = computeMutationCap(iter, evolveConfig.maxIterations, evolveConfig.maxMutationsPerIteration);
5541
+ if (proposal.mutations.length > iterCap) {
5301
5542
  proposal = {
5302
5543
  ...proposal,
5303
- mutations: proposal.mutations.slice(0, evolveConfig.maxMutationsPerIteration)
5544
+ mutations: proposal.mutations.slice(0, iterCap)
5304
5545
  };
5305
5546
  }
5306
5547
  } catch (err) {
@@ -5310,12 +5551,12 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5310
5551
  iteration: iter,
5311
5552
  message: `Proposer failed: ${errMsg}`
5312
5553
  });
5313
- const nextIterDir2 = path21.join(
5554
+ const nextIterDir2 = path22.join(
5314
5555
  workspacePath,
5315
5556
  "iterations",
5316
5557
  (iter + 1).toString()
5317
5558
  );
5318
- await copyDir(harnessPath, path21.join(nextIterDir2, "harness"));
5559
+ await copyDir(harnessPath, path22.join(nextIterDir2, "harness"));
5319
5560
  const skipLog = {
5320
5561
  iteration: iter,
5321
5562
  score: aggregate,
@@ -5328,7 +5569,7 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5328
5569
  history.push(skipLog);
5329
5570
  continue;
5330
5571
  }
5331
- const nextIterDir = path21.join(
5572
+ const nextIterDir = path22.join(
5332
5573
  workspacePath,
5333
5574
  "iterations",
5334
5575
  (iter + 1).toString()
@@ -5342,7 +5583,7 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5342
5583
  );
5343
5584
  diffPatch = mutationResult.diffPatch;
5344
5585
  } catch {
5345
- await copyDir(harnessPath, path21.join(nextIterDir, "harness"));
5586
+ await copyDir(harnessPath, path22.join(nextIterDir, "harness"));
5346
5587
  }
5347
5588
  onProgress?.({
5348
5589
  type: "mutations-applied",
@@ -5360,6 +5601,62 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5360
5601
  await writeIterationLog(workspacePath, iterLog);
5361
5602
  history.push(iterLog);
5362
5603
  }
5604
+ if (evolveConfig.usePrincipal && history.length >= 2) {
5605
+ onProgress?.({ type: "proposing", iteration: history.length, message: "Principal Proposer synthesizing final harness" });
5606
+ const baselineHarnessPath = path22.join(workspacePath, "iterations", "0", "harness");
5607
+ try {
5608
+ const principalProposal = await propose(
5609
+ history.length,
5610
+ workspacePath,
5611
+ baselineHarnessPath,
5612
+ history,
5613
+ tasks,
5614
+ kairnConfig,
5615
+ evolveConfig.proposerModel
5616
+ );
5617
+ if (principalProposal.mutations.length > evolveConfig.maxMutationsPerIteration) {
5618
+ principalProposal.mutations = principalProposal.mutations.slice(0, evolveConfig.maxMutationsPerIteration);
5619
+ }
5620
+ const principalIterNum = history.length;
5621
+ const principalIterDir = path22.join(workspacePath, "iterations", principalIterNum.toString());
5622
+ const mutResult = await applyMutations(baselineHarnessPath, principalIterDir, principalProposal.mutations);
5623
+ onProgress?.({ type: "iteration-start", iteration: principalIterNum });
5624
+ const { results: principalResults, aggregate: principalAggregate } = await evaluateAll(
5625
+ tasks,
5626
+ mutResult.newHarnessPath,
5627
+ workspacePath,
5628
+ principalIterNum,
5629
+ kairnConfig,
5630
+ onProgress,
5631
+ evolveConfig.runsPerTask,
5632
+ evolveConfig.parallelTasks
5633
+ );
5634
+ onProgress?.({ type: "iteration-scored", iteration: principalIterNum, score: principalAggregate });
5635
+ const principalLog = {
5636
+ iteration: principalIterNum,
5637
+ score: principalAggregate,
5638
+ taskResults: principalResults,
5639
+ proposal: principalProposal,
5640
+ diffPatch: mutResult.diffPatch,
5641
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
5642
+ };
5643
+ await writeIterationLog(workspacePath, principalLog);
5644
+ history.push(principalLog);
5645
+ if (principalAggregate > bestScore) {
5646
+ bestScore = principalAggregate;
5647
+ bestIteration = principalIterNum;
5648
+ }
5649
+ } catch (err) {
5650
+ const errMsg = err instanceof Error ? err.message : String(err);
5651
+ onProgress?.({ type: "proposer-error", iteration: history.length, message: `Principal failed: ${errMsg}` });
5652
+ }
5653
+ }
5654
+ try {
5655
+ const { buildRunSummary: buildRunSummary2, saveRunSummary: saveRunSummary2 } = await Promise.resolve().then(() => (init_memory(), memory_exports));
5656
+ const summary = buildRunSummary2(history, baselineScore, bestScore);
5657
+ await saveRunSummary2(workspacePath, summary);
5658
+ } catch {
5659
+ }
5363
5660
  onProgress?.({
5364
5661
  type: "complete",
5365
5662
  iteration: history.length > 0 ? history.length - 1 : 0,
@@ -5374,8 +5671,8 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5374
5671
  }
5375
5672
 
5376
5673
  // src/evolve/report.ts
5377
- import fs22 from "fs/promises";
5378
- import path22 from "path";
5674
+ import fs23 from "fs/promises";
5675
+ import path23 from "path";
5379
5676
 
5380
5677
  // src/evolve/diagnosis.ts
5381
5678
  function numericScore(s) {
@@ -5425,10 +5722,10 @@ function numericScore2(s) {
5425
5722
  return s.score ?? (s.pass ? 100 : 0);
5426
5723
  }
5427
5724
  async function loadAllIterations(workspacePath) {
5428
- const iterDir = path22.join(workspacePath, "iterations");
5725
+ const iterDir = path23.join(workspacePath, "iterations");
5429
5726
  let entries;
5430
5727
  try {
5431
- entries = await fs22.readdir(iterDir);
5728
+ entries = await fs23.readdir(iterDir);
5432
5729
  } catch {
5433
5730
  return [];
5434
5731
  }
@@ -5442,7 +5739,7 @@ async function loadAllIterations(workspacePath) {
5442
5739
  }
5443
5740
  async function loadTasks(workspacePath) {
5444
5741
  try {
5445
- const content = await fs22.readFile(path22.join(workspacePath, "tasks.yaml"), "utf-8");
5742
+ const content = await fs23.readFile(path23.join(workspacePath, "tasks.yaml"), "utf-8");
5446
5743
  const parsed = yamlParse(content);
5447
5744
  return parsed?.tasks ?? [];
5448
5745
  } catch {
@@ -5453,6 +5750,7 @@ function buildLeaderboard(iterations, tasks) {
5453
5750
  const taskIds = tasks.map((t) => t.id);
5454
5751
  return taskIds.map((taskId) => {
5455
5752
  const scores = {};
5753
+ const variance = {};
5456
5754
  let bestScore = -1;
5457
5755
  let bestIteration = 0;
5458
5756
  for (const iter of iterations) {
@@ -5460,13 +5758,21 @@ function buildLeaderboard(iterations, tasks) {
5460
5758
  if (s) {
5461
5759
  const score = numericScore2(s);
5462
5760
  scores[iter.iteration] = score;
5761
+ if (s.variance) {
5762
+ variance[iter.iteration] = {
5763
+ mean: s.variance.mean,
5764
+ stddev: s.variance.stddev,
5765
+ runs: s.variance.runs
5766
+ };
5767
+ }
5463
5768
  if (score > bestScore) {
5464
5769
  bestScore = score;
5465
5770
  bestIteration = iter.iteration;
5466
5771
  }
5467
5772
  }
5468
5773
  }
5469
- return { taskId, scores, bestIteration, bestScore };
5774
+ const hasVariance = Object.keys(variance).length > 0;
5775
+ return { taskId, scores, bestIteration, bestScore, ...hasVariance ? { variance } : {} };
5470
5776
  });
5471
5777
  }
5472
5778
  function iterationStatus(iter, bestIteration) {
@@ -5502,13 +5808,29 @@ async function generateMarkdownReport(workspacePath) {
5502
5808
  lines.push("");
5503
5809
  lines.push("## Iterations");
5504
5810
  lines.push("");
5505
- lines.push("| Iter | Score | Mutations | Status |");
5506
- lines.push("|------|-------|-----------|--------|");
5811
+ const hasVariance = iterations.some(
5812
+ (iter) => Object.values(iter.taskResults).some((s) => s.variance)
5813
+ );
5814
+ if (hasVariance) {
5815
+ lines.push("| Iter | Score | Mutations | Status |");
5816
+ lines.push("|------|-------|-----------|--------|");
5817
+ } else {
5818
+ lines.push("| Iter | Score | Mutations | Status |");
5819
+ lines.push("|------|-------|-----------|--------|");
5820
+ }
5507
5821
  for (const iter of iterations) {
5508
5822
  const mutations = iter.proposal?.mutations.length ?? 0;
5509
5823
  const mutStr = mutations > 0 ? mutations.toString() : "-";
5510
5824
  const status = iterationStatus(iter, bestIter.iteration);
5511
- lines.push(`| ${iter.iteration} | ${iter.score.toFixed(1)}% | ${mutStr} | ${status} |`);
5825
+ let scoreStr = `${iter.score.toFixed(1)}%`;
5826
+ if (hasVariance) {
5827
+ const stddevs = Object.values(iter.taskResults).map((s) => s.variance?.stddev).filter((v) => v !== void 0);
5828
+ if (stddevs.length > 0) {
5829
+ const avgStddev = stddevs.reduce((a, b) => a + b, 0) / stddevs.length;
5830
+ scoreStr = `${iter.score.toFixed(1)}% \xB1${avgStddev.toFixed(1)}`;
5831
+ }
5832
+ }
5833
+ lines.push(`| ${iter.iteration} | ${scoreStr} | ${mutStr} | ${status} |`);
5512
5834
  }
5513
5835
  lines.push("");
5514
5836
  if (leaderboard.length > 0) {
@@ -5521,7 +5843,10 @@ async function generateMarkdownReport(workspacePath) {
5521
5843
  for (const entry of leaderboard) {
5522
5844
  const scoreCols = iterNums.map((n) => {
5523
5845
  const s = entry.scores[n];
5524
- return s !== void 0 ? `${s.toFixed(0)}%` : "-";
5846
+ if (s === void 0) return "-";
5847
+ const v = entry.variance?.[n];
5848
+ if (v && v.runs > 1) return `${s.toFixed(0)}% \xB1${v.stddev.toFixed(1)}`;
5849
+ return `${s.toFixed(0)}%`;
5525
5850
  });
5526
5851
  lines.push(`| ${entry.taskId} | ${scoreCols.join(" | ")} | ${entry.bestScore.toFixed(0)}% (iter ${entry.bestIteration}) |`);
5527
5852
  }
@@ -5571,25 +5896,30 @@ async function generateJsonReport(workspacePath) {
5571
5896
  bestIteration: bestIter.iteration,
5572
5897
  improvement
5573
5898
  },
5574
- iterations: iterations.map((iter) => ({
5575
- iteration: iter.iteration,
5576
- score: iter.score,
5577
- mutationCount: iter.proposal?.mutations.length ?? 0,
5578
- status: iterationStatus(iter, bestIter.iteration)
5579
- })),
5899
+ iterations: iterations.map((iter) => {
5900
+ const stddevs = Object.values(iter.taskResults).map((s) => s.variance?.stddev).filter((v) => v !== void 0);
5901
+ const avgStddev = stddevs.length > 0 ? stddevs.reduce((a, b) => a + b, 0) / stddevs.length : void 0;
5902
+ return {
5903
+ iteration: iter.iteration,
5904
+ score: iter.score,
5905
+ ...avgStddev !== void 0 ? { stddev: avgStddev } : {},
5906
+ mutationCount: iter.proposal?.mutations.length ?? 0,
5907
+ status: iterationStatus(iter, bestIter.iteration)
5908
+ };
5909
+ }),
5580
5910
  leaderboard,
5581
5911
  counterfactuals
5582
5912
  };
5583
5913
  }
5584
5914
 
5585
5915
  // src/evolve/apply.ts
5586
- import fs23 from "fs/promises";
5587
- import path23 from "path";
5916
+ import fs24 from "fs/promises";
5917
+ import path24 from "path";
5588
5918
  async function listIterations(workspacePath) {
5589
- const iterationsDir = path23.join(workspacePath, "iterations");
5919
+ const iterationsDir = path24.join(workspacePath, "iterations");
5590
5920
  let entries;
5591
5921
  try {
5592
- entries = await fs23.readdir(iterationsDir);
5922
+ entries = await fs24.readdir(iterationsDir);
5593
5923
  } catch {
5594
5924
  return [];
5595
5925
  }
@@ -5598,7 +5928,7 @@ async function listIterations(workspacePath) {
5598
5928
  const n = parseInt(entry, 10);
5599
5929
  if (!isNaN(n)) {
5600
5930
  try {
5601
- await fs23.access(path23.join(iterationsDir, entry, "harness"));
5931
+ await fs24.access(path24.join(iterationsDir, entry, "harness"));
5602
5932
  nums.push(n);
5603
5933
  } catch {
5604
5934
  }
@@ -5624,16 +5954,16 @@ async function listFilesRecursive(dir) {
5624
5954
  async function walk(current) {
5625
5955
  let entries;
5626
5956
  try {
5627
- entries = await fs23.readdir(current, { withFileTypes: true });
5957
+ entries = await fs24.readdir(current, { withFileTypes: true });
5628
5958
  } catch {
5629
5959
  return;
5630
5960
  }
5631
5961
  for (const entry of entries) {
5632
- const fullPath = path23.join(current, entry.name);
5962
+ const fullPath = path24.join(current, entry.name);
5633
5963
  if (entry.isDirectory()) {
5634
5964
  await walk(fullPath);
5635
5965
  } else {
5636
- results.push(path23.relative(dir, fullPath));
5966
+ results.push(path24.relative(dir, fullPath));
5637
5967
  }
5638
5968
  }
5639
5969
  }
@@ -5656,37 +5986,37 @@ async function applyEvolution(workspacePath, projectRoot, targetIteration) {
5656
5986
  } else {
5657
5987
  iter = await findBestIteration(workspacePath, iterations);
5658
5988
  }
5659
- const harnessPath = path23.join(
5989
+ const harnessPath = path24.join(
5660
5990
  workspacePath,
5661
5991
  "iterations",
5662
5992
  iter.toString(),
5663
5993
  "harness"
5664
5994
  );
5665
- const claudeDir = path23.join(projectRoot, ".claude");
5995
+ const claudeDir = path24.join(projectRoot, ".claude");
5666
5996
  const diffPreview = await generateDiff2(claudeDir, harnessPath);
5667
5997
  const currentFiles = await listFilesRecursive(claudeDir);
5668
5998
  const targetFiles = await listFilesRecursive(harnessPath);
5669
5999
  const allPaths = /* @__PURE__ */ new Set([...currentFiles, ...targetFiles]);
5670
6000
  const filesChanged = [];
5671
6001
  for (const filePath of allPaths) {
5672
- const currentContent = await fs23.readFile(path23.join(claudeDir, filePath), "utf-8").catch(() => null);
5673
- const targetContent = await fs23.readFile(path23.join(harnessPath, filePath), "utf-8").catch(() => null);
6002
+ const currentContent = await fs24.readFile(path24.join(claudeDir, filePath), "utf-8").catch(() => null);
6003
+ const targetContent = await fs24.readFile(path24.join(harnessPath, filePath), "utf-8").catch(() => null);
5674
6004
  if (currentContent !== targetContent) {
5675
6005
  filesChanged.push(filePath);
5676
6006
  }
5677
6007
  }
5678
- await fs23.rm(claudeDir, { recursive: true, force: true });
6008
+ await fs24.rm(claudeDir, { recursive: true, force: true });
5679
6009
  await copyDir(harnessPath, claudeDir);
5680
- const harnessMcpJson = path23.join(harnessPath, ".mcp.json");
5681
- const projectMcpJson = path23.join(projectRoot, ".mcp.json");
6010
+ const harnessMcpJson = path24.join(harnessPath, ".mcp.json");
6011
+ const projectMcpJson = path24.join(projectRoot, ".mcp.json");
5682
6012
  try {
5683
- await fs23.access(harnessMcpJson);
5684
- const currentMcp = await fs23.readFile(projectMcpJson, "utf-8").catch(() => null);
5685
- const targetMcp = await fs23.readFile(harnessMcpJson, "utf-8").catch(() => null);
6013
+ await fs24.access(harnessMcpJson);
6014
+ const currentMcp = await fs24.readFile(projectMcpJson, "utf-8").catch(() => null);
6015
+ const targetMcp = await fs24.readFile(harnessMcpJson, "utf-8").catch(() => null);
5686
6016
  if (currentMcp !== targetMcp) {
5687
6017
  filesChanged.push(".mcp.json");
5688
6018
  }
5689
- await fs23.copyFile(harnessMcpJson, projectMcpJson);
6019
+ await fs24.copyFile(harnessMcpJson, projectMcpJson);
5690
6020
  } catch {
5691
6021
  }
5692
6022
  return {
@@ -5706,11 +6036,13 @@ var DEFAULT_CONFIG = {
5706
6036
  runsPerTask: 1,
5707
6037
  maxMutationsPerIteration: 3,
5708
6038
  pruneThreshold: 95,
5709
- maxTaskDrop: 20
6039
+ maxTaskDrop: 20,
6040
+ usePrincipal: false,
6041
+ evalSampleSize: 0
5710
6042
  };
5711
6043
  async function loadEvolveConfigFromWorkspace(workspacePath) {
5712
6044
  try {
5713
- const configStr = await fs24.readFile(path24.join(workspacePath, "config.yaml"), "utf-8");
6045
+ const configStr = await fs25.readFile(path25.join(workspacePath, "config.yaml"), "utf-8");
5714
6046
  const parsed = yamlParse2(configStr);
5715
6047
  return {
5716
6048
  model: parsed.model ?? DEFAULT_CONFIG.model,
@@ -5721,7 +6053,9 @@ async function loadEvolveConfigFromWorkspace(workspacePath) {
5721
6053
  runsPerTask: parsed.runs_per_task ?? DEFAULT_CONFIG.runsPerTask,
5722
6054
  maxMutationsPerIteration: parsed.max_mutations_per_iteration ?? DEFAULT_CONFIG.maxMutationsPerIteration,
5723
6055
  pruneThreshold: parsed.prune_threshold ?? DEFAULT_CONFIG.pruneThreshold,
5724
- maxTaskDrop: parsed.max_task_drop ?? DEFAULT_CONFIG.maxTaskDrop
6056
+ maxTaskDrop: parsed.max_task_drop ?? DEFAULT_CONFIG.maxTaskDrop,
6057
+ usePrincipal: parsed.use_principal ?? DEFAULT_CONFIG.usePrincipal,
6058
+ evalSampleSize: parsed.eval_sample_size ?? DEFAULT_CONFIG.evalSampleSize
5725
6059
  };
5726
6060
  } catch {
5727
6061
  return { ...DEFAULT_CONFIG };
@@ -5732,9 +6066,9 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
5732
6066
  try {
5733
6067
  const projectRoot = process.cwd();
5734
6068
  console.log(ui.section("Evolve Init"));
5735
- const claudeDir = path24.join(projectRoot, ".claude");
6069
+ const claudeDir = path25.join(projectRoot, ".claude");
5736
6070
  try {
5737
- await fs24.access(claudeDir);
6071
+ await fs25.access(claudeDir);
5738
6072
  } catch {
5739
6073
  console.log(ui.error("No .claude/ directory found. Run kairn describe first."));
5740
6074
  process.exit(1);
@@ -5766,7 +6100,7 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
5766
6100
  let addMore = true;
5767
6101
  while (addMore) {
5768
6102
  try {
5769
- addMore = await confirm3({ message: "Add another eval task?", default: false });
6103
+ addMore = await confirm4({ message: "Add another eval task?", default: false });
5770
6104
  } catch {
5771
6105
  addMore = false;
5772
6106
  }
@@ -5784,7 +6118,7 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
5784
6118
  if (config) {
5785
6119
  let claudeMd = "";
5786
6120
  try {
5787
- claudeMd = await fs24.readFile(path24.join(claudeDir, "CLAUDE.md"), "utf-8");
6121
+ claudeMd = await fs25.readFile(path25.join(claudeDir, "CLAUDE.md"), "utf-8");
5788
6122
  } catch {
5789
6123
  }
5790
6124
  const profile = await buildProjectProfile(projectRoot);
@@ -5815,16 +6149,16 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
5815
6149
  evolveCommand.command("baseline").description("Snapshot current .claude/ directory as baseline").action(async () => {
5816
6150
  try {
5817
6151
  const projectRoot = process.cwd();
5818
- const workspace = path24.join(projectRoot, ".kairn-evolve");
6152
+ const workspace = path25.join(projectRoot, ".kairn-evolve");
5819
6153
  console.log(ui.section("Evolve Baseline"));
5820
6154
  try {
5821
- await fs24.access(workspace);
6155
+ await fs25.access(workspace);
5822
6156
  } catch {
5823
6157
  console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
5824
6158
  process.exit(1);
5825
6159
  }
5826
6160
  await snapshotBaseline(projectRoot, workspace);
5827
- const baselineDir = path24.join(workspace, "baseline");
6161
+ const baselineDir = path25.join(workspace, "baseline");
5828
6162
  const fileCount = await countFiles(baselineDir);
5829
6163
  console.log(ui.success(`Baseline snapshot created (${fileCount} files)`));
5830
6164
  } catch (err) {
@@ -5833,21 +6167,21 @@ evolveCommand.command("baseline").description("Snapshot current .claude/ directo
5833
6167
  process.exit(1);
5834
6168
  }
5835
6169
  });
5836
- evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").option("--parallel <n>", "Run up to N tasks concurrently", "1").option("--max-mutations <n>", "Max mutations per iteration", "3").option("--prune-threshold <n>", "Skip tasks scoring above this on middle iterations", "95").option("--max-task-drop <n>", "Roll back if any task drops more than N points", "20").action(async (options) => {
6170
+ evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").option("--parallel <n>", "Run up to N tasks concurrently", "1").option("--max-mutations <n>", "Max mutations per iteration", "3").option("--prune-threshold <n>", "Skip tasks scoring above this on middle iterations", "95").option("--max-task-drop <n>", "Roll back if any task drops more than N points", "20").option("--principal", "Run Principal Proposer as final iteration").option("--eval-sample <n>", "Sample N tasks per middle iteration (0 = all)", "0").action(async (options) => {
5837
6171
  try {
5838
6172
  const projectRoot = process.cwd();
5839
- const workspace = path24.join(projectRoot, ".kairn-evolve");
6173
+ const workspace = path25.join(projectRoot, ".kairn-evolve");
5840
6174
  console.log(ui.section("Evolve Run"));
5841
6175
  try {
5842
- await fs24.access(workspace);
6176
+ await fs25.access(workspace);
5843
6177
  } catch {
5844
6178
  console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
5845
6179
  process.exit(1);
5846
6180
  }
5847
- const tasksPath = path24.join(workspace, "tasks.yaml");
6181
+ const tasksPath = path25.join(workspace, "tasks.yaml");
5848
6182
  let tasksContent;
5849
6183
  try {
5850
- tasksContent = await fs24.readFile(tasksPath, "utf-8");
6184
+ tasksContent = await fs25.readFile(tasksPath, "utf-8");
5851
6185
  } catch {
5852
6186
  console.log(ui.error("No tasks.yaml found. Run kairn evolve init first."));
5853
6187
  process.exit(1);
@@ -5866,15 +6200,15 @@ evolveCommand.command("run").description("Run tasks against the current harness"
5866
6200
  console.log(ui.info(`Running ${tasksToRun.length} task(s)...`));
5867
6201
  console.log("");
5868
6202
  const config = await loadConfig();
5869
- const harnessPath = path24.join(projectRoot, ".claude");
6203
+ const harnessPath = path25.join(projectRoot, ".claude");
5870
6204
  const results = [];
5871
6205
  for (const task of tasksToRun) {
5872
- const traceDir = path24.join(workspace, "traces", "0", task.id);
6206
+ const traceDir = path25.join(workspace, "traces", "0", task.id);
5873
6207
  const spinner = ora2(`Running: ${task.id}`).start();
5874
6208
  const result = await runTask(task, harnessPath, traceDir, 0);
5875
6209
  if (config) {
5876
- const stdout = await fs24.readFile(path24.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
5877
- const stderr = await fs24.readFile(path24.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
6210
+ const stdout = await fs25.readFile(path25.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
6211
+ const stderr = await fs25.readFile(path25.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
5878
6212
  const score = await scoreTask(task, traceDir, stdout, stderr, config);
5879
6213
  result.score = score;
5880
6214
  await writeScore(traceDir, score);
@@ -5932,8 +6266,17 @@ evolveCommand.command("run").description("Run tasks against the current harness"
5932
6266
  process.exit(1);
5933
6267
  }
5934
6268
  evolveConfig.maxTaskDrop = maxTaskDrop;
6269
+ if (options.principal) {
6270
+ evolveConfig.usePrincipal = true;
6271
+ }
6272
+ const evalSample = parseInt(options.evalSample ?? "0", 10);
6273
+ if (isNaN(evalSample) || evalSample < 0) {
6274
+ console.log(ui.error("--eval-sample must be a non-negative integer"));
6275
+ process.exit(1);
6276
+ }
6277
+ evolveConfig.evalSampleSize = evalSample;
5935
6278
  try {
5936
- await fs24.access(path24.join(workspace, "iterations", "0", "harness"));
6279
+ await fs25.access(path25.join(workspace, "iterations", "0", "harness"));
5937
6280
  } catch {
5938
6281
  console.log(ui.error("No baseline harness found. Run kairn evolve baseline first."));
5939
6282
  process.exit(1);
@@ -6027,10 +6370,10 @@ evolveCommand.command("run").description("Run tasks against the current harness"
6027
6370
  evolveCommand.command("apply").description("Apply the best evolved harness to your project").option("--iter <n>", "Apply a specific iteration instead of the best").option("--force", "Apply even if git working tree is dirty").option("--no-commit", "Skip automatic git commit after applying").action(async (options) => {
6028
6371
  try {
6029
6372
  const projectRoot = process.cwd();
6030
- const workspace = path24.join(projectRoot, ".kairn-evolve");
6373
+ const workspace = path25.join(projectRoot, ".kairn-evolve");
6031
6374
  console.log(ui.section("Evolve Apply"));
6032
6375
  try {
6033
- await fs24.access(workspace);
6376
+ await fs25.access(workspace);
6034
6377
  } catch {
6035
6378
  console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
6036
6379
  process.exit(1);
@@ -6071,9 +6414,9 @@ evolveCommand.command("apply").description("Apply the best evolved harness to yo
6071
6414
  evolveCommand.command("report").description("Generate a summary report of the evolution run").option("--json", "Output machine-readable JSON instead of Markdown").action(async (options) => {
6072
6415
  try {
6073
6416
  const projectRoot = process.cwd();
6074
- const workspace = path24.join(projectRoot, ".kairn-evolve");
6417
+ const workspace = path25.join(projectRoot, ".kairn-evolve");
6075
6418
  try {
6076
- await fs24.access(workspace);
6419
+ await fs25.access(workspace);
6077
6420
  } catch {
6078
6421
  console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
6079
6422
  process.exit(1);
@@ -6094,23 +6437,23 @@ evolveCommand.command("report").description("Generate a summary report of the ev
6094
6437
  evolveCommand.command("diff <iter1> <iter2>").description("Show harness changes between two iterations").action(async (iter1Str, iter2Str) => {
6095
6438
  try {
6096
6439
  const projectRoot = process.cwd();
6097
- const workspace = path24.join(projectRoot, ".kairn-evolve");
6440
+ const workspace = path25.join(projectRoot, ".kairn-evolve");
6098
6441
  const iter1 = parseInt(iter1Str, 10);
6099
6442
  const iter2 = parseInt(iter2Str, 10);
6100
6443
  if (isNaN(iter1) || isNaN(iter2)) {
6101
6444
  console.log(ui.error("Both arguments must be integers (iteration numbers)"));
6102
6445
  process.exit(1);
6103
6446
  }
6104
- const harness1 = path24.join(workspace, "iterations", iter1.toString(), "harness");
6105
- const harness2 = path24.join(workspace, "iterations", iter2.toString(), "harness");
6447
+ const harness1 = path25.join(workspace, "iterations", iter1.toString(), "harness");
6448
+ const harness2 = path25.join(workspace, "iterations", iter2.toString(), "harness");
6106
6449
  try {
6107
- await fs24.access(harness1);
6450
+ await fs25.access(harness1);
6108
6451
  } catch {
6109
6452
  console.log(ui.error(`Iteration ${iter1} harness not found at ${harness1}`));
6110
6453
  process.exit(1);
6111
6454
  }
6112
6455
  try {
6113
- await fs24.access(harness2);
6456
+ await fs25.access(harness2);
6114
6457
  } catch {
6115
6458
  console.log(ui.error(`Iteration ${iter2} harness not found at ${harness2}`));
6116
6459
  process.exit(1);
@@ -6165,10 +6508,10 @@ evolveCommand.command("diff <iter1> <iter2>").description("Show harness changes
6165
6508
  async function countFiles(dir) {
6166
6509
  let count = 0;
6167
6510
  try {
6168
- const entries = await fs24.readdir(dir, { withFileTypes: true });
6511
+ const entries = await fs25.readdir(dir, { withFileTypes: true });
6169
6512
  for (const entry of entries) {
6170
6513
  if (entry.isDirectory()) {
6171
- count += await countFiles(path24.join(dir, entry.name));
6514
+ count += await countFiles(path25.join(dir, entry.name));
6172
6515
  } else {
6173
6516
  count++;
6174
6517
  }
@@ -6179,10 +6522,12 @@ async function countFiles(dir) {
6179
6522
  }
6180
6523
 
6181
6524
  // src/cli.ts
6525
+ var require2 = createRequire(import.meta.url);
6526
+ var pkg = require2("../package.json");
6182
6527
  var program = new Command12();
6183
6528
  program.name("kairn").description(
6184
6529
  "Compile natural language intent into optimized Claude Code environments"
6185
- ).version("1.9.0").option("--no-color", "Disable colored output");
6530
+ ).version(pkg.version).option("--no-color", "Disable colored output");
6186
6531
  program.addCommand(initCommand);
6187
6532
  program.addCommand(describeCommand);
6188
6533
  program.addCommand(optimizeCommand);