kairn-cli 2.2.10 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,10 +1,11 @@
1
1
  // src/cli.ts
2
2
  import { Command as Command12 } from "commander";
3
3
  import chalk15 from "chalk";
4
+ import { createRequire } from "module";
4
5
 
5
6
  // src/commands/init.ts
6
7
  import { Command } from "commander";
7
- import { input, password, select } from "@inquirer/prompts";
8
+ import { confirm, input, password, select } from "@inquirer/prompts";
8
9
  import chalk3 from "chalk";
9
10
  import Anthropic from "@anthropic-ai/sdk";
10
11
  import OpenAI from "openai";
@@ -62,6 +63,59 @@ async function saveConfig(config) {
62
63
  await fs.writeFile(CONFIG_PATH, JSON.stringify(config, null, 2), "utf-8");
63
64
  }
64
65
 
66
+ // src/auth/keychain.ts
67
+ import { exec } from "child_process";
68
+ import { promisify } from "util";
69
+ var execAsync = promisify(exec);
70
+ var KEYCHAIN_SERVICE = "Claude Code-credentials";
71
+ var TOKEN_EXPIRY_BUFFER_MS = 6e4;
72
/**
 * Parse the raw JSON payload read from the Claude Code keychain entry.
 *
 * The payload must be a JSON object containing a `claudeAiOauth` object with:
 *   - `accessToken`: non-empty string
 *   - `refreshToken`: string (may be empty)
 *   - `expiresAt`: finite number (compared against `Date.now()` by `isTokenExpired`)
 *   - `subscriptionType`: optional string, reported as "unknown" when absent
 *
 * @param {string} raw - Keychain entry contents.
 * @returns {{accessToken: string, refreshToken: string, expiresAt: number, subscriptionType: string} | null}
 *   The validated credentials, or `null` when the payload is not valid JSON
 *   or does not have the expected shape.
 */
function parseKeychainCredentials(raw) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Malformed JSON is an expected condition (empty or foreign entry).
    return null;
  }
  if (typeof parsed !== "object" || parsed === null) return null;

  const oauth = parsed["claudeAiOauth"];
  if (typeof oauth !== "object" || oauth === null) return null;

  const { accessToken, refreshToken, expiresAt, subscriptionType } = oauth;
  if (typeof accessToken !== "string" || !accessToken) return null;
  if (typeof refreshToken !== "string") return null;
  // Number.isFinite also rejects non-numbers. Infinity must be refused
  // explicitly: JSON.parse("1e999") yields Infinity, and an infinite expiry
  // would never compare as expired.
  if (!Number.isFinite(expiresAt)) return null;

  return {
    accessToken,
    refreshToken,
    expiresAt,
    subscriptionType: typeof subscriptionType === "string" ? subscriptionType : "unknown"
  };
}
98
/**
 * Report whether the credentials should be treated as expired.
 *
 * The current time is advanced by TOKEN_EXPIRY_BUFFER_MS before comparing, so
 * a token that is about to lapse is already reported as expired.
 *
 * @param {{expiresAt: number}} credentials - Parsed keychain credentials.
 * @returns {boolean} `true` when now + buffer has reached `expiresAt`.
 */
function isTokenExpired(credentials) {
  const deadline = Date.now() + TOKEN_EXPIRY_BUFFER_MS;
  const { expiresAt } = credentials;
  return deadline >= expiresAt;
}
101
/**
 * Read the Claude Code OAuth credentials from the macOS keychain.
 *
 * Runs `security find-generic-password -s <KEYCHAIN_SERVICE> [-a <account>] -w`
 * and hands the trimmed output to `parseKeychainCredentials`.
 *
 * The command is executed with `execFile` and an argument array rather than a
 * shell string: `account` is caller-supplied, and interpolating it into a
 * shell command would let quotes, backticks or `$()` in the value run
 * arbitrary commands.
 *
 * @param {string} [account] - Optional keychain account name to narrow the lookup.
 * @returns {Promise<object | null>} Parsed credentials, or `null` when not on
 *   macOS, when the lookup fails or times out (5 s), or when the stored
 *   payload is not valid.
 */
async function readClaudeCodeCredentials(account) {
  if (process.platform !== "darwin") return null;
  try {
    // Function-local import: only this lookup needs execFile.
    const { execFile } = await import("child_process");
    const args = ["find-generic-password", "-s", KEYCHAIN_SERVICE];
    if (account) args.push("-a", String(account));
    args.push("-w");
    const { stdout } = await promisify(execFile)("security", args, { timeout: 5e3 });
    return parseKeychainCredentials(stdout.trim());
  } catch {
    // Best-effort lookup: a missing entry, denied access or timeout all mean
    // "no usable credentials" to the callers.
    return null;
  }
}
112
/**
 * Resolve a currently usable Claude Code OAuth access token.
 *
 * @param {string} [account] - Optional keychain account name, forwarded to
 *   `readClaudeCodeCredentials`.
 * @returns {Promise<string | null>} The access token, or `null` when no
 *   credentials could be read or `isTokenExpired` reports them as expired.
 */
async function getAccessToken(account) {
  const credentials = await readClaudeCodeCredentials(account);
  const usable = Boolean(credentials) && !isTokenExpired(credentials);
  return usable ? credentials.accessToken : null;
}
118
+
65
119
  // src/providers.ts
66
120
  var PROVIDER_CONFIGS = {
67
121
  anthropic: {
@@ -508,30 +562,49 @@ var initCommand = new Command("init").description("Set up Kairn with your API ke
508
562
  choices: PROVIDER_MODELS[provider]
509
563
  });
510
564
  }
511
- const apiKey = await password({
512
- message: `${providerDisplayName} API key${provider === "other" ? " (Enter to skip)" : ""}`,
513
- mask: "*"
514
- });
515
- if (!apiKey && provider !== "other") {
516
- console.log(ui.error("No API key provided. Aborting."));
517
- process.exit(1);
565
+ let apiKey = "";
566
+ let authType = "api-key";
567
+ if (provider === "anthropic") {
568
+ const oauthToken = await getAccessToken();
569
+ if (oauthToken) {
570
+ const useOAuth = await confirm({
571
+ message: "Claude Code subscription detected. Use it instead of an API key? (experimental \u2014 may break)",
572
+ default: true
573
+ });
574
+ if (useOAuth) {
575
+ authType = "claude-code-oauth";
576
+ console.log(ui.warn("Using Claude Code OAuth token. This is undocumented and may break at any time."));
577
+ console.log(ui.success("OAuth token validated"));
578
+ }
579
+ }
518
580
  }
519
- if (apiKey) {
520
- console.log(chalk3.dim("\n Verifying API key..."));
521
- const valid = await verifyKey(provider, apiKey, baseURL, model);
522
- if (!valid) {
523
- console.log(ui.error("Invalid API key. Check your key and try again."));
581
+ if (authType === "api-key") {
582
+ apiKey = await password({
583
+ message: `${providerDisplayName} API key${provider === "other" ? " (Enter to skip)" : ""}`,
584
+ mask: "*"
585
+ });
586
+ if (!apiKey && provider !== "other") {
587
+ console.log(ui.error("No API key provided. Aborting."));
524
588
  process.exit(1);
525
589
  }
526
- console.log(ui.success("API key verified"));
527
- } else {
528
- console.log(ui.warn("No API key \u2014 skipping verification"));
590
+ if (apiKey) {
591
+ console.log(chalk3.dim("\n Verifying API key..."));
592
+ const valid = await verifyKey(provider, apiKey, baseURL, model);
593
+ if (!valid) {
594
+ console.log(ui.error("Invalid API key. Check your key and try again."));
595
+ process.exit(1);
596
+ }
597
+ console.log(ui.success("API key verified"));
598
+ } else {
599
+ console.log(ui.warn("No API key \u2014 skipping verification"));
600
+ }
529
601
  }
530
602
  const config = {
531
603
  provider,
532
- api_key: apiKey || "",
604
+ api_key: apiKey,
533
605
  model,
534
606
  ...baseURL ? { base_url: baseURL } : {},
607
+ ...authType !== "api-key" ? { auth_type: authType } : {},
535
608
  default_runtime: "claude-code",
536
609
  created_at: (/* @__PURE__ */ new Date()).toISOString()
537
610
  };
@@ -555,7 +628,7 @@ var initCommand = new Command("init").description("Set up Kairn with your API ke
555
628
 
556
629
  // src/commands/describe.ts
557
630
  import { Command as Command2 } from "commander";
558
- import { input as input2, confirm, select as select2 } from "@inquirer/prompts";
631
+ import { input as input2, confirm as confirm2, select as select2 } from "@inquirer/prompts";
559
632
  import chalk5 from "chalk";
560
633
 
561
634
  // src/compiler/compile.ts
@@ -1240,8 +1313,18 @@ async function callLLM(config, userMessage, options) {
1240
1313
  const { systemPrompt } = options;
1241
1314
  const jsonMode = options.jsonMode ?? false;
1242
1315
  const providerName = getProviderName(config.provider);
1316
+ let apiKey = config.api_key;
1317
+ if (config.auth_type === "claude-code-oauth") {
1318
+ const oauthToken = await getAccessToken();
1319
+ if (!oauthToken) {
1320
+ throw new Error(
1321
+ "Claude Code OAuth token unavailable or expired. Run `kairn init` to reconfigure, or launch Claude Code to refresh the token."
1322
+ );
1323
+ }
1324
+ apiKey = oauthToken;
1325
+ }
1243
1326
  if (config.provider === "anthropic") {
1244
- const client2 = new Anthropic2({ apiKey: config.api_key });
1327
+ const client2 = new Anthropic2({ apiKey });
1245
1328
  const messages = [
1246
1329
  { role: "user", content: userMessage }
1247
1330
  ];
@@ -1262,7 +1345,7 @@ async function callLLM(config, userMessage, options) {
1262
1345
  }
1263
1346
  }
1264
1347
  const resolvedBaseURL = getBaseURL(config.provider, config.base_url);
1265
- const clientOptions = { apiKey: config.api_key };
1348
+ const clientOptions = { apiKey };
1266
1349
  if (resolvedBaseURL) clientOptions.baseURL = resolvedBaseURL;
1267
1350
  const client = new OpenAI2(clientOptions);
1268
1351
  try {
@@ -2441,7 +2524,7 @@ Autonomy level: ${autonomyLevel} (${autonomyLabel(autonomyLevel)})`;
2441
2524
  console.log("");
2442
2525
  }
2443
2526
  }
2444
- const proceed = options.yes || await confirm({
2527
+ const proceed = options.yes || await confirm2({
2445
2528
  message: "Generate environment in current directory?",
2446
2529
  default: true
2447
2530
  });
@@ -2656,7 +2739,7 @@ var updateRegistryCommand = new Command5("update-registry").description("Fetch t
2656
2739
 
2657
2740
  // src/commands/optimize.ts
2658
2741
  import { Command as Command6 } from "commander";
2659
- import { confirm as confirm2 } from "@inquirer/prompts";
2742
+ import { confirm as confirm3 } from "@inquirer/prompts";
2660
2743
  import chalk9 from "chalk";
2661
2744
  import ora from "ora";
2662
2745
  import fs12 from "fs/promises";
@@ -2718,7 +2801,7 @@ function detectFramework(deps) {
2718
2801
  ];
2719
2802
  const detected = [];
2720
2803
  for (const [packages, name] of frameworks) {
2721
- if (packages.some((pkg) => deps.includes(pkg))) {
2804
+ if (packages.some((pkg2) => deps.includes(pkg2))) {
2722
2805
  detected.push(name);
2723
2806
  }
2724
2807
  }
@@ -2742,11 +2825,11 @@ function extractEnvKeys(content) {
2742
2825
  return keys;
2743
2826
  }
2744
2827
  async function scanProject(dir) {
2745
- const pkg = await readJsonSafe(path11.join(dir, "package.json"));
2746
- const deps = pkg?.dependencies ? Object.keys(pkg.dependencies) : [];
2747
- const devDeps = pkg?.devDependencies ? Object.keys(pkg.devDependencies) : [];
2828
+ const pkg2 = await readJsonSafe(path11.join(dir, "package.json"));
2829
+ const deps = pkg2?.dependencies ? Object.keys(pkg2.dependencies) : [];
2830
+ const devDeps = pkg2?.devDependencies ? Object.keys(pkg2.devDependencies) : [];
2748
2831
  const allDeps = [...deps, ...devDeps];
2749
- const scripts = pkg?.scripts || {};
2832
+ const scripts = pkg2?.scripts || {};
2750
2833
  const rootFiles = await listDirSafe(dir);
2751
2834
  const keyFiles = rootFiles.filter(
2752
2835
  (f) => [
@@ -2808,8 +2891,8 @@ async function scanProject(dir) {
2808
2891
  existingSkills = await listDirSafe(path11.join(claudeDir, "skills"));
2809
2892
  existingAgents = (await listDirSafe(path11.join(claudeDir, "agents"))).filter((f) => f.endsWith(".md")).map((f) => f.replace(".md", ""));
2810
2893
  }
2811
- const name = pkg?.name || path11.basename(dir);
2812
- const description = pkg?.description || "";
2894
+ const name = pkg2?.name || path11.basename(dir);
2895
+ const description = pkg2?.description || "";
2813
2896
  return {
2814
2897
  name,
2815
2898
  description,
@@ -3028,7 +3111,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
3028
3111
  }
3029
3112
  if (!options.yes) {
3030
3113
  console.log("");
3031
- const proceed = await confirm2({
3114
+ const proceed = await confirm3({
3032
3115
  message: "Generate optimized environment? This will overwrite existing .claude/ files.",
3033
3116
  default: false
3034
3117
  });
@@ -3040,7 +3123,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
3040
3123
  } else {
3041
3124
  console.log(chalk9.dim("\n No existing .claude/ directory found \u2014 generating from scratch.\n"));
3042
3125
  if (!options.yes) {
3043
- const proceed = await confirm2({
3126
+ const proceed = await confirm3({
3044
3127
  message: "Generate Claude Code environment for this project?",
3045
3128
  default: true
3046
3129
  });
@@ -3103,7 +3186,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
3103
3186
  }
3104
3187
  }
3105
3188
  console.log("");
3106
- const apply = await confirm2({
3189
+ const apply = await confirm3({
3107
3190
  message: "Apply these changes?",
3108
3191
  default: true
3109
3192
  });
@@ -3702,7 +3785,7 @@ import ora2 from "ora";
3702
3785
  import fs24 from "fs/promises";
3703
3786
  import path24 from "path";
3704
3787
  import { parse as yamlParse2 } from "yaml";
3705
- import { confirm as confirm3, select as select4 } from "@inquirer/prompts";
3788
+ import { confirm as confirm4, select as select4 } from "@inquirer/prompts";
3706
3789
 
3707
3790
  // src/evolve/init.ts
3708
3791
  import fs15 from "fs/promises";
@@ -3945,14 +4028,14 @@ async function buildProjectProfile(projectRoot) {
3945
4028
  path15.join(projectRoot, "package.json"),
3946
4029
  "utf-8"
3947
4030
  );
3948
- const pkg = JSON.parse(pkgStr);
4031
+ const pkg2 = JSON.parse(pkgStr);
3949
4032
  profile.language = "typescript";
3950
- if (pkg.scripts && typeof pkg.scripts === "object") {
3951
- profile.scripts = pkg.scripts;
4033
+ if (pkg2.scripts && typeof pkg2.scripts === "object") {
4034
+ profile.scripts = pkg2.scripts;
3952
4035
  }
3953
4036
  const deps = {
3954
- ...pkg.dependencies ?? {},
3955
- ...pkg.devDependencies ?? {}
4037
+ ...pkg2.dependencies ?? {},
4038
+ ...pkg2.devDependencies ?? {}
3956
4039
  };
3957
4040
  if (deps.next) {
3958
4041
  profile.framework = "Next.js";
@@ -4051,8 +4134,8 @@ async function copyDir(src, dest) {
4051
4134
  }
4052
4135
 
4053
4136
  // src/evolve/runner.ts
4054
- import { exec as exec2, spawn } from "child_process";
4055
- import { promisify as promisify2 } from "util";
4137
+ import { exec as exec3, spawn } from "child_process";
4138
+ import { promisify as promisify3 } from "util";
4056
4139
  import fs18 from "fs/promises";
4057
4140
  import os3 from "os";
4058
4141
  import path18 from "path";
@@ -4177,11 +4260,11 @@ async function loadIterationLog(workspacePath, iteration) {
4177
4260
  }
4178
4261
 
4179
4262
  // src/evolve/exec.ts
4180
- import { exec } from "child_process";
4181
- import { promisify } from "util";
4182
- var execAsync = promisify(exec);
4263
+ import { exec as exec2 } from "child_process";
4264
+ import { promisify as promisify2 } from "util";
4265
+ var execAsync2 = promisify2(exec2);
4183
4266
  async function execCommand(cmd, cwd, timeoutMs = 3e4) {
4184
- return execAsync(cmd, { cwd, timeout: timeoutMs });
4267
+ return execAsync2(cmd, { cwd, timeout: timeoutMs });
4185
4268
  }
4186
4269
 
4187
4270
  // src/evolve/scorers.ts
@@ -4341,21 +4424,47 @@ async function rubricScorer(task, workspacePath, stdout, stderr, config) {
4341
4424
  breakdown
4342
4425
  };
4343
4426
  }
4427
/**
 * Annotate a failing score with a best-effort failure category.
 *
 * Categories are chosen by keyword heuristics over the task output, checked
 * in this order (first match wins):
 *   - "task":    setup errors, or missing commands/files
 *   - "model":   API errors, rate limits, token/context limits, overload
 *   - "repo":    pre-existing build failures, merge conflicts, dirty tree
 *   - "harness": no keyword matched and the score is in [20, 80)
 *   - "unknown": none of the above
 *
 * @param {{pass: boolean, score?: number}} score - Scorer result.
 * @param {string} stdout - Captured standard output of the task run.
 * @param {string} stderr - Captured standard error of the task run.
 * @returns {object} `score` itself when it passed; otherwise a copy with
 *   `failureCategory` and `failureReason` added.
 */
function classifyFailure(score, stdout, stderr) {
  if (score.pass) return score;

  // Coerce so a missing stream cannot throw on `.includes` below.
  const out = String(stdout ?? "");
  const err = String(stderr ?? "");
  const combined = `${out}\n${err}`.toLowerCase();
  const mentions = (needle) => combined.includes(needle);
  const scoreValue = score.score ?? 0;

  // The "[setup]" / "Error" check is deliberately case-sensitive and limited
  // to stderr; every other check runs on the lower-cased combined output.
  const setupFailed = err.includes("[setup]") && err.includes("Error");
  // Match 429 only as a standalone number, so digit runs such as
  // "1429 tests" are not mistaken for an HTTP 429 status.
  const sawHttp429 = /\b429\b/.test(combined);

  let failureCategory = "unknown";
  let failureReason = "";
  if (setupFailed || mentions("command not found") || mentions("no such file or directory")) {
    failureCategory = "task";
    failureReason = "Task setup failed or references missing resources";
  } else if (
    mentions("token limit") ||
    mentions("context length") ||
    mentions("rate limit") ||
    mentions("api error") ||
    sawHttp429 ||
    mentions("overloaded")
  ) {
    failureCategory = "model";
    failureReason = "Model API error, token limit, or rate limit";
  } else if (
    (mentions("build failed") && mentions("before")) ||
    mentions("merge conflict") ||
    mentions("git dirty") ||
    mentions("uncommitted changes")
  ) {
    failureCategory = "repo";
    failureReason = "Pre-existing repo issues (build failure, dirty state)";
  } else if (scoreValue >= 20 && scoreValue < 80) {
    failureCategory = "harness";
    failureReason = "Agent attempted the task but did not follow harness conventions";
  }
  return { ...score, failureCategory, failureReason };
}
4344
4449
  async function scoreTask(task, workspacePath, stdout, stderr, config) {
4450
+ let score;
4345
4451
  if (task.scoring === "pass-fail") {
4346
- return passFailScorer(task, workspacePath, stdout, stderr);
4347
- }
4348
- if (task.scoring === "llm-judge" && config) {
4349
- return llmJudgeScorer(task, workspacePath, stdout, stderr, config);
4452
+ score = await passFailScorer(task, workspacePath, stdout, stderr);
4453
+ } else if (task.scoring === "llm-judge" && config) {
4454
+ score = await llmJudgeScorer(task, workspacePath, stdout, stderr, config);
4455
+ } else if (task.scoring === "rubric" && config) {
4456
+ score = await rubricScorer(task, workspacePath, stdout, stderr, config);
4457
+ } else {
4458
+ score = await passFailScorer(task, workspacePath, stdout, stderr);
4350
4459
  }
4351
- if (task.scoring === "rubric" && config) {
4352
- return rubricScorer(task, workspacePath, stdout, stderr, config);
4460
+ if (!score.pass) {
4461
+ score = classifyFailure(score, stdout, stderr);
4353
4462
  }
4354
- return passFailScorer(task, workspacePath, stdout, stderr);
4463
+ return score;
4355
4464
  }
4356
4465
 
4357
4466
  // src/evolve/runner.ts
4358
- var execAsync2 = promisify2(exec2);
4467
+ var execAsync3 = promisify3(exec3);
4359
4468
  var COPY_SKIP_DIRS = /* @__PURE__ */ new Set([".git", "node_modules", ".kairn-evolve", ".claude"]);
4360
4469
  async function deployMcpJson(harnessPath, workDir) {
4361
4470
  const src = path18.join(harnessPath, ".mcp.json");
@@ -4365,12 +4474,12 @@ async function deployMcpJson(harnessPath, workDir) {
4365
4474
  async function createIsolatedWorkspace(projectRoot, harnessPath) {
4366
4475
  const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
4367
4476
  try {
4368
- await execAsync2("git rev-parse --is-inside-work-tree", {
4477
+ await execAsync3("git rev-parse --is-inside-work-tree", {
4369
4478
  cwd: projectRoot,
4370
4479
  timeout: 5e3
4371
4480
  });
4372
4481
  const tmpDir2 = path18.join(os3.tmpdir(), `kairn-evolve-wt-${suffix}`);
4373
- await execAsync2(`git worktree add --detach "${tmpDir2}" HEAD`, {
4482
+ await execAsync3(`git worktree add --detach "${tmpDir2}" HEAD`, {
4374
4483
  cwd: projectRoot,
4375
4484
  timeout: 3e4
4376
4485
  });
@@ -4409,14 +4518,14 @@ async function copyProjectDir(src, dest) {
4409
4518
  async function cleanupIsolatedWorkspace(workDir, isWorktree, projectRoot) {
4410
4519
  if (isWorktree) {
4411
4520
  try {
4412
- await execAsync2(`git worktree remove "${workDir}" --force`, {
4521
+ await execAsync3(`git worktree remove "${workDir}" --force`, {
4413
4522
  cwd: projectRoot,
4414
4523
  timeout: 1e4
4415
4524
  });
4416
4525
  } catch {
4417
4526
  await fs18.rm(workDir, { recursive: true, force: true }).catch(() => {
4418
4527
  });
4419
- await execAsync2("git worktree prune", {
4528
+ await execAsync3("git worktree prune", {
4420
4529
  cwd: projectRoot,
4421
4530
  timeout: 5e3
4422
4531
  }).catch(() => {
@@ -4437,7 +4546,7 @@ async function runTask(task, harnessPath, traceDir, iteration, projectRoot) {
4437
4546
  let setupStderr = "";
4438
4547
  if (task.setup.trim()) {
4439
4548
  try {
4440
- await execAsync2(task.setup, { cwd: workDir, timeout: 6e4 });
4549
+ await execAsync3(task.setup, { cwd: workDir, timeout: 6e4 });
4441
4550
  } catch (err) {
4442
4551
  setupStderr = err instanceof Error ? err.message : String(err);
4443
4552
  }
@@ -5453,6 +5562,7 @@ function buildLeaderboard(iterations, tasks) {
5453
5562
  const taskIds = tasks.map((t) => t.id);
5454
5563
  return taskIds.map((taskId) => {
5455
5564
  const scores = {};
5565
+ const variance = {};
5456
5566
  let bestScore = -1;
5457
5567
  let bestIteration = 0;
5458
5568
  for (const iter of iterations) {
@@ -5460,13 +5570,21 @@ function buildLeaderboard(iterations, tasks) {
5460
5570
  if (s) {
5461
5571
  const score = numericScore2(s);
5462
5572
  scores[iter.iteration] = score;
5573
+ if (s.variance) {
5574
+ variance[iter.iteration] = {
5575
+ mean: s.variance.mean,
5576
+ stddev: s.variance.stddev,
5577
+ runs: s.variance.runs
5578
+ };
5579
+ }
5463
5580
  if (score > bestScore) {
5464
5581
  bestScore = score;
5465
5582
  bestIteration = iter.iteration;
5466
5583
  }
5467
5584
  }
5468
5585
  }
5469
- return { taskId, scores, bestIteration, bestScore };
5586
+ const hasVariance = Object.keys(variance).length > 0;
5587
+ return { taskId, scores, bestIteration, bestScore, ...hasVariance ? { variance } : {} };
5470
5588
  });
5471
5589
  }
5472
5590
  function iterationStatus(iter, bestIteration) {
@@ -5502,13 +5620,29 @@ async function generateMarkdownReport(workspacePath) {
5502
5620
  lines.push("");
5503
5621
  lines.push("## Iterations");
5504
5622
  lines.push("");
5505
- lines.push("| Iter | Score | Mutations | Status |");
5506
- lines.push("|------|-------|-----------|--------|");
5623
+ const hasVariance = iterations.some(
5624
+ (iter) => Object.values(iter.taskResults).some((s) => s.variance)
5625
+ );
5626
+ if (hasVariance) {
5627
+ lines.push("| Iter | Score | Mutations | Status |");
5628
+ lines.push("|------|-------|-----------|--------|");
5629
+ } else {
5630
+ lines.push("| Iter | Score | Mutations | Status |");
5631
+ lines.push("|------|-------|-----------|--------|");
5632
+ }
5507
5633
  for (const iter of iterations) {
5508
5634
  const mutations = iter.proposal?.mutations.length ?? 0;
5509
5635
  const mutStr = mutations > 0 ? mutations.toString() : "-";
5510
5636
  const status = iterationStatus(iter, bestIter.iteration);
5511
- lines.push(`| ${iter.iteration} | ${iter.score.toFixed(1)}% | ${mutStr} | ${status} |`);
5637
+ let scoreStr = `${iter.score.toFixed(1)}%`;
5638
+ if (hasVariance) {
5639
+ const stddevs = Object.values(iter.taskResults).map((s) => s.variance?.stddev).filter((v) => v !== void 0);
5640
+ if (stddevs.length > 0) {
5641
+ const avgStddev = stddevs.reduce((a, b) => a + b, 0) / stddevs.length;
5642
+ scoreStr = `${iter.score.toFixed(1)}% \xB1${avgStddev.toFixed(1)}`;
5643
+ }
5644
+ }
5645
+ lines.push(`| ${iter.iteration} | ${scoreStr} | ${mutStr} | ${status} |`);
5512
5646
  }
5513
5647
  lines.push("");
5514
5648
  if (leaderboard.length > 0) {
@@ -5521,7 +5655,10 @@ async function generateMarkdownReport(workspacePath) {
5521
5655
  for (const entry of leaderboard) {
5522
5656
  const scoreCols = iterNums.map((n) => {
5523
5657
  const s = entry.scores[n];
5524
- return s !== void 0 ? `${s.toFixed(0)}%` : "-";
5658
+ if (s === void 0) return "-";
5659
+ const v = entry.variance?.[n];
5660
+ if (v && v.runs > 1) return `${s.toFixed(0)}% \xB1${v.stddev.toFixed(1)}`;
5661
+ return `${s.toFixed(0)}%`;
5525
5662
  });
5526
5663
  lines.push(`| ${entry.taskId} | ${scoreCols.join(" | ")} | ${entry.bestScore.toFixed(0)}% (iter ${entry.bestIteration}) |`);
5527
5664
  }
@@ -5571,12 +5708,17 @@ async function generateJsonReport(workspacePath) {
5571
5708
  bestIteration: bestIter.iteration,
5572
5709
  improvement
5573
5710
  },
5574
- iterations: iterations.map((iter) => ({
5575
- iteration: iter.iteration,
5576
- score: iter.score,
5577
- mutationCount: iter.proposal?.mutations.length ?? 0,
5578
- status: iterationStatus(iter, bestIter.iteration)
5579
- })),
5711
+ iterations: iterations.map((iter) => {
5712
+ const stddevs = Object.values(iter.taskResults).map((s) => s.variance?.stddev).filter((v) => v !== void 0);
5713
+ const avgStddev = stddevs.length > 0 ? stddevs.reduce((a, b) => a + b, 0) / stddevs.length : void 0;
5714
+ return {
5715
+ iteration: iter.iteration,
5716
+ score: iter.score,
5717
+ ...avgStddev !== void 0 ? { stddev: avgStddev } : {},
5718
+ mutationCount: iter.proposal?.mutations.length ?? 0,
5719
+ status: iterationStatus(iter, bestIter.iteration)
5720
+ };
5721
+ }),
5580
5722
  leaderboard,
5581
5723
  counterfactuals
5582
5724
  };
@@ -5766,7 +5908,7 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
5766
5908
  let addMore = true;
5767
5909
  while (addMore) {
5768
5910
  try {
5769
- addMore = await confirm3({ message: "Add another eval task?", default: false });
5911
+ addMore = await confirm4({ message: "Add another eval task?", default: false });
5770
5912
  } catch {
5771
5913
  addMore = false;
5772
5914
  }
@@ -6179,10 +6321,12 @@ async function countFiles(dir) {
6179
6321
  }
6180
6322
 
6181
6323
  // src/cli.ts
6324
+ var require2 = createRequire(import.meta.url);
6325
+ var pkg = require2("../package.json");
6182
6326
  var program = new Command12();
6183
6327
  program.name("kairn").description(
6184
6328
  "Compile natural language intent into optimized Claude Code environments"
6185
- ).version("1.9.0").option("--no-color", "Disable colored output");
6329
+ ).version(pkg.version).option("--no-color", "Disable colored output");
6186
6330
  program.addCommand(initCommand);
6187
6331
  program.addCommand(describeCommand);
6188
6332
  program.addCommand(optimizeCommand);