kairn-cli 2.2.9 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,10 +1,11 @@
1
1
  // src/cli.ts
2
2
  import { Command as Command12 } from "commander";
3
3
  import chalk15 from "chalk";
4
+ import { createRequire } from "module";
4
5
 
5
6
  // src/commands/init.ts
6
7
  import { Command } from "commander";
7
- import { input, password, select } from "@inquirer/prompts";
8
+ import { confirm, input, password, select } from "@inquirer/prompts";
8
9
  import chalk3 from "chalk";
9
10
  import Anthropic from "@anthropic-ai/sdk";
10
11
  import OpenAI from "openai";
@@ -62,6 +63,59 @@ async function saveConfig(config) {
62
63
  await fs.writeFile(CONFIG_PATH, JSON.stringify(config, null, 2), "utf-8");
63
64
  }
64
65
 
66
+ // src/auth/keychain.ts
67
+ import { exec } from "child_process";
68
+ import { promisify } from "util";
69
+ var execAsync = promisify(exec); // promise-returning wrapper around child_process.exec, used for the keychain lookup below
70
+ var KEYCHAIN_SERVICE = "Claude Code-credentials"; // service name passed to `security find-generic-password -s`
71
+ var TOKEN_EXPIRY_BUFFER_MS = 6e4; // 60 000 ms added to Date.now() by isTokenExpired, so a token about to expire already counts as expired
72
/**
 * Parse the JSON text read from the Claude Code keychain entry.
 *
 * The payload must be an object with a `claudeAiOauth` object carrying a
 * non-empty string `accessToken`, a string `refreshToken` and a finite numeric
 * `expiresAt` (compared against `Date.now()` by the expiry check, i.e. epoch
 * milliseconds). `subscriptionType` is optional and falls back to "unknown".
 *
 * @param {string} raw - Raw secret text as returned by the keychain lookup.
 * @returns {{accessToken: string, refreshToken: string, expiresAt: number, subscriptionType: string} | null}
 *   The normalized credentials, or `null` when the text is not valid JSON or
 *   does not match the expected shape. Never throws.
 */
function parseKeychainCredentials(raw) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Malformed or missing payload is an expected condition, not an error.
    return null;
  }
  if (typeof parsed !== "object" || parsed === null) return null;

  const oauth = parsed["claudeAiOauth"];
  if (typeof oauth !== "object" || oauth === null) return null;

  const { accessToken, refreshToken, expiresAt, subscriptionType } = oauth;
  if (typeof accessToken !== "string" || !accessToken) return null;
  if (typeof refreshToken !== "string") return null;
  // JSON.parse turns out-of-range literals such as 1e999 into Infinity; a
  // non-finite expiry would make the token look valid forever, so reject it.
  if (typeof expiresAt !== "number" || !Number.isFinite(expiresAt)) return null;

  return {
    accessToken,
    refreshToken,
    expiresAt,
    subscriptionType: typeof subscriptionType === "string" ? subscriptionType : "unknown"
  };
}
98
/**
 * Report whether the credentials are expired or about to expire.
 *
 * The expiry is compared against "now plus TOKEN_EXPIRY_BUFFER_MS", so a token
 * inside that safety window is already treated as expired.
 *
 * @param {{expiresAt: number}} credentials - Parsed keychain credentials.
 * @returns {boolean} `true` when the token should no longer be used.
 */
function isTokenExpired(credentials) {
  const cutoff = Date.now() + TOKEN_EXPIRY_BUFFER_MS;
  return credentials.expiresAt <= cutoff;
}
101
/**
 * Read and parse the Claude Code OAuth credentials from the macOS keychain.
 *
 * Runs `security find-generic-password -s <service> [-a <account>] -w` and
 * hands the printed secret to `parseKeychainCredentials`. The command is
 * spawned with an argument array (no shell), so an `account` value containing
 * quotes or shell metacharacters is passed through verbatim instead of being
 * interpreted by a shell.
 *
 * @param {string} [account] - Optional keychain account name; when omitted or
 *   empty the lookup is done by service name only.
 * @returns {Promise<object|null>} The parsed credentials, or `null` when not on
 *   macOS, when the lookup fails or times out (5 s), or when the stored value
 *   does not parse. Never rejects.
 */
async function readClaudeCodeCredentials(account) {
  if (process.platform !== "darwin") return null;
  try {
    // Loaded here so the block does not depend on a new top-level import.
    const { execFile } = await import("node:child_process");
    const args = ["find-generic-password", "-s", KEYCHAIN_SERVICE];
    if (account) args.push("-a", account);
    args.push("-w");
    const { stdout } = await promisify(execFile)("security", args, { timeout: 5e3 });
    return parseKeychainCredentials(stdout.trim());
  } catch {
    // Best-effort lookup: a missing entry, denied access or timeout all mean
    // "no credentials available".
    return null;
  }
}
112
/**
 * Resolve a usable Claude Code OAuth access token.
 *
 * @param {string} [account] - Optional keychain account name, forwarded to the
 *   credential lookup.
 * @returns {Promise<string|null>} The access token, or `null` when no
 *   credentials could be read or the token is expired.
 */
async function getAccessToken(account) {
  const credentials = await readClaudeCodeCredentials(account);
  if (!credentials || isTokenExpired(credentials)) {
    return null;
  }
  return credentials.accessToken;
}
118
+
65
119
  // src/providers.ts
66
120
  var PROVIDER_CONFIGS = {
67
121
  anthropic: {
@@ -508,30 +562,49 @@ var initCommand = new Command("init").description("Set up Kairn with your API ke
508
562
  choices: PROVIDER_MODELS[provider]
509
563
  });
510
564
  }
511
- const apiKey = await password({
512
- message: `${providerDisplayName} API key${provider === "other" ? " (Enter to skip)" : ""}`,
513
- mask: "*"
514
- });
515
- if (!apiKey && provider !== "other") {
516
- console.log(ui.error("No API key provided. Aborting."));
517
- process.exit(1);
565
+ let apiKey = "";
566
+ let authType = "api-key";
567
+ if (provider === "anthropic") {
568
+ const oauthToken = await getAccessToken();
569
+ if (oauthToken) {
570
+ const useOAuth = await confirm({
571
+ message: "Claude Code subscription detected. Use it instead of an API key? (experimental \u2014 may break)",
572
+ default: true
573
+ });
574
+ if (useOAuth) {
575
+ authType = "claude-code-oauth";
576
+ console.log(ui.warn("Using Claude Code OAuth token. This is undocumented and may break at any time."));
577
+ console.log(ui.success("OAuth token validated"));
578
+ }
579
+ }
518
580
  }
519
- if (apiKey) {
520
- console.log(chalk3.dim("\n Verifying API key..."));
521
- const valid = await verifyKey(provider, apiKey, baseURL, model);
522
- if (!valid) {
523
- console.log(ui.error("Invalid API key. Check your key and try again."));
581
+ if (authType === "api-key") {
582
+ apiKey = await password({
583
+ message: `${providerDisplayName} API key${provider === "other" ? " (Enter to skip)" : ""}`,
584
+ mask: "*"
585
+ });
586
+ if (!apiKey && provider !== "other") {
587
+ console.log(ui.error("No API key provided. Aborting."));
524
588
  process.exit(1);
525
589
  }
526
- console.log(ui.success("API key verified"));
527
- } else {
528
- console.log(ui.warn("No API key \u2014 skipping verification"));
590
+ if (apiKey) {
591
+ console.log(chalk3.dim("\n Verifying API key..."));
592
+ const valid = await verifyKey(provider, apiKey, baseURL, model);
593
+ if (!valid) {
594
+ console.log(ui.error("Invalid API key. Check your key and try again."));
595
+ process.exit(1);
596
+ }
597
+ console.log(ui.success("API key verified"));
598
+ } else {
599
+ console.log(ui.warn("No API key \u2014 skipping verification"));
600
+ }
529
601
  }
530
602
  const config = {
531
603
  provider,
532
- api_key: apiKey || "",
604
+ api_key: apiKey,
533
605
  model,
534
606
  ...baseURL ? { base_url: baseURL } : {},
607
+ ...authType !== "api-key" ? { auth_type: authType } : {},
535
608
  default_runtime: "claude-code",
536
609
  created_at: (/* @__PURE__ */ new Date()).toISOString()
537
610
  };
@@ -555,7 +628,7 @@ var initCommand = new Command("init").description("Set up Kairn with your API ke
555
628
 
556
629
  // src/commands/describe.ts
557
630
  import { Command as Command2 } from "commander";
558
- import { input as input2, confirm, select as select2 } from "@inquirer/prompts";
631
+ import { input as input2, confirm as confirm2, select as select2 } from "@inquirer/prompts";
559
632
  import chalk5 from "chalk";
560
633
 
561
634
  // src/compiler/compile.ts
@@ -1240,8 +1313,18 @@ async function callLLM(config, userMessage, options) {
1240
1313
  const { systemPrompt } = options;
1241
1314
  const jsonMode = options.jsonMode ?? false;
1242
1315
  const providerName = getProviderName(config.provider);
1316
+ let apiKey = config.api_key;
1317
+ if (config.auth_type === "claude-code-oauth") {
1318
+ const oauthToken = await getAccessToken();
1319
+ if (!oauthToken) {
1320
+ throw new Error(
1321
+ "Claude Code OAuth token unavailable or expired. Run `kairn init` to reconfigure, or launch Claude Code to refresh the token."
1322
+ );
1323
+ }
1324
+ apiKey = oauthToken;
1325
+ }
1243
1326
  if (config.provider === "anthropic") {
1244
- const client2 = new Anthropic2({ apiKey: config.api_key });
1327
+ const client2 = new Anthropic2({ apiKey });
1245
1328
  const messages = [
1246
1329
  { role: "user", content: userMessage }
1247
1330
  ];
@@ -1262,7 +1345,7 @@ async function callLLM(config, userMessage, options) {
1262
1345
  }
1263
1346
  }
1264
1347
  const resolvedBaseURL = getBaseURL(config.provider, config.base_url);
1265
- const clientOptions = { apiKey: config.api_key };
1348
+ const clientOptions = { apiKey };
1266
1349
  if (resolvedBaseURL) clientOptions.baseURL = resolvedBaseURL;
1267
1350
  const client = new OpenAI2(clientOptions);
1268
1351
  try {
@@ -2441,7 +2524,7 @@ Autonomy level: ${autonomyLevel} (${autonomyLabel(autonomyLevel)})`;
2441
2524
  console.log("");
2442
2525
  }
2443
2526
  }
2444
- const proceed = options.yes || await confirm({
2527
+ const proceed = options.yes || await confirm2({
2445
2528
  message: "Generate environment in current directory?",
2446
2529
  default: true
2447
2530
  });
@@ -2656,7 +2739,7 @@ var updateRegistryCommand = new Command5("update-registry").description("Fetch t
2656
2739
 
2657
2740
  // src/commands/optimize.ts
2658
2741
  import { Command as Command6 } from "commander";
2659
- import { confirm as confirm2 } from "@inquirer/prompts";
2742
+ import { confirm as confirm3 } from "@inquirer/prompts";
2660
2743
  import chalk9 from "chalk";
2661
2744
  import ora from "ora";
2662
2745
  import fs12 from "fs/promises";
@@ -2718,7 +2801,7 @@ function detectFramework(deps) {
2718
2801
  ];
2719
2802
  const detected = [];
2720
2803
  for (const [packages, name] of frameworks) {
2721
- if (packages.some((pkg) => deps.includes(pkg))) {
2804
+ if (packages.some((pkg2) => deps.includes(pkg2))) {
2722
2805
  detected.push(name);
2723
2806
  }
2724
2807
  }
@@ -2742,11 +2825,11 @@ function extractEnvKeys(content) {
2742
2825
  return keys;
2743
2826
  }
2744
2827
  async function scanProject(dir) {
2745
- const pkg = await readJsonSafe(path11.join(dir, "package.json"));
2746
- const deps = pkg?.dependencies ? Object.keys(pkg.dependencies) : [];
2747
- const devDeps = pkg?.devDependencies ? Object.keys(pkg.devDependencies) : [];
2828
+ const pkg2 = await readJsonSafe(path11.join(dir, "package.json"));
2829
+ const deps = pkg2?.dependencies ? Object.keys(pkg2.dependencies) : [];
2830
+ const devDeps = pkg2?.devDependencies ? Object.keys(pkg2.devDependencies) : [];
2748
2831
  const allDeps = [...deps, ...devDeps];
2749
- const scripts = pkg?.scripts || {};
2832
+ const scripts = pkg2?.scripts || {};
2750
2833
  const rootFiles = await listDirSafe(dir);
2751
2834
  const keyFiles = rootFiles.filter(
2752
2835
  (f) => [
@@ -2808,8 +2891,8 @@ async function scanProject(dir) {
2808
2891
  existingSkills = await listDirSafe(path11.join(claudeDir, "skills"));
2809
2892
  existingAgents = (await listDirSafe(path11.join(claudeDir, "agents"))).filter((f) => f.endsWith(".md")).map((f) => f.replace(".md", ""));
2810
2893
  }
2811
- const name = pkg?.name || path11.basename(dir);
2812
- const description = pkg?.description || "";
2894
+ const name = pkg2?.name || path11.basename(dir);
2895
+ const description = pkg2?.description || "";
2813
2896
  return {
2814
2897
  name,
2815
2898
  description,
@@ -3028,7 +3111,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
3028
3111
  }
3029
3112
  if (!options.yes) {
3030
3113
  console.log("");
3031
- const proceed = await confirm2({
3114
+ const proceed = await confirm3({
3032
3115
  message: "Generate optimized environment? This will overwrite existing .claude/ files.",
3033
3116
  default: false
3034
3117
  });
@@ -3040,7 +3123,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
3040
3123
  } else {
3041
3124
  console.log(chalk9.dim("\n No existing .claude/ directory found \u2014 generating from scratch.\n"));
3042
3125
  if (!options.yes) {
3043
- const proceed = await confirm2({
3126
+ const proceed = await confirm3({
3044
3127
  message: "Generate Claude Code environment for this project?",
3045
3128
  default: true
3046
3129
  });
@@ -3103,7 +3186,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
3103
3186
  }
3104
3187
  }
3105
3188
  console.log("");
3106
- const apply = await confirm2({
3189
+ const apply = await confirm3({
3107
3190
  message: "Apply these changes?",
3108
3191
  default: true
3109
3192
  });
@@ -3702,7 +3785,7 @@ import ora2 from "ora";
3702
3785
  import fs24 from "fs/promises";
3703
3786
  import path24 from "path";
3704
3787
  import { parse as yamlParse2 } from "yaml";
3705
- import { confirm as confirm3, select as select4 } from "@inquirer/prompts";
3788
+ import { confirm as confirm4, select as select4 } from "@inquirer/prompts";
3706
3789
 
3707
3790
  // src/evolve/init.ts
3708
3791
  import fs15 from "fs/promises";
@@ -3945,14 +4028,14 @@ async function buildProjectProfile(projectRoot) {
3945
4028
  path15.join(projectRoot, "package.json"),
3946
4029
  "utf-8"
3947
4030
  );
3948
- const pkg = JSON.parse(pkgStr);
4031
+ const pkg2 = JSON.parse(pkgStr);
3949
4032
  profile.language = "typescript";
3950
- if (pkg.scripts && typeof pkg.scripts === "object") {
3951
- profile.scripts = pkg.scripts;
4033
+ if (pkg2.scripts && typeof pkg2.scripts === "object") {
4034
+ profile.scripts = pkg2.scripts;
3952
4035
  }
3953
4036
  const deps = {
3954
- ...pkg.dependencies ?? {},
3955
- ...pkg.devDependencies ?? {}
4037
+ ...pkg2.dependencies ?? {},
4038
+ ...pkg2.devDependencies ?? {}
3956
4039
  };
3957
4040
  if (deps.next) {
3958
4041
  profile.framework = "Next.js";
@@ -4051,8 +4134,8 @@ async function copyDir(src, dest) {
4051
4134
  }
4052
4135
 
4053
4136
  // src/evolve/runner.ts
4054
- import { exec as exec2, spawn } from "child_process";
4055
- import { promisify as promisify2 } from "util";
4137
+ import { exec as exec3, spawn } from "child_process";
4138
+ import { promisify as promisify3 } from "util";
4056
4139
  import fs18 from "fs/promises";
4057
4140
  import os3 from "os";
4058
4141
  import path18 from "path";
@@ -4177,11 +4260,11 @@ async function loadIterationLog(workspacePath, iteration) {
4177
4260
  }
4178
4261
 
4179
4262
  // src/evolve/exec.ts
4180
- import { exec } from "child_process";
4181
- import { promisify } from "util";
4182
- var execAsync = promisify(exec);
4263
+ import { exec as exec2 } from "child_process";
4264
+ import { promisify as promisify2 } from "util";
4265
+ var execAsync2 = promisify2(exec2);
4183
4266
/**
 * Run a shell command in a given working directory with a timeout.
 *
 * @param {string} cmd - Command line to execute.
 * @param {string} cwd - Working directory for the child process.
 * @param {number} [timeoutMs=3e4] - Timeout in milliseconds.
 * @returns {Promise<{stdout: string, stderr: string}>} Whatever the promisified
 *   `exec` resolves to; rejects when the command fails or times out.
 */
async function execCommand(cmd, cwd, timeoutMs = 3e4) {
  const execOptions = { cwd, timeout: timeoutMs };
  return execAsync2(cmd, execOptions);
}
4186
4269
 
4187
4270
  // src/evolve/scorers.ts
@@ -4341,21 +4424,47 @@ async function rubricScorer(task, workspacePath, stdout, stderr, config) {
4341
4424
  breakdown
4342
4425
  };
4343
4426
  }
4427
/**
 * Attach a coarse failure category to a failing score.
 *
 * Passing scores are returned untouched (same object). For a failing score the
 * rules are tried in order and the first match wins:
 *   1. "task"    - setup error marker in stderr, or missing command/file.
 *   2. "model"   - token/context/rate limit, API error, "429", "overloaded".
 *   3. "repo"    - "build failed" together with "before", merge conflict, dirty tree.
 *   4. "harness" - numeric score in [20, 80).
 *   5. "unknown" - nothing matched (empty reason).
 *
 * Keyword checks are case-insensitive over stdout+stderr, except the setup
 * marker check, which looks at the raw stderr.
 *
 * @param {object} score - Score object with `pass` and optional numeric `score`.
 * @param {string} stdout - Captured standard output of the task run.
 * @param {string} stderr - Captured standard error of the task run.
 * @returns {object} `score` itself when passing, otherwise a copy extended with
 *   `failureCategory` and `failureReason`.
 */
function classifyFailure(score, stdout, stderr) {
  if (score.pass) return score;

  const haystack = `${stdout}\n${stderr}`.toLowerCase();
  const mentions = (needle) => haystack.includes(needle);
  const tagged = (failureCategory, failureReason) => ({ ...score, failureCategory, failureReason });

  // The setup marker is matched case-sensitively against the raw stderr.
  const setupErrored = stderr.includes("[setup]") && stderr.includes("Error");
  if (setupErrored || mentions("command not found") || mentions("no such file or directory")) {
    return tagged("task", "Task setup failed or references missing resources");
  }

  const modelHints = ["token limit", "context length", "rate limit", "api error", "429", "overloaded"];
  if (modelHints.some((hint) => mentions(hint))) {
    return tagged("model", "Model API error, token limit, or rate limit");
  }

  const brokenBefore = mentions("build failed") && mentions("before");
  const repoHints = ["merge conflict", "git dirty", "uncommitted changes"];
  if (brokenBefore || repoHints.some((hint) => mentions(hint))) {
    return tagged("repo", "Pre-existing repo issues (build failure, dirty state)");
  }

  const points = score.score ?? 0;
  if (points >= 20 && points < 80) {
    return tagged("harness", "Agent attempted the task but did not follow harness conventions");
  }

  return tagged("unknown", "");
}
4344
4449
/**
 * Score a task run with the scorer selected by `task.scoring`, then annotate
 * failing results with a failure category.
 *
 * "llm-judge" and "rubric" are only used when a `config` is supplied; every
 * other case (including "pass-fail" and unknown modes) uses the pass/fail
 * scorer.
 *
 * @param {object} task - Task definition; `task.scoring` picks the scorer.
 * @param {string} workspacePath - Workspace the task ran in.
 * @param {string} stdout - Captured standard output of the run.
 * @param {string} stderr - Captured standard error of the run.
 * @param {object} [config] - Configuration forwarded to the LLM-backed scorers.
 * @returns {Promise<object>} The score, extended by `classifyFailure` when it
 *   is not passing.
 */
async function scoreTask(task, workspacePath, stdout, stderr, config) {
  const mode = task.scoring;
  // Only the config-backed modes need a dedicated scorer; null means pass/fail.
  const configuredScorer = config && mode === "llm-judge"
    ? llmJudgeScorer
    : config && mode === "rubric"
      ? rubricScorer
      : null;

  const outcome = configuredScorer
    ? await configuredScorer(task, workspacePath, stdout, stderr, config)
    : await passFailScorer(task, workspacePath, stdout, stderr);

  return outcome.pass ? outcome : classifyFailure(outcome, stdout, stderr);
}
4356
4465
 
4357
4466
  // src/evolve/runner.ts
4358
- var execAsync2 = promisify2(exec2);
4467
+ var execAsync3 = promisify3(exec3);
4359
4468
  var COPY_SKIP_DIRS = /* @__PURE__ */ new Set([".git", "node_modules", ".kairn-evolve", ".claude"]);
4360
4469
  async function deployMcpJson(harnessPath, workDir) {
4361
4470
  const src = path18.join(harnessPath, ".mcp.json");
@@ -4365,12 +4474,12 @@ async function deployMcpJson(harnessPath, workDir) {
4365
4474
  async function createIsolatedWorkspace(projectRoot, harnessPath) {
4366
4475
  const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
4367
4476
  try {
4368
- await execAsync2("git rev-parse --is-inside-work-tree", {
4477
+ await execAsync3("git rev-parse --is-inside-work-tree", {
4369
4478
  cwd: projectRoot,
4370
4479
  timeout: 5e3
4371
4480
  });
4372
4481
  const tmpDir2 = path18.join(os3.tmpdir(), `kairn-evolve-wt-${suffix}`);
4373
- await execAsync2(`git worktree add --detach "${tmpDir2}" HEAD`, {
4482
+ await execAsync3(`git worktree add --detach "${tmpDir2}" HEAD`, {
4374
4483
  cwd: projectRoot,
4375
4484
  timeout: 3e4
4376
4485
  });
@@ -4409,14 +4518,14 @@ async function copyProjectDir(src, dest) {
4409
4518
  async function cleanupIsolatedWorkspace(workDir, isWorktree, projectRoot) {
4410
4519
  if (isWorktree) {
4411
4520
  try {
4412
- await execAsync2(`git worktree remove "${workDir}" --force`, {
4521
+ await execAsync3(`git worktree remove "${workDir}" --force`, {
4413
4522
  cwd: projectRoot,
4414
4523
  timeout: 1e4
4415
4524
  });
4416
4525
  } catch {
4417
4526
  await fs18.rm(workDir, { recursive: true, force: true }).catch(() => {
4418
4527
  });
4419
- await execAsync2("git worktree prune", {
4528
+ await execAsync3("git worktree prune", {
4420
4529
  cwd: projectRoot,
4421
4530
  timeout: 5e3
4422
4531
  }).catch(() => {
@@ -4437,7 +4546,7 @@ async function runTask(task, harnessPath, traceDir, iteration, projectRoot) {
4437
4546
  let setupStderr = "";
4438
4547
  if (task.setup.trim()) {
4439
4548
  try {
4440
- await execAsync2(task.setup, { cwd: workDir, timeout: 6e4 });
4549
+ await execAsync3(task.setup, { cwd: workDir, timeout: 6e4 });
4441
4550
  } catch (err) {
4442
4551
  setupStderr = err instanceof Error ? err.message : String(err);
4443
4552
  }
@@ -5218,19 +5327,41 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5218
5327
  };
5219
5328
  await writeIterationLog(workspacePath, rollbackLog);
5220
5329
  history.push(rollbackLog);
5330
+ const bestHarnessPath = path21.join(
5331
+ workspacePath,
5332
+ "iterations",
5333
+ bestIteration.toString(),
5334
+ "harness"
5335
+ );
5221
5336
  if (iter + 1 < evolveConfig.maxIterations) {
5222
- const nextIterDir2 = path21.join(
5223
- workspacePath,
5224
- "iterations",
5225
- (iter + 1).toString()
5226
- );
5227
- const bestHarnessPath = path21.join(
5228
- workspacePath,
5229
- "iterations",
5230
- bestIteration.toString(),
5231
- "harness"
5232
- );
5233
- await copyDir(bestHarnessPath, path21.join(nextIterDir2, "harness"));
5337
+ onProgress?.({ type: "proposing", iteration: iter, message: "Proposing new mutations after rollback" });
5338
+ try {
5339
+ let rollbackProposal = await propose(
5340
+ iter,
5341
+ workspacePath,
5342
+ bestHarnessPath,
5343
+ history,
5344
+ tasks,
5345
+ kairnConfig,
5346
+ evolveConfig.proposerModel
5347
+ );
5348
+ if (rollbackProposal.mutations.length > evolveConfig.maxMutationsPerIteration) {
5349
+ rollbackProposal = {
5350
+ ...rollbackProposal,
5351
+ mutations: rollbackProposal.mutations.slice(0, evolveConfig.maxMutationsPerIteration)
5352
+ };
5353
+ }
5354
+ const nextIterDir2 = path21.join(workspacePath, "iterations", (iter + 1).toString());
5355
+ await applyMutations(bestHarnessPath, nextIterDir2, rollbackProposal.mutations);
5356
+ onProgress?.({
5357
+ type: "mutations-applied",
5358
+ iteration: iter,
5359
+ mutationCount: rollbackProposal.mutations.length
5360
+ });
5361
+ } catch {
5362
+ const nextIterDir2 = path21.join(workspacePath, "iterations", (iter + 1).toString());
5363
+ await copyDir(bestHarnessPath, path21.join(nextIterDir2, "harness"));
5364
+ }
5234
5365
  }
5235
5366
  continue;
5236
5367
  }
@@ -5431,6 +5562,7 @@ function buildLeaderboard(iterations, tasks) {
5431
5562
  const taskIds = tasks.map((t) => t.id);
5432
5563
  return taskIds.map((taskId) => {
5433
5564
  const scores = {};
5565
+ const variance = {};
5434
5566
  let bestScore = -1;
5435
5567
  let bestIteration = 0;
5436
5568
  for (const iter of iterations) {
@@ -5438,13 +5570,21 @@ function buildLeaderboard(iterations, tasks) {
5438
5570
  if (s) {
5439
5571
  const score = numericScore2(s);
5440
5572
  scores[iter.iteration] = score;
5573
+ if (s.variance) {
5574
+ variance[iter.iteration] = {
5575
+ mean: s.variance.mean,
5576
+ stddev: s.variance.stddev,
5577
+ runs: s.variance.runs
5578
+ };
5579
+ }
5441
5580
  if (score > bestScore) {
5442
5581
  bestScore = score;
5443
5582
  bestIteration = iter.iteration;
5444
5583
  }
5445
5584
  }
5446
5585
  }
5447
- return { taskId, scores, bestIteration, bestScore };
5586
+ const hasVariance = Object.keys(variance).length > 0;
5587
+ return { taskId, scores, bestIteration, bestScore, ...hasVariance ? { variance } : {} };
5448
5588
  });
5449
5589
  }
5450
5590
  function iterationStatus(iter, bestIteration) {
@@ -5480,13 +5620,29 @@ async function generateMarkdownReport(workspacePath) {
5480
5620
  lines.push("");
5481
5621
  lines.push("## Iterations");
5482
5622
  lines.push("");
5483
- lines.push("| Iter | Score | Mutations | Status |");
5484
- lines.push("|------|-------|-----------|--------|");
5623
+ const hasVariance = iterations.some(
5624
+ (iter) => Object.values(iter.taskResults).some((s) => s.variance)
5625
+ );
5626
+ if (hasVariance) {
5627
+ lines.push("| Iter | Score | Mutations | Status |");
5628
+ lines.push("|------|-------|-----------|--------|");
5629
+ } else {
5630
+ lines.push("| Iter | Score | Mutations | Status |");
5631
+ lines.push("|------|-------|-----------|--------|");
5632
+ }
5485
5633
  for (const iter of iterations) {
5486
5634
  const mutations = iter.proposal?.mutations.length ?? 0;
5487
5635
  const mutStr = mutations > 0 ? mutations.toString() : "-";
5488
5636
  const status = iterationStatus(iter, bestIter.iteration);
5489
- lines.push(`| ${iter.iteration} | ${iter.score.toFixed(1)}% | ${mutStr} | ${status} |`);
5637
+ let scoreStr = `${iter.score.toFixed(1)}%`;
5638
+ if (hasVariance) {
5639
+ const stddevs = Object.values(iter.taskResults).map((s) => s.variance?.stddev).filter((v) => v !== void 0);
5640
+ if (stddevs.length > 0) {
5641
+ const avgStddev = stddevs.reduce((a, b) => a + b, 0) / stddevs.length;
5642
+ scoreStr = `${iter.score.toFixed(1)}% \xB1${avgStddev.toFixed(1)}`;
5643
+ }
5644
+ }
5645
+ lines.push(`| ${iter.iteration} | ${scoreStr} | ${mutStr} | ${status} |`);
5490
5646
  }
5491
5647
  lines.push("");
5492
5648
  if (leaderboard.length > 0) {
@@ -5499,7 +5655,10 @@ async function generateMarkdownReport(workspacePath) {
5499
5655
  for (const entry of leaderboard) {
5500
5656
  const scoreCols = iterNums.map((n) => {
5501
5657
  const s = entry.scores[n];
5502
- return s !== void 0 ? `${s.toFixed(0)}%` : "-";
5658
+ if (s === void 0) return "-";
5659
+ const v = entry.variance?.[n];
5660
+ if (v && v.runs > 1) return `${s.toFixed(0)}% \xB1${v.stddev.toFixed(1)}`;
5661
+ return `${s.toFixed(0)}%`;
5503
5662
  });
5504
5663
  lines.push(`| ${entry.taskId} | ${scoreCols.join(" | ")} | ${entry.bestScore.toFixed(0)}% (iter ${entry.bestIteration}) |`);
5505
5664
  }
@@ -5549,12 +5708,17 @@ async function generateJsonReport(workspacePath) {
5549
5708
  bestIteration: bestIter.iteration,
5550
5709
  improvement
5551
5710
  },
5552
- iterations: iterations.map((iter) => ({
5553
- iteration: iter.iteration,
5554
- score: iter.score,
5555
- mutationCount: iter.proposal?.mutations.length ?? 0,
5556
- status: iterationStatus(iter, bestIter.iteration)
5557
- })),
5711
+ iterations: iterations.map((iter) => {
5712
+ const stddevs = Object.values(iter.taskResults).map((s) => s.variance?.stddev).filter((v) => v !== void 0);
5713
+ const avgStddev = stddevs.length > 0 ? stddevs.reduce((a, b) => a + b, 0) / stddevs.length : void 0;
5714
+ return {
5715
+ iteration: iter.iteration,
5716
+ score: iter.score,
5717
+ ...avgStddev !== void 0 ? { stddev: avgStddev } : {},
5718
+ mutationCount: iter.proposal?.mutations.length ?? 0,
5719
+ status: iterationStatus(iter, bestIter.iteration)
5720
+ };
5721
+ }),
5558
5722
  leaderboard,
5559
5723
  counterfactuals
5560
5724
  };
@@ -5744,7 +5908,7 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
5744
5908
  let addMore = true;
5745
5909
  while (addMore) {
5746
5910
  try {
5747
- addMore = await confirm3({ message: "Add another eval task?", default: false });
5911
+ addMore = await confirm4({ message: "Add another eval task?", default: false });
5748
5912
  } catch {
5749
5913
  addMore = false;
5750
5914
  }
@@ -6157,10 +6321,12 @@ async function countFiles(dir) {
6157
6321
  }
6158
6322
 
6159
6323
  // src/cli.ts
6324
+ var require2 = createRequire(import.meta.url);
6325
+ var pkg = require2("../package.json");
6160
6326
  var program = new Command12();
6161
6327
  program.name("kairn").description(
6162
6328
  "Compile natural language intent into optimized Claude Code environments"
6163
- ).version("1.9.0").option("--no-color", "Disable colored output");
6329
+ ).version(pkg.version).option("--no-color", "Disable colored output");
6164
6330
  program.addCommand(initCommand);
6165
6331
  program.addCommand(describeCommand);
6166
6332
  program.addCommand(optimizeCommand);