npm - kairn-cli - Versions diffs - 1.14.0 → 2.1.0 - Mend

kairn-cli 1.14.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/cli.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // src/cli.ts
-import { Command as Command11 } from "commander";
-import chalk14 from "chalk";
+import { Command as Command12 } from "commander";
+import chalk15 from "chalk";
 // src/commands/init.ts
 import { Command } from "commander";
@@ -221,7 +221,7 @@ var ui = {
   // Key-value pairs
   kv: (key, value) => `  ${chalk.cyan(key.padEnd(14))} ${value}`,
   // File list
-  file: (path15) => chalk.dim(`    ${path15}`),
+  file: (path23) => chalk.dim(`    ${path23}`),
   // Tool display
   tool: (name, reason) => `    ${warmStone("\u25CF")} ${chalk.bold(name)}
       ${chalk.dim(reason)}`,
@@ -562,8 +562,6 @@ import chalk5 from "chalk";
 import fs4 from "fs/promises";
 import path4 from "path";
 import crypto from "crypto";
-import Anthropic2 from "@anthropic-ai/sdk";
-import OpenAI2 from "openai";
 // src/compiler/prompt.ts
 var SKELETON_PROMPT = `You are the Kairn skeleton compiler. Your job is to select tools and outline the project structure from a user's natural language description.
@@ -1204,76 +1202,9 @@ async function loadRegistry() {
   return Array.from(merged.values());
 }
-// src/compiler/compile.ts
-function buildSkeletonMessage(intent, registry) {
-  const registrySummary = registry.map(
-    (t) => `- ${t.id} (${t.type}, tier ${t.tier}, auth: ${t.auth}): ${t.description} [best_for: ${t.best_for.join(", ")}]`
-  ).join("\n");
-  return `## User Intent
-${intent}
-## Available Tool Registry
-${registrySummary}
-Generate the skeleton JSON now.`;
-}
-function buildHarnessMessage(intent, skeleton, concise) {
-  const skeletonJson = JSON.stringify(skeleton, null, 2);
-  const conciseNote = concise ? "\n\nIMPORTANT: Be concise. Maximum 80 lines for claude_md. Maximum 5 commands. Keep all content brief." : "";
-  return `## User Intent
-${intent}
-## Project Skeleton
-${skeletonJson}
-Generate the harness content JSON now.${conciseNote}`;
-}
-function parseSkeletonResponse(text) {
-  let cleaned = text.trim();
-  if (cleaned.startsWith("```")) {
-    cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
-  }
-  const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
-  if (!jsonMatch) {
-    throw new Error("Pass 1 (skeleton) did not return valid JSON.");
-  }
-  try {
-    const parsed = JSON.parse(jsonMatch[0]);
-    if (!parsed.name || !parsed.tools || !Array.isArray(parsed.tools)) {
-      throw new Error("Skeleton missing required fields: name, tools");
-    }
-    return parsed;
-  } catch (err) {
-    throw new Error(
-      `Failed to parse skeleton JSON: ${err instanceof Error ? err.message : String(err)}`
-    );
-  }
-}
-function parseHarnessResponse(text) {
-  let cleaned = text.trim();
-  if (cleaned.startsWith("```")) {
-    cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
-  }
-  const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
-  if (!jsonMatch) {
-    throw new Error("Pass 2 (harness) did not return valid JSON.");
-  }
-  try {
-    const parsed = JSON.parse(jsonMatch[0]);
-    if (!parsed.claude_md || !parsed.commands) {
-      throw new Error("Harness missing required fields: claude_md, commands");
-    }
-    return parsed;
-  } catch (err) {
-    throw new Error(
-      `Failed to parse harness JSON: ${err instanceof Error ? err.message : String(err)}`
-    );
-  }
-}
+// src/llm.ts
+import Anthropic2 from "@anthropic-ai/sdk";
+import OpenAI2 from "openai";
 function classifyError(err, provider) {
   const msg = err instanceof Error ? err.message : String(err);
   const status = err?.status;
@@ -1305,8 +1236,8 @@ function classifyError(err, provider) {
   return `${provider} API error: ${msg}`;
 }
 async function callLLM(config, userMessage, options) {
-  const maxTokens = options?.maxTokens ?? 8192;
-  const systemPrompt = options?.systemPrompt ?? SYSTEM_PROMPT;
+  const maxTokens = options.maxTokens ?? 8192;
+  const systemPrompt = options.systemPrompt;
   const providerName = getProviderName(config.provider);
   if (config.provider === "anthropic") {
     const client2 = new Anthropic2({ apiKey: config.api_key });
@@ -1348,6 +1279,77 @@ async function callLLM(config, userMessage, options) {
     throw new Error(classifyError(err, providerName));
   }
 }
+// src/compiler/compile.ts
+/**
+ * Assemble the pass-1 (skeleton) user message: the user's intent followed by a
+ * one-line summary of every tool in the registry.
+ * @param {string} intent - Natural-language description supplied by the user.
+ * @param {Array<object>} registry - Tool entries; id, type, tier, auth, description and best_for are read from each.
+ * @returns {string} The prompt text sent to the model.
+ */
+function buildSkeletonMessage(intent, registry) {
+  const describeTool = (tool) => {
+    const bestFor = tool.best_for.join(", ");
+    return `- ${tool.id} (${tool.type}, tier ${tool.tier}, auth: ${tool.auth}): ${tool.description} [best_for: ${bestFor}]`;
+  };
+  const toolLines = registry.map((tool) => describeTool(tool)).join("\n");
+  return `## User Intent\n${intent}\n## Available Tool Registry\n${toolLines}\nGenerate the skeleton JSON now.`;
+}
+/**
+ * Assemble the pass-2 (harness) user message from the intent and the skeleton
+ * produced by pass 1.
+ * @param {string} intent - Natural-language description supplied by the user.
+ * @param {object} skeleton - Pass-1 result; embedded as 2-space-indented JSON.
+ * @param {boolean} [concise] - When truthy, a brevity instruction is appended.
+ * @returns {string} The prompt text sent to the model.
+ */
+function buildHarnessMessage(intent, skeleton, concise) {
+  const prettySkeleton = JSON.stringify(skeleton, null, 2);
+  let brevityNote = "";
+  if (concise) {
+    brevityNote = "\n\nIMPORTANT: Be concise. Maximum 80 lines for claude_md. Maximum 5 commands. Keep all content brief.";
+  }
+  return `## User Intent\n${intent}\n## Project Skeleton\n${prettySkeleton}\nGenerate the harness content JSON now.${brevityNote}`;
+}
+/**
+ * Extract and validate the skeleton JSON from a pass-1 model reply.
+ * A leading Markdown code fence is stripped, then the span from the first "{"
+ * to the last "}" is parsed.
+ * @param {string} text - Raw model reply.
+ * @returns {object} Parsed skeleton with a truthy `name` and an array `tools`.
+ * @throws {Error} When no JSON object is present, parsing fails, or required fields are missing.
+ */
+function parseSkeletonResponse(text) {
+  const trimmed = text.trim();
+  const unfenced = trimmed.startsWith("```")
+    ? trimmed.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "")
+    : trimmed;
+  const candidate = /\{[\s\S]*\}/.exec(unfenced);
+  if (candidate === null) {
+    throw new Error("Pass 1 (skeleton) did not return valid JSON.");
+  }
+  let skeleton;
+  try {
+    skeleton = JSON.parse(candidate[0]);
+    const hasRequiredFields = Boolean(skeleton.name) && Array.isArray(skeleton.tools);
+    if (!hasRequiredFields) {
+      // Thrown inside the try on purpose: it is re-wrapped by the catch below.
+      throw new Error("Skeleton missing required fields: name, tools");
+    }
+  } catch (cause) {
+    const reason = cause instanceof Error ? cause.message : String(cause);
+    throw new Error(`Failed to parse skeleton JSON: ${reason}`);
+  }
+  return skeleton;
+}
+/**
+ * Extract and validate the harness JSON from a pass-2 model reply.
+ * A leading Markdown code fence is stripped, then the span from the first "{"
+ * to the last "}" is parsed.
+ * @param {string} text - Raw model reply.
+ * @returns {object} Parsed harness with truthy `claude_md` and `commands`.
+ * @throws {Error} When no JSON object is present, parsing fails, or required fields are missing.
+ */
+function parseHarnessResponse(text) {
+  const trimmed = text.trim();
+  const unfenced = trimmed.startsWith("```")
+    ? trimmed.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "")
+    : trimmed;
+  const candidate = /\{[\s\S]*\}/.exec(unfenced);
+  if (candidate === null) {
+    throw new Error("Pass 2 (harness) did not return valid JSON.");
+  }
+  let harness;
+  try {
+    harness = JSON.parse(candidate[0]);
+    const hasRequiredFields = Boolean(harness.claude_md) && Boolean(harness.commands);
+    if (!hasRequiredFields) {
+      // Thrown inside the try on purpose: it is re-wrapped by the catch below.
+      throw new Error("Harness missing required fields: claude_md, commands");
+    }
+  } catch (cause) {
+    const reason = cause instanceof Error ? cause.message : String(cause);
+    throw new Error(`Failed to parse harness JSON: ${reason}`);
+  }
+  return harness;
+}
 function buildSettings(skeleton, registry) {
   const selectedTools = skeleton.tools.map((t) => registry.find((r) => r.id === t.tool_id)).filter(Boolean);
   const allow = ["Read", "Write", "Edit", "Bash(npm run *)", "Bash(npx *)"];
@@ -1517,7 +1519,9 @@ async function generateClarifications(intent, onProgress) {
   onProgress?.("Analyzing your request...");
   const clarificationConfig = { ...config };
   clarificationConfig.model = getCheapModel(config.provider, config.model);
-  const response = await callLLM(clarificationConfig, CLARIFICATION_PROMPT + "\n\nUser description: " + intent);
+  const response = await callLLM(clarificationConfig, CLARIFICATION_PROMPT + "\n\nUser description: " + intent, {
+    systemPrompt: SYSTEM_PROMPT
+  });
   try {
     let cleaned = response.trim();
     if (cleaned.startsWith("```")) {
@@ -3686,8 +3690,1572 @@ var keysCommand = new Command10("keys").description("Add or update API keys for
   console.log("");
 });
+// src/commands/evolve.ts
+import { Command as Command11 } from "commander";
+import chalk14 from "chalk";
+import ora2 from "ora";
+import fs22 from "fs/promises";
+import path22 from "path";
+import { parse as yamlParse } from "yaml";
+import { confirm as confirm3, select as select4 } from "@inquirer/prompts";
+// src/evolve/init.ts
+import fs15 from "fs/promises";
+import path15 from "path";
+import { stringify as yamlStringify } from "yaml";
+// src/evolve/templates.ts
+// Catalogue of eval templates keyed by template id. Each entry repeats its id
+// and carries a display name, a one-line description (used when building the
+// task-generation prompt) and the workflow types it is best suited for.
+var EVAL_TEMPLATES = {
+  "add-feature": {
+    id: "add-feature",
+    name: "Add Feature",
+    description: "Can the agent add a new capability?",
+    bestFor: ["feature-development", "api-building", "full-stack"]
+  },
+  "fix-bug": {
+    id: "fix-bug",
+    name: "Fix Bug",
+    description: "Can the agent diagnose and fix a problem?",
+    bestFor: ["maintenance", "debugging", "qa"]
+  },
+  "refactor": {
+    id: "refactor",
+    name: "Refactor",
+    description: "Can the agent restructure code?",
+    bestFor: ["maintenance", "architecture", "backend"]
+  },
+  "test-writing": {
+    id: "test-writing",
+    name: "Test Writing",
+    description: "Can the agent write tests?",
+    bestFor: ["tdd", "qa", "backend"]
+  },
+  "config-change": {
+    id: "config-change",
+    name: "Config Change",
+    description: "Can the agent update configuration?",
+    bestFor: ["devops", "infrastructure", "backend"]
+  },
+  "documentation": {
+    id: "documentation",
+    name: "Documentation",
+    description: "Can the agent write and update docs?",
+    bestFor: ["content", "api-building", "full-stack"]
+  }
+};
+/**
+ * Pick the eval template ids to instantiate for a workflow type.
+ * @param {string} workflowType - Workflow label (e.g. "backend", "tdd").
+ * @returns {string[]} Template ids for that workflow, or the default trio
+ *   ["add-feature", "fix-bug", "test-writing"] when the type is unknown.
+ */
+function selectTemplatesForWorkflow(workflowType) {
+  const mapping = {
+    "feature-development": ["add-feature", "test-writing", "documentation"],
+    "api-building": ["add-feature", "fix-bug", "test-writing"],
+    "full-stack": ["add-feature", "fix-bug", "test-writing"],
+    "maintenance": ["fix-bug", "refactor", "test-writing"],
+    "debugging": ["fix-bug", "test-writing"],
+    "qa": ["fix-bug", "test-writing", "add-feature"],
+    "architecture": ["refactor", "test-writing", "config-change"],
+    "backend": ["fix-bug", "refactor", "config-change", "test-writing"],
+    "devops": ["config-change", "fix-bug"],
+    "infrastructure": ["config-change", "refactor"],
+    "tdd": ["test-writing", "add-feature", "fix-bug"],
+    "content": ["documentation", "add-feature"],
+    "research": ["documentation", "add-feature"]
+  };
+  // Own-property check: a plain lookup would resolve inherited keys such as
+  // "constructor" or "toString" to Object.prototype members instead of falling
+  // back to the default list.
+  const isKnown = Object.prototype.hasOwnProperty.call(mapping, workflowType);
+  return isKnown ? mapping[workflowType] : ["add-feature", "fix-bug", "test-writing"];
+}
+// System prompt used when asking the model to turn eval templates into
+// concrete tasks. The field list it describes matches REQUIRED_TASK_FIELDS,
+// which validateTask enforces on the reply.
+var TASK_GENERATION_PROMPT = `You are an eval task generator for Claude Code agent environments. Given a project's CLAUDE.md, project structure, and selected eval templates, generate concrete, project-specific tasks.
+Each task must be realistic and testable against the actual project. Avoid generic placeholders.
+Return a JSON object with a "tasks" array. Each task has:
+- id: kebab-case identifier (e.g., "add-health-endpoint")
+- template: which eval template this instantiates
+- description: concrete task description the agent will receive
+- setup: shell commands to prepare the workspace (e.g., "npm install")
+- expected_outcome: multi-line string describing what success looks like
+- scoring: "pass-fail", "llm-judge", or "rubric"
+- timeout: seconds (300 for features/bugs, 600 for refactors, 180 for config/docs/tests)
+Return ONLY valid JSON, no markdown fences.`;
+/**
+ * Extract the JSON payload (object or array) from a raw model reply.
+ *
+ * A leading Markdown code fence is stripped. Both a "{...}" span and a "[...]"
+ * span are then located, and the candidates are tried in order of where they
+ * start in the text. Trying the object span first unconditionally would turn a
+ * top-level array of objects (`[{..},{..}]`) into the unparsable `{..},{..}`.
+ *
+ * @param {string} raw - Raw model reply.
+ * @returns {unknown} The first candidate that parses as JSON.
+ * @throws {Error} When no JSON-looking span exists or no candidate parses.
+ */
+function parseJsonResponse(raw) {
+  let cleaned = raw.trim();
+  if (cleaned.startsWith("```")) {
+    cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
+  }
+  const candidates = [/\{[\s\S]*\}/, /\[[\s\S]*\]/]
+    .map((pattern) => pattern.exec(cleaned))
+    .filter((match) => match !== null)
+    .sort((a, b) => a.index - b.index);
+  if (candidates.length === 0) {
+    throw new Error(
+      "LLM response did not contain valid JSON. Try again or use a different model."
+    );
+  }
+  let firstError;
+  for (const candidate of candidates) {
+    try {
+      return JSON.parse(candidate[0]);
+    } catch (err) {
+      // Remember the earliest failure; a later candidate may still parse.
+      if (firstError === undefined) {
+        firstError = err;
+      }
+    }
+  }
+  throw new Error(
+    `Failed to parse LLM response as JSON: ${firstError instanceof Error ? firstError.message : String(firstError)}`
+  );
+}
+// Fields every generated task must carry (present and neither null nor undefined).
+var REQUIRED_TASK_FIELDS = [
+  "id",
+  "template",
+  "description",
+  "setup",
+  "expected_outcome",
+  "scoring",
+  "timeout"
+];
+/**
+ * Check that a value parsed from the model reply looks like a task.
+ * Only presence is verified; field types and values are not inspected.
+ * @param {unknown} obj - Candidate task.
+ * @param {number} index - Position in the tasks array, used in error messages.
+ * @returns {object} The same object, unchanged.
+ * @throws {Error} When obj is not an object or a required field is missing.
+ */
+function validateTask(obj, index) {
+  const isObject = obj !== null && typeof obj === "object";
+  if (!isObject) {
+    throw new Error(`Task at index ${index} is not an object`);
+  }
+  const firstMissing = REQUIRED_TASK_FIELDS.find(
+    (name) => !(name in obj) || obj[name] == null
+  );
+  if (firstMissing !== undefined) {
+    throw new Error(`Task at index ${index} is missing required field: ${firstMissing}`);
+  }
+  return obj;
+}
+/**
+ * Compose the user message for task generation: the project's CLAUDE.md, a
+ * short project profile, and one line per selected eval template.
+ * @param {string} claudeMd - Contents of CLAUDE.md (may be empty).
+ * @param {object} projectProfile - Read for language, framework, scripts and keyFiles.
+ * @param {string[]} templates - Template ids; each must be a key of EVAL_TEMPLATES.
+ * @returns {string} Newline-joined message.
+ */
+function buildTaskGenerationMessage(claudeMd, projectProfile, templates) {
+  const scriptSummary = Object.entries(projectProfile.scripts)
+    .map(([name, command]) => `${name}=${command}`)
+    .join(", ");
+  const keyFileSummary = projectProfile.keyFiles.join(", ");
+  const templateLines = templates.map(
+    (templateId) => `- ${templateId}: ${EVAL_TEMPLATES[templateId].description}`
+  );
+  const sections = [
+    "## CLAUDE.md",
+    "",
+    claudeMd,
+    "",
+    "## Project Profile",
+    "",
+    `Language: ${projectProfile.language ?? "unknown"}`,
+    `Framework: ${projectProfile.framework ?? "none"}`,
+    `Scripts: ${scriptSummary || "none"}`,
+    `Key files: ${keyFileSummary || "none"}`,
+    "",
+    "## Selected Eval Templates",
+    "",
+    templateLines.join("\n"),
+    "",
+    "Generate concrete, project-specific tasks for each template above."
+  ];
+  return sections.join("\n");
+}
+/**
+ * Ask the model for concrete tasks for the given templates and validate the reply.
+ * @param {string} claudeMd - Contents of CLAUDE.md.
+ * @param {object} projectProfile - Profile passed to buildTaskGenerationMessage.
+ * @param {string[]} templates - Selected template ids.
+ * @param {object} config - LLM configuration forwarded to callLLM.
+ * @returns {Promise<object[]>} Validated task objects in reply order.
+ * @throws {Error} When the reply is not an object with a `tasks` array, or a task is invalid.
+ */
+async function generateTasksFromTemplates(claudeMd, projectProfile, templates, config) {
+  const prompt = buildTaskGenerationMessage(claudeMd, projectProfile, templates);
+  const reply = await callLLM(config, prompt, {
+    systemPrompt: TASK_GENERATION_PROMPT,
+    maxTokens: 4096
+  });
+  const payload = parseJsonResponse(reply);
+  const isObject = payload !== null && typeof payload === "object";
+  if (!isObject) {
+    throw new Error("LLM response is not a JSON object");
+  }
+  if (!Array.isArray(payload.tasks)) {
+    throw new Error("LLM response does not contain a 'tasks' array");
+  }
+  return payload.tasks.map((candidate, position) => validateTask(candidate, position));
+}
+// src/evolve/init.ts
+/**
+ * Create the `.kairn-evolve` workspace under the project root with its
+ * `baseline`, `traces` and `iterations` directories, and write `config.yaml`.
+ * @param {string} projectRoot - Project directory.
+ * @param {object} config - Read for model, proposerModel, scorer, maxIterations and parallelTasks.
+ * @returns {Promise<string>} Path of the workspace directory.
+ */
+async function createEvolveWorkspace(projectRoot, config) {
+  const workspace = path15.join(projectRoot, ".kairn-evolve");
+  for (const subdir of ["baseline", "traces", "iterations"]) {
+    await fs15.mkdir(path15.join(workspace, subdir), { recursive: true });
+  }
+  // The YAML file uses snake_case keys; the in-memory config uses camelCase.
+  const yamlText = yamlStringify({
+    model: config.model,
+    proposer_model: config.proposerModel,
+    scorer: config.scorer,
+    max_iterations: config.maxIterations,
+    parallel_tasks: config.parallelTasks
+  });
+  const configPath = path15.join(workspace, "config.yaml");
+  await fs15.writeFile(configPath, yamlText, "utf-8");
+  return workspace;
+}
+/**
+ * Serialize tasks to `tasks.yaml` in the workspace, preceded by a comment header.
+ * Only the known task fields are written; `rubric` is included when truthy.
+ * @param {string} workspacePath - Workspace directory.
+ * @param {object[]} tasks - Tasks to persist.
+ * @returns {Promise<void>}
+ */
+async function writeTasksFile(workspacePath, tasks) {
+  const toYamlEntry = (task) => {
+    // Insertion order determines key order in the emitted YAML.
+    const entry = {
+      id: task.id,
+      template: task.template,
+      description: task.description,
+      setup: task.setup,
+      expected_outcome: task.expected_outcome,
+      scoring: task.scoring
+    };
+    if (task.rubric) {
+      entry.rubric = task.rubric;
+    }
+    entry.timeout = task.timeout;
+    return entry;
+  };
+  const body = yamlStringify({ tasks: tasks.map((task) => toYamlEntry(task)) });
+  const header = "# .kairn-evolve/tasks.yaml\n# Auto-generated by kairn evolve init \u2014 edit freely\n";
+  const target = path15.join(workspacePath, "tasks.yaml");
+  await fs15.writeFile(target, header + body, "utf-8");
+}
+/**
+ * Inspect a project directory and summarize its language, framework, npm
+ * scripts and notable top-level files. Every probe is best-effort: unreadable
+ * or missing files leave the corresponding field at its default.
+ *
+ * A project with a package.json is reported as "typescript" only when
+ * `typescript` is a (dev)dependency or a tsconfig.json exists; otherwise it is
+ * reported as "javascript" rather than being labelled TypeScript unconditionally.
+ *
+ * @param {string} projectRoot - Project directory.
+ * @returns {Promise<{language: string|null, framework: string|null, scripts: object, keyFiles: string[]}>}
+ */
+async function buildProjectProfile(projectRoot) {
+  const profile = {
+    language: null,
+    framework: null,
+    scripts: {},
+    keyFiles: []
+  };
+  const exists = (name) => fs15.access(path15.join(projectRoot, name)).then(
+    () => true,
+    () => false
+  );
+  try {
+    const pkgStr = await fs15.readFile(
+      path15.join(projectRoot, "package.json"),
+      "utf-8"
+    );
+    const pkg = JSON.parse(pkgStr) ?? {};
+    const deps = {
+      ...pkg.dependencies ?? {},
+      ...pkg.devDependencies ?? {}
+    };
+    const usesTypeScript = Boolean(deps.typescript) || await exists("tsconfig.json");
+    profile.language = usesTypeScript ? "typescript" : "javascript";
+    if (pkg.scripts && typeof pkg.scripts === "object") {
+      profile.scripts = pkg.scripts;
+    }
+    // Ordered by precedence: the first dependency found wins.
+    const frameworkByDependency = [
+      ["next", "Next.js"],
+      ["express", "Express"],
+      ["react", "React"],
+      ["vue", "Vue"],
+      ["commander", "CLI (Commander.js)"]
+    ];
+    const detected = frameworkByDependency.find(([dependency]) => deps[dependency]);
+    if (detected) {
+      profile.framework = detected[1];
+    }
+  } catch {
+    // No readable/parsable package.json: fall through to the Python probes.
+  }
+  if (!profile.language) {
+    if (await exists("pyproject.toml") || await exists("requirements.txt")) {
+      profile.language = "python";
+    }
+  }
+  try {
+    const entries = await fs15.readdir(projectRoot);
+    const keyPatterns = [
+      "README.md",
+      "package.json",
+      "tsconfig.json",
+      "pyproject.toml",
+      "Cargo.toml",
+      "go.mod",
+      "Makefile",
+      "Dockerfile"
+    ];
+    profile.keyFiles = entries.filter((e) => keyPatterns.includes(e));
+  } catch {
+    // Directory not listable: keyFiles stays empty.
+  }
+  return profile;
+}
+/**
+ * Generate eval tasks for a project: load the saved config, read
+ * `.claude/CLAUDE.md` (empty string when unreadable), profile the project and
+ * ask the model for tasks matching the workflow's templates.
+ * @param {string} projectRoot - Project directory.
+ * @param {string} workflowType - Workflow label used to select templates.
+ * @returns {Promise<object[]>} Validated tasks.
+ * @throws {Error} When no config is available.
+ */
+async function autoGenerateTasks(projectRoot, workflowType) {
+  const config = await loadConfig();
+  if (!config) {
+    throw new Error("No config found. Run `kairn init` first.");
+  }
+  // Async wrapper so that any failure, including path construction, yields "".
+  const readClaudeMd = async () => fs15.readFile(
+    path15.join(projectRoot, ".claude", "CLAUDE.md"),
+    "utf-8"
+  );
+  const claudeMd = await readClaudeMd().catch(() => "");
+  const profile = await buildProjectProfile(projectRoot);
+  const templates = selectTemplatesForWorkflow(workflowType);
+  return generateTasksFromTemplates(claudeMd, profile, templates, config);
+}
+// src/evolve/baseline.ts
+import fs16 from "fs/promises";
+import path16 from "path";
+/**
+ * Copy the project's `.claude` directory into the workspace twice: as
+ * `baseline/` and as `iterations/0/harness/`.
+ * @param {string} projectRoot - Project directory containing `.claude`.
+ * @param {string} workspacePath - Evolve workspace directory.
+ * @returns {Promise<void>}
+ * @throws {Error} When `.claude` is not accessible.
+ */
+async function snapshotBaseline(projectRoot, workspacePath) {
+  const source = path16.join(projectRoot, ".claude");
+  const destinations = [
+    path16.join(workspacePath, "baseline"),
+    path16.join(workspacePath, "iterations", "0", "harness")
+  ];
+  const sourceAccessible = await fs16.access(source).then(
+    () => true,
+    () => false
+  );
+  if (!sourceAccessible) {
+    throw new Error(`.claude/ directory not found in ${projectRoot}`);
+  }
+  for (const destination of destinations) {
+    await copyDir(source, destination);
+  }
+}
+/**
+ * Recursively copy a directory tree. Directories are recreated; every other
+ * entry is copied with copyFile. Entries are processed one at a time.
+ * @param {string} src - Source directory.
+ * @param {string} dest - Destination directory (created if missing).
+ * @returns {Promise<void>}
+ */
+async function copyDir(src, dest) {
+  await fs16.mkdir(dest, { recursive: true });
+  for (const dirent of await fs16.readdir(src, { withFileTypes: true })) {
+    const from = path16.join(src, dirent.name);
+    const to = path16.join(dest, dirent.name);
+    await (dirent.isDirectory() ? copyDir(from, to) : fs16.copyFile(from, to));
+  }
+}
+// src/evolve/runner.ts
+import { exec as exec2, spawn } from "child_process";
+import { promisify as promisify2 } from "util";
+import fs18 from "fs/promises";
+import os3 from "os";
+import path18 from "path";
+// src/evolve/trace.ts
+import fs17 from "fs/promises";
+import path17 from "path";
+/**
+ * Load one task trace from disk. Missing files fall back to defaults: empty
+ * logs, `{}` for files_changed/timing, `{"pass": false}` for the score and no
+ * tool calls. The task id is the directory name and the iteration is parsed
+ * from the parent directory name (0 when not numeric).
+ * @param {string} traceDir - Directory holding the trace files.
+ * @returns {Promise<object>} The reconstructed trace.
+ * @throws {SyntaxError} When a present file contains malformed JSON.
+ */
+async function loadTrace(traceDir) {
+  const readOr = (fileName, fallback) => fs17.readFile(path17.join(traceDir, fileName), "utf-8").catch(() => fallback);
+  const stdout = await readOr("stdout.log", "");
+  const stderr = await readOr("stderr.log", "");
+  const rawFilesChanged = await readOr("files_changed.json", "{}");
+  const rawTiming = await readOr("timing.json", "{}");
+  const rawScore = await readOr("score.json", '{"pass": false}');
+  const rawToolCalls = await readOr("tool_calls.jsonl", "");
+  const toolCalls = [];
+  for (const line of rawToolCalls.split("\n")) {
+    // One JSON document per non-blank line.
+    if (line.trim()) {
+      toolCalls.push(JSON.parse(line));
+    }
+  }
+  const iterationDirName = path17.basename(path17.dirname(traceDir));
+  return {
+    taskId: path17.basename(traceDir),
+    iteration: parseInt(iterationDirName, 10) || 0,
+    stdout,
+    stderr,
+    toolCalls,
+    filesChanged: JSON.parse(rawFilesChanged),
+    score: JSON.parse(rawScore),
+    timing: JSON.parse(rawTiming)
+  };
+}
+/**
+ * Load every task trace recorded for one iteration.
+ *
+ * A missing or unreadable iteration directory yields an empty list. Each trace
+ * is loaded independently, so a single corrupt trace (for example a
+ * half-written JSON file) is skipped instead of silently discarding every
+ * trace that follows it in directory order.
+ *
+ * @param {string} workspacePath - Evolve workspace directory.
+ * @param {number} iteration - Iteration number.
+ * @returns {Promise<object[]>} Traces that could be loaded.
+ */
+async function loadIterationTraces(workspacePath, iteration) {
+  const tracesDir = path17.join(workspacePath, "traces", iteration.toString());
+  let taskDirs;
+  try {
+    taskDirs = await fs17.readdir(tracesDir);
+  } catch {
+    // Iteration has no traces directory (yet).
+    return [];
+  }
+  const traces = [];
+  for (const taskId of taskDirs) {
+    try {
+      traces.push(await loadTrace(path17.join(tracesDir, taskId)));
+    } catch {
+      // Best-effort: skip this trace and keep loading the rest.
+    }
+  }
+  return traces;
+}
+/**
+ * Persist a trace as six files in traceDir: stdout.log, stderr.log,
+ * tool_calls.jsonl (one JSON document per line), files_changed.json,
+ * timing.json and score.json. Files are written one after another in that order.
+ * @param {string} traceDir - Target directory (created if missing).
+ * @param {object} trace - Read for stdout, stderr, toolCalls, filesChanged, timing and score.
+ * @returns {Promise<void>}
+ */
+async function writeTrace(traceDir, trace) {
+  await fs17.mkdir(traceDir, { recursive: true });
+  const pretty = (value) => JSON.stringify(value, null, 2);
+  // Contents are produced lazily so each file is computed only when its turn comes.
+  const artifacts = [
+    ["stdout.log", () => trace.stdout],
+    ["stderr.log", () => trace.stderr],
+    ["tool_calls.jsonl", () => trace.toolCalls.map((tc) => JSON.stringify(tc)).join("\n")],
+    ["files_changed.json", () => pretty(trace.filesChanged)],
+    ["timing.json", () => pretty(trace.timing)],
+    ["score.json", () => pretty(trace.score)]
+  ];
+  for (const [fileName, render] of artifacts) {
+    await fs17.writeFile(path17.join(traceDir, fileName), render(), "utf-8");
+  }
+}
+/**
+ * Overwrite `score.json` in an existing trace directory with the given score,
+ * serialized as 2-space-indented JSON.
+ * @param {string} traceDir - Existing trace directory.
+ * @param {object} score - Score to persist.
+ * @returns {Promise<void>}
+ */
+async function writeScore(traceDir, score) {
+  const target = path17.join(traceDir, "score.json");
+  const serialized = JSON.stringify(score, null, 2);
+  await fs17.writeFile(target, serialized, "utf-8");
+}
+/**
+ * Record an iteration's outcome under `iterations/<n>/`: scores.json (score
+ * and per-task results), proposer_reasoning.md and mutation_diff.patch.
+ * @param {string} workspacePath - Evolve workspace directory.
+ * @param {object} log - Read for iteration, score, taskResults, proposal.reasoning and diffPatch.
+ * @returns {Promise<void>}
+ */
+async function writeIterationLog(workspacePath, log) {
+  const iterDir = path17.join(workspacePath, "iterations", log.iteration.toString());
+  await fs17.mkdir(iterDir, { recursive: true });
+  // Contents are produced lazily so each file is computed only when its turn comes.
+  const outputs = [
+    ["scores.json", () => JSON.stringify({ score: log.score, taskResults: log.taskResults }, null, 2)],
+    ["proposer_reasoning.md", () => log.proposal?.reasoning ?? "Baseline evaluation (no proposal)"],
+    ["mutation_diff.patch", () => log.diffPatch ?? ""]
+  ];
+  for (const [fileName, render] of outputs) {
+    await fs17.writeFile(path17.join(iterDir, fileName), render(), "utf-8");
+  }
+}
+// src/evolve/exec.ts
+import { exec } from "child_process";
+import { promisify } from "util";
+// Promise-returning wrapper around child_process.exec.
+var execAsync = promisify(exec);
+/**
+ * Run a command line through the shell in the given directory.
+ * @param {string} cmd - Command line to execute.
+ * @param {string} cwd - Working directory.
+ * @param {number} [timeoutMs=30000] - Timeout in milliseconds passed to exec.
+ * @returns {Promise<{stdout: string, stderr: string}>} Captured output; rejects when exec reports an error.
+ */
+async function execCommand(cmd, cwd, timeoutMs = 30000) {
+  const options = { cwd, timeout: timeoutMs };
+  return execAsync(cmd, options);
+}
+// src/evolve/scorers.ts
+// Matches a line that starts with one of a fixed set of command names followed
+// by a space. passFailScorer treats matching expected-outcome lines as
+// verification commands to execute.
+var COMMAND_PATTERN = /^(npm |npx |node |python |make |cargo |go |git |test |ls |cat |grep |curl )/;
+// Matches any of the characters ; | & ` $ ( ) < >. passFailScorer rejects
+// commands containing one of them instead of running them.
+var SHELL_METACHAR_PATTERN = /[;|&`$()<>]/;
+// System prompt for the whole-task LLM judge; asks for {pass, score 0-100, reasoning}.
+var JUDGE_SYSTEM_PROMPT = `You are an eval judge for Claude Code agent tasks. Given a task description, expected outcome, and actual execution results, determine if the task was completed successfully.
+Return ONLY valid JSON with this structure:
+{
+  "pass": true/false,
+  "score": 0-100,
+  "reasoning": "Brief explanation of your judgment"
+}`;
+// System prompt for scoring a single rubric criterion; asks for {score 0.0-1.0, reasoning}.
+var RUBRIC_SYSTEM_PROMPT = `You are an eval judge scoring a specific criterion. Given the task, the criterion to evaluate, and the execution results, score the criterion.
+Return ONLY valid JSON:
+{
+  "score": 0.0-1.0,
+  "reasoning": "Brief explanation"
+}`;
+/**
+ * Binary scorer. Expected-outcome lines that look like commands (per
+ * COMMAND_PATTERN, after stripping a leading "- ") are executed in
+ * workspacePath; the task passes when all of them succeed. Commands containing
+ * shell metacharacters are rejected and counted as failures. When no line
+ * looks like a command, the task passes unless stderr mentions "error",
+ * "failed" or "exception" (case-insensitive).
+ * @param {object} task - Read for expected_outcome (string or array of strings).
+ * @param {string} workspacePath - Directory in which verification commands run.
+ * @param {string} stdout - Captured stdout (not consulted by this scorer).
+ * @param {string} stderr - Captured stderr.
+ * @returns {Promise<{pass: boolean, score: number, details: string}>} score is 100 or 0.
+ */
+async function passFailScorer(task, workspacePath, stdout, stderr) {
+  const outcomeLines = Array.isArray(task.expected_outcome) ? task.expected_outcome : task.expected_outcome.split("\n");
+  const commands = [];
+  for (const rawLine of outcomeLines) {
+    const candidate = rawLine.replace(/^-\s*/, "").trim();
+    if (COMMAND_PATTERN.test(candidate)) {
+      commands.push(candidate);
+    }
+  }
+  if (commands.length === 0) {
+    // Fallback heuristic: scan stderr for failure keywords.
+    const lowered = stderr.toLowerCase();
+    const clean = !["error", "failed", "exception"].some((marker) => lowered.includes(marker));
+    return {
+      pass: clean,
+      score: clean ? 100 : 0,
+      details: clean ? "No errors detected in output" : "Errors found in stderr"
+    };
+  }
+  const failures = [];
+  for (const cmd of commands) {
+    if (SHELL_METACHAR_PATTERN.test(cmd)) {
+      failures.push(`Rejected unsafe command (shell metacharacters): ${cmd}`);
+      continue;
+    }
+    try {
+      await execCommand(cmd, workspacePath);
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err);
+      failures.push(`Command failed: ${cmd}\n${reason}`);
+    }
+  }
+  const allPassed = failures.length === 0;
+  return {
+    pass: allPassed,
+    score: allPassed ? 100 : 0,
+    details: allPassed ? `All ${commands.length} verification commands passed` : failures.join("\n")
+  };
+}
+/**
+ * Score a task by asking the model to judge the captured output against the
+ * expected outcome.
+ *
+ * The judge's reply is normalized before it is returned: `pass` becomes a real
+ * boolean and `score` a finite number clamped to 0-100 (falling back to
+ * 100/0 from `pass` when absent or non-numeric). Returning the raw fields
+ * would let a string score such as "85" reach callers that add scores up.
+ *
+ * @param {object} task - Read for description and expected_outcome.
+ * @param {string} workspacePath - Unused by this scorer; kept for a uniform scorer signature.
+ * @param {string} stdout - Captured stdout; the last 2000 characters are sent.
+ * @param {string} stderr - Captured stderr; the last 1000 characters are sent.
+ * @param {object} config - LLM configuration forwarded to callLLM.
+ * @returns {Promise<{pass: boolean, score: number, reasoning: *}>} Never rejects; failures are reported as a failing score.
+ */
+async function llmJudgeScorer(task, workspacePath, stdout, stderr, config) {
+  const expectedOutcome = Array.isArray(task.expected_outcome) ? task.expected_outcome.join("\n") : task.expected_outcome;
+  const userMessage = [
+    "## Task",
+    task.description,
+    "",
+    "## Expected Outcome",
+    expectedOutcome,
+    "",
+    "## Actual stdout (last 2000 chars)",
+    stdout.slice(-2e3),
+    "",
+    "## Actual stderr (last 1000 chars)",
+    stderr.slice(-1e3)
+  ].join("\n");
+  try {
+    const response = await callLLM(config, userMessage, {
+      systemPrompt: JUDGE_SYSTEM_PROMPT,
+      maxTokens: 1024
+    });
+    let cleaned = response.trim();
+    if (cleaned.startsWith("```")) {
+      cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
+    }
+    const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
+    if (!jsonMatch) {
+      return { pass: false, score: 0, reasoning: "Judge returned invalid JSON" };
+    }
+    const result = JSON.parse(jsonMatch[0]);
+    const pass = result.pass === true || result.pass === "true";
+    const hasScore = result.score !== null && result.score !== void 0 && result.score !== "";
+    const numericScore = hasScore ? Number(result.score) : NaN;
+    const score = Number.isFinite(numericScore) ? Math.max(0, Math.min(100, numericScore)) : pass ? 100 : 0;
+    return {
+      pass,
+      score,
+      reasoning: result.reasoning
+    };
+  } catch (err) {
+    return {
+      pass: false,
+      score: 0,
+      reasoning: `LLM judge error: ${err instanceof Error ? err.message : String(err)}`
+    };
+  }
+}
+/**
+ * Score a task against its rubric: each criterion is judged by the model on a
+ * 0-1 scale, and the weighted average is reported as a 0-100 score. The task
+ * passes at 60 or above. Tasks without a rubric fall back to passFailScorer.
+ *
+ * A criterion scores 0 when the model call fails, the reply has no JSON
+ * object, or the reported score is missing or not a finite number. Without
+ * that last check a NaN would propagate into the weighted total.
+ *
+ * @param {object} task - Read for description and rubric ([{criterion, weight}]).
+ * @param {string} workspacePath - Only used by the pass/fail fallback.
+ * @param {string} stdout - Captured stdout; the last 2000 characters are sent.
+ * @param {string} stderr - Captured stderr; the last 500 characters are sent.
+ * @param {object} config - LLM configuration forwarded to callLLM.
+ * @returns {Promise<object>} {pass, score, reasoning, breakdown} or the pass/fail result.
+ */
+async function rubricScorer(task, workspacePath, stdout, stderr, config) {
+  if (!task.rubric || task.rubric.length === 0) {
+    return passFailScorer(task, workspacePath, stdout, stderr);
+  }
+  const breakdown = [];
+  let weightedSum = 0;
+  for (const criterion of task.rubric) {
+    const userMessage = [
+      "## Task",
+      task.description,
+      "",
+      "## Criterion to Evaluate",
+      `"${criterion.criterion}" (weight: ${criterion.weight})`,
+      "",
+      "## Actual stdout (last 2000 chars)",
+      stdout.slice(-2e3),
+      "",
+      "## Actual stderr (last 500 chars)",
+      stderr.slice(-500)
+    ].join("\n");
+    let criterionScore = 0;
+    try {
+      const response = await callLLM(config, userMessage, {
+        systemPrompt: RUBRIC_SYSTEM_PROMPT,
+        maxTokens: 512
+      });
+      let cleaned = response.trim();
+      if (cleaned.startsWith("```")) {
+        cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
+      }
+      const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
+      if (jsonMatch) {
+        const rawScore = Number(JSON.parse(jsonMatch[0]).score);
+        if (Number.isFinite(rawScore)) {
+          criterionScore = Math.max(0, Math.min(1, rawScore));
+        }
+      }
+    } catch {
+      // Best-effort: a failed judge call or unparsable reply scores the criterion 0.
+    }
+    breakdown.push({
+      criterion: criterion.criterion,
+      score: criterionScore,
+      weight: criterion.weight
+    });
+    if (criterionScore > 0) {
+      weightedSum += criterionScore * criterion.weight;
+    }
+  }
+  const totalWeight = task.rubric.reduce((sum, c) => sum + c.weight, 0);
+  const totalScore = totalWeight > 0 ? Math.round(weightedSum / totalWeight * 100) : 0;
+  return {
+    pass: totalScore >= 60,
+    score: totalScore,
+    reasoning: `Rubric score: ${totalScore}%`,
+    breakdown
+  };
+}
+/**
+ * Dispatch to the scorer named by `task.scoring`. The LLM-based scorers are
+ * only used when a config is supplied; every other combination (including an
+ * unrecognized scoring mode) falls back to the pass/fail scorer.
+ * @param {object} task - Read for scoring.
+ * @param {string} workspacePath - Forwarded to the scorer.
+ * @param {string} stdout - Captured stdout.
+ * @param {string} stderr - Captured stderr.
+ * @param {object} [config] - LLM configuration.
+ * @returns {Promise<object>} The selected scorer's result.
+ */
+async function scoreTask(task, workspacePath, stdout, stderr, config) {
+  if (config) {
+    switch (task.scoring) {
+      case "llm-judge":
+        return llmJudgeScorer(task, workspacePath, stdout, stderr, config);
+      case "rubric":
+        return rubricScorer(task, workspacePath, stdout, stderr, config);
+      default:
+        break;
+    }
+  }
+  return passFailScorer(task, workspacePath, stdout, stderr);
+}
+// src/evolve/runner.ts
+// Promise-returning wrapper around child_process.exec, used for task setup commands.
+var execAsync2 = promisify2(exec2);
+/**
+ * Run one eval task in a throwaway workspace and persist its trace.
+ *
+ * Steps: create traceDir; create a temp directory prefixed "kairn-evolve-";
+ * copy the harness into `<tmp>/.claude`; run the task's setup command (if
+ * non-blank); snapshot the file list, run the agent, snapshot again; write the
+ * trace with a placeholder score. The temp directory is always removed.
+ *
+ * @param {object} task - Read for id, setup, description and timeout.
+ * @param {string} harnessPath - Directory copied into the workspace as `.claude`.
+ * @param {string} traceDir - Directory where the trace is written.
+ * @param {number} iteration - Iteration number recorded in the trace.
+ * @returns {Promise<{taskId: string, score: object, traceDir: string}>} score is the "Pending scoring" placeholder.
+ */
+async function runTask(task, harnessPath, traceDir, iteration) {
+  await fs18.mkdir(traceDir, { recursive: true });
+  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
+  const startMs = Date.now();
+  const tmpDir = await fs18.mkdtemp(path18.join(os3.tmpdir(), "kairn-evolve-"));
+  try {
+    await copyDir(harnessPath, path18.join(tmpDir, ".claude"));
+    let setupStderr = "";
+    if (task.setup.trim()) {
+      try {
+        await execAsync2(task.setup, { cwd: tmpDir, timeout: 6e4 });
+      } catch (err) {
+        // A failing setup does not abort the task; its message is prepended to stderr below.
+        setupStderr = err instanceof Error ? err.message : String(err);
+      }
+    }
+    // Snapshots taken around the agent run so only its changes are reported.
+    const filesBefore = await snapshotFileList(tmpDir);
+    const spawnResult = await spawnClaude(task.description, tmpDir, task.timeout);
+    const filesAfter = await snapshotFileList(tmpDir);
+    const filesChanged = diffFileLists(filesBefore, filesAfter);
+    const toolCalls = parseToolCalls(spawnResult.stdout);
+    const completedAt = (/* @__PURE__ */ new Date()).toISOString();
+    const durationMs = Date.now() - startMs;
+    const combinedStderr = setupStderr ? `[setup] ${setupStderr}
+${spawnResult.stderr}` : spawnResult.stderr;
+    const trace = {
+      taskId: task.id,
+      iteration,
+      stdout: spawnResult.stdout,
+      stderr: combinedStderr,
+      toolCalls,
+      filesChanged,
+      // Placeholder; this function does not score the task.
+      score: { pass: false, details: "Pending scoring" },
+      timing: { startedAt, completedAt, durationMs }
+    };
+    await writeTrace(traceDir, trace);
+    return {
+      taskId: task.id,
+      score: trace.score,
+      traceDir
+    };
+  } finally {
+    // Cleanup is best-effort: a failed removal must not mask the task result or error.
+    await fs18.rm(tmpDir, { recursive: true, force: true }).catch(() => {
+    });
+  }
+}
+/**
+ * Run the `claude` CLI non-interactively, feeding the instruction on stdin and
+ * collecting its output.
+ *
+ * All listeners are attached before anything is written, and stdin gets its
+ * own 'error' listener: if the child cannot be spawned or exits before
+ * reading its input, the write fails with an error on the stdin stream (for
+ * example EPIPE), which would otherwise be an unhandled stream error.
+ *
+ * @param {string} instruction - Prompt written to the child's stdin.
+ * @param {string} cwd - Working directory for the child.
+ * @param {number} timeoutSec - Timeout in seconds passed to spawn (as milliseconds).
+ * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>} Never rejects;
+ *   a spawn failure is reported with exitCode 1 and a "Spawn error" line in stderr.
+ */
+async function spawnClaude(instruction, cwd, timeoutSec) {
+  return new Promise((resolve) => {
+    const args = ["--print", "--output-format", "text", "--max-turns", "50"];
+    const child = spawn("claude", args, {
+      cwd,
+      stdio: ["pipe", "pipe", "pipe"],
+      timeout: timeoutSec * 1e3,
+      env: { ...process.env }
+    });
+    let stdout = "";
+    let stderr = "";
+    child.stdout.on("data", (data) => {
+      stdout += data.toString();
+    });
+    child.stderr.on("data", (data) => {
+      stderr += data.toString();
+    });
+    child.on("close", (code) => {
+      // code is null when the child was terminated by a signal; report that as failure.
+      resolve({ stdout, stderr, exitCode: code ?? 1 });
+    });
+    child.on("error", (err) => {
+      resolve({
+        stdout,
+        stderr: stderr + `
+Spawn error: ${err.message}`,
+        exitCode: 1
+      });
+    });
+    child.stdin.on("error", () => {
+      // Deliberately ignored: the 'close'/'error' handlers above already report
+      // the child's failure; this listener only keeps the stream error handled.
+    });
+    child.stdin.write(instruction);
+    child.stdin.end();
+  });
+}
+/**
+ * Walk a directory tree and record each file's modification time.
+ *
+ * The top-level entries `.claude`, `node_modules` and `.git` are skipped. The
+ * comparison is on the whole first path segment, so siblings that merely share
+ * a prefix (`.gitignore`, `.github/`, `.claudeignore`, `node_modules_old/`)
+ * are still tracked. Unreadable directories and files are silently skipped.
+ *
+ * @param {string} dir - Root directory to scan.
+ * @returns {Promise<Object<string, number>>} Map of path (relative to dir) to mtimeMs.
+ */
+async function snapshotFileList(dir) {
+  const ignoredRoots = new Set([".claude", "node_modules", ".git"]);
+  const result = {};
+  async function walk(current) {
+    let entries;
+    try {
+      entries = await fs18.readdir(current, { withFileTypes: true });
+    } catch {
+      // Directory vanished or is unreadable: nothing to record.
+      return;
+    }
+    for (const entry of entries) {
+      const fullPath = path18.join(current, entry.name);
+      const relativePath = path18.relative(dir, fullPath);
+      const [topSegment] = relativePath.split(path18.sep);
+      if (ignoredRoots.has(topSegment)) continue;
+      if (entry.isDirectory()) {
+        await walk(fullPath);
+      } else {
+        try {
+          const stat = await fs18.stat(fullPath);
+          result[relativePath] = stat.mtimeMs;
+        } catch {
+          // File removed between readdir and stat, or a dangling link: skip it.
+        }
+      }
+    }
+  }
+  await walk(dir);
+  return result;
+}
+/**
+ * Compare two file snapshots (path -> mtime) and classify every difference.
+ *
+ * Membership is tested with an own-property check: the `in` operator also
+ * sees inherited keys, so a file literally named `constructor` or `toString`
+ * would otherwise never be reported as created or deleted.
+ *
+ * @param {Object<string, number>} before - Snapshot taken first.
+ * @param {Object<string, number>} after - Snapshot taken second.
+ * @returns {Object<string, "created"|"modified"|"deleted">} Changed paths only.
+ */
+function diffFileLists(before, after) {
+  const has = (snapshot, file) => Object.prototype.hasOwnProperty.call(snapshot, file);
+  const changes = {};
+  for (const [file, mtime] of Object.entries(after)) {
+    if (!has(before, file)) {
+      changes[file] = "created";
+    } else if (before[file] !== mtime) {
+      changes[file] = "modified";
+    }
+  }
+  for (const file of Object.keys(before)) {
+    if (!has(after, file)) {
+      changes[file] = "deleted";
+    }
+  }
+  return changes;
+}
+/**
+ * Extract tool-call records from line-delimited JSON output.
+ *
+ * Each non-blank line is parsed on its own; lines that are not valid JSON are
+ * skipped. A record is kept when its `type` is "tool_use" or it has a truthy
+ * `tool_name`. Any unexpected failure yields an empty list.
+ *
+ * @param {string} stdout - Raw process output.
+ * @returns {object[]} Parsed tool-call records, in input order.
+ */
+function parseToolCalls(stdout) {
+  const isToolCall = (record) => record.type === "tool_use" || Boolean(record.tool_name);
+  try {
+    return stdout.split("\n").filter((text) => text.trim() !== "").flatMap((text) => {
+      try {
+        const record = JSON.parse(text);
+        return isToolCall(record) ? [record] : [];
+      } catch {
+        return [];
+      }
+    });
+  } catch {
+    return [];
+  }
+}
+/**
+ * Run every task against a harness, one after another, and aggregate scores.
+ *
+ * When `config` is truthy each task is re-scored with scoreTask() from the
+ * stdout/stderr logs in its trace directory and the score is written back;
+ * otherwise the score returned by runTask() is used as is.
+ *
+ * @param {object[]} tasks - Task definitions; each needs an `id`.
+ * @param {string} harnessPath - Harness directory handed to runTask().
+ * @param {string} workspacePath - Workspace root; traces go under `traces/`.
+ * @param {number} iteration - Iteration number, used in the trace path.
+ * @param {object} [config] - Scorer configuration; falsy skips re-scoring.
+ * @returns {Promise<{results: Object<string, object>, aggregate: number}>}
+ *   Per-task scores and their mean (0 when there are no tasks).
+ */
+async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config) {
+  const readLog = (traceDir, name) => fs18.readFile(path18.join(traceDir, name), "utf-8").catch(() => "");
+  const results = {};
+  for (const task of tasks) {
+    const traceDir = path18.join(workspacePath, "traces", iteration.toString(), task.id);
+    const { score: runnerScore } = await runTask(task, harnessPath, traceDir, iteration);
+    if (!config) {
+      results[task.id] = runnerScore;
+      continue;
+    }
+    const stdout = await readLog(traceDir, "stdout.log");
+    const stderr = await readLog(traceDir, "stderr.log");
+    const rescored = await scoreTask(task, traceDir, stdout, stderr, config);
+    await writeScore(traceDir, rescored);
+    results[task.id] = rescored;
+  }
+  let total = 0;
+  let count = 0;
+  for (const entry of Object.values(results)) {
+    // A missing numeric score falls back to 100/0 from the pass flag.
+    total += entry.score ?? (entry.pass ? 100 : 0);
+    count += 1;
+  }
+  const aggregate = count > 0 ? total / count : 0;
+  return { results, aggregate };
+}
+// src/evolve/loop.ts
+import fs21 from "fs/promises";
+import path21 from "path";
+// src/evolve/proposer.ts
+import fs19 from "fs/promises";
+import path19 from "path";
+// System prompt sent with every proposer LLM call (see propose()). It lists
+// the inputs the model receives, the diagnosis steps, and the JSON reply
+// shape (reasoning, mutations, expected_impact).
+// NOTE(review): the example only shows the "replace" and "add_section"
+// actions, while parseProposerResponse() also accepts "create_file".
+var PROPOSER_SYSTEM_PROMPT = `You are an expert agent environment optimizer. Your job is to improve a Claude Code
+agent environment (.claude/ directory) based on execution traces from real tasks.
+## What You Have Access To
+1. Current harness: The .claude/ directory files (CLAUDE.md, commands/, rules/, agents/)
+2. Execution traces: Full stdout/stderr, tool call sequences, file changes, and scores
+3. History: Previous iterations' proposals, diffs, and resulting score changes
+## Your Task
+Analyze the traces to identify WHY tasks fail or underperform. Then propose specific,
+minimal changes to the harness files that will fix those failures.
+## Diagnosis Process
+1. For each failed/low-scoring task:
+   a. Read the full trace (stdout, tool calls, file changes)
+   b. Identify the ROOT CAUSE: bad instruction? Missing tool? Wrong rule?
+   c. Trace the failure back to a specific harness decision
+   d. Propose a fix
+2. For each successful task:
+   a. Note what worked well
+   b. Ensure proposed changes don't break what's working
+3. Check history for counterfactual evidence
+## Output Format
+Return a JSON object:
+{
+  "reasoning": "Your full causal analysis...",
+  "mutations": [
+    { "file": "CLAUDE.md", "action": "replace", "old_text": "...", "new_text": "...", "rationale": "..." },
+    { "file": "commands/develop.md", "action": "add_section", "new_text": "...", "rationale": "..." }
+  ],
+  "expected_impact": { "task-id": "+15% \u2014 explanation" }
+}
+## Rules
+- MINIMAL changes only. Don't rewrite the entire CLAUDE.md.
+- Each mutation must have a clear rationale tied to a specific trace observation.
+- Never remove something that's working for another task.
+- If a previous iteration's change caused a regression, REVERT it.
+- Prefer ADDITIVE changes over replacements when possible.
+Return ONLY valid JSON.`;
+// Number of trailing stdout characters kept per trace in the proposer message.
+var STDOUT_TRUNCATION_LIMIT = 2e3;
+/**
+ * Load every regular file under the harness directory as UTF-8 text.
+ *
+ * Directories that cannot be listed and files that cannot be read are
+ * skipped; entries that are neither files nor directories are ignored.
+ *
+ * @param {string} harnessPath - Root of the harness directory.
+ * @returns {Promise<Object<string, string>>} Map of path (relative to
+ *   `harnessPath`) to file content.
+ */
+async function readHarnessFiles(harnessPath) {
+  const contents = {};
+  const collect = async (dir, prefix) => {
+    const dirents = await fs19.readdir(dir, { withFileTypes: true }).catch(() => null);
+    if (dirents === null) {
+      return;
+    }
+    for (const dirent of dirents) {
+      const absolute = path19.join(dir, dirent.name);
+      const relative = prefix === "" ? dirent.name : path19.join(prefix, dirent.name);
+      if (dirent.isDirectory()) {
+        await collect(absolute, relative);
+        continue;
+      }
+      if (!dirent.isFile()) {
+        continue;
+      }
+      try {
+        contents[relative] = await fs19.readFile(absolute, "utf-8");
+      } catch {
+        // Unreadable file: leave it out of the result.
+      }
+    }
+  };
+  await collect(harnessPath, "");
+  return contents;
+}
+/**
+ * Keep only the last `limit` characters of `stdout`, with a marker line.
+ *
+ * @param {string} stdout - Text to shorten.
+ * @param {number} limit - Maximum number of trailing characters to keep.
+ * @returns {string} `stdout` unchanged when it already fits, otherwise a
+ *   "[...truncated...]" header followed by the tail.
+ */
+function truncateStdout(stdout, limit) {
+  if (stdout.length <= limit) {
+    return stdout;
+  }
+  // Index from the start: slice(-limit) returns the WHOLE string when limit
+  // is 0, because -0 === 0.
+  const tail = stdout.slice(stdout.length - limit);
+  return `[...truncated, showing last ${limit} chars...]\n${tail}`;
+}
+/**
+ * Assemble the Markdown user message for the proposer.
+ *
+ * Four sections are emitted in order: harness files, task definitions,
+ * execution traces and iteration history. An empty section gets a
+ * parenthesised placeholder line.
+ *
+ * @param {Object<string, string>} harnessFiles - Relative path -> content.
+ * @param {object[]} traces - Trace records (taskId, score, stdout,
+ *   filesChanged, timing).
+ * @param {object[]} tasks - Task definitions.
+ * @param {object[]} history - Earlier iteration logs.
+ * @returns {string} The sections joined by newlines.
+ */
+function buildProposerUserMessage(harnessFiles, traces, tasks, history) {
+  const fence = "```";
+  const block = (lines) => lines.join("\n");
+  // Numeric score, derived from the pass flag when `score` is undefined.
+  const numericScore = (s) => s.score !== void 0 ? s.score : s.pass ? 100 : 0;
+  const parts = ["## Current Harness Files\n"];
+  const files = Object.entries(harnessFiles);
+  if (files.length === 0) {
+    parts.push("(No harness files found)\n");
+  }
+  for (const [name, body] of files) {
+    parts.push(block([`### ${name}`, fence, `${body}`, fence, ""]));
+  }
+  parts.push("## Task Definitions\n");
+  if (tasks.length === 0) {
+    parts.push("(No tasks defined)\n");
+  }
+  for (const task of tasks) {
+    parts.push(block([
+      `### Task: ${task.id}`,
+      `- Template: ${task.template}`,
+      `- Description: ${task.description}`,
+      `- Expected outcome: ${Array.isArray(task.expected_outcome) ? task.expected_outcome.join("; ") : task.expected_outcome}`,
+      `- Scoring: ${task.scoring}`,
+      ""
+    ]));
+  }
+  parts.push("## Execution Traces\n");
+  if (traces.length === 0) {
+    parts.push("(No traces available)\n");
+  }
+  for (const trace of traces) {
+    const scoreNum = numericScore(trace.score);
+    const tail = truncateStdout(trace.stdout, STDOUT_TRUNCATION_LIMIT);
+    const changed = Object.entries(trace.filesChanged).map(([f, action]) => `  - ${f}: ${action}`).join("\n");
+    const lines = [
+      `### Trace: ${trace.taskId}`,
+      `- Pass: ${trace.score.pass}`,
+      `- Score: ${scoreNum}`
+    ];
+    if (trace.score.details) {
+      lines.push(`- Details: ${trace.score.details}`);
+    }
+    lines.push(
+      `- Duration: ${trace.timing.durationMs}ms`,
+      "- Files changed:",
+      changed || "  (none)",
+      `- Stdout (last ${STDOUT_TRUNCATION_LIMIT} chars):`,
+      fence,
+      `${tail}`,
+      fence,
+      ""
+    );
+    parts.push(block(lines));
+  }
+  parts.push("## Iteration History\n");
+  if (history.length === 0) {
+    parts.push("(No previous iterations)\n");
+  }
+  for (const log of history) {
+    const perTask = Object.entries(log.taskResults).map(([id, s]) => `  - ${id}: ${numericScore(s)} (pass=${s.pass})`).join("\n");
+    parts.push(block([
+      `### Iteration ${log.iteration} \u2014 Score: ${log.score}`,
+      "- Task results:",
+      perTask,
+      ""
+    ]));
+    if (log.proposal) {
+      parts.push(block([
+        `- Proposal reasoning: ${log.proposal.reasoning}`,
+        `- Mutations: ${log.proposal.mutations.length} change(s)`,
+        ""
+      ]));
+    }
+  }
+  return parts.join("\n");
+}
+/**
+ * Parse and validate the proposer's reply.
+ *
+ * Accepts bare JSON or JSON wrapped in one Markdown code fence. Both
+ * snake_case and camelCase spellings of `new_text`, `old_text` and
+ * `expected_impact` are understood. Invalid mutations are dropped rather
+ * than rejected: non-objects, a missing/empty `file`, a `file` containing
+ * "..", an unknown action, or a "replace" without old text.
+ *
+ * @param {string} raw - Raw model output.
+ * @returns {{reasoning: string, mutations: object[], expectedImpact: Object<string, string>}}
+ * @throws {Error} When the payload is not valid JSON, is not an object, or
+ *   lacks a string `reasoning` or an array `mutations`.
+ */
+function parseProposerResponse(raw) {
+  const ALLOWED_ACTIONS = ["replace", "add_section", "create_file"];
+  // First string found under any of `keys`, else `fallback`.
+  const pickString = (source, keys, fallback) => {
+    for (const key of keys) {
+      if (typeof source[key] === "string") {
+        return source[key];
+      }
+    }
+    return fallback;
+  };
+  let cleaned = raw.trim();
+  const fenceMatch = cleaned.match(/^```(?:json)?\s*\n?([\s\S]*?)\n?\s*```$/);
+  if (fenceMatch) {
+    cleaned = fenceMatch[1].trim();
+  }
+  let parsed;
+  try {
+    parsed = JSON.parse(cleaned);
+  } catch {
+    throw new Error(`Proposer returned invalid JSON: ${cleaned.slice(0, 200)}`);
+  }
+  if (typeof parsed !== "object" || parsed === null) {
+    throw new Error("Proposer response is not a JSON object");
+  }
+  const obj = parsed;
+  if (typeof obj["reasoning"] !== "string") {
+    throw new Error('Proposer response missing required "reasoning" string field');
+  }
+  if (!Array.isArray(obj["mutations"])) {
+    throw new Error('Proposer response missing required "mutations" array field');
+  }
+  const mutations = [];
+  for (const entry of obj["mutations"]) {
+    if (typeof entry !== "object" || entry === null) {
+      continue;
+    }
+    const file = pickString(entry, ["file"], "");
+    const action = pickString(entry, ["action"], "");
+    const newText = pickString(entry, ["new_text", "newText"], "");
+    const oldText = pickString(entry, ["old_text", "oldText"], void 0);
+    const rationale = pickString(entry, ["rationale"], "");
+    // An empty path would resolve to the harness directory itself, and ".."
+    // could escape it; neither names a usable target file.
+    if (file === "" || file.includes("..")) {
+      continue;
+    }
+    if (!ALLOWED_ACTIONS.includes(action)) {
+      continue;
+    }
+    if (action === "replace" && !oldText) {
+      continue;
+    }
+    const mutation = {
+      file,
+      action,
+      newText,
+      rationale
+    };
+    if (oldText !== void 0) {
+      mutation.oldText = oldText;
+    }
+    mutations.push(mutation);
+  }
+  const rawImpact = obj["expected_impact"] ?? obj["expectedImpact"] ?? {};
+  const expectedImpact = {};
+  if (typeof rawImpact === "object" && rawImpact !== null) {
+    for (const [key, value] of Object.entries(rawImpact)) {
+      expectedImpact[key] = typeof value === "string" ? value : String(value);
+    }
+  }
+  return {
+    reasoning: obj["reasoning"],
+    mutations,
+    expectedImpact
+  };
+}
+/**
+ * Ask the proposer model for harness mutations for one iteration.
+ *
+ * Reads the harness files and the iteration's traces, builds the user
+ * message, calls the LLM with `config` overridden to use `proposerModel`,
+ * and parses the reply.
+ *
+ * @param {number} iteration - Iteration whose traces are loaded.
+ * @param {string} workspacePath - Workspace root passed to the trace loader.
+ * @param {string} harnessPath - Harness directory to read.
+ * @param {object[]} history - Earlier iteration logs.
+ * @param {object[]} tasks - Task definitions.
+ * @param {object} config - Base LLM configuration.
+ * @param {string} proposerModel - Model name used for this call.
+ * @returns {Promise<object>} The parsed proposal (see parseProposerResponse).
+ */
+async function propose(iteration, workspacePath, harnessPath, history, tasks, config, proposerModel) {
+  const harnessFiles = await readHarnessFiles(harnessPath);
+  const traces = await loadIterationTraces(workspacePath, iteration);
+  const raw = await callLLM(
+    { ...config, model: proposerModel },
+    buildProposerUserMessage(harnessFiles, traces, tasks, history),
+    { systemPrompt: PROPOSER_SYSTEM_PROMPT, maxTokens: 8192 }
+  );
+  return parseProposerResponse(raw);
+}
+// src/evolve/mutator.ts
+import fs20 from "fs/promises";
+import path20 from "path";
+/**
+ * Copy the current harness into the next iteration directory and apply the
+ * proposed mutations to the copy.
+ *
+ * Supported actions:
+ * - "replace": swap the first occurrence of `oldText` for `newText`;
+ *   skipped when the file is unreadable or `oldText` is absent.
+ * - "add_section": append `newText` after a blank line, creating the file
+ *   when it cannot be read.
+ * - "create_file": write `newText`, creating parent directories.
+ * Mutations whose `file` contains ".." are ignored.
+ *
+ * @param {string} currentHarnessPath - Harness to copy from.
+ * @param {string} nextIterationDir - Directory receiving `harness/`.
+ * @param {object[]} mutations - Mutations as returned by the proposer.
+ * @returns {Promise<{newHarnessPath: string, diffPatch: string}>}
+ */
+async function applyMutations(currentHarnessPath, nextIterationDir, mutations) {
+  const newHarnessPath = path20.join(nextIterationDir, "harness");
+  await copyDir(currentHarnessPath, newHarnessPath);
+  for (const mutation of mutations) {
+    if (mutation.file.includes("..")) {
+      continue;
+    }
+    const filePath = path20.join(newHarnessPath, mutation.file);
+    if (mutation.action === "replace") {
+      if (!mutation.oldText) {
+        continue;
+      }
+      let content;
+      try {
+        content = await fs20.readFile(filePath, "utf-8");
+      } catch {
+        // Treat a missing/unreadable target like "oldText not found": skip
+        // this mutation instead of aborting the whole batch.
+        continue;
+      }
+      if (!content.includes(mutation.oldText)) {
+        continue;
+      }
+      await fs20.writeFile(
+        filePath,
+        // Replacer function: a plain string replacement would expand
+        // "$&", "$'", "$$" etc. found in the model-written text.
+        content.replace(mutation.oldText, () => mutation.newText),
+        "utf-8"
+      );
+    } else if (mutation.action === "add_section") {
+      try {
+        const content = await fs20.readFile(filePath, "utf-8");
+        await fs20.writeFile(
+          filePath,
+          content + "\n\n" + mutation.newText,
+          "utf-8"
+        );
+      } catch {
+        await fs20.mkdir(path20.dirname(filePath), { recursive: true });
+        await fs20.writeFile(filePath, mutation.newText, "utf-8");
+      }
+    } else if (mutation.action === "create_file") {
+      await fs20.mkdir(path20.dirname(filePath), { recursive: true });
+      await fs20.writeFile(filePath, mutation.newText, "utf-8");
+    }
+  }
+  const diffPatch = await generateDiff2(currentHarnessPath, newHarnessPath);
+  return { newHarnessPath, diffPatch };
+}
+/**
+ * Produce a coarse, whole-file patch between two directory trees.
+ *
+ * For every path whose content differs, the output holds `--- a/` and
+ * `+++ b/` headers, every old line prefixed with "-", every new line
+ * prefixed with "+", and a blank separator. A missing file is treated as
+ * empty content. No line-level matching is attempted.
+ *
+ * @param {string} oldDir - Tree before the change.
+ * @param {string} newDir - Tree after the change.
+ * @returns {Promise<string>} Patch text, or "" when nothing differs.
+ */
+async function generateDiff2(oldDir, newDir) {
+  const before = await readAllFiles(oldDir);
+  const after = await readAllFiles(newDir);
+  const out = [];
+  const emit = (marker, text) => {
+    for (const line of text.split("\n")) {
+      out.push(`${marker}${line}`);
+    }
+  };
+  const names = Array.from(
+    /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)])
+  ).sort();
+  for (const name of names) {
+    const oldContent = before[name] ?? "";
+    const newContent = after[name] ?? "";
+    if (oldContent === newContent) {
+      continue;
+    }
+    out.push(`--- a/${name}`);
+    out.push(`+++ b/${name}`);
+    // The two contents differ here, so at least one side is non-empty.
+    if (oldContent) {
+      emit("-", oldContent);
+    }
+    if (newContent) {
+      emit("+", newContent);
+    }
+    out.push("");
+  }
+  return out.join("\n");
+}
+/**
+ * Read every non-directory entry under `dir` as UTF-8 text.
+ *
+ * Directories that cannot be listed are skipped; a file read error
+ * propagates to the caller.
+ *
+ * @param {string} dir - Root directory.
+ * @returns {Promise<Object<string, string>>} Map of path (relative to `dir`)
+ *   to content.
+ */
+async function readAllFiles(dir) {
+  const files = {};
+  const visit = async (folder) => {
+    let dirents;
+    try {
+      dirents = await fs20.readdir(folder, { withFileTypes: true });
+    } catch {
+      return;
+    }
+    for (const dirent of dirents) {
+      const absolute = path20.join(folder, dirent.name);
+      if (dirent.isDirectory()) {
+        await visit(absolute);
+        continue;
+      }
+      files[path20.relative(dir, absolute)] = await fs20.readFile(absolute, "utf-8");
+    }
+  };
+  await visit(dir);
+  return files;
+}
+// src/evolve/loop.ts
+/**
+ * Run the evaluate -> propose -> mutate loop over a workspace.
+ *
+ * Each iteration evaluates `iterations/<n>/harness`. A score below the best
+ * so far is logged as a rollback and the best harness is copied forward.
+ * Otherwise the proposer is asked for mutations, which are applied to
+ * produce the next iteration's harness. The loop stops on a score >= 100,
+ * after `maxIterations`, or when the next harness directory is missing.
+ *
+ * @param {string} workspacePath - Workspace root directory.
+ * @param {object[]} tasks - Tasks evaluated every iteration.
+ * @param {object} kairnConfig - LLM/scoring configuration.
+ * @param {{maxIterations: number, proposerModel: string}} evolveConfig
+ * @param {(event: object) => void} [onProgress] - Optional progress callback.
+ * @returns {Promise<{iterations: object[], bestIteration: number, bestScore: number, baselineScore: number}>}
+ * @throws {Error} When iteration 0 has no harness directory.
+ */
+async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgress) {
+  const history = [];
+  let bestScore = -1;
+  let bestIteration = 0;
+  let baselineScore = 0;
+  const iterationDir = (n) => path21.join(workspacePath, "iterations", n.toString());
+  const harnessDir = (n) => path21.join(iterationDir(n), "harness");
+  // Persist one iteration's outcome and keep it in the in-memory history.
+  const record = async (iteration, score, taskResults, proposal, diffPatch) => {
+    const log = {
+      iteration,
+      score,
+      taskResults,
+      proposal,
+      diffPatch,
+      timestamp: (/* @__PURE__ */ new Date()).toISOString()
+    };
+    await writeIterationLog(workspacePath, log);
+    history.push(log);
+  };
+  for (let iter = 0; iter < evolveConfig.maxIterations; iter++) {
+    const harnessPath = harnessDir(iter);
+    try {
+      await fs21.access(harnessPath);
+    } catch {
+      if (iter === 0) {
+        throw new Error(
+          "No baseline harness found. Run `kairn evolve baseline` first."
+        );
+      }
+      break;
+    }
+    onProgress?.({ type: "iteration-start", iteration: iter });
+    const { results, aggregate } = await evaluateAll(
+      tasks,
+      harnessPath,
+      workspacePath,
+      iter,
+      kairnConfig
+    );
+    onProgress?.({ type: "iteration-scored", iteration: iter, score: aggregate });
+    if (iter === 0) baselineScore = aggregate;
+    if (iter > 0 && aggregate < bestScore) {
+      onProgress?.({
+        type: "rollback",
+        iteration: iter,
+        score: aggregate,
+        message: `Regression: ${aggregate.toFixed(1)}% < ${bestScore.toFixed(1)}%. Rolling back.`
+      });
+      await record(iter, aggregate, results, null, null);
+      if (iter + 1 < evolveConfig.maxIterations) {
+        // Seed the next iteration from the best harness seen so far.
+        await copyDir(harnessDir(bestIteration), harnessDir(iter + 1));
+      }
+      continue;
+    }
+    bestScore = aggregate;
+    bestIteration = iter;
+    if (aggregate >= 100) {
+      onProgress?.({ type: "perfect-score", iteration: iter, score: aggregate });
+      await record(iter, aggregate, results, null, null);
+      break;
+    }
+    if (iter === evolveConfig.maxIterations - 1) {
+      // Last iteration: nothing would consume a proposal, so only log it.
+      await record(iter, aggregate, results, null, null);
+      break;
+    }
+    onProgress?.({ type: "proposing", iteration: iter });
+    let proposal;
+    try {
+      proposal = await propose(
+        iter,
+        workspacePath,
+        harnessPath,
+        history,
+        tasks,
+        kairnConfig,
+        evolveConfig.proposerModel
+      );
+    } catch {
+      // Proposer failed: carry the harness forward unchanged.
+      await copyDir(harnessPath, harnessDir(iter + 1));
+      await record(iter, aggregate, results, null, null);
+      continue;
+    }
+    const nextIterDir = iterationDir(iter + 1);
+    let diffPatch = "";
+    try {
+      const mutationResult = await applyMutations(
+        harnessPath,
+        nextIterDir,
+        proposal.mutations
+      );
+      diffPatch = mutationResult.diffPatch;
+    } catch {
+      // applyMutations copies the harness before mutating it, so a failure
+      // midway leaves partial edits (including new files) behind. Start from
+      // a clean directory so the fallback really is an unmodified copy.
+      const nextHarness = path21.join(nextIterDir, "harness");
+      await fs21.rm(nextHarness, { recursive: true, force: true });
+      await copyDir(harnessPath, nextHarness);
+    }
+    onProgress?.({
+      type: "mutations-applied",
+      iteration: iter,
+      mutationCount: proposal.mutations.length
+    });
+    await record(iter, aggregate, results, proposal, diffPatch);
+  }
+  onProgress?.({
+    type: "complete",
+    iteration: history.length > 0 ? history.length - 1 : 0,
+    score: bestScore
+  });
+  return {
+    iterations: history,
+    bestIteration,
+    bestScore,
+    baselineScore
+  };
+}
+// src/commands/evolve.ts
+// Default evolve settings. Passed to createEvolveWorkspace() by `evolve init`
+// and used by loadEvolveConfigFromWorkspace() for any key that config.yaml
+// does not provide.
+var DEFAULT_CONFIG = {
+  model: "claude-sonnet-4-6",
+  proposerModel: "claude-opus-4-6",
+  scorer: "pass-fail",
+  maxIterations: 5,
+  parallelTasks: 1
+};
+/**
+ * Load evolve settings from `<workspacePath>/config.yaml`.
+ *
+ * snake_case YAML keys are mapped to camelCase fields; a key that is null or
+ * absent takes its value from DEFAULT_CONFIG. Any failure (missing file,
+ * parse error, empty document) yields a copy of DEFAULT_CONFIG.
+ *
+ * @param {string} workspacePath - Workspace directory.
+ * @returns {Promise<{model: string, proposerModel: string, scorer: string, maxIterations: number, parallelTasks: number}>}
+ */
+async function loadEvolveConfigFromWorkspace(workspacePath) {
+  try {
+    const raw = await fs22.readFile(path22.join(workspacePath, "config.yaml"), "utf-8");
+    const {
+      model,
+      proposer_model: proposerModel,
+      scorer,
+      max_iterations: maxIterations,
+      parallel_tasks: parallelTasks
+    } = yamlParse(raw);
+    return {
+      model: model ?? DEFAULT_CONFIG.model,
+      proposerModel: proposerModel ?? DEFAULT_CONFIG.proposerModel,
+      scorer: scorer ?? DEFAULT_CONFIG.scorer,
+      maxIterations: maxIterations ?? DEFAULT_CONFIG.maxIterations,
+      parallelTasks: parallelTasks ?? DEFAULT_CONFIG.parallelTasks
+    };
+  } catch {
+    return { ...DEFAULT_CONFIG };
+  }
+}
+// Parent "evolve" command; the init, baseline and run subcommands are
+// attached to it below.
+var evolveCommand = new Command11("evolve").description("Evolve your agent environment through automated optimization");
+// `evolve init`: requires an existing .claude/ directory, creates the evolve
+// workspace, generates eval tasks (falling back to template placeholders when
+// generation fails), optionally adds more tasks interactively, then hands the
+// task list to writeTasksFile(). Any uncaught error is printed and exits 1.
+evolveCommand.command("init").description("Initialize an evolution workspace with auto-generated tasks").option("--workflow <type>", "Workflow type for template selection", "feature-development").action(async (options) => {
+  try {
+    const projectRoot = process.cwd();
+    console.log(ui.section("Evolve Init"));
+    const claudeDir = path22.join(projectRoot, ".claude");
+    // Abort early when the project has no .claude/ directory.
+    try {
+      await fs22.access(claudeDir);
+    } catch {
+      console.log(ui.error("No .claude/ directory found. Run kairn describe first."));
+      process.exit(1);
+    }
+    const workspace = await createEvolveWorkspace(projectRoot, DEFAULT_CONFIG);
+    console.log(ui.success("Created .kairn-evolve/ workspace"));
+    const spinner = ora2("Generating project-specific eval tasks...").start();
+    let tasks;
+    try {
+      tasks = await autoGenerateTasks(projectRoot, options.workflow);
+      spinner.succeed(`Generated ${tasks.length} eval tasks`);
+    } catch {
+      // Fallback: one placeholder task per template selected for the workflow.
+      spinner.fail("LLM task generation failed");
+      const templateIds = selectTemplatesForWorkflow(options.workflow);
+      tasks = templateIds.map((templateId, index) => ({
+        id: `${templateId}-${index + 1}`,
+        template: templateId,
+        description: `${EVAL_TEMPLATES[templateId].description} (project-specific task \u2014 edit in tasks.yaml)`,
+        setup: "npm install",
+        expected_outcome: "Task completed successfully",
+        scoring: "pass-fail",
+        timeout: 300
+      }));
+      console.log(ui.info(`Fell back to ${tasks.length} template placeholders`));
+    }
+    // List each task with its description cut to 80 characters.
+    for (const task of tasks) {
+      console.log(chalk14.cyan(`  ${task.id}`) + chalk14.dim(` (${task.template}) \u2014 ${task.description.slice(0, 80)}`));
+    }
+    let addMore = true;
+    while (addMore) {
+      // A rejected confirm prompt is treated as "no".
+      try {
+        addMore = await confirm3({ message: "Add another eval task?", default: false });
+      } catch {
+        addMore = false;
+      }
+      if (addMore) {
+        // NOTE(review): unlike confirm3 above, a rejection from select4 is not
+        // caught here and falls through to the outer catch, which exits.
+        const templateId = await select4({
+          message: "Select eval template:",
+          choices: Object.values(EVAL_TEMPLATES).map((t) => ({
+            name: `${t.name} \u2014 ${t.description}`,
+            value: t.id
+          }))
+        });
+        const addSpinner = ora2("Generating task...").start();
+        try {
+          const config = await loadConfig();
+          if (config) {
+            // CLAUDE.md is optional; a read failure leaves claudeMd empty.
+            let claudeMd = "";
+            try {
+              claudeMd = await fs22.readFile(path22.join(claudeDir, "CLAUDE.md"), "utf-8");
+            } catch {
+            }
+            const profile = await buildProjectProfile(projectRoot);
+            const newTasks = await generateTasksFromTemplates(claudeMd, profile, [templateId], config);
+            tasks.push(...newTasks);
+            addSpinner.succeed(`Added ${newTasks.length} task(s)`);
+          } else {
+            addSpinner.fail("No config found");
+          }
+        } catch {
+          addSpinner.fail("Failed to generate task");
+        }
+      }
+    }
+    await writeTasksFile(workspace, tasks);
+    console.log(ui.success(`Wrote ${tasks.length} tasks to tasks.yaml`));
+    console.log("");
+    console.log(chalk14.dim("  Next steps:"));
+    console.log(chalk14.dim("    1. Review .kairn-evolve/tasks.yaml"));
+    console.log(chalk14.dim("    2. Run: kairn evolve baseline"));
+    console.log(chalk14.dim("    3. Run: kairn evolve run"));
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    console.log(ui.error(msg));
+    process.exit(1);
+  }
+});
+// `evolve baseline`: requires the .kairn-evolve/ workspace, calls
+// snapshotBaseline() for the project, then reports how many files are under
+// <workspace>/baseline. Any error is printed and the process exits with 1.
+evolveCommand.command("baseline").description("Snapshot current .claude/ directory as baseline").action(async () => {
+  try {
+    const projectRoot = process.cwd();
+    const workspace = path22.join(projectRoot, ".kairn-evolve");
+    console.log(ui.section("Evolve Baseline"));
+    const workspaceExists = await fs22.access(workspace).then(() => true, () => false);
+    if (!workspaceExists) {
+      console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
+      process.exit(1);
+    }
+    await snapshotBaseline(projectRoot, workspace);
+    const fileCount = await countFiles(path22.join(workspace, "baseline"));
+    console.log(ui.success(`Baseline snapshot created (${fileCount} files)`));
+  } catch (err) {
+    console.log(ui.error(err instanceof Error ? err.message : String(err)));
+    process.exit(1);
+  }
+});
+// `evolve run`: with --task, runs (and, when a config is available, scores)
+// the matching task(s) once against the project's .claude/ directory and
+// writes traces under traces/0. Without --task, runs the full evolve() loop
+// for --iterations rounds and prints a summary table. Any uncaught error is
+// printed and the process exits with 1.
+evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").action(async (options) => {
+  try {
+    const projectRoot = process.cwd();
+    const workspace = path22.join(projectRoot, ".kairn-evolve");
+    console.log(ui.section("Evolve Run"));
+    try {
+      await fs22.access(workspace);
+    } catch {
+      console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
+      process.exit(1);
+    }
+    const tasksPath = path22.join(workspace, "tasks.yaml");
+    let tasksContent;
+    try {
+      tasksContent = await fs22.readFile(tasksPath, "utf-8");
+    } catch {
+      console.log(ui.error("No tasks.yaml found. Run kairn evolve init first."));
+      process.exit(1);
+    }
+    const parsed = yamlParse(tasksContent);
+    if (!parsed?.tasks || parsed.tasks.length === 0) {
+      console.log(ui.error("No tasks found in tasks.yaml"));
+      process.exit(1);
+    }
+    if (options.task) {
+      // Single-task mode: one evaluation pass, no proposer involved.
+      const tasksToRun = parsed.tasks.filter((t) => t.id === options.task);
+      if (tasksToRun.length === 0) {
+        console.log(ui.error(`Task "${options.task}" not found in tasks.yaml`));
+        process.exit(1);
+      }
+      console.log(ui.info(`Running ${tasksToRun.length} task(s)...`));
+      console.log("");
+      const config = await loadConfig();
+      const harnessPath = path22.join(projectRoot, ".claude");
+      const results = [];
+      for (const task of tasksToRun) {
+        const traceDir = path22.join(workspace, "traces", "0", task.id);
+        const spinner = ora2(`Running: ${task.id}`).start();
+        const result = await runTask(task, harnessPath, traceDir, 0);
+        if (config) {
+          const stdout = await fs22.readFile(path22.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
+          const stderr = await fs22.readFile(path22.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
+          const score = await scoreTask(task, traceDir, stdout, stderr, config);
+          result.score = score;
+          await writeScore(traceDir, score);
+        }
+        results.push(result);
+        const status = result.score.pass ? chalk14.green("PASS") : chalk14.red("FAIL");
+        const scoreStr = result.score.score !== void 0 ? chalk14.dim(` (${result.score.score}%)`) : "";
+        spinner.stop();
+        console.log(`  ${status}  ${task.id}${scoreStr}${result.score.details ? chalk14.dim(` \u2014 ${result.score.details}`) : ""}`);
+      }
+      const passed = results.filter((r) => r.score.pass).length;
+      console.log("");
+      console.log(ui.info(`Results: ${passed}/${results.length} passed`));
+      console.log(ui.info("Traces written to .kairn-evolve/traces/0/"));
+    } else {
+      const kairnConfig = await loadConfig();
+      if (!kairnConfig) {
+        console.log(ui.error("No config found. Run kairn init first."));
+        process.exit(1);
+      }
+      const evolveConfig = await loadEvolveConfigFromWorkspace(workspace);
+      const iterations = parseInt(options.iterations ?? "5", 10);
+      if (Number.isNaN(iterations) || iterations < 1) {
+        console.log(ui.error("--iterations must be a positive integer"));
+        process.exit(1);
+      }
+      evolveConfig.maxIterations = iterations;
+      try {
+        await fs22.access(path22.join(workspace, "iterations", "0", "harness"));
+      } catch {
+        console.log(ui.error("No baseline harness found. Run kairn evolve baseline first."));
+        process.exit(1);
+      }
+      const result = await evolve(workspace, parsed.tasks, kairnConfig, evolveConfig, (event) => {
+        switch (event.type) {
+          case "iteration-start":
+            console.log(ui.section(`Iteration ${event.iteration}`));
+            break;
+          case "iteration-scored": {
+            const scoreColor = event.score !== void 0 && event.score >= 100 ? chalk14.green : event.score !== void 0 && event.score >= 60 ? chalk14.yellow : chalk14.red;
+            console.log(`  Score: ${scoreColor((event.score?.toFixed(1) ?? "0") + "%")}`);
+            break;
+          }
+          case "rollback":
+            console.log(chalk14.yellow(`  Warning: ${event.message ?? "Regression detected"}`));
+            break;
+          case "proposing":
+            console.log(chalk14.dim("  Proposer analyzing traces..."));
+            break;
+          case "mutations-applied":
+            console.log(chalk14.dim(`  Applied ${event.mutationCount ?? 0} mutation(s)`));
+            break;
+          case "perfect-score":
+            console.log(chalk14.green("  Perfect score. Stopping."));
+            break;
+          case "complete":
+            break;
+        }
+      });
+      console.log(ui.section("Evolution Summary"));
+      console.log(`  Iterations:    ${result.iterations.length}`);
+      console.log(`  Baseline:      ${result.baselineScore.toFixed(1)}%`);
+      console.log(`  Best:          ${chalk14.green(result.bestScore.toFixed(1) + "%")} (iteration ${result.bestIteration})`);
+      const improvement = result.bestScore - result.baselineScore;
+      if (improvement > 0) {
+        console.log(`  Improvement:   ${chalk14.green("+" + improvement.toFixed(1) + " points")}`);
+      } else {
+        console.log(`  Improvement:   ${improvement.toFixed(1)} points`);
+      }
+      console.log("");
+      console.log("  Iter  Score     Mutations  Status");
+      // Highest score among the rows already printed. evolve() rolls an
+      // iteration back exactly when it scores below that value, so this
+      // identifies rollbacks reliably. "No proposal" does not: perfect-score,
+      // final and proposer-failure iterations also log a null proposal.
+      let priorBest = Number.NEGATIVE_INFINITY;
+      for (const iter of result.iterations) {
+        const scoreStr = iter.score.toFixed(1).padStart(6) + "%";
+        const mutations = iter.proposal?.mutations.length ?? 0;
+        const mutStr = mutations > 0 ? mutations.toString() : "-";
+        let status = "evaluated";
+        if (iter.iteration === 0) status = "baseline";
+        else if (iter.score < priorBest) status = "rollback";
+        else if (iter.score >= 100) status = "perfect";
+        else if (iter.iteration === result.bestIteration) status = "best";
+        priorBest = Math.max(priorBest, iter.score);
+        console.log(`  ${iter.iteration.toString().padStart(4)}  ${scoreStr}  ${mutStr.padStart(9)}  ${status}`);
+      }
+    }
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    console.log(ui.error(msg));
+    process.exit(1);
+  }
+});
+/**
+ * Count the non-directory entries under `dir`, recursively.
+ *
+ * @param {string} dir - Directory to scan.
+ * @returns {Promise<number>} Number of entries found; a directory that
+ *   cannot be listed counts as 0.
+ */
+async function countFiles(dir) {
+  let dirents;
+  try {
+    dirents = await fs22.readdir(dir, { withFileTypes: true });
+  } catch {
+    return 0;
+  }
+  let total = 0;
+  for (const dirent of dirents) {
+    total += dirent.isDirectory() ? await countFiles(path22.join(dir, dirent.name)) : 1;
+  }
+  return total;
+}
 // src/cli.ts
-var program = new Command11();
+var program = new Command12();
 program.name("kairn").description(
   "Compile natural language intent into optimized Claude Code environments"
 ).version("1.9.0").option("--no-color", "Disable colored output");
@@ -3701,8 +5269,9 @@ program.addCommand(doctorCommand);
 program.addCommand(registryCommand);
 program.addCommand(templatesCommand);
 program.addCommand(keysCommand);
+program.addCommand(evolveCommand);
 if (process.argv.includes("--no-color") || process.env.NO_COLOR) {
-  chalk14.level = 0;
+  chalk15.level = 0;
 }
 program.parse();
 //# sourceMappingURL=cli.js.map