npm - kairn-cli - Versions diffs - 1.13.0 → 2.0.0 - Mend

kairn-cli 1.13.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/cli.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // src/cli.ts
-import { Command as Command11 } from "commander";
-import chalk14 from "chalk";
+import { Command as Command12 } from "commander";
+import chalk15 from "chalk";
 // src/commands/init.ts
 import { Command } from "commander";
@@ -221,7 +221,7 @@ var ui = {
   // Key-value pairs
   kv: (key, value) => `  ${chalk.cyan(key.padEnd(14))} ${value}`,
   // File list
-  file: (path15) => chalk.dim(`    ${path15}`),
+  file: (path20) => chalk.dim(`    ${path20}`),
   // Tool display
   tool: (name, reason) => `    ${warmStone("\u25CF")} ${chalk.bold(name)}
       ${chalk.dim(reason)}`,
@@ -562,8 +562,6 @@ import chalk5 from "chalk";
 import fs4 from "fs/promises";
 import path4 from "path";
 import crypto from "crypto";
-import Anthropic2 from "@anthropic-ai/sdk";
-import OpenAI2 from "openai";
 // src/compiler/prompt.ts
 var SKELETON_PROMPT = `You are the Kairn skeleton compiler. Your job is to select tools and outline the project structure from a user's natural language description.
@@ -716,6 +714,13 @@ At the start of every session, before doing ANY work:
 4. Summarize what you see in 2-3 lines, then proceed
 This saves 2-5 exploratory turns. Never ask "what files are here?" \u2014 look first.
+## Completion Standards
+Never mark a task "done" without running the Completion Verification checklist.
+Tests passing is necessary but not sufficient \u2014 also verify requirements coverage,
+state cleanliness, and review changes from the perspective of a test engineer,
+code reviewer, and the requesting user.
 \`\`\`
 Do not add generic filler. Every line must be specific to the user's workflow.
@@ -738,6 +743,7 @@ Do not add generic filler. Every line must be specific to the user's workflow.
 14. A "Git Workflow" section in CLAUDE.md (3 rules: small commits, conventional format, <200 lines PR)
 15. "Engineering Standards", "Tool Usage Policy", and "Code Philosophy" sections in CLAUDE.md
 16. A "First Turn Protocol" section in CLAUDE.md (orient before working: pwd, ls, git status, check relevant runtimes, read task files)
+17. A "Completion Standards" section in CLAUDE.md (never mark done without verifying: requirements met, tests passing, no debug artifacts, reviewed from 3 perspectives)
 ## Shell-Integrated Commands
@@ -855,7 +861,7 @@ Merge this into the settings hooks alongside the PreToolUse and PostToolUse hook
 - \`/project:status\` command (live git status, recent commits, SPRINT.md overview using ! prefix)
 - \`/project:fix\` command (takes $ARGUMENTS as issue number, plans fix, implements, tests, commits)
 - \`/project:sprint\` command (define acceptance criteria before coding, writes to docs/SPRINT.md)
-- \`/project:develop\` command (full development pipeline \u2014 orchestrates @architect \u2192 @planner \u2192 @implementer \u2192 @verifier \u2192 @fixer \u2192 @grill \u2192 @doc-updater through spec, plan, TDD implement, review, and doc update phases)
+- \`/project:develop\` command (full development pipeline \u2014 orchestrates @architect \u2192 @planner \u2192 @implementer \u2192 @verifier \u2192 @fixer \u2192 @grill \u2192 @doc-updater through spec, plan, TDD implement, review, and doc update phases). MUST include a Phase 7 "Completion Gate" that runs a Completion Verification checklist before marking the feature done: re-read original requirements, confirm each is met with evidence, run test suite + lint/typecheck, review git diff for unexpected changes or debug artifacts, answer 3 perspective questions (test engineer, code reviewer, requesting user). If ANY check fails, loop back to fix before completing.
 - A TDD skill using the 3-phase isolation pattern (RED \u2192 GREEN \u2192 REFACTOR):
   - RED: Write failing test only. Verify it FAILS.
   - GREEN: Write MINIMUM code to pass. Nothing extra.
@@ -1023,6 +1029,13 @@ At the start of every session, before doing ANY work:
 4. Summarize what you see in 2-3 lines, then proceed
 This saves 2-5 exploratory turns. Never ask "what files are here?" \u2014 look first.
+## Completion Standards
+Never mark a task "done" without running the Completion Verification checklist.
+Tests passing is necessary but not sufficient \u2014 also verify requirements coverage,
+state cleanliness, and review changes from the perspective of a test engineer,
+code reviewer, and the requesting user.
 \`\`\`
 Do not add generic filler. Every line must be specific to the user's workflow.
@@ -1045,6 +1058,7 @@ Do not add generic filler. Every line must be specific to the user's workflow.
 14. A "Git Workflow" section in CLAUDE.md (3 rules: small commits, conventional format, <200 lines PR)
 15. "Engineering Standards", "Tool Usage Policy", and "Code Philosophy" sections in CLAUDE.md
 16. A "First Turn Protocol" section in CLAUDE.md (orient before working: pwd, ls, git status, check relevant runtimes, read task files)
+17. A "Completion Standards" section in CLAUDE.md (never mark done without verifying: requirements met, tests passing, no debug artifacts, reviewed from 3 perspectives)
 ## Tool Selection Rules
@@ -1188,76 +1202,9 @@ async function loadRegistry() {
   return Array.from(merged.values());
 }
-// src/compiler/compile.ts
-function buildSkeletonMessage(intent, registry) {
-  const registrySummary = registry.map(
-    (t) => `- ${t.id} (${t.type}, tier ${t.tier}, auth: ${t.auth}): ${t.description} [best_for: ${t.best_for.join(", ")}]`
-  ).join("\n");
-  return `## User Intent
-${intent}
-## Available Tool Registry
-${registrySummary}
-Generate the skeleton JSON now.`;
-}
-function buildHarnessMessage(intent, skeleton, concise) {
-  const skeletonJson = JSON.stringify(skeleton, null, 2);
-  const conciseNote = concise ? "\n\nIMPORTANT: Be concise. Maximum 80 lines for claude_md. Maximum 5 commands. Keep all content brief." : "";
-  return `## User Intent
-${intent}
-## Project Skeleton
-${skeletonJson}
-Generate the harness content JSON now.${conciseNote}`;
-}
-function parseSkeletonResponse(text) {
-  let cleaned = text.trim();
-  if (cleaned.startsWith("```")) {
-    cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
-  }
-  const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
-  if (!jsonMatch) {
-    throw new Error("Pass 1 (skeleton) did not return valid JSON.");
-  }
-  try {
-    const parsed = JSON.parse(jsonMatch[0]);
-    if (!parsed.name || !parsed.tools || !Array.isArray(parsed.tools)) {
-      throw new Error("Skeleton missing required fields: name, tools");
-    }
-    return parsed;
-  } catch (err) {
-    throw new Error(
-      `Failed to parse skeleton JSON: ${err instanceof Error ? err.message : String(err)}`
-    );
-  }
-}
-function parseHarnessResponse(text) {
-  let cleaned = text.trim();
-  if (cleaned.startsWith("```")) {
-    cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
-  }
-  const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
-  if (!jsonMatch) {
-    throw new Error("Pass 2 (harness) did not return valid JSON.");
-  }
-  try {
-    const parsed = JSON.parse(jsonMatch[0]);
-    if (!parsed.claude_md || !parsed.commands) {
-      throw new Error("Harness missing required fields: claude_md, commands");
-    }
-    return parsed;
-  } catch (err) {
-    throw new Error(
-      `Failed to parse harness JSON: ${err instanceof Error ? err.message : String(err)}`
-    );
-  }
-}
+// src/llm.ts
+import Anthropic2 from "@anthropic-ai/sdk";
+import OpenAI2 from "openai";
 function classifyError(err, provider) {
   const msg = err instanceof Error ? err.message : String(err);
   const status = err?.status;
@@ -1289,8 +1236,8 @@ function classifyError(err, provider) {
   return `${provider} API error: ${msg}`;
 }
 async function callLLM(config, userMessage, options) {
-  const maxTokens = options?.maxTokens ?? 8192;
-  const systemPrompt = options?.systemPrompt ?? SYSTEM_PROMPT;
+  const maxTokens = options.maxTokens ?? 8192;
+  const systemPrompt = options.systemPrompt;
   const providerName = getProviderName(config.provider);
   if (config.provider === "anthropic") {
     const client2 = new Anthropic2({ apiKey: config.api_key });
@@ -1332,6 +1279,77 @@ async function callLLM(config, userMessage, options) {
     throw new Error(classifyError(err, providerName));
   }
 }
+// src/compiler/compile.ts
+/**
+ * Builds the pass-1 (skeleton) user message: the user's intent followed by a
+ * one-line summary of every tool in the registry.
+ */
+function buildSkeletonMessage(intent, registry) {
+  const describeTool = (tool) => {
+    const bestFor = tool.best_for.join(", ");
+    return `- ${tool.id} (${tool.type}, tier ${tool.tier}, auth: ${tool.auth}): ${tool.description} [best_for: ${bestFor}]`;
+  };
+  const registrySummary = registry.map((tool) => describeTool(tool)).join("\n");
+  return `## User Intent
+${intent}
+## Available Tool Registry
+${registrySummary}
+Generate the skeleton JSON now.`;
+}
+/**
+ * Builds the pass-2 (harness) user message: the user's intent plus the
+ * pretty-printed skeleton. A brevity instruction is appended when `concise`
+ * is truthy.
+ */
+function buildHarnessMessage(intent, skeleton, concise) {
+  let conciseNote = "";
+  if (concise) {
+    conciseNote = "\n\nIMPORTANT: Be concise. Maximum 80 lines for claude_md. Maximum 5 commands. Keep all content brief.";
+  }
+  const skeletonJson = JSON.stringify(skeleton, null, 2);
+  return `## User Intent
+${intent}
+## Project Skeleton
+${skeletonJson}
+Generate the harness content JSON now.${conciseNote}`;
+}
+/**
+ * Extracts and validates the skeleton JSON from a pass-1 LLM reply.
+ *
+ * A leading markdown code fence is stripped, then the span from the first
+ * "{" to the last "}" is parsed. The result must have a truthy `name` and
+ * an array `tools`.
+ *
+ * @throws Error when no braces are found, the JSON is invalid, or a
+ *   required field is missing.
+ */
+function parseSkeletonResponse(text) {
+  const trimmed = text.trim();
+  const unfenced = trimmed.startsWith("```")
+    ? trimmed.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "")
+    : trimmed;
+  const candidate = /\{[\s\S]*\}/.exec(unfenced);
+  if (candidate === null) {
+    throw new Error("Pass 1 (skeleton) did not return valid JSON.");
+  }
+  try {
+    const skeleton = JSON.parse(candidate[0]);
+    const hasName = Boolean(skeleton.name);
+    const hasToolList = Boolean(skeleton.tools) && Array.isArray(skeleton.tools);
+    if (!(hasName && hasToolList)) {
+      throw new Error("Skeleton missing required fields: name, tools");
+    }
+    return skeleton;
+  } catch (err) {
+    // Validation failures thrown above are re-wrapped here as well.
+    const reason = err instanceof Error ? err.message : String(err);
+    throw new Error(`Failed to parse skeleton JSON: ${reason}`);
+  }
+}
+/**
+ * Extracts and validates the harness JSON from a pass-2 LLM reply.
+ *
+ * A leading markdown code fence is stripped, then the span from the first
+ * "{" to the last "}" is parsed. The result must have truthy `claude_md`
+ * and `commands` fields.
+ *
+ * @throws Error when no braces are found, the JSON is invalid, or a
+ *   required field is missing.
+ */
+function parseHarnessResponse(text) {
+  const trimmed = text.trim();
+  const unfenced = trimmed.startsWith("```")
+    ? trimmed.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "")
+    : trimmed;
+  const candidate = /\{[\s\S]*\}/.exec(unfenced);
+  if (candidate === null) {
+    throw new Error("Pass 2 (harness) did not return valid JSON.");
+  }
+  try {
+    const harness = JSON.parse(candidate[0]);
+    const complete = Boolean(harness.claude_md) && Boolean(harness.commands);
+    if (!complete) {
+      throw new Error("Harness missing required fields: claude_md, commands");
+    }
+    return harness;
+  } catch (err) {
+    // Validation failures thrown above are re-wrapped here as well.
+    const reason = err instanceof Error ? err.message : String(err);
+    throw new Error(`Failed to parse harness JSON: ${reason}`);
+  }
+}
 function buildSettings(skeleton, registry) {
   const selectedTools = skeleton.tools.map((t) => registry.find((r) => r.id === t.tool_id)).filter(Boolean);
   const allow = ["Read", "Write", "Edit", "Bash(npm run *)", "Bash(npx *)"];
@@ -1501,7 +1519,9 @@ async function generateClarifications(intent, onProgress) {
   onProgress?.("Analyzing your request...");
   const clarificationConfig = { ...config };
   clarificationConfig.model = getCheapModel(config.provider, config.model);
-  const response = await callLLM(clarificationConfig, CLARIFICATION_PROMPT + "\n\nUser description: " + intent);
+  const response = await callLLM(clarificationConfig, CLARIFICATION_PROMPT + "\n\nUser description: " + intent, {
+    systemPrompt: SYSTEM_PROMPT
+  });
   try {
     let cleaned = response.trim();
     if (cleaned.startsWith("```")) {
@@ -1682,7 +1702,29 @@ If confidence is LOW or MEDIUM, fix issues and re-verify.
 Run /project:grill for adversarial review.
 Fix any BLOCKERs.
-## Phase 6: SHIP
+## Phase 6: COMPLETION GATE
+Before shipping, run the Completion Verification checklist:
+### Requirements Check
+- [ ] Re-read the ORIGINAL task description (not your interpretation)
+- [ ] Each explicit requirement is met with evidence (test output, diff)
+- [ ] Each implicit requirement (error handling, types, tests) is addressed
+### State Check
+- [ ] Test suite passes
+- [ ] Lint/typecheck passes
+- [ ] \`git diff --stat\` \u2014 every changed file is intentional
+- [ ] No debug artifacts (console.log, TODO, commented-out code, temp files)
+### Perspective Check (1 sentence each)
+- **Test engineer:** Most likely production failure mode?
+- **Code reviewer:** What would I flag in review?
+- **Requesting user:** Does this solve my actual problem?
+ALL pass \u2192 proceed to ship. ANY fail \u2192 fix first, then re-verify.
+## Phase 7: SHIP
 Run /project:commit.
 Report what was built and what's next from docs/SPRINT.md.
@@ -1762,11 +1804,26 @@ Run verification:
 - Run functional tests
 - If NEEDS FIXES: fix and re-verify
-## Phase 5: PR
+## Phase 5: COMPLETION GATE
+Before creating a PR, run the Completion Verification checklist:
+- [ ] Re-read the ORIGINAL spec from docs/SPRINT.md
+- [ ] Each requirement is met with evidence (test output, diff)
+- [ ] Test suite + lint/typecheck pass
+- [ ] \`git diff --stat\` \u2014 every changed file is intentional, no debug artifacts
+- **Test engineer:** Most likely production failure mode?
+- **Code reviewer:** What would I flag in review?
+- **Requesting user:** Does this solve my actual problem?
+ALL pass \u2192 proceed to PR. ANY fail \u2192 fix first, then re-verify.
+Include the checklist results in the PR description.
+## Phase 6: PR
 Create a pull request:
-  gh pr create --title "feat: {name}" --body "{spec + QA report}"
+  gh pr create --title "feat: {name}" --body "{spec + QA report + verification checklist}"
-## Phase 6: NEXT
+## Phase 7: NEXT
 Report:
   "PR #{N} ready for review: {link}
    Next priority from SPRINT.md: {next task}
@@ -1790,13 +1847,19 @@ Repeat until max features reached or stopped:
 2. Create worktree + branch
 3. Implement the feature
 4. Run verification (build, test, lint)
-5. Open PR via gh
-6. Report status
-7. Move to next feature
+5. Run Completion Verification checklist:
+   - Requirements met with evidence
+   - Tests + lint/typecheck pass
+   - No debug artifacts or unexpected file changes
+   - 3-perspective check (test engineer, reviewer, user)
+6. Open PR via gh (include verification results in PR body)
+7. Report status
+8. Move to next feature
 ## Stop Conditions
 - Max 5 features per autopilot session
 - Any BLOCKER from verification
+- Completion Verification checklist fails after 2 fix attempts
 - Build failure that can't be resolved in 3 attempts
 - User presses Escape`;
 var AUTOPILOT_WARNING = `
@@ -3627,8 +3690,882 @@ var keysCommand = new Command10("keys").description("Add or update API keys for
   console.log("");
 });
+// src/commands/evolve.ts
+import { Command as Command11 } from "commander";
+import chalk14 from "chalk";
+import ora2 from "ora";
+import fs19 from "fs/promises";
+import path19 from "path";
+import { parse as yamlParse } from "yaml";
+import { confirm as confirm3, select as select4 } from "@inquirer/prompts";
+// src/evolve/init.ts
+import fs15 from "fs/promises";
+import path15 from "path";
+import { stringify as yamlStringify } from "yaml";
+// src/evolve/templates.ts
+// Catalog of eval templates, keyed by template id. Each entry repeats its id
+// and carries a display name, a one-line description (inserted into the
+// task-generation message), and `bestFor`, a list of workflow-type labels.
+var EVAL_TEMPLATES = {
+  "add-feature": {
+    id: "add-feature",
+    name: "Add Feature",
+    description: "Can the agent add a new capability?",
+    bestFor: ["feature-development", "api-building", "full-stack"]
+  },
+  "fix-bug": {
+    id: "fix-bug",
+    name: "Fix Bug",
+    description: "Can the agent diagnose and fix a problem?",
+    bestFor: ["maintenance", "debugging", "qa"]
+  },
+  "refactor": {
+    id: "refactor",
+    name: "Refactor",
+    description: "Can the agent restructure code?",
+    bestFor: ["maintenance", "architecture", "backend"]
+  },
+  "test-writing": {
+    id: "test-writing",
+    name: "Test Writing",
+    description: "Can the agent write tests?",
+    bestFor: ["tdd", "qa", "backend"]
+  },
+  "config-change": {
+    id: "config-change",
+    name: "Config Change",
+    description: "Can the agent update configuration?",
+    bestFor: ["devops", "infrastructure", "backend"]
+  },
+  "documentation": {
+    id: "documentation",
+    name: "Documentation",
+    description: "Can the agent write and update docs?",
+    bestFor: ["content", "api-building", "full-stack"]
+  }
+};
+/**
+ * Picks the eval template ids to generate tasks for, given a workflow type.
+ *
+ * @param workflowType - Workflow label such as "maintenance" or "devops".
+ * @returns The template ids mapped to that label, or the default
+ *   ["add-feature", "fix-bug", "test-writing"] when the label is unknown.
+ */
+function selectTemplatesForWorkflow(workflowType) {
+  const mapping = {
+    "feature-development": ["add-feature", "test-writing", "documentation"],
+    "api-building": ["add-feature", "fix-bug", "test-writing"],
+    "full-stack": ["add-feature", "fix-bug", "test-writing"],
+    "maintenance": ["fix-bug", "refactor", "test-writing"],
+    "debugging": ["fix-bug", "test-writing"],
+    "qa": ["fix-bug", "test-writing", "add-feature"],
+    "architecture": ["refactor", "test-writing", "config-change"],
+    "backend": ["fix-bug", "refactor", "config-change", "test-writing"],
+    "devops": ["config-change", "fix-bug"],
+    "infrastructure": ["config-change", "refactor"],
+    "tdd": ["test-writing", "add-feature", "fix-bug"],
+    "content": ["documentation", "add-feature"],
+    "research": ["documentation", "add-feature"]
+  };
+  // Own-property check: a bare `mapping[workflowType]` lookup returns
+  // inherited Object.prototype members for labels such as "constructor" or
+  // "toString", which are truthy and would bypass the default list.
+  const isKnown = Object.prototype.hasOwnProperty.call(mapping, workflowType);
+  return isKnown ? mapping[workflowType] : ["add-feature", "fix-bug", "test-writing"];
+}
+// System prompt for the task-generation LLM call. It asks for a JSON object
+// with a "tasks" array whose entries carry the fields listed below.
+var TASK_GENERATION_PROMPT = `You are an eval task generator for Claude Code agent environments. Given a project's CLAUDE.md, project structure, and selected eval templates, generate concrete, project-specific tasks.
+Each task must be realistic and testable against the actual project. Avoid generic placeholders.
+Return a JSON object with a "tasks" array. Each task has:
+- id: kebab-case identifier (e.g., "add-health-endpoint")
+- template: which eval template this instantiates
+- description: concrete task description the agent will receive
+- setup: shell commands to prepare the workspace (e.g., "npm install")
+- expected_outcome: multi-line string describing what success looks like
+- scoring: "pass-fail", "llm-judge", or "rubric"
+- timeout: seconds (300 for features/bugs, 600 for refactors, 180 for config/docs/tests)
+Return ONLY valid JSON, no markdown fences.`;
+/**
+ * Extracts a JSON value (object or array) from a raw LLM reply.
+ *
+ * A leading markdown code fence is stripped. The span from the first "{"
+ * to the last "}" is tried first; if it is absent or does not parse, the
+ * span from the first "[" to the last "]" is tried. The second attempt is
+ * needed for a top-level array of objects such as `[{"a":1},{"b":2}]`:
+ * there the brace span is `{"a":1},{"b":2}`, which is not valid JSON.
+ *
+ * @param raw - Raw text returned by the model.
+ * @returns The parsed JSON value.
+ * @throws Error when neither span is found, or when no candidate parses
+ *   (the message reports the first parse failure).
+ */
+function parseJsonResponse(raw) {
+  let cleaned = raw.trim();
+  if (cleaned.startsWith("```")) {
+    cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
+  }
+  // Object span first, so every reply that already parsed keeps its result.
+  const candidates = [
+    cleaned.match(/\{[\s\S]*\}/),
+    cleaned.match(/\[[\s\S]*\]/)
+  ].filter((m) => m !== null);
+  if (candidates.length === 0) {
+    throw new Error(
+      "LLM response did not contain valid JSON. Try again or use a different model."
+    );
+  }
+  let firstError;
+  for (const candidate of candidates) {
+    try {
+      return JSON.parse(candidate[0]);
+    } catch (err) {
+      if (firstError === void 0) {
+        firstError = err;
+      }
+    }
+  }
+  throw new Error(
+    `Failed to parse LLM response as JSON: ${firstError instanceof Error ? firstError.message : String(firstError)}`
+  );
+}
+// Fields every generated task must carry, checked in this order.
+var REQUIRED_TASK_FIELDS = [
+  "id",
+  "template",
+  "description",
+  "setup",
+  "expected_outcome",
+  "scoring",
+  "timeout"
+];
+/**
+ * Checks that `obj` is a non-null object with every required task field
+ * present and neither null nor undefined, then returns it unchanged.
+ *
+ * @param obj - Candidate task from the LLM reply.
+ * @param index - Position in the tasks array, used in error messages.
+ * @throws Error naming the first missing field, or when `obj` is not an object.
+ */
+function validateTask(obj, index) {
+  const isObject = obj !== null && typeof obj === "object";
+  if (!isObject) {
+    throw new Error(`Task at index ${index} is not an object`);
+  }
+  const missing = REQUIRED_TASK_FIELDS.find(
+    (field) => !(field in obj) || obj[field] == null
+  );
+  if (missing !== void 0) {
+    throw new Error(`Task at index ${index} is missing required field: ${missing}`);
+  }
+  return obj;
+}
+/**
+ * Assembles the user message for task generation: the CLAUDE.md text, a
+ * four-line project profile, and one line per selected eval template.
+ * Every id in `templates` must be a key of EVAL_TEMPLATES.
+ */
+function buildTaskGenerationMessage(claudeMd, projectProfile, templates) {
+  const language = projectProfile.language ?? "unknown";
+  const framework = projectProfile.framework ?? "none";
+  const scriptPairs = Object.entries(projectProfile.scripts).map(([k, v]) => `${k}=${v}`);
+  const keyFiles = projectProfile.keyFiles.join(", ");
+  const templateLines = [];
+  for (const templateId of templates) {
+    const meta = EVAL_TEMPLATES[templateId];
+    templateLines.push(`- ${templateId}: ${meta.description}`);
+  }
+  const sections = [
+    "## CLAUDE.md",
+    "",
+    claudeMd,
+    "",
+    "## Project Profile",
+    "",
+    `Language: ${language}`,
+    `Framework: ${framework}`,
+    `Scripts: ${scriptPairs.join(", ") || "none"}`,
+    `Key files: ${keyFiles || "none"}`,
+    "",
+    "## Selected Eval Templates",
+    "",
+    templateLines.join("\n"),
+    "",
+    "Generate concrete, project-specific tasks for each template above."
+  ];
+  return sections.join("\n");
+}
+/**
+ * Asks the LLM for project-specific eval tasks and validates the reply.
+ *
+ * @returns The validated task objects from the reply's "tasks" array.
+ * @throws Error when the reply is not a JSON object, has no "tasks" array,
+ *   or any task fails validation.
+ */
+async function generateTasksFromTemplates(claudeMd, projectProfile, templates, config) {
+  const prompt = buildTaskGenerationMessage(claudeMd, projectProfile, templates);
+  const llmOptions = { systemPrompt: TASK_GENERATION_PROMPT, maxTokens: 4096 };
+  const parsed = parseJsonResponse(await callLLM(config, prompt, llmOptions));
+  if (parsed === null || typeof parsed !== "object") {
+    throw new Error("LLM response is not a JSON object");
+  }
+  const candidates = parsed.tasks;
+  if (!Array.isArray(candidates)) {
+    throw new Error("LLM response does not contain a 'tasks' array");
+  }
+  return candidates.map((candidate, position) => validateTask(candidate, position));
+}
+// src/evolve/init.ts
+/**
+ * Creates the `.kairn-evolve` workspace under `projectRoot` (with its
+ * baseline/, traces/ and iterations/ subdirectories) and writes config.yaml.
+ *
+ * @returns The workspace path.
+ */
+async function createEvolveWorkspace(projectRoot, config) {
+  const workspace = path15.join(projectRoot, ".kairn-evolve");
+  for (const subdir of ["baseline", "traces", "iterations"]) {
+    await fs15.mkdir(path15.join(workspace, subdir), { recursive: true });
+  }
+  // config.yaml uses snake_case keys; the in-memory config is camelCase.
+  const yamlText = yamlStringify({
+    model: config.model,
+    proposer_model: config.proposerModel,
+    scorer: config.scorer,
+    max_iterations: config.maxIterations,
+    parallel_tasks: config.parallelTasks
+  });
+  const configFile = path15.join(workspace, "config.yaml");
+  await fs15.writeFile(configFile, yamlText, "utf-8");
+  return workspace;
+}
+/**
+ * Serializes `tasks` to `<workspacePath>/tasks.yaml` under a two-line
+ * comment header. `rubric` is emitted only when truthy, placed between
+ * `scoring` and `timeout`.
+ */
+async function writeTasksFile(workspacePath, tasks) {
+  const toYamlEntry = (task) => {
+    const entry = {
+      id: task.id,
+      template: task.template,
+      description: task.description,
+      setup: task.setup,
+      expected_outcome: task.expected_outcome,
+      scoring: task.scoring
+    };
+    if (task.rubric) {
+      entry.rubric = task.rubric;
+    }
+    // Assigned last so key order in the YAML output is unchanged.
+    entry.timeout = task.timeout;
+    return entry;
+  };
+  const doc = { tasks: tasks.map((task) => toYamlEntry(task)) };
+  const header = "# .kairn-evolve/tasks.yaml\n# Auto-generated by kairn evolve init \u2014 edit freely\n";
+  const target = path15.join(workspacePath, "tasks.yaml");
+  await fs15.writeFile(target, header + yamlStringify(doc), "utf-8");
+}
+/**
+ * Inspects `projectRoot` and returns `{ language, framework, scripts, keyFiles }`.
+ *
+ * Each probe is best-effort: a missing or unreadable file leaves the
+ * corresponding field at its default (null, null, {}, []).
+ *
+ * NOTE(review): any readable package.json is labelled "typescript", plain
+ * JavaScript projects included. Confirm this is intended.
+ */
+async function buildProjectProfile(projectRoot) {
+  const profile = {
+    language: null,
+    framework: null,
+    scripts: {},
+    keyFiles: []
+  };
+  // First matching dependency wins, so order matters (e.g. next before react).
+  const frameworkByDependency = [
+    ["next", "Next.js"],
+    ["express", "Express"],
+    ["react", "React"],
+    ["vue", "Vue"],
+    ["commander", "CLI (Commander.js)"]
+  ];
+  try {
+    const manifestPath = path15.join(projectRoot, "package.json");
+    const pkg = JSON.parse(await fs15.readFile(manifestPath, "utf-8"));
+    profile.language = "typescript";
+    if (pkg.scripts && typeof pkg.scripts === "object") {
+      profile.scripts = pkg.scripts;
+    }
+    const deps = {
+      ...pkg.dependencies ?? {},
+      ...pkg.devDependencies ?? {}
+    };
+    const detected = frameworkByDependency.find(([dependency]) => deps[dependency]);
+    if (detected) {
+      profile.framework = detected[1];
+    }
+  } catch {
+  }
+  if (!profile.language) {
+    // Python markers, probed in order; the first one that exists settles it.
+    for (const marker of ["pyproject.toml", "requirements.txt"]) {
+      try {
+        await fs15.access(path15.join(projectRoot, marker));
+        profile.language = "python";
+        break;
+      } catch {
+      }
+    }
+  }
+  try {
+    const wellKnown = /* @__PURE__ */ new Set([
+      "README.md",
+      "package.json",
+      "tsconfig.json",
+      "pyproject.toml",
+      "Cargo.toml",
+      "go.mod",
+      "Makefile",
+      "Dockerfile"
+    ]);
+    const entries = await fs15.readdir(projectRoot);
+    profile.keyFiles = entries.filter((name) => wellKnown.has(name));
+  } catch {
+  }
+  return profile;
+}
+/**
+ * Loads the Kairn config, reads `.claude/CLAUDE.md` (empty string when it
+ * cannot be read), profiles the project, and generates eval tasks for the
+ * templates selected for `workflowType`.
+ *
+ * @throws Error when no config is found.
+ */
+async function autoGenerateTasks(projectRoot, workflowType) {
+  const config = await loadConfig();
+  if (!config) {
+    throw new Error("No config found. Run `kairn init` first.");
+  }
+  const readClaudeMd = async () => {
+    try {
+      return await fs15.readFile(
+        path15.join(projectRoot, ".claude", "CLAUDE.md"),
+        "utf-8"
+      );
+    } catch {
+      // A missing CLAUDE.md is not an error; generation proceeds without it.
+      return "";
+    }
+  };
+  const claudeMd = await readClaudeMd();
+  const profile = await buildProjectProfile(projectRoot);
+  const templates = selectTemplatesForWorkflow(workflowType);
+  return generateTasksFromTemplates(claudeMd, profile, templates, config);
+}
+// src/evolve/baseline.ts
+import fs16 from "fs/promises";
+import path16 from "path";
+/**
+ * Copies the project's `.claude/` directory into the workspace twice: to
+ * `baseline/` and to `iterations/0/harness/`.
+ *
+ * @throws Error when `.claude/` is not accessible under `projectRoot`.
+ */
+async function snapshotBaseline(projectRoot, workspacePath) {
+  const claudeDir = path16.join(projectRoot, ".claude");
+  const destinations = [
+    path16.join(workspacePath, "baseline"),
+    path16.join(workspacePath, "iterations", "0", "harness")
+  ];
+  const claudeDirExists = await fs16.access(claudeDir).then(
+    () => true,
+    () => false
+  );
+  if (!claudeDirExists) {
+    throw new Error(`.claude/ directory not found in ${projectRoot}`);
+  }
+  for (const destination of destinations) {
+    await copyDir(claudeDir, destination);
+  }
+}
+/**
+ * Recursively copies the contents of `src` into `dest`, creating `dest`
+ * (and parents) as needed. Entries are copied one at a time.
+ */
+async function copyDir(src, dest) {
+  await fs16.mkdir(dest, { recursive: true });
+  const children = await fs16.readdir(src, { withFileTypes: true });
+  for (const child of children) {
+    const from = path16.join(src, child.name);
+    const to = path16.join(dest, child.name);
+    if (!child.isDirectory()) {
+      await fs16.copyFile(from, to);
+      continue;
+    }
+    await copyDir(from, to);
+  }
+}
+// src/evolve/runner.ts
+import { exec, spawn } from "child_process";
+import { promisify } from "util";
+import fs18 from "fs/promises";
+import os3 from "os";
+import path18 from "path";
+// src/evolve/trace.ts
+import fs17 from "fs/promises";
+import path17 from "path";
+/**
+ * Persists one task trace into `traceDir` as six files: stdout.log,
+ * stderr.log, tool_calls.jsonl (one JSON object per line),
+ * files_changed.json, timing.json and score.json.
+ */
+async function writeTrace(traceDir, trace) {
+  await fs17.mkdir(traceDir, { recursive: true });
+  const pretty = (value) => JSON.stringify(value, null, 2);
+  // Contents are thunks so each is serialized just before its own write.
+  const outputs = [
+    ["stdout.log", () => trace.stdout],
+    ["stderr.log", () => trace.stderr],
+    ["tool_calls.jsonl", () => trace.toolCalls.map((tc) => JSON.stringify(tc)).join("\n")],
+    ["files_changed.json", () => pretty(trace.filesChanged)],
+    ["timing.json", () => pretty(trace.timing)],
+    ["score.json", () => pretty(trace.score)]
+  ];
+  for (const [fileName, render] of outputs) {
+    await fs17.writeFile(path17.join(traceDir, fileName), render(), "utf-8");
+  }
+}
+/**
+ * Writes `score` as pretty-printed JSON to `<traceDir>/score.json`,
+ * replacing any existing file. The directory must already exist.
+ */
+async function writeScore(traceDir, score) {
+  const target = path17.join(traceDir, "score.json");
+  const payload = JSON.stringify(score, null, 2);
+  await fs17.writeFile(target, payload, "utf-8");
+}
+// src/evolve/runner.ts
+// Promise-returning wrapper around child_process.exec, used for task setup.
+var execAsync = promisify(exec);
+/**
+ * Runs one eval task in a throwaway temp directory and records its trace.
+ *
+ * Steps: copy the harness into `<tmp>/.claude`, run the task's setup
+ * command, list files, run Claude with the task description, list files
+ * again, then write the trace to `traceDir`. The temp directory is removed
+ * afterwards whether or not the run succeeded.
+ *
+ * @param task - Task definition; reads `id`, `setup`, `description`, `timeout`.
+ * @param harnessPath - Directory copied into the workspace as `.claude`.
+ * @param traceDir - Directory that receives the trace files.
+ * @param iteration - Iteration value stored in the trace.
+ * @returns `{ taskId, score, traceDir }`; the score is always the
+ *   "Pending scoring" placeholder at this stage.
+ */
+async function runTask(task, harnessPath, traceDir, iteration) {
+  await fs18.mkdir(traceDir, { recursive: true });
+  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
+  const startMs = Date.now();
+  const tmpDir = await fs18.mkdtemp(path18.join(os3.tmpdir(), "kairn-evolve-"));
+  try {
+    await copyDir(harnessPath, path18.join(tmpDir, ".claude"));
+    let setupStderr = "";
+    if (task.setup.trim()) {
+      try {
+        // Setup is capped at 60 seconds.
+        await execAsync(task.setup, { cwd: tmpDir, timeout: 6e4 });
+      } catch (err) {
+        // A failed setup does not abort the task; the message is kept and
+        // prefixed onto the trace's stderr below.
+        setupStderr = err instanceof Error ? err.message : String(err);
+      }
+    }
+    // Listings are taken after setup, so files created by setup are not
+    // counted as changes.
+    const filesBefore = await snapshotFileList(tmpDir);
+    const spawnResult = await spawnClaude(task.description, tmpDir, task.timeout);
+    const filesAfter = await snapshotFileList(tmpDir);
+    const filesChanged = diffFileLists(filesBefore, filesAfter);
+    const toolCalls = parseToolCalls(spawnResult.stdout);
+    const completedAt = (/* @__PURE__ */ new Date()).toISOString();
+    const durationMs = Date.now() - startMs;
+    const combinedStderr = setupStderr ? `[setup] ${setupStderr}
+${spawnResult.stderr}` : spawnResult.stderr;
+    const trace = {
+      taskId: task.id,
+      iteration,
+      stdout: spawnResult.stdout,
+      stderr: combinedStderr,
+      toolCalls,
+      filesChanged,
+      // Placeholder; this function does not score the run.
+      score: { pass: false, details: "Pending scoring" },
+      timing: { startedAt, completedAt, durationMs }
+    };
+    await writeTrace(traceDir, trace);
+    return {
+      taskId: task.id,
+      score: trace.score,
+      traceDir
+    };
+  } finally {
+    // Best-effort cleanup: a removal failure is ignored.
+    await fs18.rm(tmpDir, { recursive: true, force: true }).catch(() => {
+    });
+  }
+}
+/**
+ * Runs the `claude` CLI non-interactively in `cwd`, writing `instruction`
+ * to its stdin, and collects its output.
+ *
+ * The returned promise never rejects: a spawn failure is reported through
+ * `stderr` ("Spawn error: ...") with `exitCode: 1`, and a null exit code
+ * is also mapped to 1.
+ *
+ * @param instruction - Prompt text written to the child's stdin.
+ * @param cwd - Working directory for the child process.
+ * @param timeoutSec - Time limit in seconds, passed to spawn as milliseconds.
+ * @returns Resolves to `{ stdout, stderr, exitCode }`.
+ */
+async function spawnClaude(instruction, cwd, timeoutSec) {
+  return new Promise((resolve) => {
+    const args = ["--print", "--output-format", "text", "--max-turns", "50"];
+    const child = spawn("claude", args, {
+      cwd,
+      stdio: ["pipe", "pipe", "pipe"],
+      timeout: timeoutSec * 1e3,
+      env: { ...process.env }
+    });
+    let stdout = "";
+    let stderr = "";
+    child.stdout.on("data", (data) => {
+      stdout += data.toString();
+    });
+    child.stderr.on("data", (data) => {
+      stderr += data.toString();
+    });
+    child.on("close", (code) => {
+      resolve({ stdout, stderr, exitCode: code ?? 1 });
+    });
+    child.on("error", (err) => {
+      resolve({
+        stdout,
+        stderr: stderr + `
+Spawn error: ${err.message}`,
+        exitCode: 1
+      });
+    });
+    // If the binary is missing or the child exits before reading its input,
+    // the write below fails and stdin emits 'error'. With no listener that
+    // event is thrown as an uncaught exception and kills the whole CLI.
+    // The outcome is already reported by the child's 'error'/'close'
+    // handlers above, so the stream error is deliberately ignored.
+    child.stdin.on("error", () => {
+    });
+    child.stdin.write(instruction);
+    child.stdin.end();
+  });
+}
+/**
+ * Recursively lists the files under `dir` and returns a map from relative
+ * path to modification time (`mtimeMs`).
+ *
+ * The top-level entries `.claude`, `node_modules` and `.git` are skipped.
+ * The match is on the exact first path segment, so look-alikes such as
+ * `.gitignore`, `.github/` or `.claudeignore` are still tracked. A prefix
+ * test would hide them, and changes the agent made to them would never
+ * appear in the trace. Unreadable directories and files are skipped.
+ *
+ * @param dir - Root directory to scan.
+ * @returns Object mapping each relative file path to its mtime in ms.
+ */
+async function snapshotFileList(dir) {
+  const ignoredTopLevel = /* @__PURE__ */ new Set([".claude", "node_modules", ".git"]);
+  const result = {};
+  async function walk(current) {
+    let entries;
+    try {
+      entries = await fs18.readdir(current, { withFileTypes: true });
+    } catch {
+      return;
+    }
+    for (const entry of entries) {
+      const fullPath = path18.join(current, entry.name);
+      const relativePath = path18.relative(dir, fullPath);
+      const topLevelSegment = relativePath.split(path18.sep)[0];
+      if (ignoredTopLevel.has(topLevelSegment)) continue;
+      if (entry.isDirectory()) {
+        await walk(fullPath);
+      } else {
+        try {
+          const stat = await fs18.stat(fullPath);
+          result[relativePath] = stat.mtimeMs;
+        } catch {
+          // The file vanished or is unreadable; leave it out of the listing.
+        }
+      }
+    }
+  }
+  await walk(dir);
+  return result;
+}
+/**
+ * Compares two `{ path: mtime }` listings and classifies every difference.
+ *
+ * @param before - Listing taken before the run.
+ * @param after - Listing taken after the run.
+ * @returns Object mapping each differing path to "created", "modified"
+ *   (mtime differs) or "deleted". Unchanged paths are omitted.
+ */
+function diffFileLists(before, after) {
+  // Own-property test: the `in` operator also sees inherited members, so a
+  // file literally named "constructor" or "toString" would look like it
+  // existed in both listings and be misclassified or missed.
+  const has = (listing, file) => Object.prototype.hasOwnProperty.call(listing, file);
+  const changes = {};
+  for (const [file, mtime] of Object.entries(after)) {
+    if (!has(before, file)) {
+      changes[file] = "created";
+    } else if (before[file] !== mtime) {
+      changes[file] = "modified";
+    }
+  }
+  for (const file of Object.keys(before)) {
+    if (!has(after, file)) {
+      changes[file] = "deleted";
+    }
+  }
+  return changes;
+}
+/**
+ * Scans `stdout` line by line and returns every line that parses as JSON
+ * with `type === "tool_use"` or a truthy `tool_name`. Lines that are blank
+ * or not JSON are ignored; any unexpected failure yields an empty array.
+ */
+function parseToolCalls(stdout) {
+  const decodeLine = (line) => {
+    try {
+      const record = JSON.parse(line);
+      const isToolCall = record.type === "tool_use" || record.tool_name;
+      return isToolCall ? [record] : [];
+    } catch {
+      return [];
+    }
+  };
+  try {
+    return stdout.split("\n").filter((line) => line.trim()).flatMap((line) => decodeLine(line));
+  } catch {
+    return [];
+  }
+}
+// src/evolve/exec.ts
+import { exec as exec2 } from "child_process";
+import { promisify as promisify2 } from "util";
+// Promise-returning wrapper around child_process.exec.
+var execAsync2 = promisify2(exec2);
+/**
+ * Runs `cmd` through the shell in `cwd`, with a time limit.
+ *
+ * @param timeoutMs - Limit in milliseconds (default 30000).
+ * @returns The promisified exec result (`{ stdout, stderr }`); rejects when
+ *   the command fails.
+ */
+async function execCommand(cmd, cwd, timeoutMs = 3e4) {
+  const options = { cwd, timeout: timeoutMs };
+  return execAsync2(cmd, options);
+}
+// src/evolve/scorers.ts
+// An expected-outcome line counts as a verification command only when it
+// starts with one of these prefixes; the trailing space is part of the match.
+var COMMAND_PATTERN = /^(npm |npx |node |python |make |cargo |go |git |test |ls |cat |grep |curl )/;
+// Commands containing any of these shell metacharacters are rejected by the
+// pass-fail scorer rather than executed.
+var SHELL_METACHAR_PATTERN = /[;|&`$()<>]/;
+// System prompt asking an LLM judge for an overall verdict as JSON
+// ({ pass, score 0-100, reasoning }).
+var JUDGE_SYSTEM_PROMPT = `You are an eval judge for Claude Code agent tasks. Given a task description, expected outcome, and actual execution results, determine if the task was completed successfully.
+Return ONLY valid JSON with this structure:
+{
+  "pass": true/false,
+  "score": 0-100,
+  "reasoning": "Brief explanation of your judgment"
+}`;
+// System prompt asking an LLM judge to score a single criterion as JSON
+// ({ score 0.0-1.0, reasoning }).
+var RUBRIC_SYSTEM_PROMPT = `You are an eval judge scoring a specific criterion. Given the task, the criterion to evaluate, and the execution results, score the criterion.
+Return ONLY valid JSON:
+{
+  "score": 0.0-1.0,
+  "reasoning": "Brief explanation"
+}`;
+/**
+ * Pass/fail scoring.
+ *
+ * `task.expected_outcome` may be an array of lines or one newline-separated
+ * string. Each line has a leading "-" bullet stripped and is trimmed; lines
+ * matching COMMAND_PATTERN are treated as verification commands and executed
+ * with `workspacePath` as the working directory. Commands containing shell
+ * metacharacters are not executed and count as failures. The task passes only
+ * if every command succeeds.
+ *
+ * When no line looks like a command, the result is derived from `stderr`:
+ * it fails if stderr contains "error", "failed" or "exception" (any case).
+ *
+ * `stdout` is accepted for signature parity with the other scorers but is not read.
+ *
+ * @returns An object `{ pass, score, details }` where score is 100 or 0.
+ */
+async function passFailScorer(task, workspacePath, stdout, stderr) {
+  const expected = task.expected_outcome;
+  const candidateLines = Array.isArray(expected) ? expected : expected.split("\n");
+  const commands = [];
+  for (const raw of candidateLines) {
+    const stripped = raw.replace(/^-\s*/, "").trim();
+    if (COMMAND_PATTERN.test(stripped)) {
+      commands.push(stripped);
+    }
+  }
+  // No runnable commands: fall back to a keyword scan of stderr.
+  if (commands.length === 0) {
+    const lowered = stderr.toLowerCase();
+    const clean = !["error", "failed", "exception"].some((marker) => lowered.includes(marker));
+    return {
+      pass: clean,
+      score: clean ? 100 : 0,
+      details: clean ? "No errors detected in output" : "Errors found in stderr"
+    };
+  }
+  const failures = [];
+  for (const cmd of commands) {
+    if (SHELL_METACHAR_PATTERN.test(cmd)) {
+      failures.push(`Rejected unsafe command (shell metacharacters): ${cmd}`);
+      continue;
+    }
+    try {
+      await execCommand(cmd, workspacePath);
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err);
+      failures.push(`Command failed: ${cmd}\n${reason}`);
+    }
+  }
+  const allPassed = failures.length === 0;
+  return {
+    pass: allPassed,
+    score: allPassed ? 100 : 0,
+    details: allPassed ? `All ${commands.length} verification commands passed` : failures.join("\n")
+  };
+}
+/**
+ * Ask the LLM judge whether the task was completed.
+ *
+ * Builds a markdown-style message from the task description, the expected
+ * outcome (array entries are joined with newlines) and the tails of stdout
+ * (last 2000 chars) and stderr (last 1000 chars), then sends it through
+ * callLLM with JUDGE_SYSTEM_PROMPT. An optional ``` / ```json fence around
+ * the reply is removed and the outermost {...} span is parsed as JSON.
+ *
+ * `workspacePath` is accepted for signature parity but is not read.
+ *
+ * @returns `{ pass, score, reasoning }` copied from the judge's JSON. If the
+ *   reply has no {...} span, or callLLM / JSON.parse throws, a failing result
+ *   with score 0 and an explanatory `reasoning` is returned instead.
+ */
+async function llmJudgeScorer(task, workspacePath, stdout, stderr, config) {
+  const rawExpected = task.expected_outcome;
+  const expectedOutcome = Array.isArray(rawExpected) ? rawExpected.join("\n") : rawExpected;
+  // Each section is "heading\nbody"; sections are separated by a blank line.
+  const sections = [
+    ["## Task", task.description],
+    ["## Expected Outcome", expectedOutcome],
+    ["## Actual stdout (last 2000 chars)", stdout.slice(-2000)],
+    ["## Actual stderr (last 1000 chars)", stderr.slice(-1000)]
+  ];
+  const userMessage = sections.map((section) => section.join("\n")).join("\n\n");
+  try {
+    const reply = await callLLM(config, userMessage, {
+      systemPrompt: JUDGE_SYSTEM_PROMPT,
+      maxTokens: 1024
+    });
+    const trimmed = reply.trim();
+    const unfenced = trimmed.startsWith("```") ? trimmed.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "") : trimmed;
+    const jsonSpan = unfenced.match(/\{[\s\S]*\}/);
+    if (jsonSpan === null) {
+      return { pass: false, score: 0, reasoning: "Judge returned invalid JSON" };
+    }
+    const { pass, score, reasoning } = JSON.parse(jsonSpan[0]);
+    return { pass, score, reasoning };
+  } catch (err) {
+    const detail = err instanceof Error ? err.message : String(err);
+    return {
+      pass: false,
+      score: 0,
+      reasoning: `LLM judge error: ${detail}`
+    };
+  }
+}
+/**
+ * Score a task against its weighted rubric, one LLM call per criterion.
+ *
+ * Tasks without a rubric are delegated to passFailScorer. Otherwise each
+ * criterion is sent through callLLM with RUBRIC_SYSTEM_PROMPT together with
+ * the task description and the tails of stdout (last 2000 chars) and stderr
+ * (last 500 chars). The judge's `score` is clamped to [0, 1]. A criterion
+ * scores 0 when the call throws, the reply contains no {...} span, the JSON
+ * cannot be parsed, or `score` is missing / not a finite number.
+ *
+ * @returns `{ pass, score, reasoning, breakdown }` where `score` is the
+ *   weight-normalised percentage (0-100, rounded), `pass` is `score >= 60`,
+ *   and `breakdown` lists `{ criterion, score, weight }` per criterion.
+ */
+async function rubricScorer(task, workspacePath, stdout, stderr, config) {
+  if (!task.rubric || task.rubric.length === 0) {
+    return passFailScorer(task, workspacePath, stdout, stderr);
+  }
+  const breakdown = [];
+  let weightedSum = 0;
+  for (const criterion of task.rubric) {
+    const userMessage = [
+      "## Task",
+      task.description,
+      "",
+      "## Criterion to Evaluate",
+      `"${criterion.criterion}" (weight: ${criterion.weight})`,
+      "",
+      "## Actual stdout (last 2000 chars)",
+      stdout.slice(-2e3),
+      "",
+      "## Actual stderr (last 500 chars)",
+      stderr.slice(-500)
+    ].join("\n");
+    // Stays 0 unless the judge returns a usable numeric score.
+    let criterionScore = 0;
+    try {
+      const response = await callLLM(config, userMessage, {
+        systemPrompt: RUBRIC_SYSTEM_PROMPT,
+        maxTokens: 512
+      });
+      let cleaned = response.trim();
+      if (cleaned.startsWith("```")) {
+        cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
+      }
+      const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
+      if (jsonMatch) {
+        const result = JSON.parse(jsonMatch[0]);
+        // A missing or non-numeric score would turn into NaN inside
+        // Math.max/Math.min and poison the weighted total, so it is treated
+        // like any other unusable reply.
+        if (Number.isFinite(result.score)) {
+          criterionScore = Math.max(0, Math.min(1, result.score));
+        }
+      }
+    } catch {
+      // LLM call or JSON parsing failed: the criterion keeps its score of 0.
+    }
+    breakdown.push({
+      criterion: criterion.criterion,
+      score: criterionScore,
+      weight: criterion.weight
+    });
+    if (criterionScore > 0) {
+      weightedSum += criterionScore * criterion.weight;
+    }
+  }
+  const totalWeight = task.rubric.reduce((sum, c) => sum + c.weight, 0);
+  const totalScore = totalWeight > 0 ? Math.round(weightedSum / totalWeight * 100) : 0;
+  return {
+    pass: totalScore >= 60,
+    score: totalScore,
+    reasoning: `Rubric score: ${totalScore}%`,
+    breakdown
+  };
+}
+/**
+ * Dispatch to the scorer selected by `task.scoring`.
+ *
+ * "llm-judge" and "rubric" are only honoured when `config` is truthy (both
+ * need it for the LLM call). Everything else, including those two modes
+ * without a config and any unrecognised value, uses passFailScorer.
+ */
+async function scoreTask(task, workspacePath, stdout, stderr, config) {
+  if (config) {
+    if (task.scoring === "llm-judge") {
+      return llmJudgeScorer(task, workspacePath, stdout, stderr, config);
+    }
+    if (task.scoring === "rubric") {
+      return rubricScorer(task, workspacePath, stdout, stderr, config);
+    }
+  }
+  return passFailScorer(task, workspacePath, stdout, stderr);
+}
+// src/commands/evolve.ts
+// Default evolve configuration; "evolve init" passes it to createEvolveWorkspace().
+// NOTE(review): how each field is consumed (model vs proposerModel, scorer,
+// maxIterations, parallelTasks) is not visible in this part of the file; confirm
+// against the workspace/runner code before relying on a particular meaning.
+var DEFAULT_CONFIG = {
+  model: "claude-sonnet-4-6",
+  proposerModel: "claude-opus-4-6",
+  scorer: "pass-fail",
+  maxIterations: 5,
+  parallelTasks: 1
+};
+// Parent "evolve" command; the init, baseline and run subcommands are attached to it below.
+var evolveCommand = new Command11("evolve").description("Evolve your agent environment through automated optimization");
+// "evolve init": requires an existing .claude/ directory in the current working
+// directory, creates the evolve workspace, generates eval tasks (falling back to
+// template placeholders when generation throws), optionally lets the user add
+// more tasks interactively, and finally writes the task list via writeTasksFile().
+// Any uncaught error is printed and the process exits with code 1.
+evolveCommand.command("init").description("Initialize an evolution workspace with auto-generated tasks").option("--workflow <type>", "Workflow type for template selection", "feature-development").action(async (options) => {
+  try {
+    const projectRoot = process.cwd();
+    console.log(ui.section("Evolve Init"));
+    const claudeDir = path19.join(projectRoot, ".claude");
+    // Bail out early when the project has no .claude/ directory.
+    try {
+      await fs19.access(claudeDir);
+    } catch {
+      console.log(ui.error("No .claude/ directory found. Run kairn describe first."));
+      process.exit(1);
+    }
+    const workspace = await createEvolveWorkspace(projectRoot, DEFAULT_CONFIG);
+    console.log(ui.success("Created .kairn-evolve/ workspace"));
+    const spinner = ora2("Generating project-specific eval tasks...").start();
+    let tasks;
+    try {
+      tasks = await autoGenerateTasks(projectRoot, options.workflow);
+      spinner.succeed(`Generated ${tasks.length} eval tasks`);
+    } catch {
+      // Any failure in autoGenerateTasks falls back to one placeholder task per
+      // template selected for the workflow; the original error is not reported.
+      spinner.fail("LLM task generation failed");
+      const templateIds = selectTemplatesForWorkflow(options.workflow);
+      tasks = templateIds.map((templateId, index) => ({
+        id: `${templateId}-${index + 1}`,
+        template: templateId,
+        description: `${EVAL_TEMPLATES[templateId].description} (project-specific task \u2014 edit in tasks.yaml)`,
+        setup: "npm install",
+        expected_outcome: "Task completed successfully",
+        scoring: "pass-fail",
+        timeout: 300
+      }));
+      console.log(ui.info(`Fell back to ${tasks.length} template placeholders`));
+    }
+    // List each task with its description truncated to 80 characters.
+    for (const task of tasks) {
+      console.log(chalk14.cyan(`  ${task.id}`) + chalk14.dim(` (${task.template}) \u2014 ${task.description.slice(0, 80)}`));
+    }
+    // Interactive loop: keep offering to add tasks until the user declines.
+    let addMore = true;
+    while (addMore) {
+      try {
+        addMore = await confirm3({ message: "Add another eval task?", default: false });
+      } catch {
+        // A rejected confirm prompt ends the loop instead of failing the command
+        // (presumably this covers the user cancelling the prompt; confirm).
+        addMore = false;
+      }
+      if (addMore) {
+        // Unlike confirm3 above, select4 is not guarded: if it rejects, the
+        // outer catch prints the error and exits with code 1.
+        const templateId = await select4({
+          message: "Select eval template:",
+          choices: Object.values(EVAL_TEMPLATES).map((t) => ({
+            name: `${t.name} \u2014 ${t.description}`,
+            value: t.id
+          }))
+        });
+        const addSpinner = ora2("Generating task...").start();
+        try {
+          const config = await loadConfig();
+          if (config) {
+            // CLAUDE.md is optional context: a read failure leaves it empty.
+            let claudeMd = "";
+            try {
+              claudeMd = await fs19.readFile(path19.join(claudeDir, "CLAUDE.md"), "utf-8");
+            } catch {
+            }
+            const profile = await buildProjectProfile(projectRoot);
+            const newTasks = await generateTasksFromTemplates(claudeMd, profile, [templateId], config);
+            tasks.push(...newTasks);
+            addSpinner.succeed(`Added ${newTasks.length} task(s)`);
+          } else {
+            addSpinner.fail("No config found");
+          }
+        } catch {
+          // Generation errors are reported on the spinner only; the loop continues.
+          addSpinner.fail("Failed to generate task");
+        }
+      }
+    }
+    await writeTasksFile(workspace, tasks);
+    console.log(ui.success(`Wrote ${tasks.length} tasks to tasks.yaml`));
+    console.log("");
+    console.log(chalk14.dim("  Next steps:"));
+    console.log(chalk14.dim("    1. Review .kairn-evolve/tasks.yaml"));
+    console.log(chalk14.dim("    2. Run: kairn evolve baseline"));
+    console.log(chalk14.dim("    3. Run: kairn evolve run"));
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    console.log(ui.error(msg));
+    process.exit(1);
+  }
+});
+// "evolve baseline": requires an existing .kairn-evolve/ workspace in the current
+// working directory, calls snapshotBaseline(), then reports how many files are
+// under <workspace>/baseline. Any uncaught error is printed and the process
+// exits with code 1.
+evolveCommand.command("baseline").description("Snapshot current .claude/ directory as baseline").action(async () => {
+  try {
+    const projectRoot = process.cwd();
+    const workspace = path19.join(projectRoot, ".kairn-evolve");
+    console.log(ui.section("Evolve Baseline"));
+    // The workspace must already exist (it is created by "evolve init").
+    try {
+      await fs19.access(workspace);
+    } catch {
+      console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
+      process.exit(1);
+    }
+    await snapshotBaseline(projectRoot, workspace);
+    const baselineDir = path19.join(workspace, "baseline");
+    // countFiles is best-effort: an unreadable directory contributes 0.
+    const fileCount = await countFiles(baselineDir);
+    console.log(ui.success(`Baseline snapshot created (${fileCount} files)`));
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    console.log(ui.error(msg));
+    process.exit(1);
+  }
+});
+// "evolve run": loads tasks from .kairn-evolve/tasks.yaml, optionally narrows
+// them to the id given with --task, runs each one sequentially through
+// runTask(), scores it when a config is available, and prints a PASS/FAIL line
+// per task plus a summary. Any uncaught error is printed and the process exits
+// with code 1.
+evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").action(async (options) => {
+  try {
+    const projectRoot = process.cwd();
+    const workspace = path19.join(projectRoot, ".kairn-evolve");
+    console.log(ui.section("Evolve Run"));
+    try {
+      await fs19.access(workspace);
+    } catch {
+      console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
+      process.exit(1);
+    }
+    const tasksPath = path19.join(workspace, "tasks.yaml");
+    let tasksContent;
+    try {
+      tasksContent = await fs19.readFile(tasksPath, "utf-8");
+    } catch {
+      console.log(ui.error("No tasks.yaml found. Run kairn evolve init first."));
+      process.exit(1);
+    }
+    const parsed = yamlParse(tasksContent);
+    if (!parsed?.tasks || parsed.tasks.length === 0) {
+      console.log(ui.error("No tasks found in tasks.yaml"));
+      process.exit(1);
+    }
+    // With --task only the matching id(s) run; otherwise every task runs.
+    const tasksToRun = options.task ? parsed.tasks.filter((t) => t.id === options.task) : parsed.tasks;
+    if (tasksToRun.length === 0) {
+      console.log(ui.error(`Task "${options.task}" not found in tasks.yaml`));
+      process.exit(1);
+    }
+    console.log(ui.info(`Running ${tasksToRun.length} task(s)...`));
+    console.log("");
+    const config = await loadConfig();
+    const harnessPath = path19.join(projectRoot, ".claude");
+    const results = [];
+    for (const task of tasksToRun) {
+      // Traces always go under traces/0/<task id>; the same 0 is passed to
+      // runTask (presumably the iteration index; confirm against runTask).
+      const traceDir = path19.join(workspace, "traces", "0", task.id);
+      const spinner = ora2(`Running: ${task.id}`).start();
+      const result = await runTask(task, harnessPath, traceDir, 0);
+      // Re-score from the captured logs only when a config was loaded;
+      // without one, the score already present on runTask's result is used.
+      if (config) {
+        const stdout = await fs19.readFile(path19.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
+        const stderr = await fs19.readFile(path19.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
+        // NOTE(review): traceDir is passed as scoreTask's workspacePath, so
+        // pass-fail verification commands execute with the trace directory as
+        // their cwd; confirm this is the intended directory.
+        const score = await scoreTask(task, traceDir, stdout, stderr, config);
+        result.score = score;
+        await writeScore(traceDir, score);
+      }
+      results.push(result);
+      const status = result.score.pass ? chalk14.green("PASS") : chalk14.red("FAIL");
+      const scoreStr = result.score.score !== void 0 ? chalk14.dim(` (${result.score.score}%)`) : "";
+      spinner.stop();
+      console.log(`  ${status}  ${task.id}${scoreStr}${result.score.details ? chalk14.dim(` \u2014 ${result.score.details}`) : ""}`);
+    }
+    const passed = results.filter((r) => r.score.pass).length;
+    console.log("");
+    console.log(ui.info(`Results: ${passed}/${results.length} passed`));
+    console.log(ui.info("Traces written to .kairn-evolve/traces/0/"));
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    console.log(ui.error(msg));
+    process.exit(1);
+  }
+});
+/**
+ * Recursively count the non-directory entries under `dir`.
+ *
+ * Best-effort: if the directory cannot be read (or anything else throws while
+ * walking it), whatever has been counted so far is returned, which is 0 for a
+ * missing directory.
+ */
+async function countFiles(dir) {
+  let total = 0;
+  try {
+    for (const entry of await fs19.readdir(dir, { withFileTypes: true })) {
+      if (!entry.isDirectory()) {
+        total += 1;
+        continue;
+      }
+      total += await countFiles(path19.join(dir, entry.name));
+    }
+  } catch {
+    // Deliberately ignored: an unreadable directory simply adds nothing.
+  }
+  return total;
+}
 // src/cli.ts
-var program = new Command11();
+var program = new Command12();
 program.name("kairn").description(
   "Compile natural language intent into optimized Claude Code environments"
 ).version("1.9.0").option("--no-color", "Disable colored output");
@@ -3642,8 +4579,9 @@ program.addCommand(doctorCommand);
 program.addCommand(registryCommand);
 program.addCommand(templatesCommand);
 program.addCommand(keysCommand);
+program.addCommand(evolveCommand);
 if (process.argv.includes("--no-color") || process.env.NO_COLOR) {
-  chalk14.level = 0;
+  chalk15.level = 0;
 }
 program.parse();
 //# sourceMappingURL=cli.js.map