npm - kairn-cli - Versions diffs - 2.2.10 → 2.3.0 - Mend

kairn-cli 2.2.10 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/cli.js CHANGED Viewed

@@ -1,10 +1,11 @@
 // src/cli.ts
 import { Command as Command12 } from "commander";
 import chalk15 from "chalk";
+import { createRequire } from "module";
 // src/commands/init.ts
 import { Command } from "commander";
-import { input, password, select } from "@inquirer/prompts";
+import { confirm, input, password, select } from "@inquirer/prompts";
 import chalk3 from "chalk";
 import Anthropic from "@anthropic-ai/sdk";
 import OpenAI from "openai";
@@ -62,6 +63,59 @@ async function saveConfig(config) {
   await fs.writeFile(CONFIG_PATH, JSON.stringify(config, null, 2), "utf-8");
 }
+// src/auth/keychain.ts
+import { exec } from "child_process";
+import { promisify } from "util";
+var execAsync = promisify(exec); // promise-returning wrapper around child_process.exec
+var KEYCHAIN_SERVICE = "Claude Code-credentials"; // service name passed to `security find-generic-password -s`
+var TOKEN_EXPIRY_BUFFER_MS = 6e4; // 60 000 ms margin added to "now" when checking token expiry
+// Parses the raw keychain payload (a JSON string) into Claude Code OAuth credentials.
+// Returns null unless `claudeAiOauth` holds a non-empty string accessToken, a string
+// refreshToken and a numeric expiresAt.
+function parseKeychainCredentials(raw) {
+  const isRecord = (value) => typeof value === "object" && value !== null;
+  let payload;
+  try {
+    payload = JSON.parse(raw);
+  } catch {
+    return null; // not valid JSON
+  }
+  if (!isRecord(payload)) return null;
+  const oauthSection = payload["claudeAiOauth"];
+  if (!isRecord(oauthSection)) return null;
+  const { accessToken, refreshToken, expiresAt, subscriptionType } = oauthSection;
+  const hasAccessToken = typeof accessToken === "string" && accessToken !== "";
+  if (!hasAccessToken || typeof refreshToken !== "string" || typeof expiresAt !== "number") return null;
+  // A missing or non-string subscriptionType is tolerated and reported as "unknown".
+  const plan = typeof subscriptionType === "string" ? subscriptionType : "unknown";
+  return {
+    accessToken,
+    refreshToken,
+    expiresAt,
+    subscriptionType: plan
+  };
+}
+function isTokenExpired(credentials) {
+  return credentials.expiresAt <= Date.now() + TOKEN_EXPIRY_BUFFER_MS; // true once expired, or expiring within TOKEN_EXPIRY_BUFFER_MS
+}
+// Reads Claude Code's stored credentials through the macOS `security` CLI; resolves to null off macOS or on any failure.
+async function readClaudeCodeCredentials(account) {
+  if (process.platform !== "darwin") return null;
+  try {
+    const accountArg = account ? ` -a '${String(account).replace(/'/g, "'\\''")}'` : ""; // POSIX single-quoting keeps quotes, `$` and backticks in the account literal
+    const { stdout } = await execAsync(`security find-generic-password -s "${KEYCHAIN_SERVICE}"${accountArg} -w`, { timeout: 5e3 });
+    return parseKeychainCredentials(stdout.trim());
+  } catch {
+    return null; // best effort: any exec failure (non-zero exit, timeout) means "no credentials"
+  }
+}
+// Resolves to the stored access token, or null when no credentials were read or isTokenExpired reports them expired.
+async function getAccessToken(account) {
+  const credentials = await readClaudeCodeCredentials(account);
+  const usable = Boolean(credentials) && !isTokenExpired(credentials);
+  return usable ? credentials.accessToken : null;
+}
 // src/providers.ts
 var PROVIDER_CONFIGS = {
   anthropic: {
@@ -508,30 +562,49 @@ var initCommand = new Command("init").description("Set up Kairn with your API ke
       choices: PROVIDER_MODELS[provider]
     });
   }
-  const apiKey = await password({
-    message: `${providerDisplayName} API key${provider === "other" ? " (Enter to skip)" : ""}`,
-    mask: "*"
-  });
-  if (!apiKey && provider !== "other") {
-    console.log(ui.error("No API key provided. Aborting."));
-    process.exit(1);
+  let apiKey = "";
+  let authType = "api-key";
+  if (provider === "anthropic") {
+    const oauthToken = await getAccessToken();
+    if (oauthToken) {
+      const useOAuth = await confirm({
+        message: "Claude Code subscription detected. Use it instead of an API key? (experimental \u2014 may break)",
+        default: true
+      });
+      if (useOAuth) {
+        authType = "claude-code-oauth";
+        console.log(ui.warn("Using Claude Code OAuth token. This is undocumented and may break at any time."));
+        console.log(ui.success("OAuth token validated"));
+      }
+    }
   }
-  if (apiKey) {
-    console.log(chalk3.dim("\n  Verifying API key..."));
-    const valid = await verifyKey(provider, apiKey, baseURL, model);
-    if (!valid) {
-      console.log(ui.error("Invalid API key. Check your key and try again."));
+  if (authType === "api-key") {
+    apiKey = await password({
+      message: `${providerDisplayName} API key${provider === "other" ? " (Enter to skip)" : ""}`,
+      mask: "*"
+    });
+    if (!apiKey && provider !== "other") {
+      console.log(ui.error("No API key provided. Aborting."));
       process.exit(1);
     }
-    console.log(ui.success("API key verified"));
-  } else {
-    console.log(ui.warn("No API key \u2014 skipping verification"));
+    if (apiKey) {
+      console.log(chalk3.dim("\n  Verifying API key..."));
+      const valid = await verifyKey(provider, apiKey, baseURL, model);
+      if (!valid) {
+        console.log(ui.error("Invalid API key. Check your key and try again."));
+        process.exit(1);
+      }
+      console.log(ui.success("API key verified"));
+    } else {
+      console.log(ui.warn("No API key \u2014 skipping verification"));
+    }
   }
   const config = {
     provider,
-    api_key: apiKey || "",
+    api_key: apiKey,
     model,
     ...baseURL ? { base_url: baseURL } : {},
+    ...authType !== "api-key" ? { auth_type: authType } : {},
     default_runtime: "claude-code",
     created_at: (/* @__PURE__ */ new Date()).toISOString()
   };
@@ -555,7 +628,7 @@ var initCommand = new Command("init").description("Set up Kairn with your API ke
 // src/commands/describe.ts
 import { Command as Command2 } from "commander";
-import { input as input2, confirm, select as select2 } from "@inquirer/prompts";
+import { input as input2, confirm as confirm2, select as select2 } from "@inquirer/prompts";
 import chalk5 from "chalk";
 // src/compiler/compile.ts
@@ -1240,8 +1313,18 @@ async function callLLM(config, userMessage, options) {
   const { systemPrompt } = options;
   const jsonMode = options.jsonMode ?? false;
   const providerName = getProviderName(config.provider);
+  let apiKey = config.api_key;
+  if (config.auth_type === "claude-code-oauth") {
+    const oauthToken = await getAccessToken();
+    if (!oauthToken) {
+      throw new Error(
+        "Claude Code OAuth token unavailable or expired. Run `kairn init` to reconfigure, or launch Claude Code to refresh the token."
+      );
+    }
+    apiKey = oauthToken;
+  }
   if (config.provider === "anthropic") {
-    const client2 = new Anthropic2({ apiKey: config.api_key });
+    const client2 = new Anthropic2({ apiKey });
     const messages = [
       { role: "user", content: userMessage }
     ];
@@ -1262,7 +1345,7 @@ async function callLLM(config, userMessage, options) {
     }
   }
   const resolvedBaseURL = getBaseURL(config.provider, config.base_url);
-  const clientOptions = { apiKey: config.api_key };
+  const clientOptions = { apiKey };
   if (resolvedBaseURL) clientOptions.baseURL = resolvedBaseURL;
   const client = new OpenAI2(clientOptions);
   try {
@@ -2441,7 +2524,7 @@ Autonomy level: ${autonomyLevel} (${autonomyLabel(autonomyLevel)})`;
       console.log("");
     }
   }
-  const proceed = options.yes || await confirm({
+  const proceed = options.yes || await confirm2({
     message: "Generate environment in current directory?",
     default: true
   });
@@ -2656,7 +2739,7 @@ var updateRegistryCommand = new Command5("update-registry").description("Fetch t
 // src/commands/optimize.ts
 import { Command as Command6 } from "commander";
-import { confirm as confirm2 } from "@inquirer/prompts";
+import { confirm as confirm3 } from "@inquirer/prompts";
 import chalk9 from "chalk";
 import ora from "ora";
 import fs12 from "fs/promises";
@@ -2718,7 +2801,7 @@ function detectFramework(deps) {
   ];
   const detected = [];
   for (const [packages, name] of frameworks) {
-    if (packages.some((pkg) => deps.includes(pkg))) {
+    if (packages.some((pkg2) => deps.includes(pkg2))) {
       detected.push(name);
     }
   }
@@ -2742,11 +2825,11 @@ function extractEnvKeys(content) {
   return keys;
 }
 async function scanProject(dir) {
-  const pkg = await readJsonSafe(path11.join(dir, "package.json"));
-  const deps = pkg?.dependencies ? Object.keys(pkg.dependencies) : [];
-  const devDeps = pkg?.devDependencies ? Object.keys(pkg.devDependencies) : [];
+  const pkg2 = await readJsonSafe(path11.join(dir, "package.json"));
+  const deps = pkg2?.dependencies ? Object.keys(pkg2.dependencies) : [];
+  const devDeps = pkg2?.devDependencies ? Object.keys(pkg2.devDependencies) : [];
   const allDeps = [...deps, ...devDeps];
-  const scripts = pkg?.scripts || {};
+  const scripts = pkg2?.scripts || {};
   const rootFiles = await listDirSafe(dir);
   const keyFiles = rootFiles.filter(
     (f) => [
@@ -2808,8 +2891,8 @@ async function scanProject(dir) {
     existingSkills = await listDirSafe(path11.join(claudeDir, "skills"));
     existingAgents = (await listDirSafe(path11.join(claudeDir, "agents"))).filter((f) => f.endsWith(".md")).map((f) => f.replace(".md", ""));
   }
-  const name = pkg?.name || path11.basename(dir);
-  const description = pkg?.description || "";
+  const name = pkg2?.name || path11.basename(dir);
+  const description = pkg2?.description || "";
   return {
     name,
     description,
@@ -3028,7 +3111,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
     }
     if (!options.yes) {
       console.log("");
-      const proceed = await confirm2({
+      const proceed = await confirm3({
         message: "Generate optimized environment? This will overwrite existing .claude/ files.",
         default: false
       });
@@ -3040,7 +3123,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
   } else {
     console.log(chalk9.dim("\n  No existing .claude/ directory found \u2014 generating from scratch.\n"));
     if (!options.yes) {
-      const proceed = await confirm2({
+      const proceed = await confirm3({
         message: "Generate Claude Code environment for this project?",
         default: true
       });
@@ -3103,7 +3186,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
       }
     }
     console.log("");
-    const apply = await confirm2({
+    const apply = await confirm3({
       message: "Apply these changes?",
       default: true
     });
@@ -3702,7 +3785,7 @@ import ora2 from "ora";
 import fs24 from "fs/promises";
 import path24 from "path";
 import { parse as yamlParse2 } from "yaml";
-import { confirm as confirm3, select as select4 } from "@inquirer/prompts";
+import { confirm as confirm4, select as select4 } from "@inquirer/prompts";
 // src/evolve/init.ts
 import fs15 from "fs/promises";
@@ -3945,14 +4028,14 @@ async function buildProjectProfile(projectRoot) {
       path15.join(projectRoot, "package.json"),
       "utf-8"
     );
-    const pkg = JSON.parse(pkgStr);
+    const pkg2 = JSON.parse(pkgStr);
     profile.language = "typescript";
-    if (pkg.scripts && typeof pkg.scripts === "object") {
-      profile.scripts = pkg.scripts;
+    if (pkg2.scripts && typeof pkg2.scripts === "object") {
+      profile.scripts = pkg2.scripts;
     }
     const deps = {
-      ...pkg.dependencies ?? {},
-      ...pkg.devDependencies ?? {}
+      ...pkg2.dependencies ?? {},
+      ...pkg2.devDependencies ?? {}
     };
     if (deps.next) {
       profile.framework = "Next.js";
@@ -4051,8 +4134,8 @@ async function copyDir(src, dest) {
 }
 // src/evolve/runner.ts
-import { exec as exec2, spawn } from "child_process";
-import { promisify as promisify2 } from "util";
+import { exec as exec3, spawn } from "child_process";
+import { promisify as promisify3 } from "util";
 import fs18 from "fs/promises";
 import os3 from "os";
 import path18 from "path";
@@ -4177,11 +4260,11 @@ async function loadIterationLog(workspacePath, iteration) {
 }
 // src/evolve/exec.ts
-import { exec } from "child_process";
-import { promisify } from "util";
-var execAsync = promisify(exec);
+import { exec as exec2 } from "child_process";
+import { promisify as promisify2 } from "util";
+var execAsync2 = promisify2(exec2);
 async function execCommand(cmd, cwd, timeoutMs = 3e4) {
-  return execAsync(cmd, { cwd, timeout: timeoutMs });
+  return execAsync2(cmd, { cwd, timeout: timeoutMs }); // runs cmd in cwd via the promisified child_process exec; timeoutMs defaults to 3e4
 }
 // src/evolve/scorers.ts
@@ -4341,21 +4424,47 @@ async function rubricScorer(task, workspacePath, stdout, stderr, config) {
     breakdown
   };
 }
+// Annotates a failing score with a heuristic failureCategory/failureReason taken from the run output (first matching rule wins); passing scores are returned untouched.
+function classifyFailure(score, stdout, stderr) {
+  if (score.pass) return score;
+  const combined = `${stdout}\n${stderr}`.toLowerCase();
+  const scoreValue = score.score ?? 0;
+  let failureCategory = "unknown";
+  let failureReason = "";
+  if (stderr.includes("[setup]") && stderr.includes("Error") || combined.includes("command not found") || combined.includes("no such file or directory")) {
+    failureCategory = "task";
+    failureReason = "Task setup failed or references missing resources";
+  } else if (combined.includes("token limit") || combined.includes("context length") || combined.includes("rate limit") || combined.includes("api error") || /\b429\b/.test(combined) || combined.includes("overloaded")) { // 429 must stand alone: digits inside e.g. "14290ms" are not an HTTP status
+    failureCategory = "model";
+    failureReason = "Model API error, token limit, or rate limit";
+  } else if (combined.includes("build failed") && combined.includes("before") || combined.includes("merge conflict") || combined.includes("git dirty") || combined.includes("uncommitted changes")) {
+    failureCategory = "repo";
+    failureReason = "Pre-existing repo issues (build failure, dirty state)";
+  } else if (scoreValue >= 20 && scoreValue < 80) {
+    failureCategory = "harness";
+    failureReason = "Agent attempted the task but did not follow harness conventions";
+  }
+  return { ...score, failureCategory, failureReason };
+}
 async function scoreTask(task, workspacePath, stdout, stderr, config) {
+  let score; // result of whichever scorer is selected below
   if (task.scoring === "pass-fail") {
-    return passFailScorer(task, workspacePath, stdout, stderr);
-  }
-  if (task.scoring === "llm-judge" && config) {
-    return llmJudgeScorer(task, workspacePath, stdout, stderr, config);
+    score = await passFailScorer(task, workspacePath, stdout, stderr);
+  } else if (task.scoring === "llm-judge" && config) {
+    score = await llmJudgeScorer(task, workspacePath, stdout, stderr, config);
+  } else if (task.scoring === "rubric" && config) {
+    score = await rubricScorer(task, workspacePath, stdout, stderr, config);
+  } else { // any other scoring value, or llm-judge/rubric without a config, falls back to pass/fail
+    score = await passFailScorer(task, workspacePath, stdout, stderr);
   }
-  if (task.scoring === "rubric" && config) {
-    return rubricScorer(task, workspacePath, stdout, stderr, config);
+  if (!score.pass) { // only failing scores are annotated with failureCategory/failureReason
+    score = classifyFailure(score, stdout, stderr);
   }
-  return passFailScorer(task, workspacePath, stdout, stderr);
+  return score;
 }
 // src/evolve/runner.ts
-var execAsync2 = promisify2(exec2);
+var execAsync3 = promisify3(exec3);
 var COPY_SKIP_DIRS = /* @__PURE__ */ new Set([".git", "node_modules", ".kairn-evolve", ".claude"]);
 async function deployMcpJson(harnessPath, workDir) {
   const src = path18.join(harnessPath, ".mcp.json");
@@ -4365,12 +4474,12 @@ async function deployMcpJson(harnessPath, workDir) {
 async function createIsolatedWorkspace(projectRoot, harnessPath) {
   const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
   try {
-    await execAsync2("git rev-parse --is-inside-work-tree", {
+    await execAsync3("git rev-parse --is-inside-work-tree", {
       cwd: projectRoot,
       timeout: 5e3
     });
     const tmpDir2 = path18.join(os3.tmpdir(), `kairn-evolve-wt-${suffix}`);
-    await execAsync2(`git worktree add --detach "${tmpDir2}" HEAD`, {
+    await execAsync3(`git worktree add --detach "${tmpDir2}" HEAD`, {
       cwd: projectRoot,
       timeout: 3e4
     });
@@ -4409,14 +4518,14 @@ async function copyProjectDir(src, dest) {
 async function cleanupIsolatedWorkspace(workDir, isWorktree, projectRoot) {
   if (isWorktree) {
     try {
-      await execAsync2(`git worktree remove "${workDir}" --force`, {
+      await execAsync3(`git worktree remove "${workDir}" --force`, {
         cwd: projectRoot,
         timeout: 1e4
       });
     } catch {
       await fs18.rm(workDir, { recursive: true, force: true }).catch(() => {
       });
-      await execAsync2("git worktree prune", {
+      await execAsync3("git worktree prune", {
         cwd: projectRoot,
         timeout: 5e3
       }).catch(() => {
@@ -4437,7 +4546,7 @@ async function runTask(task, harnessPath, traceDir, iteration, projectRoot) {
     let setupStderr = "";
     if (task.setup.trim()) {
       try {
-        await execAsync2(task.setup, { cwd: workDir, timeout: 6e4 });
+        await execAsync3(task.setup, { cwd: workDir, timeout: 6e4 });
       } catch (err) {
         setupStderr = err instanceof Error ? err.message : String(err);
       }
@@ -5453,6 +5562,7 @@ function buildLeaderboard(iterations, tasks) {
   const taskIds = tasks.map((t) => t.id);
   return taskIds.map((taskId) => {
     const scores = {};
+    const variance = {};
     let bestScore = -1;
     let bestIteration = 0;
     for (const iter of iterations) {
@@ -5460,13 +5570,21 @@ function buildLeaderboard(iterations, tasks) {
       if (s) {
         const score = numericScore2(s);
         scores[iter.iteration] = score;
+        if (s.variance) {
+          variance[iter.iteration] = {
+            mean: s.variance.mean,
+            stddev: s.variance.stddev,
+            runs: s.variance.runs
+          };
+        }
         if (score > bestScore) {
           bestScore = score;
           bestIteration = iter.iteration;
         }
       }
     }
-    return { taskId, scores, bestIteration, bestScore };
+    const hasVariance = Object.keys(variance).length > 0;
+    return { taskId, scores, bestIteration, bestScore, ...hasVariance ? { variance } : {} };
   });
 }
 function iterationStatus(iter, bestIteration) {
@@ -5502,13 +5620,29 @@ async function generateMarkdownReport(workspacePath) {
   lines.push("");
   lines.push("## Iterations");
   lines.push("");
-  lines.push("| Iter | Score | Mutations | Status |");
-  lines.push("|------|-------|-----------|--------|");
+  const hasVariance = iterations.some(
+    (iter) => Object.values(iter.taskResults).some((s) => s.variance)
+  );
+  if (hasVariance) {
+    lines.push("| Iter | Score | Mutations | Status |");
+    lines.push("|------|-------|-----------|--------|");
+  } else {
+    lines.push("| Iter | Score | Mutations | Status |");
+    lines.push("|------|-------|-----------|--------|");
+  }
   for (const iter of iterations) {
     const mutations = iter.proposal?.mutations.length ?? 0;
     const mutStr = mutations > 0 ? mutations.toString() : "-";
     const status = iterationStatus(iter, bestIter.iteration);
-    lines.push(`| ${iter.iteration} | ${iter.score.toFixed(1)}% | ${mutStr} | ${status} |`);
+    let scoreStr = `${iter.score.toFixed(1)}%`;
+    if (hasVariance) {
+      const stddevs = Object.values(iter.taskResults).map((s) => s.variance?.stddev).filter((v) => v !== void 0);
+      if (stddevs.length > 0) {
+        const avgStddev = stddevs.reduce((a, b) => a + b, 0) / stddevs.length;
+        scoreStr = `${iter.score.toFixed(1)}% \xB1${avgStddev.toFixed(1)}`;
+      }
+    }
+    lines.push(`| ${iter.iteration} | ${scoreStr} | ${mutStr} | ${status} |`);
   }
   lines.push("");
   if (leaderboard.length > 0) {
@@ -5521,7 +5655,10 @@ async function generateMarkdownReport(workspacePath) {
     for (const entry of leaderboard) {
       const scoreCols = iterNums.map((n) => {
         const s = entry.scores[n];
-        return s !== void 0 ? `${s.toFixed(0)}%` : "-";
+        if (s === void 0) return "-";
+        const v = entry.variance?.[n];
+        if (v && v.runs > 1) return `${s.toFixed(0)}% \xB1${v.stddev.toFixed(1)}`;
+        return `${s.toFixed(0)}%`;
       });
       lines.push(`| ${entry.taskId} | ${scoreCols.join(" | ")} | ${entry.bestScore.toFixed(0)}% (iter ${entry.bestIteration}) |`);
     }
@@ -5571,12 +5708,17 @@ async function generateJsonReport(workspacePath) {
       bestIteration: bestIter.iteration,
       improvement
     },
-    iterations: iterations.map((iter) => ({
-      iteration: iter.iteration,
-      score: iter.score,
-      mutationCount: iter.proposal?.mutations.length ?? 0,
-      status: iterationStatus(iter, bestIter.iteration)
-    })),
+    iterations: iterations.map((iter) => {
+      const stddevs = Object.values(iter.taskResults).map((s) => s.variance?.stddev).filter((v) => v !== void 0);
+      const avgStddev = stddevs.length > 0 ? stddevs.reduce((a, b) => a + b, 0) / stddevs.length : void 0;
+      return {
+        iteration: iter.iteration,
+        score: iter.score,
+        ...avgStddev !== void 0 ? { stddev: avgStddev } : {},
+        mutationCount: iter.proposal?.mutations.length ?? 0,
+        status: iterationStatus(iter, bestIter.iteration)
+      };
+    }),
     leaderboard,
     counterfactuals
   };
@@ -5766,7 +5908,7 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
     let addMore = true;
     while (addMore) {
       try {
-        addMore = await confirm3({ message: "Add another eval task?", default: false });
+        addMore = await confirm4({ message: "Add another eval task?", default: false });
       } catch {
         addMore = false;
       }
@@ -6179,10 +6321,12 @@ async function countFiles(dir) {
 }
 // src/cli.ts
+var require2 = createRequire(import.meta.url);
+var pkg = require2("../package.json");
 var program = new Command12();
 program.name("kairn").description(
   "Compile natural language intent into optimized Claude Code environments"
-).version("1.9.0").option("--no-color", "Disable colored output");
+).version(pkg.version).option("--no-color", "Disable colored output");
 program.addCommand(initCommand);
 program.addCommand(describeCommand);
 program.addCommand(optimizeCommand);