kairn-cli 1.14.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,6 +1,6 @@
1
1
  // src/cli.ts
2
- import { Command as Command11 } from "commander";
3
- import chalk14 from "chalk";
2
+ import { Command as Command12 } from "commander";
3
+ import chalk15 from "chalk";
4
4
 
5
5
  // src/commands/init.ts
6
6
  import { Command } from "commander";
@@ -221,7 +221,7 @@ var ui = {
221
221
  // Key-value pairs
222
222
  kv: (key, value) => ` ${chalk.cyan(key.padEnd(14))} ${value}`,
223
223
  // File list
224
- file: (path15) => chalk.dim(` ${path15}`),
224
+ file: (path23) => chalk.dim(` ${path23}`),
225
225
  // Tool display
226
226
  tool: (name, reason) => ` ${warmStone("\u25CF")} ${chalk.bold(name)}
227
227
  ${chalk.dim(reason)}`,
@@ -562,8 +562,6 @@ import chalk5 from "chalk";
562
562
  import fs4 from "fs/promises";
563
563
  import path4 from "path";
564
564
  import crypto from "crypto";
565
- import Anthropic2 from "@anthropic-ai/sdk";
566
- import OpenAI2 from "openai";
567
565
 
568
566
  // src/compiler/prompt.ts
569
567
  var SKELETON_PROMPT = `You are the Kairn skeleton compiler. Your job is to select tools and outline the project structure from a user's natural language description.
@@ -1204,76 +1202,9 @@ async function loadRegistry() {
1204
1202
  return Array.from(merged.values());
1205
1203
  }
1206
1204
 
1207
- // src/compiler/compile.ts
1208
- function buildSkeletonMessage(intent, registry) {
1209
- const registrySummary = registry.map(
1210
- (t) => `- ${t.id} (${t.type}, tier ${t.tier}, auth: ${t.auth}): ${t.description} [best_for: ${t.best_for.join(", ")}]`
1211
- ).join("\n");
1212
- return `## User Intent
1213
-
1214
- ${intent}
1215
-
1216
- ## Available Tool Registry
1217
-
1218
- ${registrySummary}
1219
-
1220
- Generate the skeleton JSON now.`;
1221
- }
1222
- function buildHarnessMessage(intent, skeleton, concise) {
1223
- const skeletonJson = JSON.stringify(skeleton, null, 2);
1224
- const conciseNote = concise ? "\n\nIMPORTANT: Be concise. Maximum 80 lines for claude_md. Maximum 5 commands. Keep all content brief." : "";
1225
- return `## User Intent
1226
-
1227
- ${intent}
1228
-
1229
- ## Project Skeleton
1230
-
1231
- ${skeletonJson}
1232
-
1233
- Generate the harness content JSON now.${conciseNote}`;
1234
- }
1235
- function parseSkeletonResponse(text) {
1236
- let cleaned = text.trim();
1237
- if (cleaned.startsWith("```")) {
1238
- cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
1239
- }
1240
- const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
1241
- if (!jsonMatch) {
1242
- throw new Error("Pass 1 (skeleton) did not return valid JSON.");
1243
- }
1244
- try {
1245
- const parsed = JSON.parse(jsonMatch[0]);
1246
- if (!parsed.name || !parsed.tools || !Array.isArray(parsed.tools)) {
1247
- throw new Error("Skeleton missing required fields: name, tools");
1248
- }
1249
- return parsed;
1250
- } catch (err) {
1251
- throw new Error(
1252
- `Failed to parse skeleton JSON: ${err instanceof Error ? err.message : String(err)}`
1253
- );
1254
- }
1255
- }
1256
- function parseHarnessResponse(text) {
1257
- let cleaned = text.trim();
1258
- if (cleaned.startsWith("```")) {
1259
- cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
1260
- }
1261
- const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
1262
- if (!jsonMatch) {
1263
- throw new Error("Pass 2 (harness) did not return valid JSON.");
1264
- }
1265
- try {
1266
- const parsed = JSON.parse(jsonMatch[0]);
1267
- if (!parsed.claude_md || !parsed.commands) {
1268
- throw new Error("Harness missing required fields: claude_md, commands");
1269
- }
1270
- return parsed;
1271
- } catch (err) {
1272
- throw new Error(
1273
- `Failed to parse harness JSON: ${err instanceof Error ? err.message : String(err)}`
1274
- );
1275
- }
1276
- }
1205
+ // src/llm.ts
1206
+ import Anthropic2 from "@anthropic-ai/sdk";
1207
+ import OpenAI2 from "openai";
1277
1208
  function classifyError(err, provider) {
1278
1209
  const msg = err instanceof Error ? err.message : String(err);
1279
1210
  const status = err?.status;
@@ -1305,8 +1236,8 @@ function classifyError(err, provider) {
1305
1236
  return `${provider} API error: ${msg}`;
1306
1237
  }
1307
1238
  async function callLLM(config, userMessage, options) {
1308
- const maxTokens = options?.maxTokens ?? 8192;
1309
- const systemPrompt = options?.systemPrompt ?? SYSTEM_PROMPT;
1239
+ const maxTokens = options.maxTokens ?? 8192;
1240
+ const systemPrompt = options.systemPrompt;
1310
1241
  const providerName = getProviderName(config.provider);
1311
1242
  if (config.provider === "anthropic") {
1312
1243
  const client2 = new Anthropic2({ apiKey: config.api_key });
@@ -1348,6 +1279,77 @@ async function callLLM(config, userMessage, options) {
1348
1279
  throw new Error(classifyError(err, providerName));
1349
1280
  }
1350
1281
  }
1282
+
1283
+ // src/compiler/compile.ts
1284
/**
 * Build the user message for the skeleton compile pass.
 *
 * @param {string} intent - The user's natural-language project description.
 * @param {Array<object>} registry - Tool registry entries; each one is read for
 *   `id`, `type`, `tier`, `auth`, `description` and `best_for` (an array).
 * @returns {string} Markdown-style prompt containing the intent and one bullet
 *   line per registry tool.
 */
function buildSkeletonMessage(intent, registry) {
  // One bullet per tool, in registry order.
  const describeTool = (tool) => {
    const bestFor = tool.best_for.join(", ");
    return `- ${tool.id} (${tool.type}, tier ${tool.tier}, auth: ${tool.auth}): ${tool.description} [best_for: ${bestFor}]`;
  };
  const registrySummary = registry.map(describeTool).join("\n");
  const sections = [
    "## User Intent",
    `${intent}`,
    "## Available Tool Registry",
    `${registrySummary}`,
    "Generate the skeleton JSON now."
  ];
  // Sections are separated by exactly one blank line.
  return sections.join("\n\n");
}
1298
/**
 * Build the user message for the harness compile pass.
 *
 * @param {string} intent - The user's natural-language project description.
 * @param {object} skeleton - Skeleton produced by the previous pass; embedded
 *   as pretty-printed JSON.
 * @param {boolean} concise - When truthy, appends an instruction asking the
 *   model to keep its output short.
 * @returns {string} The assembled prompt text.
 */
function buildHarnessMessage(intent, skeleton, concise) {
  const skeletonJson = JSON.stringify(skeleton, null, 2);
  let closing = "Generate the harness content JSON now.";
  if (concise) {
    closing += "\n\nIMPORTANT: Be concise. Maximum 80 lines for claude_md. Maximum 5 commands. Keep all content brief.";
  }
  const sections = [
    "## User Intent",
    `${intent}`,
    "## Project Skeleton",
    `${skeletonJson}`,
    closing
  ];
  return sections.join("\n\n");
}
1311
/**
 * Extract and validate the skeleton JSON from a pass-1 model response.
 *
 * A leading/trailing Markdown code fence is stripped, then the widest
 * `{ ... }` span in the text is parsed.
 *
 * @param {string} text - Raw model output.
 * @returns {object} Parsed skeleton with a truthy `name` and an array `tools`.
 * @throws {Error} If no JSON object is found, the JSON is malformed, or the
 *   required fields are missing (the latter two are wrapped in a
 *   "Failed to parse skeleton JSON" error).
 */
function parseSkeletonResponse(text) {
  const trimmed = text.trim();
  const unfenced = trimmed.startsWith("```")
    ? trimmed.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "")
    : trimmed;
  const candidate = unfenced.match(/\{[\s\S]*\}/);
  if (candidate === null) {
    throw new Error("Pass 1 (skeleton) did not return valid JSON.");
  }
  let skeleton;
  try {
    skeleton = JSON.parse(candidate[0]);
    const hasRequiredFields = Boolean(skeleton.name) && Array.isArray(skeleton.tools);
    if (!hasRequiredFields) {
      throw new Error("Skeleton missing required fields: name, tools");
    }
  } catch (err) {
    // Both syntax errors and the validation error above are reported uniformly.
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to parse skeleton JSON: ${reason}`);
  }
  return skeleton;
}
1332
/**
 * Extract and validate the harness JSON from a pass-2 model response.
 *
 * A leading/trailing Markdown code fence is stripped, then the widest
 * `{ ... }` span in the text is parsed.
 *
 * @param {string} text - Raw model output.
 * @returns {object} Parsed harness with truthy `claude_md` and `commands`.
 * @throws {Error} If no JSON object is found, the JSON is malformed, or the
 *   required fields are missing (the latter two are wrapped in a
 *   "Failed to parse harness JSON" error).
 */
function parseHarnessResponse(text) {
  const trimmed = text.trim();
  const unfenced = trimmed.startsWith("```")
    ? trimmed.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "")
    : trimmed;
  const candidate = unfenced.match(/\{[\s\S]*\}/);
  if (candidate === null) {
    throw new Error("Pass 2 (harness) did not return valid JSON.");
  }
  let harness;
  try {
    harness = JSON.parse(candidate[0]);
    const hasRequiredFields = Boolean(harness.claude_md) && Boolean(harness.commands);
    if (!hasRequiredFields) {
      throw new Error("Harness missing required fields: claude_md, commands");
    }
  } catch (err) {
    // Both syntax errors and the validation error above are reported uniformly.
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to parse harness JSON: ${reason}`);
  }
  return harness;
}
1351
1353
  function buildSettings(skeleton, registry) {
1352
1354
  const selectedTools = skeleton.tools.map((t) => registry.find((r) => r.id === t.tool_id)).filter(Boolean);
1353
1355
  const allow = ["Read", "Write", "Edit", "Bash(npm run *)", "Bash(npx *)"];
@@ -1517,7 +1519,9 @@ async function generateClarifications(intent, onProgress) {
1517
1519
  onProgress?.("Analyzing your request...");
1518
1520
  const clarificationConfig = { ...config };
1519
1521
  clarificationConfig.model = getCheapModel(config.provider, config.model);
1520
- const response = await callLLM(clarificationConfig, CLARIFICATION_PROMPT + "\n\nUser description: " + intent);
1522
+ const response = await callLLM(clarificationConfig, CLARIFICATION_PROMPT + "\n\nUser description: " + intent, {
1523
+ systemPrompt: SYSTEM_PROMPT
1524
+ });
1521
1525
  try {
1522
1526
  let cleaned = response.trim();
1523
1527
  if (cleaned.startsWith("```")) {
@@ -3686,8 +3690,1572 @@ var keysCommand = new Command10("keys").description("Add or update API keys for
3686
3690
  console.log("");
3687
3691
  });
3688
3692
 
3693
+ // src/commands/evolve.ts
3694
+ import { Command as Command11 } from "commander";
3695
+ import chalk14 from "chalk";
3696
+ import ora2 from "ora";
3697
+ import fs22 from "fs/promises";
3698
+ import path22 from "path";
3699
+ import { parse as yamlParse } from "yaml";
3700
+ import { confirm as confirm3, select as select4 } from "@inquirer/prompts";
3701
+
3702
+ // src/evolve/init.ts
3703
+ import fs15 from "fs/promises";
3704
+ import path15 from "path";
3705
+ import { stringify as yamlStringify } from "yaml";
3706
+
3707
+ // src/evolve/templates.ts
3708
+ var EVAL_TEMPLATES = {
3709
+ "add-feature": {
3710
+ id: "add-feature",
3711
+ name: "Add Feature",
3712
+ description: "Can the agent add a new capability?",
3713
+ bestFor: ["feature-development", "api-building", "full-stack"]
3714
+ },
3715
+ "fix-bug": {
3716
+ id: "fix-bug",
3717
+ name: "Fix Bug",
3718
+ description: "Can the agent diagnose and fix a problem?",
3719
+ bestFor: ["maintenance", "debugging", "qa"]
3720
+ },
3721
+ "refactor": {
3722
+ id: "refactor",
3723
+ name: "Refactor",
3724
+ description: "Can the agent restructure code?",
3725
+ bestFor: ["maintenance", "architecture", "backend"]
3726
+ },
3727
+ "test-writing": {
3728
+ id: "test-writing",
3729
+ name: "Test Writing",
3730
+ description: "Can the agent write tests?",
3731
+ bestFor: ["tdd", "qa", "backend"]
3732
+ },
3733
+ "config-change": {
3734
+ id: "config-change",
3735
+ name: "Config Change",
3736
+ description: "Can the agent update configuration?",
3737
+ bestFor: ["devops", "infrastructure", "backend"]
3738
+ },
3739
+ "documentation": {
3740
+ id: "documentation",
3741
+ name: "Documentation",
3742
+ description: "Can the agent write and update docs?",
3743
+ bestFor: ["content", "api-building", "full-stack"]
3744
+ }
3745
+ };
3746
/**
 * Pick the eval template ids that best exercise a given workflow type.
 *
 * @param {string} workflowType - Workflow label (e.g. "devops", "tdd").
 * @returns {string[]} Template ids for the workflow, or the generic
 *   add-feature / fix-bug / test-writing set when the workflow is unknown.
 */
function selectTemplatesForWorkflow(workflowType) {
  // A Map (rather than a plain object) so that inherited names such as
  // "constructor", "toString" or "__proto__" are treated as unknown workflows
  // instead of returning a prototype member.
  const mapping = new Map([
    ["feature-development", ["add-feature", "test-writing", "documentation"]],
    ["api-building", ["add-feature", "fix-bug", "test-writing"]],
    ["full-stack", ["add-feature", "fix-bug", "test-writing"]],
    ["maintenance", ["fix-bug", "refactor", "test-writing"]],
    ["debugging", ["fix-bug", "test-writing"]],
    ["qa", ["fix-bug", "test-writing", "add-feature"]],
    ["architecture", ["refactor", "test-writing", "config-change"]],
    ["backend", ["fix-bug", "refactor", "config-change", "test-writing"]],
    ["devops", ["config-change", "fix-bug"]],
    ["infrastructure", ["config-change", "refactor"]],
    ["tdd", ["test-writing", "add-feature", "fix-bug"]],
    ["content", ["documentation", "add-feature"]],
    ["research", ["documentation", "add-feature"]]
  ]);
  return mapping.get(workflowType) ?? ["add-feature", "fix-bug", "test-writing"];
}
3764
+ var TASK_GENERATION_PROMPT = `You are an eval task generator for Claude Code agent environments. Given a project's CLAUDE.md, project structure, and selected eval templates, generate concrete, project-specific tasks.
3765
+
3766
+ Each task must be realistic and testable against the actual project. Avoid generic placeholders.
3767
+
3768
+ Return a JSON object with a "tasks" array. Each task has:
3769
+ - id: kebab-case identifier (e.g., "add-health-endpoint")
3770
+ - template: which eval template this instantiates
3771
+ - description: concrete task description the agent will receive
3772
+ - setup: shell commands to prepare the workspace (e.g., "npm install")
3773
+ - expected_outcome: multi-line string describing what success looks like
3774
+ - scoring: "pass-fail", "llm-judge", or "rubric"
3775
+ - timeout: seconds (300 for features/bugs, 600 for refactors, 180 for config/docs/tests)
3776
+
3777
+ Return ONLY valid JSON, no markdown fences.`;
3778
/**
 * Extract a JSON object or array from a raw LLM response.
 *
 * A leading/trailing Markdown code fence is stripped first. The widest
 * `{ ... }` and `[ ... ]` spans are then both considered, trying whichever
 * starts earlier in the text first. This lets a top-level array of objects
 * parse as an array: looking for an object first would grab the span from the
 * first `{` to the last `}`, which is not valid JSON on its own.
 *
 * @param {string} raw - Raw model output.
 * @returns {object|Array} The parsed JSON value.
 * @throws {Error} If the text contains no `{...}` / `[...]` span, or if none of
 *   the candidate spans is valid JSON.
 */
function parseJsonResponse(raw) {
  let cleaned = raw.trim();
  if (cleaned.startsWith("```")) {
    cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
  }
  const candidates = [/\{[\s\S]*\}/.exec(cleaned), /\[[\s\S]*\]/.exec(cleaned)]
    .filter((match) => match !== null)
    .sort((a, b) => a.index - b.index);
  if (candidates.length === 0) {
    throw new Error(
      "LLM response did not contain valid JSON. Try again or use a different model."
    );
  }
  let firstError;
  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate[0]);
    } catch (err) {
      // Remember the first failure; a later candidate may still parse
      // (e.g. prose containing "[1]" before the real JSON object).
      firstError ??= err;
    }
  }
  throw new Error(
    `Failed to parse LLM response as JSON: ${firstError instanceof Error ? firstError.message : String(firstError)}`
  );
}
3797
// Fields every generated task must carry (non-null) to be accepted.
var REQUIRED_TASK_FIELDS = [
  "id",
  "template",
  "description",
  "setup",
  "expected_outcome",
  "scoring",
  "timeout"
];

/**
 * Check that a parsed task has every required field present and non-null.
 *
 * @param {unknown} obj - Candidate task as parsed from the LLM response.
 * @param {number} index - Position of the task in the response; used only in
 *   error messages.
 * @returns {object} The same object, unchanged, once validated.
 * @throws {Error} If `obj` is not an object or a required field is absent,
 *   `undefined` or `null`. The first missing field (in declaration order) is
 *   the one reported.
 */
function validateTask(obj, index) {
  const isObject = obj !== null && typeof obj === "object";
  if (!isObject) {
    throw new Error(`Task at index ${index} is not an object`);
  }
  const missingField = REQUIRED_TASK_FIELDS.find(
    (name) => !(name in obj) || obj[name] === void 0 || obj[name] === null
  );
  if (missingField !== void 0) {
    throw new Error(`Task at index ${index} is missing required field: ${missingField}`);
  }
  return obj;
}
3818
/**
 * Assemble the user message sent to the task-generation model.
 *
 * @param {string} claudeMd - Contents of the project's CLAUDE.md (may be empty).
 * @param {object} projectProfile - Profile with `language`, `framework`,
 *   `scripts` (name → command object) and `keyFiles` (array of names).
 * @param {string[]} templates - Eval template ids; each must be a key of
 *   `EVAL_TEMPLATES`.
 * @returns {string} Newline-joined prompt with CLAUDE.md, profile and template
 *   sections.
 */
function buildTaskGenerationMessage(claudeMd, projectProfile, templates) {
  const scriptSummary = Object.entries(projectProfile.scripts)
    .map(([name, command]) => `${name}=${command}`)
    .join(", ");
  const keyFileSummary = projectProfile.keyFiles.join(", ");
  const templateDescriptions = templates
    .map((templateId) => `- ${templateId}: ${EVAL_TEMPLATES[templateId].description}`)
    .join("\n");
  const lines = [
    "## CLAUDE.md",
    "",
    claudeMd,
    "",
    "## Project Profile",
    "",
    `Language: ${projectProfile.language ?? "unknown"}`,
    `Framework: ${projectProfile.framework ?? "none"}`,
    // Empty summaries are shown as the literal word "none".
    `Scripts: ${scriptSummary || "none"}`,
    `Key files: ${keyFileSummary || "none"}`,
    "",
    "## Selected Eval Templates",
    "",
    templateDescriptions,
    "",
    "Generate concrete, project-specific tasks for each template above."
  ];
  return lines.join("\n");
}
3845
/**
 * Ask the LLM to turn the selected eval templates into concrete tasks.
 *
 * @param {string} claudeMd - Contents of the project's CLAUDE.md.
 * @param {object} projectProfile - Profile passed to the prompt builder.
 * @param {string[]} templates - Eval template ids to instantiate.
 * @param {object} config - LLM configuration forwarded to `callLLM`.
 * @returns {Promise<object[]>} The validated tasks, in response order.
 * @throws {Error} If the response is not a JSON object, has no `tasks` array,
 *   or any task fails validation.
 */
async function generateTasksFromTemplates(claudeMd, projectProfile, templates, config) {
  const prompt = buildTaskGenerationMessage(claudeMd, projectProfile, templates);
  const reply = await callLLM(config, prompt, {
    systemPrompt: TASK_GENERATION_PROMPT,
    maxTokens: 4096
  });
  const payload = parseJsonResponse(reply);
  if (payload === null || typeof payload !== "object") {
    throw new Error("LLM response is not a JSON object");
  }
  if (!Array.isArray(payload.tasks)) {
    throw new Error("LLM response does not contain a 'tasks' array");
  }
  // Validation stops at the first invalid task.
  return payload.tasks.map((candidate, position) => validateTask(candidate, position));
}
3865
+
3866
+ // src/evolve/init.ts
3867
/**
 * Create the `.kairn-evolve` workspace directory tree and its config file.
 *
 * Creates the `baseline`, `traces` and `iterations` subdirectories (existing
 * directories are left in place) and writes `config.yaml`.
 *
 * @param {string} projectRoot - Project directory that will hold the workspace.
 * @param {object} config - Evolve settings; `model`, `proposerModel`, `scorer`,
 *   `maxIterations` and `parallelTasks` are written out under snake_case keys.
 * @returns {Promise<string>} Path of the workspace directory.
 */
async function createEvolveWorkspace(projectRoot, config) {
  const workspace = path15.join(projectRoot, ".kairn-evolve");
  for (const subdir of ["baseline", "traces", "iterations"]) {
    await fs15.mkdir(path15.join(workspace, subdir), { recursive: true });
  }
  const { model, proposerModel, scorer, maxIterations, parallelTasks } = config;
  const configObj = {
    model,
    proposer_model: proposerModel,
    scorer,
    max_iterations: maxIterations,
    parallel_tasks: parallelTasks
  };
  const configPath = path15.join(workspace, "config.yaml");
  await fs15.writeFile(configPath, yamlStringify(configObj), "utf-8");
  return workspace;
}
3886
/**
 * Write the generated tasks to `tasks.yaml` inside the workspace.
 *
 * Only the known task fields are serialized; `rubric` is included only when
 * the task has a truthy one.
 *
 * @param {string} workspacePath - Path of the `.kairn-evolve` workspace.
 * @param {object[]} tasks - Tasks to serialize.
 * @returns {Promise<void>}
 */
async function writeTasksFile(workspacePath, tasks) {
  const toYamlEntry = (task) => {
    const entry = {
      id: task.id,
      template: task.template,
      description: task.description,
      setup: task.setup,
      expected_outcome: task.expected_outcome,
      scoring: task.scoring
    };
    if (task.rubric) {
      entry.rubric = task.rubric;
    }
    // Assigned last so `timeout` stays after the optional `rubric` key.
    entry.timeout = task.timeout;
    return entry;
  };
  const doc = { tasks: tasks.map(toYamlEntry) };
  const header = "# .kairn-evolve/tasks.yaml\n# Auto-generated by kairn evolve init \u2014 edit freely\n";
  const target = path15.join(workspacePath, "tasks.yaml");
  await fs15.writeFile(target, header + yamlStringify(doc), "utf-8");
}
3906
/**
 * Inspect a project directory and derive a lightweight profile of it.
 *
 * Detection is best-effort: unreadable or missing files never throw, they
 * simply leave the corresponding field at its default.
 *
 * - `language`: for a project with a parseable `package.json`, "typescript"
 *   when `typescript` is a (dev)dependency or a `tsconfig.json` exists,
 *   otherwise "javascript". Without a `package.json`, marker files select
 *   "python" (`pyproject.toml` / `requirements.txt`), "rust" (`Cargo.toml`)
 *   or "go" (`go.mod`); otherwise `null`.
 * - `framework`: first match among Next.js, Express, React, Vue and
 *   Commander.js in the merged dependencies, otherwise `null`.
 * - `scripts`: the `scripts` object from `package.json`, otherwise `{}`.
 * - `keyFiles`: well-known top-level files present in the project root.
 *
 * @param {string} projectRoot - Directory to inspect.
 * @returns {Promise<{language: string|null, framework: string|null, scripts: object, keyFiles: string[]}>}
 */
async function buildProjectProfile(projectRoot) {
  const profile = {
    language: null,
    framework: null,
    scripts: {},
    keyFiles: []
  };
  const hasFile = async (name) => {
    try {
      await fs15.access(path15.join(projectRoot, name));
      return true;
    } catch {
      return false;
    }
  };

  let pkg = null;
  try {
    pkg = JSON.parse(
      await fs15.readFile(path15.join(projectRoot, "package.json"), "utf-8")
    );
  } catch {
    // Missing or malformed package.json: fall through to marker-file probes.
  }
  if (pkg !== null && typeof pkg === "object") {
    if (pkg.scripts && typeof pkg.scripts === "object") {
      profile.scripts = pkg.scripts;
    }
    const deps = {
      ...pkg.dependencies ?? {},
      ...pkg.devDependencies ?? {}
    };
    // A package.json alone does not imply TypeScript; require actual evidence.
    const usesTypeScript = Boolean(deps.typescript) || await hasFile("tsconfig.json");
    profile.language = usesTypeScript ? "typescript" : "javascript";
    // Ordered by precedence: a Next.js app also depends on React, etc.
    const frameworks = [
      ["next", "Next.js"],
      ["express", "Express"],
      ["react", "React"],
      ["vue", "Vue"],
      ["commander", "CLI (Commander.js)"]
    ];
    const detected = frameworks.find(([dependency]) => deps[dependency]);
    if (detected) {
      profile.framework = detected[1];
    }
  }

  if (!profile.language) {
    const markers = [
      ["pyproject.toml", "python"],
      ["requirements.txt", "python"],
      ["Cargo.toml", "rust"],
      ["go.mod", "go"]
    ];
    for (const [file, language] of markers) {
      if (await hasFile(file)) {
        profile.language = language;
        break;
      }
    }
  }

  try {
    const keyPatterns = new Set([
      "README.md",
      "package.json",
      "tsconfig.json",
      "pyproject.toml",
      "Cargo.toml",
      "go.mod",
      "Makefile",
      "Dockerfile"
    ]);
    const entries = await fs15.readdir(projectRoot);
    profile.keyFiles = entries.filter((entry) => keyPatterns.has(entry));
  } catch {
    // Unreadable project root: keep the empty key-file list.
  }
  return profile;
}
3969
/**
 * Generate eval tasks for a project from its CLAUDE.md and detected profile.
 *
 * @param {string} projectRoot - Project directory to analyse.
 * @param {string} workflowType - Workflow label used to choose eval templates.
 * @returns {Promise<object[]>} Tasks produced by `generateTasksFromTemplates`.
 * @throws {Error} If no kairn config is available.
 */
async function autoGenerateTasks(projectRoot, workflowType) {
  const config = await loadConfig();
  if (!config) {
    throw new Error("No config found. Run `kairn init` first.");
  }
  // A missing or unreadable CLAUDE.md is not fatal; the prompt just gets an
  // empty section.
  const readClaudeMd = async () => {
    try {
      return await fs15.readFile(
        path15.join(projectRoot, ".claude", "CLAUDE.md"),
        "utf-8"
      );
    } catch {
      return "";
    }
  };
  const claudeMd = await readClaudeMd();
  const profile = await buildProjectProfile(projectRoot);
  const templates = selectTemplatesForWorkflow(workflowType);
  return generateTasksFromTemplates(claudeMd, profile, templates, config);
}
3986
+
3987
+ // src/evolve/baseline.ts
3988
+ import fs16 from "fs/promises";
3989
+ import path16 from "path";
3990
/**
 * Copy the project's `.claude` directory into the workspace as the baseline
 * and as the harness for iteration 0.
 *
 * @param {string} projectRoot - Project directory containing `.claude/`.
 * @param {string} workspacePath - Path of the `.kairn-evolve` workspace.
 * @returns {Promise<void>}
 * @throws {Error} If `.claude/` is not accessible under `projectRoot`.
 */
async function snapshotBaseline(projectRoot, workspacePath) {
  const claudeDir = path16.join(projectRoot, ".claude");
  const destinations = [
    path16.join(workspacePath, "baseline"),
    path16.join(workspacePath, "iterations", "0", "harness")
  ];
  const claudeDirExists = await fs16.access(claudeDir).then(
    () => true,
    () => false
  );
  if (!claudeDirExists) {
    throw new Error(`.claude/ directory not found in ${projectRoot}`);
  }
  // Copies run one after the other, baseline first.
  for (const destination of destinations) {
    await copyDir(claudeDir, destination);
  }
}
4002
/**
 * Recursively copy a directory tree.
 *
 * The destination (and any missing parents) is created; entries are copied
 * sequentially, descending into subdirectories and copying everything else
 * with `copyFile`.
 *
 * @param {string} src - Directory to copy from.
 * @param {string} dest - Directory to copy into.
 * @returns {Promise<void>}
 */
async function copyDir(src, dest) {
  await fs16.mkdir(dest, { recursive: true });
  const children = await fs16.readdir(src, { withFileTypes: true });
  for (const child of children) {
    const from = path16.join(src, child.name);
    const to = path16.join(dest, child.name);
    const copyEntry = child.isDirectory()
      ? copyDir
      : (source, target) => fs16.copyFile(source, target);
    await copyEntry(from, to);
  }
}
4015
+
4016
+ // src/evolve/runner.ts
4017
+ import { exec as exec2, spawn } from "child_process";
4018
+ import { promisify as promisify2 } from "util";
4019
+ import fs18 from "fs/promises";
4020
+ import os3 from "os";
4021
+ import path18 from "path";
4022
+
4023
+ // src/evolve/trace.ts
4024
+ import fs17 from "fs/promises";
4025
+ import path17 from "path";
4026
/**
 * Load a single task trace from disk.
 *
 * Loading is tolerant by design: a trace directory may be incomplete or
 * partially written. A file that is missing OR contains
 * invalid JSON yields the same default (`""` for logs, `{}` for
 * `files_changed.json` / `timing.json`, `{ pass: false }` for `score.json`),
 * and unparseable lines in `tool_calls.jsonl` are skipped.
 *
 * The task id is the directory's own name; the iteration number is parsed
 * from the parent directory's name (0 when it is not numeric).
 *
 * @param {string} traceDir - Directory holding the trace files.
 * @returns {Promise<object>} Trace with `taskId`, `iteration`, `stdout`,
 *   `stderr`, `toolCalls`, `filesChanged`, `score` and `timing`.
 */
async function loadTrace(traceDir) {
  const readText = (name) =>
    fs17.readFile(path17.join(traceDir, name), "utf-8").catch(() => "");
  const parseOr = (text, fallback) => {
    try {
      return JSON.parse(text);
    } catch {
      return fallback;
    }
  };

  // The six files are independent, so read them concurrently.
  const [stdout, stderr, filesChangedStr, timingStr, scoreStr, toolCallsStr] = await Promise.all([
    readText("stdout.log"),
    readText("stderr.log"),
    readText("files_changed.json"),
    readText("timing.json"),
    readText("score.json"),
    readText("tool_calls.jsonl")
  ]);

  const toolCalls = [];
  for (const line of toolCallsStr.split("\n")) {
    if (!line.trim()) {
      continue;
    }
    try {
      toolCalls.push(JSON.parse(line));
    } catch {
      // Skip a truncated or corrupt line rather than discarding the trace.
    }
  }

  const parentDir = path17.basename(path17.dirname(traceDir));
  const iteration = Number.parseInt(parentDir, 10) || 0;
  return {
    taskId: path17.basename(traceDir),
    iteration,
    stdout,
    stderr,
    toolCalls,
    filesChanged: parseOr(filesChangedStr, {}),
    score: parseOr(scoreStr, { pass: false }),
    timing: parseOr(timingStr, {})
  };
}
4059
/**
 * Load every task trace recorded for one iteration.
 *
 * Only subdirectories of `traces/<iteration>/` are treated as task traces, so
 * stray files (e.g. `.DS_Store`) do not become phantom failing traces. A trace
 * that cannot be loaded is skipped on its own instead of silently discarding
 * all traces that follow it.
 *
 * @param {string} workspacePath - Path of the `.kairn-evolve` workspace.
 * @param {number} iteration - Iteration number.
 * @returns {Promise<object[]>} Loaded traces; empty when the iteration has no
 *   trace directory.
 */
async function loadIterationTraces(workspacePath, iteration) {
  const tracesDir = path17.join(workspacePath, "traces", iteration.toString());
  let entries;
  try {
    entries = await fs17.readdir(tracesDir, { withFileTypes: true });
  } catch {
    // No (readable) trace directory for this iteration.
    return [];
  }
  const traces = [];
  for (const entry of entries) {
    if (!entry.isDirectory()) {
      continue;
    }
    try {
      traces.push(await loadTrace(path17.join(tracesDir, entry.name)));
    } catch {
      // Best-effort: one unreadable trace must not hide the others.
    }
  }
  return traces;
}
4072
/**
 * Persist a task trace as a set of files inside `traceDir`.
 *
 * Writes, in order: `stdout.log`, `stderr.log`, `tool_calls.jsonl` (one JSON
 * value per line, no trailing newline), `files_changed.json`, `timing.json`
 * and `score.json` (the JSON files are pretty-printed with 2 spaces).
 *
 * @param {string} traceDir - Target directory; created if missing.
 * @param {object} trace - Trace with `stdout`, `stderr`, `toolCalls`,
 *   `filesChanged`, `timing` and `score`.
 * @returns {Promise<void>}
 */
async function writeTrace(traceDir, trace) {
  await fs17.mkdir(traceDir, { recursive: true });
  const pretty = (value) => JSON.stringify(value, null, 2);
  // Content is produced lazily so each file is serialized right before it is
  // written.
  const outputs = [
    ["stdout.log", () => trace.stdout],
    ["stderr.log", () => trace.stderr],
    ["tool_calls.jsonl", () => trace.toolCalls.map((call) => JSON.stringify(call)).join("\n")],
    ["files_changed.json", () => pretty(trace.filesChanged)],
    ["timing.json", () => pretty(trace.timing)],
    ["score.json", () => pretty(trace.score)]
  ];
  for (const [fileName, render] of outputs) {
    await fs17.writeFile(path17.join(traceDir, fileName), render(), "utf-8");
  }
}
4094
/**
 * Write (or overwrite) `score.json` in an existing trace directory.
 *
 * @param {string} traceDir - Trace directory; must already exist.
 * @param {object} score - Score object, serialized as 2-space-indented JSON.
 * @returns {Promise<void>}
 */
async function writeScore(traceDir, score) {
  const scoreFile = path17.join(traceDir, "score.json");
  const serialized = JSON.stringify(score, null, 2);
  await fs17.writeFile(scoreFile, serialized, "utf-8");
}
4101
/**
 * Write the summary files for one iteration under
 * `iterations/<iteration>/`: `scores.json`, `proposer_reasoning.md` and
 * `mutation_diff.patch`.
 *
 * @param {string} workspacePath - Path of the `.kairn-evolve` workspace.
 * @param {object} log - Iteration log with `iteration`, `score`,
 *   `taskResults`, and optional `proposal.reasoning` and `diffPatch`.
 * @returns {Promise<void>}
 */
async function writeIterationLog(workspacePath, log) {
  const iterDir = path17.join(workspacePath, "iterations", log.iteration.toString());
  await fs17.mkdir(iterDir, { recursive: true });
  const save = (fileName, content) =>
    fs17.writeFile(path17.join(iterDir, fileName), content, "utf-8");

  const { score, taskResults } = log;
  await save("scores.json", JSON.stringify({ score, taskResults }, null, 2));
  // Iterations without a proposal get a fixed placeholder.
  await save(
    "proposer_reasoning.md",
    log.proposal?.reasoning ?? "Baseline evaluation (no proposal)"
  );
  await save("mutation_diff.patch", log.diffPatch ?? "");
}
4120
+
4121
+ // src/evolve/exec.ts
4122
+ import { exec } from "child_process";
4123
+ import { promisify } from "util";
4124
// Promise-returning wrapper around child_process.exec.
var execAsync = promisify(exec);

/**
 * Run a command through the shell in the given working directory.
 *
 * @param {string} cmd - Command line to execute.
 * @param {string} cwd - Working directory for the command.
 * @param {number} [timeoutMs=30000] - Time limit passed to `exec` as `timeout`.
 * @returns {Promise<{stdout: string, stderr: string}>} Captured output;
 *   rejects when the command fails or times out.
 */
async function execCommand(cmd, cwd, timeoutMs = 30000) {
  const options = { cwd, timeout: timeoutMs };
  return execAsync(cmd, options);
}
4128
+
4129
+ // src/evolve/scorers.ts
4130
+ var COMMAND_PATTERN = /^(npm |npx |node |python |make |cargo |go |git |test |ls |cat |grep |curl )/;
4131
+ var SHELL_METACHAR_PATTERN = /[;|&`$()<>]/;
4132
+ var JUDGE_SYSTEM_PROMPT = `You are an eval judge for Claude Code agent tasks. Given a task description, expected outcome, and actual execution results, determine if the task was completed successfully.
4133
+
4134
+ Return ONLY valid JSON with this structure:
4135
+ {
4136
+ "pass": true/false,
4137
+ "score": 0-100,
4138
+ "reasoning": "Brief explanation of your judgment"
4139
+ }`;
4140
+ var RUBRIC_SYSTEM_PROMPT = `You are an eval judge scoring a specific criterion. Given the task, the criterion to evaluate, and the execution results, score the criterion.
4141
+
4142
+ Return ONLY valid JSON:
4143
+ {
4144
+ "score": 0.0-1.0,
4145
+ "reasoning": "Brief explanation"
4146
+ }`;
4147
/**
 * Score a task as pass/fail.
 *
 * Lines of `task.expected_outcome` that look like allow-listed verification
 * commands (see `COMMAND_PATTERN`) are executed in `workspacePath`; the task
 * passes only if every command succeeds. Commands containing shell
 * metacharacters are rejected without being run. When the expected outcome
 * contains no commands, the task passes unless `stderr` mentions "error",
 * "failed" or "exception" (case-insensitive).
 *
 * `expected_outcome` may be a string or an array. Array elements are split on
 * newlines as well, so every line is individually checked against the
 * allow-list; a multi-line element can therefore not carry extra shell
 * commands past the first line.
 *
 * @param {object} task - Task with `expected_outcome`.
 * @param {string} workspacePath - Directory in which commands are executed.
 * @param {string} stdout - Captured agent stdout (currently unused here).
 * @param {string} stderr - Captured agent stderr.
 * @returns {Promise<{pass: boolean, score: number, details: string}>}
 */
async function passFailScorer(task, workspacePath, stdout, stderr) {
  const rawOutcomes = Array.isArray(task.expected_outcome) ? task.expected_outcome : [task.expected_outcome];
  const outcomes = rawOutcomes.flatMap((entry) => String(entry ?? "").split(/\r?\n/));
  const commands = outcomes.map((line) => line.replace(/^-\s*/, "").trim()).filter((line) => COMMAND_PATTERN.test(line));
  if (commands.length > 0) {
    const failures = [];
    for (const cmd of commands) {
      if (SHELL_METACHAR_PATTERN.test(cmd)) {
        failures.push(`Rejected unsafe command (shell metacharacters): ${cmd}`);
        continue;
      }
      try {
        await execCommand(cmd, workspacePath);
      } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        failures.push(`Command failed: ${cmd}\n${msg}`);
      }
    }
    const allPassed = failures.length === 0;
    return {
      pass: allPassed,
      score: allPassed ? 100 : 0,
      details: allPassed ? `All ${commands.length} verification commands passed` : failures.join("\n")
    };
  }
  // No verification commands: fall back to a keyword scan of stderr.
  const stderrLower = String(stderr ?? "").toLowerCase();
  const hasErrors = ["error", "failed", "exception"].some((keyword) => stderrLower.includes(keyword));
  const passed = !hasErrors;
  return {
    pass: passed,
    score: passed ? 100 : 0,
    details: passed ? "No errors detected in output" : "Errors found in stderr"
  };
}
4180
/**
 * Score a task by asking an LLM judge to compare the expected outcome with
 * the tail of the captured output.
 *
 * The judge's verdict is normalized before it is returned: `pass` is a real
 * boolean (only `true` / `"true"` count as a pass), `score` is a finite number
 * clamped to 0–100 (falling back to 100/0 according to `pass` when the judge
 * omits it or returns a non-number), and `reasoning` is always a string.
 * Any failure to call or parse the judge yields a failing score rather than
 * an exception.
 *
 * @param {object} task - Task with `description` and `expected_outcome`
 *   (string or array of strings).
 * @param {string} workspacePath - Unused here; kept for scorer signature parity.
 * @param {string} stdout - Captured agent stdout (last 2000 chars are sent).
 * @param {string} stderr - Captured agent stderr (last 1000 chars are sent).
 * @param {object} config - LLM configuration forwarded to `callLLM`.
 * @returns {Promise<{pass: boolean, score: number, reasoning: string}>}
 */
async function llmJudgeScorer(task, workspacePath, stdout, stderr, config) {
  const expectedOutcome = Array.isArray(task.expected_outcome) ? task.expected_outcome.join("\n") : task.expected_outcome;
  const userMessage = [
    "## Task",
    task.description,
    "",
    "## Expected Outcome",
    expectedOutcome,
    "",
    "## Actual stdout (last 2000 chars)",
    stdout.slice(-2e3),
    "",
    "## Actual stderr (last 1000 chars)",
    stderr.slice(-1e3)
  ].join("\n");
  try {
    const response = await callLLM(config, userMessage, {
      systemPrompt: JUDGE_SYSTEM_PROMPT,
      maxTokens: 1024
    });
    let cleaned = response.trim();
    if (cleaned.startsWith("```")) {
      cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
    }
    const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      return { pass: false, score: 0, reasoning: "Judge returned invalid JSON" };
    }
    const result = JSON.parse(jsonMatch[0]);
    // Models occasionally quote booleans/numbers; a raw "false" string would
    // otherwise be truthy downstream.
    const pass = result.pass === true || result.pass === "true";
    const rawScore = typeof result.score === "string" ? Number(result.score) : result.score;
    const score = typeof rawScore === "number" && Number.isFinite(rawScore)
      ? Math.max(0, Math.min(100, rawScore))
      : pass ? 100 : 0;
    return {
      pass,
      score,
      reasoning: typeof result.reasoning === "string" ? result.reasoning : String(result.reasoning ?? "")
    };
  } catch (err) {
    return {
      pass: false,
      score: 0,
      reasoning: `LLM judge error: ${err instanceof Error ? err.message : String(err)}`
    };
  }
}
4222
/**
 * Score a task against a weighted rubric, asking the LLM judge to rate each
 * criterion between 0 and 1.
 *
 * Criteria are judged one at a time. A criterion scores 0 when the judge call
 * fails, returns no JSON, or returns a non-numeric score (previously a
 * non-numeric score turned the whole total into NaN). The overall score is the
 * weight-normalized average expressed as a percentage; the task passes at 60
 * or above. Tasks without a rubric fall back to `passFailScorer`.
 *
 * @param {object} task - Task with `description` and `rubric`
 *   (array of `{ criterion, weight }`).
 * @param {string} workspacePath - Forwarded to `passFailScorer` on fallback.
 * @param {string} stdout - Captured agent stdout (last 2000 chars are sent).
 * @param {string} stderr - Captured agent stderr (last 500 chars are sent).
 * @param {object} config - LLM configuration forwarded to `callLLM`.
 * @returns {Promise<object>} `{ pass, score, reasoning, breakdown }`, or the
 *   pass/fail result when there is no rubric.
 */
async function rubricScorer(task, workspacePath, stdout, stderr, config) {
  if (!task.rubric || task.rubric.length === 0) {
    return passFailScorer(task, workspacePath, stdout, stderr);
  }
  // Accept numbers and numeric strings; anything else counts as 0.
  const toFiniteNumber = (value) => {
    const numeric = typeof value === "string" ? Number(value) : value;
    return typeof numeric === "number" && Number.isFinite(numeric) ? numeric : 0;
  };
  const breakdown = [];
  let weightedSum = 0;
  let totalWeight = 0;
  for (const criterion of task.rubric) {
    const weight = toFiniteNumber(criterion.weight);
    totalWeight += weight;
    const userMessage = [
      "## Task",
      task.description,
      "",
      "## Criterion to Evaluate",
      `"${criterion.criterion}" (weight: ${criterion.weight})`,
      "",
      "## Actual stdout (last 2000 chars)",
      stdout.slice(-2e3),
      "",
      "## Actual stderr (last 500 chars)",
      stderr.slice(-500)
    ].join("\n");
    let criterionScore = 0;
    try {
      const response = await callLLM(config, userMessage, {
        systemPrompt: RUBRIC_SYSTEM_PROMPT,
        maxTokens: 512
      });
      let cleaned = response.trim();
      if (cleaned.startsWith("```")) {
        cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
      }
      const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
      if (jsonMatch) {
        const result = JSON.parse(jsonMatch[0]);
        criterionScore = Math.max(0, Math.min(1, toFiniteNumber(result.score)));
      }
    } catch {
      // Judge unavailable or unparseable for this criterion: it scores 0 and
      // the remaining criteria are still evaluated.
      criterionScore = 0;
    }
    breakdown.push({
      criterion: criterion.criterion,
      score: criterionScore,
      weight: criterion.weight
    });
    weightedSum += criterionScore * weight;
  }
  const totalScore = totalWeight > 0 ? Math.round(weightedSum / totalWeight * 100) : 0;
  return {
    pass: totalScore >= 60,
    score: totalScore,
    reasoning: `Rubric score: ${totalScore}%`,
    breakdown
  };
}
4285
/**
 * Dispatch a task to the scorer named by `task.scoring`.
 *
 * "llm-judge" and "rubric" need an LLM `config`; without one (or for any
 * other scoring value) the pass/fail scorer is used.
 *
 * @param {object} task - Task with a `scoring` mode.
 * @param {string} workspacePath - Directory in which the task ran.
 * @param {string} stdout - Captured agent stdout.
 * @param {string} stderr - Captured agent stderr.
 * @param {object} [config] - LLM configuration for the judge-based scorers.
 * @returns {Promise<object>} The selected scorer's result.
 */
async function scoreTask(task, workspacePath, stdout, stderr, config) {
  switch (task.scoring) {
    case "llm-judge":
      if (config) {
        return llmJudgeScorer(task, workspacePath, stdout, stderr, config);
      }
      break;
    case "rubric":
      if (config) {
        return rubricScorer(task, workspacePath, stdout, stderr, config);
      }
      break;
    default:
      break;
  }
  return passFailScorer(task, workspacePath, stdout, stderr);
}
4297
+
4298
+ // src/evolve/runner.ts
4299
// Promise-returning wrapper around child_process.exec, used for task setup.
var execAsync2 = promisify2(exec2);

/**
 * Run one eval task in a throwaway workspace and record its trace.
 *
 * The harness is copied to `<tmp>/.claude`, the task's setup command is run
 * (a setup failure is recorded in stderr but does not abort the run), the
 * Claude CLI is invoked with the task description, and the resulting output,
 * tool calls and file changes are written to `traceDir`. The temporary
 * workspace is always removed afterwards. The returned score is a
 * "Pending scoring" placeholder.
 *
 * @param {object} task - Task with `id`, `description`, `setup`, `timeout`.
 * @param {string} harnessPath - Harness directory to install as `.claude`.
 * @param {string} traceDir - Directory receiving the trace files.
 * @param {number} iteration - Iteration number stored in the trace.
 * @returns {Promise<{taskId: string, score: object, traceDir: string}>}
 */
async function runTask(task, harnessPath, traceDir, iteration) {
  await fs18.mkdir(traceDir, { recursive: true });
  const startedAt = new Date().toISOString();
  const startMs = Date.now();
  const tmpDir = await fs18.mkdtemp(path18.join(os3.tmpdir(), "kairn-evolve-"));

  // Runs the optional setup command; resolves to its error message, or ""
  // when there is nothing to run or it succeeded.
  const runSetup = async () => {
    if (!task.setup.trim()) {
      return "";
    }
    try {
      await execAsync2(task.setup, { cwd: tmpDir, timeout: 6e4 });
      return "";
    } catch (err) {
      return err instanceof Error ? err.message : String(err);
    }
  };

  try {
    await copyDir(harnessPath, path18.join(tmpDir, ".claude"));
    const setupStderr = await runSetup();
    // Snapshots bracket only the agent run, so setup output is not counted
    // as a change made by the agent.
    const filesBefore = await snapshotFileList(tmpDir);
    const spawnResult = await spawnClaude(task.description, tmpDir, task.timeout);
    const filesAfter = await snapshotFileList(tmpDir);
    const filesChanged = diffFileLists(filesBefore, filesAfter);
    const toolCalls = parseToolCalls(spawnResult.stdout);
    const completedAt = new Date().toISOString();
    const durationMs = Date.now() - startMs;
    let combinedStderr = spawnResult.stderr;
    if (setupStderr) {
      combinedStderr = `[setup] ${setupStderr}\n${spawnResult.stderr}`;
    }
    const trace = {
      taskId: task.id,
      iteration,
      stdout: spawnResult.stdout,
      stderr: combinedStderr,
      toolCalls,
      filesChanged,
      score: { pass: false, details: "Pending scoring" },
      timing: { startedAt, completedAt, durationMs }
    };
    await writeTrace(traceDir, trace);
    return { taskId: task.id, score: trace.score, traceDir };
  } finally {
    // Cleanup is best-effort; a failure to delete must not mask the result.
    await fs18.rm(tmpDir, { recursive: true, force: true }).catch(() => {
    });
  }
}
4345
/**
 * Run the `claude` CLI non-interactively, feeding `instruction` on stdin.
 *
 * The returned promise never rejects because of the child process: a spawn
 * failure is reported through `stderr`/`exitCode` instead.
 *
 * @param instruction Text written to the child's stdin.
 * @param cwd         Working directory for the child process.
 * @param timeoutSec  Timeout in seconds; a missing, non-numeric or
 *                    non-positive value means "no timeout".
 * @returns `{ stdout, stderr, exitCode }`; `exitCode` is 1 when the process
 *          reports no exit code or fails to spawn.
 */
async function spawnClaude(instruction, cwd, timeoutSec) {
  // spawn() requires a non-negative integer timeout and throws otherwise, so
  // normalise undefined / fractional / string values up front.
  const seconds = Number(timeoutSec);
  const timeoutMs = Number.isFinite(seconds) && seconds > 0 ? Math.round(seconds * 1e3) : undefined;

  return new Promise((resolve) => {
    const args = ["--print", "--output-format", "text", "--max-turns", "50"];
    const child = spawn("claude", args, {
      cwd,
      stdio: ["pipe", "pipe", "pipe"],
      timeout: timeoutMs,
      env: { ...process.env }
    });
    let stdout = "";
    let stderr = "";
    child.stdout.on("data", (data) => {
      stdout += data.toString();
    });
    child.stderr.on("data", (data) => {
      stderr += data.toString();
    });
    // Without a listener, an EPIPE (child exited before reading stdin) or a
    // write to a failed spawn surfaces as an unhandled 'error' event and
    // takes the whole CLI down. Record it alongside the child's stderr.
    child.stdin.on("error", (err) => {
      stderr += `\nStdin error: ${err.message}`;
    });
    child.on("close", (code) => {
      resolve({ stdout, stderr, exitCode: code ?? 1 });
    });
    child.on("error", (err) => {
      resolve({
        stdout,
        stderr: stderr + `\nSpawn error: ${err.message}`,
        exitCode: 1
      });
    });
    child.stdin.end(instruction);
  });
}
4377
/**
 * Recursively list the files under `dir`, mapping each path (relative to
 * `dir`) to its modification time in milliseconds.
 *
 * The top-level `.claude`, `node_modules` and `.git` directories are
 * excluded. Matching is done on the first path segment, so look-alike names
 * such as `.gitignore`, `.github/` or `node_modules_old/` are still tracked.
 * Directories that cannot be read and files that cannot be stat'ed are
 * silently omitted.
 *
 * @param dir Root directory to snapshot.
 * @returns Object of `relativePath -> mtimeMs`.
 */
async function snapshotFileList(dir) {
  const ignoredTopLevel = new Set([".claude", "node_modules", ".git"]);
  const result = {};

  async function walk(current) {
    let entries;
    try {
      entries = await fs18.readdir(current, { withFileTypes: true });
    } catch {
      return; // unreadable directory: nothing to record
    }
    for (const entry of entries) {
      const fullPath = path18.join(current, entry.name);
      const relativePath = path18.relative(dir, fullPath);
      const [topSegment] = relativePath.split(path18.sep);
      if (ignoredTopLevel.has(topSegment)) continue;
      if (entry.isDirectory()) {
        await walk(fullPath);
        continue;
      }
      try {
        const stat = await fs18.stat(fullPath);
        result[relativePath] = stat.mtimeMs;
      } catch {
        // File vanished or is not stat-able (e.g. dangling symlink): skip it.
      }
    }
  }

  await walk(dir);
  return result;
}
4406
/**
 * Compare two `path -> mtime` snapshots and classify every path that differs.
 *
 * @param before Snapshot taken first.
 * @param after  Snapshot taken second.
 * @returns Object of `path -> "created" | "modified" | "deleted"`; unchanged
 *          paths are omitted. Created/modified entries come first (in `after`
 *          order), followed by deleted entries (in `before` order).
 */
function diffFileLists(before, after) {
  // Own-property check: the `in` operator would also match inherited names,
  // so a file called "constructor" or "toString" would never count as new.
  const owns = (snapshot, file) => Object.prototype.hasOwnProperty.call(snapshot, file);

  const changes = {};
  for (const [file, mtime] of Object.entries(after)) {
    if (!owns(before, file)) {
      changes[file] = "created";
    } else if (before[file] !== mtime) {
      changes[file] = "modified";
    }
  }
  for (const file of Object.keys(before)) {
    if (!owns(after, file)) {
      changes[file] = "deleted";
    }
  }
  return changes;
}
4422
/**
 * Extract tool-call records from line-delimited output.
 *
 * Each non-blank line is parsed as JSON on its own; a parsed value is kept
 * when its `type` is "tool_use" or it has a truthy `tool_name`. Lines that
 * are not valid JSON (or parse to `null`) are ignored.
 *
 * @param stdout Raw standard output of a run.
 * @returns Array of the matching parsed values, in input order; `[]` when
 *          `stdout` cannot be split (e.g. it is not a string).
 */
function parseToolCalls(stdout) {
  const looksLikeToolCall = (record) => record.type === "tool_use" || record.tool_name;
  try {
    const collected = [];
    for (const rawLine of stdout.split("\n")) {
      if (!rawLine.trim()) continue;
      let record;
      try {
        record = JSON.parse(rawLine);
        if (!looksLikeToolCall(record)) continue;
      } catch {
        continue; // not JSON, or a JSON `null`
      }
      collected.push(record);
    }
    return collected;
  } catch {
    return [];
  }
}
4440
/**
 * Run every task against a harness, one after another, and aggregate scores.
 *
 * Traces go to `<workspacePath>/traces/<iteration>/<task.id>`. When `config`
 * is truthy the run is re-scored from the `stdout.log` / `stderr.log` files
 * in the trace directory (a missing file reads as "") and the score is
 * written back; otherwise the score returned by `runTask` is kept.
 *
 * @param tasks         Tasks to run.
 * @param harnessPath   Harness directory passed to `runTask`.
 * @param workspacePath Workspace root that holds the `traces` directory.
 * @param iteration     Iteration number (used in the trace path).
 * @param config        Optional configuration enabling scoring.
 * @returns `{ results, aggregate }`: `results` maps task id to score and
 *          `aggregate` is the mean of `score` (or 100/0 from `pass` when
 *          `score` is nullish); 0 when there are no results.
 */
async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config) {
  const results = {};

  for (const task of tasks) {
    const traceDir = path18.join(workspacePath, "traces", iteration.toString(), task.id);
    const { score: pendingScore } = await runTask(task, harnessPath, traceDir, iteration);
    if (!config) {
      results[task.id] = pendingScore;
      continue;
    }
    const readLog = (fileName) =>
      fs18.readFile(path18.join(traceDir, fileName), "utf-8").catch(() => "");
    const stdout = await readLog("stdout.log");
    const stderr = await readLog("stderr.log");
    const scored = await scoreTask(task, traceDir, stdout, stderr, config);
    await writeScore(traceDir, scored);
    results[task.id] = scored;
  }

  const points = Object.values(results).map((s) => s.score ?? (s.pass ? 100 : 0));
  let total = 0;
  for (const value of points) {
    total += value;
  }
  const aggregate = points.length > 0 ? total / points.length : 0;
  return { results, aggregate };
}
4467
+
4468
+ // src/evolve/loop.ts
4469
+ import fs21 from "fs/promises";
4470
+ import path21 from "path";
4471
+
4472
+ // src/evolve/proposer.ts
4473
+ import fs19 from "fs/promises";
4474
+ import path19 from "path";
4475
+ var PROPOSER_SYSTEM_PROMPT = `You are an expert agent environment optimizer. Your job is to improve a Claude Code
4476
+ agent environment (.claude/ directory) based on execution traces from real tasks.
4477
+
4478
+ ## What You Have Access To
4479
+ 1. Current harness: The .claude/ directory files (CLAUDE.md, commands/, rules/, agents/)
4480
+ 2. Execution traces: Full stdout/stderr, tool call sequences, file changes, and scores
4481
+ 3. History: Previous iterations' proposals, diffs, and resulting score changes
4482
+
4483
+ ## Your Task
4484
+ Analyze the traces to identify WHY tasks fail or underperform. Then propose specific,
4485
+ minimal changes to the harness files that will fix those failures.
4486
+
4487
+ ## Diagnosis Process
4488
+ 1. For each failed/low-scoring task:
4489
+ a. Read the full trace (stdout, tool calls, file changes)
4490
+ b. Identify the ROOT CAUSE: bad instruction? Missing tool? Wrong rule?
4491
+ c. Trace the failure back to a specific harness decision
4492
+ d. Propose a fix
4493
+
4494
+ 2. For each successful task:
4495
+ a. Note what worked well
4496
+ b. Ensure proposed changes don't break what's working
4497
+
4498
+ 3. Check history for counterfactual evidence
4499
+
4500
+ ## Output Format
4501
+ Return a JSON object:
4502
+ {
4503
+ "reasoning": "Your full causal analysis...",
4504
+ "mutations": [
4505
+ { "file": "CLAUDE.md", "action": "replace", "old_text": "...", "new_text": "...", "rationale": "..." },
4506
+ { "file": "commands/develop.md", "action": "add_section", "new_text": "...", "rationale": "..." }
4507
+ ],
4508
+ "expected_impact": { "task-id": "+15% \u2014 explanation" }
4509
+ }
4510
+
4511
+ ## Rules
4512
+ - MINIMAL changes only. Don't rewrite the entire CLAUDE.md.
4513
+ - Each mutation must have a clear rationale tied to a specific trace observation.
4514
+ - Never remove something that's working for another task.
4515
+ - If a previous iteration's change caused a regression, REVERT it.
4516
+ - Prefer ADDITIVE changes over replacements when possible.
4517
+
4518
+ Return ONLY valid JSON.`;
4519
// Maximum number of trailing stdout characters included per trace in the proposer message.
var STDOUT_TRUNCATION_LIMIT = 2000;
4520
/**
 * Load every regular file under `harnessPath` as UTF-8 text.
 *
 * Keys are paths relative to `harnessPath`. Directories that cannot be
 * listed and files that cannot be read are skipped; entries that are neither
 * a directory nor a regular file are ignored.
 *
 * @param harnessPath Root directory of the harness.
 * @returns Object of `relativePath -> file contents`.
 */
async function readHarnessFiles(harnessPath) {
  const contents = {};

  const collect = async (dir, prefix) => {
    let entries;
    try {
      entries = await fs19.readdir(dir, { withFileTypes: true });
    } catch {
      return;
    }
    for (const entry of entries) {
      const relativePath = prefix ? path19.join(prefix, entry.name) : entry.name;
      const absolutePath = path19.join(dir, entry.name);
      if (entry.isDirectory()) {
        await collect(absolutePath, relativePath);
        continue;
      }
      if (!entry.isFile()) continue;
      try {
        contents[relativePath] = await fs19.readFile(absolutePath, "utf-8");
      } catch {
        // Unreadable file: leave it out of the result.
      }
    }
  };

  await collect(harnessPath, "");
  return contents;
}
4545
/**
 * Keep only the tail of `stdout` when it is longer than `limit` characters.
 *
 * @param stdout Text to shorten.
 * @param limit  Maximum length kept without truncation.
 * @returns `stdout` unchanged when it fits; otherwise a one-line notice
 *          followed by the last `limit` characters.
 */
function truncateStdout(stdout, limit) {
  const fits = stdout.length <= limit;
  return fits
    ? stdout
    : `[...truncated, showing last ${limit} chars...]\n${stdout.slice(-limit)}`;
}
4552
/**
 * Assemble the markdown user message sent to the proposer model.
 *
 * Four sections are emitted in a fixed order (harness files, task
 * definitions, execution traces, iteration history), each with a
 * placeholder line when its input is empty. Sections are joined with "\n".
 *
 * @param harnessFiles Object of `path -> contents`.
 * @param traces       Trace objects (`taskId`, `score`, `stdout`, `filesChanged`, `timing`).
 * @param tasks        Task definitions.
 * @param history      Earlier iteration logs (`iteration`, `score`, `taskResults`, `proposal`).
 * @returns The complete message text.
 */
function buildProposerUserMessage(harnessFiles, traces, tasks, history) {
  const FENCE = "```";
  // Numeric score, falling back to 100/0 from the pass flag.
  const numericScore = (s) => (s.score !== undefined ? s.score : s.pass ? 100 : 0);
  const sections = [];

  sections.push("## Current Harness Files\n");
  const fileEntries = Object.entries(harnessFiles);
  if (fileEntries.length === 0) {
    sections.push("(No harness files found)\n");
  }
  for (const [filePath, content] of fileEntries) {
    sections.push(`### ${filePath}\n${FENCE}\n${content}\n${FENCE}\n`);
  }

  sections.push("## Task Definitions\n");
  if (tasks.length === 0) {
    sections.push("(No tasks defined)\n");
  }
  for (const task of tasks) {
    const expected = Array.isArray(task.expected_outcome)
      ? task.expected_outcome.join("; ")
      : task.expected_outcome;
    sections.push(
      `### Task: ${task.id}\n` +
        `- Template: ${task.template}\n` +
        `- Description: ${task.description}\n` +
        `- Expected outcome: ${expected}\n` +
        `- Scoring: ${task.scoring}\n`
    );
  }

  sections.push("## Execution Traces\n");
  if (traces.length === 0) {
    sections.push("(No traces available)\n");
  }
  for (const trace of traces) {
    const scoreNum = numericScore(trace.score);
    const truncatedStdout = truncateStdout(trace.stdout, STDOUT_TRUNCATION_LIMIT);
    const filesChangedList = Object.entries(trace.filesChanged)
      .map(([f, action]) => ` - ${f}: ${action}`)
      .join("\n");
    const detailsLine = trace.score.details ? `- Details: ${trace.score.details}\n` : "";
    sections.push(
      `### Trace: ${trace.taskId}\n- Pass: ${trace.score.pass}\n- Score: ${scoreNum}\n` +
        detailsLine +
        `- Duration: ${trace.timing.durationMs}ms\n` +
        `- Files changed:\n${filesChangedList || " (none)"}\n` +
        `- Stdout (last ${STDOUT_TRUNCATION_LIMIT} chars):\n${FENCE}\n${truncatedStdout}\n${FENCE}\n`
    );
  }

  sections.push("## Iteration History\n");
  if (history.length === 0) {
    sections.push("(No previous iterations)\n");
  }
  for (const log of history) {
    const taskScores = Object.entries(log.taskResults)
      .map(([id, s]) => ` - ${id}: ${numericScore(s)} (pass=${s.pass})`)
      .join("\n");
    sections.push(`### Iteration ${log.iteration} \u2014 Score: ${log.score}\n- Task results:\n${taskScores}\n`);
    if (log.proposal) {
      sections.push(
        `- Proposal reasoning: ${log.proposal.reasoning}\n- Mutations: ${log.proposal.mutations.length} change(s)\n`
      );
    }
  }

  return sections.join("\n");
}
4629
/**
 * Parse and validate the proposer model's reply.
 *
 * Accepts raw JSON, JSON wrapped in a single Markdown code fence, or JSON
 * surrounded by prose (the text between the first "{" and the last "}" is
 * tried as a fallback). Both snake_case and camelCase field names are
 * understood for `new_text` / `old_text` / `expected_impact`.
 *
 * Mutations are dropped (not rejected) when they are not objects, have no
 * target file, contain ".." in the file path, use an unknown action, or are
 * a "replace" without old text.
 *
 * @param raw Text returned by the model.
 * @returns `{ reasoning, mutations, expectedImpact }` with every
 *          `expectedImpact` value coerced to a string.
 * @throws Error when no JSON can be parsed, the JSON is not an object, or
 *         the `reasoning` string / `mutations` array is missing.
 */
function parseProposerResponse(raw) {
  let cleaned = raw.trim();
  const fenceMatch = cleaned.match(/^```(?:json)?\s*\n?([\s\S]*?)\n?\s*```$/);
  if (fenceMatch) {
    cleaned = fenceMatch[1].trim();
  }

  let parsed;
  try {
    parsed = JSON.parse(cleaned);
  } catch {
    // Models frequently add a sentence before/after the JSON despite the
    // instructions; retry on the outermost brace pair before giving up.
    const firstBrace = cleaned.indexOf("{");
    const lastBrace = cleaned.lastIndexOf("}");
    try {
      if (firstBrace === -1 || lastBrace <= firstBrace) {
        throw new SyntaxError("no JSON object found");
      }
      parsed = JSON.parse(cleaned.slice(firstBrace, lastBrace + 1));
    } catch {
      throw new Error(`Proposer returned invalid JSON: ${cleaned.slice(0, 200)}`);
    }
  }

  if (typeof parsed !== "object" || parsed === null) {
    throw new Error("Proposer response is not a JSON object");
  }
  const obj = parsed;
  if (typeof obj["reasoning"] !== "string") {
    throw new Error('Proposer response missing required "reasoning" string field');
  }
  if (!Array.isArray(obj["mutations"])) {
    throw new Error('Proposer response missing required "mutations" array field');
  }

  // First string value found under any of `keys`, else `fallback`.
  const pickString = (source, keys, fallback) => {
    for (const key of keys) {
      if (typeof source[key] === "string") return source[key];
    }
    return fallback;
  };
  const allowedActions = new Set(["replace", "add_section", "create_file"]);

  const mutations = [];
  for (const entry of obj["mutations"]) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }
    const file = pickString(entry, ["file"], "");
    const action = pickString(entry, ["action"], "");
    const newText = pickString(entry, ["new_text", "newText"], "");
    const oldText = pickString(entry, ["old_text", "oldText"], undefined);
    const rationale = pickString(entry, ["rationale"], "");

    // A blank target would resolve to the harness directory itself and make
    // the later file write fail, so it is rejected here.
    if (file.trim() === "") {
      continue;
    }
    // Refuse anything that could step outside the harness directory.
    if (file.includes("..")) {
      continue;
    }
    if (!allowedActions.has(action)) {
      continue;
    }
    if (action === "replace" && !oldText) {
      continue;
    }

    const mutation = { file, action, newText, rationale };
    if (oldText !== undefined) {
      mutation.oldText = oldText;
    }
    mutations.push(mutation);
  }

  const rawImpact = obj["expected_impact"] ?? obj["expectedImpact"] ?? {};
  const expectedImpact = {};
  if (typeof rawImpact === "object" && rawImpact !== null) {
    for (const [key, value] of Object.entries(rawImpact)) {
      expectedImpact[key] = typeof value === "string" ? value : String(value);
    }
  }

  return {
    reasoning: obj["reasoning"],
    mutations,
    expectedImpact
  };
}
4695
/**
 * Ask the proposer model for harness mutations based on one iteration's traces.
 *
 * Reads the harness files and the iteration's traces, builds the user
 * message, calls the LLM with `config` overridden to use `proposerModel`,
 * and returns the parsed reply.
 *
 * @param iteration     Iteration whose traces are loaded.
 * @param workspacePath Workspace root passed to the trace loader.
 * @param harnessPath   Harness directory to read.
 * @param history       Earlier iteration logs included in the message.
 * @param tasks         Task definitions included in the message.
 * @param config        Base LLM configuration.
 * @param proposerModel Model name that replaces `config.model` for this call.
 * @returns The parsed proposal.
 */
async function propose(iteration, workspacePath, harnessPath, history, tasks, config, proposerModel) {
  const harnessFiles = await readHarnessFiles(harnessPath);
  const traces = await loadIterationTraces(workspacePath, iteration);
  const prompt = buildProposerUserMessage(harnessFiles, traces, tasks, history);

  const llmOptions = {
    systemPrompt: PROPOSER_SYSTEM_PROMPT,
    maxTokens: 8192
  };
  const rawReply = await callLLM({ ...config, model: proposerModel }, prompt, llmOptions);
  return parseProposerResponse(rawReply);
}
4706
+
4707
+ // src/evolve/mutator.ts
4708
+ import fs20 from "fs/promises";
4709
+ import path20 from "path";
4710
/**
 * Copy the current harness into the next iteration directory and apply the
 * proposed mutations to the copy.
 *
 * Supported actions:
 *  - "replace":     swap the first occurrence of `oldText` for `newText`
 *                   (inserted literally). Skipped when the file is missing
 *                   or does not contain `oldText`.
 *  - "add_section": append `newText` after a blank line, creating the file
 *                   (and parent directories) when it does not exist.
 *  - "create_file": write `newText`, creating parent directories as needed.
 * Mutations with a blank file name or ".." in the path are ignored.
 *
 * @param currentHarnessPath Harness directory to start from (left untouched).
 * @param nextIterationDir   Directory that receives the new `harness` folder.
 * @param mutations          Mutations to apply, in order.
 * @returns `{ newHarnessPath, diffPatch }`.
 */
async function applyMutations(currentHarnessPath, nextIterationDir, mutations) {
  const newHarnessPath = path20.join(nextIterationDir, "harness");
  await copyDir(currentHarnessPath, newHarnessPath);

  // Resolves to the file's text, or null when it cannot be read.
  const readIfPresent = async (target) => {
    try {
      return await fs20.readFile(target, "utf-8");
    } catch {
      return null;
    }
  };
  const writeWithParents = async (target, text) => {
    await fs20.mkdir(path20.dirname(target), { recursive: true });
    await fs20.writeFile(target, text, "utf-8");
  };

  for (const mutation of mutations) {
    if (!mutation.file || mutation.file.includes("..")) {
      continue;
    }
    const filePath = path20.join(newHarnessPath, mutation.file);

    switch (mutation.action) {
      case "replace": {
        if (!mutation.oldText) break;
        // One bad target must not throw and discard every other mutation.
        const content = await readIfPresent(filePath);
        if (content === null || !content.includes(mutation.oldText)) break;
        // Replacer function: a plain string would have "$&", "$1", "$$" etc.
        // in the model-provided text expanded as replacement patterns.
        const updated = content.replace(mutation.oldText, () => mutation.newText);
        await fs20.writeFile(filePath, updated, "utf-8");
        break;
      }
      case "add_section": {
        const content = await readIfPresent(filePath);
        if (content === null) {
          await writeWithParents(filePath, mutation.newText);
        } else {
          await fs20.writeFile(filePath, content + "\n\n" + mutation.newText, "utf-8");
        }
        break;
      }
      case "create_file":
        await writeWithParents(filePath, mutation.newText);
        break;
      default:
        break;
    }
  }

  const diffPatch = await generateDiff2(currentHarnessPath, newHarnessPath);
  return { newHarnessPath, diffPatch };
}
4751
/**
 * Produce a simple whole-file patch between two directory trees.
 *
 * For every path whose content differs (a missing file counts as empty), the
 * output has `--- a/<path>` and `+++ b/<path>` headers, then all old lines
 * prefixed with "-" (when the old content is non-empty), then all new lines
 * prefixed with "+" (when the new content is non-empty), then a blank line.
 * No line-level matching is attempted.
 *
 * @param oldDir Directory holding the previous version.
 * @param newDir Directory holding the new version.
 * @returns The patch text; "" when nothing differs.
 */
async function generateDiff2(oldDir, newDir) {
  const before = await readAllFiles(oldDir);
  const after = await readAllFiles(newDir);
  const orderedPaths = [...new Set([...Object.keys(before), ...Object.keys(after)])].sort();

  const output = [];
  const emitLines = (marker, text) => {
    for (const line of text.split("\n")) {
      output.push(`${marker}${line}`);
    }
  };

  for (const filePath of orderedPaths) {
    const oldContent = before[filePath] ?? "";
    const newContent = after[filePath] ?? "";
    if (oldContent === newContent) continue;

    output.push(`--- a/${filePath}`);
    output.push(`+++ b/${filePath}`);
    if (oldContent) emitLines("-", oldContent);
    if (newContent) emitLines("+", newContent);
    output.push("");
  }
  return output.join("\n");
}
4787
/**
 * Read every non-directory entry under `dir` as UTF-8 text.
 *
 * Directories that cannot be listed are skipped; a file that cannot be read
 * makes the returned promise reject.
 *
 * @param dir Root directory to read.
 * @returns Object of `path relative to dir -> contents`.
 */
async function readAllFiles(dir) {
  const contents = {};

  const visit = async (current) => {
    let entries;
    try {
      entries = await fs20.readdir(current, { withFileTypes: true });
    } catch {
      return;
    }
    for (const entry of entries) {
      const fullPath = path20.join(current, entry.name);
      if (entry.isDirectory()) {
        await visit(fullPath);
        continue;
      }
      const key = path20.relative(dir, fullPath);
      contents[key] = await fs20.readFile(fullPath, "utf-8");
    }
  };

  await visit(dir);
  return contents;
}
4809
+
4810
+ // src/evolve/loop.ts
4811
/**
 * Drive the evaluate → propose → mutate loop over successive harness versions.
 *
 * Each iteration evaluates `<workspace>/iterations/<n>/harness`. A score
 * below the best seen so far is logged as a rollback and the best harness is
 * copied forward. Otherwise the iteration becomes the new best; the loop
 * stops on a score of 100 or more, or on the last allowed iteration. In all
 * other cases the proposer is asked for mutations, which are applied to
 * create the next iteration's harness (the unchanged harness is copied
 * forward when proposing or mutating fails).
 *
 * @param workspacePath Evolution workspace root.
 * @param tasks         Tasks evaluated every iteration.
 * @param kairnConfig   Configuration forwarded to evaluation and the proposer.
 * @param evolveConfig  Loop settings (`maxIterations`, `proposerModel`).
 * @param onProgress    Optional callback receiving progress events.
 * @returns `{ iterations, bestIteration, bestScore, baselineScore }`.
 * @throws Error when iteration 0 has no harness directory.
 */
async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgress) {
  const history = [];
  let bestScore = -1;
  let bestIteration = 0;
  let baselineScore = 0;

  const iterationDir = (n) => path21.join(workspacePath, "iterations", n.toString());
  const harnessDir = (n) => path21.join(iterationDir(n), "harness");

  // Persist one iteration log entry and remember it in the in-memory history.
  const record = async (iteration, score, taskResults, proposal = null, diffPatch = null) => {
    const entry = {
      iteration,
      score,
      taskResults,
      proposal,
      diffPatch,
      timestamp: new Date().toISOString()
    };
    await writeIterationLog(workspacePath, entry);
    history.push(entry);
  };

  for (let iter = 0; iter < evolveConfig.maxIterations; iter++) {
    const harnessPath = harnessDir(iter);
    const harnessExists = await fs21.access(harnessPath).then(() => true, () => false);
    if (!harnessExists) {
      if (iter === 0) {
        throw new Error(
          "No baseline harness found. Run `kairn evolve baseline` first."
        );
      }
      break;
    }

    onProgress?.({ type: "iteration-start", iteration: iter });
    const { results, aggregate } = await evaluateAll(
      tasks,
      harnessPath,
      workspacePath,
      iter,
      kairnConfig
    );
    onProgress?.({ type: "iteration-scored", iteration: iter, score: aggregate });
    if (iter === 0) baselineScore = aggregate;

    // Regression: log it and seed the next iteration from the best harness.
    if (iter > 0 && aggregate < bestScore) {
      onProgress?.({
        type: "rollback",
        iteration: iter,
        score: aggregate,
        message: `Regression: ${aggregate.toFixed(1)}% < ${bestScore.toFixed(1)}%. Rolling back.`
      });
      await record(iter, aggregate, results);
      if (iter + 1 < evolveConfig.maxIterations) {
        await copyDir(harnessDir(bestIteration), harnessDir(iter + 1));
      }
      continue;
    }

    bestScore = aggregate;
    bestIteration = iter;

    if (aggregate >= 100) {
      onProgress?.({ type: "perfect-score", iteration: iter, score: aggregate });
      await record(iter, aggregate, results);
      break;
    }
    if (iter === evolveConfig.maxIterations - 1) {
      // Last allowed iteration: nothing further to propose.
      await record(iter, aggregate, results);
      break;
    }

    onProgress?.({ type: "proposing", iteration: iter });
    let proposal;
    try {
      proposal = await propose(
        iter,
        workspacePath,
        harnessPath,
        history,
        tasks,
        kairnConfig,
        evolveConfig.proposerModel
      );
    } catch {
      // Proposer failed: carry the current harness forward unchanged.
      await copyDir(harnessPath, harnessDir(iter + 1));
      await record(iter, aggregate, results);
      continue;
    }

    let diffPatch = "";
    try {
      const applied = await applyMutations(harnessPath, iterationDir(iter + 1), proposal.mutations);
      diffPatch = applied.diffPatch;
    } catch {
      // Mutation failed: fall back to copying the current harness.
      await copyDir(harnessPath, harnessDir(iter + 1));
    }
    onProgress?.({
      type: "mutations-applied",
      iteration: iter,
      mutationCount: proposal.mutations.length
    });
    await record(iter, aggregate, results, proposal, diffPatch);
  }

  onProgress?.({
    type: "complete",
    iteration: history.length > 0 ? history.length - 1 : 0,
    score: bestScore
  });
  return {
    iterations: history,
    bestIteration,
    bestScore,
    baselineScore
  };
}
4980
+
4981
+ // src/commands/evolve.ts
4982
// Fallback evolve settings, used for any key a workspace config.yaml does not provide.
var DEFAULT_CONFIG = {
  model: "claude-sonnet-4-6",
  proposerModel: "claude-opus-4-6",
  scorer: "pass-fail",
  maxIterations: 5,
  parallelTasks: 1
};
4989
/**
 * Load evolve settings from `<workspacePath>/config.yaml`.
 *
 * Snake_case YAML keys are mapped onto the camelCase config shape; a key
 * that is missing or null takes its value from `DEFAULT_CONFIG`. If the file
 * cannot be read or parsed, or parses to null, a copy of `DEFAULT_CONFIG` is
 * returned.
 *
 * @param workspacePath Evolution workspace directory.
 * @returns Config object with `model`, `proposerModel`, `scorer`,
 *          `maxIterations` and `parallelTasks`.
 */
async function loadEvolveConfigFromWorkspace(workspacePath) {
  // [config property, YAML key]
  const keyMap = [
    ["model", "model"],
    ["proposerModel", "proposer_model"],
    ["scorer", "scorer"],
    ["maxIterations", "max_iterations"],
    ["parallelTasks", "parallel_tasks"]
  ];
  try {
    const configStr = await fs22.readFile(path22.join(workspacePath, "config.yaml"), "utf-8");
    const parsed = yamlParse(configStr);
    const merged = {};
    for (const [property, yamlKey] of keyMap) {
      merged[property] = parsed[yamlKey] ?? DEFAULT_CONFIG[property];
    }
    return merged;
  } catch {
    return { ...DEFAULT_CONFIG };
  }
}
5004
// Parent `evolve` command; its subcommands are attached to this object.
var evolveCommand = new Command11("evolve").description("Evolve your agent environment through automated optimization");

// `evolve init`: create the workspace, generate eval tasks (falling back to
// template placeholders when generation fails), optionally let the user add
// more tasks interactively, then write tasks.yaml.
evolveCommand
  .command("init")
  .description("Initialize an evolution workspace with auto-generated tasks")
  .option("--workflow <type>", "Workflow type for template selection", "feature-development")
  .action(async (options) => {
    try {
      const projectRoot = process.cwd();
      console.log(ui.section("Evolve Init"));

      const claudeDir = path22.join(projectRoot, ".claude");
      const hasClaudeDir = await fs22.access(claudeDir).then(() => true, () => false);
      if (!hasClaudeDir) {
        console.log(ui.error("No .claude/ directory found. Run kairn describe first."));
        process.exit(1);
      }

      const workspace = await createEvolveWorkspace(projectRoot, DEFAULT_CONFIG);
      console.log(ui.success("Created .kairn-evolve/ workspace"));

      const spinner = ora2("Generating project-specific eval tasks...").start();
      let tasks;
      try {
        tasks = await autoGenerateTasks(projectRoot, options.workflow);
        spinner.succeed(`Generated ${tasks.length} eval tasks`);
      } catch {
        spinner.fail("LLM task generation failed");
        // Fallback: one placeholder task per template chosen for the workflow.
        const toPlaceholder = (templateId, index) => ({
          id: `${templateId}-${index + 1}`,
          template: templateId,
          description: `${EVAL_TEMPLATES[templateId].description} (project-specific task \u2014 edit in tasks.yaml)`,
          setup: "npm install",
          expected_outcome: "Task completed successfully",
          scoring: "pass-fail",
          timeout: 300
        });
        tasks = selectTemplatesForWorkflow(options.workflow).map(toPlaceholder);
        console.log(ui.info(`Fell back to ${tasks.length} template placeholders`));
      }

      for (const task of tasks) {
        const idLabel = chalk14.cyan(` ${task.id}`);
        const summary = chalk14.dim(` (${task.template}) \u2014 ${task.description.slice(0, 80)}`);
        console.log(idLabel + summary);
      }

      for (;;) {
        let wantsMore;
        try {
          wantsMore = await confirm3({ message: "Add another eval task?", default: false });
        } catch {
          // Prompt aborted: treat as "no".
          wantsMore = false;
        }
        if (!wantsMore) break;

        const templateChoices = Object.values(EVAL_TEMPLATES).map((t) => ({
          name: `${t.name} \u2014 ${t.description}`,
          value: t.id
        }));
        const templateId = await select4({
          message: "Select eval template:",
          choices: templateChoices
        });

        const addSpinner = ora2("Generating task...").start();
        try {
          const config = await loadConfig();
          if (!config) {
            addSpinner.fail("No config found");
            continue;
          }
          let claudeMd = "";
          try {
            claudeMd = await fs22.readFile(path22.join(claudeDir, "CLAUDE.md"), "utf-8");
          } catch {
            // CLAUDE.md is optional here; generation proceeds with an empty string.
          }
          const profile = await buildProjectProfile(projectRoot);
          const newTasks = await generateTasksFromTemplates(claudeMd, profile, [templateId], config);
          tasks.push(...newTasks);
          addSpinner.succeed(`Added ${newTasks.length} task(s)`);
        } catch {
          addSpinner.fail("Failed to generate task");
        }
      }

      await writeTasksFile(workspace, tasks);
      console.log(ui.success(`Wrote ${tasks.length} tasks to tasks.yaml`));
      console.log("");
      const nextSteps = [
        " Next steps:",
        " 1. Review .kairn-evolve/tasks.yaml",
        " 2. Run: kairn evolve baseline",
        " 3. Run: kairn evolve run"
      ];
      for (const line of nextSteps) {
        console.log(chalk14.dim(line));
      }
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      console.log(ui.error(msg));
      process.exit(1);
    }
  });
5089
// `evolve baseline`: snapshot the project's .claude/ directory into the
// workspace and report how many files the baseline contains.
evolveCommand
  .command("baseline")
  .description("Snapshot current .claude/ directory as baseline")
  .action(async () => {
    try {
      const projectRoot = process.cwd();
      const workspace = path22.join(projectRoot, ".kairn-evolve");
      console.log(ui.section("Evolve Baseline"));

      const hasWorkspace = await fs22.access(workspace).then(() => true, () => false);
      if (!hasWorkspace) {
        console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
        process.exit(1);
      }

      await snapshotBaseline(projectRoot, workspace);
      const fileCount = await countFiles(path22.join(workspace, "baseline"));
      console.log(ui.success(`Baseline snapshot created (${fileCount} files)`));
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      console.log(ui.error(msg));
      process.exit(1);
    }
  });
5110
// `evolve run`: with --task, run and score that single task against the
// project's .claude/ harness; otherwise run the full evolution loop and print
// a per-iteration summary.
//
// --iterations deliberately has no built-in default: when the flag is absent
// the value loaded from the workspace config (max_iterations) is used, so a
// user-edited config.yaml is honoured instead of being overwritten.
evolveCommand
  .command("run")
  .description("Run tasks against the current harness")
  .option("--task <id>", "Run a specific task by ID")
  .option("--iterations <n>", "Number of evolution iterations (default: max_iterations from config.yaml)")
  .action(async (options) => {
    try {
      const projectRoot = process.cwd();
      const workspace = path22.join(projectRoot, ".kairn-evolve");
      console.log(ui.section("Evolve Run"));
      try {
        await fs22.access(workspace);
      } catch {
        console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
        process.exit(1);
      }

      const tasksPath = path22.join(workspace, "tasks.yaml");
      let tasksContent;
      try {
        tasksContent = await fs22.readFile(tasksPath, "utf-8");
      } catch {
        console.log(ui.error("No tasks.yaml found. Run kairn evolve init first."));
        process.exit(1);
      }
      const parsed = yamlParse(tasksContent);
      if (!parsed?.tasks || parsed.tasks.length === 0) {
        console.log(ui.error("No tasks found in tasks.yaml"));
        process.exit(1);
      }

      if (options.task) {
        // ---- Single-task mode -------------------------------------------
        const tasksToRun = parsed.tasks.filter((t) => t.id === options.task);
        if (tasksToRun.length === 0) {
          console.log(ui.error(`Task "${options.task}" not found in tasks.yaml`));
          process.exit(1);
        }
        console.log(ui.info(`Running ${tasksToRun.length} task(s)...`));
        console.log("");
        const config = await loadConfig();
        const harnessPath = path22.join(projectRoot, ".claude");
        const results = [];
        for (const task of tasksToRun) {
          const traceDir = path22.join(workspace, "traces", "0", task.id);
          const spinner = ora2(`Running: ${task.id}`).start();
          const result = await runTask(task, harnessPath, traceDir, 0);
          if (config) {
            // Scoring needs a config; without one the pending score is shown.
            const stdout = await fs22.readFile(path22.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
            const stderr = await fs22.readFile(path22.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
            const score = await scoreTask(task, traceDir, stdout, stderr, config);
            result.score = score;
            await writeScore(traceDir, score);
          }
          results.push(result);
          const status = result.score.pass ? chalk14.green("PASS") : chalk14.red("FAIL");
          const scoreStr = result.score.score !== undefined ? chalk14.dim(` (${result.score.score}%)`) : "";
          spinner.stop();
          console.log(` ${status} ${task.id}${scoreStr}${result.score.details ? chalk14.dim(` \u2014 ${result.score.details}`) : ""}`);
        }
        const passed = results.filter((r) => r.score.pass).length;
        console.log("");
        console.log(ui.info(`Results: ${passed}/${results.length} passed`));
        console.log(ui.info("Traces written to .kairn-evolve/traces/0/"));
      } else {
        // ---- Full evolution loop ----------------------------------------
        const kairnConfig = await loadConfig();
        if (!kairnConfig) {
          console.log(ui.error("No config found. Run kairn init first."));
          process.exit(1);
        }
        const evolveConfig = await loadEvolveConfigFromWorkspace(workspace);

        if (options.iterations !== undefined) {
          // Explicit flag wins over the workspace config.
          const iterations = Number.parseInt(options.iterations, 10);
          if (Number.isNaN(iterations) || iterations < 1) {
            console.log(ui.error("--iterations must be a positive integer"));
            process.exit(1);
          }
          evolveConfig.maxIterations = iterations;
        } else if (!Number.isInteger(evolveConfig.maxIterations) || evolveConfig.maxIterations < 1) {
          console.log(ui.error("max_iterations in config.yaml must be a positive integer"));
          process.exit(1);
        }

        try {
          await fs22.access(path22.join(workspace, "iterations", "0", "harness"));
        } catch {
          console.log(ui.error("No baseline harness found. Run kairn evolve baseline first."));
          process.exit(1);
        }

        const result = await evolve(workspace, parsed.tasks, kairnConfig, evolveConfig, (event) => {
          switch (event.type) {
            case "iteration-start":
              console.log(ui.section(`Iteration ${event.iteration}`));
              break;
            case "iteration-scored": {
              const scoreColor = event.score !== undefined && event.score >= 100 ? chalk14.green : event.score !== undefined && event.score >= 60 ? chalk14.yellow : chalk14.red;
              console.log(` Score: ${scoreColor((event.score?.toFixed(1) ?? "0") + "%")}`);
              break;
            }
            case "rollback":
              console.log(chalk14.yellow(` Warning: ${event.message ?? "Regression detected"}`));
              break;
            case "proposing":
              console.log(chalk14.dim(" Proposer analyzing traces..."));
              break;
            case "mutations-applied":
              console.log(chalk14.dim(` Applied ${event.mutationCount ?? 0} mutation(s)`));
              break;
            case "perfect-score":
              console.log(chalk14.green(" Perfect score. Stopping."));
              break;
            case "complete":
              break;
          }
        });

        console.log(ui.section("Evolution Summary"));
        console.log(` Iterations: ${result.iterations.length}`);
        console.log(` Baseline: ${result.baselineScore.toFixed(1)}%`);
        console.log(` Best: ${chalk14.green(result.bestScore.toFixed(1) + "%")} (iteration ${result.bestIteration})`);
        const improvement = result.bestScore - result.baselineScore;
        if (improvement > 0) {
          console.log(` Improvement: ${chalk14.green("+" + improvement.toFixed(1) + " points")}`);
        } else {
          console.log(` Improvement: ${improvement.toFixed(1)} points`);
        }
        console.log("");
        console.log(" Iter Score Mutations Status");

        // A log without a proposal is not necessarily a rollback (perfect,
        // final and proposer-failure entries have none either). Rollbacks are
        // identified the same way the loop detects them: a score below the
        // best score seen in earlier iterations.
        let runningBest = -1;
        for (const iter of result.iterations) {
          const regressed = iter.iteration > 0 && iter.score < runningBest;
          if (!regressed) runningBest = iter.score;

          const scoreStr = iter.score.toFixed(1).padStart(6) + "%";
          const mutations = iter.proposal?.mutations.length ?? 0;
          const mutStr = mutations > 0 ? mutations.toString() : "-";
          let status = "evaluated";
          if (iter.iteration === 0) status = "baseline";
          else if (regressed) status = "rollback";
          else if (iter.score >= 100) status = "perfect";
          else if (iter.iteration === result.bestIteration) status = "best";
          console.log(` ${iter.iteration.toString().padStart(4)} ${scoreStr} ${mutStr.padStart(9)} ${status}`);
        }
      }
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      console.log(ui.error(msg));
      process.exit(1);
    }
  });
5241
/**
 * Count the non-directory entries under `dir`, recursing into subdirectories.
 *
 * @param dir Directory to count.
 * @returns Number of entries found; 0 when `dir` cannot be listed.
 */
async function countFiles(dir) {
  let entries;
  try {
    entries = await fs22.readdir(dir, { withFileTypes: true });
  } catch {
    return 0;
  }
  let total = 0;
  for (const entry of entries) {
    if (entry.isDirectory()) {
      total += await countFiles(path22.join(dir, entry.name));
    } else {
      total += 1;
    }
  }
  return total;
}
5256
+
3689
5257
  // src/cli.ts
3690
- var program = new Command11();
5258
+ var program = new Command12();
3691
5259
  program.name("kairn").description(
3692
5260
  "Compile natural language intent into optimized Claude Code environments"
3693
5261
  ).version("1.9.0").option("--no-color", "Disable colored output");
@@ -3701,8 +5269,9 @@ program.addCommand(doctorCommand);
3701
5269
  program.addCommand(registryCommand);
3702
5270
  program.addCommand(templatesCommand);
3703
5271
  program.addCommand(keysCommand);
5272
+ program.addCommand(evolveCommand);
3704
5273
  if (process.argv.includes("--no-color") || process.env.NO_COLOR) {
3705
- chalk14.level = 0;
5274
+ chalk15.level = 0;
3706
5275
  }
3707
5276
  program.parse();
3708
5277
  //# sourceMappingURL=cli.js.map