kairn-cli 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -221,7 +221,7 @@ var ui = {
221
221
  // Key-value pairs
222
222
  kv: (key, value) => ` ${chalk.cyan(key.padEnd(14))} ${value}`,
223
223
  // File list
224
- file: (path20) => chalk.dim(` ${path20}`),
224
+ file: (path24) => chalk.dim(` ${path24}`),
225
225
  // Tool display
226
226
  tool: (name, reason) => ` ${warmStone("\u25CF")} ${chalk.bold(name)}
227
227
  ${chalk.dim(reason)}`,
@@ -3694,9 +3694,9 @@ var keysCommand = new Command10("keys").description("Add or update API keys for
3694
3694
  import { Command as Command11 } from "commander";
3695
3695
  import chalk14 from "chalk";
3696
3696
  import ora2 from "ora";
3697
- import fs19 from "fs/promises";
3698
- import path19 from "path";
3699
- import { parse as yamlParse } from "yaml";
3697
+ import fs23 from "fs/promises";
3698
+ import path23 from "path";
3699
+ import { parse as yamlParse2 } from "yaml";
3700
3700
  import { confirm as confirm3, select as select4 } from "@inquirer/prompts";
3701
3701
 
3702
3702
  // src/evolve/init.ts
@@ -4014,8 +4014,8 @@ async function copyDir(src, dest) {
4014
4014
  }
4015
4015
 
4016
4016
  // src/evolve/runner.ts
4017
- import { exec, spawn } from "child_process";
4018
- import { promisify } from "util";
4017
+ import { exec as exec2, spawn } from "child_process";
4018
+ import { promisify as promisify2 } from "util";
4019
4019
  import fs18 from "fs/promises";
4020
4020
  import os3 from "os";
4021
4021
  import path18 from "path";
@@ -4023,6 +4023,52 @@ import path18 from "path";
4023
4023
  // src/evolve/trace.ts
4024
4024
  import fs17 from "fs/promises";
4025
4025
  import path17 from "path";
4026
/**
 * Load a task trace that was previously written into `traceDir`.
 *
 * Every artifact is optional: a missing, unreadable or malformed file falls
 * back to an empty default so that one damaged trace cannot prevent the
 * remaining traces of an iteration from being loaded.
 *
 * @param {string} traceDir - Directory holding the trace files. Its base name
 *   becomes `taskId`; the name of its parent directory is parsed as the
 *   iteration number (0 when it is not numeric).
 * @returns {Promise<object>} `{ taskId, iteration, stdout, stderr, toolCalls,
 *   filesChanged, score, timing }`.
 */
async function loadTrace(traceDir) {
  // Read one artifact as text; `fallback` stands in for a missing/unreadable file.
  const readText = (name, fallback) =>
    fs17.readFile(path17.join(traceDir, name), "utf-8").catch(() => fallback);

  // Parse JSON text; `fallback` stands in for empty or corrupt content.
  const parseJson = (text, fallback) => {
    try {
      return JSON.parse(text);
    } catch {
      return fallback;
    }
  };

  // The files are independent of each other, so read them concurrently.
  const [stdout, stderr, filesChangedStr, timingStr, scoreStr, toolCallsStr] =
    await Promise.all([
      readText("stdout.log", ""),
      readText("stderr.log", ""),
      readText("files_changed.json", "{}"),
      readText("timing.json", "{}"),
      readText("score.json", '{"pass": false}'),
      readText("tool_calls.jsonl", "")
    ]);

  // JSON Lines: one record per non-blank line. A corrupt line (for example a
  // partially written last record) is dropped instead of failing the load.
  const toolCalls = [];
  for (const line of toolCallsStr.split("\n")) {
    if (!line.trim()) continue;
    try {
      toolCalls.push(JSON.parse(line));
    } catch {
      // Best-effort loader: skip the unreadable record.
    }
  }

  const parentDir = path17.basename(path17.dirname(traceDir));
  const iteration = parseInt(parentDir, 10) || 0;

  return {
    taskId: path17.basename(traceDir),
    iteration,
    stdout,
    stderr,
    toolCalls,
    filesChanged: parseJson(filesChangedStr, {}),
    score: parseJson(scoreStr, { pass: false }),
    timing: parseJson(timingStr, {})
  };
}
4059
/**
 * Load every task trace recorded for one iteration.
 *
 * Traces live in `<workspacePath>/traces/<iteration>/<taskId>/`. Only
 * sub-directories are treated as traces, so stray files in the iteration
 * folder do not turn into empty, failing traces. A trace that cannot be
 * loaded is skipped without discarding the others.
 *
 * @param {string} workspacePath - Root of the evolve workspace.
 * @param {number} iteration - Iteration whose traces should be loaded.
 * @returns {Promise<object[]>} Loaded traces; empty when the iteration
 *   directory does not exist or cannot be read.
 */
async function loadIterationTraces(workspacePath, iteration) {
  const tracesDir = path17.join(workspacePath, "traces", iteration.toString());

  let entries;
  try {
    entries = await fs17.readdir(tracesDir, { withFileTypes: true });
  } catch {
    // No traces were recorded for this iteration.
    return [];
  }

  const traces = [];
  for (const entry of entries) {
    if (!entry.isDirectory()) continue;
    try {
      traces.push(await loadTrace(path17.join(tracesDir, entry.name)));
    } catch {
      // Best-effort: one unreadable trace must not hide the remaining ones.
    }
  }
  return traces;
}
4026
4072
  async function writeTrace(traceDir, trace) {
4027
4073
  await fs17.mkdir(traceDir, { recursive: true });
4028
4074
  await fs17.writeFile(path17.join(traceDir, "stdout.log"), trace.stdout, "utf-8");
@@ -4052,156 +4098,53 @@ async function writeScore(traceDir, score) {
4052
4098
  "utf-8"
4053
4099
  );
4054
4100
  }
4055
-
4056
- // src/evolve/runner.ts
4057
- var execAsync = promisify(exec);
4058
- async function runTask(task, harnessPath, traceDir, iteration) {
4059
- await fs18.mkdir(traceDir, { recursive: true });
4060
- const startedAt = (/* @__PURE__ */ new Date()).toISOString();
4061
- const startMs = Date.now();
4062
- const tmpDir = await fs18.mkdtemp(path18.join(os3.tmpdir(), "kairn-evolve-"));
4063
- try {
4064
- await copyDir(harnessPath, path18.join(tmpDir, ".claude"));
4065
- let setupStderr = "";
4066
- if (task.setup.trim()) {
4067
- try {
4068
- await execAsync(task.setup, { cwd: tmpDir, timeout: 6e4 });
4069
- } catch (err) {
4070
- setupStderr = err instanceof Error ? err.message : String(err);
4071
- }
4072
- }
4073
- const filesBefore = await snapshotFileList(tmpDir);
4074
- const spawnResult = await spawnClaude(task.description, tmpDir, task.timeout);
4075
- const filesAfter = await snapshotFileList(tmpDir);
4076
- const filesChanged = diffFileLists(filesBefore, filesAfter);
4077
- const toolCalls = parseToolCalls(spawnResult.stdout);
4078
- const completedAt = (/* @__PURE__ */ new Date()).toISOString();
4079
- const durationMs = Date.now() - startMs;
4080
- const combinedStderr = setupStderr ? `[setup] ${setupStderr}
4081
- ${spawnResult.stderr}` : spawnResult.stderr;
4082
- const trace = {
4083
- taskId: task.id,
4084
- iteration,
4085
- stdout: spawnResult.stdout,
4086
- stderr: combinedStderr,
4087
- toolCalls,
4088
- filesChanged,
4089
- score: { pass: false, details: "Pending scoring" },
4090
- timing: { startedAt, completedAt, durationMs }
4091
- };
4092
- await writeTrace(traceDir, trace);
4093
- return {
4094
- taskId: task.id,
4095
- score: trace.score,
4096
- traceDir
4097
- };
4098
- } finally {
4099
- await fs18.rm(tmpDir, { recursive: true, force: true }).catch(() => {
4100
- });
4101
- }
4102
- }
4103
- async function spawnClaude(instruction, cwd, timeoutSec) {
4104
- return new Promise((resolve) => {
4105
- const args = ["--print", "--output-format", "text", "--max-turns", "50"];
4106
- const child = spawn("claude", args, {
4107
- cwd,
4108
- stdio: ["pipe", "pipe", "pipe"],
4109
- timeout: timeoutSec * 1e3,
4110
- env: { ...process.env }
4111
- });
4112
- let stdout = "";
4113
- let stderr = "";
4114
- child.stdout.on("data", (data) => {
4115
- stdout += data.toString();
4116
- });
4117
- child.stderr.on("data", (data) => {
4118
- stderr += data.toString();
4119
- });
4120
- child.stdin.write(instruction);
4121
- child.stdin.end();
4122
- child.on("close", (code) => {
4123
- resolve({ stdout, stderr, exitCode: code ?? 1 });
4124
- });
4125
- child.on("error", (err) => {
4126
- resolve({
4127
- stdout,
4128
- stderr: stderr + `
4129
- Spawn error: ${err.message}`,
4130
- exitCode: 1
4131
- });
4132
- });
4133
- });
4134
- }
4135
- async function snapshotFileList(dir) {
4136
- const result = {};
4137
- async function walk(current) {
4138
- let entries;
4139
- try {
4140
- entries = await fs18.readdir(current, { withFileTypes: true });
4141
- } catch {
4142
- return;
4143
- }
4144
- for (const entry of entries) {
4145
- const fullPath = path18.join(current, entry.name);
4146
- const relativePath = path18.relative(dir, fullPath);
4147
- if (relativePath.startsWith(".claude")) continue;
4148
- if (relativePath.startsWith("node_modules")) continue;
4149
- if (relativePath.startsWith(".git")) continue;
4150
- if (entry.isDirectory()) {
4151
- await walk(fullPath);
4152
- } else {
4153
- try {
4154
- const stat = await fs18.stat(fullPath);
4155
- result[relativePath] = stat.mtimeMs;
4156
- } catch {
4157
- }
4158
- }
4159
- }
4160
- }
4161
- await walk(dir);
4162
- return result;
4163
- }
4164
- function diffFileLists(before, after) {
4165
- const changes = {};
4166
- for (const [file, mtime] of Object.entries(after)) {
4167
- if (!(file in before)) {
4168
- changes[file] = "created";
4169
- } else if (before[file] !== mtime) {
4170
- changes[file] = "modified";
4171
- }
4172
- }
4173
- for (const file of Object.keys(before)) {
4174
- if (!(file in after)) {
4175
- changes[file] = "deleted";
4176
- }
4177
- }
4178
- return changes;
4101
+ async function writeIterationLog(workspacePath, log) {
4102
+ const iterDir = path17.join(workspacePath, "iterations", log.iteration.toString());
4103
+ await fs17.mkdir(iterDir, { recursive: true });
4104
+ await fs17.writeFile(
4105
+ path17.join(iterDir, "scores.json"),
4106
+ JSON.stringify({ score: log.score, taskResults: log.taskResults }, null, 2),
4107
+ "utf-8"
4108
+ );
4109
+ await fs17.writeFile(
4110
+ path17.join(iterDir, "proposer_reasoning.md"),
4111
+ log.proposal?.reasoning ?? "Baseline evaluation (no proposal)",
4112
+ "utf-8"
4113
+ );
4114
+ await fs17.writeFile(
4115
+ path17.join(iterDir, "mutation_diff.patch"),
4116
+ log.diffPatch ?? "",
4117
+ "utf-8"
4118
+ );
4179
4119
  }
4180
- function parseToolCalls(stdout) {
4120
+ async function loadIterationLog(workspacePath, iteration) {
4121
+ const iterDir = path17.join(workspacePath, "iterations", iteration.toString());
4181
4122
  try {
4182
- const lines = stdout.split("\n").filter((l) => l.trim());
4183
- const toolCalls = [];
4184
- for (const line of lines) {
4185
- try {
4186
- const obj = JSON.parse(line);
4187
- if (obj.type === "tool_use" || obj.tool_name) {
4188
- toolCalls.push(obj);
4189
- }
4190
- } catch {
4191
- }
4192
- }
4193
- return toolCalls;
4123
+ await fs17.access(iterDir);
4194
4124
  } catch {
4195
- return [];
4125
+ return null;
4196
4126
  }
4127
+ const scoresStr = await fs17.readFile(path17.join(iterDir, "scores.json"), "utf-8").catch(() => "{}");
4128
+ const reasoning = await fs17.readFile(path17.join(iterDir, "proposer_reasoning.md"), "utf-8").catch(() => "");
4129
+ const diffPatch = await fs17.readFile(path17.join(iterDir, "mutation_diff.patch"), "utf-8").catch(() => "");
4130
+ const scoresData = JSON.parse(scoresStr);
4131
+ const proposal = reasoning ? { reasoning, mutations: [], expectedImpact: {} } : null;
4132
+ return {
4133
+ iteration,
4134
+ score: scoresData.score ?? 0,
4135
+ taskResults: scoresData.taskResults ?? {},
4136
+ proposal,
4137
+ diffPatch: diffPatch || null,
4138
+ timestamp: ""
4139
+ };
4197
4140
  }
4198
4141
 
4199
4142
  // src/evolve/exec.ts
4200
- import { exec as exec2 } from "child_process";
4201
- import { promisify as promisify2 } from "util";
4202
- var execAsync2 = promisify2(exec2);
4143
+ import { exec } from "child_process";
4144
+ import { promisify } from "util";
4145
+ var execAsync = promisify(exec);
4203
4146
  async function execCommand(cmd, cwd, timeoutMs = 3e4) {
4204
- return execAsync2(cmd, { cwd, timeout: timeoutMs });
4147
+ return execAsync(cmd, { cwd, timeout: timeoutMs });
4205
4148
  }
4206
4149
 
4207
4150
  // src/evolve/scorers.ts
@@ -4373,83 +4316,990 @@ async function scoreTask(task, workspacePath, stdout, stderr, config) {
4373
4316
  return passFailScorer(task, workspacePath, stdout, stderr);
4374
4317
  }
4375
4318
 
4376
- // src/commands/evolve.ts
4377
- var DEFAULT_CONFIG = {
4378
- model: "claude-sonnet-4-6",
4379
- proposerModel: "claude-opus-4-6",
4380
- scorer: "pass-fail",
4381
- maxIterations: 5,
4382
- parallelTasks: 1
4383
- };
4384
- var evolveCommand = new Command11("evolve").description("Evolve your agent environment through automated optimization");
4385
- evolveCommand.command("init").description("Initialize an evolution workspace with auto-generated tasks").option("--workflow <type>", "Workflow type for template selection", "feature-development").action(async (options) => {
4319
+ // src/evolve/runner.ts
4320
+ var execAsync2 = promisify2(exec2);
4321
+ async function runTask(task, harnessPath, traceDir, iteration) {
4322
+ await fs18.mkdir(traceDir, { recursive: true });
4323
+ const startedAt = (/* @__PURE__ */ new Date()).toISOString();
4324
+ const startMs = Date.now();
4325
+ const tmpDir = await fs18.mkdtemp(path18.join(os3.tmpdir(), "kairn-evolve-"));
4386
4326
  try {
4387
- const projectRoot = process.cwd();
4388
- console.log(ui.section("Evolve Init"));
4389
- const claudeDir = path19.join(projectRoot, ".claude");
4390
- try {
4391
- await fs19.access(claudeDir);
4392
- } catch {
4393
- console.log(ui.error("No .claude/ directory found. Run kairn describe first."));
4394
- process.exit(1);
4395
- }
4396
- const workspace = await createEvolveWorkspace(projectRoot, DEFAULT_CONFIG);
4397
- console.log(ui.success("Created .kairn-evolve/ workspace"));
4398
- const spinner = ora2("Generating project-specific eval tasks...").start();
4399
- let tasks;
4400
- try {
4401
- tasks = await autoGenerateTasks(projectRoot, options.workflow);
4402
- spinner.succeed(`Generated ${tasks.length} eval tasks`);
4403
- } catch {
4404
- spinner.fail("LLM task generation failed");
4405
- const templateIds = selectTemplatesForWorkflow(options.workflow);
4406
- tasks = templateIds.map((templateId, index) => ({
4407
- id: `${templateId}-${index + 1}`,
4408
- template: templateId,
4409
- description: `${EVAL_TEMPLATES[templateId].description} (project-specific task \u2014 edit in tasks.yaml)`,
4410
- setup: "npm install",
4411
- expected_outcome: "Task completed successfully",
4412
- scoring: "pass-fail",
4413
- timeout: 300
4414
- }));
4415
- console.log(ui.info(`Fell back to ${tasks.length} template placeholders`));
4416
- }
4417
- for (const task of tasks) {
4418
- console.log(chalk14.cyan(` ${task.id}`) + chalk14.dim(` (${task.template}) \u2014 ${task.description.slice(0, 80)}`));
4419
- }
4420
- let addMore = true;
4421
- while (addMore) {
4327
+ await copyDir(harnessPath, path18.join(tmpDir, ".claude"));
4328
+ let setupStderr = "";
4329
+ if (task.setup.trim()) {
4422
4330
  try {
4423
- addMore = await confirm3({ message: "Add another eval task?", default: false });
4424
- } catch {
4425
- addMore = false;
4331
+ await execAsync2(task.setup, { cwd: tmpDir, timeout: 6e4 });
4332
+ } catch (err) {
4333
+ setupStderr = err instanceof Error ? err.message : String(err);
4426
4334
  }
4427
- if (addMore) {
4428
- const templateId = await select4({
4429
- message: "Select eval template:",
4430
- choices: Object.values(EVAL_TEMPLATES).map((t) => ({
4431
- name: `${t.name} \u2014 ${t.description}`,
4432
- value: t.id
4433
- }))
4434
- });
4435
- const addSpinner = ora2("Generating task...").start();
4436
- try {
4437
- const config = await loadConfig();
4438
- if (config) {
4439
- let claudeMd = "";
4440
- try {
4441
- claudeMd = await fs19.readFile(path19.join(claudeDir, "CLAUDE.md"), "utf-8");
4442
- } catch {
4443
- }
4444
- const profile = await buildProjectProfile(projectRoot);
4445
- const newTasks = await generateTasksFromTemplates(claudeMd, profile, [templateId], config);
4446
- tasks.push(...newTasks);
4447
- addSpinner.succeed(`Added ${newTasks.length} task(s)`);
4448
- } else {
4449
- addSpinner.fail("No config found");
4450
- }
4451
- } catch {
4452
- addSpinner.fail("Failed to generate task");
4335
+ }
4336
+ const filesBefore = await snapshotFileList(tmpDir);
4337
+ const spawnResult = await spawnClaude(task.description, tmpDir, task.timeout);
4338
+ const filesAfter = await snapshotFileList(tmpDir);
4339
+ const filesChanged = diffFileLists(filesBefore, filesAfter);
4340
+ const toolCalls = parseToolCalls(spawnResult.stdout);
4341
+ const completedAt = (/* @__PURE__ */ new Date()).toISOString();
4342
+ const durationMs = Date.now() - startMs;
4343
+ const combinedStderr = setupStderr ? `[setup] ${setupStderr}
4344
+ ${spawnResult.stderr}` : spawnResult.stderr;
4345
+ const trace = {
4346
+ taskId: task.id,
4347
+ iteration,
4348
+ stdout: spawnResult.stdout,
4349
+ stderr: combinedStderr,
4350
+ toolCalls,
4351
+ filesChanged,
4352
+ score: { pass: false, details: "Pending scoring" },
4353
+ timing: { startedAt, completedAt, durationMs }
4354
+ };
4355
+ await writeTrace(traceDir, trace);
4356
+ return {
4357
+ taskId: task.id,
4358
+ score: trace.score,
4359
+ traceDir
4360
+ };
4361
+ } finally {
4362
+ await fs18.rm(tmpDir, { recursive: true, force: true }).catch(() => {
4363
+ });
4364
+ }
4365
+ }
4366
/**
 * Run the `claude` CLI non-interactively and capture its output.
 *
 * The instruction is written to the child's stdin. The returned promise never
 * rejects: a failure to start the process is reported through `stderr` and an
 * exit code of 1.
 *
 * @param {string} instruction - Prompt text written to the child's stdin.
 * @param {string} cwd - Working directory for the child process.
 * @param {number} timeoutSec - Timeout in seconds, passed to `spawn` in ms.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 */
async function spawnClaude(instruction, cwd, timeoutSec) {
  return new Promise((resolve) => {
    const args = ["--print", "--output-format", "text", "--max-turns", "50"];
    const child = spawn("claude", args, {
      cwd,
      stdio: ["pipe", "pipe", "pipe"],
      timeout: timeoutSec * 1e3,
      env: { ...process.env }
    });
    let stdout = "";
    let stderr = "";
    child.stdout.on("data", (data) => {
      stdout += data.toString();
    });
    child.stderr.on("data", (data) => {
      stderr += data.toString();
    });
    // If the child exits (or never starts) before consuming its input, the
    // write below fails with EPIPE. Without a listener that is an unhandled
    // stream 'error' event, so record it alongside the child's own stderr.
    child.stdin.on("error", (err) => {
      stderr += `\nStdin error: ${err.message}`;
    });
    child.stdin.write(instruction);
    child.stdin.end();
    // 'error' and 'close' may both fire; the promise keeps the first result.
    child.on("close", (code) => {
      resolve({ stdout, stderr, exitCode: code ?? 1 });
    });
    child.on("error", (err) => {
      resolve({
        stdout,
        stderr: `${stderr}\nSpawn error: ${err.message}`,
        exitCode: 1
      });
    });
  });
}
4398
/**
 * Record the modification time of every file below `dir`.
 *
 * The top-level `.claude`, `node_modules` and `.git` entries are excluded.
 * They are matched by exact name so that siblings which merely share the
 * prefix (`.gitignore`, `.github/`, `.claude.json`, `node_modules_old/`) are
 * still tracked.
 *
 * @param {string} dir - Directory to scan recursively.
 * @returns {Promise<Object<string, number>>} Map of path (relative to `dir`)
 *   to `mtimeMs`. Unreadable directories and files are omitted.
 */
async function snapshotFileList(dir) {
  const ignoredTopLevel = new Set([".claude", "node_modules", ".git"]);
  const result = {};

  async function walk(current) {
    let entries;
    try {
      entries = await fs18.readdir(current, { withFileTypes: true });
    } catch {
      return;
    }
    for (const entry of entries) {
      const fullPath = path18.join(current, entry.name);
      const relativePath = path18.relative(dir, fullPath);
      // Compare the first path segment, not a raw string prefix.
      const [topLevel] = relativePath.split(path18.sep);
      if (ignoredTopLevel.has(topLevel)) continue;
      if (entry.isDirectory()) {
        await walk(fullPath);
        continue;
      }
      try {
        const stat = await fs18.stat(fullPath);
        result[relativePath] = stat.mtimeMs;
      } catch {
        // The entry vanished or is unreadable; leave it out of the snapshot.
      }
    }
  }

  await walk(dir);
  return result;
}
4427
/**
 * Compare two file snapshots (path -> mtime) and classify the differences.
 *
 * @param {Object<string, number>} before - Snapshot taken first.
 * @param {Object<string, number>} after - Snapshot taken afterwards.
 * @returns {Object<string, "created"|"modified"|"deleted">} One entry per
 *   path that was added, whose mtime differs, or that disappeared.
 */
function diffFileLists(before, after) {
  // Own-property test: the `in` operator would also match names inherited
  // from Object.prototype, so a file called "constructor" or "toString"
  // would be misclassified.
  const has = (snapshot, file) =>
    Object.prototype.hasOwnProperty.call(snapshot, file);

  const changes = {};
  for (const [file, mtime] of Object.entries(after)) {
    if (!has(before, file)) {
      changes[file] = "created";
    } else if (before[file] !== mtime) {
      changes[file] = "modified";
    }
  }
  for (const file of Object.keys(before)) {
    if (!has(after, file)) {
      changes[file] = "deleted";
    }
  }
  return changes;
}
4443
/**
 * Extract tool-call records from the captured stdout.
 *
 * Each non-blank line is tried as JSON; a parsed value is kept when its
 * `type` is "tool_use" or it has a truthy `tool_name`. Lines that are not
 * JSON (or not objects) are ignored.
 *
 * @param {string} stdout - Raw stdout text.
 * @returns {object[]} Matching records in order of appearance; an empty array
 *   when `stdout` cannot be processed at all.
 */
function parseToolCalls(stdout) {
  try {
    return stdout
      .split("\n")
      .filter((rawLine) => rawLine.trim())
      .reduce((collected, rawLine) => {
        try {
          const record = JSON.parse(rawLine);
          const isToolCall = record.type === "tool_use" || record.tool_name;
          if (isToolCall) {
            collected.push(record);
          }
        } catch {
          // Not a JSON record: ordinary output line.
        }
        return collected;
      }, []);
  } catch {
    return [];
  }
}
4461
/**
 * Run every task against a harness, one after another, and aggregate scores.
 *
 * Each task's trace goes to `<workspacePath>/traces/<iteration>/<task.id>`.
 * When `config` is truthy the logged stdout/stderr are re-read, scored with
 * `scoreTask`, and the score is persisted with `writeScore`; otherwise the
 * score returned by `runTask` is used unchanged.
 *
 * @param {object[]} tasks - Task definitions (each needs an `id`).
 * @param {string} harnessPath - Harness directory handed to `runTask`.
 * @param {string} workspacePath - Root of the evolve workspace.
 * @param {number} iteration - Iteration number used in the trace path.
 * @param {object} [config] - Scoring configuration; scoring is skipped if falsy.
 * @returns {Promise<{results: object, aggregate: number}>} Scores keyed by
 *   task id and their mean (a score without `score` counts as 100 when
 *   `pass` is truthy, else 0); `aggregate` is 0 when there are no tasks.
 */
async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config) {
  const iterationTraceRoot = path18.join(workspacePath, "traces", iteration.toString());
  const results = {};

  for (const task of tasks) {
    const traceDir = path18.join(iterationTraceRoot, task.id);
    const taskResult = await runTask(task, harnessPath, traceDir, iteration);

    if (!config) {
      results[task.id] = taskResult.score;
      continue;
    }

    const readLog = (fileName) =>
      fs18.readFile(path18.join(traceDir, fileName), "utf-8").catch(() => "");
    const stdout = await readLog("stdout.log");
    const stderr = await readLog("stderr.log");
    const scored = await scoreTask(task, traceDir, stdout, stderr, config);
    await writeScore(traceDir, scored);
    results[task.id] = scored;
  }

  const allScores = Object.values(results);
  let total = 0;
  for (const entry of allScores) {
    total += entry.score ?? (entry.pass ? 100 : 0);
  }
  const aggregate = allScores.length > 0 ? total / allScores.length : 0;
  return { results, aggregate };
}
4488
+
4489
+ // src/evolve/loop.ts
4490
+ import fs21 from "fs/promises";
4491
+ import path21 from "path";
4492
+
4493
+ // src/evolve/proposer.ts
4494
+ import fs19 from "fs/promises";
4495
+ import path19 from "path";
4496
+ var PROPOSER_SYSTEM_PROMPT = `You are an expert agent environment optimizer. Your job is to improve a Claude Code
4497
+ agent environment (.claude/ directory) based on execution traces from real tasks.
4498
+
4499
+ ## What You Have Access To
4500
+ 1. Current harness: The .claude/ directory files (CLAUDE.md, commands/, rules/, agents/)
4501
+ 2. Execution traces: Full stdout/stderr, tool call sequences, file changes, and scores
4502
+ 3. History: Previous iterations' proposals, diffs, and resulting score changes
4503
+
4504
+ ## Your Task
4505
+ Analyze the traces to identify WHY tasks fail or underperform. Then propose specific,
4506
+ minimal changes to the harness files that will fix those failures.
4507
+
4508
+ ## Diagnosis Process
4509
+ 1. For each failed/low-scoring task:
4510
+ a. Read the full trace (stdout, tool calls, file changes)
4511
+ b. Identify the ROOT CAUSE: bad instruction? Missing tool? Wrong rule?
4512
+ c. Trace the failure back to a specific harness decision
4513
+ d. Propose a fix
4514
+
4515
+ 2. For each successful task:
4516
+ a. Note what worked well
4517
+ b. Ensure proposed changes don't break what's working
4518
+
4519
+ 3. Check history for counterfactual evidence
4520
+
4521
+ ## Output Format
4522
+ Return a JSON object:
4523
+ {
4524
+ "reasoning": "Your full causal analysis...",
4525
+ "mutations": [
4526
+ { "file": "CLAUDE.md", "action": "replace", "old_text": "...", "new_text": "...", "rationale": "..." },
4527
+ { "file": "commands/develop.md", "action": "add_section", "new_text": "...", "rationale": "..." }
4528
+ ],
4529
+ "expected_impact": { "task-id": "+15% \u2014 explanation" }
4530
+ }
4531
+
4532
+ ## Rules
4533
+ - MINIMAL changes only. Don't rewrite the entire CLAUDE.md.
4534
+ - Each mutation must have a clear rationale tied to a specific trace observation.
4535
+ - Never remove something that's working for another task.
4536
+ - If a previous iteration's change caused a regression, REVERT it.
4537
+ - Prefer ADDITIVE changes over replacements when possible.
4538
+
4539
+ Return ONLY valid JSON.`;
4540
+ var STDOUT_TRUNCATION_LIMIT = 2e3;
4541
/**
 * Read every regular file below `harnessPath` as UTF-8 text.
 *
 * Directory entries are visited in name order so the key order of the result
 * (and anything rendered from it) does not depend on the order in which the
 * filesystem happens to list them.
 *
 * @param {string} harnessPath - Root directory of the harness.
 * @returns {Promise<Object<string, string>>} Map of path (relative to
 *   `harnessPath`) to file content. Unreadable directories and files are
 *   skipped.
 */
async function readHarnessFiles(harnessPath) {
  const result = {};

  async function walk(dir, prefix) {
    let entries;
    try {
      entries = await fs19.readdir(dir, { withFileTypes: true });
    } catch {
      return;
    }
    // Plain code-unit comparison: stable across locales, unlike localeCompare.
    entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));
    for (const entry of entries) {
      const relativePath = prefix ? path19.join(prefix, entry.name) : entry.name;
      const fullPath = path19.join(dir, entry.name);
      if (entry.isDirectory()) {
        await walk(fullPath, relativePath);
      } else if (entry.isFile()) {
        try {
          result[relativePath] = await fs19.readFile(fullPath, "utf-8");
        } catch {
          // Best-effort: leave an unreadable file out of the result.
        }
      }
    }
  }

  await walk(harnessPath, "");
  return result;
}
4566
/**
 * Keep only the last `limit` characters of `stdout`.
 *
 * @param {string} stdout - Text to shorten.
 * @param {number} limit - Maximum number of trailing characters to keep.
 * @returns {string} `stdout` unchanged when it fits within `limit`; otherwise
 *   a one-line truncation notice followed by the retained tail.
 */
function truncateStdout(stdout, limit) {
  if (stdout.length <= limit) {
    return stdout;
  }
  // Index from the end explicitly: `slice(-limit)` with a limit of 0 is
  // `slice(0)` and would return the whole string instead of nothing.
  const keep = Math.max(0, limit);
  const tail = stdout.slice(stdout.length - keep);
  return `[...truncated, showing last ${limit} chars...]\n${tail}`;
}
4573
+ function buildProposerUserMessage(harnessFiles, traces, tasks, history) {
4574
+ const sections = [];
4575
+ sections.push("## Current Harness Files\n");
4576
+ const fileEntries = Object.entries(harnessFiles);
4577
+ if (fileEntries.length === 0) {
4578
+ sections.push("(No harness files found)\n");
4579
+ } else {
4580
+ for (const [filePath, content] of fileEntries) {
4581
+ sections.push(`### ${filePath}
4582
+ \`\`\`
4583
+ ${content}
4584
+ \`\`\`
4585
+ `);
4586
+ }
4587
+ }
4588
+ sections.push("## Task Definitions\n");
4589
+ if (tasks.length === 0) {
4590
+ sections.push("(No tasks defined)\n");
4591
+ } else {
4592
+ for (const task of tasks) {
4593
+ sections.push(
4594
+ `### Task: ${task.id}
4595
+ - Template: ${task.template}
4596
+ - Description: ${task.description}
4597
+ - Expected outcome: ${Array.isArray(task.expected_outcome) ? task.expected_outcome.join("; ") : task.expected_outcome}
4598
+ - Scoring: ${task.scoring}
4599
+ `
4600
+ );
4601
+ }
4602
+ }
4603
+ sections.push("## Execution Traces\n");
4604
+ if (traces.length === 0) {
4605
+ sections.push("(No traces available)\n");
4606
+ } else {
4607
+ for (const trace of traces) {
4608
+ const scoreNum = trace.score.score !== void 0 ? trace.score.score : trace.score.pass ? 100 : 0;
4609
+ const truncatedStdout = truncateStdout(trace.stdout, STDOUT_TRUNCATION_LIMIT);
4610
+ const filesChangedList = Object.entries(trace.filesChanged).map(([f, action]) => ` - ${f}: ${action}`).join("\n");
4611
+ sections.push(
4612
+ `### Trace: ${trace.taskId}
4613
+ - Pass: ${trace.score.pass}
4614
+ - Score: ${scoreNum}
4615
+ ` + (trace.score.details ? `- Details: ${trace.score.details}
4616
+ ` : "") + `- Duration: ${trace.timing.durationMs}ms
4617
+ - Files changed:
4618
+ ${filesChangedList || " (none)"}
4619
+ - Stdout (last ${STDOUT_TRUNCATION_LIMIT} chars):
4620
+ \`\`\`
4621
+ ${truncatedStdout}
4622
+ \`\`\`
4623
+ `
4624
+ );
4625
+ }
4626
+ }
4627
+ sections.push("## Iteration History\n");
4628
+ if (history.length === 0) {
4629
+ sections.push("(No previous iterations)\n");
4630
+ } else {
4631
+ for (const log of history) {
4632
+ const taskScores = Object.entries(log.taskResults).map(([id, s]) => ` - ${id}: ${s.score !== void 0 ? s.score : s.pass ? 100 : 0} (pass=${s.pass})`).join("\n");
4633
+ sections.push(
4634
+ `### Iteration ${log.iteration} \u2014 Score: ${log.score}
4635
+ - Task results:
4636
+ ${taskScores}
4637
+ `
4638
+ );
4639
+ if (log.proposal) {
4640
+ sections.push(
4641
+ `- Proposal reasoning: ${log.proposal.reasoning}
4642
+ - Mutations: ${log.proposal.mutations.length} change(s)
4643
+ `
4644
+ );
4645
+ }
4646
+ }
4647
+ }
4648
+ return sections.join("\n");
4649
+ }
4650
/**
 * Parse and validate the proposer model's reply.
 *
 * Accepts bare JSON, JSON inside a Markdown code fence, or JSON surrounded by
 * prose (the outermost `{ ... }` span is tried when a direct parse fails).
 * Field names may be snake_case or camelCase.
 *
 * A mutation is dropped when it is not an object, names no file, has ".." in
 * its file name, uses an action other than "replace" / "add_section" /
 * "create_file", or is a "replace" without old text.
 *
 * @param {string} raw - Raw text returned by the model.
 * @returns {{reasoning: string, mutations: object[], expectedImpact: Object<string, string>}}
 * @throws {Error} When the text is not JSON, is not a JSON object, or lacks
 *   the `reasoning` string or the `mutations` array.
 */
function parseProposerResponse(raw) {
  let cleaned = raw.trim();
  const fenceMatch = cleaned.match(/^```(?:json)?\s*\n?([\s\S]*?)\n?\s*```$/);
  if (fenceMatch) {
    cleaned = fenceMatch[1].trim();
  }

  let parsed;
  try {
    parsed = JSON.parse(cleaned);
  } catch {
    // Models sometimes wrap the object in prose; retry on the outermost braces.
    const start = cleaned.indexOf("{");
    const end = cleaned.lastIndexOf("}");
    let recovered = false;
    if (start !== -1 && end > start) {
      try {
        parsed = JSON.parse(cleaned.slice(start, end + 1));
        recovered = true;
      } catch {
        // Fall through to the error below.
      }
    }
    if (!recovered) {
      throw new Error(`Proposer returned invalid JSON: ${cleaned.slice(0, 200)}`);
    }
  }

  if (typeof parsed !== "object" || parsed === null) {
    throw new Error("Proposer response is not a JSON object");
  }
  const obj = parsed;
  if (typeof obj["reasoning"] !== "string") {
    throw new Error('Proposer response missing required "reasoning" string field');
  }
  if (!Array.isArray(obj["mutations"])) {
    throw new Error('Proposer response missing required "mutations" array field');
  }

  // First of `keys` whose value is a string, otherwise `fallback`.
  const pickString = (source, keys, fallback) => {
    for (const key of keys) {
      if (typeof source[key] === "string") return source[key];
    }
    return fallback;
  };

  const mutations = [];
  for (const entry of obj["mutations"]) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }
    const file = pickString(entry, ["file"], "");
    const action = pickString(entry, ["action"], "");
    const newText = pickString(entry, ["new_text", "newText"], "");
    const oldText = pickString(entry, ["old_text", "oldText"], void 0);
    const rationale = pickString(entry, ["rationale"], "");

    // An empty file name would resolve to the harness directory itself, and
    // ".." could escape it.
    if (!file || file.includes("..")) {
      continue;
    }
    if (action !== "replace" && action !== "add_section" && action !== "create_file") {
      continue;
    }
    if (action === "replace" && !oldText) {
      continue;
    }

    const mutation = { file, action, newText, rationale };
    if (oldText !== void 0) {
      mutation.oldText = oldText;
    }
    mutations.push(mutation);
  }

  const rawImpact = obj["expected_impact"] ?? obj["expectedImpact"] ?? {};
  const expectedImpact = {};
  if (typeof rawImpact === "object" && rawImpact !== null) {
    for (const [key, value] of Object.entries(rawImpact)) {
      expectedImpact[key] = typeof value === "string" ? value : String(value);
    }
  }

  return {
    reasoning: obj["reasoning"],
    mutations,
    expectedImpact
  };
}
4716
/**
 * Ask the proposer model for harness mutations for one iteration.
 *
 * Reads the harness files, then the iteration's traces, renders them with the
 * task list and history into the user message, and sends that message with
 * the proposer system prompt using `config` with its model overridden.
 *
 * @param {number} iteration - Iteration whose traces are analysed.
 * @param {string} workspacePath - Root of the evolve workspace.
 * @param {string} harnessPath - Directory of the harness under evaluation.
 * @param {object[]} history - Logs of earlier iterations.
 * @param {object[]} tasks - Task definitions.
 * @param {object} config - Base LLM configuration.
 * @param {string} proposerModel - Model name that replaces `config.model`.
 * @returns {Promise<object>} Result of `parseProposerResponse` on the reply.
 */
async function propose(iteration, workspacePath, harnessPath, history, tasks, config, proposerModel) {
  const userMessage = buildProposerUserMessage(
    await readHarnessFiles(harnessPath),
    await loadIterationTraces(workspacePath, iteration),
    tasks,
    history
  );
  const llmOptions = {
    systemPrompt: PROPOSER_SYSTEM_PROMPT,
    maxTokens: 8192
  };
  const reply = await callLLM({ ...config, model: proposerModel }, userMessage, llmOptions);
  return parseProposerResponse(reply);
}
4727
+
4728
+ // src/evolve/mutator.ts
4729
+ import fs20 from "fs/promises";
4730
+ import path20 from "path";
4731
/**
 * Copy the current harness into the next iteration and apply mutations to
 * the copy.
 *
 * Supported actions:
 *  - "replace": swap the first occurrence of `oldText` for `newText`; skipped
 *    when the file is missing or does not contain `oldText`.
 *  - "add_section": append `newText` after a blank line, creating the file
 *    (and its parent directories) when it does not exist yet.
 *  - "create_file": write `newText`, creating parent directories as needed.
 * Mutations with an empty file name or ".." in it are ignored.
 *
 * @param {string} currentHarnessPath - Harness to start from (left untouched).
 * @param {string} nextIterationDir - Directory that receives `harness/`.
 * @param {object[]} mutations - `{ file, action, newText, oldText? }` entries.
 * @returns {Promise<{newHarnessPath: string, diffPatch: string}>}
 */
async function applyMutations(currentHarnessPath, nextIterationDir, mutations) {
  const newHarnessPath = path20.join(nextIterationDir, "harness");
  await copyDir(currentHarnessPath, newHarnessPath);

  for (const mutation of mutations) {
    // An empty name would address the harness directory itself; ".." could
    // leave it.
    if (!mutation.file || mutation.file.includes("..")) {
      continue;
    }
    const filePath = path20.join(newHarnessPath, mutation.file);

    if (mutation.action === "replace") {
      if (!mutation.oldText) {
        continue;
      }
      let content;
      try {
        content = await fs20.readFile(filePath, "utf-8");
      } catch {
        // The proposal targets a file that does not exist: skip it, just
        // like a proposal whose old text cannot be found.
        continue;
      }
      if (!content.includes(mutation.oldText)) {
        continue;
      }
      // A replacer function inserts newText literally. Passing it as a
      // string would expand "$&", "$1", "$$" and similar patterns.
      await fs20.writeFile(
        filePath,
        content.replace(mutation.oldText, () => mutation.newText),
        "utf-8"
      );
    } else if (mutation.action === "add_section") {
      let existing = null;
      try {
        existing = await fs20.readFile(filePath, "utf-8");
      } catch {
        // No such file yet: it is created below.
      }
      if (existing === null) {
        await fs20.mkdir(path20.dirname(filePath), { recursive: true });
        await fs20.writeFile(filePath, mutation.newText, "utf-8");
      } else {
        await fs20.writeFile(filePath, existing + "\n\n" + mutation.newText, "utf-8");
      }
    } else if (mutation.action === "create_file") {
      await fs20.mkdir(path20.dirname(filePath), { recursive: true });
      await fs20.writeFile(filePath, mutation.newText, "utf-8");
    }
  }

  const diffPatch = await generateDiff2(currentHarnessPath, newHarnessPath);
  return { newHarnessPath, diffPatch };
}
4772
/**
 * Produce a coarse, whole-file patch between two directory trees.
 *
 * For every path whose content differs, a `--- a/<path>` / `+++ b/<path>`
 * header is emitted, followed by all old lines prefixed with "-" and all new
 * lines prefixed with "+", then a blank line. Missing and empty content are
 * treated alike, and an empty side contributes no lines.
 *
 * @param {string} oldDir - Directory with the previous files.
 * @param {string} newDir - Directory with the updated files.
 * @returns {Promise<string>} Patch text; the empty string when nothing differs.
 */
async function generateDiff2(oldDir, newDir) {
  const before = await readAllFiles(oldDir);
  const after = await readAllFiles(newDir);

  const output = [];
  // Append every line of `text`, each prefixed with `marker`.
  const emit = (marker, text) => {
    for (const line of text.split("\n")) {
      output.push(`${marker}${line}`);
    }
  };

  const everyPath = Array.from(
    new Set([...Object.keys(before), ...Object.keys(after)])
  ).sort();

  for (const filePath of everyPath) {
    const oldContent = before[filePath] ?? "";
    const newContent = after[filePath] ?? "";
    if (oldContent === newContent) continue;

    output.push(`--- a/${filePath}`);
    output.push(`+++ b/${filePath}`);
    if (oldContent) emit("-", oldContent);
    if (newContent) emit("+", newContent);
    output.push("");
  }
  return output.join("\n");
}
4808
/**
 * Read every non-directory entry below `dir` as UTF-8 text.
 *
 * @param {string} dir - Root directory to walk.
 * @returns {Promise<Object<string, string>>} Map of path (relative to `dir`)
 *   to content. Directories and entries that cannot be read are skipped, so a
 *   single bad entry (such as a dangling symlink) does not fail the walk.
 */
async function readAllFiles(dir) {
  const result = {};

  async function walk(current) {
    let entries;
    try {
      entries = await fs20.readdir(current, { withFileTypes: true });
    } catch {
      return;
    }
    for (const entry of entries) {
      const fullPath = path20.join(current, entry.name);
      if (entry.isDirectory()) {
        await walk(fullPath);
        continue;
      }
      try {
        result[path20.relative(dir, fullPath)] = await fs20.readFile(fullPath, "utf-8");
      } catch {
        // Best-effort: leave the unreadable entry out.
      }
    }
  }

  await walk(dir);
  return result;
}
4830
+
4831
+ // src/evolve/loop.ts
/** Path of the harness snapshot belonging to a given iteration. */
function evolveHarnessDir(workspacePath, iteration) {
  return path21.join(workspacePath, "iterations", iteration.toString(), "harness");
}

/** Build one iteration log record, stamped with the current time. */
function makeEvolveIterationLog(iteration, score, taskResults, proposal = null, diffPatch = null) {
  return {
    iteration,
    score,
    taskResults,
    proposal,
    diffPatch,
    timestamp: (/* @__PURE__ */ new Date()).toISOString()
  };
}

/**
 * Run the evaluate -> propose -> mutate loop over the workspace harnesses.
 *
 * Each iteration evaluates `iterations/<n>/harness`, then either
 *  - rolls back (score below the best so far): the best harness is copied
 *    forward as the next iteration's harness,
 *  - stops (score >= 100, or last allowed iteration), or
 *  - asks the proposer for mutations and applies them to produce
 *    `iterations/<n+1>/harness`.
 * If proposing or applying mutations fails, the current harness is carried
 * forward unchanged. Every evaluated iteration is written through
 * `writeIterationLog` and collected in the returned history.
 *
 * @param {string} workspacePath - Evolve workspace root.
 * @param {Array} tasks - Eval tasks handed to `evaluateAll` / `propose`.
 * @param {object} kairnConfig - Passed through to `evaluateAll` / `propose`.
 * @param {{maxIterations: number, proposerModel: string}} evolveConfig
 * @param {(event: object) => void} [onProgress] - Receives events of type
 *   "iteration-start", "iteration-scored", "rollback", "perfect-score",
 *   "proposing", "mutations-applied" and "complete".
 * @returns {Promise<{iterations: object[], bestIteration: number, bestScore: number, baselineScore: number}>}
 * @throws {Error} When iteration 0 has no harness directory.
 */
async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgress) {
  const history = [];
  let bestScore = -1;
  let bestIteration = 0;
  let baselineScore = 0;

  // Persist a log record and remember it for the proposer / final result.
  const record = async (log) => {
    await writeIterationLog(workspacePath, log);
    history.push(log);
  };

  for (let iter = 0; iter < evolveConfig.maxIterations; iter++) {
    const harnessPath = evolveHarnessDir(workspacePath, iter);
    try {
      await fs21.access(harnessPath);
    } catch {
      if (iter === 0) {
        throw new Error(
          "No baseline harness found. Run `kairn evolve baseline` first."
        );
      }
      // A later iteration without a harness means nothing was produced for it.
      break;
    }

    onProgress?.({ type: "iteration-start", iteration: iter });
    const { results, aggregate } = await evaluateAll(
      tasks,
      harnessPath,
      workspacePath,
      iter,
      kairnConfig
    );
    onProgress?.({ type: "iteration-scored", iteration: iter, score: aggregate });
    if (iter === 0) baselineScore = aggregate;

    const isLastIteration = iter === evolveConfig.maxIterations - 1;
    const nextIterDir = path21.join(workspacePath, "iterations", (iter + 1).toString());
    const nextHarnessPath = path21.join(nextIterDir, "harness");

    // Regression: log it and restart the next iteration from the best harness.
    if (iter > 0 && aggregate < bestScore) {
      onProgress?.({
        type: "rollback",
        iteration: iter,
        score: aggregate,
        message: `Regression: ${aggregate.toFixed(1)}% < ${bestScore.toFixed(1)}%. Rolling back.`
      });
      await record(makeEvolveIterationLog(iter, aggregate, results));
      if (!isLastIteration) {
        await copyDir(evolveHarnessDir(workspacePath, bestIteration), nextHarnessPath);
      }
      continue;
    }

    bestScore = aggregate;
    bestIteration = iter;

    if (aggregate >= 100) {
      onProgress?.({ type: "perfect-score", iteration: iter, score: aggregate });
      await record(makeEvolveIterationLog(iter, aggregate, results));
      break;
    }

    // Nothing would consume a proposal made on the last iteration.
    if (isLastIteration) {
      await record(makeEvolveIterationLog(iter, aggregate, results));
      break;
    }

    onProgress?.({ type: "proposing", iteration: iter });
    let proposal;
    try {
      proposal = await propose(
        iter,
        workspacePath,
        harnessPath,
        history,
        tasks,
        kairnConfig,
        evolveConfig.proposerModel
      );
    } catch {
      // Best effort: a failed proposal skips mutation and carries the current
      // harness forward so the loop can try again on the next iteration.
      await copyDir(harnessPath, nextHarnessPath);
      await record(makeEvolveIterationLog(iter, aggregate, results));
      continue;
    }

    let diffPatch = "";
    let appliedCount = proposal.mutations.length;
    try {
      const mutationResult = await applyMutations(
        harnessPath,
        nextIterDir,
        proposal.mutations
      );
      diffPatch = mutationResult.diffPatch;
    } catch {
      // applyMutations may have written part of the next harness before it
      // failed; clear it so the fallback is an exact copy of the current
      // harness rather than a mix of both.
      appliedCount = 0;
      await fs21.rm(nextHarnessPath, { recursive: true, force: true });
      await copyDir(harnessPath, nextHarnessPath);
    }

    onProgress?.({
      type: "mutations-applied",
      iteration: iter,
      mutationCount: appliedCount
    });
    await record(makeEvolveIterationLog(iter, aggregate, results, proposal, diffPatch));
  }

  onProgress?.({
    type: "complete",
    iteration: history.length > 0 ? history.length - 1 : 0,
    score: bestScore
  });
  return {
    iterations: history,
    bestIteration,
    bestScore,
    baselineScore
  };
}
5001
+
5002
+ // src/evolve/report.ts
5003
+ import fs22 from "fs/promises";
5004
+ import path22 from "path";
5005
+
5006
+ // src/evolve/diagnosis.ts
/**
 * Reduce a task score record to a number.
 *
 * @param {{score?: number|null, pass?: boolean}} s - Score record.
 * @returns {number} `s.score` when it is neither null nor undefined,
 *   otherwise 100 for a pass and 0 for a fail.
 */
function numericScore(s) {
  const explicit = s.score;
  if (explicit !== null && explicit !== undefined) {
    return explicit;
  }
  if (s.pass) {
    return 100;
  }
  return 0;
}
/**
 * Attribute per-task score changes to the mutations proposed one iteration
 * earlier.
 *
 * For each consecutive pair (prev, curr) where `prev.proposal` holds at least
 * one mutation, the per-task delta `curr - prev` is computed over the union of
 * task ids; a task missing on one side counts as 0 there.
 *
 * @param {Array<object>} iterations - Iteration logs in order.
 * @param {Array<object>} _tasks - Unused.
 * @returns {{entries: Array<{iteration: number, mutationSummary: string,
 *   helpedTasks: Array<{taskId: string, delta: number}>,
 *   hurtTasks: Array<{taskId: string, delta: number}>,
 *   netScoreDelta: number}>}} `iteration` is the index of `curr`;
 *   `netScoreDelta` is the sum of all per-task deltas.
 */
function diagnoseCounterfactuals(iterations, _tasks) {
  const scoreIn = (log, taskId) => {
    const record = log.taskResults[taskId];
    return record ? numericScore(record) : 0;
  };

  const entries = [];
  for (const [offset, curr] of iterations.slice(1).entries()) {
    const prev = iterations[offset];
    const proposal = prev.proposal;
    // Only iterations that evaluate the outcome of real mutations are diagnosed.
    if (!proposal) continue;
    if (proposal.mutations.length === 0) continue;

    const mutationSummary = proposal.mutations
      .map((m) => `${m.action} in ${m.file}: ${m.rationale}`)
      .join("; ");

    const taskIds = /* @__PURE__ */ new Set([
      ...Object.keys(prev.taskResults),
      ...Object.keys(curr.taskResults)
    ]);
    const deltas = [...taskIds].map((taskId) => ({
      taskId,
      delta: scoreIn(curr, taskId) - scoreIn(prev, taskId)
    }));

    entries.push({
      iteration: offset + 1,
      mutationSummary,
      helpedTasks: deltas.filter((d) => d.delta > 0),
      hurtTasks: deltas.filter((d) => d.delta < 0),
      netScoreDelta: deltas.reduce((sum, d) => sum + d.delta, 0)
    });
  }
  return { entries };
}
5047
+
5048
+ // src/evolve/report.ts
5049
+ import { parse as yamlParse } from "yaml";
/**
 * Convert a task score record into a numeric score.
 *
 * @param {{score?: number|null, pass?: boolean}} s - Score record.
 * @returns {number} The explicit `score` unless it is null/undefined, in
 *   which case a pass maps to 100 and a fail to 0.
 */
function numericScore2(s) {
  const { score } = s;
  // Loose `== null` matches both null and undefined, like `??`.
  return score == null ? (s.pass ? 100 : 0) : score;
}
/**
 * Load the log of every iteration found under `<workspace>/iterations`.
 *
 * Only entries whose name consists purely of digits are treated as
 * iterations; names such as `2-backup` or `3.tmp` are ignored instead of being
 * parsed as iteration 2 or 3. Names that map to the same number are loaded
 * once. Iterations whose log cannot be loaded are omitted.
 *
 * @param {string} workspacePath - Evolve workspace root.
 * @returns {Promise<object[]>} Iteration logs in ascending iteration order;
 *   an empty array when the iterations directory cannot be read.
 */
async function loadAllIterations(workspacePath) {
  const iterDir = path22.join(workspacePath, "iterations");
  let entries;
  try {
    entries = await fs22.readdir(iterDir);
  } catch {
    // No iterations directory yet: nothing has been run.
    return [];
  }

  const iterNums = [
    ...new Set(
      entries
        .filter((name) => /^\d+$/.test(name))
        .map((name) => Number.parseInt(name, 10))
    )
  ].sort((a, b) => a - b);

  const iterations = [];
  for (const n of iterNums) {
    const log = await loadIterationLog(workspacePath, n);
    if (log) iterations.push(log);
  }
  return iterations;
}
/**
 * Read the task list from `<workspace>/tasks.yaml`.
 *
 * Best effort: an unreadable or unparsable file, or a document whose `tasks`
 * entry is not an array, yields an empty list so that report generation can
 * continue without task data.
 *
 * @param {string} workspacePath - Evolve workspace root.
 * @returns {Promise<Array>} The `tasks` array, or [] when unavailable.
 */
async function loadTasks(workspacePath) {
  try {
    const content = await fs22.readFile(path22.join(workspacePath, "tasks.yaml"), "utf-8");
    const parsed = yamlParse(content);
    // Callers iterate the result with array methods, so never hand back a
    // scalar or mapping that happens to sit under `tasks`.
    return Array.isArray(parsed?.tasks) ? parsed.tasks : [];
  } catch {
    return [];
  }
}
/**
 * Build one leaderboard row per task, listing its score in each iteration.
 *
 * @param {Array<{iteration: number, taskResults: object}>} iterations
 * @param {Array<{id: string}>} tasks
 * @returns {Array<{taskId: string, scores: Object<number, number>,
 *   bestIteration: number, bestScore: number}>} Rows in task order. A task
 *   with no results keeps `bestScore` -1 and `bestIteration` 0; on ties the
 *   earliest iteration is kept as best.
 */
function buildLeaderboard(iterations, tasks) {
  return tasks.map(({ id: taskId }) => {
    const row = { taskId, scores: {}, bestIteration: 0, bestScore: -1 };
    for (const log of iterations) {
      const record = log.taskResults[taskId];
      if (!record) continue;
      const value = numericScore2(record);
      row.scores[log.iteration] = value;
      if (value > row.bestScore) {
        row.bestScore = value;
        row.bestIteration = log.iteration;
      }
    }
    return row;
  });
}
/**
 * Classify an iteration log for display.
 *
 * Iterations that stop the loop (perfect score, last iteration) are logged
 * without a proposal, exactly like rollbacks. The score-based labels are
 * therefore checked first; otherwise "perfect" could never be returned and a
 * best-scoring final iteration would be shown as a rollback.
 *
 * @param {{iteration: number, score: number, proposal: ?object, diffPatch: ?string}} iter
 * @param {number} bestIteration - Index of the best-scoring iteration.
 * @returns {"baseline"|"perfect"|"best"|"rollback"|"evaluated"}
 */
function iterationStatus(iter, bestIteration) {
  if (iter.iteration === 0) return "baseline";
  if (iter.score >= 100) return "perfect";
  if (iter.iteration === bestIteration) return "best";
  // No proposal and no patch on a non-best iteration: nothing was carried forward.
  if (!iter.proposal && !iter.diffPatch) return "rollback";
  return "evaluated";
}
/**
 * Render the evolution run stored in a workspace as a Markdown report.
 *
 * Sections: Overview (baseline/best/improvement), Iterations table,
 * Leaderboard (per-task score per iteration; omitted when there are no
 * leaderboard rows) and Counterfactual Diagnosis (omitted when empty).
 *
 * @param {string} workspacePath - Evolve workspace root.
 * @returns {Promise<string>} Markdown text; a short placeholder report when
 *   no iterations have been recorded.
 */
async function generateMarkdownReport(workspacePath) {
  const iterations = await loadAllIterations(workspacePath);
  const tasks = await loadTasks(workspacePath);
  if (iterations.length === 0) {
    return "# Evolution Report\n\nNo iterations found. Run `kairn evolve run` first.\n";
  }

  const baselineScore = iterations[0].score;
  // Strict `>` keeps the earliest iteration when scores tie.
  const bestIter = iterations.reduce(
    (best, curr) => (curr.score > best.score ? curr : best),
    iterations[0]
  );
  const improvement = bestIter.score - baselineScore;
  const counterfactuals = diagnoseCounterfactuals(iterations, tasks);
  const leaderboard = buildLeaderboard(iterations, tasks);

  const lines = [];
  lines.push("# Evolution Report");
  lines.push("");
  lines.push("## Overview");
  lines.push("");
  lines.push(`| Metric | Value |`);
  lines.push(`|--------|-------|`);
  lines.push(`| Total iterations | ${iterations.length} |`);
  lines.push(`| Baseline score | ${baselineScore.toFixed(1)}% |`);
  lines.push(`| Best score | ${bestIter.score.toFixed(1)}% |`);
  lines.push(`| Best iteration | ${bestIter.iteration} |`);
  lines.push(`| Improvement | ${improvement >= 0 ? "+" : ""}${improvement.toFixed(1)} points |`);
  lines.push("");

  lines.push("## Iterations");
  lines.push("");
  lines.push("| Iter | Score | Mutations | Status |");
  lines.push("|------|-------|-----------|--------|");
  for (const iter of iterations) {
    const mutations = iter.proposal?.mutations.length ?? 0;
    const mutStr = mutations > 0 ? mutations.toString() : "-";
    const status = iterationStatus(iter, bestIter.iteration);
    lines.push(`| ${iter.iteration} | ${iter.score.toFixed(1)}% | ${mutStr} | ${status} |`);
  }
  lines.push("");

  if (leaderboard.length > 0) {
    lines.push("## Leaderboard");
    lines.push("");
    const iterNums = iterations.map((i) => i.iteration);
    const headerCols = ["Task", ...iterNums.map((n) => `Iter ${n}`), "Best"];
    lines.push(`| ${headerCols.join(" | ")} |`);
    lines.push(`| ${headerCols.map(() => "---").join(" | ")} |`);
    for (const entry of leaderboard) {
      const scoreCols = iterNums.map((n) => {
        const s = entry.scores[n];
        return s !== void 0 ? `${s.toFixed(0)}%` : "-";
      });
      // A task that was never scored has no meaningful best; show "-" rather
      // than the leaderboard's internal placeholder score.
      const hasScores = Object.keys(entry.scores).length > 0;
      const bestCol = hasScores
        ? `${entry.bestScore.toFixed(0)}% (iter ${entry.bestIteration})`
        : "-";
      lines.push(`| ${entry.taskId} | ${scoreCols.join(" | ")} | ${bestCol} |`);
    }
    lines.push("");
  }

  if (counterfactuals.entries.length > 0) {
    lines.push("## Counterfactual Diagnosis");
    lines.push("");
    for (const entry of counterfactuals.entries) {
      const sign = entry.netScoreDelta >= 0 ? "+" : "";
      lines.push(`### Iteration ${entry.iteration} (net ${sign}${entry.netScoreDelta.toFixed(1)} points)`);
      lines.push("");
      lines.push(`**Mutations:** ${entry.mutationSummary}`);
      lines.push("");
      if (entry.helpedTasks.length > 0) {
        lines.push("**Helped:**");
        for (const t of entry.helpedTasks) {
          lines.push(`- ${t.taskId}: +${t.delta.toFixed(1)}`);
        }
        lines.push("");
      }
      if (entry.hurtTasks.length > 0) {
        lines.push("**Hurt:**");
        for (const t of entry.hurtTasks) {
          // Negative deltas already carry their sign.
          lines.push(`- ${t.taskId}: ${t.delta.toFixed(1)}`);
        }
        lines.push("");
      }
    }
  }
  return lines.join("\n");
}
/**
 * Assemble the machine-readable form of the evolution report.
 *
 * @param {string} workspacePath - Evolve workspace root.
 * @returns {Promise<{overview: object, iterations: object[], leaderboard: object[], counterfactuals: object}>}
 *   With no recorded iterations, the overview scores and best iteration are 0.
 */
async function generateJsonReport(workspacePath) {
  const iterations = await loadAllIterations(workspacePath);
  const tasks = await loadTasks(workspacePath);

  let baselineScore = 0;
  let top = { score: 0, iteration: 0 };
  if (iterations.length > 0) {
    baselineScore = iterations[0].score;
    top = iterations[0];
    // Strictly greater: the earliest iteration wins a tie.
    for (const candidate of iterations) {
      if (candidate.score > top.score) top = candidate;
    }
  }

  const overview = {
    title: "Evolution Report",
    totalIterations: iterations.length,
    baselineScore,
    bestScore: top.score,
    bestIteration: top.iteration,
    improvement: top.score - baselineScore
  };
  const counterfactuals = diagnoseCounterfactuals(iterations, tasks);
  const leaderboard = buildLeaderboard(iterations, tasks);
  const summaries = iterations.map((iter) => {
    const proposed = iter.proposal?.mutations.length;
    return {
      iteration: iter.iteration,
      score: iter.score,
      mutationCount: proposed ?? 0,
      status: iterationStatus(iter, top.iteration)
    };
  });

  return { overview, iterations: summaries, leaderboard, counterfactuals };
}
5210
+
5211
+ // src/commands/evolve.ts
// Evolve settings used when the workspace config.yaml is missing, unreadable,
// or leaves a field unset.
var DEFAULT_CONFIG = {
  model: "claude-sonnet-4-6",
  proposerModel: "claude-opus-4-6",
  scorer: "pass-fail",
  maxIterations: 5,
  parallelTasks: 1,
};
/**
 * Load evolve settings from `<workspace>/config.yaml`, filling every missing
 * (null or undefined) field from DEFAULT_CONFIG.
 *
 * The YAML file uses snake_case keys (`proposer_model`, `max_iterations`,
 * `parallel_tasks`); the returned object uses camelCase. Any failure to read
 * or parse the file yields a copy of DEFAULT_CONFIG.
 *
 * @param {string} workspacePath - Evolve workspace root.
 * @returns {Promise<{model: *, proposerModel: *, scorer: *, maxIterations: *, parallelTasks: *}>}
 */
async function loadEvolveConfigFromWorkspace(workspacePath) {
  // [result property, key in config.yaml]
  const fieldMap = [
    ["model", "model"],
    ["proposerModel", "proposer_model"],
    ["scorer", "scorer"],
    ["maxIterations", "max_iterations"],
    ["parallelTasks", "parallel_tasks"]
  ];
  try {
    const configStr = await fs23.readFile(path23.join(workspacePath, "config.yaml"), "utf-8");
    const parsed = yamlParse2(configStr);
    // An empty document parses to null: treat it like a missing file.
    if (parsed === null || parsed === undefined) {
      return { ...DEFAULT_CONFIG };
    }
    return Object.fromEntries(
      fieldMap.map(([prop, yamlKey]) => [prop, parsed[yamlKey] ?? DEFAULT_CONFIG[prop]])
    );
  } catch {
    return { ...DEFAULT_CONFIG };
  }
}
// Root `evolve` command; its subcommands are registered on it further down.
var evolveCommand = new Command11("evolve");
evolveCommand.description("Evolve your agent environment through automated optimization");
5235
+ evolveCommand.command("init").description("Initialize an evolution workspace with auto-generated tasks").option("--workflow <type>", "Workflow type for template selection", "feature-development").action(async (options) => {
5236
+ try {
5237
+ const projectRoot = process.cwd();
5238
+ console.log(ui.section("Evolve Init"));
5239
+ const claudeDir = path23.join(projectRoot, ".claude");
5240
+ try {
5241
+ await fs23.access(claudeDir);
5242
+ } catch {
5243
+ console.log(ui.error("No .claude/ directory found. Run kairn describe first."));
5244
+ process.exit(1);
5245
+ }
5246
+ const workspace = await createEvolveWorkspace(projectRoot, DEFAULT_CONFIG);
5247
+ console.log(ui.success("Created .kairn-evolve/ workspace"));
5248
+ const spinner = ora2("Generating project-specific eval tasks...").start();
5249
+ let tasks;
5250
+ try {
5251
+ tasks = await autoGenerateTasks(projectRoot, options.workflow);
5252
+ spinner.succeed(`Generated ${tasks.length} eval tasks`);
5253
+ } catch {
5254
+ spinner.fail("LLM task generation failed");
5255
+ const templateIds = selectTemplatesForWorkflow(options.workflow);
5256
+ tasks = templateIds.map((templateId, index) => ({
5257
+ id: `${templateId}-${index + 1}`,
5258
+ template: templateId,
5259
+ description: `${EVAL_TEMPLATES[templateId].description} (project-specific task \u2014 edit in tasks.yaml)`,
5260
+ setup: "npm install",
5261
+ expected_outcome: "Task completed successfully",
5262
+ scoring: "pass-fail",
5263
+ timeout: 300
5264
+ }));
5265
+ console.log(ui.info(`Fell back to ${tasks.length} template placeholders`));
5266
+ }
5267
+ for (const task of tasks) {
5268
+ console.log(chalk14.cyan(` ${task.id}`) + chalk14.dim(` (${task.template}) \u2014 ${task.description.slice(0, 80)}`));
5269
+ }
5270
+ let addMore = true;
5271
+ while (addMore) {
5272
+ try {
5273
+ addMore = await confirm3({ message: "Add another eval task?", default: false });
5274
+ } catch {
5275
+ addMore = false;
5276
+ }
5277
+ if (addMore) {
5278
+ const templateId = await select4({
5279
+ message: "Select eval template:",
5280
+ choices: Object.values(EVAL_TEMPLATES).map((t) => ({
5281
+ name: `${t.name} \u2014 ${t.description}`,
5282
+ value: t.id
5283
+ }))
5284
+ });
5285
+ const addSpinner = ora2("Generating task...").start();
5286
+ try {
5287
+ const config = await loadConfig();
5288
+ if (config) {
5289
+ let claudeMd = "";
5290
+ try {
5291
+ claudeMd = await fs23.readFile(path23.join(claudeDir, "CLAUDE.md"), "utf-8");
5292
+ } catch {
5293
+ }
5294
+ const profile = await buildProjectProfile(projectRoot);
5295
+ const newTasks = await generateTasksFromTemplates(claudeMd, profile, [templateId], config);
5296
+ tasks.push(...newTasks);
5297
+ addSpinner.succeed(`Added ${newTasks.length} task(s)`);
5298
+ } else {
5299
+ addSpinner.fail("No config found");
5300
+ }
5301
+ } catch {
5302
+ addSpinner.fail("Failed to generate task");
4453
5303
  }
4454
5304
  }
4455
5305
  }
@@ -4469,16 +5319,16 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
4469
5319
  evolveCommand.command("baseline").description("Snapshot current .claude/ directory as baseline").action(async () => {
4470
5320
  try {
4471
5321
  const projectRoot = process.cwd();
4472
- const workspace = path19.join(projectRoot, ".kairn-evolve");
5322
+ const workspace = path23.join(projectRoot, ".kairn-evolve");
4473
5323
  console.log(ui.section("Evolve Baseline"));
4474
5324
  try {
4475
- await fs19.access(workspace);
5325
+ await fs23.access(workspace);
4476
5326
  } catch {
4477
5327
  console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
4478
5328
  process.exit(1);
4479
5329
  }
4480
5330
  await snapshotBaseline(projectRoot, workspace);
4481
- const baselineDir = path19.join(workspace, "baseline");
5331
+ const baselineDir = path23.join(workspace, "baseline");
4482
5332
  const fileCount = await countFiles(baselineDir);
4483
5333
  console.log(ui.success(`Baseline snapshot created (${fileCount} files)`));
4484
5334
  } catch (err) {
@@ -4487,61 +5337,225 @@ evolveCommand.command("baseline").description("Snapshot current .claude/ directo
4487
5337
  process.exit(1);
4488
5338
  }
4489
5339
  });
4490
- evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").action(async (options) => {
5340
+ evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").action(async (options) => {
4491
5341
  try {
4492
5342
  const projectRoot = process.cwd();
4493
- const workspace = path19.join(projectRoot, ".kairn-evolve");
5343
+ const workspace = path23.join(projectRoot, ".kairn-evolve");
4494
5344
  console.log(ui.section("Evolve Run"));
4495
5345
  try {
4496
- await fs19.access(workspace);
5346
+ await fs23.access(workspace);
4497
5347
  } catch {
4498
5348
  console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
4499
5349
  process.exit(1);
4500
5350
  }
4501
- const tasksPath = path19.join(workspace, "tasks.yaml");
5351
+ const tasksPath = path23.join(workspace, "tasks.yaml");
4502
5352
  let tasksContent;
4503
5353
  try {
4504
- tasksContent = await fs19.readFile(tasksPath, "utf-8");
5354
+ tasksContent = await fs23.readFile(tasksPath, "utf-8");
4505
5355
  } catch {
4506
5356
  console.log(ui.error("No tasks.yaml found. Run kairn evolve init first."));
4507
5357
  process.exit(1);
4508
5358
  }
4509
- const parsed = yamlParse(tasksContent);
5359
+ const parsed = yamlParse2(tasksContent);
4510
5360
  if (!parsed?.tasks || parsed.tasks.length === 0) {
4511
5361
  console.log(ui.error("No tasks found in tasks.yaml"));
4512
5362
  process.exit(1);
4513
5363
  }
4514
- const tasksToRun = options.task ? parsed.tasks.filter((t) => t.id === options.task) : parsed.tasks;
4515
- if (tasksToRun.length === 0) {
4516
- console.log(ui.error(`Task "${options.task}" not found in tasks.yaml`));
5364
+ if (options.task) {
5365
+ const tasksToRun = parsed.tasks.filter((t) => t.id === options.task);
5366
+ if (tasksToRun.length === 0) {
5367
+ console.log(ui.error(`Task "${options.task}" not found in tasks.yaml`));
5368
+ process.exit(1);
5369
+ }
5370
+ console.log(ui.info(`Running ${tasksToRun.length} task(s)...`));
5371
+ console.log("");
5372
+ const config = await loadConfig();
5373
+ const harnessPath = path23.join(projectRoot, ".claude");
5374
+ const results = [];
5375
+ for (const task of tasksToRun) {
5376
+ const traceDir = path23.join(workspace, "traces", "0", task.id);
5377
+ const spinner = ora2(`Running: ${task.id}`).start();
5378
+ const result = await runTask(task, harnessPath, traceDir, 0);
5379
+ if (config) {
5380
+ const stdout = await fs23.readFile(path23.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
5381
+ const stderr = await fs23.readFile(path23.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
5382
+ const score = await scoreTask(task, traceDir, stdout, stderr, config);
5383
+ result.score = score;
5384
+ await writeScore(traceDir, score);
5385
+ }
5386
+ results.push(result);
5387
+ const status = result.score.pass ? chalk14.green("PASS") : chalk14.red("FAIL");
5388
+ const scoreStr = result.score.score !== void 0 ? chalk14.dim(` (${result.score.score}%)`) : "";
5389
+ spinner.stop();
5390
+ console.log(` ${status} ${task.id}${scoreStr}${result.score.details ? chalk14.dim(` \u2014 ${result.score.details}`) : ""}`);
5391
+ }
5392
+ const passed = results.filter((r) => r.score.pass).length;
5393
+ console.log("");
5394
+ console.log(ui.info(`Results: ${passed}/${results.length} passed`));
5395
+ console.log(ui.info("Traces written to .kairn-evolve/traces/0/"));
5396
+ } else {
5397
+ const kairnConfig = await loadConfig();
5398
+ if (!kairnConfig) {
5399
+ console.log(ui.error("No config found. Run kairn init first."));
5400
+ process.exit(1);
5401
+ }
5402
+ const evolveConfig = await loadEvolveConfigFromWorkspace(workspace);
5403
+ const iterations = parseInt(options.iterations ?? "5", 10);
5404
+ if (isNaN(iterations) || iterations < 1) {
5405
+ console.log(ui.error("--iterations must be a positive integer"));
5406
+ process.exit(1);
5407
+ }
5408
+ evolveConfig.maxIterations = iterations;
5409
+ try {
5410
+ await fs23.access(path23.join(workspace, "iterations", "0", "harness"));
5411
+ } catch {
5412
+ console.log(ui.error("No baseline harness found. Run kairn evolve baseline first."));
5413
+ process.exit(1);
5414
+ }
5415
+ const result = await evolve(workspace, parsed.tasks, kairnConfig, evolveConfig, (event) => {
5416
+ switch (event.type) {
5417
+ case "iteration-start":
5418
+ console.log(ui.section(`Iteration ${event.iteration}`));
5419
+ break;
5420
+ case "iteration-scored": {
5421
+ const scoreColor = event.score !== void 0 && event.score >= 100 ? chalk14.green : event.score !== void 0 && event.score >= 60 ? chalk14.yellow : chalk14.red;
5422
+ console.log(` Score: ${scoreColor((event.score?.toFixed(1) ?? "0") + "%")}`);
5423
+ break;
5424
+ }
5425
+ case "rollback":
5426
+ console.log(chalk14.yellow(` Warning: ${event.message ?? "Regression detected"}`));
5427
+ break;
5428
+ case "proposing":
5429
+ console.log(chalk14.dim(" Proposer analyzing traces..."));
5430
+ break;
5431
+ case "mutations-applied":
5432
+ console.log(chalk14.dim(` Applied ${event.mutationCount ?? 0} mutation(s)`));
5433
+ break;
5434
+ case "perfect-score":
5435
+ console.log(chalk14.green(" Perfect score. Stopping."));
5436
+ break;
5437
+ case "complete":
5438
+ break;
5439
+ }
5440
+ });
5441
+ console.log(ui.section("Evolution Summary"));
5442
+ console.log(` Iterations: ${result.iterations.length}`);
5443
+ console.log(` Baseline: ${result.baselineScore.toFixed(1)}%`);
5444
+ console.log(` Best: ${chalk14.green(result.bestScore.toFixed(1) + "%")} (iteration ${result.bestIteration})`);
5445
+ const improvement = result.bestScore - result.baselineScore;
5446
+ if (improvement > 0) {
5447
+ console.log(` Improvement: ${chalk14.green("+" + improvement.toFixed(1) + " points")}`);
5448
+ } else {
5449
+ console.log(` Improvement: ${improvement.toFixed(1)} points`);
5450
+ }
5451
+ console.log("");
5452
+ console.log(" Iter Score Mutations Status");
5453
+ for (const iter of result.iterations) {
5454
+ const scoreStr = iter.score.toFixed(1).padStart(6) + "%";
5455
+ const mutations = iter.proposal?.mutations.length ?? 0;
5456
+ const mutStr = mutations > 0 ? mutations.toString() : "-";
5457
+ let status = "evaluated";
5458
+ if (iter.iteration === 0) status = "baseline";
5459
+ else if (!iter.proposal && !iter.diffPatch) status = "rollback";
5460
+ else if (iter.score >= 100) status = "perfect";
5461
+ else if (iter.iteration === result.bestIteration) status = "best";
5462
+ console.log(` ${iter.iteration.toString().padStart(4)} ${scoreStr} ${mutStr.padStart(9)} ${status}`);
5463
+ }
5464
+ }
5465
+ } catch (err) {
5466
+ const msg = err instanceof Error ? err.message : String(err);
5467
+ console.log(ui.error(msg));
5468
+ process.exit(1);
5469
+ }
5470
+ });
5471
+ evolveCommand.command("report").description("Generate a summary report of the evolution run").option("--json", "Output machine-readable JSON instead of Markdown").action(async (options) => {
5472
+ try {
5473
+ const projectRoot = process.cwd();
5474
+ const workspace = path23.join(projectRoot, ".kairn-evolve");
5475
+ try {
5476
+ await fs23.access(workspace);
5477
+ } catch {
5478
+ console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
4517
5479
  process.exit(1);
4518
5480
  }
4519
- console.log(ui.info(`Running ${tasksToRun.length} task(s)...`));
4520
- console.log("");
4521
- const config = await loadConfig();
4522
- const harnessPath = path19.join(projectRoot, ".claude");
4523
- const results = [];
4524
- for (const task of tasksToRun) {
4525
- const traceDir = path19.join(workspace, "traces", "0", task.id);
4526
- const spinner = ora2(`Running: ${task.id}`).start();
4527
- const result = await runTask(task, harnessPath, traceDir, 0);
4528
- if (config) {
4529
- const stdout = await fs19.readFile(path19.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
4530
- const stderr = await fs19.readFile(path19.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
4531
- const score = await scoreTask(task, traceDir, stdout, stderr, config);
4532
- result.score = score;
4533
- await writeScore(traceDir, score);
5481
+ if (options.json) {
5482
+ const report = await generateJsonReport(workspace);
5483
+ console.log(JSON.stringify(report, null, 2));
5484
+ } else {
5485
+ const markdown = await generateMarkdownReport(workspace);
5486
+ console.log(markdown);
5487
+ }
5488
+ } catch (err) {
5489
+ const msg = err instanceof Error ? err.message : String(err);
5490
+ console.log(ui.error(msg));
5491
+ process.exit(1);
5492
+ }
5493
+ });
5494
+ evolveCommand.command("diff <iter1> <iter2>").description("Show harness changes between two iterations").action(async (iter1Str, iter2Str) => {
5495
+ try {
5496
+ const projectRoot = process.cwd();
5497
+ const workspace = path23.join(projectRoot, ".kairn-evolve");
5498
+ const iter1 = parseInt(iter1Str, 10);
5499
+ const iter2 = parseInt(iter2Str, 10);
5500
+ if (isNaN(iter1) || isNaN(iter2)) {
5501
+ console.log(ui.error("Both arguments must be integers (iteration numbers)"));
5502
+ process.exit(1);
5503
+ }
5504
+ const harness1 = path23.join(workspace, "iterations", iter1.toString(), "harness");
5505
+ const harness2 = path23.join(workspace, "iterations", iter2.toString(), "harness");
5506
+ try {
5507
+ await fs23.access(harness1);
5508
+ } catch {
5509
+ console.log(ui.error(`Iteration ${iter1} harness not found at ${harness1}`));
5510
+ process.exit(1);
5511
+ }
5512
+ try {
5513
+ await fs23.access(harness2);
5514
+ } catch {
5515
+ console.log(ui.error(`Iteration ${iter2} harness not found at ${harness2}`));
5516
+ process.exit(1);
5517
+ }
5518
+ console.log(ui.section(`Diff: Iteration ${iter1} \u2192 ${iter2}`));
5519
+ const diffPatch = await generateDiff2(harness1, harness2);
5520
+ if (!diffPatch) {
5521
+ console.log(chalk14.dim(" No harness changes between these iterations."));
5522
+ } else {
5523
+ for (const line of diffPatch.split("\n")) {
5524
+ if (line.startsWith("---") || line.startsWith("+++")) {
5525
+ console.log(chalk14.bold(line));
5526
+ } else if (line.startsWith("+")) {
5527
+ console.log(chalk14.green(line));
5528
+ } else if (line.startsWith("-")) {
5529
+ console.log(chalk14.red(line));
5530
+ } else {
5531
+ console.log(line);
5532
+ }
5533
+ }
5534
+ }
5535
+ const [log1, log2] = await Promise.all([
5536
+ loadIterationLog(workspace, iter1),
5537
+ loadIterationLog(workspace, iter2)
5538
+ ]);
5539
+ if (log1 && log2) {
5540
+ console.log("");
5541
+ console.log(ui.section("Score Comparison"));
5542
+ console.log("");
5543
+ console.log(" Task Iter " + iter1 + " Iter " + iter2 + " Delta");
5544
+ const allTaskIds = /* @__PURE__ */ new Set([
5545
+ ...Object.keys(log1.taskResults),
5546
+ ...Object.keys(log2.taskResults)
5547
+ ]);
5548
+ for (const taskId of [...allTaskIds].sort()) {
5549
+ const s1 = log1.taskResults[taskId];
5550
+ const s2 = log2.taskResults[taskId];
5551
+ const score1 = s1 ? s1.score ?? (s1.pass ? 100 : 0) : 0;
5552
+ const score2 = s2 ? s2.score ?? (s2.pass ? 100 : 0) : 0;
5553
+ const delta = score2 - score1;
5554
+ const deltaStr = delta > 0 ? chalk14.green(`+${delta.toFixed(0)}`) : delta < 0 ? chalk14.red(delta.toFixed(0).toString()) : chalk14.dim("0");
5555
+ const name = taskId.padEnd(30);
5556
+ console.log(` ${name} ${score1.toFixed(0).padStart(5)}% ${score2.toFixed(0).padStart(5)}% ${deltaStr}`);
4534
5557
  }
4535
- results.push(result);
4536
- const status = result.score.pass ? chalk14.green("PASS") : chalk14.red("FAIL");
4537
- const scoreStr = result.score.score !== void 0 ? chalk14.dim(` (${result.score.score}%)`) : "";
4538
- spinner.stop();
4539
- console.log(` ${status} ${task.id}${scoreStr}${result.score.details ? chalk14.dim(` \u2014 ${result.score.details}`) : ""}`);
4540
5558
  }
4541
- const passed = results.filter((r) => r.score.pass).length;
4542
- console.log("");
4543
- console.log(ui.info(`Results: ${passed}/${results.length} passed`));
4544
- console.log(ui.info("Traces written to .kairn-evolve/traces/0/"));
4545
5559
  } catch (err) {
4546
5560
  const msg = err instanceof Error ? err.message : String(err);
4547
5561
  console.log(ui.error(msg));
@@ -4551,10 +5565,10 @@ evolveCommand.command("run").description("Run tasks against the current harness"
4551
5565
  async function countFiles(dir) {
4552
5566
  let count = 0;
4553
5567
  try {
4554
- const entries = await fs19.readdir(dir, { withFileTypes: true });
5568
+ const entries = await fs23.readdir(dir, { withFileTypes: true });
4555
5569
  for (const entry of entries) {
4556
5570
  if (entry.isDirectory()) {
4557
- count += await countFiles(path19.join(dir, entry.name));
5571
+ count += await countFiles(path23.join(dir, entry.name));
4558
5572
  } else {
4559
5573
  count++;
4560
5574
  }