kairn-cli 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +954 -264
- package/dist/cli.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -221,7 +221,7 @@ var ui = {
|
|
|
221
221
|
// Key-value pairs
|
|
222
222
|
kv: (key, value) => ` ${chalk.cyan(key.padEnd(14))} ${value}`,
|
|
223
223
|
// File list
|
|
224
|
-
file: (
|
|
224
|
+
file: (path23) => chalk.dim(` ${path23}`),
|
|
225
225
|
// Tool display
|
|
226
226
|
tool: (name, reason) => ` ${warmStone("\u25CF")} ${chalk.bold(name)}
|
|
227
227
|
${chalk.dim(reason)}`,
|
|
@@ -3694,8 +3694,8 @@ var keysCommand = new Command10("keys").description("Add or update API keys for
|
|
|
3694
3694
|
import { Command as Command11 } from "commander";
|
|
3695
3695
|
import chalk14 from "chalk";
|
|
3696
3696
|
import ora2 from "ora";
|
|
3697
|
-
import
|
|
3698
|
-
import
|
|
3697
|
+
import fs22 from "fs/promises";
|
|
3698
|
+
import path22 from "path";
|
|
3699
3699
|
import { parse as yamlParse } from "yaml";
|
|
3700
3700
|
import { confirm as confirm3, select as select4 } from "@inquirer/prompts";
|
|
3701
3701
|
|
|
@@ -4014,8 +4014,8 @@ async function copyDir(src, dest) {
|
|
|
4014
4014
|
}
|
|
4015
4015
|
|
|
4016
4016
|
// src/evolve/runner.ts
|
|
4017
|
-
import { exec, spawn } from "child_process";
|
|
4018
|
-
import { promisify } from "util";
|
|
4017
|
+
import { exec as exec2, spawn } from "child_process";
|
|
4018
|
+
import { promisify as promisify2 } from "util";
|
|
4019
4019
|
import fs18 from "fs/promises";
|
|
4020
4020
|
import os3 from "os";
|
|
4021
4021
|
import path18 from "path";
|
|
@@ -4023,6 +4023,52 @@ import path18 from "path";
|
|
|
4023
4023
|
// src/evolve/trace.ts
|
|
4024
4024
|
import fs17 from "fs/promises";
|
|
4025
4025
|
import path17 from "path";
|
|
4026
|
+
/**
 * Load a persisted task trace from `traceDir`.
 *
 * Each artifact is read independently; a missing or unreadable file falls back
 * to a default ("" for logs, {} for files_changed/timing, {"pass": false} for
 * the score). Artifacts that exist but do not contain valid JSON fall back to
 * the same defaults instead of throwing, and unparseable lines in
 * tool_calls.jsonl are skipped (the same per-line tolerance parseToolCalls uses).
 *
 * @param {string} traceDir - Directory holding the trace files. Its basename is
 *   used as the task id and its parent directory's name as the iteration number.
 * @returns {Promise<object>} `{ taskId, iteration, stdout, stderr, toolCalls,
 *   filesChanged, score, timing }`.
 */
async function loadTrace(traceDir) {
  const readText = (fileName, fallback) =>
    fs17.readFile(path17.join(traceDir, fileName), "utf-8").catch(() => fallback);
  const parseJsonOr = (text, fallback) => {
    try {
      return JSON.parse(text);
    } catch {
      return fallback;
    }
  };

  // The six reads do not depend on each other, so issue them together.
  const [stdout, stderr, filesChangedStr, timingStr, scoreStr, toolCallsStr] = await Promise.all([
    readText("stdout.log", ""),
    readText("stderr.log", ""),
    readText("files_changed.json", "{}"),
    readText("timing.json", "{}"),
    readText("score.json", '{"pass": false}'),
    readText("tool_calls.jsonl", "")
  ]);

  const toolCalls = [];
  for (const line of toolCallsStr.split("\n")) {
    if (!line.trim()) continue;
    try {
      toolCalls.push(JSON.parse(line));
    } catch {
      // A truncated or corrupt line must not discard the rest of the trace.
    }
  }

  // A non-numeric parent directory name yields NaN, which maps to iteration 0.
  const parentDir = path17.basename(path17.dirname(traceDir));
  const iteration = Number.parseInt(parentDir, 10) || 0;

  return {
    taskId: path17.basename(traceDir),
    iteration,
    stdout,
    stderr,
    toolCalls,
    filesChanged: parseJsonOr(filesChangedStr, {}),
    score: parseJsonOr(scoreStr, { pass: false }),
    timing: parseJsonOr(timingStr, {})
  };
}
|
|
4059
|
+
/**
 * Load every task trace stored under `<workspacePath>/traces/<iteration>/`.
 *
 * Only sub-directories are treated as task traces; stray files in the
 * iteration directory are ignored instead of being turned into empty,
 * failing traces. A task whose trace cannot be loaded is skipped so the
 * remaining tasks are still returned.
 *
 * @param {string} workspacePath - Root of the evolve workspace.
 * @param {number} iteration - Iteration whose traces should be loaded.
 * @returns {Promise<object[]>} Loaded traces in directory-listing order;
 *   an empty array when the iteration directory cannot be read.
 */
async function loadIterationTraces(workspacePath, iteration) {
  const tracesDir = path17.join(workspacePath, "traces", iteration.toString());
  let entries;
  try {
    entries = await fs17.readdir(tracesDir, { withFileTypes: true });
  } catch {
    // No traces recorded for this iteration (or the directory is unreadable).
    return [];
  }
  const traces = [];
  for (const entry of entries) {
    // NOTE(review): symlinked task directories are not followed here — confirm none are expected.
    if (!entry.isDirectory()) continue;
    try {
      traces.push(await loadTrace(path17.join(tracesDir, entry.name)));
    } catch {
      // Keep going: one unreadable task must not hide the others.
    }
  }
  return traces;
}
|
|
4026
4072
|
async function writeTrace(traceDir, trace) {
|
|
4027
4073
|
await fs17.mkdir(traceDir, { recursive: true });
|
|
4028
4074
|
await fs17.writeFile(path17.join(traceDir, "stdout.log"), trace.stdout, "utf-8");
|
|
@@ -4052,156 +4098,32 @@ async function writeScore(traceDir, score) {
|
|
|
4052
4098
|
"utf-8"
|
|
4053
4099
|
);
|
|
4054
4100
|
}
|
|
4055
|
-
|
|
4056
|
-
|
|
4057
|
-
|
|
4058
|
-
|
|
4059
|
-
|
|
4060
|
-
|
|
4061
|
-
|
|
4062
|
-
|
|
4063
|
-
|
|
4064
|
-
|
|
4065
|
-
|
|
4066
|
-
|
|
4067
|
-
|
|
4068
|
-
|
|
4069
|
-
|
|
4070
|
-
|
|
4071
|
-
|
|
4072
|
-
|
|
4073
|
-
const filesBefore = await snapshotFileList(tmpDir);
|
|
4074
|
-
const spawnResult = await spawnClaude(task.description, tmpDir, task.timeout);
|
|
4075
|
-
const filesAfter = await snapshotFileList(tmpDir);
|
|
4076
|
-
const filesChanged = diffFileLists(filesBefore, filesAfter);
|
|
4077
|
-
const toolCalls = parseToolCalls(spawnResult.stdout);
|
|
4078
|
-
const completedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
4079
|
-
const durationMs = Date.now() - startMs;
|
|
4080
|
-
const combinedStderr = setupStderr ? `[setup] ${setupStderr}
|
|
4081
|
-
${spawnResult.stderr}` : spawnResult.stderr;
|
|
4082
|
-
const trace = {
|
|
4083
|
-
taskId: task.id,
|
|
4084
|
-
iteration,
|
|
4085
|
-
stdout: spawnResult.stdout,
|
|
4086
|
-
stderr: combinedStderr,
|
|
4087
|
-
toolCalls,
|
|
4088
|
-
filesChanged,
|
|
4089
|
-
score: { pass: false, details: "Pending scoring" },
|
|
4090
|
-
timing: { startedAt, completedAt, durationMs }
|
|
4091
|
-
};
|
|
4092
|
-
await writeTrace(traceDir, trace);
|
|
4093
|
-
return {
|
|
4094
|
-
taskId: task.id,
|
|
4095
|
-
score: trace.score,
|
|
4096
|
-
traceDir
|
|
4097
|
-
};
|
|
4098
|
-
} finally {
|
|
4099
|
-
await fs18.rm(tmpDir, { recursive: true, force: true }).catch(() => {
|
|
4100
|
-
});
|
|
4101
|
-
}
|
|
4102
|
-
}
|
|
4103
|
-
async function spawnClaude(instruction, cwd, timeoutSec) {
|
|
4104
|
-
return new Promise((resolve) => {
|
|
4105
|
-
const args = ["--print", "--output-format", "text", "--max-turns", "50"];
|
|
4106
|
-
const child = spawn("claude", args, {
|
|
4107
|
-
cwd,
|
|
4108
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
4109
|
-
timeout: timeoutSec * 1e3,
|
|
4110
|
-
env: { ...process.env }
|
|
4111
|
-
});
|
|
4112
|
-
let stdout = "";
|
|
4113
|
-
let stderr = "";
|
|
4114
|
-
child.stdout.on("data", (data) => {
|
|
4115
|
-
stdout += data.toString();
|
|
4116
|
-
});
|
|
4117
|
-
child.stderr.on("data", (data) => {
|
|
4118
|
-
stderr += data.toString();
|
|
4119
|
-
});
|
|
4120
|
-
child.stdin.write(instruction);
|
|
4121
|
-
child.stdin.end();
|
|
4122
|
-
child.on("close", (code) => {
|
|
4123
|
-
resolve({ stdout, stderr, exitCode: code ?? 1 });
|
|
4124
|
-
});
|
|
4125
|
-
child.on("error", (err) => {
|
|
4126
|
-
resolve({
|
|
4127
|
-
stdout,
|
|
4128
|
-
stderr: stderr + `
|
|
4129
|
-
Spawn error: ${err.message}`,
|
|
4130
|
-
exitCode: 1
|
|
4131
|
-
});
|
|
4132
|
-
});
|
|
4133
|
-
});
|
|
4134
|
-
}
|
|
4135
|
-
async function snapshotFileList(dir) {
|
|
4136
|
-
const result = {};
|
|
4137
|
-
async function walk(current) {
|
|
4138
|
-
let entries;
|
|
4139
|
-
try {
|
|
4140
|
-
entries = await fs18.readdir(current, { withFileTypes: true });
|
|
4141
|
-
} catch {
|
|
4142
|
-
return;
|
|
4143
|
-
}
|
|
4144
|
-
for (const entry of entries) {
|
|
4145
|
-
const fullPath = path18.join(current, entry.name);
|
|
4146
|
-
const relativePath = path18.relative(dir, fullPath);
|
|
4147
|
-
if (relativePath.startsWith(".claude")) continue;
|
|
4148
|
-
if (relativePath.startsWith("node_modules")) continue;
|
|
4149
|
-
if (relativePath.startsWith(".git")) continue;
|
|
4150
|
-
if (entry.isDirectory()) {
|
|
4151
|
-
await walk(fullPath);
|
|
4152
|
-
} else {
|
|
4153
|
-
try {
|
|
4154
|
-
const stat = await fs18.stat(fullPath);
|
|
4155
|
-
result[relativePath] = stat.mtimeMs;
|
|
4156
|
-
} catch {
|
|
4157
|
-
}
|
|
4158
|
-
}
|
|
4159
|
-
}
|
|
4160
|
-
}
|
|
4161
|
-
await walk(dir);
|
|
4162
|
-
return result;
|
|
4163
|
-
}
|
|
4164
|
-
function diffFileLists(before, after) {
|
|
4165
|
-
const changes = {};
|
|
4166
|
-
for (const [file, mtime] of Object.entries(after)) {
|
|
4167
|
-
if (!(file in before)) {
|
|
4168
|
-
changes[file] = "created";
|
|
4169
|
-
} else if (before[file] !== mtime) {
|
|
4170
|
-
changes[file] = "modified";
|
|
4171
|
-
}
|
|
4172
|
-
}
|
|
4173
|
-
for (const file of Object.keys(before)) {
|
|
4174
|
-
if (!(file in after)) {
|
|
4175
|
-
changes[file] = "deleted";
|
|
4176
|
-
}
|
|
4177
|
-
}
|
|
4178
|
-
return changes;
|
|
4179
|
-
}
|
|
4180
|
-
function parseToolCalls(stdout) {
|
|
4181
|
-
try {
|
|
4182
|
-
const lines = stdout.split("\n").filter((l) => l.trim());
|
|
4183
|
-
const toolCalls = [];
|
|
4184
|
-
for (const line of lines) {
|
|
4185
|
-
try {
|
|
4186
|
-
const obj = JSON.parse(line);
|
|
4187
|
-
if (obj.type === "tool_use" || obj.tool_name) {
|
|
4188
|
-
toolCalls.push(obj);
|
|
4189
|
-
}
|
|
4190
|
-
} catch {
|
|
4191
|
-
}
|
|
4192
|
-
}
|
|
4193
|
-
return toolCalls;
|
|
4194
|
-
} catch {
|
|
4195
|
-
return [];
|
|
4196
|
-
}
|
|
4101
|
+
/**
 * Persist the summary of one iteration under
 * `<workspacePath>/iterations/<log.iteration>/`.
 *
 * Writes three files:
 *  - scores.json            `{ score, taskResults }`, pretty-printed
 *  - proposer_reasoning.md  the proposal's reasoning, or a baseline placeholder
 *  - mutation_diff.patch    `log.diffPatch`, or an empty file when absent
 *
 * @param {string} workspacePath - Root of the evolve workspace.
 * @param {object} log - Iteration log; reads `iteration`, `score`,
 *   `taskResults`, optional `proposal.reasoning` and optional `diffPatch`.
 * @returns {Promise<void>}
 */
async function writeIterationLog(workspacePath, log) {
  const iterDir = path17.join(workspacePath, "iterations", log.iteration.toString());
  await fs17.mkdir(iterDir, { recursive: true });
  // The three files are independent of each other, so write them concurrently.
  await Promise.all([
    fs17.writeFile(
      path17.join(iterDir, "scores.json"),
      JSON.stringify({ score: log.score, taskResults: log.taskResults }, null, 2),
      "utf-8"
    ),
    fs17.writeFile(
      path17.join(iterDir, "proposer_reasoning.md"),
      log.proposal?.reasoning ?? "Baseline evaluation (no proposal)",
      "utf-8"
    ),
    fs17.writeFile(
      path17.join(iterDir, "mutation_diff.patch"),
      log.diffPatch ?? "",
      "utf-8"
    )
  ]);
}
|
|
4198
4120
|
|
|
4199
4121
|
// src/evolve/exec.ts
|
|
4200
|
-
import { exec
|
|
4201
|
-
import { promisify
|
|
4202
|
-
var
|
|
4122
|
+
import { exec } from "child_process";
|
|
4123
|
+
import { promisify } from "util";
|
|
4124
|
+
var execAsync = promisify(exec);
|
|
4203
4125
|
/**
 * Run a shell command through the promisified `exec`.
 *
 * @param {string} cmd - Command line to execute.
 * @param {string} cwd - Working directory for the command.
 * @param {number} [timeoutMs=30000] - Value passed as exec's `timeout` option.
 * @returns {Promise<object>} Whatever `execAsync` resolves with.
 */
async function execCommand(cmd, cwd, timeoutMs = 3e4) {
  const execOptions = { cwd, timeout: timeoutMs };
  return execAsync(cmd, execOptions);
}
|
|
4206
4128
|
|
|
4207
4129
|
// src/evolve/scorers.ts
|
|
@@ -4373,83 +4295,781 @@ async function scoreTask(task, workspacePath, stdout, stderr, config) {
|
|
|
4373
4295
|
return passFailScorer(task, workspacePath, stdout, stderr);
|
|
4374
4296
|
}
|
|
4375
4297
|
|
|
4376
|
-
// src/
|
|
4377
|
-
var
|
|
4378
|
-
|
|
4379
|
-
|
|
4380
|
-
|
|
4381
|
-
|
|
4382
|
-
|
|
4383
|
-
};
|
|
4384
|
-
var evolveCommand = new Command11("evolve").description("Evolve your agent environment through automated optimization");
|
|
4385
|
-
evolveCommand.command("init").description("Initialize an evolution workspace with auto-generated tasks").option("--workflow <type>", "Workflow type for template selection", "feature-development").action(async (options) => {
|
|
4298
|
+
// src/evolve/runner.ts
|
|
4299
|
+
var execAsync2 = promisify2(exec2);
|
|
4300
|
+
/**
 * Execute one eval task in a throw-away workspace and record its trace.
 *
 * Steps: create `traceDir`; create a temp directory; copy the harness into
 * `<tmp>/.claude`; run the optional setup command (60 s timeout, failures are
 * captured, not fatal); snapshot files; run the agent with the task
 * description; snapshot again and diff; write the trace. The temp directory is
 * always removed afterwards.
 *
 * @param {object} task - Reads `id`, `description`, `timeout` and optional `setup`.
 * @param {string} harnessPath - Directory copied into the workspace as `.claude`.
 * @param {string} traceDir - Directory the trace is written to.
 * @param {number} iteration - Iteration number stored in the trace.
 * @returns {Promise<{taskId: string, score: object, traceDir: string}>} The
 *   score is the placeholder `{ pass: false, details: "Pending scoring" }`.
 */
async function runTask(task, harnessPath, traceDir, iteration) {
  await fs18.mkdir(traceDir, { recursive: true });
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
  const startMs = Date.now();
  const tmpDir = await fs18.mkdtemp(path18.join(os3.tmpdir(), "kairn-evolve-"));
  try {
    await copyDir(harnessPath, path18.join(tmpDir, ".claude"));
    let setupStderr = "";
    // A task without a `setup` field simply has no setup step; calling
    // .trim() on undefined would otherwise abort the whole run.
    const hasSetup = typeof task.setup === "string" && task.setup.trim() !== "";
    if (hasSetup) {
      try {
        await execAsync2(task.setup, { cwd: tmpDir, timeout: 6e4 });
      } catch (err) {
        // Setup failure is recorded in the trace; the task still runs.
        setupStderr = err instanceof Error ? err.message : String(err);
      }
    }
    const filesBefore = await snapshotFileList(tmpDir);
    const spawnResult = await spawnClaude(task.description, tmpDir, task.timeout);
    const filesAfter = await snapshotFileList(tmpDir);
    const filesChanged = diffFileLists(filesBefore, filesAfter);
    const toolCalls = parseToolCalls(spawnResult.stdout);
    const completedAt = (/* @__PURE__ */ new Date()).toISOString();
    const durationMs = Date.now() - startMs;
    const combinedStderr = setupStderr ? `[setup] ${setupStderr}\n${spawnResult.stderr}` : spawnResult.stderr;
    const trace = {
      taskId: task.id,
      iteration,
      stdout: spawnResult.stdout,
      stderr: combinedStderr,
      toolCalls,
      filesChanged,
      score: { pass: false, details: "Pending scoring" },
      timing: { startedAt, completedAt, durationMs }
    };
    await writeTrace(traceDir, trace);
    return {
      taskId: task.id,
      score: trace.score,
      traceDir
    };
  } finally {
    // Best-effort cleanup: a failure to remove the temp dir must not mask the result.
    await fs18.rm(tmpDir, { recursive: true, force: true }).catch(() => {
    });
  }
}
|
|
4345
|
+
/**
 * Run the `claude` CLI non-interactively and collect its output.
 *
 * The instruction is written to the child's stdin. The returned promise never
 * rejects: a spawn failure resolves with exit code 1 and the error message
 * appended to stderr.
 *
 * @param {string} instruction - Text piped to the child's stdin.
 * @param {string} cwd - Working directory for the child process.
 * @param {number} timeoutSec - Seconds; passed to spawn as `timeout` (in ms).
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 *   `exitCode` is 1 when the child reports no exit code.
 */
async function spawnClaude(instruction, cwd, timeoutSec) {
  return new Promise((resolve) => {
    const args = ["--print", "--output-format", "text", "--max-turns", "50"];
    const child = spawn("claude", args, {
      cwd,
      stdio: ["pipe", "pipe", "pipe"],
      timeout: timeoutSec * 1e3,
      env: { ...process.env }
    });
    let stdout = "";
    let stderr = "";
    // Decode at the stream level so a multi-byte UTF-8 character split across
    // two chunks is not corrupted by per-chunk toString().
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    // If the child failed to start or exits early, the stdin write can fail;
    // without a listener that stream error would be an unhandled 'error'
    // event. The failure itself is reported through 'error'/'close' below.
    child.stdin.on("error", () => {
    });
    child.stdin.write(instruction);
    child.stdin.end();
    child.on("close", (code) => {
      resolve({ stdout, stderr, exitCode: code ?? 1 });
    });
    child.on("error", (err) => {
      resolve({
        stdout,
        stderr: `${stderr}\nSpawn error: ${err.message}`,
        exitCode: 1
      });
    });
  });
}
|
|
4377
|
+
/**
 * Recursively map every file under `dir` to its modification time.
 *
 * The top-level entries `.claude`, `node_modules` and `.git` are excluded.
 * The match is on the exact first path segment, so look-alikes such as
 * `.gitignore`, `.github/` or `node_modules_backup/` are still tracked.
 * Unreadable directories and files that cannot be stat'ed are skipped.
 *
 * @param {string} dir - Root directory to scan.
 * @returns {Promise<Object<string, number>>} Path relative to `dir` -> `mtimeMs`.
 */
async function snapshotFileList(dir) {
  const ignoredTopLevel = /* @__PURE__ */ new Set([".claude", "node_modules", ".git"]);
  const result = {};
  async function walk(current) {
    let entries;
    try {
      entries = await fs18.readdir(current, { withFileTypes: true });
    } catch {
      return;
    }
    for (const entry of entries) {
      const fullPath = path18.join(current, entry.name);
      const relativePath = path18.relative(dir, fullPath);
      const [topSegment] = relativePath.split(path18.sep);
      if (ignoredTopLevel.has(topSegment)) continue;
      if (entry.isDirectory()) {
        await walk(fullPath);
        continue;
      }
      try {
        const stat = await fs18.stat(fullPath);
        result[relativePath] = stat.mtimeMs;
      } catch {
        // The entry vanished or is unreadable; leave it out of the snapshot.
      }
    }
  }
  await walk(dir);
  return result;
}
|
|
4406
|
+
/**
 * Compare two file snapshots (path -> mtime) and classify each difference.
 *
 * @param {Object<string, number>} before - Snapshot taken before the run.
 * @param {Object<string, number>} after - Snapshot taken after the run.
 * @returns {Object<string, "created"|"modified"|"deleted">} Only paths that
 *   differ appear in the result.
 */
function diffFileLists(before, after) {
  // Own-property checks only: the `in` operator also sees Object.prototype
  // members, so a file literally named "constructor" or "toString" would be
  // treated as present in both snapshots.
  const has = (snapshot, file) => Object.prototype.hasOwnProperty.call(snapshot, file);
  const changes = {};
  for (const [file, mtime] of Object.entries(after)) {
    if (!has(before, file)) {
      changes[file] = "created";
    } else if (before[file] !== mtime) {
      changes[file] = "modified";
    }
  }
  for (const file of Object.keys(before)) {
    if (!has(after, file)) {
      changes[file] = "deleted";
    }
  }
  return changes;
}
|
|
4422
|
+
/**
 * Pull tool-call records out of the agent's stdout.
 *
 * Every non-blank line is tried as JSON. A parsed value is kept when its
 * `type` is "tool_use" or it has a truthy `tool_name`; lines that are not
 * JSON (or parse to something without those fields) are dropped.
 *
 * @param {string} stdout - Raw stdout text.
 * @returns {Array} Matching records in input order; [] on any unexpected failure.
 */
function parseToolCalls(stdout) {
  try {
    return stdout
      .split("\n")
      .filter((candidate) => candidate.trim())
      .reduce((collected, candidate) => {
        try {
          const record = JSON.parse(candidate);
          const isToolCall = record.type === "tool_use" || record.tool_name;
          if (isToolCall) {
            collected.push(record);
          }
        } catch {
          // Not a JSON tool-call line; ignore it.
        }
        return collected;
      }, []);
  } catch {
    return [];
  }
}
|
|
4440
|
+
/**
 * Run every task against a harness, one after another, and aggregate scores.
 *
 * Each task's trace goes to `<workspacePath>/traces/<iteration>/<task.id>`.
 * When `config` is provided the task is re-scored from the stdout/stderr logs
 * written to the trace directory and the score is persisted; otherwise the
 * score returned by `runTask` is kept.
 *
 * @param {object[]} tasks - Tasks to run.
 * @param {string} harnessPath - Harness directory handed to `runTask`.
 * @param {string} workspacePath - Root of the evolve workspace.
 * @param {number} iteration - Iteration number.
 * @param {object} [config] - Enables scoring through `scoreTask` when truthy.
 * @returns {Promise<{results: object, aggregate: number}>} Scores keyed by
 *   task id and their mean (a score without a numeric `score` counts as 100
 *   when `pass` is truthy, else 0); the mean is 0 when there are no tasks.
 */
async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config) {
  const results = {};
  const readLog = (dir, name) => fs18.readFile(path18.join(dir, name), "utf-8").catch(() => "");

  for (const task of tasks) {
    const traceDir = path18.join(workspacePath, "traces", iteration.toString(), task.id);
    const { score: pendingScore } = await runTask(task, harnessPath, traceDir, iteration);
    if (!config) {
      results[task.id] = pendingScore;
      continue;
    }
    const stdout = await readLog(traceDir, "stdout.log");
    const stderr = await readLog(traceDir, "stderr.log");
    const finalScore = await scoreTask(task, traceDir, stdout, stderr, config);
    await writeScore(traceDir, finalScore);
    results[task.id] = finalScore;
  }

  const scores = Object.values(results);
  let total = 0;
  for (const entry of scores) {
    total += entry.score ?? (entry.pass ? 100 : 0);
  }
  const aggregate = scores.length > 0 ? total / scores.length : 0;
  return { results, aggregate };
}
|
|
4467
|
+
|
|
4468
|
+
// src/evolve/loop.ts
|
|
4469
|
+
import fs21 from "fs/promises";
|
|
4470
|
+
import path21 from "path";
|
|
4471
|
+
|
|
4472
|
+
// src/evolve/proposer.ts
|
|
4473
|
+
import fs19 from "fs/promises";
|
|
4474
|
+
import path19 from "path";
|
|
4475
|
+
var PROPOSER_SYSTEM_PROMPT = `You are an expert agent environment optimizer. Your job is to improve a Claude Code
|
|
4476
|
+
agent environment (.claude/ directory) based on execution traces from real tasks.
|
|
4477
|
+
|
|
4478
|
+
## What You Have Access To
|
|
4479
|
+
1. Current harness: The .claude/ directory files (CLAUDE.md, commands/, rules/, agents/)
|
|
4480
|
+
2. Execution traces: Full stdout/stderr, tool call sequences, file changes, and scores
|
|
4481
|
+
3. History: Previous iterations' proposals, diffs, and resulting score changes
|
|
4482
|
+
|
|
4483
|
+
## Your Task
|
|
4484
|
+
Analyze the traces to identify WHY tasks fail or underperform. Then propose specific,
|
|
4485
|
+
minimal changes to the harness files that will fix those failures.
|
|
4486
|
+
|
|
4487
|
+
## Diagnosis Process
|
|
4488
|
+
1. For each failed/low-scoring task:
|
|
4489
|
+
a. Read the full trace (stdout, tool calls, file changes)
|
|
4490
|
+
b. Identify the ROOT CAUSE: bad instruction? Missing tool? Wrong rule?
|
|
4491
|
+
c. Trace the failure back to a specific harness decision
|
|
4492
|
+
d. Propose a fix
|
|
4493
|
+
|
|
4494
|
+
2. For each successful task:
|
|
4495
|
+
a. Note what worked well
|
|
4496
|
+
b. Ensure proposed changes don't break what's working
|
|
4497
|
+
|
|
4498
|
+
3. Check history for counterfactual evidence
|
|
4499
|
+
|
|
4500
|
+
## Output Format
|
|
4501
|
+
Return a JSON object:
|
|
4502
|
+
{
|
|
4503
|
+
"reasoning": "Your full causal analysis...",
|
|
4504
|
+
"mutations": [
|
|
4505
|
+
{ "file": "CLAUDE.md", "action": "replace", "old_text": "...", "new_text": "...", "rationale": "..." },
|
|
4506
|
+
{ "file": "commands/develop.md", "action": "add_section", "new_text": "...", "rationale": "..." }
|
|
4507
|
+
],
|
|
4508
|
+
"expected_impact": { "task-id": "+15% \u2014 explanation" }
|
|
4509
|
+
}
|
|
4510
|
+
|
|
4511
|
+
## Rules
|
|
4512
|
+
- MINIMAL changes only. Don't rewrite the entire CLAUDE.md.
|
|
4513
|
+
- Each mutation must have a clear rationale tied to a specific trace observation.
|
|
4514
|
+
- Never remove something that's working for another task.
|
|
4515
|
+
- If a previous iteration's change caused a regression, REVERT it.
|
|
4516
|
+
- Prefer ADDITIVE changes over replacements when possible.
|
|
4517
|
+
|
|
4518
|
+
Return ONLY valid JSON.`;
|
|
4519
|
+
var STDOUT_TRUNCATION_LIMIT = 2e3;
|
|
4520
|
+
/**
 * Read every regular file below `harnessPath` as UTF-8 text.
 *
 * Directories are descended into recursively; entries that are neither
 * directories nor regular files are ignored, as are directories that cannot
 * be listed and files that cannot be read.
 *
 * @param {string} harnessPath - Root directory of the harness.
 * @returns {Promise<Object<string, string>>} Path relative to `harnessPath`
 *   -> file contents.
 */
async function readHarnessFiles(harnessPath) {
  const contentsByPath = {};

  const collect = async (absoluteDir, relativeDir) => {
    const children = await fs19.readdir(absoluteDir, { withFileTypes: true }).catch(() => null);
    if (children === null) {
      return;
    }
    for (const child of children) {
      const relativeChild = relativeDir ? path19.join(relativeDir, child.name) : child.name;
      const absoluteChild = path19.join(absoluteDir, child.name);
      if (child.isDirectory()) {
        await collect(absoluteChild, relativeChild);
        continue;
      }
      if (!child.isFile()) {
        continue;
      }
      // readFile with an encoding always yields a string, so `undefined`
      // can only mean the read failed.
      const text = await fs19.readFile(absoluteChild, "utf-8").catch(() => void 0);
      if (text !== void 0) {
        contentsByPath[relativeChild] = text;
      }
    }
  };

  await collect(harnessPath, "");
  return contentsByPath;
}
|
|
4545
|
+
/**
 * Keep only the tail of `stdout` when it is longer than `limit` characters.
 *
 * @param {string} stdout - Text to shorten.
 * @param {number} limit - Maximum number of trailing characters to keep.
 * @returns {string} `stdout` unchanged when it fits; otherwise a one-line
 *   truncation banner followed by the last `limit` characters.
 */
function truncateStdout(stdout, limit) {
  if (stdout.length <= limit) {
    return stdout;
  }
  // Slice from an explicit start index: slice(-limit) with limit === 0 is
  // slice(0), which would return the entire string instead of nothing.
  const tail = stdout.slice(stdout.length - limit);
  return `[...truncated, showing last ${limit} chars...]\n${tail}`;
}
|
|
4552
|
+
/**
 * Assemble the markdown user message sent to the proposer model.
 *
 * Four sections are emitted in order — harness files, task definitions,
 * execution traces, iteration history — each with a placeholder line when its
 * input is empty. All pieces are joined with a single newline.
 *
 * @param {Object<string, string>} harnessFiles - Relative path -> file content.
 * @param {object[]} traces - Reads `taskId`, `score`, `stdout`,
 *   `filesChanged` and `timing.durationMs` of each trace.
 * @param {object[]} tasks - Reads `id`, `template`, `description`,
 *   `expected_outcome` (string or array) and `scoring` of each task.
 * @param {object[]} history - Reads `iteration`, `score`, `taskResults` and
 *   optional `proposal` of each previous iteration log.
 * @returns {string} The assembled message.
 */
function buildProposerUserMessage(harnessFiles, traces, tasks, history) {
  // Numeric score of a result: its explicit score, else 100/0 from `pass`.
  const toNumericScore = (result) => result.score !== void 0 ? result.score : result.pass ? 100 : 0;
  const parts = [];

  parts.push("## Current Harness Files\n");
  const harnessEntries = Object.entries(harnessFiles);
  if (harnessEntries.length > 0) {
    for (const [filePath, content] of harnessEntries) {
      parts.push(`### ${filePath}\n\`\`\`\n${content}\n\`\`\`\n`);
    }
  } else {
    parts.push("(No harness files found)\n");
  }

  parts.push("## Task Definitions\n");
  if (tasks.length > 0) {
    for (const task of tasks) {
      const outcome = Array.isArray(task.expected_outcome) ? task.expected_outcome.join("; ") : task.expected_outcome;
      parts.push(
        `### Task: ${task.id}\n- Template: ${task.template}\n- Description: ${task.description}\n- Expected outcome: ${outcome}\n- Scoring: ${task.scoring}\n`
      );
    }
  } else {
    parts.push("(No tasks defined)\n");
  }

  parts.push("## Execution Traces\n");
  if (traces.length > 0) {
    for (const trace of traces) {
      const scoreNum = toNumericScore(trace.score);
      const tail = truncateStdout(trace.stdout, STDOUT_TRUNCATION_LIMIT);
      const changedLines = Object.entries(trace.filesChanged).map(([f, action]) => ` - ${f}: ${action}`).join("\n");
      const detailsLine = trace.score.details ? `- Details: ${trace.score.details}\n` : "";
      parts.push(
        `### Trace: ${trace.taskId}\n- Pass: ${trace.score.pass}\n- Score: ${scoreNum}\n` + detailsLine + `- Duration: ${trace.timing.durationMs}ms\n- Files changed:\n${changedLines || " (none)"}\n- Stdout (last ${STDOUT_TRUNCATION_LIMIT} chars):\n\`\`\`\n${tail}\n\`\`\`\n`
      );
    }
  } else {
    parts.push("(No traces available)\n");
  }

  parts.push("## Iteration History\n");
  if (history.length > 0) {
    for (const log of history) {
      const scoreLines = Object.entries(log.taskResults).map(([id, s]) => ` - ${id}: ${toNumericScore(s)} (pass=${s.pass})`).join("\n");
      parts.push(`### Iteration ${log.iteration} \u2014 Score: ${log.score}\n- Task results:\n${scoreLines}\n`);
      if (log.proposal) {
        parts.push(
          `- Proposal reasoning: ${log.proposal.reasoning}\n- Mutations: ${log.proposal.mutations.length} change(s)\n`
        );
      }
    }
  } else {
    parts.push("(No previous iterations)\n");
  }

  return parts.join("\n");
}
|
|
4629
|
+
/**
 * Parse and validate the proposer model's reply.
 *
 * A reply wrapped in a single ``` / ```json fence is unwrapped first. The
 * payload must be a JSON object with a string `reasoning` and an array
 * `mutations`. Mutation entries are normalised (snake_case or camelCase keys
 * are accepted) and silently dropped when they:
 *  - are not objects,
 *  - have a missing/blank `file` (it would resolve to the harness directory
 *    itself) or a `file` containing "..",
 *  - use an action other than "replace", "add_section" or "create_file",
 *  - are a "replace" without old text.
 *
 * @param {string} raw - Raw model output.
 * @returns {{reasoning: string, mutations: object[], expectedImpact: Object<string, string>}}
 * @throws {Error} When the payload is not valid JSON, not an object, or lacks
 *   `reasoning` / `mutations`.
 */
function parseProposerResponse(raw) {
  // First string value found under any of the given keys, else undefined.
  const firstString = (record, keys) => {
    for (const key of keys) {
      if (typeof record[key] === "string") {
        return record[key];
      }
    }
    return void 0;
  };
  const allowedActions = ["replace", "add_section", "create_file"];

  let cleaned = raw.trim();
  const fenceMatch = cleaned.match(/^```(?:json)?\s*\n?([\s\S]*?)\n?\s*```$/);
  if (fenceMatch) {
    cleaned = fenceMatch[1].trim();
  }
  let parsed;
  try {
    parsed = JSON.parse(cleaned);
  } catch {
    throw new Error(`Proposer returned invalid JSON: ${cleaned.slice(0, 200)}`);
  }
  if (typeof parsed !== "object" || parsed === null) {
    throw new Error("Proposer response is not a JSON object");
  }
  const obj = parsed;
  if (typeof obj["reasoning"] !== "string") {
    throw new Error('Proposer response missing required "reasoning" string field');
  }
  if (!Array.isArray(obj["mutations"])) {
    throw new Error('Proposer response missing required "mutations" array field');
  }

  const mutations = [];
  for (const entry of obj["mutations"]) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }
    const file = firstString(entry, ["file"]) ?? "";
    const action = firstString(entry, ["action"]) ?? "";
    const newText = firstString(entry, ["new_text", "newText"]) ?? "";
    const oldText = firstString(entry, ["old_text", "oldText"]);
    const rationale = firstString(entry, ["rationale"]) ?? "";
    // A blank target would later be joined onto the harness root and make the
    // mutator read/write the directory itself.
    if (file.trim() === "" || file.includes("..")) {
      continue;
    }
    if (!allowedActions.includes(action)) {
      continue;
    }
    if (action === "replace" && !oldText) {
      continue;
    }
    const mutation = { file, action, newText, rationale };
    if (oldText !== void 0) {
      mutation.oldText = oldText;
    }
    mutations.push(mutation);
  }

  const rawImpact = obj["expected_impact"] ?? obj["expectedImpact"] ?? {};
  const expectedImpact = {};
  if (typeof rawImpact === "object" && rawImpact !== null) {
    for (const [key, value] of Object.entries(rawImpact)) {
      expectedImpact[key] = typeof value === "string" ? value : String(value);
    }
  }
  return {
    reasoning: obj["reasoning"],
    mutations,
    expectedImpact
  };
}
|
|
4695
|
+
/**
 * Ask the proposer model for harness mutations based on one iteration's traces.
 *
 * Gathers the harness files and the iteration's traces, builds the user
 * message, calls the LLM with `config` overridden to use `proposerModel`, and
 * returns the parsed proposal.
 *
 * @param {number} iteration - Iteration whose traces are analysed.
 * @param {string} workspacePath - Root of the evolve workspace.
 * @param {string} harnessPath - Directory of the current harness.
 * @param {object[]} history - Previous iteration logs.
 * @param {object[]} tasks - Task definitions.
 * @param {object} config - Base LLM configuration.
 * @param {string} proposerModel - Model used for this call.
 * @returns {Promise<object>} Result of `parseProposerResponse`.
 */
async function propose(iteration, workspacePath, harnessPath, history, tasks, config, proposerModel) {
  const files = await readHarnessFiles(harnessPath);
  const iterationTraces = await loadIterationTraces(workspacePath, iteration);
  const prompt = buildProposerUserMessage(files, iterationTraces, tasks, history);
  const llmConfig = { ...config, model: proposerModel };
  const llmOptions = {
    systemPrompt: PROPOSER_SYSTEM_PROMPT,
    maxTokens: 8192
  };
  const reply = await callLLM(llmConfig, prompt, llmOptions);
  return parseProposerResponse(reply);
}
|
|
4706
|
+
|
|
4707
|
+
// src/evolve/mutator.ts
|
|
4708
|
+
import fs20 from "fs/promises";
|
|
4709
|
+
import path20 from "path";
|
|
4710
|
+
/**
 * Copy the current harness into `<nextIterationDir>/harness` and apply the
 * proposed mutations to the copy.
 *
 * Per action:
 *  - "replace":     replaces the first occurrence of `oldText` with `newText`.
 *                   Skipped when `oldText` is empty, the file does not exist,
 *                   or the text is not found.
 *  - "add_section": appends `newText` after a blank line; when the file cannot
 *                   be read it is created with `newText` as its content.
 *  - "create_file": writes `newText`, creating parent directories.
 * Mutations whose `file` contains ".." are ignored.
 *
 * @param {string} currentHarnessPath - Harness to copy from (left untouched).
 * @param {string} nextIterationDir - Directory that receives the new harness.
 * @param {object[]} mutations - Entries with `file`, `action`, `newText` and,
 *   for replacements, `oldText`.
 * @returns {Promise<{newHarnessPath: string, diffPatch: string}>}
 */
async function applyMutations(currentHarnessPath, nextIterationDir, mutations) {
  const newHarnessPath = path20.join(nextIterationDir, "harness");
  await copyDir(currentHarnessPath, newHarnessPath);
  for (const mutation of mutations) {
    if (mutation.file.includes("..")) {
      continue;
    }
    const filePath = path20.join(newHarnessPath, mutation.file);
    if (mutation.action === "replace") {
      if (!mutation.oldText) {
        continue;
      }
      let content;
      try {
        content = await fs20.readFile(filePath, "utf-8");
      } catch (err) {
        // A replacement aimed at a file that is not there is just another
        // inapplicable mutation; anything else is a real I/O failure.
        if (err && err.code === "ENOENT") {
          continue;
        }
        throw err;
      }
      if (!content.includes(mutation.oldText)) {
        continue;
      }
      await fs20.writeFile(
        filePath,
        // Function replacer: inserts newText literally. A string replacement
        // would expand "$&", "$1", "$$" etc. found in model-written text.
        content.replace(mutation.oldText, () => mutation.newText),
        "utf-8"
      );
    } else if (mutation.action === "add_section") {
      try {
        const content = await fs20.readFile(filePath, "utf-8");
        await fs20.writeFile(
          filePath,
          `${content}\n\n${mutation.newText}`,
          "utf-8"
        );
      } catch {
        await fs20.mkdir(path20.dirname(filePath), { recursive: true });
        await fs20.writeFile(filePath, mutation.newText, "utf-8");
      }
    } else if (mutation.action === "create_file") {
      await fs20.mkdir(path20.dirname(filePath), { recursive: true });
      await fs20.writeFile(filePath, mutation.newText, "utf-8");
    }
  }
  const diffPatch = await generateDiff2(currentHarnessPath, newHarnessPath);
  return { newHarnessPath, diffPatch };
}
|
|
4751
|
+
/**
 * Build a coarse, whole-file "diff" between two directory trees.
 *
 * For every relative path whose content differs (a file missing on one side
 * is treated as empty content), emits `--- a/<path>` and `+++ b/<path>`
 * headers, then every old line prefixed with `-` and every new line prefixed
 * with `+`, then an empty separator line. A side whose content is empty
 * contributes no lines. Paths are processed in sorted order.
 *
 * @param oldDir Directory holding the previous version.
 * @param newDir Directory holding the new version.
 * @returns The patch text, lines joined with "\n" ("" when nothing differs).
 */
async function generateDiff2(oldDir, newDir) {
  const before = await readAllFiles(oldDir);
  const after = await readAllFiles(newDir);

  const orderedPaths = Array.from(
    /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)])
  ).sort();

  const out = [];
  // Append each line of `text` with the given marker in front.
  const emit = (marker, text) => {
    for (const line of text.split("\n")) {
      out.push(`${marker}${line}`);
    }
  };

  for (const relPath of orderedPaths) {
    const previous = before[relPath] ?? "";
    const current = after[relPath] ?? "";
    if (previous === current) continue;

    out.push(`--- a/${relPath}`);
    out.push(`+++ b/${relPath}`);
    // The contents differ, so at least one side is non-empty here.
    if (previous) emit("-", previous);
    if (current) emit("+", current);
    out.push("");
  }

  return out.join("\n");
}
|
|
4787
|
+
/**
 * Recursively read every file under `dir` as UTF-8 text.
 *
 * @param dir Root directory to scan.
 * @returns Plain object mapping each file's path relative to `dir` to its
 *          content. Directories that cannot be listed (including a missing
 *          `dir`) are silently skipped, so the result may be empty.
 */
async function readAllFiles(dir) {
  const result = {};

  const visit = async (folder) => {
    let children;
    try {
      children = await fs20.readdir(folder, { withFileTypes: true });
    } catch {
      // Unreadable or missing directory: contribute nothing.
      return;
    }
    for (const child of children) {
      const absolute = path20.join(folder, child.name);
      if (child.isDirectory()) {
        await visit(absolute);
        continue;
      }
      result[path20.relative(dir, absolute)] = await fs20.readFile(absolute, "utf-8");
    }
  };

  await visit(dir);
  return result;
}
|
|
4809
|
+
|
|
4810
|
+
// src/evolve/loop.ts
|
|
4811
|
+
/**
 * Run the evolution loop: evaluate the harness of each iteration, then either
 * roll back, stop, or ask the proposer for mutations that seed the next
 * iteration's harness.
 *
 * Per iteration (up to `evolveConfig.maxIterations`):
 *  1. Require `<workspace>/iterations/<iter>/harness`; a missing baseline
 *     (iteration 0) throws, a missing later harness ends the loop.
 *  2. Score the harness with `evaluateAll`.
 *  3. If the score is below the best so far (iter > 0), log the iteration
 *     without a proposal and seed the next iteration from the best harness.
 *  4. Otherwise record it as the new best; stop on a score >= 100 or on the
 *     last iteration.
 *  5. Otherwise call `propose` and `applyMutations`; if either fails the next
 *     iteration is seeded with an unmodified copy of the current harness.
 *
 * @param workspacePath Evolve workspace directory.
 * @param tasks         Eval tasks forwarded to `evaluateAll` and `propose`.
 * @param kairnConfig   Config forwarded to `evaluateAll` and `propose`.
 * @param evolveConfig  Reads `maxIterations` and `proposerModel`.
 * @param onProgress    Optional callback receiving progress events
 *                      (`iteration-start`, `iteration-scored`, `rollback`,
 *                      `perfect-score`, `proposing`, `mutations-applied`,
 *                      `complete`).
 * @returns `{ iterations, bestIteration, bestScore, baselineScore }`.
 * @throws Error when the baseline harness (iteration 0) does not exist.
 */
async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgress) {
  const history = [];
  let bestScore = -1;
  let bestIteration = 0;
  let baselineScore = 0;

  // Path helpers for `<workspace>/iterations/<n>[/harness]`.
  const iterationDir = (n) => path21.join(workspacePath, "iterations", n.toString());
  const harnessDir = (n) => path21.join(iterationDir(n), "harness");

  // Persist a log entry, then remember it in the in-memory history.
  const record = async (entry) => {
    await writeIterationLog(workspacePath, entry);
    history.push(entry);
  };

  // Log entry for an iteration that produced no proposal/diff.
  const scoreOnlyEntry = (iteration, score, taskResults) => ({
    iteration,
    score,
    taskResults,
    proposal: null,
    diffPatch: null,
    timestamp: (/* @__PURE__ */ new Date()).toISOString()
  });

  for (let iter = 0; iter < evolveConfig.maxIterations; iter++) {
    const harnessPath = harnessDir(iter);

    let harnessExists = true;
    try {
      await fs21.access(harnessPath);
    } catch {
      harnessExists = false;
    }
    if (!harnessExists) {
      if (iter === 0) {
        throw new Error(
          "No baseline harness found. Run `kairn evolve baseline` first."
        );
      }
      break;
    }

    onProgress?.({ type: "iteration-start", iteration: iter });
    const { results, aggregate } = await evaluateAll(
      tasks,
      harnessPath,
      workspacePath,
      iter,
      kairnConfig
    );
    onProgress?.({ type: "iteration-scored", iteration: iter, score: aggregate });
    if (iter === 0) baselineScore = aggregate;

    const regressed = iter > 0 && aggregate < bestScore;
    if (regressed) {
      onProgress?.({
        type: "rollback",
        iteration: iter,
        score: aggregate,
        message: `Regression: ${aggregate.toFixed(1)}% < ${bestScore.toFixed(1)}%. Rolling back.`
      });
      await record(scoreOnlyEntry(iter, aggregate, results));
      // Seed the next iteration (if there is one) from the best harness so far.
      if (iter + 1 < evolveConfig.maxIterations) {
        await copyDir(harnessDir(bestIteration), harnessDir(iter + 1));
      }
      continue;
    }

    bestScore = aggregate;
    bestIteration = iter;

    if (aggregate >= 100) {
      onProgress?.({ type: "perfect-score", iteration: iter, score: aggregate });
      await record(scoreOnlyEntry(iter, aggregate, results));
      break;
    }

    // Nothing to propose for after the final iteration; just log it.
    if (iter === evolveConfig.maxIterations - 1) {
      await record(scoreOnlyEntry(iter, aggregate, results));
      break;
    }

    onProgress?.({ type: "proposing", iteration: iter });
    let proposal;
    try {
      proposal = await propose(
        iter,
        workspacePath,
        harnessPath,
        history,
        tasks,
        kairnConfig,
        evolveConfig.proposerModel
      );
    } catch {
      // Proposer failed: carry the current harness forward unchanged.
      await copyDir(harnessPath, harnessDir(iter + 1));
      await record(scoreOnlyEntry(iter, aggregate, results));
      continue;
    }

    const nextIterDir = iterationDir(iter + 1);
    let diffPatch = "";
    try {
      ({ diffPatch } = await applyMutations(harnessPath, nextIterDir, proposal.mutations));
    } catch {
      // Mutation failed: fall back to an unmodified copy of the harness.
      await copyDir(harnessPath, path21.join(nextIterDir, "harness"));
    }

    onProgress?.({
      type: "mutations-applied",
      iteration: iter,
      mutationCount: proposal.mutations.length
    });
    await record({
      iteration: iter,
      score: aggregate,
      taskResults: results,
      proposal,
      diffPatch,
      timestamp: (/* @__PURE__ */ new Date()).toISOString()
    });
  }

  onProgress?.({
    type: "complete",
    iteration: Math.max(history.length - 1, 0),
    score: bestScore
  });

  return {
    iterations: history,
    bestIteration,
    bestScore,
    baselineScore
  };
}
|
|
4980
|
+
|
|
4981
|
+
// src/commands/evolve.ts
|
|
4982
|
+
// Default evolve settings. Used to fill in keys that are absent from the
// workspace config.yaml, as the complete config when that file cannot be
// loaded, and passed to createEvolveWorkspace by `evolve init`.
var DEFAULT_CONFIG = {
  // Model name; presumably the one used to run/score tasks (TODO confirm against callers).
  model: "claude-sonnet-4-6",
  // Model name handed to the proposer step of the evolution loop.
  proposerModel: "claude-opus-4-6",
  // Scoring strategy identifier.
  scorer: "pass-fail",
  // Upper bound on evolution loop iterations.
  maxIterations: 5,
  // Presumably the number of tasks evaluated concurrently (not consumed in this section; verify).
  parallelTasks: 1
};
|
|
4989
|
+
/**
 * Load evolve settings from `<workspacePath>/config.yaml`.
 *
 * YAML keys are snake_case (`proposer_model`, `max_iterations`,
 * `parallel_tasks`) and are mapped to the camelCase config shape. Any key
 * that is missing or null falls back to the matching DEFAULT_CONFIG value.
 *
 * @param workspacePath Evolve workspace directory.
 * @returns A fresh config object; a copy of DEFAULT_CONFIG when the file
 *          cannot be read or parsed, or does not parse to an object.
 */
async function loadEvolveConfigFromWorkspace(workspacePath) {
  try {
    const configFile = path22.join(workspacePath, "config.yaml");
    const yamlDoc = yamlParse(await fs22.readFile(configFile, "utf-8"));
    // Lookups stay inside the try: an empty file parses to null, and the
    // resulting TypeError must route to the defaults below.
    const pick = (yamlKey, configKey) => yamlDoc[yamlKey] ?? DEFAULT_CONFIG[configKey];
    return {
      model: pick("model", "model"),
      proposerModel: pick("proposer_model", "proposerModel"),
      scorer: pick("scorer", "scorer"),
      maxIterations: pick("max_iterations", "maxIterations"),
      parallelTasks: pick("parallel_tasks", "parallelTasks")
    };
  } catch {
    return { ...DEFAULT_CONFIG };
  }
}
|
|
5004
|
+
// Parent `evolve` command; subcommands such as `init`, `baseline` and `run`
// are registered on it via evolveCommand.command(...).
var evolveCommand = new Command11("evolve").description("Evolve your agent environment through automated optimization");
|
|
5005
|
+
evolveCommand.command("init").description("Initialize an evolution workspace with auto-generated tasks").option("--workflow <type>", "Workflow type for template selection", "feature-development").action(async (options) => {
|
|
5006
|
+
try {
|
|
5007
|
+
const projectRoot = process.cwd();
|
|
5008
|
+
console.log(ui.section("Evolve Init"));
|
|
5009
|
+
const claudeDir = path22.join(projectRoot, ".claude");
|
|
5010
|
+
try {
|
|
5011
|
+
await fs22.access(claudeDir);
|
|
5012
|
+
} catch {
|
|
5013
|
+
console.log(ui.error("No .claude/ directory found. Run kairn describe first."));
|
|
5014
|
+
process.exit(1);
|
|
5015
|
+
}
|
|
5016
|
+
const workspace = await createEvolveWorkspace(projectRoot, DEFAULT_CONFIG);
|
|
5017
|
+
console.log(ui.success("Created .kairn-evolve/ workspace"));
|
|
5018
|
+
const spinner = ora2("Generating project-specific eval tasks...").start();
|
|
5019
|
+
let tasks;
|
|
5020
|
+
try {
|
|
5021
|
+
tasks = await autoGenerateTasks(projectRoot, options.workflow);
|
|
5022
|
+
spinner.succeed(`Generated ${tasks.length} eval tasks`);
|
|
5023
|
+
} catch {
|
|
5024
|
+
spinner.fail("LLM task generation failed");
|
|
5025
|
+
const templateIds = selectTemplatesForWorkflow(options.workflow);
|
|
5026
|
+
tasks = templateIds.map((templateId, index) => ({
|
|
5027
|
+
id: `${templateId}-${index + 1}`,
|
|
5028
|
+
template: templateId,
|
|
5029
|
+
description: `${EVAL_TEMPLATES[templateId].description} (project-specific task \u2014 edit in tasks.yaml)`,
|
|
5030
|
+
setup: "npm install",
|
|
5031
|
+
expected_outcome: "Task completed successfully",
|
|
5032
|
+
scoring: "pass-fail",
|
|
5033
|
+
timeout: 300
|
|
5034
|
+
}));
|
|
5035
|
+
console.log(ui.info(`Fell back to ${tasks.length} template placeholders`));
|
|
5036
|
+
}
|
|
5037
|
+
for (const task of tasks) {
|
|
5038
|
+
console.log(chalk14.cyan(` ${task.id}`) + chalk14.dim(` (${task.template}) \u2014 ${task.description.slice(0, 80)}`));
|
|
5039
|
+
}
|
|
5040
|
+
let addMore = true;
|
|
5041
|
+
while (addMore) {
|
|
5042
|
+
try {
|
|
5043
|
+
addMore = await confirm3({ message: "Add another eval task?", default: false });
|
|
5044
|
+
} catch {
|
|
5045
|
+
addMore = false;
|
|
5046
|
+
}
|
|
5047
|
+
if (addMore) {
|
|
5048
|
+
const templateId = await select4({
|
|
5049
|
+
message: "Select eval template:",
|
|
5050
|
+
choices: Object.values(EVAL_TEMPLATES).map((t) => ({
|
|
5051
|
+
name: `${t.name} \u2014 ${t.description}`,
|
|
5052
|
+
value: t.id
|
|
5053
|
+
}))
|
|
5054
|
+
});
|
|
5055
|
+
const addSpinner = ora2("Generating task...").start();
|
|
5056
|
+
try {
|
|
5057
|
+
const config = await loadConfig();
|
|
5058
|
+
if (config) {
|
|
5059
|
+
let claudeMd = "";
|
|
5060
|
+
try {
|
|
5061
|
+
claudeMd = await fs22.readFile(path22.join(claudeDir, "CLAUDE.md"), "utf-8");
|
|
5062
|
+
} catch {
|
|
5063
|
+
}
|
|
5064
|
+
const profile = await buildProjectProfile(projectRoot);
|
|
5065
|
+
const newTasks = await generateTasksFromTemplates(claudeMd, profile, [templateId], config);
|
|
5066
|
+
tasks.push(...newTasks);
|
|
5067
|
+
addSpinner.succeed(`Added ${newTasks.length} task(s)`);
|
|
5068
|
+
} else {
|
|
5069
|
+
addSpinner.fail("No config found");
|
|
5070
|
+
}
|
|
5071
|
+
} catch {
|
|
5072
|
+
addSpinner.fail("Failed to generate task");
|
|
4453
5073
|
}
|
|
4454
5074
|
}
|
|
4455
5075
|
}
|
|
@@ -4469,16 +5089,16 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
|
|
|
4469
5089
|
evolveCommand.command("baseline").description("Snapshot current .claude/ directory as baseline").action(async () => {
|
|
4470
5090
|
try {
|
|
4471
5091
|
const projectRoot = process.cwd();
|
|
4472
|
-
const workspace =
|
|
5092
|
+
const workspace = path22.join(projectRoot, ".kairn-evolve");
|
|
4473
5093
|
console.log(ui.section("Evolve Baseline"));
|
|
4474
5094
|
try {
|
|
4475
|
-
await
|
|
5095
|
+
await fs22.access(workspace);
|
|
4476
5096
|
} catch {
|
|
4477
5097
|
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
4478
5098
|
process.exit(1);
|
|
4479
5099
|
}
|
|
4480
5100
|
await snapshotBaseline(projectRoot, workspace);
|
|
4481
|
-
const baselineDir =
|
|
5101
|
+
const baselineDir = path22.join(workspace, "baseline");
|
|
4482
5102
|
const fileCount = await countFiles(baselineDir);
|
|
4483
5103
|
console.log(ui.success(`Baseline snapshot created (${fileCount} files)`));
|
|
4484
5104
|
} catch (err) {
|
|
@@ -4487,21 +5107,21 @@ evolveCommand.command("baseline").description("Snapshot current .claude/ directo
|
|
|
4487
5107
|
process.exit(1);
|
|
4488
5108
|
}
|
|
4489
5109
|
});
|
|
4490
|
-
evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").action(async (options) => {
|
|
5110
|
+
evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").action(async (options) => {
|
|
4491
5111
|
try {
|
|
4492
5112
|
const projectRoot = process.cwd();
|
|
4493
|
-
const workspace =
|
|
5113
|
+
const workspace = path22.join(projectRoot, ".kairn-evolve");
|
|
4494
5114
|
console.log(ui.section("Evolve Run"));
|
|
4495
5115
|
try {
|
|
4496
|
-
await
|
|
5116
|
+
await fs22.access(workspace);
|
|
4497
5117
|
} catch {
|
|
4498
5118
|
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
4499
5119
|
process.exit(1);
|
|
4500
5120
|
}
|
|
4501
|
-
const tasksPath =
|
|
5121
|
+
const tasksPath = path22.join(workspace, "tasks.yaml");
|
|
4502
5122
|
let tasksContent;
|
|
4503
5123
|
try {
|
|
4504
|
-
tasksContent = await
|
|
5124
|
+
tasksContent = await fs22.readFile(tasksPath, "utf-8");
|
|
4505
5125
|
} catch {
|
|
4506
5126
|
console.log(ui.error("No tasks.yaml found. Run kairn evolve init first."));
|
|
4507
5127
|
process.exit(1);
|
|
@@ -4511,37 +5131,107 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
4511
5131
|
console.log(ui.error("No tasks found in tasks.yaml"));
|
|
4512
5132
|
process.exit(1);
|
|
4513
5133
|
}
|
|
4514
|
-
|
|
4515
|
-
|
|
4516
|
-
|
|
4517
|
-
|
|
4518
|
-
|
|
4519
|
-
|
|
4520
|
-
|
|
4521
|
-
|
|
4522
|
-
|
|
4523
|
-
|
|
4524
|
-
|
|
4525
|
-
|
|
4526
|
-
|
|
4527
|
-
|
|
4528
|
-
|
|
4529
|
-
|
|
4530
|
-
|
|
4531
|
-
|
|
4532
|
-
|
|
4533
|
-
|
|
5134
|
+
if (options.task) {
|
|
5135
|
+
const tasksToRun = parsed.tasks.filter((t) => t.id === options.task);
|
|
5136
|
+
if (tasksToRun.length === 0) {
|
|
5137
|
+
console.log(ui.error(`Task "${options.task}" not found in tasks.yaml`));
|
|
5138
|
+
process.exit(1);
|
|
5139
|
+
}
|
|
5140
|
+
console.log(ui.info(`Running ${tasksToRun.length} task(s)...`));
|
|
5141
|
+
console.log("");
|
|
5142
|
+
const config = await loadConfig();
|
|
5143
|
+
const harnessPath = path22.join(projectRoot, ".claude");
|
|
5144
|
+
const results = [];
|
|
5145
|
+
for (const task of tasksToRun) {
|
|
5146
|
+
const traceDir = path22.join(workspace, "traces", "0", task.id);
|
|
5147
|
+
const spinner = ora2(`Running: ${task.id}`).start();
|
|
5148
|
+
const result = await runTask(task, harnessPath, traceDir, 0);
|
|
5149
|
+
if (config) {
|
|
5150
|
+
const stdout = await fs22.readFile(path22.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
|
|
5151
|
+
const stderr = await fs22.readFile(path22.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
|
|
5152
|
+
const score = await scoreTask(task, traceDir, stdout, stderr, config);
|
|
5153
|
+
result.score = score;
|
|
5154
|
+
await writeScore(traceDir, score);
|
|
5155
|
+
}
|
|
5156
|
+
results.push(result);
|
|
5157
|
+
const status = result.score.pass ? chalk14.green("PASS") : chalk14.red("FAIL");
|
|
5158
|
+
const scoreStr = result.score.score !== void 0 ? chalk14.dim(` (${result.score.score}%)`) : "";
|
|
5159
|
+
spinner.stop();
|
|
5160
|
+
console.log(` ${status} ${task.id}${scoreStr}${result.score.details ? chalk14.dim(` \u2014 ${result.score.details}`) : ""}`);
|
|
5161
|
+
}
|
|
5162
|
+
const passed = results.filter((r) => r.score.pass).length;
|
|
5163
|
+
console.log("");
|
|
5164
|
+
console.log(ui.info(`Results: ${passed}/${results.length} passed`));
|
|
5165
|
+
console.log(ui.info("Traces written to .kairn-evolve/traces/0/"));
|
|
5166
|
+
} else {
|
|
5167
|
+
const kairnConfig = await loadConfig();
|
|
5168
|
+
if (!kairnConfig) {
|
|
5169
|
+
console.log(ui.error("No config found. Run kairn init first."));
|
|
5170
|
+
process.exit(1);
|
|
5171
|
+
}
|
|
5172
|
+
const evolveConfig = await loadEvolveConfigFromWorkspace(workspace);
|
|
5173
|
+
const iterations = parseInt(options.iterations ?? "5", 10);
|
|
5174
|
+
if (isNaN(iterations) || iterations < 1) {
|
|
5175
|
+
console.log(ui.error("--iterations must be a positive integer"));
|
|
5176
|
+
process.exit(1);
|
|
5177
|
+
}
|
|
5178
|
+
evolveConfig.maxIterations = iterations;
|
|
5179
|
+
try {
|
|
5180
|
+
await fs22.access(path22.join(workspace, "iterations", "0", "harness"));
|
|
5181
|
+
} catch {
|
|
5182
|
+
console.log(ui.error("No baseline harness found. Run kairn evolve baseline first."));
|
|
5183
|
+
process.exit(1);
|
|
5184
|
+
}
|
|
5185
|
+
const result = await evolve(workspace, parsed.tasks, kairnConfig, evolveConfig, (event) => {
|
|
5186
|
+
switch (event.type) {
|
|
5187
|
+
case "iteration-start":
|
|
5188
|
+
console.log(ui.section(`Iteration ${event.iteration}`));
|
|
5189
|
+
break;
|
|
5190
|
+
case "iteration-scored": {
|
|
5191
|
+
const scoreColor = event.score !== void 0 && event.score >= 100 ? chalk14.green : event.score !== void 0 && event.score >= 60 ? chalk14.yellow : chalk14.red;
|
|
5192
|
+
console.log(` Score: ${scoreColor((event.score?.toFixed(1) ?? "0") + "%")}`);
|
|
5193
|
+
break;
|
|
5194
|
+
}
|
|
5195
|
+
case "rollback":
|
|
5196
|
+
console.log(chalk14.yellow(` Warning: ${event.message ?? "Regression detected"}`));
|
|
5197
|
+
break;
|
|
5198
|
+
case "proposing":
|
|
5199
|
+
console.log(chalk14.dim(" Proposer analyzing traces..."));
|
|
5200
|
+
break;
|
|
5201
|
+
case "mutations-applied":
|
|
5202
|
+
console.log(chalk14.dim(` Applied ${event.mutationCount ?? 0} mutation(s)`));
|
|
5203
|
+
break;
|
|
5204
|
+
case "perfect-score":
|
|
5205
|
+
console.log(chalk14.green(" Perfect score. Stopping."));
|
|
5206
|
+
break;
|
|
5207
|
+
case "complete":
|
|
5208
|
+
break;
|
|
5209
|
+
}
|
|
5210
|
+
});
|
|
5211
|
+
console.log(ui.section("Evolution Summary"));
|
|
5212
|
+
console.log(` Iterations: ${result.iterations.length}`);
|
|
5213
|
+
console.log(` Baseline: ${result.baselineScore.toFixed(1)}%`);
|
|
5214
|
+
console.log(` Best: ${chalk14.green(result.bestScore.toFixed(1) + "%")} (iteration ${result.bestIteration})`);
|
|
5215
|
+
const improvement = result.bestScore - result.baselineScore;
|
|
5216
|
+
if (improvement > 0) {
|
|
5217
|
+
console.log(` Improvement: ${chalk14.green("+" + improvement.toFixed(1) + " points")}`);
|
|
5218
|
+
} else {
|
|
5219
|
+
console.log(` Improvement: ${improvement.toFixed(1)} points`);
|
|
5220
|
+
}
|
|
5221
|
+
console.log("");
|
|
5222
|
+
console.log(" Iter Score Mutations Status");
|
|
5223
|
+
for (const iter of result.iterations) {
|
|
5224
|
+
const scoreStr = iter.score.toFixed(1).padStart(6) + "%";
|
|
5225
|
+
const mutations = iter.proposal?.mutations.length ?? 0;
|
|
5226
|
+
const mutStr = mutations > 0 ? mutations.toString() : "-";
|
|
5227
|
+
let status = "evaluated";
|
|
5228
|
+
if (iter.iteration === 0) status = "baseline";
|
|
5229
|
+
else if (!iter.proposal && !iter.diffPatch) status = "rollback";
|
|
5230
|
+
else if (iter.score >= 100) status = "perfect";
|
|
5231
|
+
else if (iter.iteration === result.bestIteration) status = "best";
|
|
5232
|
+
console.log(` ${iter.iteration.toString().padStart(4)} ${scoreStr} ${mutStr.padStart(9)} ${status}`);
|
|
4534
5233
|
}
|
|
4535
|
-
results.push(result);
|
|
4536
|
-
const status = result.score.pass ? chalk14.green("PASS") : chalk14.red("FAIL");
|
|
4537
|
-
const scoreStr = result.score.score !== void 0 ? chalk14.dim(` (${result.score.score}%)`) : "";
|
|
4538
|
-
spinner.stop();
|
|
4539
|
-
console.log(` ${status} ${task.id}${scoreStr}${result.score.details ? chalk14.dim(` \u2014 ${result.score.details}`) : ""}`);
|
|
4540
5234
|
}
|
|
4541
|
-
const passed = results.filter((r) => r.score.pass).length;
|
|
4542
|
-
console.log("");
|
|
4543
|
-
console.log(ui.info(`Results: ${passed}/${results.length} passed`));
|
|
4544
|
-
console.log(ui.info("Traces written to .kairn-evolve/traces/0/"));
|
|
4545
5235
|
} catch (err) {
|
|
4546
5236
|
const msg = err instanceof Error ? err.message : String(err);
|
|
4547
5237
|
console.log(ui.error(msg));
|
|
@@ -4551,10 +5241,10 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
4551
5241
|
async function countFiles(dir) {
|
|
4552
5242
|
let count = 0;
|
|
4553
5243
|
try {
|
|
4554
|
-
const entries = await
|
|
5244
|
+
const entries = await fs22.readdir(dir, { withFileTypes: true });
|
|
4555
5245
|
for (const entry of entries) {
|
|
4556
5246
|
if (entry.isDirectory()) {
|
|
4557
|
-
count += await countFiles(
|
|
5247
|
+
count += await countFiles(path22.join(dir, entry.name));
|
|
4558
5248
|
} else {
|
|
4559
5249
|
count++;
|
|
4560
5250
|
}
|