kairn-cli 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -66
- package/dist/cli.js +348 -24
- package/dist/cli.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -221,7 +221,7 @@ var ui = {
|
|
|
221
221
|
// Key-value pairs
|
|
222
222
|
kv: (key, value) => ` ${chalk.cyan(key.padEnd(14))} ${value}`,
|
|
223
223
|
// File list
|
|
224
|
-
file: (
|
|
224
|
+
file: (path24) => chalk.dim(` ${path24}`),
|
|
225
225
|
// Tool display
|
|
226
226
|
tool: (name, reason) => ` ${warmStone("\u25CF")} ${chalk.bold(name)}
|
|
227
227
|
${chalk.dim(reason)}`,
|
|
@@ -3694,9 +3694,9 @@ var keysCommand = new Command10("keys").description("Add or update API keys for
|
|
|
3694
3694
|
import { Command as Command11 } from "commander";
|
|
3695
3695
|
import chalk14 from "chalk";
|
|
3696
3696
|
import ora2 from "ora";
|
|
3697
|
-
import
|
|
3698
|
-
import
|
|
3699
|
-
import { parse as
|
|
3697
|
+
import fs23 from "fs/promises";
|
|
3698
|
+
import path23 from "path";
|
|
3699
|
+
import { parse as yamlParse2 } from "yaml";
|
|
3700
3700
|
import { confirm as confirm3, select as select4 } from "@inquirer/prompts";
|
|
3701
3701
|
|
|
3702
3702
|
// src/evolve/init.ts
|
|
@@ -4117,6 +4117,27 @@ async function writeIterationLog(workspacePath, log) {
|
|
|
4117
4117
|
"utf-8"
|
|
4118
4118
|
);
|
|
4119
4119
|
}
|
|
4120
|
+
/**
 * Load the persisted artifacts of one evolution iteration from
 * `<workspacePath>/iterations/<iteration>/`.
 *
 * Reads `scores.json`, `proposer_reasoning.md` and `mutation_diff.patch`.
 * Each file is optional: a missing or unreadable file falls back to an empty
 * value, and a `scores.json` that is not valid JSON (or is not a JSON object)
 * is treated the same as a missing one instead of throwing.
 *
 * @param {string} workspacePath - Root of the evolve workspace.
 * @param {number} iteration - Iteration number; used as the directory name.
 * @returns {Promise<object|null>} `null` when the iteration directory is not
 *   accessible, otherwise `{ iteration, score, taskResults, proposal,
 *   diffPatch, timestamp }`.
 */
async function loadIterationLog(workspacePath, iteration) {
  const iterDir = path17.join(workspacePath, "iterations", iteration.toString());
  try {
    await fs17.access(iterDir);
  } catch {
    return null;
  }

  // Best-effort read: any failure yields the supplied fallback text.
  const readOr = (fileName, fallback) =>
    fs17.readFile(path17.join(iterDir, fileName), "utf-8").catch(() => fallback);

  // The three files are independent, so read them concurrently.
  const [scoresStr, reasoning, diffPatch] = await Promise.all([
    readOr("scores.json", "{}"),
    readOr("proposer_reasoning.md", ""),
    readOr("mutation_diff.patch", "")
  ]);

  let scoresData = {};
  try {
    const parsed = JSON.parse(scoresStr);
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      scoresData = parsed;
    }
  } catch {
    // Corrupt scores.json: keep the empty default, like a missing file.
  }

  const rawResults = scoresData.taskResults;
  const taskResults = rawResults !== null && typeof rawResults === "object" ? rawResults : {};

  // Only the reasoning text is persisted here; mutations and expected impact
  // are not reconstructed from disk, so they are always empty.
  const proposal = reasoning ? { reasoning, mutations: [], expectedImpact: {} } : null;

  return {
    iteration,
    // Callers format the score with toFixed(), so anything non-numeric becomes 0.
    score: Number.isFinite(scoresData.score) ? scoresData.score : 0,
    taskResults,
    proposal,
    diffPatch: diffPatch || null,
    timestamp: ""
  };
}
|
|
4120
4141
|
|
|
4121
4142
|
// src/evolve/exec.ts
|
|
4122
4143
|
import { exec } from "child_process";
|
|
@@ -4978,6 +4999,215 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
4978
4999
|
};
|
|
4979
5000
|
}
|
|
4980
5001
|
|
|
5002
|
+
// src/evolve/report.ts
|
|
5003
|
+
import fs22 from "fs/promises";
|
|
5004
|
+
import path22 from "path";
|
|
5005
|
+
|
|
5006
|
+
// src/evolve/diagnosis.ts
|
|
5007
|
+
/**
 * Reduce a task result to a number: the explicit `score` when it is not
 * null/undefined, otherwise 100 for a passing result and 0 for a failing one.
 *
 * @param {{score?: number|null, pass?: boolean}} s - Task result.
 * @returns {number} Numeric score.
 */
function numericScore(s) {
  return s.score ?? (s.pass ? 100 : 0);
}

/**
 * Compare each iteration with the one before it and record which tasks
 * improved or regressed.
 *
 * The mutations described for a transition are taken from the PREVIOUS
 * iteration's proposal. Transitions whose previous iteration has no proposal,
 * or a proposal without mutations, are skipped.
 * NOTE(review): this assumes the proposal stored with iteration N is what
 * produced iteration N+1 - confirm against the evolve loop.
 *
 * A task missing from one side counts as 0 on that side.
 *
 * @param {Array<object>} iterations - Iteration logs, oldest first.
 * @param {Array<object>} _tasks - Unused; kept for interface compatibility.
 * @returns {{entries: Array<object>}} One entry per diagnosed transition:
 *   `{ iteration, mutationSummary, helpedTasks, hurtTasks, netScoreDelta }`.
 */
function diagnoseCounterfactuals(iterations, _tasks) {
  const entries = [];
  const scoreOf = (results, taskId) => (results[taskId] ? numericScore(results[taskId]) : 0);

  for (let i = 1; i < iterations.length; i++) {
    const prev = iterations[i - 1];
    const curr = iterations[i];
    const proposal = prev.proposal;
    if (!proposal || proposal.mutations.length === 0) continue;

    const mutationSummary = proposal.mutations
      .map((m) => `${m.action} in ${m.file}: ${m.rationale}`)
      .join("; ");

    const helpedTasks = [];
    const hurtTasks = [];
    let netDelta = 0;
    const allTaskIds = new Set([
      ...Object.keys(prev.taskResults),
      ...Object.keys(curr.taskResults)
    ]);
    for (const taskId of allTaskIds) {
      const delta = scoreOf(curr.taskResults, taskId) - scoreOf(prev.taskResults, taskId);
      if (delta > 0) {
        helpedTasks.push({ taskId, delta });
      } else if (delta < 0) {
        hurtTasks.push({ taskId, delta });
      }
      netDelta += delta;
    }

    entries.push({
      // Report the iteration's own number; the array index is only a fallback
      // for logs without one, since the two differ when numbering has gaps.
      iteration: curr.iteration ?? i,
      mutationSummary,
      helpedTasks,
      hurtTasks,
      netScoreDelta: netDelta
    });
  }
  return { entries };
}
|
|
5047
|
+
|
|
5048
|
+
// src/evolve/report.ts
|
|
5049
|
+
import { parse as yamlParse } from "yaml";
|
|
5050
|
+
/**
 * Turn a task result into a number. An explicit `score` wins unless it is
 * null/undefined; otherwise `pass` maps to 100 and anything else to 0.
 *
 * @param {{score?: number|null, pass?: boolean}} s - Task result.
 * @returns {number} Numeric score.
 */
function numericScore2(s) {
  const explicit = s.score;
  if (explicit !== null && explicit !== undefined) {
    return explicit;
  }
  return s.pass ? 100 : 0;
}
|
|
5053
|
+
/**
 * Load every iteration log found under `<workspacePath>/iterations`, ordered
 * by ascending iteration number.
 *
 * Only entries whose name is a canonical non-negative integer ("0", "1",
 * "12") count as iterations. Names such as "1.bak", "2abc" or "03" are
 * ignored; lenient `parseInt` would map them onto another iteration's number
 * and load that iteration twice, or look up a directory that does not exist.
 *
 * @param {string} workspacePath - Root of the evolve workspace.
 * @returns {Promise<Array<object>>} Loaded logs; empty when the directory
 *   cannot be read.
 */
async function loadAllIterations(workspacePath) {
  const iterDir = path22.join(workspacePath, "iterations");
  let entries;
  try {
    entries = await fs22.readdir(iterDir);
  } catch {
    return [];
  }

  const iterNums = entries
    .filter((name) => /^(0|[1-9]\d*)$/.test(name))
    .map((name) => Number.parseInt(name, 10))
    .sort((a, b) => a - b);

  const iterations = [];
  for (const n of iterNums) {
    const log = await loadIterationLog(workspacePath, n);
    if (log) iterations.push(log);
  }
  return iterations;
}
|
|
5069
|
+
/**
 * Read the task list from `<workspacePath>/tasks.yaml`.
 *
 * This is best-effort: a missing file, a parse failure, or a document whose
 * `tasks` entry is not an array all yield an empty list, so callers can
 * safely call array methods on the result.
 *
 * @param {string} workspacePath - Root of the evolve workspace.
 * @returns {Promise<Array<object>>} The parsed `tasks` array, or `[]`.
 */
async function loadTasks(workspacePath) {
  try {
    const content = await fs22.readFile(path22.join(workspacePath, "tasks.yaml"), "utf-8");
    const parsed = yamlParse(content);
    const tasks = parsed?.tasks;
    // A scalar or mapping under `tasks` would crash `tasks.map` downstream.
    return Array.isArray(tasks) ? tasks : [];
  } catch {
    return [];
  }
}
|
|
5078
|
+
/**
 * Build one leaderboard row per task: its score in every iteration that has
 * a result for it, plus the best score and the iteration that achieved it.
 *
 * The first iteration to reach the best score wins ties. A task with no
 * results in any iteration keeps the initial values `bestScore: -1` and
 * `bestIteration: 0`.
 *
 * @param {Array<object>} iterations - Iteration logs (`iteration`, `taskResults`).
 * @param {Array<{id: string}>} tasks - Task definitions.
 * @returns {Array<{taskId: string, scores: object, bestIteration: number, bestScore: number}>}
 */
function buildLeaderboard(iterations, tasks) {
  const ids = tasks.map((task) => task.id);
  return ids.map((taskId) => {
    const row = { taskId, scores: {}, bestIteration: 0, bestScore: -1 };
    iterations.forEach((entry) => {
      const result = entry.taskResults[taskId];
      if (!result) return;
      const value = numericScore2(result);
      row.scores[entry.iteration] = value;
      if (value > row.bestScore) {
        row.bestScore = value;
        row.bestIteration = entry.iteration;
      }
    });
    return row;
  });
}
|
|
5098
|
+
/**
 * Classify an iteration for display. The checks run in a fixed order and the
 * first match wins:
 *   1. iteration number 0            -> "baseline"
 *   2. no proposal and no diff patch -> "rollback"
 *   3. score of 100 or more          -> "perfect"
 *   4. number equals `bestIteration` -> "best"
 *   5. anything else                 -> "evaluated"
 *
 * @param {object} iter - Iteration log (`iteration`, `proposal`, `diffPatch`, `score`).
 * @param {number} bestIteration - Number of the best-scoring iteration.
 * @returns {string} Status label.
 */
function iterationStatus(iter, bestIteration) {
  const rules = [
    ["baseline", () => iter.iteration === 0],
    ["rollback", () => !(iter.proposal || iter.diffPatch)],
    ["perfect", () => iter.score >= 100],
    ["best", () => iter.iteration === bestIteration]
  ];
  const match = rules.find(([, applies]) => applies());
  return match ? match[0] : "evaluated";
}
|
|
5105
|
+
/**
 * Render the evolution run stored in `workspacePath` as a Markdown report.
 *
 * Sections: an overview table, a per-iteration table, a per-task leaderboard
 * (only when tasks are defined) and a counterfactual diagnosis (only when it
 * has entries). The first loaded iteration is used as the baseline, and the
 * best iteration is the first one with the highest score.
 *
 * @param {string} workspacePath - Root of the evolve workspace.
 * @returns {Promise<string>} Markdown text; a short placeholder report when
 *   no iterations were found.
 */
async function generateMarkdownReport(workspacePath) {
  const iterations = await loadAllIterations(workspacePath);
  const tasks = await loadTasks(workspacePath);
  if (iterations.length === 0) {
    return "# Evolution Report\n\nNo iterations found. Run `kairn evolve run` first.\n";
  }

  const baselineScore = iterations[0].score;
  const bestIter = iterations.reduce((best, curr) => curr.score > best.score ? curr : best, iterations[0]);
  const improvement = bestIter.score - baselineScore;
  const counterfactuals = diagnoseCounterfactuals(iterations, tasks);
  const leaderboard = buildLeaderboard(iterations, tasks);

  const lines = [];
  lines.push("# Evolution Report");
  lines.push("");
  lines.push("## Overview");
  lines.push("");
  lines.push(`| Metric | Value |`);
  lines.push(`|--------|-------|`);
  lines.push(`| Total iterations | ${iterations.length} |`);
  lines.push(`| Baseline score | ${baselineScore.toFixed(1)}% |`);
  lines.push(`| Best score | ${bestIter.score.toFixed(1)}% |`);
  lines.push(`| Best iteration | ${bestIter.iteration} |`);
  lines.push(`| Improvement | ${improvement >= 0 ? "+" : ""}${improvement.toFixed(1)} points |`);
  lines.push("");

  lines.push("## Iterations");
  lines.push("");
  lines.push("| Iter | Score | Mutations | Status |");
  lines.push("|------|-------|-----------|--------|");
  for (const iter of iterations) {
    const mutations = iter.proposal?.mutations.length ?? 0;
    const mutStr = mutations > 0 ? mutations.toString() : "-";
    const status = iterationStatus(iter, bestIter.iteration);
    lines.push(`| ${iter.iteration} | ${iter.score.toFixed(1)}% | ${mutStr} | ${status} |`);
  }
  lines.push("");

  if (leaderboard.length > 0) {
    lines.push("## Leaderboard");
    lines.push("");
    const iterNums = iterations.map((i) => i.iteration);
    const headerCols = ["Task", ...iterNums.map((n) => `Iter ${n}`), "Best"];
    lines.push(`| ${headerCols.join(" | ")} |`);
    lines.push(`| ${headerCols.map(() => "---").join(" | ")} |`);
    for (const entry of leaderboard) {
      const scoreCols = iterNums.map((n) => {
        const s = entry.scores[n];
        return s !== void 0 ? `${s.toFixed(0)}%` : "-";
      });
      // A task that was never scored has no meaningful best; print "-"
      // rather than the leaderboard's placeholder values.
      const hasScores = Object.keys(entry.scores).length > 0;
      const bestCol = hasScores ? `${entry.bestScore.toFixed(0)}% (iter ${entry.bestIteration})` : "-";
      lines.push(`| ${entry.taskId} | ${scoreCols.join(" | ")} | ${bestCol} |`);
    }
    lines.push("");
  }

  if (counterfactuals.entries.length > 0) {
    lines.push("## Counterfactual Diagnosis");
    lines.push("");
    for (const entry of counterfactuals.entries) {
      const sign = entry.netScoreDelta >= 0 ? "+" : "";
      lines.push(`### Iteration ${entry.iteration} (net ${sign}${entry.netScoreDelta.toFixed(1)} points)`);
      lines.push("");
      lines.push(`**Mutations:** ${entry.mutationSummary}`);
      lines.push("");
      if (entry.helpedTasks.length > 0) {
        lines.push("**Helped:**");
        for (const t of entry.helpedTasks) {
          lines.push(`- ${t.taskId}: +${t.delta.toFixed(1)}`);
        }
        lines.push("");
      }
      if (entry.hurtTasks.length > 0) {
        lines.push("**Hurt:**");
        for (const t of entry.hurtTasks) {
          // Negative deltas already carry their own "-" sign.
          lines.push(`- ${t.taskId}: ${t.delta.toFixed(1)}`);
        }
        lines.push("");
      }
    }
  }
  return lines.join("\n");
}
|
|
5183
|
+
/**
 * Produce the machine-readable form of the evolution report.
 *
 * With no iterations, the baseline score, best score, best iteration and
 * improvement are all 0. Otherwise the first loaded iteration is the baseline
 * and the best iteration is the first one with the highest score.
 *
 * @param {string} workspacePath - Root of the evolve workspace.
 * @returns {Promise<object>} `{ overview, iterations, leaderboard, counterfactuals }`.
 */
async function generateJsonReport(workspacePath) {
  const iterations = await loadAllIterations(workspacePath);
  const tasks = await loadTasks(workspacePath);

  let baselineScore = 0;
  let bestIter = { score: 0, iteration: 0 };
  if (iterations.length > 0) {
    baselineScore = iterations[0].score;
    bestIter = iterations[0];
    for (const candidate of iterations) {
      // Strict comparison keeps the earliest iteration on ties.
      if (candidate.score > bestIter.score) {
        bestIter = candidate;
      }
    }
  }
  const improvement = bestIter.score - baselineScore;

  const counterfactuals = diagnoseCounterfactuals(iterations, tasks);
  const leaderboard = buildLeaderboard(iterations, tasks);

  const overview = {
    title: "Evolution Report",
    totalIterations: iterations.length,
    baselineScore,
    bestScore: bestIter.score,
    bestIteration: bestIter.iteration,
    improvement
  };
  const iterationRows = iterations.map((iter) => {
    return {
      iteration: iter.iteration,
      score: iter.score,
      mutationCount: iter.proposal?.mutations.length ?? 0,
      status: iterationStatus(iter, bestIter.iteration)
    };
  });

  return { overview, iterations: iterationRows, leaderboard, counterfactuals };
}
|
|
5210
|
+
|
|
4981
5211
|
// src/commands/evolve.ts
|
|
4982
5212
|
var DEFAULT_CONFIG = {
|
|
4983
5213
|
model: "claude-sonnet-4-6",
|
|
@@ -4988,8 +5218,8 @@ var DEFAULT_CONFIG = {
|
|
|
4988
5218
|
};
|
|
4989
5219
|
async function loadEvolveConfigFromWorkspace(workspacePath) {
|
|
4990
5220
|
try {
|
|
4991
|
-
const configStr = await
|
|
4992
|
-
const parsed =
|
|
5221
|
+
const configStr = await fs23.readFile(path23.join(workspacePath, "config.yaml"), "utf-8");
|
|
5222
|
+
const parsed = yamlParse2(configStr);
|
|
4993
5223
|
return {
|
|
4994
5224
|
model: parsed.model ?? DEFAULT_CONFIG.model,
|
|
4995
5225
|
proposerModel: parsed.proposer_model ?? DEFAULT_CONFIG.proposerModel,
|
|
@@ -5006,9 +5236,9 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
|
|
|
5006
5236
|
try {
|
|
5007
5237
|
const projectRoot = process.cwd();
|
|
5008
5238
|
console.log(ui.section("Evolve Init"));
|
|
5009
|
-
const claudeDir =
|
|
5239
|
+
const claudeDir = path23.join(projectRoot, ".claude");
|
|
5010
5240
|
try {
|
|
5011
|
-
await
|
|
5241
|
+
await fs23.access(claudeDir);
|
|
5012
5242
|
} catch {
|
|
5013
5243
|
console.log(ui.error("No .claude/ directory found. Run kairn describe first."));
|
|
5014
5244
|
process.exit(1);
|
|
@@ -5058,7 +5288,7 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
|
|
|
5058
5288
|
if (config) {
|
|
5059
5289
|
let claudeMd = "";
|
|
5060
5290
|
try {
|
|
5061
|
-
claudeMd = await
|
|
5291
|
+
claudeMd = await fs23.readFile(path23.join(claudeDir, "CLAUDE.md"), "utf-8");
|
|
5062
5292
|
} catch {
|
|
5063
5293
|
}
|
|
5064
5294
|
const profile = await buildProjectProfile(projectRoot);
|
|
@@ -5089,16 +5319,16 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
|
|
|
5089
5319
|
evolveCommand.command("baseline").description("Snapshot current .claude/ directory as baseline").action(async () => {
|
|
5090
5320
|
try {
|
|
5091
5321
|
const projectRoot = process.cwd();
|
|
5092
|
-
const workspace =
|
|
5322
|
+
const workspace = path23.join(projectRoot, ".kairn-evolve");
|
|
5093
5323
|
console.log(ui.section("Evolve Baseline"));
|
|
5094
5324
|
try {
|
|
5095
|
-
await
|
|
5325
|
+
await fs23.access(workspace);
|
|
5096
5326
|
} catch {
|
|
5097
5327
|
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
5098
5328
|
process.exit(1);
|
|
5099
5329
|
}
|
|
5100
5330
|
await snapshotBaseline(projectRoot, workspace);
|
|
5101
|
-
const baselineDir =
|
|
5331
|
+
const baselineDir = path23.join(workspace, "baseline");
|
|
5102
5332
|
const fileCount = await countFiles(baselineDir);
|
|
5103
5333
|
console.log(ui.success(`Baseline snapshot created (${fileCount} files)`));
|
|
5104
5334
|
} catch (err) {
|
|
@@ -5110,23 +5340,23 @@ evolveCommand.command("baseline").description("Snapshot current .claude/ directo
|
|
|
5110
5340
|
evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").action(async (options) => {
|
|
5111
5341
|
try {
|
|
5112
5342
|
const projectRoot = process.cwd();
|
|
5113
|
-
const workspace =
|
|
5343
|
+
const workspace = path23.join(projectRoot, ".kairn-evolve");
|
|
5114
5344
|
console.log(ui.section("Evolve Run"));
|
|
5115
5345
|
try {
|
|
5116
|
-
await
|
|
5346
|
+
await fs23.access(workspace);
|
|
5117
5347
|
} catch {
|
|
5118
5348
|
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
5119
5349
|
process.exit(1);
|
|
5120
5350
|
}
|
|
5121
|
-
const tasksPath =
|
|
5351
|
+
const tasksPath = path23.join(workspace, "tasks.yaml");
|
|
5122
5352
|
let tasksContent;
|
|
5123
5353
|
try {
|
|
5124
|
-
tasksContent = await
|
|
5354
|
+
tasksContent = await fs23.readFile(tasksPath, "utf-8");
|
|
5125
5355
|
} catch {
|
|
5126
5356
|
console.log(ui.error("No tasks.yaml found. Run kairn evolve init first."));
|
|
5127
5357
|
process.exit(1);
|
|
5128
5358
|
}
|
|
5129
|
-
const parsed =
|
|
5359
|
+
const parsed = yamlParse2(tasksContent);
|
|
5130
5360
|
if (!parsed?.tasks || parsed.tasks.length === 0) {
|
|
5131
5361
|
console.log(ui.error("No tasks found in tasks.yaml"));
|
|
5132
5362
|
process.exit(1);
|
|
@@ -5140,15 +5370,15 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5140
5370
|
console.log(ui.info(`Running ${tasksToRun.length} task(s)...`));
|
|
5141
5371
|
console.log("");
|
|
5142
5372
|
const config = await loadConfig();
|
|
5143
|
-
const harnessPath =
|
|
5373
|
+
const harnessPath = path23.join(projectRoot, ".claude");
|
|
5144
5374
|
const results = [];
|
|
5145
5375
|
for (const task of tasksToRun) {
|
|
5146
|
-
const traceDir =
|
|
5376
|
+
const traceDir = path23.join(workspace, "traces", "0", task.id);
|
|
5147
5377
|
const spinner = ora2(`Running: ${task.id}`).start();
|
|
5148
5378
|
const result = await runTask(task, harnessPath, traceDir, 0);
|
|
5149
5379
|
if (config) {
|
|
5150
|
-
const stdout = await
|
|
5151
|
-
const stderr = await
|
|
5380
|
+
const stdout = await fs23.readFile(path23.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
|
|
5381
|
+
const stderr = await fs23.readFile(path23.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
|
|
5152
5382
|
const score = await scoreTask(task, traceDir, stdout, stderr, config);
|
|
5153
5383
|
result.score = score;
|
|
5154
5384
|
await writeScore(traceDir, score);
|
|
@@ -5177,7 +5407,7 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5177
5407
|
}
|
|
5178
5408
|
evolveConfig.maxIterations = iterations;
|
|
5179
5409
|
try {
|
|
5180
|
-
await
|
|
5410
|
+
await fs23.access(path23.join(workspace, "iterations", "0", "harness"));
|
|
5181
5411
|
} catch {
|
|
5182
5412
|
console.log(ui.error("No baseline harness found. Run kairn evolve baseline first."));
|
|
5183
5413
|
process.exit(1);
|
|
@@ -5238,13 +5468,107 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5238
5468
|
process.exit(1);
|
|
5239
5469
|
}
|
|
5240
5470
|
});
|
|
5471
|
+
// `kairn evolve report`: print a summary of the evolution run stored in
// ./.kairn-evolve, as Markdown by default or as JSON with --json.
// A missing workspace or any thrown error is reported via ui.error, followed
// by process.exit(1).
evolveCommand
  .command("report")
  .description("Generate a summary report of the evolution run")
  .option("--json", "Output machine-readable JSON instead of Markdown")
  .action(async (options) => {
    try {
      const workspace = path23.join(process.cwd(), ".kairn-evolve");

      const workspaceExists = await fs23.access(workspace).then(
        () => true,
        () => false
      );
      if (!workspaceExists) {
        console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
        process.exit(1);
      }

      const output = options.json
        ? JSON.stringify(await generateJsonReport(workspace), null, 2)
        : await generateMarkdownReport(workspace);
      console.log(output);
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      console.log(ui.error(msg));
      process.exit(1);
    }
  });
|
|
5494
|
+
// `kairn evolve diff <iter1> <iter2>`: print the harness diff between two
// iterations of ./.kairn-evolve, followed by a per-task score comparison
// when both iteration logs can be loaded.
//
// Both arguments must consist of digits only. Lenient parseInt would accept
// inputs such as "1.5" or "2abc" and silently diff a different iteration than
// the one typed, contradicting the "must be integers" error message.
evolveCommand.command("diff <iter1> <iter2>").description("Show harness changes between two iterations").action(async (iter1Str, iter2Str) => {
  try {
    const projectRoot = process.cwd();
    const workspace = path23.join(projectRoot, ".kairn-evolve");

    const isIterationNumber = (value) => /^\d+$/.test(String(value).trim());
    if (!isIterationNumber(iter1Str) || !isIterationNumber(iter2Str)) {
      console.log(ui.error("Both arguments must be integers (iteration numbers)"));
      process.exit(1);
    }
    const iter1 = Number.parseInt(iter1Str, 10);
    const iter2 = Number.parseInt(iter2Str, 10);

    const harness1 = path23.join(workspace, "iterations", iter1.toString(), "harness");
    const harness2 = path23.join(workspace, "iterations", iter2.toString(), "harness");
    // Both harness snapshots must exist before a diff can be produced.
    for (const [iter, harness] of [[iter1, harness1], [iter2, harness2]]) {
      try {
        await fs23.access(harness);
      } catch {
        console.log(ui.error(`Iteration ${iter} harness not found at ${harness}`));
        process.exit(1);
      }
    }

    console.log(ui.section(`Diff: Iteration ${iter1} \u2192 ${iter2}`));
    const diffPatch = await generateDiff2(harness1, harness2);
    if (!diffPatch) {
      console.log(chalk14.dim(" No harness changes between these iterations."));
    } else {
      for (const line of diffPatch.split("\n")) {
        // File headers are tested first because they also start with "+" / "-".
        if (line.startsWith("---") || line.startsWith("+++")) {
          console.log(chalk14.bold(line));
        } else if (line.startsWith("+")) {
          console.log(chalk14.green(line));
        } else if (line.startsWith("-")) {
          console.log(chalk14.red(line));
        } else {
          console.log(line);
        }
      }
    }

    const [log1, log2] = await Promise.all([
      loadIterationLog(workspace, iter1),
      loadIterationLog(workspace, iter2)
    ]);
    if (log1 && log2) {
      console.log("");
      console.log(ui.section("Score Comparison"));
      console.log("");
      console.log(` Task Iter ${iter1} Iter ${iter2} Delta`);

      // A missing result counts as 0; otherwise explicit score, else pass -> 100 / 0.
      const toScore = (s) => (s ? s.score ?? (s.pass ? 100 : 0) : 0);
      const allTaskIds = new Set([
        ...Object.keys(log1.taskResults),
        ...Object.keys(log2.taskResults)
      ]);
      for (const taskId of [...allTaskIds].sort()) {
        const score1 = toScore(log1.taskResults[taskId]);
        const score2 = toScore(log2.taskResults[taskId]);
        const delta = score2 - score1;
        const deltaStr = delta > 0 ? chalk14.green(`+${delta.toFixed(0)}`) : delta < 0 ? chalk14.red(delta.toFixed(0)) : chalk14.dim("0");
        const name = taskId.padEnd(30);
        console.log(` ${name} ${score1.toFixed(0).padStart(5)}% ${score2.toFixed(0).padStart(5)}% ${deltaStr}`);
      }
    }
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    console.log(ui.error(msg));
    process.exit(1);
  }
});
|
|
5241
5565
|
async function countFiles(dir) {
|
|
5242
5566
|
let count = 0;
|
|
5243
5567
|
try {
|
|
5244
|
-
const entries = await
|
|
5568
|
+
const entries = await fs23.readdir(dir, { withFileTypes: true });
|
|
5245
5569
|
for (const entry of entries) {
|
|
5246
5570
|
if (entry.isDirectory()) {
|
|
5247
|
-
count += await countFiles(
|
|
5571
|
+
count += await countFiles(path23.join(dir, entry.name));
|
|
5248
5572
|
} else {
|
|
5249
5573
|
count++;
|
|
5250
5574
|
}
|