kairn-cli 2.2.3 → 2.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +309 -53
- package/dist/cli.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -221,7 +221,7 @@ var ui = {
|
|
|
221
221
|
// Key-value pairs
|
|
222
222
|
kv: (key, value) => ` ${chalk.cyan(key.padEnd(14))} ${value}`,
|
|
223
223
|
// File list
|
|
224
|
-
file: (
|
|
224
|
+
file: (path25) => chalk.dim(` ${path25}`),
|
|
225
225
|
// Tool display
|
|
226
226
|
tool: (name, reason) => ` ${warmStone("\u25CF")} ${chalk.bold(name)}
|
|
227
227
|
${chalk.dim(reason)}`,
|
|
@@ -3702,8 +3702,8 @@ var keysCommand = new Command10("keys").description("Add or update API keys for
|
|
|
3702
3702
|
import { Command as Command11 } from "commander";
|
|
3703
3703
|
import chalk14 from "chalk";
|
|
3704
3704
|
import ora2 from "ora";
|
|
3705
|
-
import
|
|
3706
|
-
import
|
|
3705
|
+
import fs24 from "fs/promises";
|
|
3706
|
+
import path24 from "path";
|
|
3707
3707
|
import { parse as yamlParse2 } from "yaml";
|
|
3708
3708
|
import { confirm as confirm3, select as select4 } from "@inquirer/prompts";
|
|
3709
3709
|
|
|
@@ -4569,32 +4569,114 @@ function parseToolCalls(stdout) {
|
|
|
4569
4569
|
return [];
|
|
4570
4570
|
}
|
|
4571
4571
|
}
|
|
4572
|
-
async function
|
|
4572
|
+
/**
 * Run task factories with a cap on how many are in flight at once.
 *
 * Tasks are started in array order. A failing task does not stop the
 * remaining tasks from being started; once everything has settled, the first
 * error that was recorded is rethrown.
 *
 * @param {Array<() => Promise<unknown>>} tasks - Zero-argument factories; each is invoked exactly once.
 * @param {number} limit - Maximum number of tasks in flight. Values below 1 are
 *   raised to 1; a missing or non-numeric limit is treated as 1.
 * @returns {Promise<unknown[]>} Fulfilled values, positioned by task index.
 * @throws The first recorded task error, after all tasks have settled.
 */
async function runWithConcurrency(tasks, limit) {
  const results = new Array(tasks.length);
  const executing = new Set();
  const errors = [];
  // Math.max(1, NaN) is NaN and `size >= NaN` is never true, which would
  // silently remove the cap. Fall back to sequential execution instead.
  const requested = Number(limit);
  const effectiveLimit = Number.isNaN(requested) ? 1 : Math.max(1, requested);
  for (let i = 0; i < tasks.length; i++) {
    // The async wrapper still calls the factory synchronously, but turns a
    // synchronous throw (or a non-promise return value) into a settled
    // promise so it is recorded like any other outcome.
    const p = (async () => tasks[i]())().then(
      (result) => {
        results[i] = result;
      },
      (err) => {
        errors.push(err);
      },
    );
    // `p` never rejects (both outcomes are handled above), so `tracked`
    // only exists to remove itself from the in-flight set.
    const tracked = p.then(() => {
      executing.delete(tracked);
    });
    executing.add(tracked);
    if (executing.size >= effectiveLimit) {
      // Wait for a free slot before starting the next task.
      await Promise.race(executing);
    }
  }
  await Promise.all(executing);
  if (errors.length > 0) {
    throw errors[0];
  }
  return results;
}
|
|
4600
|
+
/**
 * Population standard deviation of `values` around a precomputed mean.
 *
 * @param {number[]} values - Samples to measure.
 * @param {number} mean - Mean of `values`, supplied by the caller.
 * @returns {number} Square root of the average squared deviation (divides by
 *   the sample count, not count - 1); 0 when there are fewer than two samples.
 */
function computeStddev(values, mean) {
  const count = values.length;
  if (count < 2) {
    return 0;
  }
  let squaredTotal = 0;
  for (const sample of values) {
    const delta = sample - mean;
    squaredTotal += delta ** 2;
  }
  return Math.sqrt(squaredTotal / count);
}
|
|
4605
|
+
async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config, onProgress, runsPerTask = 1, parallelTasks = 1) {
|
|
4573
4606
|
const results = {};
|
|
4574
4607
|
const projectRoot = path18.resolve(workspacePath, "..");
|
|
4575
|
-
|
|
4576
|
-
|
|
4577
|
-
|
|
4578
|
-
"traces",
|
|
4579
|
-
iteration.toString(),
|
|
4580
|
-
task.id
|
|
4581
|
-
);
|
|
4608
|
+
const effectiveRuns = Math.max(1, runsPerTask);
|
|
4609
|
+
const concurrency = Math.max(1, parallelTasks);
|
|
4610
|
+
const evaluateTask = async (task) => {
|
|
4582
4611
|
onProgress?.({ type: "task-start", iteration, taskId: task.id });
|
|
4583
|
-
|
|
4584
|
-
|
|
4585
|
-
|
|
4586
|
-
|
|
4587
|
-
|
|
4588
|
-
|
|
4589
|
-
|
|
4590
|
-
|
|
4591
|
-
|
|
4612
|
+
let finalScore;
|
|
4613
|
+
if (effectiveRuns > 1 && config) {
|
|
4614
|
+
const runScores = [];
|
|
4615
|
+
let passCount = 0;
|
|
4616
|
+
for (let run = 0; run < effectiveRuns; run++) {
|
|
4617
|
+
const traceDir = path18.join(
|
|
4618
|
+
workspacePath,
|
|
4619
|
+
"traces",
|
|
4620
|
+
iteration.toString(),
|
|
4621
|
+
`${task.id}_run${run}`
|
|
4622
|
+
);
|
|
4623
|
+
onProgress?.({
|
|
4624
|
+
type: "task-run",
|
|
4625
|
+
iteration,
|
|
4626
|
+
taskId: task.id,
|
|
4627
|
+
message: `Run ${run + 1}/${effectiveRuns} of ${task.id}`
|
|
4628
|
+
});
|
|
4629
|
+
await runTask(task, harnessPath, traceDir, iteration, projectRoot);
|
|
4630
|
+
const stdout = await fs18.readFile(path18.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
|
|
4631
|
+
const stderr = await fs18.readFile(path18.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
|
|
4632
|
+
const score = await scoreTask(task, traceDir, stdout, stderr, config);
|
|
4633
|
+
await writeScore(traceDir, score);
|
|
4634
|
+
runScores.push(score.score ?? (score.pass ? 100 : 0));
|
|
4635
|
+
if (score.pass) passCount++;
|
|
4636
|
+
}
|
|
4637
|
+
const mean = runScores.reduce((a, b) => a + b, 0) / runScores.length;
|
|
4638
|
+
const stddev = computeStddev(runScores, mean);
|
|
4639
|
+
finalScore = {
|
|
4640
|
+
pass: passCount > effectiveRuns / 2,
|
|
4641
|
+
score: mean,
|
|
4642
|
+
details: `Mean of ${effectiveRuns} runs`,
|
|
4643
|
+
variance: {
|
|
4644
|
+
runs: effectiveRuns,
|
|
4645
|
+
scores: runScores,
|
|
4646
|
+
mean,
|
|
4647
|
+
stddev
|
|
4648
|
+
}
|
|
4649
|
+
};
|
|
4650
|
+
} else {
|
|
4651
|
+
const traceDir = path18.join(
|
|
4652
|
+
workspacePath,
|
|
4653
|
+
"traces",
|
|
4654
|
+
iteration.toString(),
|
|
4655
|
+
task.id
|
|
4656
|
+
);
|
|
4657
|
+
const taskResult = await runTask(task, harnessPath, traceDir, iteration, projectRoot);
|
|
4658
|
+
finalScore = taskResult.score;
|
|
4659
|
+
if (config) {
|
|
4660
|
+
const stdout = await fs18.readFile(path18.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
|
|
4661
|
+
const stderr = await fs18.readFile(path18.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
|
|
4662
|
+
finalScore = await scoreTask(task, traceDir, stdout, stderr, config);
|
|
4663
|
+
await writeScore(traceDir, finalScore);
|
|
4664
|
+
}
|
|
4665
|
+
}
|
|
4592
4666
|
onProgress?.({
|
|
4593
4667
|
type: "task-scored",
|
|
4594
4668
|
iteration,
|
|
4595
4669
|
taskId: task.id,
|
|
4596
|
-
score:
|
|
4670
|
+
score: finalScore.score ?? (finalScore.pass ? 100 : 0)
|
|
4597
4671
|
});
|
|
4672
|
+
return { id: task.id, score: finalScore };
|
|
4673
|
+
};
|
|
4674
|
+
const taskResults = await runWithConcurrency(
|
|
4675
|
+
tasks.map((task) => () => evaluateTask(task)),
|
|
4676
|
+
concurrency
|
|
4677
|
+
);
|
|
4678
|
+
for (const { id, score } of taskResults) {
|
|
4679
|
+
results[id] = score;
|
|
4598
4680
|
}
|
|
4599
4681
|
const scores = Object.values(results);
|
|
4600
4682
|
const total = scores.reduce(
|
|
@@ -5052,7 +5134,9 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5052
5134
|
workspacePath,
|
|
5053
5135
|
iter,
|
|
5054
5136
|
kairnConfig,
|
|
5055
|
-
onProgress
|
|
5137
|
+
onProgress,
|
|
5138
|
+
evolveConfig.runsPerTask,
|
|
5139
|
+
evolveConfig.parallelTasks
|
|
5056
5140
|
);
|
|
5057
5141
|
onProgress?.({ type: "iteration-scored", iteration: iter, score: aggregate });
|
|
5058
5142
|
if (iter === 0) baselineScore = aggregate;
|
|
@@ -5409,24 +5493,128 @@ async function generateJsonReport(workspacePath) {
|
|
|
5409
5493
|
};
|
|
5410
5494
|
}
|
|
5411
5495
|
|
|
5496
|
+
// src/evolve/apply.ts
|
|
5497
|
+
import fs23 from "fs/promises";
|
|
5498
|
+
import path23 from "path";
|
|
5499
|
+
/**
 * List the iteration numbers in a workspace that have a harness directory.
 *
 * Looks at `<workspacePath>/iterations/<n>/harness`. Only entries whose name
 * is exactly the decimal form of an integer are considered, so that the
 * returned number always maps back to the same directory name.
 *
 * @param {string} workspacePath - Path to the evolve workspace.
 * @returns {Promise<number[]>} Iteration numbers in ascending order; empty
 *   when the `iterations` directory cannot be read.
 */
async function listIterations(workspacePath) {
  const iterationsDir = path23.join(workspacePath, "iterations");
  let entries;
  try {
    entries = await fs23.readdir(iterationsDir);
  } catch {
    // Missing or unreadable iterations directory: nothing to list.
    return [];
  }
  const nums = [];
  for (const entry of entries) {
    const n = Number.parseInt(entry, 10);
    // parseInt alone accepts names like "2-backup" or "03"; callers rebuild
    // the path from the number, so only canonical names may be reported.
    if (Number.isNaN(n) || String(n) !== entry) {
      continue;
    }
    try {
      await fs23.access(path23.join(iterationsDir, entry, "harness"));
      nums.push(n);
    } catch {
      // Iteration directory without a harness: skip it.
    }
  }
  return nums.sort((a, b) => a - b);
}
|
|
5520
|
+
/**
 * Pick the iteration with the highest logged score.
 *
 * Each iteration's log is loaded via `loadIterationLog`; a missing log or
 * missing score counts as 0. On ties the earliest iteration in `iterations`
 * wins, because only a strictly greater score replaces the current winner.
 *
 * @param {string} workspacePath - Path to the evolve workspace.
 * @param {number[]} iterations - Candidate iteration numbers.
 * @returns {Promise<number>} The winning iteration (`iterations[0]` if no
 *   candidate ever beats the initial -Infinity, e.g. for an empty list).
 */
async function findBestIteration(workspacePath, iterations) {
  let winner = iterations[0];
  let topScore = Number.NEGATIVE_INFINITY;
  for (const candidate of iterations) {
    const entry = await loadIterationLog(workspacePath, candidate);
    const candidateScore = entry?.score ?? 0;
    const improves = candidateScore > topScore;
    if (!improves) {
      continue;
    }
    topScore = candidateScore;
    winner = candidate;
  }
  return winner;
}
|
|
5533
|
+
/**
 * Collect every non-directory entry beneath `dir`, depth-first.
 *
 * @param {string} dir - Root directory to walk.
 * @returns {Promise<string[]>} Paths relative to `dir`. Directories that
 *   cannot be read contribute nothing, so a missing root gives an empty list.
 */
async function listFilesRecursive(dir) {
  const collect = async (folder) => {
    const children = await fs23.readdir(folder, { withFileTypes: true }).catch(() => null);
    if (children === null) {
      // Unreadable directory: treated as empty, not as an error.
      return [];
    }
    const found = [];
    for (const child of children) {
      const childPath = path23.join(folder, child.name);
      if (!child.isDirectory()) {
        found.push(path23.relative(dir, childPath));
        continue;
      }
      for (const nested of await collect(childPath)) {
        found.push(nested);
      }
    }
    return found;
  };
  return collect(dir);
}
|
|
5554
|
+
/**
 * Replace the project's `.claude` directory with the harness of an iteration.
 *
 * @param {string} workspacePath - Path to the evolve workspace.
 * @param {string} projectRoot - Project directory that owns `.claude`.
 * @param {number} [targetIteration] - Iteration to apply; when omitted, the
 *   one chosen by `findBestIteration` is used.
 * @returns {Promise<{iteration: number, filesChanged: string[], diffPreview: unknown}>}
 *   The applied iteration, the relative paths whose content differed (or that
 *   existed on only one side) before the swap, and the result of
 *   `generateDiff2` computed before the swap.
 * @throws {Error} When the workspace has no iterations, or `targetIteration`
 *   is not among them. Copy, remove, and rename failures propagate.
 */
async function applyEvolution(workspacePath, projectRoot, targetIteration) {
  const iterations = await listIterations(workspacePath);
  if (iterations.length === 0) {
    throw new Error("No iterations found in workspace. Run `kairn evolve run` first.");
  }
  let iter;
  if (targetIteration !== undefined) {
    if (!iterations.includes(targetIteration)) {
      throw new Error(
        `Iteration ${targetIteration} not found. Available: ${iterations.join(", ")}`
      );
    }
    iter = targetIteration;
  } else {
    iter = await findBestIteration(workspacePath, iterations);
  }
  const harnessPath = path23.join(
    workspacePath,
    "iterations",
    iter.toString(),
    "harness"
  );
  const claudeDir = path23.join(projectRoot, ".claude");

  // Diff and change list are computed against the current state, before
  // anything on disk is touched.
  const diffPreview = await generateDiff2(claudeDir, harnessPath);
  const currentFiles = await listFilesRecursive(claudeDir);
  const targetFiles = await listFilesRecursive(harnessPath);
  const allPaths = new Set([...currentFiles, ...targetFiles]);
  const filesChanged = [];
  for (const filePath of allPaths) {
    // A file missing on one side reads as null and therefore counts as changed.
    const currentContent = await fs23.readFile(path23.join(claudeDir, filePath), "utf-8").catch(() => null);
    const targetContent = await fs23.readFile(path23.join(harnessPath, filePath), "utf-8").catch(() => null);
    if (currentContent !== targetContent) {
      filesChanged.push(filePath);
    }
  }

  // Copy into a staging sibling first. Deleting `.claude` before the copy
  // has succeeded would leave the project without any harness if the copy
  // fails part-way.
  const stagingDir = `${claudeDir}.kairn-staging`;
  await fs23.rm(stagingDir, { recursive: true, force: true });
  try {
    await copyDir(harnessPath, stagingDir);
  } catch (err) {
    // The existing `.claude` is still untouched here; drop the partial copy.
    await fs23.rm(stagingDir, { recursive: true, force: true }).catch(() => {});
    throw err;
  }
  // If the swap itself fails, the staging directory is deliberately left in
  // place so the new harness can be recovered by hand.
  await fs23.rm(claudeDir, { recursive: true, force: true });
  await fs23.rename(stagingDir, claudeDir);

  return {
    iteration: iter,
    filesChanged,
    diffPreview
  };
}
|
|
5597
|
+
|
|
5412
5598
|
// src/commands/evolve.ts
|
|
5413
5599
|
var DEFAULT_CONFIG = {
|
|
5414
5600
|
model: "claude-sonnet-4-6",
|
|
5415
5601
|
proposerModel: "claude-opus-4-6",
|
|
5416
5602
|
scorer: "pass-fail",
|
|
5417
5603
|
maxIterations: 5,
|
|
5418
|
-
parallelTasks: 1
|
|
5604
|
+
parallelTasks: 1,
|
|
5605
|
+
runsPerTask: 1
|
|
5419
5606
|
};
|
|
5420
5607
|
async function loadEvolveConfigFromWorkspace(workspacePath) {
|
|
5421
5608
|
try {
|
|
5422
|
-
const configStr = await
|
|
5609
|
+
const configStr = await fs24.readFile(path24.join(workspacePath, "config.yaml"), "utf-8");
|
|
5423
5610
|
const parsed = yamlParse2(configStr);
|
|
5424
5611
|
return {
|
|
5425
5612
|
model: parsed.model ?? DEFAULT_CONFIG.model,
|
|
5426
5613
|
proposerModel: parsed.proposer_model ?? DEFAULT_CONFIG.proposerModel,
|
|
5427
5614
|
scorer: parsed.scorer ?? DEFAULT_CONFIG.scorer,
|
|
5428
5615
|
maxIterations: parsed.max_iterations ?? DEFAULT_CONFIG.maxIterations,
|
|
5429
|
-
parallelTasks: parsed.parallel_tasks ?? DEFAULT_CONFIG.parallelTasks
|
|
5616
|
+
parallelTasks: parsed.parallel_tasks ?? DEFAULT_CONFIG.parallelTasks,
|
|
5617
|
+
runsPerTask: parsed.runs_per_task ?? DEFAULT_CONFIG.runsPerTask
|
|
5430
5618
|
};
|
|
5431
5619
|
} catch {
|
|
5432
5620
|
return { ...DEFAULT_CONFIG };
|
|
@@ -5437,9 +5625,9 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
|
|
|
5437
5625
|
try {
|
|
5438
5626
|
const projectRoot = process.cwd();
|
|
5439
5627
|
console.log(ui.section("Evolve Init"));
|
|
5440
|
-
const claudeDir =
|
|
5628
|
+
const claudeDir = path24.join(projectRoot, ".claude");
|
|
5441
5629
|
try {
|
|
5442
|
-
await
|
|
5630
|
+
await fs24.access(claudeDir);
|
|
5443
5631
|
} catch {
|
|
5444
5632
|
console.log(ui.error("No .claude/ directory found. Run kairn describe first."));
|
|
5445
5633
|
process.exit(1);
|
|
@@ -5489,7 +5677,7 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
|
|
|
5489
5677
|
if (config) {
|
|
5490
5678
|
let claudeMd = "";
|
|
5491
5679
|
try {
|
|
5492
|
-
claudeMd = await
|
|
5680
|
+
claudeMd = await fs24.readFile(path24.join(claudeDir, "CLAUDE.md"), "utf-8");
|
|
5493
5681
|
} catch {
|
|
5494
5682
|
}
|
|
5495
5683
|
const profile = await buildProjectProfile(projectRoot);
|
|
@@ -5520,16 +5708,16 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
|
|
|
5520
5708
|
evolveCommand.command("baseline").description("Snapshot current .claude/ directory as baseline").action(async () => {
|
|
5521
5709
|
try {
|
|
5522
5710
|
const projectRoot = process.cwd();
|
|
5523
|
-
const workspace =
|
|
5711
|
+
const workspace = path24.join(projectRoot, ".kairn-evolve");
|
|
5524
5712
|
console.log(ui.section("Evolve Baseline"));
|
|
5525
5713
|
try {
|
|
5526
|
-
await
|
|
5714
|
+
await fs24.access(workspace);
|
|
5527
5715
|
} catch {
|
|
5528
5716
|
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
5529
5717
|
process.exit(1);
|
|
5530
5718
|
}
|
|
5531
5719
|
await snapshotBaseline(projectRoot, workspace);
|
|
5532
|
-
const baselineDir =
|
|
5720
|
+
const baselineDir = path24.join(workspace, "baseline");
|
|
5533
5721
|
const fileCount = await countFiles(baselineDir);
|
|
5534
5722
|
console.log(ui.success(`Baseline snapshot created (${fileCount} files)`));
|
|
5535
5723
|
} catch (err) {
|
|
@@ -5538,21 +5726,21 @@ evolveCommand.command("baseline").description("Snapshot current .claude/ directo
|
|
|
5538
5726
|
process.exit(1);
|
|
5539
5727
|
}
|
|
5540
5728
|
});
|
|
5541
|
-
evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").action(async (options) => {
|
|
5729
|
+
evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").option("--parallel <n>", "Run up to N tasks concurrently", "1").action(async (options) => {
|
|
5542
5730
|
try {
|
|
5543
5731
|
const projectRoot = process.cwd();
|
|
5544
|
-
const workspace =
|
|
5732
|
+
const workspace = path24.join(projectRoot, ".kairn-evolve");
|
|
5545
5733
|
console.log(ui.section("Evolve Run"));
|
|
5546
5734
|
try {
|
|
5547
|
-
await
|
|
5735
|
+
await fs24.access(workspace);
|
|
5548
5736
|
} catch {
|
|
5549
5737
|
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
5550
5738
|
process.exit(1);
|
|
5551
5739
|
}
|
|
5552
|
-
const tasksPath =
|
|
5740
|
+
const tasksPath = path24.join(workspace, "tasks.yaml");
|
|
5553
5741
|
let tasksContent;
|
|
5554
5742
|
try {
|
|
5555
|
-
tasksContent = await
|
|
5743
|
+
tasksContent = await fs24.readFile(tasksPath, "utf-8");
|
|
5556
5744
|
} catch {
|
|
5557
5745
|
console.log(ui.error("No tasks.yaml found. Run kairn evolve init first."));
|
|
5558
5746
|
process.exit(1);
|
|
@@ -5571,15 +5759,15 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5571
5759
|
console.log(ui.info(`Running ${tasksToRun.length} task(s)...`));
|
|
5572
5760
|
console.log("");
|
|
5573
5761
|
const config = await loadConfig();
|
|
5574
|
-
const harnessPath =
|
|
5762
|
+
const harnessPath = path24.join(projectRoot, ".claude");
|
|
5575
5763
|
const results = [];
|
|
5576
5764
|
for (const task of tasksToRun) {
|
|
5577
|
-
const traceDir =
|
|
5765
|
+
const traceDir = path24.join(workspace, "traces", "0", task.id);
|
|
5578
5766
|
const spinner = ora2(`Running: ${task.id}`).start();
|
|
5579
5767
|
const result = await runTask(task, harnessPath, traceDir, 0);
|
|
5580
5768
|
if (config) {
|
|
5581
|
-
const stdout = await
|
|
5582
|
-
const stderr = await
|
|
5769
|
+
const stdout = await fs24.readFile(path24.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
|
|
5770
|
+
const stderr = await fs24.readFile(path24.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
|
|
5583
5771
|
const score = await scoreTask(task, traceDir, stdout, stderr, config);
|
|
5584
5772
|
result.score = score;
|
|
5585
5773
|
await writeScore(traceDir, score);
|
|
@@ -5607,8 +5795,20 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5607
5795
|
process.exit(1);
|
|
5608
5796
|
}
|
|
5609
5797
|
evolveConfig.maxIterations = iterations;
|
|
5798
|
+
const runs = parseInt(options.runs ?? "1", 10);
|
|
5799
|
+
if (isNaN(runs) || runs < 1) {
|
|
5800
|
+
console.log(ui.error("--runs must be a positive integer"));
|
|
5801
|
+
process.exit(1);
|
|
5802
|
+
}
|
|
5803
|
+
evolveConfig.runsPerTask = runs;
|
|
5804
|
+
const parallel = parseInt(options.parallel ?? "1", 10);
|
|
5805
|
+
if (isNaN(parallel) || parallel < 1) {
|
|
5806
|
+
console.log(ui.error("--parallel must be a positive integer"));
|
|
5807
|
+
process.exit(1);
|
|
5808
|
+
}
|
|
5809
|
+
evolveConfig.parallelTasks = parallel;
|
|
5610
5810
|
try {
|
|
5611
|
-
await
|
|
5811
|
+
await fs24.access(path24.join(workspace, "iterations", "0", "harness"));
|
|
5612
5812
|
} catch {
|
|
5613
5813
|
console.log(ui.error("No baseline harness found. Run kairn evolve baseline first."));
|
|
5614
5814
|
process.exit(1);
|
|
@@ -5641,6 +5841,9 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5641
5841
|
case "task-start":
|
|
5642
5842
|
console.log(chalk14.dim(` Running: ${event.taskId ?? "unknown"}...`));
|
|
5643
5843
|
break;
|
|
5844
|
+
case "task-run":
|
|
5845
|
+
console.log(chalk14.dim(` ${event.message ?? ""}`));
|
|
5846
|
+
break;
|
|
5644
5847
|
case "task-scored": {
|
|
5645
5848
|
const taskScore = event.score ?? 0;
|
|
5646
5849
|
const taskStatus = taskScore >= 100 ? chalk14.green("PASS") : taskScore >= 60 ? chalk14.yellow("PARTIAL") : chalk14.red("FAIL");
|
|
@@ -5662,9 +5865,18 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5662
5865
|
console.log(` Improvement: ${improvement.toFixed(1)} points`);
|
|
5663
5866
|
}
|
|
5664
5867
|
console.log("");
|
|
5665
|
-
|
|
5868
|
+
const showVariance = runs > 1;
|
|
5869
|
+
console.log(showVariance ? " Iter Score Mutations Status" : " Iter Score Mutations Status");
|
|
5666
5870
|
for (const iter of result.iterations) {
|
|
5667
|
-
|
|
5871
|
+
let scoreDisplay;
|
|
5872
|
+
if (showVariance) {
|
|
5873
|
+
const taskScores = Object.values(iter.taskResults);
|
|
5874
|
+
const stddevs = taskScores.map((s) => s.variance?.stddev).filter((v) => v !== void 0);
|
|
5875
|
+
const avgStddev = stddevs.length > 0 ? stddevs.reduce((a, b) => a + b, 0) / stddevs.length : 0;
|
|
5876
|
+
scoreDisplay = `${iter.score.toFixed(1).padStart(6)}% \xB1${avgStddev.toFixed(1)}`;
|
|
5877
|
+
} else {
|
|
5878
|
+
scoreDisplay = iter.score.toFixed(1).padStart(6) + "%";
|
|
5879
|
+
}
|
|
5668
5880
|
const mutations = iter.proposal?.mutations.length ?? 0;
|
|
5669
5881
|
const mutStr = mutations > 0 ? mutations.toString() : "-";
|
|
5670
5882
|
let status = "evaluated";
|
|
@@ -5672,7 +5884,7 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5672
5884
|
else if (!iter.proposal && !iter.diffPatch) status = "rollback";
|
|
5673
5885
|
else if (iter.score >= 100) status = "perfect";
|
|
5674
5886
|
else if (iter.iteration === result.bestIteration) status = "best";
|
|
5675
|
-
console.log(` ${iter.iteration.toString().padStart(4)} ${
|
|
5887
|
+
console.log(` ${iter.iteration.toString().padStart(4)} ${scoreDisplay} ${mutStr.padStart(9)} ${status}`);
|
|
5676
5888
|
}
|
|
5677
5889
|
}
|
|
5678
5890
|
} catch (err) {
|
|
@@ -5681,12 +5893,56 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5681
5893
|
process.exit(1);
|
|
5682
5894
|
}
|
|
5683
5895
|
});
|
|
5896
|
+
// `kairn evolve apply`: replace the project's .claude/ with an evolved harness
// and print the resulting diff.
// NOTE(review): `--force` and `--no-commit` are declared below, but this
// handler never reads `options.force` / `options.commit`, and the call to
// applyEvolution does not receive them. As written, no dirty-tree check or git
// commit happens here. Confirm whether that logic lives elsewhere or is
// still to be implemented.
evolveCommand
  .command("apply")
  .description("Apply the best evolved harness to your project")
  .option("--iter <n>", "Apply a specific iteration instead of the best")
  .option("--force", "Apply even if git working tree is dirty")
  .option("--no-commit", "Skip automatic git commit after applying")
  .action(async (options) => {
    try {
      const projectRoot = process.cwd();
      const workspace = path24.join(projectRoot, ".kairn-evolve");
      console.log(ui.section("Evolve Apply"));
      try {
        await fs24.access(workspace);
      } catch {
        console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
        process.exit(1);
      }
      let targetIteration;
      if (options.iter !== undefined) {
        // A bare parseInt would accept "2abc" or "1.5" and silently apply a
        // different iteration; applying is destructive, so require plain digits.
        const rawIter = String(options.iter).trim();
        if (!/^\d+$/.test(rawIter)) {
          console.log(ui.error("--iter must be a number"));
          process.exit(1);
        }
        targetIteration = Number.parseInt(rawIter, 10);
      }
      const result = await applyEvolution(workspace, projectRoot, targetIteration);
      if (result.diffPreview) {
        console.log(ui.section("Changes"));
        for (const line of result.diffPreview.split("\n")) {
          // File headers must be tested before the single-character +/- checks.
          if (line.startsWith("---") || line.startsWith("+++")) {
            console.log(chalk14.bold(line));
          } else if (line.startsWith("+")) {
            console.log(chalk14.green(line));
          } else if (line.startsWith("-")) {
            console.log(chalk14.red(line));
          } else {
            console.log(line);
          }
        }
      }
      console.log("");
      console.log(ui.success(
        `Applied iteration ${result.iteration} harness (${result.filesChanged.length} files)`
      ));
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      console.log(ui.error(msg));
      process.exit(1);
    }
  });
|
|
5684
5940
|
evolveCommand.command("report").description("Generate a summary report of the evolution run").option("--json", "Output machine-readable JSON instead of Markdown").action(async (options) => {
|
|
5685
5941
|
try {
|
|
5686
5942
|
const projectRoot = process.cwd();
|
|
5687
|
-
const workspace =
|
|
5943
|
+
const workspace = path24.join(projectRoot, ".kairn-evolve");
|
|
5688
5944
|
try {
|
|
5689
|
-
await
|
|
5945
|
+
await fs24.access(workspace);
|
|
5690
5946
|
} catch {
|
|
5691
5947
|
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
5692
5948
|
process.exit(1);
|
|
@@ -5707,23 +5963,23 @@ evolveCommand.command("report").description("Generate a summary report of the ev
|
|
|
5707
5963
|
evolveCommand.command("diff <iter1> <iter2>").description("Show harness changes between two iterations").action(async (iter1Str, iter2Str) => {
|
|
5708
5964
|
try {
|
|
5709
5965
|
const projectRoot = process.cwd();
|
|
5710
|
-
const workspace =
|
|
5966
|
+
const workspace = path24.join(projectRoot, ".kairn-evolve");
|
|
5711
5967
|
const iter1 = parseInt(iter1Str, 10);
|
|
5712
5968
|
const iter2 = parseInt(iter2Str, 10);
|
|
5713
5969
|
if (isNaN(iter1) || isNaN(iter2)) {
|
|
5714
5970
|
console.log(ui.error("Both arguments must be integers (iteration numbers)"));
|
|
5715
5971
|
process.exit(1);
|
|
5716
5972
|
}
|
|
5717
|
-
const harness1 =
|
|
5718
|
-
const harness2 =
|
|
5973
|
+
const harness1 = path24.join(workspace, "iterations", iter1.toString(), "harness");
|
|
5974
|
+
const harness2 = path24.join(workspace, "iterations", iter2.toString(), "harness");
|
|
5719
5975
|
try {
|
|
5720
|
-
await
|
|
5976
|
+
await fs24.access(harness1);
|
|
5721
5977
|
} catch {
|
|
5722
5978
|
console.log(ui.error(`Iteration ${iter1} harness not found at ${harness1}`));
|
|
5723
5979
|
process.exit(1);
|
|
5724
5980
|
}
|
|
5725
5981
|
try {
|
|
5726
|
-
await
|
|
5982
|
+
await fs24.access(harness2);
|
|
5727
5983
|
} catch {
|
|
5728
5984
|
console.log(ui.error(`Iteration ${iter2} harness not found at ${harness2}`));
|
|
5729
5985
|
process.exit(1);
|
|
@@ -5778,10 +6034,10 @@ evolveCommand.command("diff <iter1> <iter2>").description("Show harness changes
|
|
|
5778
6034
|
async function countFiles(dir) {
|
|
5779
6035
|
let count = 0;
|
|
5780
6036
|
try {
|
|
5781
|
-
const entries = await
|
|
6037
|
+
const entries = await fs24.readdir(dir, { withFileTypes: true });
|
|
5782
6038
|
for (const entry of entries) {
|
|
5783
6039
|
if (entry.isDirectory()) {
|
|
5784
|
-
count += await countFiles(
|
|
6040
|
+
count += await countFiles(path24.join(dir, entry.name));
|
|
5785
6041
|
} else {
|
|
5786
6042
|
count++;
|
|
5787
6043
|
}
|