kairn-cli 2.2.2 → 2.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +315 -56
- package/dist/cli.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -221,7 +221,7 @@ var ui = {
|
|
|
221
221
|
// Key-value pairs
|
|
222
222
|
kv: (key, value) => ` ${chalk.cyan(key.padEnd(14))} ${value}`,
|
|
223
223
|
// File list
|
|
224
|
-
file: (
|
|
224
|
+
file: (path25) => chalk.dim(` ${path25}`),
|
|
225
225
|
// Tool display
|
|
226
226
|
tool: (name, reason) => ` ${warmStone("\u25CF")} ${chalk.bold(name)}
|
|
227
227
|
${chalk.dim(reason)}`,
|
|
@@ -3702,8 +3702,8 @@ var keysCommand = new Command10("keys").description("Add or update API keys for
|
|
|
3702
3702
|
import { Command as Command11 } from "commander";
|
|
3703
3703
|
import chalk14 from "chalk";
|
|
3704
3704
|
import ora2 from "ora";
|
|
3705
|
-
import
|
|
3706
|
-
import
|
|
3705
|
+
import fs24 from "fs/promises";
|
|
3706
|
+
import path24 from "path";
|
|
3707
3707
|
import { parse as yamlParse2 } from "yaml";
|
|
3708
3708
|
import { confirm as confirm3, select as select4 } from "@inquirer/prompts";
|
|
3709
3709
|
|
|
@@ -4031,6 +4031,13 @@ async function snapshotBaseline(projectRoot, workspacePath) {
|
|
|
4031
4031
|
}
|
|
4032
4032
|
await copyDir(claudeDir, baselineDir);
|
|
4033
4033
|
await copyDir(claudeDir, iter0Dir);
|
|
4034
|
+
const mcpJsonPath = path16.join(projectRoot, ".mcp.json");
|
|
4035
|
+
try {
|
|
4036
|
+
await fs16.access(mcpJsonPath);
|
|
4037
|
+
await fs16.copyFile(mcpJsonPath, path16.join(baselineDir, ".mcp.json"));
|
|
4038
|
+
await fs16.copyFile(mcpJsonPath, path16.join(iter0Dir, ".mcp.json"));
|
|
4039
|
+
} catch {
|
|
4040
|
+
}
|
|
4034
4041
|
}
|
|
4035
4042
|
async function copyDir(src, dest) {
|
|
4036
4043
|
await fs16.mkdir(dest, { recursive: true });
|
|
@@ -4353,6 +4360,11 @@ async function scoreTask(task, workspacePath, stdout, stderr, config) {
|
|
|
4353
4360
|
// src/evolve/runner.ts
|
|
4354
4361
|
var execAsync2 = promisify2(exec2);
|
|
4355
4362
|
var COPY_SKIP_DIRS = /* @__PURE__ */ new Set([".git", "node_modules", ".kairn-evolve", ".claude"]);
|
|
4363
|
+
async function deployMcpJson(harnessPath, workDir) {
|
|
4364
|
+
const src = path18.join(harnessPath, ".mcp.json");
|
|
4365
|
+
await fs18.copyFile(src, path18.join(workDir, ".mcp.json")).catch(() => {
|
|
4366
|
+
});
|
|
4367
|
+
}
|
|
4356
4368
|
async function createIsolatedWorkspace(projectRoot, harnessPath) {
|
|
4357
4369
|
const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
4358
4370
|
try {
|
|
@@ -4367,6 +4379,7 @@ async function createIsolatedWorkspace(projectRoot, harnessPath) {
|
|
|
4367
4379
|
});
|
|
4368
4380
|
await fs18.rm(path18.join(tmpDir2, ".claude"), { recursive: true, force: true });
|
|
4369
4381
|
await copyDir(harnessPath, path18.join(tmpDir2, ".claude"));
|
|
4382
|
+
await deployMcpJson(harnessPath, tmpDir2);
|
|
4370
4383
|
return { workDir: tmpDir2, isWorktree: true };
|
|
4371
4384
|
} catch {
|
|
4372
4385
|
}
|
|
@@ -4374,6 +4387,7 @@ async function createIsolatedWorkspace(projectRoot, harnessPath) {
|
|
|
4374
4387
|
await copyProjectDir(projectRoot, tmpDir);
|
|
4375
4388
|
await fs18.rm(path18.join(tmpDir, ".claude"), { recursive: true, force: true });
|
|
4376
4389
|
await copyDir(harnessPath, path18.join(tmpDir, ".claude"));
|
|
4390
|
+
await deployMcpJson(harnessPath, tmpDir);
|
|
4377
4391
|
return { workDir: tmpDir, isWorktree: false };
|
|
4378
4392
|
}
|
|
4379
4393
|
async function copyProjectDir(src, dest) {
|
|
@@ -4555,31 +4569,77 @@ function parseToolCalls(stdout) {
|
|
|
4555
4569
|
return [];
|
|
4556
4570
|
}
|
|
4557
4571
|
}
|
|
4558
|
-
|
|
4572
|
+
function computeStddev(values, mean) {
|
|
4573
|
+
if (values.length <= 1) return 0;
|
|
4574
|
+
const sumSqDiffs = values.reduce((sum, v) => sum + (v - mean) ** 2, 0);
|
|
4575
|
+
return Math.sqrt(sumSqDiffs / values.length);
|
|
4576
|
+
}
|
|
4577
|
+
async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config, onProgress, runsPerTask = 1) {
|
|
4559
4578
|
const results = {};
|
|
4560
4579
|
const projectRoot = path18.resolve(workspacePath, "..");
|
|
4580
|
+
const effectiveRuns = Math.max(1, runsPerTask);
|
|
4561
4581
|
for (const task of tasks) {
|
|
4562
|
-
const traceDir = path18.join(
|
|
4563
|
-
workspacePath,
|
|
4564
|
-
"traces",
|
|
4565
|
-
iteration.toString(),
|
|
4566
|
-
task.id
|
|
4567
|
-
);
|
|
4568
4582
|
onProgress?.({ type: "task-start", iteration, taskId: task.id });
|
|
4569
|
-
|
|
4570
|
-
|
|
4571
|
-
|
|
4572
|
-
|
|
4573
|
-
|
|
4574
|
-
|
|
4575
|
-
|
|
4576
|
-
|
|
4577
|
-
|
|
4583
|
+
if (effectiveRuns > 1 && config) {
|
|
4584
|
+
const runScores = [];
|
|
4585
|
+
let passCount = 0;
|
|
4586
|
+
for (let run = 0; run < effectiveRuns; run++) {
|
|
4587
|
+
const traceDir = path18.join(
|
|
4588
|
+
workspacePath,
|
|
4589
|
+
"traces",
|
|
4590
|
+
iteration.toString(),
|
|
4591
|
+
`${task.id}_run${run}`
|
|
4592
|
+
);
|
|
4593
|
+
onProgress?.({
|
|
4594
|
+
type: "task-run",
|
|
4595
|
+
iteration,
|
|
4596
|
+
taskId: task.id,
|
|
4597
|
+
message: `Run ${run + 1}/${effectiveRuns} of ${task.id}`
|
|
4598
|
+
});
|
|
4599
|
+
await runTask(task, harnessPath, traceDir, iteration, projectRoot);
|
|
4600
|
+
const stdout = await fs18.readFile(path18.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
|
|
4601
|
+
const stderr = await fs18.readFile(path18.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
|
|
4602
|
+
const score = await scoreTask(task, traceDir, stdout, stderr, config);
|
|
4603
|
+
await writeScore(traceDir, score);
|
|
4604
|
+
runScores.push(score.score ?? (score.pass ? 100 : 0));
|
|
4605
|
+
if (score.pass) passCount++;
|
|
4606
|
+
}
|
|
4607
|
+
const mean = runScores.reduce((a, b) => a + b, 0) / runScores.length;
|
|
4608
|
+
const stddev = computeStddev(runScores, mean);
|
|
4609
|
+
results[task.id] = {
|
|
4610
|
+
pass: passCount > effectiveRuns / 2,
|
|
4611
|
+
score: mean,
|
|
4612
|
+
details: `Mean of ${effectiveRuns} runs`,
|
|
4613
|
+
variance: {
|
|
4614
|
+
runs: effectiveRuns,
|
|
4615
|
+
scores: runScores,
|
|
4616
|
+
mean,
|
|
4617
|
+
stddev
|
|
4618
|
+
}
|
|
4619
|
+
};
|
|
4620
|
+
} else {
|
|
4621
|
+
const traceDir = path18.join(
|
|
4622
|
+
workspacePath,
|
|
4623
|
+
"traces",
|
|
4624
|
+
iteration.toString(),
|
|
4625
|
+
task.id
|
|
4626
|
+
);
|
|
4627
|
+
const taskResult = await runTask(task, harnessPath, traceDir, iteration, projectRoot);
|
|
4628
|
+
let score = taskResult.score;
|
|
4629
|
+
if (config) {
|
|
4630
|
+
const stdout = await fs18.readFile(path18.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
|
|
4631
|
+
const stderr = await fs18.readFile(path18.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
|
|
4632
|
+
score = await scoreTask(task, traceDir, stdout, stderr, config);
|
|
4633
|
+
await writeScore(traceDir, score);
|
|
4634
|
+
}
|
|
4635
|
+
results[task.id] = score;
|
|
4636
|
+
}
|
|
4637
|
+
const finalScore = results[task.id];
|
|
4578
4638
|
onProgress?.({
|
|
4579
4639
|
type: "task-scored",
|
|
4580
4640
|
iteration,
|
|
4581
4641
|
taskId: task.id,
|
|
4582
|
-
score:
|
|
4642
|
+
score: finalScore.score ?? (finalScore.pass ? 100 : 0)
|
|
4583
4643
|
});
|
|
4584
4644
|
}
|
|
4585
4645
|
const scores = Object.values(results);
|
|
@@ -4623,23 +4683,37 @@ minimal changes to the harness files that will fix those failures.
|
|
|
4623
4683
|
|
|
4624
4684
|
3. Check history for counterfactual evidence
|
|
4625
4685
|
|
|
4686
|
+
## Available Mutation Actions
|
|
4687
|
+
1. **replace** \u2014 Replace old_text with new_text in a file: { "file": "...", "action": "replace", "old_text": "...", "new_text": "...", "rationale": "..." }
|
|
4688
|
+
2. **add_section** \u2014 Append new content to a file (or create it): { "file": "...", "action": "add_section", "new_text": "...", "rationale": "..." }
|
|
4689
|
+
3. **create_file** \u2014 Create a new file: { "file": "...", "action": "create_file", "new_text": "...", "rationale": "..." }
|
|
4690
|
+
4. **delete_section** \u2014 Remove specific text from a file: { "file": "...", "action": "delete_section", "old_text": "...", "rationale": "..." }
|
|
4691
|
+
5. **delete_file** \u2014 Delete an entire file: { "file": "...", "action": "delete_file", "rationale": "..." }
|
|
4692
|
+
|
|
4626
4693
|
## Output Format
|
|
4627
4694
|
Return a JSON object:
|
|
4628
4695
|
{
|
|
4629
4696
|
"reasoning": "Your full causal analysis...",
|
|
4630
4697
|
"mutations": [
|
|
4631
4698
|
{ "file": "CLAUDE.md", "action": "replace", "old_text": "...", "new_text": "...", "rationale": "..." },
|
|
4632
|
-
{ "file": "commands/develop.md", "action": "add_section", "new_text": "...", "rationale": "..." }
|
|
4699
|
+
{ "file": "commands/develop.md", "action": "add_section", "new_text": "...", "rationale": "..." },
|
|
4700
|
+
{ "file": "rules/obsolete.md", "action": "delete_file", "rationale": "..." }
|
|
4633
4701
|
],
|
|
4634
4702
|
"expected_impact": { "task-id": "+15% \u2014 explanation" }
|
|
4635
4703
|
}
|
|
4636
4704
|
|
|
4705
|
+
## MCP Configuration
|
|
4706
|
+
You can also mutate .mcp.json to add, remove, or reconfigure MCP servers.
|
|
4707
|
+
Treat .mcp.json like any other harness file \u2014 propose changes when traces show
|
|
4708
|
+
the agent lacks a tool it needs, or has tools that add noise without benefit.
|
|
4709
|
+
|
|
4637
4710
|
## Rules
|
|
4638
4711
|
- MINIMAL changes only. Don't rewrite the entire CLAUDE.md.
|
|
4639
4712
|
- Each mutation must have a clear rationale tied to a specific trace observation.
|
|
4640
4713
|
- Never remove something that's working for another task.
|
|
4641
4714
|
- If a previous iteration's change caused a regression, REVERT it.
|
|
4642
|
-
-
|
|
4715
|
+
- Consider both additions AND removals. Remove sections that add noise without improving task performance.
|
|
4716
|
+
- Bloated harnesses hurt performance \u2014 trim what isn't earning its keep.
|
|
4643
4717
|
|
|
4644
4718
|
Return ONLY valid JSON.`;
|
|
4645
4719
|
var STDOUT_TRUNCATION_LIMIT = 1e3;
|
|
@@ -4830,10 +4904,11 @@ function parseProposerResponse(raw) {
|
|
|
4830
4904
|
if (file.includes("..")) {
|
|
4831
4905
|
continue;
|
|
4832
4906
|
}
|
|
4833
|
-
|
|
4907
|
+
const validActions = /* @__PURE__ */ new Set(["replace", "add_section", "create_file", "delete_section", "delete_file"]);
|
|
4908
|
+
if (!validActions.has(action)) {
|
|
4834
4909
|
continue;
|
|
4835
4910
|
}
|
|
4836
|
-
if (action === "replace" && !oldText) {
|
|
4911
|
+
if ((action === "replace" || action === "delete_section") && !oldText) {
|
|
4837
4912
|
continue;
|
|
4838
4913
|
}
|
|
4839
4914
|
const mutation = {
|
|
@@ -4912,6 +4987,23 @@ async function applyMutations(currentHarnessPath, nextIterationDir, mutations) {
|
|
|
4912
4987
|
} else if (mutation.action === "create_file") {
|
|
4913
4988
|
await fs20.mkdir(path20.dirname(filePath), { recursive: true });
|
|
4914
4989
|
await fs20.writeFile(filePath, mutation.newText, "utf-8");
|
|
4990
|
+
} else if (mutation.action === "delete_section") {
|
|
4991
|
+
if (!mutation.oldText) {
|
|
4992
|
+
continue;
|
|
4993
|
+
}
|
|
4994
|
+
let sectionContent;
|
|
4995
|
+
try {
|
|
4996
|
+
sectionContent = await fs20.readFile(filePath, "utf-8");
|
|
4997
|
+
} catch {
|
|
4998
|
+
continue;
|
|
4999
|
+
}
|
|
5000
|
+
if (!sectionContent.includes(mutation.oldText)) {
|
|
5001
|
+
continue;
|
|
5002
|
+
}
|
|
5003
|
+
await fs20.writeFile(filePath, sectionContent.replace(mutation.oldText, ""), "utf-8");
|
|
5004
|
+
} else if (mutation.action === "delete_file") {
|
|
5005
|
+
await fs20.unlink(filePath).catch(() => {
|
|
5006
|
+
});
|
|
4915
5007
|
}
|
|
4916
5008
|
}
|
|
4917
5009
|
const diffPatch = await generateDiff2(currentHarnessPath, newHarnessPath);
|
|
@@ -5006,7 +5098,8 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5006
5098
|
workspacePath,
|
|
5007
5099
|
iter,
|
|
5008
5100
|
kairnConfig,
|
|
5009
|
-
onProgress
|
|
5101
|
+
onProgress,
|
|
5102
|
+
evolveConfig.runsPerTask
|
|
5010
5103
|
);
|
|
5011
5104
|
onProgress?.({ type: "iteration-scored", iteration: iter, score: aggregate });
|
|
5012
5105
|
if (iter === 0) baselineScore = aggregate;
|
|
@@ -5363,24 +5456,128 @@ async function generateJsonReport(workspacePath) {
|
|
|
5363
5456
|
};
|
|
5364
5457
|
}
|
|
5365
5458
|
|
|
5459
|
+
// src/evolve/apply.ts
|
|
5460
|
+
import fs23 from "fs/promises";
|
|
5461
|
+
import path23 from "path";
|
|
5462
|
+
async function listIterations(workspacePath) {
|
|
5463
|
+
const iterationsDir = path23.join(workspacePath, "iterations");
|
|
5464
|
+
let entries;
|
|
5465
|
+
try {
|
|
5466
|
+
entries = await fs23.readdir(iterationsDir);
|
|
5467
|
+
} catch {
|
|
5468
|
+
return [];
|
|
5469
|
+
}
|
|
5470
|
+
const nums = [];
|
|
5471
|
+
for (const entry of entries) {
|
|
5472
|
+
const n = parseInt(entry, 10);
|
|
5473
|
+
if (!isNaN(n)) {
|
|
5474
|
+
try {
|
|
5475
|
+
await fs23.access(path23.join(iterationsDir, entry, "harness"));
|
|
5476
|
+
nums.push(n);
|
|
5477
|
+
} catch {
|
|
5478
|
+
}
|
|
5479
|
+
}
|
|
5480
|
+
}
|
|
5481
|
+
return nums.sort((a, b) => a - b);
|
|
5482
|
+
}
|
|
5483
|
+
async function findBestIteration(workspacePath, iterations) {
|
|
5484
|
+
let bestIter = iterations[0];
|
|
5485
|
+
let bestScore = -Infinity;
|
|
5486
|
+
for (const iter of iterations) {
|
|
5487
|
+
const log = await loadIterationLog(workspacePath, iter);
|
|
5488
|
+
const score = log?.score ?? 0;
|
|
5489
|
+
if (score > bestScore) {
|
|
5490
|
+
bestScore = score;
|
|
5491
|
+
bestIter = iter;
|
|
5492
|
+
}
|
|
5493
|
+
}
|
|
5494
|
+
return bestIter;
|
|
5495
|
+
}
|
|
5496
|
+
async function listFilesRecursive(dir) {
|
|
5497
|
+
const results = [];
|
|
5498
|
+
async function walk(current) {
|
|
5499
|
+
let entries;
|
|
5500
|
+
try {
|
|
5501
|
+
entries = await fs23.readdir(current, { withFileTypes: true });
|
|
5502
|
+
} catch {
|
|
5503
|
+
return;
|
|
5504
|
+
}
|
|
5505
|
+
for (const entry of entries) {
|
|
5506
|
+
const fullPath = path23.join(current, entry.name);
|
|
5507
|
+
if (entry.isDirectory()) {
|
|
5508
|
+
await walk(fullPath);
|
|
5509
|
+
} else {
|
|
5510
|
+
results.push(path23.relative(dir, fullPath));
|
|
5511
|
+
}
|
|
5512
|
+
}
|
|
5513
|
+
}
|
|
5514
|
+
await walk(dir);
|
|
5515
|
+
return results;
|
|
5516
|
+
}
|
|
5517
|
+
async function applyEvolution(workspacePath, projectRoot, targetIteration) {
|
|
5518
|
+
const iterations = await listIterations(workspacePath);
|
|
5519
|
+
if (iterations.length === 0) {
|
|
5520
|
+
throw new Error("No iterations found in workspace. Run `kairn evolve run` first.");
|
|
5521
|
+
}
|
|
5522
|
+
let iter;
|
|
5523
|
+
if (targetIteration !== void 0) {
|
|
5524
|
+
if (!iterations.includes(targetIteration)) {
|
|
5525
|
+
throw new Error(
|
|
5526
|
+
`Iteration ${targetIteration} not found. Available: ${iterations.join(", ")}`
|
|
5527
|
+
);
|
|
5528
|
+
}
|
|
5529
|
+
iter = targetIteration;
|
|
5530
|
+
} else {
|
|
5531
|
+
iter = await findBestIteration(workspacePath, iterations);
|
|
5532
|
+
}
|
|
5533
|
+
const harnessPath = path23.join(
|
|
5534
|
+
workspacePath,
|
|
5535
|
+
"iterations",
|
|
5536
|
+
iter.toString(),
|
|
5537
|
+
"harness"
|
|
5538
|
+
);
|
|
5539
|
+
const claudeDir = path23.join(projectRoot, ".claude");
|
|
5540
|
+
const diffPreview = await generateDiff2(claudeDir, harnessPath);
|
|
5541
|
+
const currentFiles = await listFilesRecursive(claudeDir);
|
|
5542
|
+
const targetFiles = await listFilesRecursive(harnessPath);
|
|
5543
|
+
const allPaths = /* @__PURE__ */ new Set([...currentFiles, ...targetFiles]);
|
|
5544
|
+
const filesChanged = [];
|
|
5545
|
+
for (const filePath of allPaths) {
|
|
5546
|
+
const currentContent = await fs23.readFile(path23.join(claudeDir, filePath), "utf-8").catch(() => null);
|
|
5547
|
+
const targetContent = await fs23.readFile(path23.join(harnessPath, filePath), "utf-8").catch(() => null);
|
|
5548
|
+
if (currentContent !== targetContent) {
|
|
5549
|
+
filesChanged.push(filePath);
|
|
5550
|
+
}
|
|
5551
|
+
}
|
|
5552
|
+
await fs23.rm(claudeDir, { recursive: true, force: true });
|
|
5553
|
+
await copyDir(harnessPath, claudeDir);
|
|
5554
|
+
return {
|
|
5555
|
+
iteration: iter,
|
|
5556
|
+
filesChanged,
|
|
5557
|
+
diffPreview
|
|
5558
|
+
};
|
|
5559
|
+
}
|
|
5560
|
+
|
|
5366
5561
|
// src/commands/evolve.ts
|
|
5367
5562
|
var DEFAULT_CONFIG = {
|
|
5368
5563
|
model: "claude-sonnet-4-6",
|
|
5369
5564
|
proposerModel: "claude-opus-4-6",
|
|
5370
5565
|
scorer: "pass-fail",
|
|
5371
5566
|
maxIterations: 5,
|
|
5372
|
-
parallelTasks: 1
|
|
5567
|
+
parallelTasks: 1,
|
|
5568
|
+
runsPerTask: 1
|
|
5373
5569
|
};
|
|
5374
5570
|
async function loadEvolveConfigFromWorkspace(workspacePath) {
|
|
5375
5571
|
try {
|
|
5376
|
-
const configStr = await
|
|
5572
|
+
const configStr = await fs24.readFile(path24.join(workspacePath, "config.yaml"), "utf-8");
|
|
5377
5573
|
const parsed = yamlParse2(configStr);
|
|
5378
5574
|
return {
|
|
5379
5575
|
model: parsed.model ?? DEFAULT_CONFIG.model,
|
|
5380
5576
|
proposerModel: parsed.proposer_model ?? DEFAULT_CONFIG.proposerModel,
|
|
5381
5577
|
scorer: parsed.scorer ?? DEFAULT_CONFIG.scorer,
|
|
5382
5578
|
maxIterations: parsed.max_iterations ?? DEFAULT_CONFIG.maxIterations,
|
|
5383
|
-
parallelTasks: parsed.parallel_tasks ?? DEFAULT_CONFIG.parallelTasks
|
|
5579
|
+
parallelTasks: parsed.parallel_tasks ?? DEFAULT_CONFIG.parallelTasks,
|
|
5580
|
+
runsPerTask: parsed.runs_per_task ?? DEFAULT_CONFIG.runsPerTask
|
|
5384
5581
|
};
|
|
5385
5582
|
} catch {
|
|
5386
5583
|
return { ...DEFAULT_CONFIG };
|
|
@@ -5391,9 +5588,9 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
|
|
|
5391
5588
|
try {
|
|
5392
5589
|
const projectRoot = process.cwd();
|
|
5393
5590
|
console.log(ui.section("Evolve Init"));
|
|
5394
|
-
const claudeDir =
|
|
5591
|
+
const claudeDir = path24.join(projectRoot, ".claude");
|
|
5395
5592
|
try {
|
|
5396
|
-
await
|
|
5593
|
+
await fs24.access(claudeDir);
|
|
5397
5594
|
} catch {
|
|
5398
5595
|
console.log(ui.error("No .claude/ directory found. Run kairn describe first."));
|
|
5399
5596
|
process.exit(1);
|
|
@@ -5443,7 +5640,7 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
|
|
|
5443
5640
|
if (config) {
|
|
5444
5641
|
let claudeMd = "";
|
|
5445
5642
|
try {
|
|
5446
|
-
claudeMd = await
|
|
5643
|
+
claudeMd = await fs24.readFile(path24.join(claudeDir, "CLAUDE.md"), "utf-8");
|
|
5447
5644
|
} catch {
|
|
5448
5645
|
}
|
|
5449
5646
|
const profile = await buildProjectProfile(projectRoot);
|
|
@@ -5474,16 +5671,16 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
|
|
|
5474
5671
|
evolveCommand.command("baseline").description("Snapshot current .claude/ directory as baseline").action(async () => {
|
|
5475
5672
|
try {
|
|
5476
5673
|
const projectRoot = process.cwd();
|
|
5477
|
-
const workspace =
|
|
5674
|
+
const workspace = path24.join(projectRoot, ".kairn-evolve");
|
|
5478
5675
|
console.log(ui.section("Evolve Baseline"));
|
|
5479
5676
|
try {
|
|
5480
|
-
await
|
|
5677
|
+
await fs24.access(workspace);
|
|
5481
5678
|
} catch {
|
|
5482
5679
|
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
5483
5680
|
process.exit(1);
|
|
5484
5681
|
}
|
|
5485
5682
|
await snapshotBaseline(projectRoot, workspace);
|
|
5486
|
-
const baselineDir =
|
|
5683
|
+
const baselineDir = path24.join(workspace, "baseline");
|
|
5487
5684
|
const fileCount = await countFiles(baselineDir);
|
|
5488
5685
|
console.log(ui.success(`Baseline snapshot created (${fileCount} files)`));
|
|
5489
5686
|
} catch (err) {
|
|
@@ -5492,21 +5689,21 @@ evolveCommand.command("baseline").description("Snapshot current .claude/ directo
|
|
|
5492
5689
|
process.exit(1);
|
|
5493
5690
|
}
|
|
5494
5691
|
});
|
|
5495
|
-
evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").action(async (options) => {
|
|
5692
|
+
evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").action(async (options) => {
|
|
5496
5693
|
try {
|
|
5497
5694
|
const projectRoot = process.cwd();
|
|
5498
|
-
const workspace =
|
|
5695
|
+
const workspace = path24.join(projectRoot, ".kairn-evolve");
|
|
5499
5696
|
console.log(ui.section("Evolve Run"));
|
|
5500
5697
|
try {
|
|
5501
|
-
await
|
|
5698
|
+
await fs24.access(workspace);
|
|
5502
5699
|
} catch {
|
|
5503
5700
|
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
5504
5701
|
process.exit(1);
|
|
5505
5702
|
}
|
|
5506
|
-
const tasksPath =
|
|
5703
|
+
const tasksPath = path24.join(workspace, "tasks.yaml");
|
|
5507
5704
|
let tasksContent;
|
|
5508
5705
|
try {
|
|
5509
|
-
tasksContent = await
|
|
5706
|
+
tasksContent = await fs24.readFile(tasksPath, "utf-8");
|
|
5510
5707
|
} catch {
|
|
5511
5708
|
console.log(ui.error("No tasks.yaml found. Run kairn evolve init first."));
|
|
5512
5709
|
process.exit(1);
|
|
@@ -5525,15 +5722,15 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5525
5722
|
console.log(ui.info(`Running ${tasksToRun.length} task(s)...`));
|
|
5526
5723
|
console.log("");
|
|
5527
5724
|
const config = await loadConfig();
|
|
5528
|
-
const harnessPath =
|
|
5725
|
+
const harnessPath = path24.join(projectRoot, ".claude");
|
|
5529
5726
|
const results = [];
|
|
5530
5727
|
for (const task of tasksToRun) {
|
|
5531
|
-
const traceDir =
|
|
5728
|
+
const traceDir = path24.join(workspace, "traces", "0", task.id);
|
|
5532
5729
|
const spinner = ora2(`Running: ${task.id}`).start();
|
|
5533
5730
|
const result = await runTask(task, harnessPath, traceDir, 0);
|
|
5534
5731
|
if (config) {
|
|
5535
|
-
const stdout = await
|
|
5536
|
-
const stderr = await
|
|
5732
|
+
const stdout = await fs24.readFile(path24.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
|
|
5733
|
+
const stderr = await fs24.readFile(path24.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
|
|
5537
5734
|
const score = await scoreTask(task, traceDir, stdout, stderr, config);
|
|
5538
5735
|
result.score = score;
|
|
5539
5736
|
await writeScore(traceDir, score);
|
|
@@ -5561,8 +5758,14 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5561
5758
|
process.exit(1);
|
|
5562
5759
|
}
|
|
5563
5760
|
evolveConfig.maxIterations = iterations;
|
|
5761
|
+
const runs = parseInt(options.runs ?? "1", 10);
|
|
5762
|
+
if (isNaN(runs) || runs < 1) {
|
|
5763
|
+
console.log(ui.error("--runs must be a positive integer"));
|
|
5764
|
+
process.exit(1);
|
|
5765
|
+
}
|
|
5766
|
+
evolveConfig.runsPerTask = runs;
|
|
5564
5767
|
try {
|
|
5565
|
-
await
|
|
5768
|
+
await fs24.access(path24.join(workspace, "iterations", "0", "harness"));
|
|
5566
5769
|
} catch {
|
|
5567
5770
|
console.log(ui.error("No baseline harness found. Run kairn evolve baseline first."));
|
|
5568
5771
|
process.exit(1);
|
|
@@ -5595,6 +5798,9 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5595
5798
|
case "task-start":
|
|
5596
5799
|
console.log(chalk14.dim(` Running: ${event.taskId ?? "unknown"}...`));
|
|
5597
5800
|
break;
|
|
5801
|
+
case "task-run":
|
|
5802
|
+
console.log(chalk14.dim(` ${event.message ?? ""}`));
|
|
5803
|
+
break;
|
|
5598
5804
|
case "task-scored": {
|
|
5599
5805
|
const taskScore = event.score ?? 0;
|
|
5600
5806
|
const taskStatus = taskScore >= 100 ? chalk14.green("PASS") : taskScore >= 60 ? chalk14.yellow("PARTIAL") : chalk14.red("FAIL");
|
|
@@ -5616,9 +5822,18 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5616
5822
|
console.log(` Improvement: ${improvement.toFixed(1)} points`);
|
|
5617
5823
|
}
|
|
5618
5824
|
console.log("");
|
|
5619
|
-
|
|
5825
|
+
const showVariance = runs > 1;
|
|
5826
|
+
console.log(showVariance ? " Iter Score Mutations Status" : " Iter Score Mutations Status");
|
|
5620
5827
|
for (const iter of result.iterations) {
|
|
5621
|
-
|
|
5828
|
+
let scoreDisplay;
|
|
5829
|
+
if (showVariance) {
|
|
5830
|
+
const taskScores = Object.values(iter.taskResults);
|
|
5831
|
+
const stddevs = taskScores.map((s) => s.variance?.stddev).filter((v) => v !== void 0);
|
|
5832
|
+
const avgStddev = stddevs.length > 0 ? stddevs.reduce((a, b) => a + b, 0) / stddevs.length : 0;
|
|
5833
|
+
scoreDisplay = `${iter.score.toFixed(1).padStart(6)}% \xB1${avgStddev.toFixed(1)}`;
|
|
5834
|
+
} else {
|
|
5835
|
+
scoreDisplay = iter.score.toFixed(1).padStart(6) + "%";
|
|
5836
|
+
}
|
|
5622
5837
|
const mutations = iter.proposal?.mutations.length ?? 0;
|
|
5623
5838
|
const mutStr = mutations > 0 ? mutations.toString() : "-";
|
|
5624
5839
|
let status = "evaluated";
|
|
@@ -5626,7 +5841,7 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5626
5841
|
else if (!iter.proposal && !iter.diffPatch) status = "rollback";
|
|
5627
5842
|
else if (iter.score >= 100) status = "perfect";
|
|
5628
5843
|
else if (iter.iteration === result.bestIteration) status = "best";
|
|
5629
|
-
console.log(` ${iter.iteration.toString().padStart(4)} ${
|
|
5844
|
+
console.log(` ${iter.iteration.toString().padStart(4)} ${scoreDisplay} ${mutStr.padStart(9)} ${status}`);
|
|
5630
5845
|
}
|
|
5631
5846
|
}
|
|
5632
5847
|
} catch (err) {
|
|
@@ -5635,12 +5850,56 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5635
5850
|
process.exit(1);
|
|
5636
5851
|
}
|
|
5637
5852
|
});
|
|
5853
|
+
evolveCommand.command("apply").description("Apply the best evolved harness to your project").option("--iter <n>", "Apply a specific iteration instead of the best").option("--force", "Apply even if git working tree is dirty").option("--no-commit", "Skip automatic git commit after applying").action(async (options) => {
|
|
5854
|
+
try {
|
|
5855
|
+
const projectRoot = process.cwd();
|
|
5856
|
+
const workspace = path24.join(projectRoot, ".kairn-evolve");
|
|
5857
|
+
console.log(ui.section("Evolve Apply"));
|
|
5858
|
+
try {
|
|
5859
|
+
await fs24.access(workspace);
|
|
5860
|
+
} catch {
|
|
5861
|
+
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
5862
|
+
process.exit(1);
|
|
5863
|
+
}
|
|
5864
|
+
let targetIteration;
|
|
5865
|
+
if (options.iter) {
|
|
5866
|
+
targetIteration = parseInt(options.iter, 10);
|
|
5867
|
+
if (isNaN(targetIteration)) {
|
|
5868
|
+
console.log(ui.error("--iter must be a number"));
|
|
5869
|
+
process.exit(1);
|
|
5870
|
+
}
|
|
5871
|
+
}
|
|
5872
|
+
const result = await applyEvolution(workspace, projectRoot, targetIteration);
|
|
5873
|
+
if (result.diffPreview) {
|
|
5874
|
+
console.log(ui.section("Changes"));
|
|
5875
|
+
for (const line of result.diffPreview.split("\n")) {
|
|
5876
|
+
if (line.startsWith("---") || line.startsWith("+++")) {
|
|
5877
|
+
console.log(chalk14.bold(line));
|
|
5878
|
+
} else if (line.startsWith("+")) {
|
|
5879
|
+
console.log(chalk14.green(line));
|
|
5880
|
+
} else if (line.startsWith("-")) {
|
|
5881
|
+
console.log(chalk14.red(line));
|
|
5882
|
+
} else {
|
|
5883
|
+
console.log(line);
|
|
5884
|
+
}
|
|
5885
|
+
}
|
|
5886
|
+
}
|
|
5887
|
+
console.log("");
|
|
5888
|
+
console.log(ui.success(
|
|
5889
|
+
`Applied iteration ${result.iteration} harness (${result.filesChanged.length} files)`
|
|
5890
|
+
));
|
|
5891
|
+
} catch (err) {
|
|
5892
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
5893
|
+
console.log(ui.error(msg));
|
|
5894
|
+
process.exit(1);
|
|
5895
|
+
}
|
|
5896
|
+
});
|
|
5638
5897
|
evolveCommand.command("report").description("Generate a summary report of the evolution run").option("--json", "Output machine-readable JSON instead of Markdown").action(async (options) => {
|
|
5639
5898
|
try {
|
|
5640
5899
|
const projectRoot = process.cwd();
|
|
5641
|
-
const workspace =
|
|
5900
|
+
const workspace = path24.join(projectRoot, ".kairn-evolve");
|
|
5642
5901
|
try {
|
|
5643
|
-
await
|
|
5902
|
+
await fs24.access(workspace);
|
|
5644
5903
|
} catch {
|
|
5645
5904
|
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
5646
5905
|
process.exit(1);
|
|
@@ -5661,23 +5920,23 @@ evolveCommand.command("report").description("Generate a summary report of the ev
|
|
|
5661
5920
|
evolveCommand.command("diff <iter1> <iter2>").description("Show harness changes between two iterations").action(async (iter1Str, iter2Str) => {
|
|
5662
5921
|
try {
|
|
5663
5922
|
const projectRoot = process.cwd();
|
|
5664
|
-
const workspace =
|
|
5923
|
+
const workspace = path24.join(projectRoot, ".kairn-evolve");
|
|
5665
5924
|
const iter1 = parseInt(iter1Str, 10);
|
|
5666
5925
|
const iter2 = parseInt(iter2Str, 10);
|
|
5667
5926
|
if (isNaN(iter1) || isNaN(iter2)) {
|
|
5668
5927
|
console.log(ui.error("Both arguments must be integers (iteration numbers)"));
|
|
5669
5928
|
process.exit(1);
|
|
5670
5929
|
}
|
|
5671
|
-
const harness1 =
|
|
5672
|
-
const harness2 =
|
|
5930
|
+
const harness1 = path24.join(workspace, "iterations", iter1.toString(), "harness");
|
|
5931
|
+
const harness2 = path24.join(workspace, "iterations", iter2.toString(), "harness");
|
|
5673
5932
|
try {
|
|
5674
|
-
await
|
|
5933
|
+
await fs24.access(harness1);
|
|
5675
5934
|
} catch {
|
|
5676
5935
|
console.log(ui.error(`Iteration ${iter1} harness not found at ${harness1}`));
|
|
5677
5936
|
process.exit(1);
|
|
5678
5937
|
}
|
|
5679
5938
|
try {
|
|
5680
|
-
await
|
|
5939
|
+
await fs24.access(harness2);
|
|
5681
5940
|
} catch {
|
|
5682
5941
|
console.log(ui.error(`Iteration ${iter2} harness not found at ${harness2}`));
|
|
5683
5942
|
process.exit(1);
|
|
@@ -5732,10 +5991,10 @@ evolveCommand.command("diff <iter1> <iter2>").description("Show harness changes
|
|
|
5732
5991
|
async function countFiles(dir) {
|
|
5733
5992
|
let count = 0;
|
|
5734
5993
|
try {
|
|
5735
|
-
const entries = await
|
|
5994
|
+
const entries = await fs24.readdir(dir, { withFileTypes: true });
|
|
5736
5995
|
for (const entry of entries) {
|
|
5737
5996
|
if (entry.isDirectory()) {
|
|
5738
|
-
count += await countFiles(
|
|
5997
|
+
count += await countFiles(path24.join(dir, entry.name));
|
|
5739
5998
|
} else {
|
|
5740
5999
|
count++;
|
|
5741
6000
|
}
|