kairn-cli 2.2.10 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +516 -171
- package/dist/cli.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -1,10 +1,107 @@
|
|
|
1
|
+
// Runtime helpers used by the lazily-initialised module sections below
// (presumably emitted by the bundler, since this is dist output - confirm before hand-editing).
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
3
|
+
// __esm(fn, res): `fn` is an object whose first own property is an init function.
// The returned __init() calls that function on the first invocation only: `fn` is
// reassigned to 0 as part of the call, so later invocations skip the call and just
// return the cached `res`.
var __esm = (fn, res) => function __init() {
|
|
4
|
+
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
5
|
+
};
|
|
6
|
+
// __export(target, all): for every key in `all`, defines an enumerable getter on
// `target` that uses all[key] as the getter function (so values are read at access time).
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
|
|
11
|
+
// src/evolve/memory.ts
|
|
12
|
+
var memory_exports = {};
|
|
13
|
+
__export(memory_exports, {
|
|
14
|
+
buildRunSummary: () => buildRunSummary,
|
|
15
|
+
formatMemoryForProposer: () => formatMemoryForProposer,
|
|
16
|
+
loadProposerMemory: () => loadProposerMemory,
|
|
17
|
+
saveRunSummary: () => saveRunSummary
|
|
18
|
+
});
|
|
19
|
+
import fs19 from "fs/promises";
|
|
20
|
+
import path19 from "path";
|
|
21
|
+
/**
 * Load the stored proposer run summaries for a workspace.
 *
 * Reads `<workspacePath>/<MEMORY_FILE>` and parses it as JSON. Any failure
 * (missing file, unreadable file, invalid JSON) or a parsed value that is not
 * an array yields an empty list instead of an error.
 *
 * @param {string} workspacePath - Directory that contains the memory file.
 * @returns {Promise<Array>} The parsed array, or `[]` when nothing usable exists.
 */
async function loadProposerMemory(workspacePath) {
  const file = path19.join(workspacePath, MEMORY_FILE);
  let entries = [];
  try {
    const decoded = JSON.parse(await fs19.readFile(file, "utf-8"));
    if (Array.isArray(decoded)) {
      entries = decoded;
    }
  } catch {
    // Best effort: an absent or corrupt memory file is treated as "no history".
  }
  return entries;
}
|
|
32
|
+
/**
 * Condense an evolve run into a summary record.
 *
 * Walks consecutive pairs of history entries and, for every entry that carries
 * a proposal with at least one mutation, classifies the score change against
 * the previous entry: any gain is recorded as an effective mutation, a drop of
 * more than 5 points as a regressive one; smaller drops and no-change are ignored.
 *
 * @param {Array<{score: number, proposal?: {mutations?: Array<{action: string, file: string, rationale: string}>}}>} history
 *   Iteration records in order.
 * @param {number} baselineScore - Score before any mutation.
 * @param {number} bestScore - Best score reached during the run.
 * @returns {{timestamp: string, baselineScore: number, bestScore: number, improvement: number,
 *   effectiveMutations: string[], regressiveMutations: string[], insights: string}}
 */
function buildRunSummary(history, baselineScore, bestScore) {
  const effectiveMutations = [];
  const regressiveMutations = [];
  for (let i = 1; i < history.length; i++) {
    const prev = history[i - 1];
    const curr = history[i];
    // A proposal object without a `mutations` array used to throw here
    // (`proposal?.mutations.length`); treat it like "no mutations" instead.
    const mutations = curr.proposal?.mutations;
    if (!Array.isArray(mutations) || mutations.length === 0) continue;
    const delta = curr.score - prev.score;
    const summary = mutations.map((m) => `${m.action} ${m.file}: ${m.rationale}`).join("; ");
    if (delta > 0) {
      effectiveMutations.push(`+${delta.toFixed(1)}: ${summary}`);
    } else if (delta < -5) {
      regressiveMutations.push(`${delta.toFixed(1)}: ${summary}`);
    }
  }
  const improvement = bestScore - baselineScore;
  const insights = improvement > 0
    ? `Improved ${improvement.toFixed(1)} points. ${effectiveMutations.length} helpful mutations, ${regressiveMutations.length} regressions.`
    : `No improvement. ${regressiveMutations.length} regressions observed.`;
  return {
    timestamp: (/* @__PURE__ */ new Date()).toISOString(),
    baselineScore,
    bestScore,
    improvement,
    effectiveMutations,
    regressiveMutations,
    insights
  };
}
|
|
59
|
+
/**
 * Append a run summary to the workspace's proposer memory file.
 *
 * Loads the current entries, adds `summary` at the end, keeps only the most
 * recent MAX_ENTRIES entries and rewrites the file as pretty-printed JSON.
 *
 * @param {string} workspacePath - Directory that contains the memory file.
 * @param {object} summary - Summary record to store.
 * @returns {Promise<void>}
 */
async function saveRunSummary(workspacePath, summary) {
  const entries = await loadProposerMemory(workspacePath);
  entries.push(summary);
  const serialized = JSON.stringify(entries.slice(-MAX_ENTRIES), null, 2);
  await fs19.writeFile(path19.join(workspacePath, MEMORY_FILE), serialized, "utf-8");
}
|
|
66
|
+
/**
 * Render stored run summaries as a markdown section for the proposer prompt.
 *
 * The entries come from a JSON file on disk, so their shape is not trusted:
 * entries that are not objects or lack finite numeric `baselineScore`,
 * `bestScore` and `improvement` are skipped rather than crashing on `.toFixed`,
 * and a missing/non-array mutation list is treated as empty. For well-formed
 * entries the output is unchanged.
 *
 * @param {Array<object>} memory - Entries as returned by loadProposerMemory.
 * @returns {string} Markdown text, or "" when there is nothing usable to show.
 */
function formatMemoryForProposer(memory) {
  const isScore = (value) => typeof value === "number" && Number.isFinite(value);
  const asList = (value) => (Array.isArray(value) ? value : []);
  const usable = memory.filter(
    (entry) => typeof entry === "object" && entry !== null && isScore(entry.baselineScore) && isScore(entry.bestScore) && isScore(entry.improvement)
  );
  if (usable.length === 0) return "";
  const lines = ["## Prior Run History\n"];
  for (const entry of usable) {
    lines.push(`### Run at ${entry.timestamp}`);
    lines.push(`- Baseline: ${entry.baselineScore.toFixed(1)}%, Best: ${entry.bestScore.toFixed(1)}%, Improvement: ${entry.improvement >= 0 ? "+" : ""}${entry.improvement.toFixed(1)}`);
    // Only the first three mutations of each kind are shown to keep the prompt short.
    const effective = asList(entry.effectiveMutations);
    if (effective.length > 0) {
      lines.push("- Effective mutations:");
      for (const m of effective.slice(0, 3)) {
        lines.push(` - ${m}`);
      }
    }
    const regressive = asList(entry.regressiveMutations);
    if (regressive.length > 0) {
      lines.push("- Regressive mutations (AVOID these):");
      for (const m of regressive.slice(0, 3)) {
        lines.push(` - ${m}`);
      }
    }
    lines.push("");
  }
  return lines.join("\n");
}
|
|
88
|
+
// Constants for src/evolve/memory.ts. They are declared here without a value and
// only receive one when init_memory() is called.
var MEMORY_FILE, MAX_ENTRIES;
|
|
89
|
+
// Lazy initialiser built with __esm: running it assigns the two constants below.
var init_memory = __esm({
|
|
90
|
+
"src/evolve/memory.ts"() {
|
|
91
|
+
"use strict";
|
|
92
|
+
// File name joined onto the workspace path by loadProposerMemory / saveRunSummary.
MEMORY_FILE = "proposer-memory.json";
|
|
93
|
+
// saveRunSummary keeps only the last MAX_ENTRIES summaries when rewriting the file.
MAX_ENTRIES = 10;
|
|
94
|
+
}
|
|
95
|
+
});
|
|
96
|
+
|
|
1
97
|
// src/cli.ts
|
|
2
98
|
import { Command as Command12 } from "commander";
|
|
3
99
|
import chalk15 from "chalk";
|
|
100
|
+
import { createRequire } from "module";
|
|
4
101
|
|
|
5
102
|
// src/commands/init.ts
|
|
6
103
|
import { Command } from "commander";
|
|
7
|
-
import { input, password, select } from "@inquirer/prompts";
|
|
104
|
+
import { confirm, input, password, select } from "@inquirer/prompts";
|
|
8
105
|
import chalk3 from "chalk";
|
|
9
106
|
import Anthropic from "@anthropic-ai/sdk";
|
|
10
107
|
import OpenAI from "openai";
|
|
@@ -62,6 +159,59 @@ async function saveConfig(config) {
|
|
|
62
159
|
await fs.writeFile(CONFIG_PATH, JSON.stringify(config, null, 2), "utf-8");
|
|
63
160
|
}
|
|
64
161
|
|
|
162
|
+
// src/auth/keychain.ts
|
|
163
|
+
import { exec } from "child_process";
|
|
164
|
+
import { promisify } from "util";
|
|
165
|
+
// Promisified child_process.exec, awaited by readClaudeCodeCredentials.
var execAsync = promisify(exec);
|
|
166
|
+
// Service name passed to `security find-generic-password -s` when reading the keychain.
var KEYCHAIN_SERVICE = "Claude Code-credentials";
|
|
167
|
+
// 60 000 ms margin: isTokenExpired reports a token as expired this long before its expiresAt.
var TOKEN_EXPIRY_BUFFER_MS = 6e4;
|
|
168
|
+
/**
 * Parse the JSON text stored in the keychain entry into a credentials object.
 *
 * The payload must be a JSON object with a `claudeAiOauth` object holding a
 * non-empty string `accessToken`, a string `refreshToken` and a numeric
 * `expiresAt`. `subscriptionType` is optional and becomes "unknown" when it is
 * not a string.
 *
 * @param {string} raw - Raw keychain entry contents.
 * @returns {{accessToken: string, refreshToken: string, expiresAt: number, subscriptionType: string} | null}
 *   The credentials, or null when the text is not valid JSON or has the wrong shape.
 */
function parseKeychainCredentials(raw) {
  const isRecord = (value) => typeof value === "object" && value !== null;
  let payload;
  try {
    payload = JSON.parse(raw);
  } catch {
    return null;
  }
  if (!isRecord(payload)) return null;
  const oauth = payload["claudeAiOauth"];
  if (!isRecord(oauth)) return null;
  const { accessToken, refreshToken, expiresAt, subscriptionType } = oauth;
  const hasAccessToken = typeof accessToken === "string" && accessToken !== "";
  const hasRefreshToken = typeof refreshToken === "string";
  const hasExpiry = typeof expiresAt === "number";
  if (!(hasAccessToken && hasRefreshToken && hasExpiry)) return null;
  let plan = "unknown";
  if (typeof subscriptionType === "string") {
    plan = subscriptionType;
  }
  return { accessToken, refreshToken, expiresAt, subscriptionType: plan };
}
|
|
194
|
+
/**
 * Tell whether a token should be treated as expired.
 *
 * The current time is shifted forward by TOKEN_EXPIRY_BUFFER_MS before the
 * comparison, so a token that is about to expire already counts as expired.
 *
 * @param {{expiresAt: number}} credentials - Credentials with an expiry timestamp
 *   (compared against Date.now(), i.e. epoch milliseconds).
 * @returns {boolean} True when now + buffer has reached `expiresAt`.
 */
function isTokenExpired(credentials) {
  const cutoff = Date.now() + TOKEN_EXPIRY_BUFFER_MS;
  return cutoff >= credentials.expiresAt;
}
|
|
197
|
+
/**
 * Read the Claude Code credentials entry from the macOS keychain.
 *
 * Runs `security find-generic-password` for KEYCHAIN_SERVICE (optionally
 * restricted to `account`) with a 5 second timeout and parses its output.
 *
 * @param {string} [account] - Keychain account name; omitted or empty means no `-a` filter.
 * @returns {Promise<object | null>} Parsed credentials, or null on any platform
 *   other than darwin, when the command fails, or when the payload is invalid.
 */
async function readClaudeCodeCredentials(account) {
  if (process.platform !== "darwin") return null;
  try {
    // The command is run through a shell, so every interpolated value is
    // single-quoted (with embedded quotes escaped as '\''). The previous
    // double-quoted interpolation let an account name containing `"`, `$()`
    // or backticks inject arbitrary shell commands.
    const shellQuote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;
    const acct = account ?? "";
    const accountFilter = acct ? `-a ${shellQuote(acct)} ` : "";
    const cmd = `security find-generic-password -s ${shellQuote(KEYCHAIN_SERVICE)} ${accountFilter}-w`;
    const { stdout } = await execAsync(cmd, { timeout: 5e3 });
    return parseKeychainCredentials(stdout.trim());
  } catch {
    // Best effort: a missing entry, a denied prompt or a timeout all mean "no credentials".
    return null;
  }
}
|
|
208
|
+
/**
 * Return a usable access token from the keychain, if there is one.
 *
 * @param {string} [account] - Keychain account name forwarded to readClaudeCodeCredentials.
 * @returns {Promise<string | null>} The access token, or null when no credentials
 *   could be read or isTokenExpired reports them as expired.
 */
async function getAccessToken(account) {
  const credentials = await readClaudeCodeCredentials(account);
  const usable = Boolean(credentials) && !isTokenExpired(credentials);
  return usable ? credentials.accessToken : null;
}
|
|
214
|
+
|
|
65
215
|
// src/providers.ts
|
|
66
216
|
var PROVIDER_CONFIGS = {
|
|
67
217
|
anthropic: {
|
|
@@ -221,7 +371,7 @@ var ui = {
|
|
|
221
371
|
// Key-value pairs
|
|
222
372
|
kv: (key, value) => ` ${chalk.cyan(key.padEnd(14))} ${value}`,
|
|
223
373
|
// File list
|
|
224
|
-
file: (
|
|
374
|
+
file: (path26) => chalk.dim(` ${path26}`),
|
|
225
375
|
// Tool display
|
|
226
376
|
tool: (name, reason) => ` ${warmStone("\u25CF")} ${chalk.bold(name)}
|
|
227
377
|
${chalk.dim(reason)}`,
|
|
@@ -508,30 +658,49 @@ var initCommand = new Command("init").description("Set up Kairn with your API ke
|
|
|
508
658
|
choices: PROVIDER_MODELS[provider]
|
|
509
659
|
});
|
|
510
660
|
}
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
661
|
+
let apiKey = "";
|
|
662
|
+
let authType = "api-key";
|
|
663
|
+
if (provider === "anthropic") {
|
|
664
|
+
const oauthToken = await getAccessToken();
|
|
665
|
+
if (oauthToken) {
|
|
666
|
+
const useOAuth = await confirm({
|
|
667
|
+
message: "Claude Code subscription detected. Use it instead of an API key? (experimental \u2014 may break)",
|
|
668
|
+
default: true
|
|
669
|
+
});
|
|
670
|
+
if (useOAuth) {
|
|
671
|
+
authType = "claude-code-oauth";
|
|
672
|
+
console.log(ui.warn("Using Claude Code OAuth token. This is undocumented and may break at any time."));
|
|
673
|
+
console.log(ui.success("OAuth token validated"));
|
|
674
|
+
}
|
|
675
|
+
}
|
|
518
676
|
}
|
|
519
|
-
if (
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
677
|
+
if (authType === "api-key") {
|
|
678
|
+
apiKey = await password({
|
|
679
|
+
message: `${providerDisplayName} API key${provider === "other" ? " (Enter to skip)" : ""}`,
|
|
680
|
+
mask: "*"
|
|
681
|
+
});
|
|
682
|
+
if (!apiKey && provider !== "other") {
|
|
683
|
+
console.log(ui.error("No API key provided. Aborting."));
|
|
524
684
|
process.exit(1);
|
|
525
685
|
}
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
686
|
+
if (apiKey) {
|
|
687
|
+
console.log(chalk3.dim("\n Verifying API key..."));
|
|
688
|
+
const valid = await verifyKey(provider, apiKey, baseURL, model);
|
|
689
|
+
if (!valid) {
|
|
690
|
+
console.log(ui.error("Invalid API key. Check your key and try again."));
|
|
691
|
+
process.exit(1);
|
|
692
|
+
}
|
|
693
|
+
console.log(ui.success("API key verified"));
|
|
694
|
+
} else {
|
|
695
|
+
console.log(ui.warn("No API key \u2014 skipping verification"));
|
|
696
|
+
}
|
|
529
697
|
}
|
|
530
698
|
const config = {
|
|
531
699
|
provider,
|
|
532
|
-
api_key: apiKey
|
|
700
|
+
api_key: apiKey,
|
|
533
701
|
model,
|
|
534
702
|
...baseURL ? { base_url: baseURL } : {},
|
|
703
|
+
...authType !== "api-key" ? { auth_type: authType } : {},
|
|
535
704
|
default_runtime: "claude-code",
|
|
536
705
|
created_at: (/* @__PURE__ */ new Date()).toISOString()
|
|
537
706
|
};
|
|
@@ -555,7 +724,7 @@ var initCommand = new Command("init").description("Set up Kairn with your API ke
|
|
|
555
724
|
|
|
556
725
|
// src/commands/describe.ts
|
|
557
726
|
import { Command as Command2 } from "commander";
|
|
558
|
-
import { input as input2, confirm, select as select2 } from "@inquirer/prompts";
|
|
727
|
+
import { input as input2, confirm as confirm2, select as select2 } from "@inquirer/prompts";
|
|
559
728
|
import chalk5 from "chalk";
|
|
560
729
|
|
|
561
730
|
// src/compiler/compile.ts
|
|
@@ -1240,8 +1409,18 @@ async function callLLM(config, userMessage, options) {
|
|
|
1240
1409
|
const { systemPrompt } = options;
|
|
1241
1410
|
const jsonMode = options.jsonMode ?? false;
|
|
1242
1411
|
const providerName = getProviderName(config.provider);
|
|
1412
|
+
let apiKey = config.api_key;
|
|
1413
|
+
if (config.auth_type === "claude-code-oauth") {
|
|
1414
|
+
const oauthToken = await getAccessToken();
|
|
1415
|
+
if (!oauthToken) {
|
|
1416
|
+
throw new Error(
|
|
1417
|
+
"Claude Code OAuth token unavailable or expired. Run `kairn init` to reconfigure, or launch Claude Code to refresh the token."
|
|
1418
|
+
);
|
|
1419
|
+
}
|
|
1420
|
+
apiKey = oauthToken;
|
|
1421
|
+
}
|
|
1243
1422
|
if (config.provider === "anthropic") {
|
|
1244
|
-
const client2 = new Anthropic2({ apiKey
|
|
1423
|
+
const client2 = new Anthropic2({ apiKey });
|
|
1245
1424
|
const messages = [
|
|
1246
1425
|
{ role: "user", content: userMessage }
|
|
1247
1426
|
];
|
|
@@ -1262,7 +1441,7 @@ async function callLLM(config, userMessage, options) {
|
|
|
1262
1441
|
}
|
|
1263
1442
|
}
|
|
1264
1443
|
const resolvedBaseURL = getBaseURL(config.provider, config.base_url);
|
|
1265
|
-
const clientOptions = { apiKey
|
|
1444
|
+
const clientOptions = { apiKey };
|
|
1266
1445
|
if (resolvedBaseURL) clientOptions.baseURL = resolvedBaseURL;
|
|
1267
1446
|
const client = new OpenAI2(clientOptions);
|
|
1268
1447
|
try {
|
|
@@ -2441,7 +2620,7 @@ Autonomy level: ${autonomyLevel} (${autonomyLabel(autonomyLevel)})`;
|
|
|
2441
2620
|
console.log("");
|
|
2442
2621
|
}
|
|
2443
2622
|
}
|
|
2444
|
-
const proceed = options.yes || await
|
|
2623
|
+
const proceed = options.yes || await confirm2({
|
|
2445
2624
|
message: "Generate environment in current directory?",
|
|
2446
2625
|
default: true
|
|
2447
2626
|
});
|
|
@@ -2656,7 +2835,7 @@ var updateRegistryCommand = new Command5("update-registry").description("Fetch t
|
|
|
2656
2835
|
|
|
2657
2836
|
// src/commands/optimize.ts
|
|
2658
2837
|
import { Command as Command6 } from "commander";
|
|
2659
|
-
import { confirm as
|
|
2838
|
+
import { confirm as confirm3 } from "@inquirer/prompts";
|
|
2660
2839
|
import chalk9 from "chalk";
|
|
2661
2840
|
import ora from "ora";
|
|
2662
2841
|
import fs12 from "fs/promises";
|
|
@@ -2718,7 +2897,7 @@ function detectFramework(deps) {
|
|
|
2718
2897
|
];
|
|
2719
2898
|
const detected = [];
|
|
2720
2899
|
for (const [packages, name] of frameworks) {
|
|
2721
|
-
if (packages.some((
|
|
2900
|
+
if (packages.some((pkg2) => deps.includes(pkg2))) {
|
|
2722
2901
|
detected.push(name);
|
|
2723
2902
|
}
|
|
2724
2903
|
}
|
|
@@ -2742,11 +2921,11 @@ function extractEnvKeys(content) {
|
|
|
2742
2921
|
return keys;
|
|
2743
2922
|
}
|
|
2744
2923
|
async function scanProject(dir) {
|
|
2745
|
-
const
|
|
2746
|
-
const deps =
|
|
2747
|
-
const devDeps =
|
|
2924
|
+
const pkg2 = await readJsonSafe(path11.join(dir, "package.json"));
|
|
2925
|
+
const deps = pkg2?.dependencies ? Object.keys(pkg2.dependencies) : [];
|
|
2926
|
+
const devDeps = pkg2?.devDependencies ? Object.keys(pkg2.devDependencies) : [];
|
|
2748
2927
|
const allDeps = [...deps, ...devDeps];
|
|
2749
|
-
const scripts =
|
|
2928
|
+
const scripts = pkg2?.scripts || {};
|
|
2750
2929
|
const rootFiles = await listDirSafe(dir);
|
|
2751
2930
|
const keyFiles = rootFiles.filter(
|
|
2752
2931
|
(f) => [
|
|
@@ -2808,8 +2987,8 @@ async function scanProject(dir) {
|
|
|
2808
2987
|
existingSkills = await listDirSafe(path11.join(claudeDir, "skills"));
|
|
2809
2988
|
existingAgents = (await listDirSafe(path11.join(claudeDir, "agents"))).filter((f) => f.endsWith(".md")).map((f) => f.replace(".md", ""));
|
|
2810
2989
|
}
|
|
2811
|
-
const name =
|
|
2812
|
-
const description =
|
|
2990
|
+
const name = pkg2?.name || path11.basename(dir);
|
|
2991
|
+
const description = pkg2?.description || "";
|
|
2813
2992
|
return {
|
|
2814
2993
|
name,
|
|
2815
2994
|
description,
|
|
@@ -3028,7 +3207,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
|
|
|
3028
3207
|
}
|
|
3029
3208
|
if (!options.yes) {
|
|
3030
3209
|
console.log("");
|
|
3031
|
-
const proceed = await
|
|
3210
|
+
const proceed = await confirm3({
|
|
3032
3211
|
message: "Generate optimized environment? This will overwrite existing .claude/ files.",
|
|
3033
3212
|
default: false
|
|
3034
3213
|
});
|
|
@@ -3040,7 +3219,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
|
|
|
3040
3219
|
} else {
|
|
3041
3220
|
console.log(chalk9.dim("\n No existing .claude/ directory found \u2014 generating from scratch.\n"));
|
|
3042
3221
|
if (!options.yes) {
|
|
3043
|
-
const proceed = await
|
|
3222
|
+
const proceed = await confirm3({
|
|
3044
3223
|
message: "Generate Claude Code environment for this project?",
|
|
3045
3224
|
default: true
|
|
3046
3225
|
});
|
|
@@ -3103,7 +3282,7 @@ var optimizeCommand = new Command6("optimize").description("Scan an existing pro
|
|
|
3103
3282
|
}
|
|
3104
3283
|
}
|
|
3105
3284
|
console.log("");
|
|
3106
|
-
const apply = await
|
|
3285
|
+
const apply = await confirm3({
|
|
3107
3286
|
message: "Apply these changes?",
|
|
3108
3287
|
default: true
|
|
3109
3288
|
});
|
|
@@ -3699,10 +3878,10 @@ var keysCommand = new Command10("keys").description("Add or update API keys for
|
|
|
3699
3878
|
import { Command as Command11 } from "commander";
|
|
3700
3879
|
import chalk14 from "chalk";
|
|
3701
3880
|
import ora2 from "ora";
|
|
3702
|
-
import
|
|
3703
|
-
import
|
|
3881
|
+
import fs25 from "fs/promises";
|
|
3882
|
+
import path25 from "path";
|
|
3704
3883
|
import { parse as yamlParse2 } from "yaml";
|
|
3705
|
-
import { confirm as
|
|
3884
|
+
import { confirm as confirm4, select as select4 } from "@inquirer/prompts";
|
|
3706
3885
|
|
|
3707
3886
|
// src/evolve/init.ts
|
|
3708
3887
|
import fs15 from "fs/promises";
|
|
@@ -3945,14 +4124,14 @@ async function buildProjectProfile(projectRoot) {
|
|
|
3945
4124
|
path15.join(projectRoot, "package.json"),
|
|
3946
4125
|
"utf-8"
|
|
3947
4126
|
);
|
|
3948
|
-
const
|
|
4127
|
+
const pkg2 = JSON.parse(pkgStr);
|
|
3949
4128
|
profile.language = "typescript";
|
|
3950
|
-
if (
|
|
3951
|
-
profile.scripts =
|
|
4129
|
+
if (pkg2.scripts && typeof pkg2.scripts === "object") {
|
|
4130
|
+
profile.scripts = pkg2.scripts;
|
|
3952
4131
|
}
|
|
3953
4132
|
const deps = {
|
|
3954
|
-
...
|
|
3955
|
-
...
|
|
4133
|
+
...pkg2.dependencies ?? {},
|
|
4134
|
+
...pkg2.devDependencies ?? {}
|
|
3956
4135
|
};
|
|
3957
4136
|
if (deps.next) {
|
|
3958
4137
|
profile.framework = "Next.js";
|
|
@@ -4051,8 +4230,8 @@ async function copyDir(src, dest) {
|
|
|
4051
4230
|
}
|
|
4052
4231
|
|
|
4053
4232
|
// src/evolve/runner.ts
|
|
4054
|
-
import { exec as
|
|
4055
|
-
import { promisify as
|
|
4233
|
+
import { exec as exec3, spawn } from "child_process";
|
|
4234
|
+
import { promisify as promisify3 } from "util";
|
|
4056
4235
|
import fs18 from "fs/promises";
|
|
4057
4236
|
import os3 from "os";
|
|
4058
4237
|
import path18 from "path";
|
|
@@ -4177,11 +4356,11 @@ async function loadIterationLog(workspacePath, iteration) {
|
|
|
4177
4356
|
}
|
|
4178
4357
|
|
|
4179
4358
|
// src/evolve/exec.ts
|
|
4180
|
-
import { exec } from "child_process";
|
|
4181
|
-
import { promisify } from "util";
|
|
4182
|
-
var
|
|
4359
|
+
import { exec as exec2 } from "child_process";
|
|
4360
|
+
import { promisify as promisify2 } from "util";
|
|
4361
|
+
var execAsync2 = promisify2(exec2);
|
|
4183
4362
|
/**
 * Run a shell command through the promisified exec helper.
 *
 * @param {string} cmd - Command line to execute.
 * @param {string} cwd - Working directory for the command.
 * @param {number} [timeoutMs=30000] - Timeout passed to exec, in milliseconds.
 * @returns {Promise<object>} Whatever execAsync2 resolves with; rejects when it rejects.
 */
async function execCommand(cmd, cwd, timeoutMs = 3e4) {
  const options = { cwd, timeout: timeoutMs };
  return execAsync2(cmd, options);
}
|
|
4186
4365
|
|
|
4187
4366
|
// src/evolve/scorers.ts
|
|
@@ -4341,21 +4520,47 @@ async function rubricScorer(task, workspacePath, stdout, stderr, config) {
|
|
|
4341
4520
|
breakdown
|
|
4342
4521
|
};
|
|
4343
4522
|
}
|
|
4523
|
+
/**
 * Attach a failure category and reason to a failing score.
 *
 * Categories are tried in order and the first match wins:
 *  - "task":    setup errors or missing commands/files in the output
 *  - "model":   token/context limits, rate limits, API errors, HTTP 429, overload
 *  - "repo":    pre-existing build failures, merge conflicts, dirty working tree
 *  - "harness": no textual signal, but a partial score in [20, 80)
 *  - "unknown": nothing matched (reason stays empty)
 *
 * @param {{pass: boolean, score?: number}} score - Scorer result.
 * @param {string} stdout - Captured standard output of the task run.
 * @param {string} stderr - Captured standard error of the task run.
 * @returns {object} `score` itself when it passed, otherwise a copy with
 *   `failureCategory` and `failureReason` added.
 */
function classifyFailure(score, stdout, stderr) {
  if (score.pass) return score;
  // Missing streams are treated as empty so `.includes` below cannot throw.
  const out = String(stdout ?? "");
  const err = String(stderr ?? "");
  const combined = `${out}\n${err}`.toLowerCase();
  const mentions = (...needles) => needles.some((needle) => combined.includes(needle));
  // Match 429 only as a standalone number: a bare substring test also fires on
  // unrelated figures such as "14290ms" and mislabels the failure as a model error.
  const hasStatus429 = /(?<!\d)429(?!\d)/.test(combined);
  const scoreValue = score.score ?? 0;
  let failureCategory = "unknown";
  let failureReason = "";
  if ((err.includes("[setup]") && err.includes("Error")) || mentions("command not found", "no such file or directory")) {
    failureCategory = "task";
    failureReason = "Task setup failed or references missing resources";
  } else if (mentions("token limit", "context length", "rate limit", "api error", "overloaded") || hasStatus429) {
    failureCategory = "model";
    failureReason = "Model API error, token limit, or rate limit";
  } else if ((mentions("build failed") && mentions("before")) || mentions("merge conflict", "git dirty", "uncommitted changes")) {
    failureCategory = "repo";
    failureReason = "Pre-existing repo issues (build failure, dirty state)";
  } else if (scoreValue >= 20 && scoreValue < 80) {
    failureCategory = "harness";
    failureReason = "Agent attempted the task but did not follow harness conventions";
  }
  return { ...score, failureCategory, failureReason };
}
|
|
4344
4545
|
/**
 * Score a task run with the scorer selected by `task.scoring`.
 *
 * "llm-judge" and "rubric" are used only when a config is supplied; every other
 * case (including "pass-fail" and a missing config) uses the pass/fail scorer.
 * Failing results are passed through classifyFailure before being returned.
 *
 * @param {object} task - Task definition; `task.scoring` selects the scorer.
 * @param {string} workspacePath - Workspace the task ran in.
 * @param {string} stdout - Captured standard output of the run.
 * @param {string} stderr - Captured standard error of the run.
 * @param {object} [config] - Configuration forwarded to the LLM-backed scorers.
 * @returns {Promise<object>} The score, annotated with a failure category when it did not pass.
 */
async function scoreTask(task, workspacePath, stdout, stderr, config) {
  let result;
  if (task.scoring === "llm-judge" && config) {
    result = await llmJudgeScorer(task, workspacePath, stdout, stderr, config);
  } else if (task.scoring === "rubric" && config) {
    result = await rubricScorer(task, workspacePath, stdout, stderr, config);
  } else {
    result = await passFailScorer(task, workspacePath, stdout, stderr);
  }
  return result.pass ? result : classifyFailure(result, stdout, stderr);
}
|
|
4356
4561
|
|
|
4357
4562
|
// src/evolve/runner.ts
|
|
4358
|
-
var
|
|
4563
|
+
var execAsync3 = promisify3(exec3);
|
|
4359
4564
|
var COPY_SKIP_DIRS = /* @__PURE__ */ new Set([".git", "node_modules", ".kairn-evolve", ".claude"]);
|
|
4360
4565
|
async function deployMcpJson(harnessPath, workDir) {
|
|
4361
4566
|
const src = path18.join(harnessPath, ".mcp.json");
|
|
@@ -4365,12 +4570,12 @@ async function deployMcpJson(harnessPath, workDir) {
|
|
|
4365
4570
|
async function createIsolatedWorkspace(projectRoot, harnessPath) {
|
|
4366
4571
|
const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
4367
4572
|
try {
|
|
4368
|
-
await
|
|
4573
|
+
await execAsync3("git rev-parse --is-inside-work-tree", {
|
|
4369
4574
|
cwd: projectRoot,
|
|
4370
4575
|
timeout: 5e3
|
|
4371
4576
|
});
|
|
4372
4577
|
const tmpDir2 = path18.join(os3.tmpdir(), `kairn-evolve-wt-${suffix}`);
|
|
4373
|
-
await
|
|
4578
|
+
await execAsync3(`git worktree add --detach "${tmpDir2}" HEAD`, {
|
|
4374
4579
|
cwd: projectRoot,
|
|
4375
4580
|
timeout: 3e4
|
|
4376
4581
|
});
|
|
@@ -4409,14 +4614,14 @@ async function copyProjectDir(src, dest) {
|
|
|
4409
4614
|
async function cleanupIsolatedWorkspace(workDir, isWorktree, projectRoot) {
|
|
4410
4615
|
if (isWorktree) {
|
|
4411
4616
|
try {
|
|
4412
|
-
await
|
|
4617
|
+
await execAsync3(`git worktree remove "${workDir}" --force`, {
|
|
4413
4618
|
cwd: projectRoot,
|
|
4414
4619
|
timeout: 1e4
|
|
4415
4620
|
});
|
|
4416
4621
|
} catch {
|
|
4417
4622
|
await fs18.rm(workDir, { recursive: true, force: true }).catch(() => {
|
|
4418
4623
|
});
|
|
4419
|
-
await
|
|
4624
|
+
await execAsync3("git worktree prune", {
|
|
4420
4625
|
cwd: projectRoot,
|
|
4421
4626
|
timeout: 5e3
|
|
4422
4627
|
}).catch(() => {
|
|
@@ -4437,7 +4642,7 @@ async function runTask(task, harnessPath, traceDir, iteration, projectRoot) {
|
|
|
4437
4642
|
let setupStderr = "";
|
|
4438
4643
|
if (task.setup.trim()) {
|
|
4439
4644
|
try {
|
|
4440
|
-
await
|
|
4645
|
+
await execAsync3(task.setup, { cwd: workDir, timeout: 6e4 });
|
|
4441
4646
|
} catch (err) {
|
|
4442
4647
|
setupStderr = err instanceof Error ? err.message : String(err);
|
|
4443
4648
|
}
|
|
@@ -4685,12 +4890,12 @@ async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config,
|
|
|
4685
4890
|
}
|
|
4686
4891
|
|
|
4687
4892
|
// src/evolve/loop.ts
|
|
4688
|
-
import
|
|
4689
|
-
import
|
|
4893
|
+
import fs22 from "fs/promises";
|
|
4894
|
+
import path22 from "path";
|
|
4690
4895
|
|
|
4691
4896
|
// src/evolve/proposer.ts
|
|
4692
|
-
import
|
|
4693
|
-
import
|
|
4897
|
+
import fs20 from "fs/promises";
|
|
4898
|
+
import path20 from "path";
|
|
4694
4899
|
var PROPOSER_SYSTEM_PROMPT = `You are an expert agent environment optimizer. Your job is to improve a Claude Code
|
|
4695
4900
|
agent environment (.claude/ directory) based on execution traces from real tasks.
|
|
4696
4901
|
|
|
@@ -4763,18 +4968,18 @@ async function readHarnessFiles(harnessPath) {
|
|
|
4763
4968
|
async function walk(dir, prefix) {
|
|
4764
4969
|
let entries;
|
|
4765
4970
|
try {
|
|
4766
|
-
entries = await
|
|
4971
|
+
entries = await fs20.readdir(dir, { withFileTypes: true });
|
|
4767
4972
|
} catch {
|
|
4768
4973
|
return;
|
|
4769
4974
|
}
|
|
4770
4975
|
for (const entry of entries) {
|
|
4771
|
-
const relativePath = prefix ?
|
|
4772
|
-
const fullPath =
|
|
4976
|
+
const relativePath = prefix ? path20.join(prefix, entry.name) : entry.name;
|
|
4977
|
+
const fullPath = path20.join(dir, entry.name);
|
|
4773
4978
|
if (entry.isDirectory()) {
|
|
4774
4979
|
await walk(fullPath, relativePath);
|
|
4775
4980
|
} else if (entry.isFile()) {
|
|
4776
4981
|
try {
|
|
4777
|
-
result[relativePath] = await
|
|
4982
|
+
result[relativePath] = await fs20.readFile(fullPath, "utf-8");
|
|
4778
4983
|
} catch {
|
|
4779
4984
|
}
|
|
4780
4985
|
}
|
|
@@ -4790,7 +4995,7 @@ function truncateStdout(stdout, limit) {
|
|
|
4790
4995
|
return `[...truncated, showing last ${limit} chars...]
|
|
4791
4996
|
${stdout.slice(-limit)}`;
|
|
4792
4997
|
}
|
|
4793
|
-
function buildProposerUserMessage(harnessFiles, traces, tasks, history) {
|
|
4998
|
+
function buildProposerUserMessage(harnessFiles, traces, tasks, history, memorySection) {
|
|
4794
4999
|
const harnessSection = ["## Current Harness Files\n"];
|
|
4795
5000
|
const fileEntries = Object.entries(harnessFiles);
|
|
4796
5001
|
if (fileEntries.length === 0) {
|
|
@@ -4826,7 +5031,8 @@ ${content}
|
|
|
4826
5031
|
const historyBudget = remainingBudget - traceBudget;
|
|
4827
5032
|
const traceSection = buildTraceSection(traces, traceBudget);
|
|
4828
5033
|
const historySection = buildHistorySection(history, historyBudget);
|
|
4829
|
-
|
|
5034
|
+
const memoryPart = memorySection ? "\n" + memorySection : "";
|
|
5035
|
+
return fixedContent + "\n" + traceSection + "\n" + historySection + memoryPart;
|
|
4830
5036
|
}
|
|
4831
5037
|
function buildTraceSection(traces, budget) {
|
|
4832
5038
|
if (traces.length === 0) return "## Execution Traces\n\n(No traces available)\n";
|
|
@@ -4981,7 +5187,10 @@ function parseProposerResponse(raw) {
|
|
|
4981
5187
|
async function propose(iteration, workspacePath, harnessPath, history, tasks, config, proposerModel) {
|
|
4982
5188
|
const harnessFiles = await readHarnessFiles(harnessPath);
|
|
4983
5189
|
const traces = await loadIterationTraces(workspacePath, iteration);
|
|
4984
|
-
const
|
|
5190
|
+
const { loadProposerMemory: loadProposerMemory2, formatMemoryForProposer: formatMemoryForProposer2 } = await Promise.resolve().then(() => (init_memory(), memory_exports));
|
|
5191
|
+
const memory = await loadProposerMemory2(workspacePath);
|
|
5192
|
+
const memorySection = formatMemoryForProposer2(memory);
|
|
5193
|
+
const userMessage = buildProposerUserMessage(harnessFiles, traces, tasks, history, memorySection);
|
|
4985
5194
|
const proposerConfig = { ...config, model: proposerModel };
|
|
4986
5195
|
const response = await callLLM(proposerConfig, userMessage, {
|
|
4987
5196
|
systemPrompt: PROPOSER_SYSTEM_PROMPT,
|
|
@@ -4992,60 +5201,60 @@ async function propose(iteration, workspacePath, harnessPath, history, tasks, co
|
|
|
4992
5201
|
}
|
|
4993
5202
|
|
|
4994
5203
|
// src/evolve/mutator.ts
|
|
4995
|
-
import
|
|
4996
|
-
import
|
|
5204
|
+
import fs21 from "fs/promises";
|
|
5205
|
+
import path21 from "path";
|
|
4997
5206
|
async function applyMutations(currentHarnessPath, nextIterationDir, mutations) {
|
|
4998
|
-
const newHarnessPath =
|
|
5207
|
+
const newHarnessPath = path21.join(nextIterationDir, "harness");
|
|
4999
5208
|
await copyDir(currentHarnessPath, newHarnessPath);
|
|
5000
5209
|
for (const mutation of mutations) {
|
|
5001
5210
|
if (mutation.file.includes("..")) {
|
|
5002
5211
|
continue;
|
|
5003
5212
|
}
|
|
5004
|
-
const filePath =
|
|
5213
|
+
const filePath = path21.join(newHarnessPath, mutation.file);
|
|
5005
5214
|
if (mutation.action === "replace") {
|
|
5006
5215
|
if (!mutation.oldText) {
|
|
5007
5216
|
continue;
|
|
5008
5217
|
}
|
|
5009
|
-
const content = await
|
|
5218
|
+
const content = await fs21.readFile(filePath, "utf-8");
|
|
5010
5219
|
if (!content.includes(mutation.oldText)) {
|
|
5011
5220
|
continue;
|
|
5012
5221
|
}
|
|
5013
|
-
await
|
|
5222
|
+
await fs21.writeFile(
|
|
5014
5223
|
filePath,
|
|
5015
5224
|
content.replace(mutation.oldText, mutation.newText),
|
|
5016
5225
|
"utf-8"
|
|
5017
5226
|
);
|
|
5018
5227
|
} else if (mutation.action === "add_section") {
|
|
5019
5228
|
try {
|
|
5020
|
-
const content = await
|
|
5021
|
-
await
|
|
5229
|
+
const content = await fs21.readFile(filePath, "utf-8");
|
|
5230
|
+
await fs21.writeFile(
|
|
5022
5231
|
filePath,
|
|
5023
5232
|
content + "\n\n" + mutation.newText,
|
|
5024
5233
|
"utf-8"
|
|
5025
5234
|
);
|
|
5026
5235
|
} catch {
|
|
5027
|
-
await
|
|
5028
|
-
await
|
|
5236
|
+
await fs21.mkdir(path21.dirname(filePath), { recursive: true });
|
|
5237
|
+
await fs21.writeFile(filePath, mutation.newText, "utf-8");
|
|
5029
5238
|
}
|
|
5030
5239
|
} else if (mutation.action === "create_file") {
|
|
5031
|
-
await
|
|
5032
|
-
await
|
|
5240
|
+
await fs21.mkdir(path21.dirname(filePath), { recursive: true });
|
|
5241
|
+
await fs21.writeFile(filePath, mutation.newText, "utf-8");
|
|
5033
5242
|
} else if (mutation.action === "delete_section") {
|
|
5034
5243
|
if (!mutation.oldText) {
|
|
5035
5244
|
continue;
|
|
5036
5245
|
}
|
|
5037
5246
|
let sectionContent;
|
|
5038
5247
|
try {
|
|
5039
|
-
sectionContent = await
|
|
5248
|
+
sectionContent = await fs21.readFile(filePath, "utf-8");
|
|
5040
5249
|
} catch {
|
|
5041
5250
|
continue;
|
|
5042
5251
|
}
|
|
5043
5252
|
if (!sectionContent.includes(mutation.oldText)) {
|
|
5044
5253
|
continue;
|
|
5045
5254
|
}
|
|
5046
|
-
await
|
|
5255
|
+
await fs21.writeFile(filePath, sectionContent.replace(mutation.oldText, ""), "utf-8");
|
|
5047
5256
|
} else if (mutation.action === "delete_file") {
|
|
5048
|
-
await
|
|
5257
|
+
await fs21.unlink(filePath).catch(() => {
|
|
5049
5258
|
});
|
|
5050
5259
|
}
|
|
5051
5260
|
}
|
|
@@ -5093,17 +5302,17 @@ async function readAllFiles(dir) {
|
|
|
5093
5302
|
async function walk(current) {
|
|
5094
5303
|
let entries;
|
|
5095
5304
|
try {
|
|
5096
|
-
entries = await
|
|
5305
|
+
entries = await fs21.readdir(current, { withFileTypes: true });
|
|
5097
5306
|
} catch {
|
|
5098
5307
|
return;
|
|
5099
5308
|
}
|
|
5100
5309
|
for (const entry of entries) {
|
|
5101
|
-
const fullPath =
|
|
5102
|
-
const relativePath =
|
|
5310
|
+
const fullPath = path21.join(current, entry.name);
|
|
5311
|
+
const relativePath = path21.relative(dir, fullPath);
|
|
5103
5312
|
if (entry.isDirectory()) {
|
|
5104
5313
|
await walk(fullPath);
|
|
5105
5314
|
} else {
|
|
5106
|
-
result[relativePath] = await
|
|
5315
|
+
result[relativePath] = await fs21.readFile(fullPath, "utf-8");
|
|
5107
5316
|
}
|
|
5108
5317
|
}
|
|
5109
5318
|
}
|
|
@@ -5112,20 +5321,27 @@ async function readAllFiles(dir) {
|
|
|
5112
5321
|
}
|
|
5113
5322
|
|
|
5114
5323
|
// src/evolve/loop.ts
|
|
5324
|
+
/**
 * Compute how many mutations an iteration may apply.
 *
 * The full `maxMutations` budget is allowed for the first 40% of the run
 * (measured as iter / (maxIterations - 1)); after that the cap shrinks
 * linearly towards 1 at the final iteration and never drops below 1.
 * With a single iteration (or fewer) the budget is returned unchanged.
 *
 * @param {number} iter - Zero-based iteration index.
 * @param {number} maxIterations - Total number of iterations in the run.
 * @param {number} maxMutations - Mutation budget at the start of the run.
 * @returns {number} The mutation cap for this iteration.
 */
function computeMutationCap(iter, maxIterations, maxMutations) {
  if (maxIterations <= 1) {
    return maxMutations;
  }
  const progress = iter / (maxIterations - 1);
  if (progress <= 0.4) {
    return maxMutations;
  }
  // Fraction of the decay phase completed: 0 at 40% progress, 1 at the last iteration.
  const decayProgress = (progress - 0.4) / 0.6;
  const scaled = maxMutations * (1 - decayProgress * (1 - 1 / maxMutations));
  return Math.max(1, Math.round(scaled));
}
|
|
5115
5331
|
async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgress) {
|
|
5116
5332
|
const history = [];
|
|
5117
5333
|
let bestScore = -1;
|
|
5118
5334
|
let bestIteration = 0;
|
|
5119
5335
|
let baselineScore = 0;
|
|
5120
5336
|
for (let iter = 0; iter < evolveConfig.maxIterations; iter++) {
|
|
5121
|
-
const harnessPath =
|
|
5337
|
+
const harnessPath = path22.join(
|
|
5122
5338
|
workspacePath,
|
|
5123
5339
|
"iterations",
|
|
5124
5340
|
iter.toString(),
|
|
5125
5341
|
"harness"
|
|
5126
5342
|
);
|
|
5127
5343
|
try {
|
|
5128
|
-
await
|
|
5344
|
+
await fs22.access(harnessPath);
|
|
5129
5345
|
} catch {
|
|
5130
5346
|
if (iter === 0) {
|
|
5131
5347
|
throw new Error(
|
|
@@ -5158,6 +5374,29 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5158
5374
|
tasksToRun.push(task);
|
|
5159
5375
|
}
|
|
5160
5376
|
}
|
|
5377
|
+
const sampleSize = evolveConfig.evalSampleSize;
|
|
5378
|
+
if (sampleSize > 0 && sampleSize < tasksToRun.length) {
|
|
5379
|
+
const shuffled = [...tasksToRun].sort((a, b) => {
|
|
5380
|
+
const hashA = (iter * 31 + a.id.charCodeAt(0)) % 1e3;
|
|
5381
|
+
const hashB = (iter * 31 + b.id.charCodeAt(0)) % 1e3;
|
|
5382
|
+
return hashA - hashB;
|
|
5383
|
+
});
|
|
5384
|
+
const sampled = new Set(shuffled.slice(0, sampleSize).map((t) => t.id));
|
|
5385
|
+
for (const task of tasksToRun) {
|
|
5386
|
+
if (!sampled.has(task.id)) {
|
|
5387
|
+
const prev = prevLog.taskResults[task.id];
|
|
5388
|
+
const prevVal = prev ? prev.score ?? (prev.pass ? 100 : 0) : 0;
|
|
5389
|
+
carriedScores[task.id] = { pass: prevVal >= 50, score: prevVal };
|
|
5390
|
+
onProgress?.({
|
|
5391
|
+
type: "task-skipped",
|
|
5392
|
+
iteration: iter,
|
|
5393
|
+
taskId: task.id,
|
|
5394
|
+
message: `Sampled out ${task.id} (mini-batch ${sampleSize}/${tasksToRun.length})`
|
|
5395
|
+
});
|
|
5396
|
+
}
|
|
5397
|
+
}
|
|
5398
|
+
tasksToRun = tasksToRun.filter((t) => sampled.has(t.id));
|
|
5399
|
+
}
|
|
5161
5400
|
}
|
|
5162
5401
|
const { results: evalResults, aggregate: evalAggregate } = await evaluateAll(
|
|
5163
5402
|
tasksToRun,
|
|
@@ -5218,7 +5457,7 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5218
5457
|
};
|
|
5219
5458
|
await writeIterationLog(workspacePath, rollbackLog);
|
|
5220
5459
|
history.push(rollbackLog);
|
|
5221
|
-
const bestHarnessPath =
|
|
5460
|
+
const bestHarnessPath = path22.join(
|
|
5222
5461
|
workspacePath,
|
|
5223
5462
|
"iterations",
|
|
5224
5463
|
bestIteration.toString(),
|
|
@@ -5236,13 +5475,14 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5236
5475
|
kairnConfig,
|
|
5237
5476
|
evolveConfig.proposerModel
|
|
5238
5477
|
);
|
|
5239
|
-
|
|
5478
|
+
const rollbackCap = computeMutationCap(iter, evolveConfig.maxIterations, evolveConfig.maxMutationsPerIteration);
|
|
5479
|
+
if (rollbackProposal.mutations.length > rollbackCap) {
|
|
5240
5480
|
rollbackProposal = {
|
|
5241
5481
|
...rollbackProposal,
|
|
5242
|
-
mutations: rollbackProposal.mutations.slice(0,
|
|
5482
|
+
mutations: rollbackProposal.mutations.slice(0, rollbackCap)
|
|
5243
5483
|
};
|
|
5244
5484
|
}
|
|
5245
|
-
const nextIterDir2 =
|
|
5485
|
+
const nextIterDir2 = path22.join(workspacePath, "iterations", (iter + 1).toString());
|
|
5246
5486
|
await applyMutations(bestHarnessPath, nextIterDir2, rollbackProposal.mutations);
|
|
5247
5487
|
onProgress?.({
|
|
5248
5488
|
type: "mutations-applied",
|
|
@@ -5250,8 +5490,8 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5250
5490
|
mutationCount: rollbackProposal.mutations.length
|
|
5251
5491
|
});
|
|
5252
5492
|
} catch {
|
|
5253
|
-
const nextIterDir2 =
|
|
5254
|
-
await copyDir(bestHarnessPath,
|
|
5493
|
+
const nextIterDir2 = path22.join(workspacePath, "iterations", (iter + 1).toString());
|
|
5494
|
+
await copyDir(bestHarnessPath, path22.join(nextIterDir2, "harness"));
|
|
5255
5495
|
}
|
|
5256
5496
|
}
|
|
5257
5497
|
continue;
|
|
@@ -5297,10 +5537,11 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5297
5537
|
kairnConfig,
|
|
5298
5538
|
evolveConfig.proposerModel
|
|
5299
5539
|
);
|
|
5300
|
-
|
|
5540
|
+
const iterCap = computeMutationCap(iter, evolveConfig.maxIterations, evolveConfig.maxMutationsPerIteration);
|
|
5541
|
+
if (proposal.mutations.length > iterCap) {
|
|
5301
5542
|
proposal = {
|
|
5302
5543
|
...proposal,
|
|
5303
|
-
mutations: proposal.mutations.slice(0,
|
|
5544
|
+
mutations: proposal.mutations.slice(0, iterCap)
|
|
5304
5545
|
};
|
|
5305
5546
|
}
|
|
5306
5547
|
} catch (err) {
|
|
@@ -5310,12 +5551,12 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5310
5551
|
iteration: iter,
|
|
5311
5552
|
message: `Proposer failed: ${errMsg}`
|
|
5312
5553
|
});
|
|
5313
|
-
const nextIterDir2 =
|
|
5554
|
+
const nextIterDir2 = path22.join(
|
|
5314
5555
|
workspacePath,
|
|
5315
5556
|
"iterations",
|
|
5316
5557
|
(iter + 1).toString()
|
|
5317
5558
|
);
|
|
5318
|
-
await copyDir(harnessPath,
|
|
5559
|
+
await copyDir(harnessPath, path22.join(nextIterDir2, "harness"));
|
|
5319
5560
|
const skipLog = {
|
|
5320
5561
|
iteration: iter,
|
|
5321
5562
|
score: aggregate,
|
|
@@ -5328,7 +5569,7 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5328
5569
|
history.push(skipLog);
|
|
5329
5570
|
continue;
|
|
5330
5571
|
}
|
|
5331
|
-
const nextIterDir =
|
|
5572
|
+
const nextIterDir = path22.join(
|
|
5332
5573
|
workspacePath,
|
|
5333
5574
|
"iterations",
|
|
5334
5575
|
(iter + 1).toString()
|
|
@@ -5342,7 +5583,7 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5342
5583
|
);
|
|
5343
5584
|
diffPatch = mutationResult.diffPatch;
|
|
5344
5585
|
} catch {
|
|
5345
|
-
await copyDir(harnessPath,
|
|
5586
|
+
await copyDir(harnessPath, path22.join(nextIterDir, "harness"));
|
|
5346
5587
|
}
|
|
5347
5588
|
onProgress?.({
|
|
5348
5589
|
type: "mutations-applied",
|
|
@@ -5360,6 +5601,62 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5360
5601
|
await writeIterationLog(workspacePath, iterLog);
|
|
5361
5602
|
history.push(iterLog);
|
|
5362
5603
|
}
|
|
5604
|
+
if (evolveConfig.usePrincipal && history.length >= 2) {
|
|
5605
|
+
onProgress?.({ type: "proposing", iteration: history.length, message: "Principal Proposer synthesizing final harness" });
|
|
5606
|
+
const baselineHarnessPath = path22.join(workspacePath, "iterations", "0", "harness");
|
|
5607
|
+
try {
|
|
5608
|
+
const principalProposal = await propose(
|
|
5609
|
+
history.length,
|
|
5610
|
+
workspacePath,
|
|
5611
|
+
baselineHarnessPath,
|
|
5612
|
+
history,
|
|
5613
|
+
tasks,
|
|
5614
|
+
kairnConfig,
|
|
5615
|
+
evolveConfig.proposerModel
|
|
5616
|
+
);
|
|
5617
|
+
if (principalProposal.mutations.length > evolveConfig.maxMutationsPerIteration) {
|
|
5618
|
+
principalProposal.mutations = principalProposal.mutations.slice(0, evolveConfig.maxMutationsPerIteration);
|
|
5619
|
+
}
|
|
5620
|
+
const principalIterNum = history.length;
|
|
5621
|
+
const principalIterDir = path22.join(workspacePath, "iterations", principalIterNum.toString());
|
|
5622
|
+
const mutResult = await applyMutations(baselineHarnessPath, principalIterDir, principalProposal.mutations);
|
|
5623
|
+
onProgress?.({ type: "iteration-start", iteration: principalIterNum });
|
|
5624
|
+
const { results: principalResults, aggregate: principalAggregate } = await evaluateAll(
|
|
5625
|
+
tasks,
|
|
5626
|
+
mutResult.newHarnessPath,
|
|
5627
|
+
workspacePath,
|
|
5628
|
+
principalIterNum,
|
|
5629
|
+
kairnConfig,
|
|
5630
|
+
onProgress,
|
|
5631
|
+
evolveConfig.runsPerTask,
|
|
5632
|
+
evolveConfig.parallelTasks
|
|
5633
|
+
);
|
|
5634
|
+
onProgress?.({ type: "iteration-scored", iteration: principalIterNum, score: principalAggregate });
|
|
5635
|
+
const principalLog = {
|
|
5636
|
+
iteration: principalIterNum,
|
|
5637
|
+
score: principalAggregate,
|
|
5638
|
+
taskResults: principalResults,
|
|
5639
|
+
proposal: principalProposal,
|
|
5640
|
+
diffPatch: mutResult.diffPatch,
|
|
5641
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
5642
|
+
};
|
|
5643
|
+
await writeIterationLog(workspacePath, principalLog);
|
|
5644
|
+
history.push(principalLog);
|
|
5645
|
+
if (principalAggregate > bestScore) {
|
|
5646
|
+
bestScore = principalAggregate;
|
|
5647
|
+
bestIteration = principalIterNum;
|
|
5648
|
+
}
|
|
5649
|
+
} catch (err) {
|
|
5650
|
+
const errMsg = err instanceof Error ? err.message : String(err);
|
|
5651
|
+
onProgress?.({ type: "proposer-error", iteration: history.length, message: `Principal failed: ${errMsg}` });
|
|
5652
|
+
}
|
|
5653
|
+
}
|
|
5654
|
+
try {
|
|
5655
|
+
const { buildRunSummary: buildRunSummary2, saveRunSummary: saveRunSummary2 } = await Promise.resolve().then(() => (init_memory(), memory_exports));
|
|
5656
|
+
const summary = buildRunSummary2(history, baselineScore, bestScore);
|
|
5657
|
+
await saveRunSummary2(workspacePath, summary);
|
|
5658
|
+
} catch {
|
|
5659
|
+
}
|
|
5363
5660
|
onProgress?.({
|
|
5364
5661
|
type: "complete",
|
|
5365
5662
|
iteration: history.length > 0 ? history.length - 1 : 0,
|
|
@@ -5374,8 +5671,8 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5374
5671
|
}
|
|
5375
5672
|
|
|
5376
5673
|
// src/evolve/report.ts
|
|
5377
|
-
import
|
|
5378
|
-
import
|
|
5674
|
+
import fs23 from "fs/promises";
|
|
5675
|
+
import path23 from "path";
|
|
5379
5676
|
|
|
5380
5677
|
// src/evolve/diagnosis.ts
|
|
5381
5678
|
function numericScore(s) {
|
|
@@ -5425,10 +5722,10 @@ function numericScore2(s) {
|
|
|
5425
5722
|
return s.score ?? (s.pass ? 100 : 0);
|
|
5426
5723
|
}
|
|
5427
5724
|
async function loadAllIterations(workspacePath) {
|
|
5428
|
-
const iterDir =
|
|
5725
|
+
const iterDir = path23.join(workspacePath, "iterations");
|
|
5429
5726
|
let entries;
|
|
5430
5727
|
try {
|
|
5431
|
-
entries = await
|
|
5728
|
+
entries = await fs23.readdir(iterDir);
|
|
5432
5729
|
} catch {
|
|
5433
5730
|
return [];
|
|
5434
5731
|
}
|
|
@@ -5442,7 +5739,7 @@ async function loadAllIterations(workspacePath) {
|
|
|
5442
5739
|
}
|
|
5443
5740
|
async function loadTasks(workspacePath) {
|
|
5444
5741
|
try {
|
|
5445
|
-
const content = await
|
|
5742
|
+
const content = await fs23.readFile(path23.join(workspacePath, "tasks.yaml"), "utf-8");
|
|
5446
5743
|
const parsed = yamlParse(content);
|
|
5447
5744
|
return parsed?.tasks ?? [];
|
|
5448
5745
|
} catch {
|
|
@@ -5453,6 +5750,7 @@ function buildLeaderboard(iterations, tasks) {
|
|
|
5453
5750
|
const taskIds = tasks.map((t) => t.id);
|
|
5454
5751
|
return taskIds.map((taskId) => {
|
|
5455
5752
|
const scores = {};
|
|
5753
|
+
const variance = {};
|
|
5456
5754
|
let bestScore = -1;
|
|
5457
5755
|
let bestIteration = 0;
|
|
5458
5756
|
for (const iter of iterations) {
|
|
@@ -5460,13 +5758,21 @@ function buildLeaderboard(iterations, tasks) {
|
|
|
5460
5758
|
if (s) {
|
|
5461
5759
|
const score = numericScore2(s);
|
|
5462
5760
|
scores[iter.iteration] = score;
|
|
5761
|
+
if (s.variance) {
|
|
5762
|
+
variance[iter.iteration] = {
|
|
5763
|
+
mean: s.variance.mean,
|
|
5764
|
+
stddev: s.variance.stddev,
|
|
5765
|
+
runs: s.variance.runs
|
|
5766
|
+
};
|
|
5767
|
+
}
|
|
5463
5768
|
if (score > bestScore) {
|
|
5464
5769
|
bestScore = score;
|
|
5465
5770
|
bestIteration = iter.iteration;
|
|
5466
5771
|
}
|
|
5467
5772
|
}
|
|
5468
5773
|
}
|
|
5469
|
-
|
|
5774
|
+
const hasVariance = Object.keys(variance).length > 0;
|
|
5775
|
+
return { taskId, scores, bestIteration, bestScore, ...hasVariance ? { variance } : {} };
|
|
5470
5776
|
});
|
|
5471
5777
|
}
|
|
5472
5778
|
function iterationStatus(iter, bestIteration) {
|
|
@@ -5502,13 +5808,29 @@ async function generateMarkdownReport(workspacePath) {
|
|
|
5502
5808
|
lines.push("");
|
|
5503
5809
|
lines.push("## Iterations");
|
|
5504
5810
|
lines.push("");
|
|
5505
|
-
|
|
5506
|
-
|
|
5811
|
+
const hasVariance = iterations.some(
|
|
5812
|
+
(iter) => Object.values(iter.taskResults).some((s) => s.variance)
|
|
5813
|
+
);
|
|
5814
|
+
if (hasVariance) {
|
|
5815
|
+
lines.push("| Iter | Score | Mutations | Status |");
|
|
5816
|
+
lines.push("|------|-------|-----------|--------|");
|
|
5817
|
+
} else {
|
|
5818
|
+
lines.push("| Iter | Score | Mutations | Status |");
|
|
5819
|
+
lines.push("|------|-------|-----------|--------|");
|
|
5820
|
+
}
|
|
5507
5821
|
for (const iter of iterations) {
|
|
5508
5822
|
const mutations = iter.proposal?.mutations.length ?? 0;
|
|
5509
5823
|
const mutStr = mutations > 0 ? mutations.toString() : "-";
|
|
5510
5824
|
const status = iterationStatus(iter, bestIter.iteration);
|
|
5511
|
-
|
|
5825
|
+
let scoreStr = `${iter.score.toFixed(1)}%`;
|
|
5826
|
+
if (hasVariance) {
|
|
5827
|
+
const stddevs = Object.values(iter.taskResults).map((s) => s.variance?.stddev).filter((v) => v !== void 0);
|
|
5828
|
+
if (stddevs.length > 0) {
|
|
5829
|
+
const avgStddev = stddevs.reduce((a, b) => a + b, 0) / stddevs.length;
|
|
5830
|
+
scoreStr = `${iter.score.toFixed(1)}% \xB1${avgStddev.toFixed(1)}`;
|
|
5831
|
+
}
|
|
5832
|
+
}
|
|
5833
|
+
lines.push(`| ${iter.iteration} | ${scoreStr} | ${mutStr} | ${status} |`);
|
|
5512
5834
|
}
|
|
5513
5835
|
lines.push("");
|
|
5514
5836
|
if (leaderboard.length > 0) {
|
|
@@ -5521,7 +5843,10 @@ async function generateMarkdownReport(workspacePath) {
|
|
|
5521
5843
|
for (const entry of leaderboard) {
|
|
5522
5844
|
const scoreCols = iterNums.map((n) => {
|
|
5523
5845
|
const s = entry.scores[n];
|
|
5524
|
-
|
|
5846
|
+
if (s === void 0) return "-";
|
|
5847
|
+
const v = entry.variance?.[n];
|
|
5848
|
+
if (v && v.runs > 1) return `${s.toFixed(0)}% \xB1${v.stddev.toFixed(1)}`;
|
|
5849
|
+
return `${s.toFixed(0)}%`;
|
|
5525
5850
|
});
|
|
5526
5851
|
lines.push(`| ${entry.taskId} | ${scoreCols.join(" | ")} | ${entry.bestScore.toFixed(0)}% (iter ${entry.bestIteration}) |`);
|
|
5527
5852
|
}
|
|
@@ -5571,25 +5896,30 @@ async function generateJsonReport(workspacePath) {
|
|
|
5571
5896
|
bestIteration: bestIter.iteration,
|
|
5572
5897
|
improvement
|
|
5573
5898
|
},
|
|
5574
|
-
iterations: iterations.map((iter) =>
|
|
5575
|
-
|
|
5576
|
-
|
|
5577
|
-
|
|
5578
|
-
|
|
5579
|
-
|
|
5899
|
+
iterations: iterations.map((iter) => {
|
|
5900
|
+
const stddevs = Object.values(iter.taskResults).map((s) => s.variance?.stddev).filter((v) => v !== void 0);
|
|
5901
|
+
const avgStddev = stddevs.length > 0 ? stddevs.reduce((a, b) => a + b, 0) / stddevs.length : void 0;
|
|
5902
|
+
return {
|
|
5903
|
+
iteration: iter.iteration,
|
|
5904
|
+
score: iter.score,
|
|
5905
|
+
...avgStddev !== void 0 ? { stddev: avgStddev } : {},
|
|
5906
|
+
mutationCount: iter.proposal?.mutations.length ?? 0,
|
|
5907
|
+
status: iterationStatus(iter, bestIter.iteration)
|
|
5908
|
+
};
|
|
5909
|
+
}),
|
|
5580
5910
|
leaderboard,
|
|
5581
5911
|
counterfactuals
|
|
5582
5912
|
};
|
|
5583
5913
|
}
|
|
5584
5914
|
|
|
5585
5915
|
// src/evolve/apply.ts
|
|
5586
|
-
import
|
|
5587
|
-
import
|
|
5916
|
+
import fs24 from "fs/promises";
|
|
5917
|
+
import path24 from "path";
|
|
5588
5918
|
async function listIterations(workspacePath) {
|
|
5589
|
-
const iterationsDir =
|
|
5919
|
+
const iterationsDir = path24.join(workspacePath, "iterations");
|
|
5590
5920
|
let entries;
|
|
5591
5921
|
try {
|
|
5592
|
-
entries = await
|
|
5922
|
+
entries = await fs24.readdir(iterationsDir);
|
|
5593
5923
|
} catch {
|
|
5594
5924
|
return [];
|
|
5595
5925
|
}
|
|
@@ -5598,7 +5928,7 @@ async function listIterations(workspacePath) {
|
|
|
5598
5928
|
const n = parseInt(entry, 10);
|
|
5599
5929
|
if (!isNaN(n)) {
|
|
5600
5930
|
try {
|
|
5601
|
-
await
|
|
5931
|
+
await fs24.access(path24.join(iterationsDir, entry, "harness"));
|
|
5602
5932
|
nums.push(n);
|
|
5603
5933
|
} catch {
|
|
5604
5934
|
}
|
|
@@ -5624,16 +5954,16 @@ async function listFilesRecursive(dir) {
|
|
|
5624
5954
|
async function walk(current) {
|
|
5625
5955
|
let entries;
|
|
5626
5956
|
try {
|
|
5627
|
-
entries = await
|
|
5957
|
+
entries = await fs24.readdir(current, { withFileTypes: true });
|
|
5628
5958
|
} catch {
|
|
5629
5959
|
return;
|
|
5630
5960
|
}
|
|
5631
5961
|
for (const entry of entries) {
|
|
5632
|
-
const fullPath =
|
|
5962
|
+
const fullPath = path24.join(current, entry.name);
|
|
5633
5963
|
if (entry.isDirectory()) {
|
|
5634
5964
|
await walk(fullPath);
|
|
5635
5965
|
} else {
|
|
5636
|
-
results.push(
|
|
5966
|
+
results.push(path24.relative(dir, fullPath));
|
|
5637
5967
|
}
|
|
5638
5968
|
}
|
|
5639
5969
|
}
|
|
@@ -5656,37 +5986,37 @@ async function applyEvolution(workspacePath, projectRoot, targetIteration) {
|
|
|
5656
5986
|
} else {
|
|
5657
5987
|
iter = await findBestIteration(workspacePath, iterations);
|
|
5658
5988
|
}
|
|
5659
|
-
const harnessPath =
|
|
5989
|
+
const harnessPath = path24.join(
|
|
5660
5990
|
workspacePath,
|
|
5661
5991
|
"iterations",
|
|
5662
5992
|
iter.toString(),
|
|
5663
5993
|
"harness"
|
|
5664
5994
|
);
|
|
5665
|
-
const claudeDir =
|
|
5995
|
+
const claudeDir = path24.join(projectRoot, ".claude");
|
|
5666
5996
|
const diffPreview = await generateDiff2(claudeDir, harnessPath);
|
|
5667
5997
|
const currentFiles = await listFilesRecursive(claudeDir);
|
|
5668
5998
|
const targetFiles = await listFilesRecursive(harnessPath);
|
|
5669
5999
|
const allPaths = /* @__PURE__ */ new Set([...currentFiles, ...targetFiles]);
|
|
5670
6000
|
const filesChanged = [];
|
|
5671
6001
|
for (const filePath of allPaths) {
|
|
5672
|
-
const currentContent = await
|
|
5673
|
-
const targetContent = await
|
|
6002
|
+
const currentContent = await fs24.readFile(path24.join(claudeDir, filePath), "utf-8").catch(() => null);
|
|
6003
|
+
const targetContent = await fs24.readFile(path24.join(harnessPath, filePath), "utf-8").catch(() => null);
|
|
5674
6004
|
if (currentContent !== targetContent) {
|
|
5675
6005
|
filesChanged.push(filePath);
|
|
5676
6006
|
}
|
|
5677
6007
|
}
|
|
5678
|
-
await
|
|
6008
|
+
await fs24.rm(claudeDir, { recursive: true, force: true });
|
|
5679
6009
|
await copyDir(harnessPath, claudeDir);
|
|
5680
|
-
const harnessMcpJson =
|
|
5681
|
-
const projectMcpJson =
|
|
6010
|
+
const harnessMcpJson = path24.join(harnessPath, ".mcp.json");
|
|
6011
|
+
const projectMcpJson = path24.join(projectRoot, ".mcp.json");
|
|
5682
6012
|
try {
|
|
5683
|
-
await
|
|
5684
|
-
const currentMcp = await
|
|
5685
|
-
const targetMcp = await
|
|
6013
|
+
await fs24.access(harnessMcpJson);
|
|
6014
|
+
const currentMcp = await fs24.readFile(projectMcpJson, "utf-8").catch(() => null);
|
|
6015
|
+
const targetMcp = await fs24.readFile(harnessMcpJson, "utf-8").catch(() => null);
|
|
5686
6016
|
if (currentMcp !== targetMcp) {
|
|
5687
6017
|
filesChanged.push(".mcp.json");
|
|
5688
6018
|
}
|
|
5689
|
-
await
|
|
6019
|
+
await fs24.copyFile(harnessMcpJson, projectMcpJson);
|
|
5690
6020
|
} catch {
|
|
5691
6021
|
}
|
|
5692
6022
|
return {
|
|
@@ -5706,11 +6036,13 @@ var DEFAULT_CONFIG = {
|
|
|
5706
6036
|
runsPerTask: 1,
|
|
5707
6037
|
maxMutationsPerIteration: 3,
|
|
5708
6038
|
pruneThreshold: 95,
|
|
5709
|
-
maxTaskDrop: 20
|
|
6039
|
+
maxTaskDrop: 20,
|
|
6040
|
+
usePrincipal: false,
|
|
6041
|
+
evalSampleSize: 0
|
|
5710
6042
|
};
|
|
5711
6043
|
async function loadEvolveConfigFromWorkspace(workspacePath) {
|
|
5712
6044
|
try {
|
|
5713
|
-
const configStr = await
|
|
6045
|
+
const configStr = await fs25.readFile(path25.join(workspacePath, "config.yaml"), "utf-8");
|
|
5714
6046
|
const parsed = yamlParse2(configStr);
|
|
5715
6047
|
return {
|
|
5716
6048
|
model: parsed.model ?? DEFAULT_CONFIG.model,
|
|
@@ -5721,7 +6053,9 @@ async function loadEvolveConfigFromWorkspace(workspacePath) {
|
|
|
5721
6053
|
runsPerTask: parsed.runs_per_task ?? DEFAULT_CONFIG.runsPerTask,
|
|
5722
6054
|
maxMutationsPerIteration: parsed.max_mutations_per_iteration ?? DEFAULT_CONFIG.maxMutationsPerIteration,
|
|
5723
6055
|
pruneThreshold: parsed.prune_threshold ?? DEFAULT_CONFIG.pruneThreshold,
|
|
5724
|
-
maxTaskDrop: parsed.max_task_drop ?? DEFAULT_CONFIG.maxTaskDrop
|
|
6056
|
+
maxTaskDrop: parsed.max_task_drop ?? DEFAULT_CONFIG.maxTaskDrop,
|
|
6057
|
+
usePrincipal: parsed.use_principal ?? DEFAULT_CONFIG.usePrincipal,
|
|
6058
|
+
evalSampleSize: parsed.eval_sample_size ?? DEFAULT_CONFIG.evalSampleSize
|
|
5725
6059
|
};
|
|
5726
6060
|
} catch {
|
|
5727
6061
|
return { ...DEFAULT_CONFIG };
|
|
@@ -5732,9 +6066,9 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
|
|
|
5732
6066
|
try {
|
|
5733
6067
|
const projectRoot = process.cwd();
|
|
5734
6068
|
console.log(ui.section("Evolve Init"));
|
|
5735
|
-
const claudeDir =
|
|
6069
|
+
const claudeDir = path25.join(projectRoot, ".claude");
|
|
5736
6070
|
try {
|
|
5737
|
-
await
|
|
6071
|
+
await fs25.access(claudeDir);
|
|
5738
6072
|
} catch {
|
|
5739
6073
|
console.log(ui.error("No .claude/ directory found. Run kairn describe first."));
|
|
5740
6074
|
process.exit(1);
|
|
@@ -5766,7 +6100,7 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
|
|
|
5766
6100
|
let addMore = true;
|
|
5767
6101
|
while (addMore) {
|
|
5768
6102
|
try {
|
|
5769
|
-
addMore = await
|
|
6103
|
+
addMore = await confirm4({ message: "Add another eval task?", default: false });
|
|
5770
6104
|
} catch {
|
|
5771
6105
|
addMore = false;
|
|
5772
6106
|
}
|
|
@@ -5784,7 +6118,7 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
|
|
|
5784
6118
|
if (config) {
|
|
5785
6119
|
let claudeMd = "";
|
|
5786
6120
|
try {
|
|
5787
|
-
claudeMd = await
|
|
6121
|
+
claudeMd = await fs25.readFile(path25.join(claudeDir, "CLAUDE.md"), "utf-8");
|
|
5788
6122
|
} catch {
|
|
5789
6123
|
}
|
|
5790
6124
|
const profile = await buildProjectProfile(projectRoot);
|
|
@@ -5815,16 +6149,16 @@ evolveCommand.command("init").description("Initialize an evolution workspace wit
|
|
|
5815
6149
|
evolveCommand.command("baseline").description("Snapshot current .claude/ directory as baseline").action(async () => {
|
|
5816
6150
|
try {
|
|
5817
6151
|
const projectRoot = process.cwd();
|
|
5818
|
-
const workspace =
|
|
6152
|
+
const workspace = path25.join(projectRoot, ".kairn-evolve");
|
|
5819
6153
|
console.log(ui.section("Evolve Baseline"));
|
|
5820
6154
|
try {
|
|
5821
|
-
await
|
|
6155
|
+
await fs25.access(workspace);
|
|
5822
6156
|
} catch {
|
|
5823
6157
|
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
5824
6158
|
process.exit(1);
|
|
5825
6159
|
}
|
|
5826
6160
|
await snapshotBaseline(projectRoot, workspace);
|
|
5827
|
-
const baselineDir =
|
|
6161
|
+
const baselineDir = path25.join(workspace, "baseline");
|
|
5828
6162
|
const fileCount = await countFiles(baselineDir);
|
|
5829
6163
|
console.log(ui.success(`Baseline snapshot created (${fileCount} files)`));
|
|
5830
6164
|
} catch (err) {
|
|
@@ -5833,21 +6167,21 @@ evolveCommand.command("baseline").description("Snapshot current .claude/ directo
|
|
|
5833
6167
|
process.exit(1);
|
|
5834
6168
|
}
|
|
5835
6169
|
});
|
|
5836
|
-
evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").option("--parallel <n>", "Run up to N tasks concurrently", "1").option("--max-mutations <n>", "Max mutations per iteration", "3").option("--prune-threshold <n>", "Skip tasks scoring above this on middle iterations", "95").option("--max-task-drop <n>", "Roll back if any task drops more than N points", "20").action(async (options) => {
|
|
6170
|
+
evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").option("--parallel <n>", "Run up to N tasks concurrently", "1").option("--max-mutations <n>", "Max mutations per iteration", "3").option("--prune-threshold <n>", "Skip tasks scoring above this on middle iterations", "95").option("--max-task-drop <n>", "Roll back if any task drops more than N points", "20").option("--principal", "Run Principal Proposer as final iteration").option("--eval-sample <n>", "Sample N tasks per middle iteration (0 = all)", "0").action(async (options) => {
|
|
5837
6171
|
try {
|
|
5838
6172
|
const projectRoot = process.cwd();
|
|
5839
|
-
const workspace =
|
|
6173
|
+
const workspace = path25.join(projectRoot, ".kairn-evolve");
|
|
5840
6174
|
console.log(ui.section("Evolve Run"));
|
|
5841
6175
|
try {
|
|
5842
|
-
await
|
|
6176
|
+
await fs25.access(workspace);
|
|
5843
6177
|
} catch {
|
|
5844
6178
|
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
5845
6179
|
process.exit(1);
|
|
5846
6180
|
}
|
|
5847
|
-
const tasksPath =
|
|
6181
|
+
const tasksPath = path25.join(workspace, "tasks.yaml");
|
|
5848
6182
|
let tasksContent;
|
|
5849
6183
|
try {
|
|
5850
|
-
tasksContent = await
|
|
6184
|
+
tasksContent = await fs25.readFile(tasksPath, "utf-8");
|
|
5851
6185
|
} catch {
|
|
5852
6186
|
console.log(ui.error("No tasks.yaml found. Run kairn evolve init first."));
|
|
5853
6187
|
process.exit(1);
|
|
@@ -5866,15 +6200,15 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5866
6200
|
console.log(ui.info(`Running ${tasksToRun.length} task(s)...`));
|
|
5867
6201
|
console.log("");
|
|
5868
6202
|
const config = await loadConfig();
|
|
5869
|
-
const harnessPath =
|
|
6203
|
+
const harnessPath = path25.join(projectRoot, ".claude");
|
|
5870
6204
|
const results = [];
|
|
5871
6205
|
for (const task of tasksToRun) {
|
|
5872
|
-
const traceDir =
|
|
6206
|
+
const traceDir = path25.join(workspace, "traces", "0", task.id);
|
|
5873
6207
|
const spinner = ora2(`Running: ${task.id}`).start();
|
|
5874
6208
|
const result = await runTask(task, harnessPath, traceDir, 0);
|
|
5875
6209
|
if (config) {
|
|
5876
|
-
const stdout = await
|
|
5877
|
-
const stderr = await
|
|
6210
|
+
const stdout = await fs25.readFile(path25.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
|
|
6211
|
+
const stderr = await fs25.readFile(path25.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
|
|
5878
6212
|
const score = await scoreTask(task, traceDir, stdout, stderr, config);
|
|
5879
6213
|
result.score = score;
|
|
5880
6214
|
await writeScore(traceDir, score);
|
|
@@ -5932,8 +6266,17 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5932
6266
|
process.exit(1);
|
|
5933
6267
|
}
|
|
5934
6268
|
evolveConfig.maxTaskDrop = maxTaskDrop;
|
|
6269
|
+
if (options.principal) {
|
|
6270
|
+
evolveConfig.usePrincipal = true;
|
|
6271
|
+
}
|
|
6272
|
+
const evalSample = parseInt(options.evalSample ?? "0", 10);
|
|
6273
|
+
if (isNaN(evalSample) || evalSample < 0) {
|
|
6274
|
+
console.log(ui.error("--eval-sample must be a non-negative integer"));
|
|
6275
|
+
process.exit(1);
|
|
6276
|
+
}
|
|
6277
|
+
evolveConfig.evalSampleSize = evalSample;
|
|
5935
6278
|
try {
|
|
5936
|
-
await
|
|
6279
|
+
await fs25.access(path25.join(workspace, "iterations", "0", "harness"));
|
|
5937
6280
|
} catch {
|
|
5938
6281
|
console.log(ui.error("No baseline harness found. Run kairn evolve baseline first."));
|
|
5939
6282
|
process.exit(1);
|
|
@@ -6027,10 +6370,10 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
6027
6370
|
evolveCommand.command("apply").description("Apply the best evolved harness to your project").option("--iter <n>", "Apply a specific iteration instead of the best").option("--force", "Apply even if git working tree is dirty").option("--no-commit", "Skip automatic git commit after applying").action(async (options) => {
|
|
6028
6371
|
try {
|
|
6029
6372
|
const projectRoot = process.cwd();
|
|
6030
|
-
const workspace =
|
|
6373
|
+
const workspace = path25.join(projectRoot, ".kairn-evolve");
|
|
6031
6374
|
console.log(ui.section("Evolve Apply"));
|
|
6032
6375
|
try {
|
|
6033
|
-
await
|
|
6376
|
+
await fs25.access(workspace);
|
|
6034
6377
|
} catch {
|
|
6035
6378
|
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
6036
6379
|
process.exit(1);
|
|
@@ -6071,9 +6414,9 @@ evolveCommand.command("apply").description("Apply the best evolved harness to yo
|
|
|
6071
6414
|
evolveCommand.command("report").description("Generate a summary report of the evolution run").option("--json", "Output machine-readable JSON instead of Markdown").action(async (options) => {
|
|
6072
6415
|
try {
|
|
6073
6416
|
const projectRoot = process.cwd();
|
|
6074
|
-
const workspace =
|
|
6417
|
+
const workspace = path25.join(projectRoot, ".kairn-evolve");
|
|
6075
6418
|
try {
|
|
6076
|
-
await
|
|
6419
|
+
await fs25.access(workspace);
|
|
6077
6420
|
} catch {
|
|
6078
6421
|
console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
|
|
6079
6422
|
process.exit(1);
|
|
@@ -6094,23 +6437,23 @@ evolveCommand.command("report").description("Generate a summary report of the ev
|
|
|
6094
6437
|
evolveCommand.command("diff <iter1> <iter2>").description("Show harness changes between two iterations").action(async (iter1Str, iter2Str) => {
|
|
6095
6438
|
try {
|
|
6096
6439
|
const projectRoot = process.cwd();
|
|
6097
|
-
const workspace =
|
|
6440
|
+
const workspace = path25.join(projectRoot, ".kairn-evolve");
|
|
6098
6441
|
const iter1 = parseInt(iter1Str, 10);
|
|
6099
6442
|
const iter2 = parseInt(iter2Str, 10);
|
|
6100
6443
|
if (isNaN(iter1) || isNaN(iter2)) {
|
|
6101
6444
|
console.log(ui.error("Both arguments must be integers (iteration numbers)"));
|
|
6102
6445
|
process.exit(1);
|
|
6103
6446
|
}
|
|
6104
|
-
const harness1 =
|
|
6105
|
-
const harness2 =
|
|
6447
|
+
const harness1 = path25.join(workspace, "iterations", iter1.toString(), "harness");
|
|
6448
|
+
const harness2 = path25.join(workspace, "iterations", iter2.toString(), "harness");
|
|
6106
6449
|
try {
|
|
6107
|
-
await
|
|
6450
|
+
await fs25.access(harness1);
|
|
6108
6451
|
} catch {
|
|
6109
6452
|
console.log(ui.error(`Iteration ${iter1} harness not found at ${harness1}`));
|
|
6110
6453
|
process.exit(1);
|
|
6111
6454
|
}
|
|
6112
6455
|
try {
|
|
6113
|
-
await
|
|
6456
|
+
await fs25.access(harness2);
|
|
6114
6457
|
} catch {
|
|
6115
6458
|
console.log(ui.error(`Iteration ${iter2} harness not found at ${harness2}`));
|
|
6116
6459
|
process.exit(1);
|
|
@@ -6165,10 +6508,10 @@ evolveCommand.command("diff <iter1> <iter2>").description("Show harness changes
|
|
|
6165
6508
|
async function countFiles(dir) {
|
|
6166
6509
|
let count = 0;
|
|
6167
6510
|
try {
|
|
6168
|
-
const entries = await
|
|
6511
|
+
const entries = await fs25.readdir(dir, { withFileTypes: true });
|
|
6169
6512
|
for (const entry of entries) {
|
|
6170
6513
|
if (entry.isDirectory()) {
|
|
6171
|
-
count += await countFiles(
|
|
6514
|
+
count += await countFiles(path25.join(dir, entry.name));
|
|
6172
6515
|
} else {
|
|
6173
6516
|
count++;
|
|
6174
6517
|
}
|
|
@@ -6179,10 +6522,12 @@ async function countFiles(dir) {
|
|
|
6179
6522
|
}
|
|
6180
6523
|
|
|
6181
6524
|
// src/cli.ts
|
|
6525
|
+
var require2 = createRequire(import.meta.url);
|
|
6526
|
+
var pkg = require2("../package.json");
|
|
6182
6527
|
var program = new Command12();
|
|
6183
6528
|
program.name("kairn").description(
|
|
6184
6529
|
"Compile natural language intent into optimized Claude Code environments"
|
|
6185
|
-
).version(
|
|
6530
|
+
).version(pkg.version).option("--no-color", "Disable colored output");
|
|
6186
6531
|
program.addCommand(initCommand);
|
|
6187
6532
|
program.addCommand(describeCommand);
|
|
6188
6533
|
program.addCommand(optimizeCommand);
|