kairn-cli 1.13.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +1026 -88
- package/dist/cli.js.map +1 -1
- package/package.json +6 -3
package/dist/cli.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// src/cli.ts
|
|
2
|
-
import { Command as
|
|
3
|
-
import
|
|
2
|
+
import { Command as Command12 } from "commander";
|
|
3
|
+
import chalk15 from "chalk";
|
|
4
4
|
|
|
5
5
|
// src/commands/init.ts
|
|
6
6
|
import { Command } from "commander";
|
|
@@ -221,7 +221,7 @@ var ui = {
|
|
|
221
221
|
// Key-value pairs
|
|
222
222
|
kv: (key, value) => ` ${chalk.cyan(key.padEnd(14))} ${value}`,
|
|
223
223
|
// File list
|
|
224
|
-
file: (
|
|
224
|
+
file: (path20) => chalk.dim(` ${path20}`),
|
|
225
225
|
// Tool display
|
|
226
226
|
tool: (name, reason) => ` ${warmStone("\u25CF")} ${chalk.bold(name)}
|
|
227
227
|
${chalk.dim(reason)}`,
|
|
@@ -562,8 +562,6 @@ import chalk5 from "chalk";
|
|
|
562
562
|
import fs4 from "fs/promises";
|
|
563
563
|
import path4 from "path";
|
|
564
564
|
import crypto from "crypto";
|
|
565
|
-
import Anthropic2 from "@anthropic-ai/sdk";
|
|
566
|
-
import OpenAI2 from "openai";
|
|
567
565
|
|
|
568
566
|
// src/compiler/prompt.ts
|
|
569
567
|
var SKELETON_PROMPT = `You are the Kairn skeleton compiler. Your job is to select tools and outline the project structure from a user's natural language description.
|
|
@@ -716,6 +714,13 @@ At the start of every session, before doing ANY work:
|
|
|
716
714
|
4. Summarize what you see in 2-3 lines, then proceed
|
|
717
715
|
|
|
718
716
|
This saves 2-5 exploratory turns. Never ask "what files are here?" \u2014 look first.
|
|
717
|
+
|
|
718
|
+
## Completion Standards
|
|
719
|
+
|
|
720
|
+
Never mark a task "done" without running the Completion Verification checklist.
|
|
721
|
+
Tests passing is necessary but not sufficient \u2014 also verify requirements coverage,
|
|
722
|
+
state cleanliness, and review changes from the perspective of a test engineer,
|
|
723
|
+
code reviewer, and the requesting user.
|
|
719
724
|
\`\`\`
|
|
720
725
|
|
|
721
726
|
Do not add generic filler. Every line must be specific to the user's workflow.
|
|
@@ -738,6 +743,7 @@ Do not add generic filler. Every line must be specific to the user's workflow.
|
|
|
738
743
|
14. A "Git Workflow" section in CLAUDE.md (3 rules: small commits, conventional format, <200 lines PR)
|
|
739
744
|
15. "Engineering Standards", "Tool Usage Policy", and "Code Philosophy" sections in CLAUDE.md
|
|
740
745
|
16. A "First Turn Protocol" section in CLAUDE.md (orient before working: pwd, ls, git status, check relevant runtimes, read task files)
|
|
746
|
+
17. A "Completion Standards" section in CLAUDE.md (never mark done without verifying: requirements met, tests passing, no debug artifacts, reviewed from 3 perspectives)
|
|
741
747
|
|
|
742
748
|
## Shell-Integrated Commands
|
|
743
749
|
|
|
@@ -855,7 +861,7 @@ Merge this into the settings hooks alongside the PreToolUse and PostToolUse hook
|
|
|
855
861
|
- \`/project:status\` command (live git status, recent commits, SPRINT.md overview using ! prefix)
|
|
856
862
|
- \`/project:fix\` command (takes $ARGUMENTS as issue number, plans fix, implements, tests, commits)
|
|
857
863
|
- \`/project:sprint\` command (define acceptance criteria before coding, writes to docs/SPRINT.md)
|
|
858
|
-
- \`/project:develop\` command (full development pipeline \u2014 orchestrates @architect \u2192 @planner \u2192 @implementer \u2192 @verifier \u2192 @fixer \u2192 @grill \u2192 @doc-updater through spec, plan, TDD implement, review, and doc update phases)
|
|
864
|
+
- \`/project:develop\` command (full development pipeline \u2014 orchestrates @architect \u2192 @planner \u2192 @implementer \u2192 @verifier \u2192 @fixer \u2192 @grill \u2192 @doc-updater through spec, plan, TDD implement, review, and doc update phases). MUST include a Phase 7 "Completion Gate" that runs a Completion Verification checklist before marking the feature done: re-read original requirements, confirm each is met with evidence, run test suite + lint/typecheck, review git diff for unexpected changes or debug artifacts, answer 3 perspective questions (test engineer, code reviewer, requesting user). If ANY check fails, loop back to fix before completing.
|
|
859
865
|
- A TDD skill using the 3-phase isolation pattern (RED \u2192 GREEN \u2192 REFACTOR):
|
|
860
866
|
- RED: Write failing test only. Verify it FAILS.
|
|
861
867
|
- GREEN: Write MINIMUM code to pass. Nothing extra.
|
|
@@ -1023,6 +1029,13 @@ At the start of every session, before doing ANY work:
|
|
|
1023
1029
|
4. Summarize what you see in 2-3 lines, then proceed
|
|
1024
1030
|
|
|
1025
1031
|
This saves 2-5 exploratory turns. Never ask "what files are here?" \u2014 look first.
|
|
1032
|
+
|
|
1033
|
+
## Completion Standards
|
|
1034
|
+
|
|
1035
|
+
Never mark a task "done" without running the Completion Verification checklist.
|
|
1036
|
+
Tests passing is necessary but not sufficient \u2014 also verify requirements coverage,
|
|
1037
|
+
state cleanliness, and review changes from the perspective of a test engineer,
|
|
1038
|
+
code reviewer, and the requesting user.
|
|
1026
1039
|
\`\`\`
|
|
1027
1040
|
|
|
1028
1041
|
Do not add generic filler. Every line must be specific to the user's workflow.
|
|
@@ -1045,6 +1058,7 @@ Do not add generic filler. Every line must be specific to the user's workflow.
|
|
|
1045
1058
|
14. A "Git Workflow" section in CLAUDE.md (3 rules: small commits, conventional format, <200 lines PR)
|
|
1046
1059
|
15. "Engineering Standards", "Tool Usage Policy", and "Code Philosophy" sections in CLAUDE.md
|
|
1047
1060
|
16. A "First Turn Protocol" section in CLAUDE.md (orient before working: pwd, ls, git status, check relevant runtimes, read task files)
|
|
1061
|
+
17. A "Completion Standards" section in CLAUDE.md (never mark done without verifying: requirements met, tests passing, no debug artifacts, reviewed from 3 perspectives)
|
|
1048
1062
|
|
|
1049
1063
|
## Tool Selection Rules
|
|
1050
1064
|
|
|
@@ -1188,76 +1202,9 @@ async function loadRegistry() {
|
|
|
1188
1202
|
return Array.from(merged.values());
|
|
1189
1203
|
}
|
|
1190
1204
|
|
|
1191
|
-
// src/
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
(t) => `- ${t.id} (${t.type}, tier ${t.tier}, auth: ${t.auth}): ${t.description} [best_for: ${t.best_for.join(", ")}]`
|
|
1195
|
-
).join("\n");
|
|
1196
|
-
return `## User Intent
|
|
1197
|
-
|
|
1198
|
-
${intent}
|
|
1199
|
-
|
|
1200
|
-
## Available Tool Registry
|
|
1201
|
-
|
|
1202
|
-
${registrySummary}
|
|
1203
|
-
|
|
1204
|
-
Generate the skeleton JSON now.`;
|
|
1205
|
-
}
|
|
1206
|
-
function buildHarnessMessage(intent, skeleton, concise) {
|
|
1207
|
-
const skeletonJson = JSON.stringify(skeleton, null, 2);
|
|
1208
|
-
const conciseNote = concise ? "\n\nIMPORTANT: Be concise. Maximum 80 lines for claude_md. Maximum 5 commands. Keep all content brief." : "";
|
|
1209
|
-
return `## User Intent
|
|
1210
|
-
|
|
1211
|
-
${intent}
|
|
1212
|
-
|
|
1213
|
-
## Project Skeleton
|
|
1214
|
-
|
|
1215
|
-
${skeletonJson}
|
|
1216
|
-
|
|
1217
|
-
Generate the harness content JSON now.${conciseNote}`;
|
|
1218
|
-
}
|
|
1219
|
-
function parseSkeletonResponse(text) {
|
|
1220
|
-
let cleaned = text.trim();
|
|
1221
|
-
if (cleaned.startsWith("```")) {
|
|
1222
|
-
cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
|
|
1223
|
-
}
|
|
1224
|
-
const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
|
|
1225
|
-
if (!jsonMatch) {
|
|
1226
|
-
throw new Error("Pass 1 (skeleton) did not return valid JSON.");
|
|
1227
|
-
}
|
|
1228
|
-
try {
|
|
1229
|
-
const parsed = JSON.parse(jsonMatch[0]);
|
|
1230
|
-
if (!parsed.name || !parsed.tools || !Array.isArray(parsed.tools)) {
|
|
1231
|
-
throw new Error("Skeleton missing required fields: name, tools");
|
|
1232
|
-
}
|
|
1233
|
-
return parsed;
|
|
1234
|
-
} catch (err) {
|
|
1235
|
-
throw new Error(
|
|
1236
|
-
`Failed to parse skeleton JSON: ${err instanceof Error ? err.message : String(err)}`
|
|
1237
|
-
);
|
|
1238
|
-
}
|
|
1239
|
-
}
|
|
1240
|
-
function parseHarnessResponse(text) {
|
|
1241
|
-
let cleaned = text.trim();
|
|
1242
|
-
if (cleaned.startsWith("```")) {
|
|
1243
|
-
cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
|
|
1244
|
-
}
|
|
1245
|
-
const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
|
|
1246
|
-
if (!jsonMatch) {
|
|
1247
|
-
throw new Error("Pass 2 (harness) did not return valid JSON.");
|
|
1248
|
-
}
|
|
1249
|
-
try {
|
|
1250
|
-
const parsed = JSON.parse(jsonMatch[0]);
|
|
1251
|
-
if (!parsed.claude_md || !parsed.commands) {
|
|
1252
|
-
throw new Error("Harness missing required fields: claude_md, commands");
|
|
1253
|
-
}
|
|
1254
|
-
return parsed;
|
|
1255
|
-
} catch (err) {
|
|
1256
|
-
throw new Error(
|
|
1257
|
-
`Failed to parse harness JSON: ${err instanceof Error ? err.message : String(err)}`
|
|
1258
|
-
);
|
|
1259
|
-
}
|
|
1260
|
-
}
|
|
1205
|
+
// src/llm.ts
|
|
1206
|
+
import Anthropic2 from "@anthropic-ai/sdk";
|
|
1207
|
+
import OpenAI2 from "openai";
|
|
1261
1208
|
function classifyError(err, provider) {
|
|
1262
1209
|
const msg = err instanceof Error ? err.message : String(err);
|
|
1263
1210
|
const status = err?.status;
|
|
@@ -1289,8 +1236,8 @@ function classifyError(err, provider) {
|
|
|
1289
1236
|
return `${provider} API error: ${msg}`;
|
|
1290
1237
|
}
|
|
1291
1238
|
async function callLLM(config, userMessage, options) {
|
|
1292
|
-
const maxTokens = options
|
|
1293
|
-
const systemPrompt = options
|
|
1239
|
+
const maxTokens = options.maxTokens ?? 8192;
|
|
1240
|
+
const systemPrompt = options.systemPrompt;
|
|
1294
1241
|
const providerName = getProviderName(config.provider);
|
|
1295
1242
|
if (config.provider === "anthropic") {
|
|
1296
1243
|
const client2 = new Anthropic2({ apiKey: config.api_key });
|
|
@@ -1332,6 +1279,77 @@ async function callLLM(config, userMessage, options) {
|
|
|
1332
1279
|
throw new Error(classifyError(err, providerName));
|
|
1333
1280
|
}
|
|
1334
1281
|
}
|
|
1282
|
+
|
|
1283
|
+
// src/compiler/compile.ts
|
|
1284
|
+
/**
 * Build the user message for the skeleton pass.
 *
 * Renders every registry entry as one bullet line (id, type, tier, auth,
 * description and its comma-joined `best_for` list) and places the list
 * under the user's intent.
 *
 * @param {string} intent - The user's natural-language description.
 * @param {Array<object>} registry - Tool entries; each must expose `id`,
 *   `type`, `tier`, `auth`, `description` and an array `best_for`.
 * @returns {string} The assembled markdown message.
 */
function buildSkeletonMessage(intent, registry) {
  const describeTool = (tool) => {
    const bestFor = tool.best_for.join(", ");
    return `- ${tool.id} (${tool.type}, tier ${tool.tier}, auth: ${tool.auth}): ${tool.description} [best_for: ${bestFor}]`;
  };
  const toolLines = registry.map(describeTool);
  const sections = [
    "## User Intent",
    "",
    // Interpolate so that null/undefined render the same way a template literal would.
    `${intent}`,
    "",
    "## Available Tool Registry",
    "",
    toolLines.join("\n"),
    "",
    "Generate the skeleton JSON now."
  ];
  return sections.join("\n");
}
|
|
1298
|
+
/**
 * Build the user message for the harness pass.
 *
 * Embeds the user's intent and the skeleton (pretty-printed as JSON with a
 * two-space indent). When `concise` is truthy a brevity instruction is
 * appended after the closing sentence.
 *
 * @param {string} intent - The user's natural-language description.
 * @param {object} skeleton - Skeleton object to serialize into the message.
 * @param {boolean} concise - Whether to append the brevity instruction.
 * @returns {string} The assembled markdown message.
 */
function buildHarnessMessage(intent, skeleton, concise) {
  let brevityNote = "";
  if (concise) {
    brevityNote = "\n\nIMPORTANT: Be concise. Maximum 80 lines for claude_md. Maximum 5 commands. Keep all content brief.";
  }
  const sections = [
    "## User Intent",
    "",
    `${intent}`,
    "",
    "## Project Skeleton",
    "",
    // Interpolated so an unserializable skeleton renders as "undefined", as before.
    `${JSON.stringify(skeleton, null, 2)}`,
    "",
    "Generate the harness content JSON now."
  ];
  return sections.join("\n") + brevityNote;
}
|
|
1311
|
+
/**
 * Extract and validate the skeleton JSON object from an LLM reply.
 *
 * A leading markdown code fence (optionally tagged `json`) and a trailing
 * fence are stripped, then the span from the first `{` to the last `}` is
 * parsed. The result must have a truthy `name` and an array `tools`.
 *
 * @param {string} text - Raw LLM reply.
 * @returns {object} The parsed skeleton.
 * @throws {Error} When no `{...}` span exists, when the span is not valid
 *   JSON, or when required fields are missing (the latter two are reported
 *   with the "Failed to parse skeleton JSON" prefix).
 */
function parseSkeletonResponse(text) {
  const trimmed = text.trim();
  const unfenced = trimmed.startsWith("```")
    ? trimmed.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "")
    : trimmed;
  const candidate = /\{[\s\S]*\}/.exec(unfenced);
  if (candidate === null) {
    throw new Error("Pass 1 (skeleton) did not return valid JSON.");
  }
  let skeleton;
  try {
    skeleton = JSON.parse(candidate[0]);
    // The field check lives inside the try so its error is wrapped like a parse error.
    if (!skeleton.name || !Array.isArray(skeleton.tools)) {
      throw new Error("Skeleton missing required fields: name, tools");
    }
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to parse skeleton JSON: ${reason}`);
  }
  return skeleton;
}
|
|
1332
|
+
/**
 * Extract and validate the harness JSON object from an LLM reply.
 *
 * A leading markdown code fence (optionally tagged `json`) and a trailing
 * fence are stripped, then the span from the first `{` to the last `}` is
 * parsed. The result must have truthy `claude_md` and `commands` fields.
 *
 * @param {string} text - Raw LLM reply.
 * @returns {object} The parsed harness content.
 * @throws {Error} When no `{...}` span exists, when the span is not valid
 *   JSON, or when required fields are missing (the latter two are reported
 *   with the "Failed to parse harness JSON" prefix).
 */
function parseHarnessResponse(text) {
  const trimmed = text.trim();
  const unfenced = trimmed.startsWith("```")
    ? trimmed.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "")
    : trimmed;
  const candidate = /\{[\s\S]*\}/.exec(unfenced);
  if (candidate === null) {
    throw new Error("Pass 2 (harness) did not return valid JSON.");
  }
  let harness;
  try {
    harness = JSON.parse(candidate[0]);
    // The field check lives inside the try so its error is wrapped like a parse error.
    const hasRequired = Boolean(harness.claude_md) && Boolean(harness.commands);
    if (!hasRequired) {
      throw new Error("Harness missing required fields: claude_md, commands");
    }
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to parse harness JSON: ${reason}`);
  }
  return harness;
}
|
|
1335
1353
|
function buildSettings(skeleton, registry) {
|
|
1336
1354
|
const selectedTools = skeleton.tools.map((t) => registry.find((r) => r.id === t.tool_id)).filter(Boolean);
|
|
1337
1355
|
const allow = ["Read", "Write", "Edit", "Bash(npm run *)", "Bash(npx *)"];
|
|
@@ -1501,7 +1519,9 @@ async function generateClarifications(intent, onProgress) {
|
|
|
1501
1519
|
onProgress?.("Analyzing your request...");
|
|
1502
1520
|
const clarificationConfig = { ...config };
|
|
1503
1521
|
clarificationConfig.model = getCheapModel(config.provider, config.model);
|
|
1504
|
-
const response = await callLLM(clarificationConfig, CLARIFICATION_PROMPT + "\n\nUser description: " + intent
|
|
1522
|
+
const response = await callLLM(clarificationConfig, CLARIFICATION_PROMPT + "\n\nUser description: " + intent, {
|
|
1523
|
+
systemPrompt: SYSTEM_PROMPT
|
|
1524
|
+
});
|
|
1505
1525
|
try {
|
|
1506
1526
|
let cleaned = response.trim();
|
|
1507
1527
|
if (cleaned.startsWith("```")) {
|
|
@@ -1682,7 +1702,29 @@ If confidence is LOW or MEDIUM, fix issues and re-verify.
|
|
|
1682
1702
|
Run /project:grill for adversarial review.
|
|
1683
1703
|
Fix any BLOCKERs.
|
|
1684
1704
|
|
|
1685
|
-
## Phase 6:
|
|
1705
|
+
## Phase 6: COMPLETION GATE
|
|
1706
|
+
|
|
1707
|
+
Before shipping, run the Completion Verification checklist:
|
|
1708
|
+
|
|
1709
|
+
### Requirements Check
|
|
1710
|
+
- [ ] Re-read the ORIGINAL task description (not your interpretation)
|
|
1711
|
+
- [ ] Each explicit requirement is met with evidence (test output, diff)
|
|
1712
|
+
- [ ] Each implicit requirement (error handling, types, tests) is addressed
|
|
1713
|
+
|
|
1714
|
+
### State Check
|
|
1715
|
+
- [ ] Test suite passes
|
|
1716
|
+
- [ ] Lint/typecheck passes
|
|
1717
|
+
- [ ] \`git diff --stat\` \u2014 every changed file is intentional
|
|
1718
|
+
- [ ] No debug artifacts (console.log, TODO, commented-out code, temp files)
|
|
1719
|
+
|
|
1720
|
+
### Perspective Check (1 sentence each)
|
|
1721
|
+
- **Test engineer:** Most likely production failure mode?
|
|
1722
|
+
- **Code reviewer:** What would I flag in review?
|
|
1723
|
+
- **Requesting user:** Does this solve my actual problem?
|
|
1724
|
+
|
|
1725
|
+
ALL pass \u2192 proceed to ship. ANY fail \u2192 fix first, then re-verify.
|
|
1726
|
+
|
|
1727
|
+
## Phase 7: SHIP
|
|
1686
1728
|
Run /project:commit.
|
|
1687
1729
|
Report what was built and what's next from docs/SPRINT.md.
|
|
1688
1730
|
|
|
@@ -1762,11 +1804,26 @@ Run verification:
|
|
|
1762
1804
|
- Run functional tests
|
|
1763
1805
|
- If NEEDS FIXES: fix and re-verify
|
|
1764
1806
|
|
|
1765
|
-
## Phase 5:
|
|
1807
|
+
## Phase 5: COMPLETION GATE
|
|
1808
|
+
|
|
1809
|
+
Before creating a PR, run the Completion Verification checklist:
|
|
1810
|
+
- [ ] Re-read the ORIGINAL spec from docs/SPRINT.md
|
|
1811
|
+
- [ ] Each requirement is met with evidence (test output, diff)
|
|
1812
|
+
- [ ] Test suite + lint/typecheck pass
|
|
1813
|
+
- [ ] \`git diff --stat\` \u2014 every changed file is intentional, no debug artifacts
|
|
1814
|
+
- **Test engineer:** Most likely production failure mode?
|
|
1815
|
+
- **Code reviewer:** What would I flag in review?
|
|
1816
|
+
- **Requesting user:** Does this solve my actual problem?
|
|
1817
|
+
|
|
1818
|
+
ALL pass \u2192 proceed to PR. ANY fail \u2192 fix first, then re-verify.
|
|
1819
|
+
|
|
1820
|
+
Include the checklist results in the PR description.
|
|
1821
|
+
|
|
1822
|
+
## Phase 6: PR
|
|
1766
1823
|
Create a pull request:
|
|
1767
|
-
gh pr create --title "feat: {name}" --body "{spec + QA report}"
|
|
1824
|
+
gh pr create --title "feat: {name}" --body "{spec + QA report + verification checklist}"
|
|
1768
1825
|
|
|
1769
|
-
## Phase
|
|
1826
|
+
## Phase 7: NEXT
|
|
1770
1827
|
Report:
|
|
1771
1828
|
"PR #{N} ready for review: {link}
|
|
1772
1829
|
Next priority from SPRINT.md: {next task}
|
|
@@ -1790,13 +1847,19 @@ Repeat until max features reached or stopped:
|
|
|
1790
1847
|
2. Create worktree + branch
|
|
1791
1848
|
3. Implement the feature
|
|
1792
1849
|
4. Run verification (build, test, lint)
|
|
1793
|
-
5.
|
|
1794
|
-
|
|
1795
|
-
|
|
1850
|
+
5. Run Completion Verification checklist:
|
|
1851
|
+
- Requirements met with evidence
|
|
1852
|
+
- Tests + lint/typecheck pass
|
|
1853
|
+
- No debug artifacts or unexpected file changes
|
|
1854
|
+
- 3-perspective check (test engineer, reviewer, user)
|
|
1855
|
+
6. Open PR via gh (include verification results in PR body)
|
|
1856
|
+
7. Report status
|
|
1857
|
+
8. Move to next feature
|
|
1796
1858
|
|
|
1797
1859
|
## Stop Conditions
|
|
1798
1860
|
- Max 5 features per autopilot session
|
|
1799
1861
|
- Any BLOCKER from verification
|
|
1862
|
+
- Completion Verification checklist fails after 2 fix attempts
|
|
1800
1863
|
- Build failure that can't be resolved in 3 attempts
|
|
1801
1864
|
- User presses Escape`;
|
|
1802
1865
|
var AUTOPILOT_WARNING = `
|
|
@@ -3627,8 +3690,882 @@ var keysCommand = new Command10("keys").description("Add or update API keys for
|
|
|
3627
3690
|
console.log("");
|
|
3628
3691
|
});
|
|
3629
3692
|
|
|
3693
|
+
// src/commands/evolve.ts
|
|
3694
|
+
import { Command as Command11 } from "commander";
|
|
3695
|
+
import chalk14 from "chalk";
|
|
3696
|
+
import ora2 from "ora";
|
|
3697
|
+
import fs19 from "fs/promises";
|
|
3698
|
+
import path19 from "path";
|
|
3699
|
+
import { parse as yamlParse } from "yaml";
|
|
3700
|
+
import { confirm as confirm3, select as select4 } from "@inquirer/prompts";
|
|
3701
|
+
|
|
3702
|
+
// src/evolve/init.ts
|
|
3703
|
+
import fs15 from "fs/promises";
|
|
3704
|
+
import path15 from "path";
|
|
3705
|
+
import { stringify as yamlStringify } from "yaml";
|
|
3706
|
+
|
|
3707
|
+
// src/evolve/templates.ts
|
|
3708
|
+
// Catalog of eval templates, keyed by template id. Each entry repeats its id
// and carries a display name, a one-line description, and the workflow tags
// listed in `bestFor`. Rows are [id, name, description, bestFor].
var EVAL_TEMPLATES = Object.fromEntries(
  [
    ["add-feature", "Add Feature", "Can the agent add a new capability?", ["feature-development", "api-building", "full-stack"]],
    ["fix-bug", "Fix Bug", "Can the agent diagnose and fix a problem?", ["maintenance", "debugging", "qa"]],
    ["refactor", "Refactor", "Can the agent restructure code?", ["maintenance", "architecture", "backend"]],
    ["test-writing", "Test Writing", "Can the agent write tests?", ["tdd", "qa", "backend"]],
    ["config-change", "Config Change", "Can the agent update configuration?", ["devops", "infrastructure", "backend"]],
    ["documentation", "Documentation", "Can the agent write and update docs?", ["content", "api-building", "full-stack"]]
  ].map(([id, name, description, bestFor]) => [id, { id, name, description, bestFor }])
);
|
|
3746
|
+
/**
 * Pick the eval template ids to use for a workflow type.
 *
 * @param {string} workflowType - Workflow tag such as "backend" or "tdd".
 * @returns {string[]} The template ids mapped to that workflow, or the
 *   default trio ("add-feature", "fix-bug", "test-writing") when the
 *   workflow type is not one of the known keys.
 */
function selectTemplatesForWorkflow(workflowType) {
  const mapping = {
    "feature-development": ["add-feature", "test-writing", "documentation"],
    "api-building": ["add-feature", "fix-bug", "test-writing"],
    "full-stack": ["add-feature", "fix-bug", "test-writing"],
    "maintenance": ["fix-bug", "refactor", "test-writing"],
    "debugging": ["fix-bug", "test-writing"],
    "qa": ["fix-bug", "test-writing", "add-feature"],
    "architecture": ["refactor", "test-writing", "config-change"],
    "backend": ["fix-bug", "refactor", "config-change", "test-writing"],
    "devops": ["config-change", "fix-bug"],
    "infrastructure": ["config-change", "refactor"],
    "tdd": ["test-writing", "add-feature", "fix-bug"],
    "content": ["documentation", "add-feature"],
    "research": ["documentation", "add-feature"]
  };
  // Own-property check: a plain `mapping[key] || fallback` lookup would return
  // inherited members (e.g. "constructor", "toString", "__proto__") instead of
  // falling back, handing callers a non-array.
  if (Object.prototype.hasOwnProperty.call(mapping, workflowType)) {
    return mapping[workflowType];
  }
  return ["add-feature", "fix-bug", "test-writing"];
}
|
|
3764
|
+
// System prompt for eval task generation. It describes the JSON shape the
// model must return: a "tasks" array whose items carry id, template,
// description, setup, expected_outcome, scoring and timeout.
var TASK_GENERATION_PROMPT = [
  "You are an eval task generator for Claude Code agent environments. Given a project's CLAUDE.md, project structure, and selected eval templates, generate concrete, project-specific tasks.",
  "",
  "Each task must be realistic and testable against the actual project. Avoid generic placeholders.",
  "",
  'Return a JSON object with a "tasks" array. Each task has:',
  '- id: kebab-case identifier (e.g., "add-health-endpoint")',
  "- template: which eval template this instantiates",
  "- description: concrete task description the agent will receive",
  '- setup: shell commands to prepare the workspace (e.g., "npm install")',
  "- expected_outcome: multi-line string describing what success looks like",
  '- scoring: "pass-fail", "llm-judge", or "rubric"',
  "- timeout: seconds (300 for features/bugs, 600 for refactors, 180 for config/docs/tests)",
  "",
  "Return ONLY valid JSON, no markdown fences."
].join("\n");
|
|
3778
|
+
/**
 * Extract a JSON value (object or array) from a raw LLM reply.
 *
 * A leading markdown code fence (optionally tagged `json`) and a trailing
 * fence are stripped first. The outermost `{...}` span is tried first; if it
 * is absent or does not parse, the outermost `[...]` span is tried. The
 * second attempt matters for top-level arrays that contain several objects:
 * there the `{...}` span covers `{..},{..}`, which is not valid JSON on its
 * own.
 *
 * @param {string} raw - Raw LLM reply.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When the reply holds neither a `{...}` nor a `[...]` span,
 *   or when no candidate span parses (the message carries the parse error of
 *   the first candidate tried).
 */
function parseJsonResponse(raw) {
  let cleaned = raw.trim();
  if (cleaned.startsWith("```")) {
    cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
  }
  const candidates = [
    cleaned.match(/\{[\s\S]*\}/),
    cleaned.match(/\[[\s\S]*\]/)
  ].filter((match) => match !== null);
  if (candidates.length === 0) {
    throw new Error(
      "LLM response did not contain valid JSON. Try again or use a different model."
    );
  }
  let firstError;
  for (const [candidate] of candidates) {
    try {
      return JSON.parse(candidate);
    } catch (err) {
      // Keep the first failure for the final message, then try the next span.
      if (firstError === undefined) {
        firstError = err;
      }
    }
  }
  throw new Error(
    `Failed to parse LLM response as JSON: ${firstError instanceof Error ? firstError.message : String(firstError)}`
  );
}
|
|
3797
|
+
// Fields every generated task must carry with a non-null, non-undefined value.
var REQUIRED_TASK_FIELDS = [
  "id", "template", "description", "setup",
  "expected_outcome", "scoring", "timeout"
];

/**
 * Check that a generated task is an object carrying every required field.
 *
 * Only presence is checked: a field counts as missing when it is absent,
 * `undefined` or `null`. Other falsy values (0, "") are accepted.
 *
 * @param {*} obj - Candidate task.
 * @param {number} index - Position of the task, used in error messages.
 * @returns {object} The same object that was passed in.
 * @throws {Error} When `obj` is not an object, or naming the first missing
 *   field in REQUIRED_TASK_FIELDS order.
 */
function validateTask(obj, index) {
  if (obj === null || typeof obj !== "object") {
    throw new Error(`Task at index ${index} is not an object`);
  }
  // `== null` is deliberate: it matches both undefined (including absent keys) and null.
  const missingField = REQUIRED_TASK_FIELDS.find((field) => obj[field] == null);
  if (missingField !== undefined) {
    throw new Error(`Task at index ${index} is missing required field: ${missingField}`);
  }
  return obj;
}
|
|
3818
|
+
/**
 * Build the user message for eval task generation.
 *
 * The message has three sections: the CLAUDE.md text, a four-line project
 * profile (language, framework, scripts, key files; each falling back to
 * "unknown"/"none" when empty), and one bullet per selected template with
 * the description looked up in EVAL_TEMPLATES.
 *
 * @param {string} claudeMd - Contents of the project's CLAUDE.md.
 * @param {object} projectProfile - Object with `language`, `framework`,
 *   `scripts` (name -> command) and `keyFiles` (array of file names).
 * @param {string[]} templates - Template ids; each must be a key of
 *   EVAL_TEMPLATES.
 * @returns {string} The assembled message.
 */
function buildTaskGenerationMessage(claudeMd, projectProfile, templates) {
  const languageLine = `Language: ${projectProfile.language ?? "unknown"}`;
  const frameworkLine = `Framework: ${projectProfile.framework ?? "none"}`;
  const scriptPairs = [];
  for (const [scriptName, command] of Object.entries(projectProfile.scripts)) {
    scriptPairs.push(`${scriptName}=${command}`);
  }
  const scriptsLine = `Scripts: ${scriptPairs.join(", ") || "none"}`;
  const keyFilesLine = `Key files: ${projectProfile.keyFiles.join(", ") || "none"}`;

  const templateBullets = templates.map(
    (templateId) => `- ${templateId}: ${EVAL_TEMPLATES[templateId].description}`
  );

  const sections = [
    "## CLAUDE.md",
    "",
    claudeMd,
    "",
    "## Project Profile",
    "",
    languageLine,
    frameworkLine,
    scriptsLine,
    keyFilesLine,
    "",
    "## Selected Eval Templates",
    "",
    templateBullets.join("\n"),
    "",
    "Generate concrete, project-specific tasks for each template above."
  ];
  return sections.join("\n");
}
|
|
3845
|
+
/**
 * Ask the LLM for project-specific eval tasks and validate its reply.
 *
 * Sends the task-generation message with TASK_GENERATION_PROMPT as the
 * system prompt and a 4096-token cap, parses the reply as JSON, and runs
 * every element of its `tasks` array through validateTask.
 *
 * @param {string} claudeMd - Contents of the project's CLAUDE.md.
 * @param {object} projectProfile - Profile passed to the message builder.
 * @param {string[]} templates - Selected eval template ids.
 * @param {object} config - LLM configuration forwarded to callLLM.
 * @returns {Promise<object[]>} The validated tasks, in reply order.
 * @throws {Error} When the parsed reply is not an object, has no `tasks`
 *   array, or any task fails validation.
 */
async function generateTasksFromTemplates(claudeMd, projectProfile, templates, config) {
  const prompt = buildTaskGenerationMessage(claudeMd, projectProfile, templates);
  const llmOptions = { systemPrompt: TASK_GENERATION_PROMPT, maxTokens: 4096 };
  const reply = await callLLM(config, prompt, llmOptions);

  const payload = parseJsonResponse(reply);
  if (payload === null || typeof payload !== "object") {
    throw new Error("LLM response is not a JSON object");
  }
  const { tasks: rawTasks } = payload;
  if (!Array.isArray(rawTasks)) {
    throw new Error("LLM response does not contain a 'tasks' array");
  }
  return rawTasks.map((candidate, position) => validateTask(candidate, position));
}
|
|
3865
|
+
|
|
3866
|
+
// src/evolve/init.ts
|
|
3867
|
+
/**
 * Create the `.kairn-evolve` workspace under a project and write its config.
 *
 * Ensures the `baseline`, `traces` and `iterations` subdirectories exist,
 * then writes `config.yaml` with the snake_case keys model, proposer_model,
 * scorer, max_iterations and parallel_tasks.
 *
 * @param {string} projectRoot - Directory that will contain `.kairn-evolve`.
 * @param {object} config - Object with `model`, `proposerModel`, `scorer`,
 *   `maxIterations` and `parallelTasks`.
 * @returns {Promise<string>} Path of the workspace directory.
 */
async function createEvolveWorkspace(projectRoot, config) {
  const workspace = path15.join(projectRoot, ".kairn-evolve");
  for (const subdir of ["baseline", "traces", "iterations"]) {
    await fs15.mkdir(path15.join(workspace, subdir), { recursive: true });
  }

  const { model, proposerModel, scorer, maxIterations, parallelTasks } = config;
  const configPath = path15.join(workspace, "config.yaml");
  const serialized = yamlStringify({
    model,
    proposer_model: proposerModel,
    scorer,
    max_iterations: maxIterations,
    parallel_tasks: parallelTasks
  });
  await fs15.writeFile(configPath, serialized, "utf-8");
  return workspace;
}
|
|
3886
|
+
/**
 * Write `tasks.yaml` into a workspace directory.
 *
 * Each task is reduced to a fixed key order (id, template, description,
 * setup, expected_outcome, scoring, optional rubric, timeout); `rubric` is
 * emitted only when truthy. The YAML is prefixed with a two-line comment
 * header.
 *
 * @param {string} workspacePath - Directory that receives `tasks.yaml`.
 * @param {object[]} tasks - Tasks to serialize.
 * @returns {Promise<void>}
 */
async function writeTasksFile(workspacePath, tasks) {
  const serializable = tasks.map((task) => {
    const entry = {
      id: task.id,
      template: task.template,
      description: task.description,
      setup: task.setup,
      expected_outcome: task.expected_outcome,
      scoring: task.scoring
    };
    // rubric sits before timeout so the emitted key order stays stable.
    if (task.rubric) {
      entry.rubric = task.rubric;
    }
    entry.timeout = task.timeout;
    return entry;
  });
  const banner = "# .kairn-evolve/tasks.yaml\n# Auto-generated by kairn evolve init \u2014 edit freely\n";
  const target = path15.join(workspacePath, "tasks.yaml");
  await fs15.writeFile(target, banner + yamlStringify({ tasks: serializable }), "utf-8");
}
|
|
3906
|
+
/**
 * Inspect a project directory and summarize it for prompt construction.
 *
 * Detection is best-effort: unreadable or malformed files are skipped and
 * the corresponding profile fields keep their defaults.
 *
 * - `language`: "typescript" when package.json parses and the project has a
 *   `typescript` dependency or a tsconfig.json; "javascript" when
 *   package.json parses without either; "python" when there is no usable
 *   package.json but pyproject.toml or requirements.txt exists; else null.
 * - `framework`: first match among next, express, react, vue, commander in
 *   dependencies + devDependencies; else null.
 * - `scripts`: the package.json `scripts` object when it is an object.
 * - `keyFiles`: well-known files present at the project root.
 *
 * @param {string} projectRoot - Directory to inspect.
 * @returns {Promise<{language: (string|null), framework: (string|null), scripts: object, keyFiles: string[]}>}
 */
async function buildProjectProfile(projectRoot) {
  const profile = {
    language: null,
    framework: null,
    scripts: {},
    keyFiles: []
  };

  // True when `name` is accessible directly under projectRoot.
  const existsInRoot = async (name) => {
    try {
      await fs15.access(path15.join(projectRoot, name));
      return true;
    } catch {
      return false;
    }
  };

  try {
    const pkgStr = await fs15.readFile(
      path15.join(projectRoot, "package.json"),
      "utf-8"
    );
    const parsed = JSON.parse(pkgStr);
    // Valid JSON that is not an object (e.g. `null`) is treated as an empty manifest.
    const pkg = parsed !== null && typeof parsed === "object" ? parsed : {};
    const deps = {
      ...pkg.dependencies ?? {},
      ...pkg.devDependencies ?? {}
    };
    // A package.json alone does not imply TypeScript; look for real evidence
    // so that plain JavaScript projects are not mislabelled.
    const usesTypeScript = Boolean(deps.typescript) || (await existsInRoot("tsconfig.json"));
    profile.language = usesTypeScript ? "typescript" : "javascript";
    if (pkg.scripts && typeof pkg.scripts === "object") {
      profile.scripts = pkg.scripts;
    }
    // Ordered by precedence: the first dependency found wins.
    const frameworkByDependency = [
      ["next", "Next.js"],
      ["express", "Express"],
      ["react", "React"],
      ["vue", "Vue"],
      ["commander", "CLI (Commander.js)"]
    ];
    const detected = frameworkByDependency.find(([dependency]) => deps[dependency]);
    if (detected) {
      profile.framework = detected[1];
    }
  } catch {
    // Best-effort: no readable/parseable package.json, fall through to other probes.
  }

  if (!profile.language) {
    if ((await existsInRoot("pyproject.toml")) || (await existsInRoot("requirements.txt"))) {
      profile.language = "python";
    }
  }

  try {
    const entries = await fs15.readdir(projectRoot);
    const keyPatterns = [
      "README.md",
      "package.json",
      "tsconfig.json",
      "pyproject.toml",
      "Cargo.toml",
      "go.mod",
      "Makefile",
      "Dockerfile"
    ];
    profile.keyFiles = entries.filter((entry) => keyPatterns.includes(entry));
  } catch {
    // Best-effort: an unreadable root simply yields no key files.
  }
  return profile;
}
|
|
3969
|
+
/**
 * Generate eval tasks for a project from its harness and detected profile.
 *
 * Loads the saved config, reads `.claude/CLAUDE.md` (an unreadable file is
 * treated as empty text), builds the project profile, selects templates for
 * the workflow type and delegates to generateTasksFromTemplates.
 *
 * @param {string} projectRoot - Project directory.
 * @param {string} workflowType - Workflow tag used to select templates.
 * @returns {Promise<object[]>} Whatever generateTasksFromTemplates resolves to.
 * @throws {Error} When loadConfig yields no config.
 */
async function autoGenerateTasks(projectRoot, workflowType) {
  const config = await loadConfig();
  if (!config) {
    throw new Error("No config found. Run `kairn init` first.");
  }

  let harnessDoc = "";
  try {
    const harnessPath = path15.join(projectRoot, ".claude", "CLAUDE.md");
    harnessDoc = await fs15.readFile(harnessPath, "utf-8");
  } catch {
    // Best-effort: continue with an empty CLAUDE.md when it cannot be read.
  }

  return generateTasksFromTemplates(
    harnessDoc,
    await buildProjectProfile(projectRoot),
    selectTemplatesForWorkflow(workflowType),
    config
  );
}
|
|
3986
|
+
|
|
3987
|
+
// src/evolve/baseline.ts
|
|
3988
|
+
import fs16 from "fs/promises";
|
|
3989
|
+
import path16 from "path";
|
|
3990
|
+
/**
 * Copy a project's `.claude` directory into the evolve workspace.
 *
 * The directory is copied twice: to `<workspace>/baseline` and to
 * `<workspace>/iterations/0/harness`, in that order.
 *
 * @param {string} projectRoot - Project directory containing `.claude`.
 * @param {string} workspacePath - Evolve workspace directory.
 * @returns {Promise<void>}
 * @throws {Error} When `.claude` is not accessible under `projectRoot`.
 */
async function snapshotBaseline(projectRoot, workspacePath) {
  const harnessSource = path16.join(projectRoot, ".claude");
  const destinations = [
    path16.join(workspacePath, "baseline"),
    path16.join(workspacePath, "iterations", "0", "harness")
  ];

  try {
    await fs16.access(harnessSource);
  } catch {
    throw new Error(`.claude/ directory not found in ${projectRoot}`);
  }

  for (const destination of destinations) {
    await copyDir(harnessSource, destination);
  }
}
|
|
4002
|
+
/**
 * Recursively copy the contents of `src` into `dest`, creating `dest`
 * (and any missing parents) first. Entries are processed sequentially.
 *
 * @param {string} src - Directory to read.
 * @param {string} dest - Directory to create and fill.
 * @returns {Promise<void>}
 */
async function copyDir(src, dest) {
  await fs16.mkdir(dest, { recursive: true });
  const children = await fs16.readdir(src, { withFileTypes: true });
  for (const child of children) {
    const from = path16.join(src, child.name);
    const to = path16.join(dest, child.name);
    if (!child.isDirectory()) {
      // Anything that is not a directory is handed to copyFile.
      await fs16.copyFile(from, to);
      continue;
    }
    await copyDir(from, to);
  }
}
|
|
4015
|
+
|
|
4016
|
+
// src/evolve/runner.ts
|
|
4017
|
+
import { exec, spawn } from "child_process";
|
|
4018
|
+
import { promisify } from "util";
|
|
4019
|
+
import fs18 from "fs/promises";
|
|
4020
|
+
import os3 from "os";
|
|
4021
|
+
import path18 from "path";
|
|
4022
|
+
|
|
4023
|
+
// src/evolve/trace.ts
|
|
4024
|
+
import fs17 from "fs/promises";
|
|
4025
|
+
import path17 from "path";
|
|
4026
|
+
/**
 * Persist one task trace as a set of files inside `traceDir` (created if missing):
 * `stdout.log`, `stderr.log`, `tool_calls.jsonl` (one JSON object per line),
 * and pretty-printed `files_changed.json`, `timing.json`, `score.json`.
 *
 * @param {string} traceDir - Directory that receives the trace files.
 * @param {object} trace - Object with `stdout`, `stderr`, `toolCalls`,
 *   `filesChanged`, `timing` and `score` properties.
 * @returns {Promise<void>}
 */
async function writeTrace(traceDir, trace) {
  await fs17.mkdir(traceDir, { recursive: true });

  const save = (fileName, text) => fs17.writeFile(path17.join(traceDir, fileName), text, "utf-8");
  const saveJson = (fileName, value) => save(fileName, JSON.stringify(value, null, 2));

  // Files are written strictly in this order, one at a time.
  await save("stdout.log", trace.stdout);
  await save("stderr.log", trace.stderr);
  const jsonl = trace.toolCalls.map((call) => JSON.stringify(call)).join("\n");
  await save("tool_calls.jsonl", jsonl);
  await saveJson("files_changed.json", trace.filesChanged);
  await saveJson("timing.json", trace.timing);
  await saveJson("score.json", trace.score);
}
|
|
4048
|
+
/**
 * Write `score` as pretty-printed JSON to `<traceDir>/score.json`.
 * The directory itself is not created here.
 *
 * @param {string} traceDir - Existing trace directory.
 * @param {object} score - Score object to serialize.
 * @returns {Promise<void>}
 */
async function writeScore(traceDir, score) {
  const target = path17.join(traceDir, "score.json");
  const payload = JSON.stringify(score, null, 2);
  await fs17.writeFile(target, payload, "utf-8");
}
|
|
4055
|
+
|
|
4056
|
+
// src/evolve/runner.ts
|
|
4057
|
+
// Promise-returning form of child_process.exec, built with util.promisify.
var execAsync = promisify(exec);
|
|
4058
|
+
/**
 * Run one eval task in a throwaway directory and write its trace.
 *
 * Steps: create `traceDir`, create a temp directory, copy the harness into
 * `<tmp>/.claude`, run the optional setup command (60 s timeout; a failure is
 * recorded in stderr and does not stop the run), snapshot files, run
 * `spawnClaude` with the task description, snapshot again, and write the trace.
 * The temp directory is always removed afterwards (removal errors are ignored).
 *
 * @param {object} task - Task with `id`, `description`, `timeout` (seconds) and
 *   an optional `setup` shell command.
 * @param {string} harnessPath - Harness directory copied into the temp dir.
 * @param {string} traceDir - Where the trace files are written.
 * @param {number} iteration - Iteration number stored in the trace.
 * @returns {Promise<{taskId: *, score: {pass: boolean, details: string}, traceDir: string}>}
 *   The score is the "Pending scoring" placeholder; real scoring happens elsewhere.
 */
async function runTask(task, harnessPath, traceDir, iteration) {
  await fs18.mkdir(traceDir, { recursive: true });
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
  const startMs = Date.now();
  const tmpDir = await fs18.mkdtemp(path18.join(os3.tmpdir(), "kairn-evolve-"));
  try {
    await copyDir(harnessPath, path18.join(tmpDir, ".claude"));

    // A task from a hand-edited tasks.yaml may have no `setup` at all; treat
    // that the same as an empty setup instead of throwing on `.trim()`.
    const hasSetup = typeof task.setup === "string" && task.setup.trim() !== "";
    let setupStderr = "";
    if (hasSetup) {
      try {
        await execAsync(task.setup, { cwd: tmpDir, timeout: 6e4 });
      } catch (err) {
        // Setup failures are recorded but do not abort the task.
        setupStderr = err instanceof Error ? err.message : String(err);
      }
    }

    const filesBefore = await snapshotFileList(tmpDir);
    const spawnResult = await spawnClaude(task.description, tmpDir, task.timeout);
    const filesAfter = await snapshotFileList(tmpDir);
    const filesChanged = diffFileLists(filesBefore, filesAfter);
    const toolCalls = parseToolCalls(spawnResult.stdout);
    const completedAt = (/* @__PURE__ */ new Date()).toISOString();
    const durationMs = Date.now() - startMs;
    const combinedStderr = setupStderr
      ? `[setup] ${setupStderr}\n${spawnResult.stderr}`
      : spawnResult.stderr;

    const trace = {
      taskId: task.id,
      iteration,
      stdout: spawnResult.stdout,
      stderr: combinedStderr,
      toolCalls,
      filesChanged,
      score: { pass: false, details: "Pending scoring" },
      timing: { startedAt, completedAt, durationMs }
    };
    await writeTrace(traceDir, trace);
    return {
      taskId: task.id,
      score: trace.score,
      traceDir
    };
  } finally {
    // Best-effort cleanup of the scratch directory.
    await fs18.rm(tmpDir, { recursive: true, force: true }).catch(() => {
    });
  }
}
|
|
4103
|
+
/**
 * Run the `claude` executable in `cwd`, feed it `instruction` on stdin, and
 * collect its output. The returned promise never rejects: a spawn failure is
 * reported as exit code 1 with a "Spawn error: ..." line appended to stderr.
 *
 * @param {string} instruction - Text written to the child's stdin.
 * @param {string} cwd - Working directory for the child process.
 * @param {number} timeoutSec - Timeout in seconds (passed to spawn in milliseconds).
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 */
async function spawnClaude(instruction, cwd, timeoutSec) {
  return new Promise((resolve) => {
    const args = ["--print", "--output-format", "text", "--max-turns", "50"];
    const child = spawn("claude", args, {
      cwd,
      stdio: ["pipe", "pipe", "pipe"],
      timeout: timeoutSec * 1e3,
      env: { ...process.env }
    });
    let stdout = "";
    let stderr = "";
    let settled = false;
    // Both "error" and "close" can fire for one child; only the first one counts.
    const finish = (result) => {
      if (settled) return;
      settled = true;
      resolve(result);
    };

    child.stdout.on("data", (data) => {
      stdout += data.toString();
    });
    child.stderr.on("data", (data) => {
      stderr += data.toString();
    });
    // Without a listener, a stdin stream error (failed spawn, or the child
    // exiting before it reads its input) is an unhandled "error" event that
    // takes down the whole CLI. Record it in stderr instead.
    child.stdin.on("error", (err) => {
      stderr += `\nStdin error: ${err.message}`;
    });
    child.stdin.write(instruction);
    child.stdin.end();

    child.on("close", (code) => {
      finish({ stdout, stderr, exitCode: code ?? 1 });
    });
    child.on("error", (err) => {
      finish({
        stdout,
        stderr: stderr + `\nSpawn error: ${err.message}`,
        exitCode: 1
      });
    });
  });
}
|
|
4135
|
+
/**
 * Walk `dir` recursively and map each file's path (relative to `dir`) to its
 * modification time in milliseconds.
 *
 * The top-level entries `.claude`, `node_modules` and `.git` are skipped
 * entirely. Matching is on the exact first path segment, so siblings such as
 * `.gitignore` or `.github/` are still recorded. Directories that cannot be
 * read and files that cannot be stat'ed are silently left out.
 *
 * @param {string} dir - Root directory to scan.
 * @returns {Promise<Object<string, number>>} Relative path -> `mtimeMs`.
 */
async function snapshotFileList(dir) {
  const ignoredTopLevel = new Set([".claude", "node_modules", ".git"]);
  const result = {};

  async function walk(current) {
    let entries;
    try {
      entries = await fs18.readdir(current, { withFileTypes: true });
    } catch {
      // Unreadable directory: skip it rather than failing the snapshot.
      return;
    }
    for (const entry of entries) {
      const fullPath = path18.join(current, entry.name);
      const relativePath = path18.relative(dir, fullPath);
      // Compare the whole first segment; a prefix test would also drop
      // unrelated names like ".gitignore" or "node_modules_backup".
      const [topLevel] = relativePath.split(path18.sep);
      if (ignoredTopLevel.has(topLevel)) continue;
      if (entry.isDirectory()) {
        await walk(fullPath);
      } else {
        try {
          const stat = await fs18.stat(fullPath);
          result[relativePath] = stat.mtimeMs;
        } catch {
          // File vanished or is unreadable between readdir and stat: skip it.
        }
      }
    }
  }

  await walk(dir);
  return result;
}
|
|
4164
|
+
/**
 * Compare two file snapshots (path -> mtime) and classify every difference.
 *
 * @param {Object<string, number>} before - Snapshot taken first.
 * @param {Object<string, number>} after - Snapshot taken second.
 * @returns {Object<string, "created"|"modified"|"deleted">} One entry per path
 *   that was added, whose mtime differs, or that disappeared.
 */
function diffFileLists(before, after) {
  // Own-property check: the `in` operator also sees inherited keys, so a file
  // literally named "constructor" or "toString" would be misclassified.
  const has = (snapshot, file) => Object.prototype.hasOwnProperty.call(snapshot, file);
  const changes = {};
  for (const [file, mtime] of Object.entries(after)) {
    if (!has(before, file)) {
      changes[file] = "created";
    } else if (before[file] !== mtime) {
      changes[file] = "modified";
    }
  }
  for (const file of Object.keys(before)) {
    if (!has(after, file)) {
      changes[file] = "deleted";
    }
  }
  return changes;
}
|
|
4180
|
+
/**
 * Extract tool-call records from process output.
 *
 * Every non-blank line is tried as JSON; a parsed value is kept when its
 * `type` is "tool_use" or it has a truthy `tool_name`. Lines that are not
 * JSON are ignored, and any failure to process `stdout` yields an empty list.
 *
 * @param {string} stdout - Raw output text.
 * @returns {Array<object>} Parsed tool-call objects in order of appearance.
 */
function parseToolCalls(stdout) {
  let candidateLines;
  try {
    candidateLines = stdout.split("\n").filter((text) => text.trim());
  } catch {
    return [];
  }

  const collected = [];
  for (const text of candidateLines) {
    let parsed;
    try {
      parsed = JSON.parse(text);
    } catch {
      continue;
    }
    // JSON `null` has no properties to inspect.
    if (parsed == null) continue;
    const isToolCall = parsed.type === "tool_use" || Boolean(parsed.tool_name);
    if (isToolCall) collected.push(parsed);
  }
  return collected;
}
|
|
4198
|
+
|
|
4199
|
+
// src/evolve/exec.ts
|
|
4200
|
+
import { exec as exec2 } from "child_process";
|
|
4201
|
+
import { promisify as promisify2 } from "util";
|
|
4202
|
+
// Promise-returning form of child_process.exec used by execCommand.
var execAsync2 = promisify2(exec2);
|
|
4203
|
+
/**
 * Run a shell command through the promisified `exec`.
 *
 * @param {string} cmd - Command line to execute.
 * @param {string} cwd - Working directory for the command.
 * @param {number} [timeoutMs=30000] - Timeout in milliseconds.
 * @returns {Promise<*>} Whatever `execAsync2` resolves to; rejects when it rejects.
 */
async function execCommand(cmd, cwd, timeoutMs = 3e4) {
  const execOptions = { cwd, timeout: timeoutMs };
  return execAsync2(cmd, execOptions);
}
|
|
4206
|
+
|
|
4207
|
+
// src/evolve/scorers.ts
|
|
4208
|
+
// Expected-outcome lines starting with one of these prefixes (name + space) are
// treated by passFailScorer as verification commands to execute.
var COMMAND_PATTERN = /^(npm |npx |node |python |make |cargo |go |git |test |ls |cat |grep |curl )/;
|
|
4209
|
+
// Any of these characters makes passFailScorer reject a command instead of running it.
var SHELL_METACHAR_PATTERN = /[;|&`$()<>]/;
|
|
4210
|
+
var JUDGE_SYSTEM_PROMPT = `You are an eval judge for Claude Code agent tasks. Given a task description, expected outcome, and actual execution results, determine if the task was completed successfully.
|
|
4211
|
+
|
|
4212
|
+
Return ONLY valid JSON with this structure:
|
|
4213
|
+
{
|
|
4214
|
+
"pass": true/false,
|
|
4215
|
+
"score": 0-100,
|
|
4216
|
+
"reasoning": "Brief explanation of your judgment"
|
|
4217
|
+
}`;
|
|
4218
|
+
var RUBRIC_SYSTEM_PROMPT = `You are an eval judge scoring a specific criterion. Given the task, the criterion to evaluate, and the execution results, score the criterion.
|
|
4219
|
+
|
|
4220
|
+
Return ONLY valid JSON:
|
|
4221
|
+
{
|
|
4222
|
+
"score": 0.0-1.0,
|
|
4223
|
+
"reasoning": "Brief explanation"
|
|
4224
|
+
}`;
|
|
4225
|
+
async function passFailScorer(task, workspacePath, stdout, stderr) {
|
|
4226
|
+
const outcomes = Array.isArray(task.expected_outcome) ? task.expected_outcome : task.expected_outcome.split("\n");
|
|
4227
|
+
const commands = outcomes.map((line) => line.replace(/^-\s*/, "").trim()).filter((line) => COMMAND_PATTERN.test(line));
|
|
4228
|
+
if (commands.length > 0) {
|
|
4229
|
+
const failures = [];
|
|
4230
|
+
for (const cmd of commands) {
|
|
4231
|
+
if (SHELL_METACHAR_PATTERN.test(cmd)) {
|
|
4232
|
+
failures.push(`Rejected unsafe command (shell metacharacters): ${cmd}`);
|
|
4233
|
+
continue;
|
|
4234
|
+
}
|
|
4235
|
+
try {
|
|
4236
|
+
await execCommand(cmd, workspacePath);
|
|
4237
|
+
} catch (err) {
|
|
4238
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
4239
|
+
failures.push(`Command failed: ${cmd}
|
|
4240
|
+
${msg}`);
|
|
4241
|
+
}
|
|
4242
|
+
}
|
|
4243
|
+
const passed2 = failures.length === 0;
|
|
4244
|
+
return {
|
|
4245
|
+
pass: passed2,
|
|
4246
|
+
score: passed2 ? 100 : 0,
|
|
4247
|
+
details: passed2 ? `All ${commands.length} verification commands passed` : failures.join("\n")
|
|
4248
|
+
};
|
|
4249
|
+
}
|
|
4250
|
+
const hasErrors = stderr.toLowerCase().includes("error") || stderr.toLowerCase().includes("failed") || stderr.toLowerCase().includes("exception");
|
|
4251
|
+
const passed = !hasErrors;
|
|
4252
|
+
return {
|
|
4253
|
+
pass: passed,
|
|
4254
|
+
score: passed ? 100 : 0,
|
|
4255
|
+
details: passed ? "No errors detected in output" : "Errors found in stderr"
|
|
4256
|
+
};
|
|
4257
|
+
}
|
|
4258
|
+
/**
 * Ask the configured LLM to judge whether a task succeeded.
 *
 * The prompt contains the task description, the expected outcome, the last
 * 2000 characters of stdout and the last 1000 characters of stderr. The reply
 * may be wrapped in a Markdown code fence; the first `{...}` span is parsed as
 * JSON. Never rejects for LLM/parse problems: those become a failing score.
 *
 * @param {object} task - Task with `description` and `expected_outcome` (string or array).
 * @param {string} workspacePath - Unused by this scorer.
 * @param {string} stdout - Task stdout.
 * @param {string} stderr - Task stderr.
 * @param {object} config - Configuration forwarded to `callLLM`.
 * @returns {Promise<{pass: *, score: *, reasoning: *}>} Fields copied from the judge's JSON.
 */
async function llmJudgeScorer(task, workspacePath, stdout, stderr, config) {
  const outcome = task.expected_outcome;
  const expectedText = Array.isArray(outcome) ? outcome.join("\n") : outcome;
  const sections = [
    ["## Task", task.description],
    ["## Expected Outcome", expectedText],
    ["## Actual stdout (last 2000 chars)", stdout.slice(-2e3)],
    ["## Actual stderr (last 1000 chars)", stderr.slice(-1e3)]
  ];
  // Heading and body share a line break; sections are separated by a blank line.
  const prompt = sections.map((section) => section.join("\n")).join("\n\n");

  try {
    const reply = await callLLM(config, prompt, {
      systemPrompt: JUDGE_SYSTEM_PROMPT,
      maxTokens: 1024
    });
    let text = reply.trim();
    if (text.startsWith("```")) {
      text = text.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
    }
    const found = /\{[\s\S]*\}/.exec(text);
    if (found === null) {
      return { pass: false, score: 0, reasoning: "Judge returned invalid JSON" };
    }
    const { pass, score, reasoning } = JSON.parse(found[0]);
    return { pass, score, reasoning };
  } catch (err) {
    const why = err instanceof Error ? err.message : String(err);
    return {
      pass: false,
      score: 0,
      reasoning: `LLM judge error: ${why}`
    };
  }
}
|
|
4300
|
+
/**
 * Score a task against its rubric, asking the LLM once per criterion.
 *
 * Each criterion's score is clamped to [0, 1]; a reply that cannot be parsed,
 * an LLM error, or a missing/non-numeric `score` counts as 0 for that
 * criterion. The final score is the weight-normalized average as a percentage,
 * and the task passes at 60% or more. Tasks without a rubric are delegated to
 * `passFailScorer`.
 *
 * @param {object} task - Task with `description` and optional `rubric`
 *   (array of `{ criterion, weight }`).
 * @param {string} workspacePath - Only used when delegating to `passFailScorer`.
 * @param {string} stdout - Task stdout (last 2000 characters are sent to the judge).
 * @param {string} stderr - Task stderr (last 500 characters are sent to the judge).
 * @param {object} config - Configuration forwarded to `callLLM`.
 * @returns {Promise<object>} `{ pass, score, reasoning, breakdown }`, or the
 *   `passFailScorer` result when there is no rubric.
 */
async function rubricScorer(task, workspacePath, stdout, stderr, config) {
  if (!task.rubric || task.rubric.length === 0) {
    return passFailScorer(task, workspacePath, stdout, stderr);
  }
  const breakdown = [];
  let weightedSum = 0;
  for (const criterion of task.rubric) {
    const userMessage = [
      "## Task",
      task.description,
      "",
      "## Criterion to Evaluate",
      `"${criterion.criterion}" (weight: ${criterion.weight})`,
      "",
      "## Actual stdout (last 2000 chars)",
      stdout.slice(-2e3),
      "",
      "## Actual stderr (last 500 chars)",
      stderr.slice(-500)
    ].join("\n");

    let criterionScore = 0;
    try {
      const response = await callLLM(config, userMessage, {
        systemPrompt: RUBRIC_SYSTEM_PROMPT,
        maxTokens: 512
      });
      let cleaned = response.trim();
      if (cleaned.startsWith("```")) {
        cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
      }
      const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
      if (jsonMatch) {
        const rawScore = Number(JSON.parse(jsonMatch[0]).score);
        // A missing or non-numeric score would become NaN and poison the
        // whole weighted total; count it as 0 for this criterion instead.
        criterionScore = Number.isFinite(rawScore) ? Math.max(0, Math.min(1, rawScore)) : 0;
        weightedSum += criterionScore * criterion.weight;
      }
    } catch {
      // LLM call or JSON parsing failed: this criterion scores 0.
      criterionScore = 0;
    }
    breakdown.push({
      criterion: criterion.criterion,
      score: criterionScore,
      weight: criterion.weight
    });
  }
  const totalWeight = task.rubric.reduce((sum, c) => sum + c.weight, 0);
  const totalScore = totalWeight > 0 ? Math.round(weightedSum / totalWeight * 100) : 0;
  return {
    pass: totalScore >= 60,
    score: totalScore,
    reasoning: `Rubric score: ${totalScore}%`,
    breakdown
  };
}
|
|
4363
|
+
/**
 * Dispatch to the scorer named by `task.scoring`.
 *
 * "llm-judge" and "rubric" are only used when `config` is truthy; every other
 * case (including "pass-fail", unknown values, and a missing config) goes to
 * `passFailScorer`.
 *
 * @param {object} task - Task with a `scoring` field.
 * @param {string} workspacePath - Forwarded to the chosen scorer.
 * @param {string} stdout - Forwarded to the chosen scorer.
 * @param {string} stderr - Forwarded to the chosen scorer.
 * @param {object} [config] - LLM configuration, required for the LLM-based scorers.
 * @returns {Promise<*>} The chosen scorer's result.
 */
async function scoreTask(task, workspacePath, stdout, stderr, config) {
  switch (task.scoring) {
    case "llm-judge":
      if (config) return llmJudgeScorer(task, workspacePath, stdout, stderr, config);
      break;
    case "rubric":
      if (config) return rubricScorer(task, workspacePath, stdout, stderr, config);
      break;
    default:
      break;
  }
  return passFailScorer(task, workspacePath, stdout, stderr);
}
|
|
4375
|
+
|
|
4376
|
+
// src/commands/evolve.ts
|
|
4377
|
+
// Default evolve settings; `evolve init` passes this object to createEvolveWorkspace.
var DEFAULT_CONFIG = {
  model: "claude-sonnet-4-6",
  proposerModel: "claude-opus-4-6",
  scorer: "pass-fail",
  maxIterations: 5,
  parallelTasks: 1
};
|
|
4384
|
+
// Parent `evolve` command; the init, baseline and run subcommands are attached to it below.
var evolveCommand = new Command11("evolve").description("Evolve your agent environment through automated optimization");
|
|
4385
|
+
// `kairn evolve init`: create the .kairn-evolve/ workspace, generate eval tasks
// (falling back to template placeholders when generation fails), optionally let
// the user add more tasks interactively, then write tasks.yaml.
evolveCommand.command("init").description("Initialize an evolution workspace with auto-generated tasks").option("--workflow <type>", "Workflow type for template selection", "feature-development").action(async (options) => {
  try {
    const projectRoot = process.cwd();
    console.log(ui.section("Evolve Init"));
    const claudeDir = path19.join(projectRoot, ".claude");
    try {
      await fs19.access(claudeDir);
    } catch {
      console.log(ui.error("No .claude/ directory found. Run kairn describe first."));
      process.exit(1);
    }
    const workspace = await createEvolveWorkspace(projectRoot, DEFAULT_CONFIG);
    console.log(ui.success("Created .kairn-evolve/ workspace"));
    const spinner = ora2("Generating project-specific eval tasks...").start();
    let tasks;
    try {
      tasks = await autoGenerateTasks(projectRoot, options.workflow);
      spinner.succeed(`Generated ${tasks.length} eval tasks`);
    } catch (err) {
      // Show why generation failed (e.g. the "No config found" hint) instead
      // of discarding the error, then fall back to editable placeholders.
      const reason = err instanceof Error ? err.message : String(err);
      spinner.fail(`LLM task generation failed: ${reason}`);
      const templateIds = selectTemplatesForWorkflow(options.workflow);
      tasks = templateIds.map((templateId, index) => ({
        id: `${templateId}-${index + 1}`,
        template: templateId,
        description: `${EVAL_TEMPLATES[templateId].description} (project-specific task \u2014 edit in tasks.yaml)`,
        setup: "npm install",
        expected_outcome: "Task completed successfully",
        scoring: "pass-fail",
        timeout: 300
      }));
      console.log(ui.info(`Fell back to ${tasks.length} template placeholders`));
    }
    for (const task of tasks) {
      console.log(chalk14.cyan(` ${task.id}`) + chalk14.dim(` (${task.template}) \u2014 ${task.description.slice(0, 80)}`));
    }
    while (true) {
      let addMore;
      try {
        addMore = await confirm3({ message: "Add another eval task?", default: false });
      } catch {
        // A cancelled/failed prompt means "no more tasks".
        addMore = false;
      }
      if (!addMore) break;

      let templateId;
      try {
        templateId = await select4({
          message: "Select eval template:",
          choices: Object.values(EVAL_TEMPLATES).map((t) => ({
            name: `${t.name} \u2014 ${t.description}`,
            value: t.id
          }))
        });
      } catch {
        // Same treatment as the confirm prompt: stop adding, but still write
        // the tasks collected so far instead of aborting init.
        break;
      }

      const addSpinner = ora2("Generating task...").start();
      try {
        const config = await loadConfig();
        if (config) {
          let claudeMd = "";
          try {
            claudeMd = await fs19.readFile(path19.join(claudeDir, "CLAUDE.md"), "utf-8");
          } catch {
            // CLAUDE.md is optional; generate with an empty string.
          }
          const profile = await buildProjectProfile(projectRoot);
          const newTasks = await generateTasksFromTemplates(claudeMd, profile, [templateId], config);
          tasks.push(...newTasks);
          addSpinner.succeed(`Added ${newTasks.length} task(s)`);
        } else {
          addSpinner.fail("No config found");
        }
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        addSpinner.fail(`Failed to generate task: ${reason}`);
      }
    }
    await writeTasksFile(workspace, tasks);
    console.log(ui.success(`Wrote ${tasks.length} tasks to tasks.yaml`));
    console.log("");
    console.log(chalk14.dim(" Next steps:"));
    console.log(chalk14.dim(" 1. Review .kairn-evolve/tasks.yaml"));
    console.log(chalk14.dim(" 2. Run: kairn evolve baseline"));
    console.log(chalk14.dim(" 3. Run: kairn evolve run"));
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    console.log(ui.error(msg));
    process.exit(1);
  }
});
|
|
4469
|
+
// `kairn evolve baseline`: copy the current .claude/ directory into the
// existing .kairn-evolve/ workspace and report how many files were captured.
evolveCommand
  .command("baseline")
  .description("Snapshot current .claude/ directory as baseline")
  .action(async () => {
    // Print an error through the shared UI helper and exit with status 1.
    const fail = (message) => {
      console.log(ui.error(message));
      process.exit(1);
    };
    try {
      const projectRoot = process.cwd();
      const workspace = path19.join(projectRoot, ".kairn-evolve");
      console.log(ui.section("Evolve Baseline"));

      const hasWorkspace = await fs19.access(workspace).then(
        () => true,
        () => false
      );
      if (!hasWorkspace) {
        fail("No .kairn-evolve/ directory found. Run kairn evolve init first.");
      }

      await snapshotBaseline(projectRoot, workspace);
      const fileCount = await countFiles(path19.join(workspace, "baseline"));
      console.log(ui.success(`Baseline snapshot created (${fileCount} files)`));
    } catch (err) {
      fail(err instanceof Error ? err.message : String(err));
    }
  });
|
|
4490
|
+
// `kairn evolve run`: load tasks.yaml from .kairn-evolve/, run every task (or
// the one selected with --task) against the project's .claude/ harness, score
// each result, and print a PASS/FAIL line per task plus a summary.
evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").action(async (options) => {
  try {
    const projectRoot = process.cwd();
    const workspace = path19.join(projectRoot, ".kairn-evolve");
    console.log(ui.section("Evolve Run"));
    try {
      await fs19.access(workspace);
    } catch {
      console.log(ui.error("No .kairn-evolve/ directory found. Run kairn evolve init first."));
      process.exit(1);
    }
    const tasksPath = path19.join(workspace, "tasks.yaml");
    let tasksContent;
    try {
      tasksContent = await fs19.readFile(tasksPath, "utf-8");
    } catch {
      console.log(ui.error("No tasks.yaml found. Run kairn evolve init first."));
      process.exit(1);
    }
    const parsed = yamlParse(tasksContent);
    if (!parsed?.tasks || parsed.tasks.length === 0) {
      console.log(ui.error("No tasks found in tasks.yaml"));
      process.exit(1);
    }
    const tasksToRun = options.task ? parsed.tasks.filter((t) => t.id === options.task) : parsed.tasks;
    if (tasksToRun.length === 0) {
      console.log(ui.error(`Task "${options.task}" not found in tasks.yaml`));
      process.exit(1);
    }
    console.log(ui.info(`Running ${tasksToRun.length} task(s)...`));
    console.log("");
    const config = await loadConfig();
    const harnessPath = path19.join(projectRoot, ".claude");
    const results = [];
    for (const task of tasksToRun) {
      const traceDir = path19.join(workspace, "traces", "0", task.id);
      const spinner = ora2(`Running: ${task.id}`).start();
      let result;
      try {
        result = await runTask(task, harnessPath, traceDir, 0);
        const stdout = await fs19.readFile(path19.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
        const stderr = await fs19.readFile(path19.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
        // Always score: scoreTask falls back to the pass-fail scorer when no
        // config is available, so a missing config must not leave every task
        // stuck on the "Pending scoring" placeholder.
        const score = await scoreTask(task, traceDir, stdout, stderr, config);
        result.score = score;
        await writeScore(traceDir, score);
      } finally {
        // Stop the spinner even when the task throws, so the error printed by
        // the outer handler is not interleaved with a running spinner.
        spinner.stop();
      }
      results.push(result);
      const status = result.score.pass ? chalk14.green("PASS") : chalk14.red("FAIL");
      const scoreStr = result.score.score !== void 0 ? chalk14.dim(` (${result.score.score}%)`) : "";
      // The pass-fail scorer reports `details`; the LLM-based scorers report `reasoning`.
      const note = result.score.details || result.score.reasoning;
      console.log(` ${status} ${task.id}${scoreStr}${note ? chalk14.dim(` \u2014 ${note}`) : ""}`);
    }
    const passed = results.filter((r) => r.score.pass).length;
    console.log("");
    console.log(ui.info(`Results: ${passed}/${results.length} passed`));
    console.log(ui.info("Traces written to .kairn-evolve/traces/0/"));
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    console.log(ui.error(msg));
    process.exit(1);
  }
});
|
|
4551
|
+
/**
 * Count the non-directory entries under `dir`, recursing into subdirectories.
 * A directory that cannot be read contributes 0 instead of raising.
 *
 * @param {string} dir - Directory to count.
 * @returns {Promise<number>} Number of non-directory entries found.
 */
async function countFiles(dir) {
  let children;
  try {
    children = await fs19.readdir(dir, { withFileTypes: true });
  } catch {
    // Missing or unreadable directory counts as empty.
    return 0;
  }
  let total = 0;
  for (const child of children) {
    total += child.isDirectory() ? await countFiles(path19.join(dir, child.name)) : 1;
  }
  return total;
}
|
|
4566
|
+
|
|
3630
4567
|
// src/cli.ts
|
|
3631
|
-
var program = new
|
|
4568
|
+
var program = new Command12();
|
|
3632
4569
|
program.name("kairn").description(
|
|
3633
4570
|
"Compile natural language intent into optimized Claude Code environments"
|
|
3634
4571
|
).version("1.9.0").option("--no-color", "Disable colored output");
|
|
@@ -3642,8 +4579,9 @@ program.addCommand(doctorCommand);
|
|
|
3642
4579
|
program.addCommand(registryCommand);
|
|
3643
4580
|
program.addCommand(templatesCommand);
|
|
3644
4581
|
program.addCommand(keysCommand);
|
|
4582
|
+
program.addCommand(evolveCommand);
|
|
3645
4583
|
if (process.argv.includes("--no-color") || process.env.NO_COLOR) {
|
|
3646
|
-
|
|
4584
|
+
chalk15.level = 0;
|
|
3647
4585
|
}
|
|
3648
4586
|
program.parse();
|
|
3649
4587
|
//# sourceMappingURL=cli.js.map
|