karajan-code 2.3.2 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HU Acceptance Test Runner.
|
|
3
|
+
* Executes acceptance_tests commands for an HU and returns structured results.
|
|
4
|
+
* Brain uses this to determine if an HU is done (all pass) or needs fixing (with diagnostics).
|
|
5
|
+
*/
|
|
6
|
+
import { runCommand } from "../utils/process.js";
|
|
7
|
+
|
|
8
|
+
/**
 * Run a single acceptance test command through `bash -c`.
 *
 * A test passes only when the command exits with code 0 AND the last
 * non-empty line of its stdout is not the literal marker `FAIL`. The marker
 * check is needed for commands shaped like `<check> && echo PASS || echo FAIL`:
 * the trailing `echo` always succeeds, so such a command exits 0 even when
 * the check itself failed, and the exit code alone would report a false pass.
 *
 * @param {string} cmd - Shell command to execute
 * @param {string} cwd - Working directory
 * @param {number} [timeoutMs=30000] - Timeout per test
 * @returns {Promise<{cmd: string, passed: boolean, output: string, exitCode: number}>}
 *   `output` is at most the last 500 characters of stdout followed by stderr
 *   (or of the error message when running the command threw). `exitCode` is -1
 *   when running the command threw or no numeric exit code was reported.
 */
async function runSingleTest(cmd, cwd, timeoutMs = 30000) {
  try {
    const result = await runCommand("bash", ["-c", cmd], {
      timeout: timeoutMs,
      cwd
    });
    const stdout = String(result.stdout || "");
    const output = `${stdout}${result.stderr || ""}`;
    // Only stdout is inspected for the marker: `echo FAIL` writes there.
    const lastStdoutLine = stdout.trim().split(/\r?\n/).pop().trim();
    const exitCode = typeof result.exitCode === "number" ? result.exitCode : -1;
    return {
      cmd,
      passed: exitCode === 0 && lastStdoutLine !== "FAIL",
      output: output.slice(-500), // last 500 chars for diagnostics
      exitCode
    };
  } catch (err) {
    // `err` may be anything that was thrown, including null or a non-Error value.
    const message = typeof err?.message === "string" ? err.message.slice(-500) : "";
    return {
      cmd,
      passed: false,
      output: message || "Command timed out or crashed",
      exitCode: -1
    };
  }
}
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* Run all acceptance tests for an HU.
|
|
40
|
+
* @param {string[]} tests - Array of shell commands
|
|
41
|
+
* @param {string} cwd - Working directory
|
|
42
|
+
* @returns {Promise<{allPassed: boolean, results: object[], summary: string, diagnostics: string|null}>}
|
|
43
|
+
*/
|
|
44
|
+
export async function runAcceptanceTests(tests, cwd) {
|
|
45
|
+
if (!tests || tests.length === 0) {
|
|
46
|
+
return { allPassed: false, results: [], summary: "No acceptance tests defined", diagnostics: null };
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
const results = [];
|
|
50
|
+
for (const cmd of tests) {
|
|
51
|
+
const result = await runSingleTest(cmd, cwd);
|
|
52
|
+
results.push(result);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
const passed = results.filter(r => r.passed);
|
|
56
|
+
const failed = results.filter(r => !r.passed);
|
|
57
|
+
const allPassed = failed.length === 0;
|
|
58
|
+
|
|
59
|
+
const summary = `${passed.length}/${results.length} acceptance tests passed`;
|
|
60
|
+
|
|
61
|
+
let diagnostics = null;
|
|
62
|
+
if (!allPassed) {
|
|
63
|
+
diagnostics = failed.map(f =>
|
|
64
|
+
`FAIL: ${f.cmd}\n exit=${f.exitCode}\n output: ${f.output.trim().split("\n").slice(-5).join("\n ")}`
|
|
65
|
+
).join("\n\n");
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
return { allPassed, results, summary, diagnostics };
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
/**
 * Build a concrete diagnostic prompt for Brain to send to the coder.
 *
 * For each failed result the prompt lists the command, its exit code and the
 * last eight lines of its output, followed by a closing instruction to fix
 * all failures.
 *
 * @param {object[]} failedResults - Array of failed test results; each entry
 *   is read for `cmd`, `exitCode` and `output`.
 * @returns {string} Prompt for the coder, or an empty string when there are
 *   no failed results.
 */
export function buildDiagnosticPrompt(failedResults) {
  if (!failedResults || failedResults.length === 0) return "";

  const lines = ["The following acceptance tests FAILED. Fix each one:", ""];
  for (const f of failedResults) {
    // A result without textual output must not crash prompt generation.
    const text = typeof f.output === "string" ? f.output.trim() : "";
    // Split on LF or CRLF so no stray carriage returns end up in the prompt.
    const lastLines = text === "" ? ["(no output)"] : text.split(/\r?\n/).slice(-8);

    lines.push(`❌ Command: ${f.cmd}`);
    lines.push(` Exit code: ${f.exitCode}`);
    lines.push(` Last output:`);
    for (const l of lastLines) {
      lines.push(` ${l}`);
    }
    lines.push("");
  }
  lines.push("Fix ALL failing tests. Run each command yourself to verify before finishing.");
  return lines.join("\n");
}
|
package/src/hu/auto-generator.js
CHANGED
|
@@ -94,12 +94,12 @@ function buildSetupHu({ stackHints }) {
|
|
|
94
94
|
"SCOPE (do ONLY this, nothing else):",
|
|
95
95
|
"- Create package.json (with workspaces if monorepo detected from stack hints)",
|
|
96
96
|
"- Install all runtime + dev dependencies listed in stack hints",
|
|
97
|
-
"-
|
|
97
|
+
"- Install test framework WITH coverage reporter (e.g. vitest + @vitest/coverage-v8)",
|
|
98
|
+
"- Configure vitest.config.js with coverage.enabled = true",
|
|
98
99
|
"- Create .env.example with placeholder variables",
|
|
99
|
-
"- Verify
|
|
100
|
+
"- Verify by running each acceptance_test command below",
|
|
100
101
|
"",
|
|
101
102
|
"DO NOT implement any business logic, API routes, components, or features.",
|
|
102
|
-
"DO NOT add security middleware, auth, or any application code.",
|
|
103
103
|
"This HU is ONLY project scaffolding.",
|
|
104
104
|
"",
|
|
105
105
|
"Stack hints:",
|
|
@@ -114,9 +114,15 @@ function buildSetupHu({ stackHints }) {
|
|
|
114
114
|
certified: { text: certifiedText },
|
|
115
115
|
acceptance_criteria: [
|
|
116
116
|
"npm install succeeds without errors",
|
|
117
|
-
"npm test runs
|
|
118
|
-
"
|
|
119
|
-
"
|
|
117
|
+
"npm test runs without error",
|
|
118
|
+
"npm run test:coverage runs without error",
|
|
119
|
+
".env.example exists"
|
|
120
|
+
],
|
|
121
|
+
acceptance_tests: [
|
|
122
|
+
"npm install --ignore-scripts 2>&1 && echo PASS || echo FAIL",
|
|
123
|
+
"npx vitest run 2>&1; test $? -eq 0 && echo PASS || echo FAIL",
|
|
124
|
+
"npx vitest run --coverage 2>&1 | grep -q 'All files\\|% Stmts' && echo PASS || echo FAIL",
|
|
125
|
+
"test -f .env.example && echo PASS || echo FAIL"
|
|
120
126
|
]
|
|
121
127
|
};
|
|
122
128
|
}
|
|
@@ -135,8 +141,8 @@ function buildTaskHu({ id, subtask, projectName, blockedBy }) {
|
|
|
135
141
|
"SCOPE (do ONLY this, nothing else):",
|
|
136
142
|
`- Implement: ${subtask}`,
|
|
137
143
|
"- Add unit tests for the new code",
|
|
144
|
+
"- Run ALL acceptance_tests listed below and ensure they pass",
|
|
138
145
|
"- Do NOT touch code outside this subtask's scope",
|
|
139
|
-
"- Do NOT refactor or 'improve' unrelated files",
|
|
140
146
|
"- Target: <200 lines changed (like an atomic PR)"
|
|
141
147
|
].join("\n");
|
|
142
148
|
return {
|
|
@@ -149,7 +155,11 @@ function buildTaskHu({ id, subtask, projectName, blockedBy }) {
|
|
|
149
155
|
acceptance_criteria: [
|
|
150
156
|
`${subtask} is implemented and working`,
|
|
151
157
|
"Unit tests cover the new code",
|
|
152
|
-
"
|
|
158
|
+
"All acceptance_tests pass"
|
|
159
|
+
],
|
|
160
|
+
acceptance_tests: [
|
|
161
|
+
"npx vitest run 2>&1; test $? -eq 0 && echo PASS || echo FAIL",
|
|
162
|
+
"npx vitest run --coverage 2>&1 | grep -q 'All files\\|% Stmts' && echo PASS || echo FAIL"
|
|
153
163
|
]
|
|
154
164
|
};
|
|
155
165
|
}
|
package/src/mcp/run-kj.js
CHANGED
|
@@ -5,6 +5,13 @@ import { execa } from "execa";
|
|
|
5
5
|
const MODULE_DIR = path.dirname(fileURLToPath(import.meta.url));
|
|
6
6
|
const CLI_PATH = path.resolve(MODULE_DIR, "..", "cli.js");
|
|
7
7
|
|
|
8
|
+
/**
 * Mask a secret token for safe logging.
 *
 * Tokens of at least 16 characters keep their first 4 and last 4 characters,
 * with the middle replaced by asterisks (capped at 16 so the mask does not
 * disclose the exact token length). Anything shorter, and any non-string
 * value, is fully replaced by "***": showing 8 characters of a short token
 * would reveal most of the secret.
 *
 * @param {unknown} token - Secret to mask
 * @returns {string} Masked representation that is safe to log
 */
function maskToken(token) {
  const visibleChars = 4;
  const minLengthToReveal = 16;
  const maxStars = 16;

  if (typeof token !== "string" || token.length < minLengthToReveal) return "***";

  const stars = "*".repeat(Math.min(token.length - 2 * visibleChars, maxStars));
  return `${token.slice(0, visibleChars)}${stars}${token.slice(-visibleChars)}`;
}
|
|
14
|
+
|
|
8
15
|
/**
 * Append a CLI flag to `args` when the option value is exactly `true`.
 *
 * Any other value (including truthy non-boolean values) leaves `args` untouched.
 *
 * @param {unknown} value - Option value to inspect
 * @param {string} flagName - Flag to append, e.g. "--verbose"
 * @param {string[]} args - Argument list, mutated in place
 * @returns {void}
 */
function normalizeBoolFlag(value, flagName, args) {
  if (value !== true) {
    return;
  }
  args.push(flagName);
}
|
|
@@ -113,5 +120,15 @@ export async function runKjCommand({ command, commandArgs = [], options = {}, en
|
|
|
113
120
|
payload.errorSummary = result.stderr.split("\n").filter(Boolean).slice(-3).join(" | ");
|
|
114
121
|
}
|
|
115
122
|
|
|
123
|
+
// Sanitize output: strip sonar token from any log/error output
|
|
124
|
+
if (options.sonarToken) {
|
|
125
|
+
const masked = maskToken(options.sonarToken);
|
|
126
|
+
for (const key of ["stdout", "stderr", "errorSummary"]) {
|
|
127
|
+
if (payload[key] && typeof payload[key] === "string") {
|
|
128
|
+
payload[key] = payload[key].replaceAll(options.sonarToken, masked);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
|
|
116
133
|
return payload;
|
|
117
134
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// Direct actions: commands Karajan Brain can execute without invoking a full role.
|
|
2
2
|
// Keeps the action catalog small, auditable, and safe.
|
|
3
3
|
|
|
4
|
-
import { execSync } from "node:child_process";
|
|
4
|
+
import { execSync, execFileSync } from "node:child_process";
|
|
5
5
|
import fs from "node:fs/promises";
|
|
6
6
|
import path from "node:path";
|
|
7
7
|
|
|
@@ -32,7 +32,11 @@ const ALLOWED_COMMANDS = [
|
|
|
32
32
|
*/
|
|
33
33
|
/**
 * Check whether a command matches an entry of ALLOWED_COMMANDS exactly.
 *
 * Both the command and each allow-list entry are split on whitespace and
 * compared token by token; the token counts must be equal, so a command with
 * extra trailing arguments does not match a shorter allowed entry.
 *
 * @param {unknown} cmd - Command line to check
 * @returns {boolean} true when `cmd` is a non-empty string whose tokens equal
 *   those of some allow-list entry
 */
function isCommandAllowed(cmd) {
  if (typeof cmd !== "string" || cmd === "") return false;

  const requested = cmd.trim().split(/\s+/);
  const matchesEntry = (entry) => {
    const expected = entry.split(/\s+/);
    if (expected.length !== requested.length) return false;
    return expected.every((word, idx) => word === requested[idx]);
  };

  return ALLOWED_COMMANDS.some(matchesEntry);
}
|
|
37
41
|
|
|
38
42
|
/**
|
|
@@ -128,8 +132,7 @@ async function gitAdd({ files, cwd }) {
|
|
|
128
132
|
return { ok: false, error: `Invalid file path: ${f}`, action: "git_add" };
|
|
129
133
|
}
|
|
130
134
|
}
|
|
131
|
-
|
|
132
|
-
execSync(`git add ${args}`, {
|
|
135
|
+
execFileSync("git", ["add", ...files], {
|
|
133
136
|
cwd: cwd || process.cwd(),
|
|
134
137
|
encoding: "utf8",
|
|
135
138
|
stdio: ["pipe", "pipe", "pipe"]
|
package/src/orchestrator.js
CHANGED
|
@@ -1680,6 +1680,66 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
1680
1680
|
logger.info(`HU ${story.id} (${story.task_type}): policies → reviewer=${huPolicies.reviewer}, tdd=${huPolicies.tdd}, sonar=${huPolicies.sonar}, tests=${huPolicies.testsRequired}`);
|
|
1681
1681
|
|
|
1682
1682
|
const branchName = await prepareHuBranch({ story, huBranches, config: ctx.config, logger });
|
|
1683
|
+
const projectDir = ctx.config.projectDir || process.cwd();
|
|
1684
|
+
|
|
1685
|
+
// If HU has acceptance_tests, Brain runs them as the gate instead of
|
|
1686
|
+
// the standard reviewer/tester pipeline. This is the radical fix:
|
|
1687
|
+
// concrete executable tests replace subjective reviewer opinions.
|
|
1688
|
+
if (story.acceptance_tests?.length > 0) {
|
|
1689
|
+
const { runAcceptanceTests, buildDiagnosticPrompt } = await import("./hu/acceptance-runner.js");
|
|
1690
|
+
|
|
1691
|
+
for (let attempt = 1; attempt <= ctx.config.max_iterations; attempt++) {
|
|
1692
|
+
logger.info(`HU ${story.id}: coder iteration ${attempt}/${ctx.config.max_iterations}`);
|
|
1693
|
+
emitProgress(emitter, makeEvent("iteration:start", { ...ctx.eventBase, stage: "iteration" }, {
|
|
1694
|
+
message: `Iteration ${attempt}/${ctx.config.max_iterations}`,
|
|
1695
|
+
detail: { iteration: attempt, maxIterations: ctx.config.max_iterations }
|
|
1696
|
+
}));
|
|
1697
|
+
|
|
1698
|
+
// Coder runs with the HU task + any diagnostic feedback from previous attempt
|
|
1699
|
+
const coderResult = await runCoderStage({
|
|
1700
|
+
coderRoleInstance: ctx.coderRoleInstance, coderRole: ctx.coderRole,
|
|
1701
|
+
config: ctx.config, logger, emitter, eventBase: ctx.eventBase,
|
|
1702
|
+
session: ctx.session, plannedTask: ctx.plannedTask,
|
|
1703
|
+
trackBudget: ctx.trackBudget, iteration: attempt, brainCtx: ctx.brainCtx
|
|
1704
|
+
});
|
|
1705
|
+
if (coderResult?.action === "standby" || coderResult?.action === "pause") {
|
|
1706
|
+
return coderResult?.result || { approved: false, reason: "coder_failed" };
|
|
1707
|
+
}
|
|
1708
|
+
|
|
1709
|
+
// Brain runs acceptance tests
|
|
1710
|
+
logger.info(`HU ${story.id}: running ${story.acceptance_tests.length} acceptance tests`);
|
|
1711
|
+
emitProgress(emitter, makeEvent("hu:acceptance-start", { ...ctx.eventBase, stage: "acceptance" }, {
|
|
1712
|
+
message: `Running ${story.acceptance_tests.length} acceptance tests`,
|
|
1713
|
+
detail: { huId: story.id, testCount: story.acceptance_tests.length }
|
|
1714
|
+
}));
|
|
1715
|
+
|
|
1716
|
+
const testResult = await runAcceptanceTests(story.acceptance_tests, projectDir);
|
|
1717
|
+
emitProgress(emitter, makeEvent("hu:acceptance-end", { ...ctx.eventBase, stage: "acceptance" }, {
|
|
1718
|
+
status: testResult.allPassed ? "ok" : "fail",
|
|
1719
|
+
message: testResult.summary,
|
|
1720
|
+
detail: { allPassed: testResult.allPassed, results: testResult.results.map(r => ({ cmd: r.cmd, passed: r.passed })) }
|
|
1721
|
+
}));
|
|
1722
|
+
|
|
1723
|
+
if (testResult.allPassed) {
|
|
1724
|
+
logger.info(`HU ${story.id}: all acceptance tests PASSED — approved`);
|
|
1725
|
+
await finalizeHuCommit({ story, branchName, config: ctx.config, logger });
|
|
1726
|
+
return { approved: true, sessionId: ctx.session.id, reason: "acceptance_tests_passed" };
|
|
1727
|
+
}
|
|
1728
|
+
|
|
1729
|
+
// Brain diagnoses failures and sends concrete fix to coder
|
|
1730
|
+
const failed = testResult.results.filter(r => !r.passed);
|
|
1731
|
+
const diagnostic = buildDiagnosticPrompt(failed);
|
|
1732
|
+
logger.warn(`HU ${story.id}: ${failed.length} acceptance test(s) FAILED — sending diagnostic to coder`);
|
|
1733
|
+
ctx.session.last_reviewer_feedback = diagnostic;
|
|
1734
|
+
ctx.plannedTask = `${huTask}\n\n--- ACCEPTANCE TEST FAILURES ---\n${diagnostic}`;
|
|
1735
|
+
}
|
|
1736
|
+
|
|
1737
|
+
// All iterations exhausted
|
|
1738
|
+
logger.warn(`HU ${story.id}: max iterations reached with acceptance tests still failing`);
|
|
1739
|
+
return { approved: false, sessionId: ctx.session.id, reason: "acceptance_tests_failed" };
|
|
1740
|
+
}
|
|
1741
|
+
|
|
1742
|
+
// Fallback: no acceptance_tests → standard pipeline (reviewer/tester)
|
|
1683
1743
|
try {
|
|
1684
1744
|
const result = await runIterationLoop(ctx, { task: huTask, askQuestion, emitter, logger });
|
|
1685
1745
|
if (result?.approved) {
|
package/src/sonar/credentials.js
CHANGED
|
@@ -48,5 +48,5 @@ export async function saveSonarToken(token) {
|
|
|
48
48
|
existing.token = token;
|
|
49
49
|
const dir = getKarajanHome();
|
|
50
50
|
await fs.mkdir(dir, { recursive: true });
|
|
51
|
-
await fs.writeFile(filePath, JSON.stringify(existing, null, 2), "utf8");
|
|
51
|
+
await fs.writeFile(filePath, JSON.stringify(existing, null, 2), { encoding: "utf8", mode: 0o600 });
|
|
52
52
|
}
|