karajan-code 2.3.1 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/guards/policy-resolver.js +1 -1
- package/src/hu/acceptance-runner.js +92 -0
- package/src/hu/auto-generator.js +35 -10
- package/src/mcp/run-kj.js +17 -0
- package/src/orchestrator/direct-actions.js +7 -4
- package/src/orchestrator.js +71 -0
- package/src/sonar/credentials.js +1 -1
package/package.json
CHANGED
package/src/guards/policy-resolver.js
CHANGED
|
@@ -2,7 +2,7 @@ export const VALID_TASK_TYPES = new Set(["sw", "infra", "doc", "add-tests", "ref
|
|
|
2
2
|
|
|
3
3
|
export const DEFAULT_POLICIES = {
|
|
4
4
|
sw: { tdd: true, sonar: true, reviewer: true, testsRequired: true },
|
|
5
|
-
infra: { tdd: false, sonar: false, reviewer:
|
|
5
|
+
infra: { tdd: false, sonar: false, reviewer: false, testsRequired: false },
|
|
6
6
|
doc: { tdd: false, sonar: false, reviewer: true, testsRequired: false },
|
|
7
7
|
"add-tests": { tdd: false, sonar: true, reviewer: true, testsRequired: true },
|
|
8
8
|
refactor: { tdd: true, sonar: true, reviewer: true, testsRequired: false },
|
|
package/src/hu/acceptance-runner.js
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HU Acceptance Test Runner.
|
|
3
|
+
* Executes acceptance_tests commands for an HU and returns structured results.
|
|
4
|
+
* Brain uses this to determine if an HU is done (all pass) or needs fixing (with diagnostics).
|
|
5
|
+
*/
|
|
6
|
+
import { runCommand } from "../utils/process.js";
|
|
7
|
+
|
|
8
|
+
/**
 * Run a single acceptance test command through `bash -c`.
 *
 * A test passes only when the command exits with code 0 AND the last
 * non-empty line it wrote to stdout is not the literal sentinel `FAIL`.
 * The sentinel check is required for commands written as
 * `some-check && echo PASS || echo FAIL`: the trailing `echo` succeeds on
 * both branches, so the shell exits with 0 whether the check passed or not
 * and the exit code alone cannot tell the two outcomes apart.
 *
 * @param {string} cmd - Shell command to execute
 * @param {string} cwd - Working directory
 * @param {number} [timeoutMs=30000] - Timeout per test
 * @returns {Promise<{cmd: string, passed: boolean, output: string, exitCode: number}>}
 *   `output` is stdout followed by stderr, cut to its last 500 characters.
 *   `exitCode` is -1 when the command threw or reported no exit code.
 */
async function runSingleTest(cmd, cwd, timeoutMs = 30000) {
  try {
    const result = await runCommand("bash", ["-c", cmd], {
      timeout: timeoutMs,
      cwd
    });
    const stdout = String(result.stdout || "");
    const output = stdout + (result.stderr || "");
    // A run that reports no exit code is never a pass; keep the documented
    // numeric type instead of leaking `undefined`.
    const exitCode = result.exitCode ?? -1;
    // `echo FAIL` writes to stdout, so only stdout is inspected for the sentinel.
    const lastStdoutLine = stdout.trimEnd().split("\n").pop().trim();
    return {
      cmd,
      passed: exitCode === 0 && lastStdoutLine !== "FAIL",
      output: output.slice(-500), // last 500 chars for diagnostics
      exitCode
    };
  } catch (err) {
    // Anything thrown by runCommand is reported as a failed test rather than
    // propagated, so one broken command cannot abort the whole run.
    return {
      cmd,
      passed: false,
      output: err?.message?.slice(-500) || "Command timed out or crashed",
      exitCode: -1
    };
  }
}
|
|
37
|
+
|
|
38
|
+
/**
 * Run all acceptance tests for an HU, one command at a time and in order.
 *
 * @param {string[]|string} tests - Shell commands to run. A single command
 *   given as a bare string is treated as a one-element list instead of being
 *   iterated character by character.
 * @param {string} cwd - Working directory
 * @param {number} [timeoutMs] - Optional per-test timeout in milliseconds;
 *   when omitted, runSingleTest applies its own default.
 * @returns {Promise<{allPassed: boolean, results: object[], summary: string, diagnostics: string|null}>}
 *   `allPassed` is false when no tests are defined. `diagnostics` is null
 *   when everything passed, otherwise one entry per failed command holding
 *   its exit code and the last five lines of its output.
 */
export async function runAcceptanceTests(tests, cwd, timeoutMs) {
  if (!tests || tests.length === 0) {
    return { allPassed: false, results: [], summary: "No acceptance tests defined", diagnostics: null };
  }

  const commands = typeof tests === "string" ? [tests] : tests;

  const results = [];
  for (const cmd of commands) {
    // Awaited inside the loop on purpose: commands run strictly one after another.
    results.push(await runSingleTest(cmd, cwd, timeoutMs));
  }

  const failed = results.filter(r => !r.passed);
  const allPassed = failed.length === 0;
  const summary = `${results.length - failed.length}/${results.length} acceptance tests passed`;

  const diagnostics = allPassed
    ? null
    : failed.map(f => {
      const tail = String(f.output ?? "").trim().split("\n").slice(-5).join("\n ");
      return `FAIL: ${f.cmd}\n exit=${f.exitCode}\n output: ${tail}`;
    }).join("\n\n");

  return { allPassed, results, summary, diagnostics };
}
|
|
70
|
+
|
|
71
|
+
/**
 * Build a concrete diagnostic prompt for Brain to send to the coder.
 * For every failed test it lists the command, its exit code and the last
 * eight lines of its output, followed by a closing instruction to fix and
 * re-run everything.
 *
 * @param {object[]} failedResults - Array of failed test results; each entry
 *   is read for `cmd`, `exitCode` and `output`. A missing or non-string
 *   `output` is rendered as empty output instead of throwing.
 * @returns {string} Prompt for the coder with concrete fix instructions, or
 *   an empty string when there are no failed results.
 */
export function buildDiagnosticPrompt(failedResults) {
  if (!failedResults || failedResults.length === 0) return "";

  const lines = ["The following acceptance tests FAILED. Fix each one:", ""];
  for (const failure of failedResults) {
    // Coerce first: results built outside runSingleTest may carry no output.
    const tail = String(failure.output ?? "").trim().split("\n").slice(-8);
    lines.push(
      `❌ Command: ${failure.cmd}`,
      ` Exit code: ${failure.exitCode}`,
      ` Last output:`,
      ...tail.map(line => ` ${line}`),
      ""
    );
  }
  lines.push("Fix ALL failing tests. Run each command yourself to verify before finishing.");
  return lines.join("\n");
}
|
package/src/hu/auto-generator.js
CHANGED
|
@@ -66,6 +66,20 @@ export function needsSetupHu({ isNewProject = false, stackHints = [], subtasks =
|
|
|
66
66
|
return subtasks.some(s => setupKeywords.test(s));
|
|
67
67
|
}
|
|
68
68
|
|
|
69
|
+
/**
 * Filter conflicting stack hints. When at least one Node.js ecosystem keyword
 * is present, the Go keywords ("gin", "fiber", "go") are dropped on the
 * assumption that they were detected from gitignore patterns and are not
 * actually part of the task. Only Go keywords are removed; every other hint
 * is kept in its original order and spelling.
 *
 * Keywords are compared ignoring letter case and surrounding whitespace, so
 * "Express" counts as a Node.js hint and " Go " as a Go hint.
 *
 * @param {string[]} hints - Detected stack hints
 * @returns {string[]} The same `hints` value when it is empty/missing or
 *   contains no Node.js keyword; otherwise a new array without Go keywords.
 */
function filterConflictingHints(hints) {
  if (!hints || hints.length === 0) return hints;
  const nodeEcosystem = new Set(["express", "vite", "vitest", "jest", "next", "astro", "react", "vue", "svelte", "nestjs", "monorepo", "workspaces"]);
  const goKeywords = new Set(["gin", "fiber", "go"]);
  // Non-string entries are passed through unchanged; they can never match a keyword.
  const normalize = hint => (typeof hint === "string" ? hint.trim().toLowerCase() : hint);
  const hasNode = hints.some(hint => nodeEcosystem.has(normalize(hint)));
  if (!hasNode) return hints;
  return hints.filter(hint => !goKeywords.has(normalize(hint)));
}
|
|
82
|
+
|
|
69
83
|
/**
|
|
70
84
|
* Build a MINIMAL setup HU — project structure + deps only.
|
|
71
85
|
* NEVER includes the full original task. The coder must only do setup.
|
|
@@ -80,12 +94,12 @@ function buildSetupHu({ stackHints }) {
|
|
|
80
94
|
"SCOPE (do ONLY this, nothing else):",
|
|
81
95
|
"- Create package.json (with workspaces if monorepo detected from stack hints)",
|
|
82
96
|
"- Install all runtime + dev dependencies listed in stack hints",
|
|
83
|
-
"-
|
|
97
|
+
"- Install test framework WITH coverage reporter (e.g. vitest + @vitest/coverage-v8)",
|
|
98
|
+
"- Configure vitest.config.js with coverage.enabled = true",
|
|
84
99
|
"- Create .env.example with placeholder variables",
|
|
85
|
-
"- Verify
|
|
100
|
+
"- Verify by running each acceptance_test command below",
|
|
86
101
|
"",
|
|
87
102
|
"DO NOT implement any business logic, API routes, components, or features.",
|
|
88
|
-
"DO NOT add security middleware, auth, or any application code.",
|
|
89
103
|
"This HU is ONLY project scaffolding.",
|
|
90
104
|
"",
|
|
91
105
|
"Stack hints:",
|
|
@@ -100,9 +114,15 @@ function buildSetupHu({ stackHints }) {
|
|
|
100
114
|
certified: { text: certifiedText },
|
|
101
115
|
acceptance_criteria: [
|
|
102
116
|
"npm install succeeds without errors",
|
|
103
|
-
"npm test runs
|
|
104
|
-
"
|
|
105
|
-
"
|
|
117
|
+
"npm test runs without error",
|
|
118
|
+
"npm run test:coverage runs without error",
|
|
119
|
+
".env.example exists"
|
|
120
|
+
],
|
|
121
|
+
acceptance_tests: [
|
|
122
|
+
"npm install --ignore-scripts 2>&1 && echo PASS || echo FAIL",
|
|
123
|
+
"npx vitest run 2>&1; test $? -eq 0 && echo PASS || echo FAIL",
|
|
124
|
+
"npx vitest run --coverage 2>&1 | grep -q 'All files\\|% Stmts' && echo PASS || echo FAIL",
|
|
125
|
+
"test -f .env.example && echo PASS || echo FAIL"
|
|
106
126
|
]
|
|
107
127
|
};
|
|
108
128
|
}
|
|
@@ -121,8 +141,8 @@ function buildTaskHu({ id, subtask, projectName, blockedBy }) {
|
|
|
121
141
|
"SCOPE (do ONLY this, nothing else):",
|
|
122
142
|
`- Implement: ${subtask}`,
|
|
123
143
|
"- Add unit tests for the new code",
|
|
144
|
+
"- Run ALL acceptance_tests listed below and ensure they pass",
|
|
124
145
|
"- Do NOT touch code outside this subtask's scope",
|
|
125
|
-
"- Do NOT refactor or 'improve' unrelated files",
|
|
126
146
|
"- Target: <200 lines changed (like an atomic PR)"
|
|
127
147
|
].join("\n");
|
|
128
148
|
return {
|
|
@@ -135,7 +155,11 @@ function buildTaskHu({ id, subtask, projectName, blockedBy }) {
|
|
|
135
155
|
acceptance_criteria: [
|
|
136
156
|
`${subtask} is implemented and working`,
|
|
137
157
|
"Unit tests cover the new code",
|
|
138
|
-
"
|
|
158
|
+
"All acceptance_tests pass"
|
|
159
|
+
],
|
|
160
|
+
acceptance_tests: [
|
|
161
|
+
"npx vitest run 2>&1; test $? -eq 0 && echo PASS || echo FAIL",
|
|
162
|
+
"npx vitest run --coverage 2>&1 | grep -q 'All files\\|% Stmts' && echo PASS || echo FAIL"
|
|
139
163
|
]
|
|
140
164
|
};
|
|
141
165
|
}
|
|
@@ -168,13 +192,14 @@ export function generateHuBatch({
|
|
|
168
192
|
}
|
|
169
193
|
|
|
170
194
|
const stories = [];
|
|
171
|
-
const
|
|
195
|
+
const filteredHints = filterConflictingHints(stackHints);
|
|
196
|
+
const needsSetup = needsSetupHu({ isNewProject, stackHints: filteredHints, subtasks });
|
|
172
197
|
let nextId = 1;
|
|
173
198
|
|
|
174
199
|
const projectName = deriveProjectName(originalTask);
|
|
175
200
|
|
|
176
201
|
if (needsSetup) {
|
|
177
|
-
stories.push(buildSetupHu({ stackHints }));
|
|
202
|
+
stories.push(buildSetupHu({ stackHints: filteredHints }));
|
|
178
203
|
nextId = 2;
|
|
179
204
|
}
|
|
180
205
|
|
package/src/mcp/run-kj.js
CHANGED
|
@@ -5,6 +5,13 @@ import { execa } from "execa";
|
|
|
5
5
|
const MODULE_DIR = path.dirname(fileURLToPath(import.meta.url));
|
|
6
6
|
const CLI_PATH = path.resolve(MODULE_DIR, "..", "cli.js");
|
|
7
7
|
|
|
8
|
+
/**
 * Mask a secret token for safe logging.
 *
 * Tokens of 16 characters or more are rendered as their first 4 and last 4
 * characters with the middle replaced by `*` (at most 16 stars, so the mask
 * does not reveal the exact length of long tokens). Anything shorter, and any
 * non-string value, is rendered as "***": showing 8 characters of a short
 * token would expose most of the secret (8 of 9 in the worst case).
 *
 * @param {string} token - Secret to mask
 * @returns {string} Masked representation that is safe to log
 */
function maskToken(token) {
  const visiblePerSide = 4;
  // Reveal the ends only when at least as many characters stay hidden as are shown.
  const minRevealLength = visiblePerSide * 4;
  if (typeof token !== "string" || token.length < minRevealLength) return "***";
  const hiddenCount = Math.min(token.length - visiblePerSide * 2, 16);
  return `${token.slice(0, visiblePerSide)}${"*".repeat(hiddenCount)}${token.slice(-visiblePerSide)}`;
}
|
|
14
|
+
|
|
8
15
|
function normalizeBoolFlag(value, flagName, args) {
|
|
9
16
|
if (value === true) args.push(flagName);
|
|
10
17
|
}
|
|
@@ -113,5 +120,15 @@ export async function runKjCommand({ command, commandArgs = [], options = {}, en
|
|
|
113
120
|
payload.errorSummary = result.stderr.split("\n").filter(Boolean).slice(-3).join(" | ");
|
|
114
121
|
}
|
|
115
122
|
|
|
123
|
+
// Sanitize output: strip sonar token from any log/error output
|
|
124
|
+
if (options.sonarToken) {
|
|
125
|
+
const masked = maskToken(options.sonarToken);
|
|
126
|
+
for (const key of ["stdout", "stderr", "errorSummary"]) {
|
|
127
|
+
if (payload[key] && typeof payload[key] === "string") {
|
|
128
|
+
payload[key] = payload[key].replaceAll(options.sonarToken, masked);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
|
|
116
133
|
return payload;
|
|
117
134
|
}
|
|
package/src/orchestrator/direct-actions.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// Direct actions: commands Karajan Brain can execute without invoking a full role.
|
|
2
2
|
// Keeps the action catalog small, auditable, and safe.
|
|
3
3
|
|
|
4
|
-
import { execSync } from "node:child_process";
|
|
4
|
+
import { execSync, execFileSync } from "node:child_process";
|
|
5
5
|
import fs from "node:fs/promises";
|
|
6
6
|
import path from "node:path";
|
|
7
7
|
|
|
@@ -32,7 +32,11 @@ const ALLOWED_COMMANDS = [
|
|
|
32
32
|
*/
|
|
33
33
|
function isCommandAllowed(cmd) {
|
|
34
34
|
if (!cmd || typeof cmd !== "string") return false;
|
|
35
|
-
|
|
35
|
+
const tokens = cmd.trim().split(/\s+/);
|
|
36
|
+
return ALLOWED_COMMANDS.some(allowed => {
|
|
37
|
+
const allowedTokens = allowed.split(/\s+/);
|
|
38
|
+
return allowedTokens.every((t, i) => tokens[i] === t) && tokens.length === allowedTokens.length;
|
|
39
|
+
});
|
|
36
40
|
}
|
|
37
41
|
|
|
38
42
|
/**
|
|
@@ -128,8 +132,7 @@ async function gitAdd({ files, cwd }) {
|
|
|
128
132
|
return { ok: false, error: `Invalid file path: ${f}`, action: "git_add" };
|
|
129
133
|
}
|
|
130
134
|
}
|
|
131
|
-
|
|
132
|
-
execSync(`git add ${args}`, {
|
|
135
|
+
execFileSync("git", ["add", ...files], {
|
|
133
136
|
cwd: cwd || process.cwd(),
|
|
134
137
|
encoding: "utf8",
|
|
135
138
|
stdio: ["pipe", "pipe", "pipe"]
|
package/src/orchestrator.js
CHANGED
|
@@ -1669,7 +1669,77 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
1669
1669
|
ctx.brainCtx.feedbackQueue = fresh.feedbackQueue;
|
|
1670
1670
|
ctx.brainCtx.verificationTracker = fresh.verificationTracker;
|
|
1671
1671
|
}
|
|
1672
|
+
// Apply per-HU policies based on task_type (infra skips reviewer/sonar/tdd)
|
|
1673
|
+
const { applyPolicies } = await import("./guards/policy-resolver.js");
|
|
1674
|
+
const huPolicies = applyPolicies({ taskType: story.task_type, policies: ctx.config.policies });
|
|
1675
|
+
const savedFlags = { ...ctx.pipelineFlags };
|
|
1676
|
+
if (!huPolicies.reviewer) ctx.pipelineFlags.reviewerEnabled = false;
|
|
1677
|
+
if (!huPolicies.tdd) ctx.config.development = { ...ctx.config.development, methodology: "standard", require_test_changes: false };
|
|
1678
|
+
if (!huPolicies.sonar) ctx.config.sonarqube = { ...ctx.config.sonarqube, enabled: false };
|
|
1679
|
+
if (!huPolicies.testsRequired) ctx.pipelineFlags.testerEnabled = false;
|
|
1680
|
+
logger.info(`HU ${story.id} (${story.task_type}): policies → reviewer=${huPolicies.reviewer}, tdd=${huPolicies.tdd}, sonar=${huPolicies.sonar}, tests=${huPolicies.testsRequired}`);
|
|
1681
|
+
|
|
1672
1682
|
const branchName = await prepareHuBranch({ story, huBranches, config: ctx.config, logger });
|
|
1683
|
+
const projectDir = ctx.config.projectDir || process.cwd();
|
|
1684
|
+
|
|
1685
|
+
// If HU has acceptance_tests, Brain runs them as the gate instead of
|
|
1686
|
+
// the standard reviewer/tester pipeline. This is the radical fix:
|
|
1687
|
+
// concrete executable tests replace subjective reviewer opinions.
|
|
1688
|
+
if (story.acceptance_tests?.length > 0) {
|
|
1689
|
+
const { runAcceptanceTests, buildDiagnosticPrompt } = await import("./hu/acceptance-runner.js");
|
|
1690
|
+
|
|
1691
|
+
for (let attempt = 1; attempt <= ctx.config.max_iterations; attempt++) {
|
|
1692
|
+
logger.info(`HU ${story.id}: coder iteration ${attempt}/${ctx.config.max_iterations}`);
|
|
1693
|
+
emitProgress(emitter, makeEvent("iteration:start", { ...ctx.eventBase, stage: "iteration" }, {
|
|
1694
|
+
message: `Iteration ${attempt}/${ctx.config.max_iterations}`,
|
|
1695
|
+
detail: { iteration: attempt, maxIterations: ctx.config.max_iterations }
|
|
1696
|
+
}));
|
|
1697
|
+
|
|
1698
|
+
// Coder runs with the HU task + any diagnostic feedback from previous attempt
|
|
1699
|
+
const coderResult = await runCoderStage({
|
|
1700
|
+
coderRoleInstance: ctx.coderRoleInstance, coderRole: ctx.coderRole,
|
|
1701
|
+
config: ctx.config, logger, emitter, eventBase: ctx.eventBase,
|
|
1702
|
+
session: ctx.session, plannedTask: ctx.plannedTask,
|
|
1703
|
+
trackBudget: ctx.trackBudget, iteration: attempt, brainCtx: ctx.brainCtx
|
|
1704
|
+
});
|
|
1705
|
+
if (coderResult?.action === "standby" || coderResult?.action === "pause") {
|
|
1706
|
+
return coderResult?.result || { approved: false, reason: "coder_failed" };
|
|
1707
|
+
}
|
|
1708
|
+
|
|
1709
|
+
// Brain runs acceptance tests
|
|
1710
|
+
logger.info(`HU ${story.id}: running ${story.acceptance_tests.length} acceptance tests`);
|
|
1711
|
+
emitProgress(emitter, makeEvent("hu:acceptance-start", { ...ctx.eventBase, stage: "acceptance" }, {
|
|
1712
|
+
message: `Running ${story.acceptance_tests.length} acceptance tests`,
|
|
1713
|
+
detail: { huId: story.id, testCount: story.acceptance_tests.length }
|
|
1714
|
+
}));
|
|
1715
|
+
|
|
1716
|
+
const testResult = await runAcceptanceTests(story.acceptance_tests, projectDir);
|
|
1717
|
+
emitProgress(emitter, makeEvent("hu:acceptance-end", { ...ctx.eventBase, stage: "acceptance" }, {
|
|
1718
|
+
status: testResult.allPassed ? "ok" : "fail",
|
|
1719
|
+
message: testResult.summary,
|
|
1720
|
+
detail: { allPassed: testResult.allPassed, results: testResult.results.map(r => ({ cmd: r.cmd, passed: r.passed })) }
|
|
1721
|
+
}));
|
|
1722
|
+
|
|
1723
|
+
if (testResult.allPassed) {
|
|
1724
|
+
logger.info(`HU ${story.id}: all acceptance tests PASSED — approved`);
|
|
1725
|
+
await finalizeHuCommit({ story, branchName, config: ctx.config, logger });
|
|
1726
|
+
return { approved: true, sessionId: ctx.session.id, reason: "acceptance_tests_passed" };
|
|
1727
|
+
}
|
|
1728
|
+
|
|
1729
|
+
// Brain diagnoses failures and sends concrete fix to coder
|
|
1730
|
+
const failed = testResult.results.filter(r => !r.passed);
|
|
1731
|
+
const diagnostic = buildDiagnosticPrompt(failed);
|
|
1732
|
+
logger.warn(`HU ${story.id}: ${failed.length} acceptance test(s) FAILED — sending diagnostic to coder`);
|
|
1733
|
+
ctx.session.last_reviewer_feedback = diagnostic;
|
|
1734
|
+
ctx.plannedTask = `${huTask}\n\n--- ACCEPTANCE TEST FAILURES ---\n${diagnostic}`;
|
|
1735
|
+
}
|
|
1736
|
+
|
|
1737
|
+
// All iterations exhausted
|
|
1738
|
+
logger.warn(`HU ${story.id}: max iterations reached with acceptance tests still failing`);
|
|
1739
|
+
return { approved: false, sessionId: ctx.session.id, reason: "acceptance_tests_failed" };
|
|
1740
|
+
}
|
|
1741
|
+
|
|
1742
|
+
// Fallback: no acceptance_tests → standard pipeline (reviewer/tester)
|
|
1673
1743
|
try {
|
|
1674
1744
|
const result = await runIterationLoop(ctx, { task: huTask, askQuestion, emitter, logger });
|
|
1675
1745
|
if (result?.approved) {
|
|
@@ -1678,6 +1748,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
1678
1748
|
return result;
|
|
1679
1749
|
} finally {
|
|
1680
1750
|
ctx.config.max_iterations = originalMaxIterations;
|
|
1751
|
+
Object.assign(ctx.pipelineFlags, savedFlags);
|
|
1681
1752
|
}
|
|
1682
1753
|
},
|
|
1683
1754
|
emitter,
|
package/src/sonar/credentials.js
CHANGED
|
@@ -48,5 +48,5 @@ export async function saveSonarToken(token) {
|
|
|
48
48
|
existing.token = token;
|
|
49
49
|
const dir = getKarajanHome();
|
|
50
50
|
await fs.mkdir(dir, { recursive: true });
|
|
51
|
-
await fs.writeFile(filePath, JSON.stringify(existing, null, 2), "utf8");
|
|
51
|
+
await fs.writeFile(filePath, JSON.stringify(existing, null, 2), { encoding: "utf8", mode: 0o600 });
|
|
52
52
|
}
|