karajan-code 2.3.2 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "karajan-code",
3
- "version": "2.3.2",
3
+ "version": "2.4.0",
4
4
  "description": "Local multi-agent coding orchestrator with TDD, SonarQube, and code review pipeline",
5
5
  "type": "module",
6
6
  "license": "AGPL-3.0",
@@ -0,0 +1,92 @@
1
+ /**
2
+ * HU Acceptance Test Runner.
3
+ * Executes acceptance_tests commands for an HU and returns structured results.
4
+ * Brain uses this to determine if an HU is done (all pass) or needs fixing (with diagnostics).
5
+ */
6
+ import { runCommand } from "../utils/process.js";
7
+
8
/**
 * Run a single acceptance test command.
 * @param {string} cmd - Shell command to execute
 * @param {string} cwd - Working directory
 * @param {number} [timeoutMs=30000] - Timeout per test
 * @returns {Promise<{cmd: string, passed: boolean, output: string, exitCode: number}>}
 */
async function runSingleTest(cmd, cwd, timeoutMs = 30000) {
  try {
    const res = await runCommand("bash", ["-c", cmd], { timeout: timeoutMs, cwd });
    const combined = `${res.stdout || ""}${res.stderr || ""}`;
    return {
      cmd,
      passed: res.exitCode === 0,
      // Keep only the tail of the output; that is where the diagnostics live.
      output: combined.slice(-500),
      exitCode: res.exitCode
    };
  } catch (err) {
    // Timeouts and spawn failures land here; report them as a failed test.
    return {
      cmd,
      passed: false,
      output: err.message?.slice(-500) || "Command timed out or crashed",
      exitCode: -1
    };
  }
}
37
+
38
/**
 * Run all acceptance tests for an HU, sequentially.
 * @param {string[]} tests - Array of shell commands
 * @param {string} cwd - Working directory
 * @param {number} [timeoutMs=30000] - Timeout applied to each individual test
 * @returns {Promise<{allPassed: boolean, results: object[], summary: string, diagnostics: string|null}>}
 */
export async function runAcceptanceTests(tests, cwd, timeoutMs = 30000) {
  // An HU without executable tests cannot be auto-approved: fail closed.
  if (!tests || tests.length === 0) {
    return { allPassed: false, results: [], summary: "No acceptance tests defined", diagnostics: null };
  }

  const results = [];
  for (const cmd of tests) {
    // Sequential on purpose: tests may share state (node_modules, build output).
    results.push(await runSingleTest(cmd, cwd, timeoutMs));
  }

  const failed = results.filter((r) => !r.passed);
  const allPassed = failed.length === 0;
  const summary = `${results.length - failed.length}/${results.length} acceptance tests passed`;

  // Diagnostics: last few output lines of each failed command, for the coder.
  let diagnostics = null;
  if (!allPassed) {
    diagnostics = failed
      .map((f) =>
        `FAIL: ${f.cmd}\n exit=${f.exitCode}\n output: ${f.output.trim().split("\n").slice(-5).join("\n ")}`
      )
      .join("\n\n");
  }

  return { allPassed, results, summary, diagnostics };
}
70
+
71
/**
 * Build a concrete diagnostic prompt for Brain to send to the coder.
 * Summarizes each failed test (command, exit code, tail of its output)
 * as actionable fix instructions.
 * @param {object[]} failedResults - Array of failed test results
 * @returns {string} Prompt for the coder with concrete fix instructions
 */
export function buildDiagnosticPrompt(failedResults) {
  if (!failedResults?.length) return "";
  const out = ["The following acceptance tests FAILED. Fix each one:", ""];
  failedResults.forEach((f) => {
    out.push(`❌ Command: ${f.cmd}`);
    out.push(` Exit code: ${f.exitCode}`);
    out.push(` Last output:`);
    // Only the last 8 lines of output — enough context without noise.
    for (const line of f.output.trim().split("\n").slice(-8)) {
      out.push(` ${line}`);
    }
    out.push("");
  });
  out.push("Fix ALL failing tests. Run each command yourself to verify before finishing.");
  return out.join("\n");
}
@@ -94,12 +94,12 @@ function buildSetupHu({ stackHints }) {
94
94
  "SCOPE (do ONLY this, nothing else):",
95
95
  "- Create package.json (with workspaces if monorepo detected from stack hints)",
96
96
  "- Install all runtime + dev dependencies listed in stack hints",
97
- "- Configure test framework so `npm test` runs (even with 0 tests)",
97
+ "- Install test framework WITH coverage reporter (e.g. vitest + @vitest/coverage-v8)",
98
+ "- Configure vitest.config.js with coverage.enabled = true",
98
99
  "- Create .env.example with placeholder variables",
99
- "- Verify: `npm install` succeeds, `npm test` runs without error",
100
+ "- Verify by running each acceptance_test command below",
100
101
  "",
101
102
  "DO NOT implement any business logic, API routes, components, or features.",
102
- "DO NOT add security middleware, auth, or any application code.",
103
103
  "This HU is ONLY project scaffolding.",
104
104
  "",
105
105
  "Stack hints:",
@@ -114,9 +114,15 @@ function buildSetupHu({ stackHints }) {
114
114
  certified: { text: certifiedText },
115
115
  acceptance_criteria: [
116
116
  "npm install succeeds without errors",
117
- "npm test runs (even with 0 tests)",
118
- ".env.example exists",
119
- "No business logic or application code added"
117
+ "npm test runs without error",
118
+ "npm run test:coverage runs without error",
119
+ ".env.example exists"
120
+ ],
121
+ acceptance_tests: [
122
+ "npm install --ignore-scripts 2>&1 && echo PASS || echo FAIL",
123
+ "npx vitest run 2>&1; test $? -eq 0 && echo PASS || echo FAIL",
124
+ "npx vitest run --coverage 2>&1 | grep -q 'All files\\|% Stmts' && echo PASS || echo FAIL",
125
+ "test -f .env.example && echo PASS || echo FAIL"
120
126
  ]
121
127
  };
122
128
  }
@@ -135,8 +141,8 @@ function buildTaskHu({ id, subtask, projectName, blockedBy }) {
135
141
  "SCOPE (do ONLY this, nothing else):",
136
142
  `- Implement: ${subtask}`,
137
143
  "- Add unit tests for the new code",
144
+ "- Run ALL acceptance_tests listed below and ensure they pass",
138
145
  "- Do NOT touch code outside this subtask's scope",
139
- "- Do NOT refactor or 'improve' unrelated files",
140
146
  "- Target: <200 lines changed (like an atomic PR)"
141
147
  ].join("\n");
142
148
  return {
@@ -149,7 +155,11 @@ function buildTaskHu({ id, subtask, projectName, blockedBy }) {
149
155
  acceptance_criteria: [
150
156
  `${subtask} is implemented and working`,
151
157
  "Unit tests cover the new code",
152
- "No changes to files outside this subtask's scope"
158
+ "All acceptance_tests pass"
159
+ ],
160
+ acceptance_tests: [
161
+ "npx vitest run 2>&1; test $? -eq 0 && echo PASS || echo FAIL",
162
+ "npx vitest run --coverage 2>&1 | grep -q 'All files\\|% Stmts' && echo PASS || echo FAIL"
153
163
  ]
154
164
  };
155
165
  }
package/src/mcp/run-kj.js CHANGED
@@ -5,6 +5,13 @@ import { execa } from "execa";
5
5
  const MODULE_DIR = path.dirname(fileURLToPath(import.meta.url));
6
6
  const CLI_PATH = path.resolve(MODULE_DIR, "..", "cli.js");
7
7
 
8
/** Mask a secret token for safe logging: show first 4 and last 4 chars only. */
function maskToken(token) {
  if (typeof token !== "string" || token.length <= 8) return "***";
  const head = token.slice(0, 4);
  const tail = token.slice(-4);
  // Cap the star run at 16 so very long tokens don't bloat the log line.
  const stars = "*".repeat(Math.min(token.length - 8, 16));
  return head + stars + tail;
}
14
+
8
15
  function normalizeBoolFlag(value, flagName, args) {
9
16
  if (value === true) args.push(flagName);
10
17
  }
@@ -113,5 +120,15 @@ export async function runKjCommand({ command, commandArgs = [], options = {}, en
113
120
  payload.errorSummary = result.stderr.split("\n").filter(Boolean).slice(-3).join(" | ");
114
121
  }
115
122
 
123
+ // Sanitize output: strip sonar token from any log/error output
124
+ if (options.sonarToken) {
125
+ const masked = maskToken(options.sonarToken);
126
+ for (const key of ["stdout", "stderr", "errorSummary"]) {
127
+ if (payload[key] && typeof payload[key] === "string") {
128
+ payload[key] = payload[key].replaceAll(options.sonarToken, masked);
129
+ }
130
+ }
131
+ }
132
+
116
133
  return payload;
117
134
  }
@@ -1,7 +1,7 @@
1
1
  // Direct actions: commands Karajan Brain can execute without invoking a full role.
2
2
  // Keeps the action catalog small, auditable, and safe.
3
3
 
4
- import { execSync } from "node:child_process";
4
+ import { execSync, execFileSync } from "node:child_process";
5
5
  import fs from "node:fs/promises";
6
6
  import path from "node:path";
7
7
 
@@ -32,7 +32,11 @@ const ALLOWED_COMMANDS = [
32
32
  */
33
33
/**
 * Check whether a shell command is on the direct-action allowlist.
 * Matches token-by-token and requires an exact token count, so neither
 * prefix tricks ("npm installX") nor extra arguments slip through.
 * @param {string} cmd - Command line to check
 * @returns {boolean} true only if cmd is exactly an allowed command
 */
function isCommandAllowed(cmd) {
  if (typeof cmd !== "string" || !cmd) return false;
  const requested = cmd.trim().split(/\s+/);
  return ALLOWED_COMMANDS.some((allowed) => {
    const expected = allowed.split(/\s+/);
    if (requested.length !== expected.length) return false;
    return expected.every((token, i) => requested[i] === token);
  });
}
37
41
 
38
42
  /**
@@ -128,8 +132,7 @@ async function gitAdd({ files, cwd }) {
128
132
  return { ok: false, error: `Invalid file path: ${f}`, action: "git_add" };
129
133
  }
130
134
  }
131
- const args = files.map(f => `"${f}"`).join(" ");
132
- execSync(`git add ${args}`, {
135
+ execFileSync("git", ["add", ...files], {
133
136
  cwd: cwd || process.cwd(),
134
137
  encoding: "utf8",
135
138
  stdio: ["pipe", "pipe", "pipe"]
@@ -1680,6 +1680,66 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
1680
1680
  logger.info(`HU ${story.id} (${story.task_type}): policies → reviewer=${huPolicies.reviewer}, tdd=${huPolicies.tdd}, sonar=${huPolicies.sonar}, tests=${huPolicies.testsRequired}`);
1681
1681
 
1682
1682
  const branchName = await prepareHuBranch({ story, huBranches, config: ctx.config, logger });
1683
+ const projectDir = ctx.config.projectDir || process.cwd();
1684
+
1685
+ // If HU has acceptance_tests, Brain runs them as the gate instead of
1686
+ // the standard reviewer/tester pipeline. This is the radical fix:
1687
+ // concrete executable tests replace subjective reviewer opinions.
1688
+ if (story.acceptance_tests?.length > 0) {
1689
+ const { runAcceptanceTests, buildDiagnosticPrompt } = await import("./hu/acceptance-runner.js");
1690
+
1691
+ for (let attempt = 1; attempt <= ctx.config.max_iterations; attempt++) {
1692
+ logger.info(`HU ${story.id}: coder iteration ${attempt}/${ctx.config.max_iterations}`);
1693
+ emitProgress(emitter, makeEvent("iteration:start", { ...ctx.eventBase, stage: "iteration" }, {
1694
+ message: `Iteration ${attempt}/${ctx.config.max_iterations}`,
1695
+ detail: { iteration: attempt, maxIterations: ctx.config.max_iterations }
1696
+ }));
1697
+
1698
+ // Coder runs with the HU task + any diagnostic feedback from previous attempt
1699
+ const coderResult = await runCoderStage({
1700
+ coderRoleInstance: ctx.coderRoleInstance, coderRole: ctx.coderRole,
1701
+ config: ctx.config, logger, emitter, eventBase: ctx.eventBase,
1702
+ session: ctx.session, plannedTask: ctx.plannedTask,
1703
+ trackBudget: ctx.trackBudget, iteration: attempt, brainCtx: ctx.brainCtx
1704
+ });
1705
+ if (coderResult?.action === "standby" || coderResult?.action === "pause") {
1706
+ return coderResult?.result || { approved: false, reason: "coder_failed" };
1707
+ }
1708
+
1709
+ // Brain runs acceptance tests
1710
+ logger.info(`HU ${story.id}: running ${story.acceptance_tests.length} acceptance tests`);
1711
+ emitProgress(emitter, makeEvent("hu:acceptance-start", { ...ctx.eventBase, stage: "acceptance" }, {
1712
+ message: `Running ${story.acceptance_tests.length} acceptance tests`,
1713
+ detail: { huId: story.id, testCount: story.acceptance_tests.length }
1714
+ }));
1715
+
1716
+ const testResult = await runAcceptanceTests(story.acceptance_tests, projectDir);
1717
+ emitProgress(emitter, makeEvent("hu:acceptance-end", { ...ctx.eventBase, stage: "acceptance" }, {
1718
+ status: testResult.allPassed ? "ok" : "fail",
1719
+ message: testResult.summary,
1720
+ detail: { allPassed: testResult.allPassed, results: testResult.results.map(r => ({ cmd: r.cmd, passed: r.passed })) }
1721
+ }));
1722
+
1723
+ if (testResult.allPassed) {
1724
+ logger.info(`HU ${story.id}: all acceptance tests PASSED — approved`);
1725
+ await finalizeHuCommit({ story, branchName, config: ctx.config, logger });
1726
+ return { approved: true, sessionId: ctx.session.id, reason: "acceptance_tests_passed" };
1727
+ }
1728
+
1729
+ // Brain diagnoses failures and sends concrete fix to coder
1730
+ const failed = testResult.results.filter(r => !r.passed);
1731
+ const diagnostic = buildDiagnosticPrompt(failed);
1732
+ logger.warn(`HU ${story.id}: ${failed.length} acceptance test(s) FAILED — sending diagnostic to coder`);
1733
+ ctx.session.last_reviewer_feedback = diagnostic;
1734
+ ctx.plannedTask = `${huTask}\n\n--- ACCEPTANCE TEST FAILURES ---\n${diagnostic}`;
1735
+ }
1736
+
1737
+ // All iterations exhausted
1738
+ logger.warn(`HU ${story.id}: max iterations reached with acceptance tests still failing`);
1739
+ return { approved: false, sessionId: ctx.session.id, reason: "acceptance_tests_failed" };
1740
+ }
1741
+
1742
+ // Fallback: no acceptance_tests → standard pipeline (reviewer/tester)
1683
1743
  try {
1684
1744
  const result = await runIterationLoop(ctx, { task: huTask, askQuestion, emitter, logger });
1685
1745
  if (result?.approved) {
@@ -48,5 +48,5 @@ export async function saveSonarToken(token) {
48
48
  existing.token = token;
49
49
  const dir = getKarajanHome();
50
50
  await fs.mkdir(dir, { recursive: true });
51
- await fs.writeFile(filePath, JSON.stringify(existing, null, 2), "utf8");
51
+ await fs.writeFile(filePath, JSON.stringify(existing, null, 2), { encoding: "utf8", mode: 0o600 });
52
52
  }