karajan-code 2.3.1 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "karajan-code",
3
- "version": "2.3.1",
3
+ "version": "2.4.0",
4
4
  "description": "Local multi-agent coding orchestrator with TDD, SonarQube, and code review pipeline",
5
5
  "type": "module",
6
6
  "license": "AGPL-3.0",
@@ -2,7 +2,7 @@ export const VALID_TASK_TYPES = new Set(["sw", "infra", "doc", "add-tests", "ref
2
2
 
3
3
  export const DEFAULT_POLICIES = {
4
4
  sw: { tdd: true, sonar: true, reviewer: true, testsRequired: true },
5
- infra: { tdd: false, sonar: false, reviewer: true, testsRequired: false },
5
+ infra: { tdd: false, sonar: false, reviewer: false, testsRequired: false },
6
6
  doc: { tdd: false, sonar: false, reviewer: true, testsRequired: false },
7
7
  "add-tests": { tdd: false, sonar: true, reviewer: true, testsRequired: true },
8
8
  refactor: { tdd: true, sonar: true, reviewer: true, testsRequired: false },
@@ -0,0 +1,92 @@
1
+ /**
2
+ * HU Acceptance Test Runner.
3
+ * Executes acceptance_tests commands for an HU and returns structured results.
4
+ * Brain uses this to determine if an HU is done (all pass) or needs fixing (with diagnostics).
5
+ */
6
+ import { runCommand } from "../utils/process.js";
7
+
8
/**
 * Run a single acceptance test command.
 * @param {string} cmd - Shell command to execute
 * @param {string} cwd - Working directory
 * @param {number} [timeoutMs=30000] - Timeout per test
 * @returns {Promise<{cmd: string, passed: boolean, output: string, exitCode: number}>}
 */
async function runSingleTest(cmd, cwd, timeoutMs = 30000) {
  try {
    const proc = await runCommand("bash", ["-c", cmd], { timeout: timeoutMs, cwd });
    const combined = (proc.stdout || "") + (proc.stderr || "");
    return {
      cmd,
      passed: proc.exitCode === 0,
      // Keep only the tail: enough context for diagnostics, small enough to log.
      output: combined.slice(-500),
      exitCode: proc.exitCode
    };
  } catch (err) {
    // Timeouts and spawn failures land here; report them as a failed test.
    const tail = err.message?.slice(-500);
    return {
      cmd,
      passed: false,
      output: tail || "Command timed out or crashed",
      exitCode: -1
    };
  }
}
37
+
38
/**
 * Run all acceptance tests for an HU.
 * @param {string[]} tests - Array of shell commands
 * @param {string} cwd - Working directory
 * @returns {Promise<{allPassed: boolean, results: object[], summary: string, diagnostics: string|null}>}
 */
export async function runAcceptanceTests(tests, cwd) {
  if (!tests || tests.length === 0) {
    return { allPassed: false, results: [], summary: "No acceptance tests defined", diagnostics: null };
  }

  // Run sequentially: commands may share state (ports, files) in the project dir.
  const results = [];
  for (const cmd of tests) {
    results.push(await runSingleTest(cmd, cwd));
  }

  const failed = results.filter((r) => !r.passed);
  const passedCount = results.length - failed.length;
  const allPassed = failed.length === 0;

  const summary = `${passedCount}/${results.length} acceptance tests passed`;

  // Diagnostics carry the last few output lines of each failure for the coder.
  let diagnostics = null;
  if (!allPassed) {
    diagnostics = failed
      .map((f) => `FAIL: ${f.cmd}\n exit=${f.exitCode}\n output: ${f.output.trim().split("\n").slice(-5).join("\n ")}`)
      .join("\n\n");
  }

  return { allPassed, results, summary, diagnostics };
}
70
+
71
/**
 * Build a concrete diagnostic prompt for Brain to send to the coder.
 * Reads the failed test outputs and produces actionable instructions.
 * @param {object[]} failedResults - Array of failed test results
 * @returns {string} Prompt for the coder with concrete fix instructions
 */
export function buildDiagnosticPrompt(failedResults) {
  if (!failedResults || failedResults.length === 0) return "";
  const lines = ["The following acceptance tests FAILED. Fix each one:", ""];
  failedResults.forEach((f) => {
    lines.push(`❌ Command: ${f.cmd}`);
    lines.push(` Exit code: ${f.exitCode}`);
    lines.push(` Last output:`);
    // Show only the tail of the output — that is where test runners print failures.
    for (const outputLine of f.output.trim().split("\n").slice(-8)) {
      lines.push(` ${outputLine}`);
    }
    lines.push("");
  });
  lines.push("Fix ALL failing tests. Run each command yourself to verify before finishing.");
  return lines.join("\n");
}
@@ -66,6 +66,20 @@ export function needsSetupHu({ isNewProject = false, stackHints = [], subtasks =
66
66
  return subtasks.some(s => setupKeywords.test(s));
67
67
  }
68
68
 
69
/**
 * Filter conflicting stack hints. When Node.js ecosystem keywords are present,
 * remove Go/Rust/Python keywords that were detected from gitignore patterns
 * but aren't actually part of the task.
 * @param {string[]} hints - Detected stack hint keywords (may be null/empty)
 * @returns {string[]} The hints with cross-ecosystem noise removed, or the
 *   input unchanged when no Node ecosystem keyword is present.
 */
function filterConflictingHints(hints) {
  if (!hints || hints.length === 0) return hints;
  const nodeEcosystem = new Set(["express", "vite", "vitest", "jest", "next", "astro", "react", "vue", "svelte", "nestjs", "monorepo", "workspaces"]);
  // Keywords from other ecosystems that commonly leak in from shared
  // .gitignore templates. The doc comment promised Go/Rust/Python filtering,
  // but the original set only covered Go — now all three are covered.
  const conflicting = new Set(["gin", "fiber", "go", "cargo", "rust", "actix", "tokio", "flask", "django", "fastapi", "pip", "python"]);
  const hasNode = hints.some((h) => nodeEcosystem.has(h));
  if (!hasNode) return hints;
  return hints.filter((h) => !conflicting.has(h));
}
82
+
69
83
  /**
70
84
  * Build a MINIMAL setup HU — project structure + deps only.
71
85
  * NEVER includes the full original task. The coder must only do setup.
@@ -80,12 +94,12 @@ function buildSetupHu({ stackHints }) {
80
94
  "SCOPE (do ONLY this, nothing else):",
81
95
  "- Create package.json (with workspaces if monorepo detected from stack hints)",
82
96
  "- Install all runtime + dev dependencies listed in stack hints",
83
- "- Configure test framework so `npm test` runs (even with 0 tests)",
97
+ "- Install test framework WITH coverage reporter (e.g. vitest + @vitest/coverage-v8)",
98
+ "- Configure vitest.config.js with coverage.enabled = true",
84
99
  "- Create .env.example with placeholder variables",
85
- "- Verify: `npm install` succeeds, `npm test` runs without error",
100
+ "- Verify by running each acceptance_test command below",
86
101
  "",
87
102
  "DO NOT implement any business logic, API routes, components, or features.",
88
- "DO NOT add security middleware, auth, or any application code.",
89
103
  "This HU is ONLY project scaffolding.",
90
104
  "",
91
105
  "Stack hints:",
@@ -100,9 +114,15 @@ function buildSetupHu({ stackHints }) {
100
114
  certified: { text: certifiedText },
101
115
  acceptance_criteria: [
102
116
  "npm install succeeds without errors",
103
- "npm test runs (even with 0 tests)",
104
- ".env.example exists",
105
- "No business logic or application code added"
117
+ "npm test runs without error",
118
+ "npm run test:coverage runs without error",
119
+ ".env.example exists"
120
+ ],
121
+ acceptance_tests: [
122
+ "npm install --ignore-scripts 2>&1 && echo PASS || echo FAIL",
123
+ "npx vitest run 2>&1; test $? -eq 0 && echo PASS || echo FAIL",
124
+ "npx vitest run --coverage 2>&1 | grep -q 'All files\\|% Stmts' && echo PASS || echo FAIL",
125
+ "test -f .env.example && echo PASS || echo FAIL"
106
126
  ]
107
127
  };
108
128
  }
@@ -121,8 +141,8 @@ function buildTaskHu({ id, subtask, projectName, blockedBy }) {
121
141
  "SCOPE (do ONLY this, nothing else):",
122
142
  `- Implement: ${subtask}`,
123
143
  "- Add unit tests for the new code",
144
+ "- Run ALL acceptance_tests listed below and ensure they pass",
124
145
  "- Do NOT touch code outside this subtask's scope",
125
- "- Do NOT refactor or 'improve' unrelated files",
126
146
  "- Target: <200 lines changed (like an atomic PR)"
127
147
  ].join("\n");
128
148
  return {
@@ -135,7 +155,11 @@ function buildTaskHu({ id, subtask, projectName, blockedBy }) {
135
155
  acceptance_criteria: [
136
156
  `${subtask} is implemented and working`,
137
157
  "Unit tests cover the new code",
138
- "No changes to files outside this subtask's scope"
158
+ "All acceptance_tests pass"
159
+ ],
160
+ acceptance_tests: [
161
+ "npx vitest run 2>&1; test $? -eq 0 && echo PASS || echo FAIL",
162
+ "npx vitest run --coverage 2>&1 | grep -q 'All files\\|% Stmts' && echo PASS || echo FAIL"
139
163
  ]
140
164
  };
141
165
  }
@@ -168,13 +192,14 @@ export function generateHuBatch({
168
192
  }
169
193
 
170
194
  const stories = [];
171
- const needsSetup = needsSetupHu({ isNewProject, stackHints, subtasks });
195
+ const filteredHints = filterConflictingHints(stackHints);
196
+ const needsSetup = needsSetupHu({ isNewProject, stackHints: filteredHints, subtasks });
172
197
  let nextId = 1;
173
198
 
174
199
  const projectName = deriveProjectName(originalTask);
175
200
 
176
201
  if (needsSetup) {
177
- stories.push(buildSetupHu({ stackHints }));
202
+ stories.push(buildSetupHu({ stackHints: filteredHints }));
178
203
  nextId = 2;
179
204
  }
180
205
 
package/src/mcp/run-kj.js CHANGED
@@ -5,6 +5,13 @@ import { execa } from "execa";
5
5
  const MODULE_DIR = path.dirname(fileURLToPath(import.meta.url));
6
6
  const CLI_PATH = path.resolve(MODULE_DIR, "..", "cli.js");
7
7
 
8
/**
 * Mask a secret token for safe logging: show first 4 and last 4 chars only.
 * @param {string} token - Secret value to mask (non-strings yield "***")
 * @returns {string} Masked representation, never the full secret
 */
function maskToken(token) {
  if (!token || typeof token !== "string") return "***";
  // Fully mask short tokens: the previous `<= 8` cutoff let a 9-char secret
  // leak 8 of its 9 characters. Below 12 chars, revealing 8 is too much.
  if (token.length < 12) return "***";
  // Cap the star run at 16 so very long tokens don't bloat log lines.
  return `${token.slice(0, 4)}${"*".repeat(Math.min(token.length - 8, 16))}${token.slice(-4)}`;
}
14
+
8
15
  function normalizeBoolFlag(value, flagName, args) {
9
16
  if (value === true) args.push(flagName);
10
17
  }
@@ -113,5 +120,15 @@ export async function runKjCommand({ command, commandArgs = [], options = {}, en
113
120
  payload.errorSummary = result.stderr.split("\n").filter(Boolean).slice(-3).join(" | ");
114
121
  }
115
122
 
123
+ // Sanitize output: strip sonar token from any log/error output
124
+ if (options.sonarToken) {
125
+ const masked = maskToken(options.sonarToken);
126
+ for (const key of ["stdout", "stderr", "errorSummary"]) {
127
+ if (payload[key] && typeof payload[key] === "string") {
128
+ payload[key] = payload[key].replaceAll(options.sonarToken, masked);
129
+ }
130
+ }
131
+ }
132
+
116
133
  return payload;
117
134
  }
@@ -1,7 +1,7 @@
1
1
  // Direct actions: commands Karajan Brain can execute without invoking a full role.
2
2
  // Keeps the action catalog small, auditable, and safe.
3
3
 
4
- import { execSync } from "node:child_process";
4
+ import { execSync, execFileSync } from "node:child_process";
5
5
  import fs from "node:fs/promises";
6
6
  import path from "node:path";
7
7
 
@@ -32,7 +32,11 @@ const ALLOWED_COMMANDS = [
32
32
  */
33
33
function isCommandAllowed(cmd) {
  if (!cmd || typeof cmd !== "string") return false;
  // Canonicalize both sides to single-space-joined token sequences and compare
  // for exact equality. Token-wise exact match (no prefix tricks, no extra
  // arguments) is equivalent to comparing the joined forms, since splitting
  // on /\s+/ leaves no whitespace inside any token.
  const normalized = cmd.trim().split(/\s+/).join(" ");
  return ALLOWED_COMMANDS.some((allowed) => allowed.split(/\s+/).join(" ") === normalized);
}
37
41
 
38
42
  /**
@@ -128,8 +132,7 @@ async function gitAdd({ files, cwd }) {
128
132
  return { ok: false, error: `Invalid file path: ${f}`, action: "git_add" };
129
133
  }
130
134
  }
131
- const args = files.map(f => `"${f}"`).join(" ");
132
- execSync(`git add ${args}`, {
135
+ execFileSync("git", ["add", ...files], {
133
136
  cwd: cwd || process.cwd(),
134
137
  encoding: "utf8",
135
138
  stdio: ["pipe", "pipe", "pipe"]
@@ -1669,7 +1669,77 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
1669
1669
  ctx.brainCtx.feedbackQueue = fresh.feedbackQueue;
1670
1670
  ctx.brainCtx.verificationTracker = fresh.verificationTracker;
1671
1671
  }
1672
+ // Apply per-HU policies based on task_type (infra skips reviewer/sonar/tdd)
1673
+ const { applyPolicies } = await import("./guards/policy-resolver.js");
1674
+ const huPolicies = applyPolicies({ taskType: story.task_type, policies: ctx.config.policies });
1675
+ const savedFlags = { ...ctx.pipelineFlags };
1676
+ if (!huPolicies.reviewer) ctx.pipelineFlags.reviewerEnabled = false;
1677
+ if (!huPolicies.tdd) ctx.config.development = { ...ctx.config.development, methodology: "standard", require_test_changes: false };
1678
+ if (!huPolicies.sonar) ctx.config.sonarqube = { ...ctx.config.sonarqube, enabled: false };
1679
+ if (!huPolicies.testsRequired) ctx.pipelineFlags.testerEnabled = false;
1680
+ logger.info(`HU ${story.id} (${story.task_type}): policies → reviewer=${huPolicies.reviewer}, tdd=${huPolicies.tdd}, sonar=${huPolicies.sonar}, tests=${huPolicies.testsRequired}`);
1681
+
1672
1682
  const branchName = await prepareHuBranch({ story, huBranches, config: ctx.config, logger });
1683
+ const projectDir = ctx.config.projectDir || process.cwd();
1684
+
1685
+ // If HU has acceptance_tests, Brain runs them as the gate instead of
1686
+ // the standard reviewer/tester pipeline. This is the radical fix:
1687
+ // concrete executable tests replace subjective reviewer opinions.
1688
+ if (story.acceptance_tests?.length > 0) {
1689
+ const { runAcceptanceTests, buildDiagnosticPrompt } = await import("./hu/acceptance-runner.js");
1690
+
1691
+ for (let attempt = 1; attempt <= ctx.config.max_iterations; attempt++) {
1692
+ logger.info(`HU ${story.id}: coder iteration ${attempt}/${ctx.config.max_iterations}`);
1693
+ emitProgress(emitter, makeEvent("iteration:start", { ...ctx.eventBase, stage: "iteration" }, {
1694
+ message: `Iteration ${attempt}/${ctx.config.max_iterations}`,
1695
+ detail: { iteration: attempt, maxIterations: ctx.config.max_iterations }
1696
+ }));
1697
+
1698
+ // Coder runs with the HU task + any diagnostic feedback from previous attempt
1699
+ const coderResult = await runCoderStage({
1700
+ coderRoleInstance: ctx.coderRoleInstance, coderRole: ctx.coderRole,
1701
+ config: ctx.config, logger, emitter, eventBase: ctx.eventBase,
1702
+ session: ctx.session, plannedTask: ctx.plannedTask,
1703
+ trackBudget: ctx.trackBudget, iteration: attempt, brainCtx: ctx.brainCtx
1704
+ });
1705
+ if (coderResult?.action === "standby" || coderResult?.action === "pause") {
1706
+ return coderResult?.result || { approved: false, reason: "coder_failed" };
1707
+ }
1708
+
1709
+ // Brain runs acceptance tests
1710
+ logger.info(`HU ${story.id}: running ${story.acceptance_tests.length} acceptance tests`);
1711
+ emitProgress(emitter, makeEvent("hu:acceptance-start", { ...ctx.eventBase, stage: "acceptance" }, {
1712
+ message: `Running ${story.acceptance_tests.length} acceptance tests`,
1713
+ detail: { huId: story.id, testCount: story.acceptance_tests.length }
1714
+ }));
1715
+
1716
+ const testResult = await runAcceptanceTests(story.acceptance_tests, projectDir);
1717
+ emitProgress(emitter, makeEvent("hu:acceptance-end", { ...ctx.eventBase, stage: "acceptance" }, {
1718
+ status: testResult.allPassed ? "ok" : "fail",
1719
+ message: testResult.summary,
1720
+ detail: { allPassed: testResult.allPassed, results: testResult.results.map(r => ({ cmd: r.cmd, passed: r.passed })) }
1721
+ }));
1722
+
1723
+ if (testResult.allPassed) {
1724
+ logger.info(`HU ${story.id}: all acceptance tests PASSED — approved`);
1725
+ await finalizeHuCommit({ story, branchName, config: ctx.config, logger });
1726
+ return { approved: true, sessionId: ctx.session.id, reason: "acceptance_tests_passed" };
1727
+ }
1728
+
1729
+ // Brain diagnoses failures and sends concrete fix to coder
1730
+ const failed = testResult.results.filter(r => !r.passed);
1731
+ const diagnostic = buildDiagnosticPrompt(failed);
1732
+ logger.warn(`HU ${story.id}: ${failed.length} acceptance test(s) FAILED — sending diagnostic to coder`);
1733
+ ctx.session.last_reviewer_feedback = diagnostic;
1734
+ ctx.plannedTask = `${huTask}\n\n--- ACCEPTANCE TEST FAILURES ---\n${diagnostic}`;
1735
+ }
1736
+
1737
+ // All iterations exhausted
1738
+ logger.warn(`HU ${story.id}: max iterations reached with acceptance tests still failing`);
1739
+ return { approved: false, sessionId: ctx.session.id, reason: "acceptance_tests_failed" };
1740
+ }
1741
+
1742
+ // Fallback: no acceptance_tests → standard pipeline (reviewer/tester)
1673
1743
  try {
1674
1744
  const result = await runIterationLoop(ctx, { task: huTask, askQuestion, emitter, logger });
1675
1745
  if (result?.approved) {
@@ -1678,6 +1748,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
1678
1748
  return result;
1679
1749
  } finally {
1680
1750
  ctx.config.max_iterations = originalMaxIterations;
1751
+ Object.assign(ctx.pipelineFlags, savedFlags);
1681
1752
  }
1682
1753
  },
1683
1754
  emitter,
@@ -48,5 +48,5 @@ export async function saveSonarToken(token) {
48
48
  existing.token = token;
49
49
  const dir = getKarajanHome();
50
50
  await fs.mkdir(dir, { recursive: true });
51
- await fs.writeFile(filePath, JSON.stringify(existing, null, 2), "utf8");
51
+ await fs.writeFile(filePath, JSON.stringify(existing, null, 2), { encoding: "utf8", mode: 0o600 });
52
52
  }