karajan-code 2.3.2 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "karajan-code",
3
- "version": "2.3.2",
3
+ "version": "2.4.0",
4
4
  "description": "Local multi-agent coding orchestrator with TDD, SonarQube, and code review pipeline",
5
5
  "type": "module",
6
6
  "license": "AGPL-3.0",
@@ -0,0 +1,92 @@
1
+ /**
2
+ * HU Acceptance Test Runner.
3
+ * Executes acceptance_tests commands for an HU and returns structured results.
4
+ * Brain uses this to determine if an HU is done (all pass) or needs fixing (with diagnostics).
5
+ */
6
+ import { runCommand } from "../utils/process.js";
7
+
8
/**
 * Run a single acceptance test command.
 * @param {string} cmd - Shell command to execute
 * @param {string} cwd - Working directory
 * @param {number} [timeoutMs=30000] - Timeout per test
 * @returns {Promise<{cmd: string, passed: boolean, output: string, exitCode: number}>}
 */
async function runSingleTest(cmd, cwd, timeoutMs = 30000) {
  try {
    const res = await runCommand("bash", ["-c", cmd], { timeout: timeoutMs, cwd });
    const combined = `${res.stdout || ""}${res.stderr || ""}`;
    return {
      cmd,
      passed: res.exitCode === 0,
      // Keep only the tail of the output; that is where the diagnostics live.
      output: combined.slice(-500),
      exitCode: res.exitCode
    };
  } catch (err) {
    // Timeouts and spawn failures land here; report them as a failed test.
    return {
      cmd,
      passed: false,
      output: err.message?.slice(-500) || "Command timed out or crashed",
      exitCode: -1
    };
  }
}
37
+
38
/**
 * Run all acceptance tests for an HU, sequentially.
 * @param {string[]} tests - Array of shell commands
 * @param {string} cwd - Working directory
 * @param {number} [timeoutMs=30000] - Timeout applied to each individual test
 * @returns {Promise<{allPassed: boolean, results: object[], summary: string, diagnostics: string|null}>}
 */
export async function runAcceptanceTests(tests, cwd, timeoutMs = 30000) {
  // An HU without executable tests cannot be auto-approved: fail closed.
  if (!tests || tests.length === 0) {
    return { allPassed: false, results: [], summary: "No acceptance tests defined", diagnostics: null };
  }

  const results = [];
  for (const cmd of tests) {
    // Sequential on purpose: tests may share state (node_modules, build output).
    results.push(await runSingleTest(cmd, cwd, timeoutMs));
  }

  const failed = results.filter((r) => !r.passed);
  const allPassed = failed.length === 0;
  const summary = `${results.length - failed.length}/${results.length} acceptance tests passed`;

  // Diagnostics: last few output lines of each failed command, for the coder.
  let diagnostics = null;
  if (!allPassed) {
    diagnostics = failed
      .map((f) =>
        `FAIL: ${f.cmd}\n exit=${f.exitCode}\n output: ${f.output.trim().split("\n").slice(-5).join("\n ")}`
      )
      .join("\n\n");
  }

  return { allPassed, results, summary, diagnostics };
}
70
+
71
/**
 * Build a concrete diagnostic prompt for Brain to send to the coder.
 * Summarizes each failed test (command, exit code, tail of its output)
 * as actionable fix instructions.
 * @param {object[]} failedResults - Array of failed test results
 * @returns {string} Prompt for the coder with concrete fix instructions
 */
export function buildDiagnosticPrompt(failedResults) {
  if (!failedResults?.length) return "";
  const out = ["The following acceptance tests FAILED. Fix each one:", ""];
  failedResults.forEach((f) => {
    out.push(`❌ Command: ${f.cmd}`);
    out.push(` Exit code: ${f.exitCode}`);
    out.push(` Last output:`);
    // Only the last 8 lines of output — enough context without noise.
    for (const line of f.output.trim().split("\n").slice(-8)) {
      out.push(` ${line}`);
    }
    out.push("");
  });
  out.push("Fix ALL failing tests. Run each command yourself to verify before finishing.");
  return out.join("\n");
}
@@ -94,12 +94,12 @@ function buildSetupHu({ stackHints }) {
94
94
  "SCOPE (do ONLY this, nothing else):",
95
95
  "- Create package.json (with workspaces if monorepo detected from stack hints)",
96
96
  "- Install all runtime + dev dependencies listed in stack hints",
97
- "- Configure test framework so `npm test` runs (even with 0 tests)",
97
+ "- Install test framework WITH coverage reporter (e.g. vitest + @vitest/coverage-v8)",
98
+ "- Configure vitest.config.js with coverage.enabled = true",
98
99
  "- Create .env.example with placeholder variables",
99
- "- Verify: `npm install` succeeds, `npm test` runs without error",
100
+ "- Verify by running each acceptance_test command below",
100
101
  "",
101
102
  "DO NOT implement any business logic, API routes, components, or features.",
102
- "DO NOT add security middleware, auth, or any application code.",
103
103
  "This HU is ONLY project scaffolding.",
104
104
  "",
105
105
  "Stack hints:",
@@ -114,9 +114,15 @@ function buildSetupHu({ stackHints }) {
114
114
  certified: { text: certifiedText },
115
115
  acceptance_criteria: [
116
116
  "npm install succeeds without errors",
117
- "npm test runs (even with 0 tests)",
118
- ".env.example exists",
119
- "No business logic or application code added"
117
+ "npm test runs without error",
118
+ "npm run test:coverage runs without error",
119
+ ".env.example exists"
120
+ ],
121
+ acceptance_tests: [
122
+ "npm install --ignore-scripts 2>&1 && echo PASS || echo FAIL",
123
+ "npx vitest run 2>&1; test $? -eq 0 && echo PASS || echo FAIL",
124
+ "npx vitest run --coverage 2>&1 | grep -q 'All files\\|% Stmts' && echo PASS || echo FAIL",
125
+ "test -f .env.example && echo PASS || echo FAIL"
120
126
  ]
121
127
  };
122
128
  }
@@ -135,8 +141,8 @@ function buildTaskHu({ id, subtask, projectName, blockedBy }) {
135
141
  "SCOPE (do ONLY this, nothing else):",
136
142
  `- Implement: ${subtask}`,
137
143
  "- Add unit tests for the new code",
144
+ "- Run ALL acceptance_tests listed below and ensure they pass",
138
145
  "- Do NOT touch code outside this subtask's scope",
139
- "- Do NOT refactor or 'improve' unrelated files",
140
146
  "- Target: <200 lines changed (like an atomic PR)"
141
147
  ].join("\n");
142
148
  return {
@@ -149,7 +155,11 @@ function buildTaskHu({ id, subtask, projectName, blockedBy }) {
149
155
  acceptance_criteria: [
150
156
  `${subtask} is implemented and working`,
151
157
  "Unit tests cover the new code",
152
- "No changes to files outside this subtask's scope"
158
+ "All acceptance_tests pass"
159
+ ],
160
+ acceptance_tests: [
161
+ "npx vitest run 2>&1; test $? -eq 0 && echo PASS || echo FAIL",
162
+ "npx vitest run --coverage 2>&1 | grep -q 'All files\\|% Stmts' && echo PASS || echo FAIL"
153
163
  ]
154
164
  };
155
165
  }
package/src/mcp/run-kj.js CHANGED
@@ -5,6 +5,13 @@ import { execa } from "execa";
5
5
  const MODULE_DIR = path.dirname(fileURLToPath(import.meta.url));
6
6
  const CLI_PATH = path.resolve(MODULE_DIR, "..", "cli.js");
7
7
 
8
/** Mask a secret token for safe logging: show first 4 and last 4 chars only. */
function maskToken(token) {
  if (typeof token !== "string" || token.length <= 8) return "***";
  const head = token.slice(0, 4);
  const tail = token.slice(-4);
  // Cap the star run at 16 so very long tokens don't bloat the log line.
  const stars = "*".repeat(Math.min(token.length - 8, 16));
  return head + stars + tail;
}
14
+
8
15
  function normalizeBoolFlag(value, flagName, args) {
9
16
  if (value === true) args.push(flagName);
10
17
  }
@@ -113,5 +120,15 @@ export async function runKjCommand({ command, commandArgs = [], options = {}, en
113
120
  payload.errorSummary = result.stderr.split("\n").filter(Boolean).slice(-3).join(" | ");
114
121
  }
115
122
 
123
+ // Sanitize output: strip sonar token from any log/error output
124
+ if (options.sonarToken) {
125
+ const masked = maskToken(options.sonarToken);
126
+ for (const key of ["stdout", "stderr", "errorSummary"]) {
127
+ if (payload[key] && typeof payload[key] === "string") {
128
+ payload[key] = payload[key].replaceAll(options.sonarToken, masked);
129
+ }
130
+ }
131
+ }
132
+
116
133
  return payload;
117
134
  }
@@ -1,7 +1,7 @@
1
1
  // Direct actions: commands Karajan Brain can execute without invoking a full role.
2
2
  // Keeps the action catalog small, auditable, and safe.
3
3
 
4
- import { execSync } from "node:child_process";
4
+ import { execSync, execFileSync } from "node:child_process";
5
5
  import fs from "node:fs/promises";
6
6
  import path from "node:path";
7
7
 
@@ -32,7 +32,11 @@ const ALLOWED_COMMANDS = [
32
32
  */
33
33
/**
 * Check whether a shell command is on the direct-action allowlist.
 * Matches token-by-token and requires an exact token count, so neither
 * prefix tricks ("npm installX") nor extra arguments slip through.
 * @param {string} cmd - Command line to check
 * @returns {boolean} true only if cmd is exactly an allowed command
 */
function isCommandAllowed(cmd) {
  if (typeof cmd !== "string" || !cmd) return false;
  const requested = cmd.trim().split(/\s+/);
  return ALLOWED_COMMANDS.some((allowed) => {
    const expected = allowed.split(/\s+/);
    if (requested.length !== expected.length) return false;
    return expected.every((token, i) => requested[i] === token);
  });
}
37
41
 
38
42
  /**
@@ -128,8 +132,7 @@ async function gitAdd({ files, cwd }) {
128
132
  return { ok: false, error: `Invalid file path: ${f}`, action: "git_add" };
129
133
  }
130
134
  }
131
- const args = files.map(f => `"${f}"`).join(" ");
132
- execSync(`git add ${args}`, {
135
+ execFileSync("git", ["add", ...files], {
133
136
  cwd: cwd || process.cwd(),
134
137
  encoding: "utf8",
135
138
  stdio: ["pipe", "pipe", "pipe"]
@@ -1680,6 +1680,66 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
1680
1680
  logger.info(`HU ${story.id} (${story.task_type}): policies → reviewer=${huPolicies.reviewer}, tdd=${huPolicies.tdd}, sonar=${huPolicies.sonar}, tests=${huPolicies.testsRequired}`);
1681
1681
 
1682
1682
  const branchName = await prepareHuBranch({ story, huBranches, config: ctx.config, logger });
1683
+ const projectDir = ctx.config.projectDir || process.cwd();
1684
+
1685
+ // If HU has acceptance_tests, Brain runs them as the gate instead of
1686
+ // the standard reviewer/tester pipeline. This is the radical fix:
1687
+ // concrete executable tests replace subjective reviewer opinions.
1688
+ if (story.acceptance_tests?.length > 0) {
1689
+ const { runAcceptanceTests, buildDiagnosticPrompt } = await import("./hu/acceptance-runner.js");
1690
+
1691
+ for (let attempt = 1; attempt <= ctx.config.max_iterations; attempt++) {
1692
+ logger.info(`HU ${story.id}: coder iteration ${attempt}/${ctx.config.max_iterations}`);
1693
+ emitProgress(emitter, makeEvent("iteration:start", { ...ctx.eventBase, stage: "iteration" }, {
1694
+ message: `Iteration ${attempt}/${ctx.config.max_iterations}`,
1695
+ detail: { iteration: attempt, maxIterations: ctx.config.max_iterations }
1696
+ }));
1697
+
1698
+ // Coder runs with the HU task + any diagnostic feedback from previous attempt
1699
+ const coderResult = await runCoderStage({
1700
+ coderRoleInstance: ctx.coderRoleInstance, coderRole: ctx.coderRole,
1701
+ config: ctx.config, logger, emitter, eventBase: ctx.eventBase,
1702
+ session: ctx.session, plannedTask: ctx.plannedTask,
1703
+ trackBudget: ctx.trackBudget, iteration: attempt, brainCtx: ctx.brainCtx
1704
+ });
1705
+ if (coderResult?.action === "standby" || coderResult?.action === "pause") {
1706
+ return coderResult?.result || { approved: false, reason: "coder_failed" };
1707
+ }
1708
+
1709
+ // Brain runs acceptance tests
1710
+ logger.info(`HU ${story.id}: running ${story.acceptance_tests.length} acceptance tests`);
1711
+ emitProgress(emitter, makeEvent("hu:acceptance-start", { ...ctx.eventBase, stage: "acceptance" }, {
1712
+ message: `Running ${story.acceptance_tests.length} acceptance tests`,
1713
+ detail: { huId: story.id, testCount: story.acceptance_tests.length }
1714
+ }));
1715
+
1716
+ const testResult = await runAcceptanceTests(story.acceptance_tests, projectDir);
1717
+ emitProgress(emitter, makeEvent("hu:acceptance-end", { ...ctx.eventBase, stage: "acceptance" }, {
1718
+ status: testResult.allPassed ? "ok" : "fail",
1719
+ message: testResult.summary,
1720
+ detail: { allPassed: testResult.allPassed, results: testResult.results.map(r => ({ cmd: r.cmd, passed: r.passed })) }
1721
+ }));
1722
+
1723
+ if (testResult.allPassed) {
1724
+ logger.info(`HU ${story.id}: all acceptance tests PASSED — approved`);
1725
+ await finalizeHuCommit({ story, branchName, config: ctx.config, logger });
1726
+ return { approved: true, sessionId: ctx.session.id, reason: "acceptance_tests_passed" };
1727
+ }
1728
+
1729
+ // Brain diagnoses failures and sends concrete fix to coder
1730
+ const failed = testResult.results.filter(r => !r.passed);
1731
+ const diagnostic = buildDiagnosticPrompt(failed);
1732
+ logger.warn(`HU ${story.id}: ${failed.length} acceptance test(s) FAILED — sending diagnostic to coder`);
1733
+ ctx.session.last_reviewer_feedback = diagnostic;
1734
+ ctx.plannedTask = `${huTask}\n\n--- ACCEPTANCE TEST FAILURES ---\n${diagnostic}`;
1735
+ }
1736
+
1737
+ // All iterations exhausted
1738
+ logger.warn(`HU ${story.id}: max iterations reached with acceptance tests still failing`);
1739
+ return { approved: false, sessionId: ctx.session.id, reason: "acceptance_tests_failed" };
1740
+ }
1741
+
1742
+ // Fallback: no acceptance_tests → standard pipeline (reviewer/tester)
1683
1743
  try {
1684
1744
  const result = await runIterationLoop(ctx, { task: huTask, askQuestion, emitter, logger });
1685
1745
  if (result?.approved) {
@@ -48,5 +48,5 @@ export async function saveSonarToken(token) {
48
48
  existing.token = token;
49
49
  const dir = getKarajanHome();
50
50
  await fs.mkdir(dir, { recursive: true });
51
- await fs.writeFile(filePath, JSON.stringify(existing, null, 2), "utf8");
51
+ await fs.writeFile(filePath, JSON.stringify(existing, null, 2), { encoding: "utf8", mode: 0o600 });
52
52
  }