npm - karajan-code - Versions diffs - 1.32.1 → 1.34.0 - Mend

karajan-code 1.32.1 → 1.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/package.json +1 -1
package/src/config.js +35 -0
package/src/mcp/server-handlers.js +8 -2
package/src/orchestrator/pipeline-context.js +3 -0
package/src/orchestrator.js +39 -2
package/src/prompts/architect.js +5 -1
package/src/prompts/audit.js +4 -10
package/src/prompts/coder.js +9 -1
package/src/prompts/discover.js +4 -10
package/src/prompts/hu-reviewer.js +46 -11
package/src/prompts/planner.js +5 -1
package/src/prompts/reviewer.js +5 -1
package/src/review/parser.js +3 -15
package/src/roles/architect-role.js +1 -1
package/src/roles/coder-role.js +2 -1
package/src/roles/hu-reviewer-role.js +1 -1
package/src/roles/planner-role.js +6 -2
package/src/roles/reviewer-role.js +7 -2
package/src/roles/security-role.js +2 -4
package/src/roles/tester-role.js +2 -4
package/src/roles/triage-role.js +2 -4
package/src/utils/budget.js +51 -14
package/src/utils/display.js +5 -3
package/src/utils/json-extract.js +64 -0
package/templates/roles/architect.md +1 -0
package/templates/roles/coder.md +7 -0
package/templates/roles/hu-reviewer.md +47 -3

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "karajan-code",
-  "version": "1.32.1",
+  "version": "1.34.0",
   "description": "Local multi-agent coding orchestrator with TDD, SonarQube, and code review pipeline",
   "type": "module",
   "license": "AGPL-3.0",

package/src/config.js CHANGED Viewed

@@ -420,6 +420,34 @@ export function applyRunOverrides(config, flags) {
   return out;
 }
+/**
+ * Check if a model string is compatible with an agent provider.
+ * Only returns false when the model clearly belongs to a DIFFERENT provider.
+ * Returns true if we can't determine or if the model is ambiguous.
+ *
+ * Matching is a case-insensitive substring test of the model name against the
+ * signature fragments listed for every provider other than `agent`; the
+ * agent's own fragments are never consulted.
+ *
+ * NOTE(review): an `agent` key that is not in the table below is compared
+ * against all listed providers, so such an agent paired with e.g. a "gpt-..."
+ * model yields false — confirm this is intended for agents outside the table.
+ * NOTE(review): a model name containing a fragment of another provider yields
+ * false even if it also contains one of the agent's own fragments.
+ *
+ * @param {string} agent - Provider key; a falsy value short-circuits to true.
+ * @param {string} model - Model name; a falsy value short-circuits to true.
+ *   A truthy non-string would throw at `toLowerCase()`.
+ * @returns {boolean} false only when a foreign provider's fragment occurs in the model name.
+ */
+const AGENT_MODEL_SIGNATURES = { // provider key -> lowercase fragments identifying its models
+  claude: ["claude", "sonnet", "opus", "haiku"],
+  codex: ["o4-", "o3-", "gpt-", "codex"],
+  gemini: ["gemini", "flash-"]
+};
+export function isModelCompatible(agent, model) {
+  if (!model || !agent) return true; // nothing to compare — treat as compatible
+  const lower = model.toLowerCase(); // fragments in the table are lowercase
+  // Check if model clearly belongs to a different provider
+  for (const [provider, signatures] of Object.entries(AGENT_MODEL_SIGNATURES)) {
+    if (provider === agent) continue; // skip the agent's own fragment list
+    if (signatures.some(s => lower.includes(s))) {
+      // Model belongs to a different provider — incompatible
+      return false;
+    }
+  }
+  // Model doesn't clearly belong to any other provider — allow it
+  return true;
+}
 export function resolveRole(config, role) {
   const roles = config?.roles || {};
   const roleConfig = roles[role] || {};
@@ -434,10 +462,17 @@ export function resolveRole(config, role) {
   }
   let model = roleConfig.model ?? null;
+  let modelIsInherited = false;
   if (!model && role === "coder") model = config?.coder_options?.model ?? null;
   if (!model && role === "reviewer") model = config?.reviewer_options?.model ?? null;
   if (!model && (role === "planner" || role === "refactorer" || role === "solomon" || role === "researcher" || role === "tester" || role === "security" || role === "impeccable" || role === "triage" || role === "discover" || role === "architect" || role === "hu_reviewer" || role === "hu-reviewer")) {
     model = config?.coder_options?.model ?? null;
+    modelIsInherited = !!model;
+  }
+  // Drop inherited model if incompatible with the resolved provider
+  if (modelIsInherited && provider && model && !isModelCompatible(provider, model)) {
+    model = null;
   }
   return { provider, model };

package/src/mcp/server-handlers.js CHANGED Viewed

@@ -790,7 +790,10 @@ async function handleRun(a, server, extra) {
     }
   }
   if (!isPreflightAcked()) {
-    return buildPreflightRequiredResponse("kj_run");
+    // Auto-acknowledge with defaults for autonomous operation
+    ackPreflight({});
+    const logger = createLogger("info", "mcp");
+    logger.info("Preflight auto-acknowledged with default agent config");
   }
   applySessionOverrides(a, ["coder", "reviewer", "tester", "security", "solomon", "enableTester", "enableSecurity", "enableImpeccable"]);
   return handleRunDirect(a, server, extra);
@@ -801,7 +804,10 @@ async function handleCode(a, server, extra) {
     return failPayload("Missing required field: task");
   }
   if (!isPreflightAcked()) {
-    return buildPreflightRequiredResponse("kj_code");
+    // Auto-acknowledge with defaults for autonomous operation
+    ackPreflight({});
+    const logger = createLogger("info", "mcp");
+    logger.info("Preflight auto-acknowledged with default agent config");
   }
   applySessionOverrides(a, ["coder"]);
   return handleCodeDirect(a, server, extra);

package/src/orchestrator/pipeline-context.js CHANGED Viewed

@@ -37,6 +37,9 @@ export class PipelineContext {
     this.pgProject = null;
     this.pgCard = null;
+    // Product context (loaded from .karajan/context.md or product-vision.md)
+    this.productContext = null;
     // Planned task (may differ from original task after planner)
     this.plannedTask = null;

package/src/orchestrator.js CHANGED Viewed

@@ -1,3 +1,5 @@
+import fs from "node:fs/promises";
+import path from "node:path";
 import { createAgent } from "./agents/index.js";
 import {
   createSession,
@@ -39,6 +41,29 @@ import { runPreflightChecks } from "./orchestrator/preflight-checks.js";
 import { detectRtk } from "./utils/rtk-detect.js";
+// --- Product Context loader ---
+/**
+ * Load product context from well-known file locations.
+ * Returns the file content or null if no file is found.
+ *
+ * Candidates are tried in order: `<base>/.karajan/context.md`, then
+ * `<base>/product-vision.md`. The first file that can be read wins and the
+ * remaining candidates are not touched.
+ *
+ * Every read failure is swallowed, not only "file not found": a permission
+ * error or a directory at that path also just moves on to the next candidate.
+ * An existing but empty file resolves with `content: ""` and its path as `source`.
+ *
+ * @param {string|null} projectDir - Base directory; falls back to `process.cwd()` when falsy.
+ * @returns {Promise<{content: string|null, source: string|null}>}
+ *   File text (read as UTF-8) and its path, or both null when no candidate could be read.
+ */
+export async function loadProductContext(projectDir) {
+  const base = projectDir || process.cwd();
+  const candidates = [ // order defines precedence
+    path.join(base, ".karajan", "context.md"),
+    path.join(base, "product-vision.md")
+  ];
+  for (const file of candidates) {
+    try {
+      const content = await fs.readFile(file, "utf8");
+      return { content, source: file }; // first readable candidate wins
+    } catch { /* not found, try next */ }
+  }
+  return { content: null, source: null }; // no candidate could be read
+}
 // --- Extracted helper functions (pure refactoring, zero behavior change) ---
 function resolvePipelineFlags(config) {
@@ -71,8 +96,8 @@ async function handleDryRun({ task, config, flags, emitter, pipelineFlags }) {
   const projectDir = config.projectDir || process.cwd();
   const { rules: reviewRules } = await resolveReviewProfile({ mode: config.review_mode, projectDir });
   const coderRules = await loadFirstExisting(resolveRoleMdPath("coder", projectDir));
-  const coderPrompt = buildCoderPrompt({ task, coderRules, methodology: config.development?.methodology, serenaEnabled: Boolean(config.serena?.enabled), rtkAvailable: Boolean(config.rtk?.available) });
-  const reviewerPrompt = buildReviewerPrompt({ task, diff: "(dry-run: no diff)", reviewRules, mode: config.review_mode, serenaEnabled: Boolean(config.serena?.enabled), rtkAvailable: Boolean(config.rtk?.available) });
+  const coderPrompt = buildCoderPrompt({ task, coderRules, methodology: config.development?.methodology, serenaEnabled: Boolean(config.serena?.enabled), rtkAvailable: Boolean(config.rtk?.available), productContext: config.productContext || null });
+  const reviewerPrompt = buildReviewerPrompt({ task, diff: "(dry-run: no diff)", reviewRules, mode: config.review_mode, serenaEnabled: Boolean(config.serena?.enabled), rtkAvailable: Boolean(config.rtk?.available), productContext: config.productContext || null });
   const summary = {
     dry_run: true,
@@ -1099,6 +1124,18 @@ async function initFlowContext({ task, config, logger, emitter, askQuestion, pgT
     }));
   }
+  // --- Product Context ---
+  const ctxProjectDir = config.projectDir || process.cwd();
+  const { content: productContext, source: productContextSource } = await loadProductContext(ctxProjectDir);
+  if (productContext) {
+    config = { ...config, productContext };
+    logger.info(`Product context loaded from ${productContextSource}`);
+    emitProgress(emitter, makeEvent("context:loaded", ctx.eventBase, {
+      message: "Product context loaded",
+      detail: { source: productContextSource }
+    }));
+  }
   ctx.session = await initializeSession({ task, config, flags, pgTaskId, pgProject });
   ctx.eventBase.sessionId = ctx.session.id;

package/src/prompts/architect.js CHANGED Viewed

@@ -6,7 +6,7 @@ const SUBAGENT_PREAMBLE = [
 export const VALID_VERDICTS = new Set(["ready", "needs_clarification"]);
-export function buildArchitectPrompt({ task, instructions, researchContext = null }) {
+export function buildArchitectPrompt({ task, instructions, researchContext = null, productContext = null }) {
   const sections = [SUBAGENT_PREAMBLE];
   if (instructions) {
@@ -31,6 +31,10 @@ export function buildArchitectPrompt({ task, instructions, researchContext = nul
     'JSON schema: {"verdict":"ready|needs_clarification","architecture":{"type":string,"layers":[string],"patterns":[string],"dataModel":{"entities":[string]},"apiContracts":[string],"dependencies":[string],"tradeoffs":[string]},"questions":[string],"summary":string}'
   );
+  if (productContext) {
+    sections.push(`## Product Context\n${productContext}`);
+  }
   if (researchContext) {
     sections.push(`## Research Context\n${researchContext}`);
   }

package/src/prompts/audit.js CHANGED Viewed

@@ -1,3 +1,5 @@
+import { extractFirstJson } from "../utils/json-extract.js";
 const SUBAGENT_PREAMBLE = [
   "IMPORTANT: You are running as a Karajan sub-agent.",
   "Do NOT ask about using Karajan, do NOT mention Karajan, do NOT suggest orchestration.",
@@ -154,16 +156,8 @@ function parseRecommendation(raw) {
 }
 export function parseAuditOutput(raw) {
-  const text = raw?.trim() || "";
-  const jsonMatch = /\{[\s\S]*\}/.exec(text);
-  if (!jsonMatch) return null;
-  let parsed;
-  try {
-    parsed = JSON.parse(jsonMatch[0]);
-  } catch {
-    return null;
-  }
+  const parsed = extractFirstJson(raw);
+  if (!parsed) return null;
   // Handle both wrapped (result.summary) and flat structures
   const resultObj = parsed.result || parsed;

package/src/prompts/coder.js CHANGED Viewed

@@ -31,7 +31,7 @@ const SERENA_INSTRUCTIONS = [
   "Fall back to reading files only when Serena tools are not sufficient."
 ].join("\n");
-export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary = null, coderRules = null, methodology = "tdd", serenaEnabled = false, rtkAvailable = false, deferredContext = null }) {
+export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary = null, coderRules = null, methodology = "tdd", serenaEnabled = false, rtkAvailable = false, deferredContext = null, productContext = null, plan = null }) {
   const sections = [
     serenaEnabled ? SUBAGENT_PREAMBLE_SERENA : SUBAGENT_PREAMBLE,
     `Task:\n${task}`,
@@ -48,6 +48,14 @@ export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary =
     sections.push(RTK_INSTRUCTIONS);
   }
+  if (productContext) {
+    sections.push(`## Product Context\n${productContext}`);
+  }
+  if (plan) {
+    sections.push(`## Implementation Plan (from planner)\nFollow these steps:\n${plan}`);
+  }
   if (coderRules) {
     sections.push(`Coder rules (MUST follow):\n${coderRules}`);
   }

package/src/prompts/discover.js CHANGED Viewed

@@ -1,3 +1,5 @@
+import { extractFirstJson } from "../utils/json-extract.js";
 const SUBAGENT_PREAMBLE = [
   "IMPORTANT: You are running as a Karajan sub-agent.",
   "Do NOT ask about using Karajan, do NOT mention Karajan, do NOT suggest orchestration.",
@@ -205,16 +207,8 @@ function parseJtbds(rawJtbds) {
 }
 export function parseDiscoverOutput(raw) {
-  const text = raw?.trim() || "";
-  const jsonMatch = /\{[\s\S]*\}/.exec(text);
-  if (!jsonMatch) return null;
-  let parsed;
-  try {
-    parsed = JSON.parse(jsonMatch[0]);
-  } catch {
-    return null;
-  }
+  const parsed = extractFirstJson(raw);
+  if (!parsed) return null;
   return {
     verdict: VALID_VERDICTS.has(parsed.verdict) ? parsed.verdict : "ready",

package/src/prompts/hu-reviewer.js CHANGED Viewed

@@ -1,3 +1,5 @@
+import { extractFirstJson } from "../utils/json-extract.js";
 const SUBAGENT_PREAMBLE = [
   "IMPORTANT: You are running as a Karajan sub-agent.",
   "Do NOT ask about using Karajan, do NOT mention Karajan, do NOT suggest orchestration.",
@@ -19,7 +21,7 @@ const DIMENSION_KEYS = [
  * @param {{stories: Array<{id: string, text: string}>, instructions: string|null, context?: string|null}} params
  * @returns {string} The assembled prompt.
  */
-export function buildHuReviewerPrompt({ stories, instructions, context = null }) {
+export function buildHuReviewerPrompt({ stories, instructions, context = null, productContext = null }) {
   const sections = [SUBAGENT_PREAMBLE];
   if (instructions) {
@@ -37,6 +39,10 @@ export function buildHuReviewerPrompt({ stories, instructions, context = null })
     `JSON schema: {"evaluations":[{"story_id":string,"scores":{"D1_jtbd_context":number,"D2_user_specificity":number,"D3_behavior_change":number,"D4_control_zone":number,"D5_time_constraints":number,"D6_survivable_experiment":number},"total":number,"antipatterns_detected":[string],"verdict":"certified|needs_rewrite|needs_context","evaluation_notes":string,"rewritten":object|null,"certified_hu":object|null,"context_needed":object|null}],"batch_summary":{"total":number,"certified":number,"needs_rewrite":number,"needs_context":number,"consolidated_questions":string}}`
   );
+  if (productContext) {
+    sections.push(`## Product Context\n${productContext}`);
+  }
   if (context) {
     sections.push(`## Additional Context\n${context}`);
   }
@@ -88,22 +94,51 @@ function parseEvaluation(raw) {
   };
 }
+const VALID_AC_FORMATS = new Set(["gherkin", "checklist", "pre_post", "invariant"]); // NOTE(review): not referenced in the visible part of this diff — confirm it is used elsewhere
+const AC_PREFIX_RE = /^\[(GHERKIN|CHECKLIST|PRE_POST|INVARIANT)]\s*/i; // leading "[TAG]" (case-insensitive) plus any whitespace after it
+/**
+ * Detect the format of a single acceptance criterion.
+ * Supports both prefixed strings ("[GHERKIN] Given...") and legacy Gherkin objects ({given, when, then}).
+ *
+ * Resolution order:
+ *  1. A non-null object with at least one of the keys given/when/then is
+ *     rendered as "Given ..., When ..., Then ..." with format "gherkin";
+ *     a missing or falsy part is rendered as "...".
+ *  2. A string starting with a recognized "[TAG]" prefix gets the lowercased
+ *     tag as format and the remainder (prefix and following whitespace removed) as text.
+ *  3. Any other string is treated as a "checklist" item, unchanged.
+ *  4. Anything else (numbers, null, objects without those keys, ...) is
+ *     stringified with String() and labelled "checklist".
+ *
+ * @param {string|object} criterion
+ * @returns {{format: string, text: string}}
+ */
+export function detectAcFormat(criterion) {
+  if (typeof criterion === "object" && criterion !== null && ("given" in criterion || "when" in criterion || "then" in criterion)) {
+    const text = `Given ${criterion.given || "..."}, When ${criterion.when || "..."}, Then ${criterion.then || "..."}`; // "..." stands in for absent/falsy parts
+    return { format: "gherkin", text };
+  }
+  if (typeof criterion === "string") {
+    const match = AC_PREFIX_RE.exec(criterion);
+    if (match) {
+      const format = match[1].toLowerCase(); // e.g. "PRE_POST" -> "pre_post"
+      const text = criterion.slice(match[0].length); // drop the tag and the whitespace after it
+      return { format, text };
+    }
+    return { format: "checklist", text: criterion }; // untagged strings default to checklist
+  }
+  return { format: "checklist", text: String(criterion) }; // last-resort fallback for non-string, non-Gherkin values
+}
+/**
+ * Normalize an acceptance_criteria array to a uniform structure.
+ * Handles both legacy Gherkin objects and prefixed strings.
+ * Each element is converted independently via detectAcFormat; the output has
+ * the same length and order as the input.
+ * @param {Array} criteria - Raw criteria; any non-array value (including null/undefined) yields [].
+ * @returns {Array<{format: string, text: string}>}
+ */
+export function normalizeAcceptanceCriteria(criteria) {
+  if (!Array.isArray(criteria)) return []; // tolerate missing or malformed input
+  return criteria.map(detectAcFormat); // detectAcFormat declares one parameter, so map's extra index/array arguments are unused
+}
 /**
  * Parse the raw output from the HU reviewer agent.
  * @param {string} raw - Raw text output from the agent.
  * @returns {object|null} Parsed result with evaluations and batch_summary, or null.
  */
 export function parseHuReviewerOutput(raw) {
-  const text = raw?.trim() || "";
-  const jsonMatch = /\{[\s\S]*\}/.exec(text);
-  if (!jsonMatch) return null;
-  let parsed;
-  try {
-    parsed = JSON.parse(jsonMatch[0]);
-  } catch {
-    return null;
-  }
+  const parsed = extractFirstJson(raw);
+  if (!parsed) return null;
   if (!Array.isArray(parsed.evaluations)) return null;

package/src/prompts/planner.js CHANGED Viewed

@@ -64,7 +64,7 @@ function formatArchitectContext(architectContext) {
   return lines.length > 1 ? lines.join("\n") : null;
 }
-export function buildPlannerPrompt({ task, context, architectContext }) {
+export function buildPlannerPrompt({ task, context, architectContext, productContext = null }) {
   const parts = [
     "You are an expert software architect. Create an implementation plan for the following task.",
     "",
@@ -73,6 +73,10 @@ export function buildPlannerPrompt({ task, context, architectContext }) {
     ""
   ];
+  if (productContext) {
+    parts.push("## Product Context", productContext, "");
+  }
   if (context) {
     parts.push("## Context", context, "");
   }

package/src/prompts/reviewer.js CHANGED Viewed

@@ -22,7 +22,7 @@ const SERENA_INSTRUCTIONS = [
   "Fall back to reading files only when Serena tools are not sufficient."
 ].join("\n");
-export function buildReviewerPrompt({ task, diff, reviewRules, mode, serenaEnabled = false, rtkAvailable = false }) {
+export function buildReviewerPrompt({ task, diff, reviewRules, mode, serenaEnabled = false, rtkAvailable = false, productContext = null }) {
   const truncatedDiff = diff.length > 12000 ? `${diff.slice(0, 12000)}\n\n[TRUNCATED]` : diff;
   const sections = [
@@ -43,6 +43,10 @@ export function buildReviewerPrompt({ task, diff, reviewRules, mode, serenaEnabl
     sections.push(RTK_INSTRUCTIONS);
   }
+  if (productContext) {
+    sections.push(`## Product Context\n${productContext}`);
+  }
   sections.push(
     `Task context:\n${task}`,
     `Review rules:\n${reviewRules}`,

package/src/review/parser.js CHANGED Viewed

@@ -3,23 +3,11 @@
  * Extracted from orchestrator.js to improve testability and reduce complexity.
  */
+import { extractFirstJson } from "../utils/json-extract.js";
 export function parseMaybeJsonString(value) {
   if (typeof value !== "string") return null;
-  try {
-    return JSON.parse(value);
-  } catch {
-    const start = value.indexOf("{");
-    const end = value.lastIndexOf("}");
-    if (start >= 0 && end > start) {
-      const candidate = value.slice(start, end + 1);
-      try {
-        return JSON.parse(candidate);
-      } catch {
-        return null;
-      }
-    }
-    return null;
-  }
+  return extractFirstJson(value);
 }
 function isReviewPayload(obj) {

package/src/roles/architect-role.js CHANGED Viewed

@@ -59,7 +59,7 @@ export class ArchitectRole extends BaseRole {
     const provider = resolveProvider(this.config);
     const agent = this._createAgent(provider, this.config, this.logger);
-    const prompt = buildArchitectPrompt({ task, instructions: this.instructions, researchContext });
+    const prompt = buildArchitectPrompt({ task, instructions: this.instructions, researchContext, productContext: this.config?.productContext || null });
     const runArgs = { prompt, role: "architect" };
     if (onOutput) runArgs.onOutput = onOutput;
     const result = await agent.runTask(runArgs);

package/src/roles/coder-role.js CHANGED Viewed

@@ -42,7 +42,8 @@ export class CoderRole extends BaseRole {
       coderRules: this.instructions,
       methodology: this.config?.development?.methodology || "tdd",
       serenaEnabled: Boolean(this.config?.serena?.enabled),
-      rtkAvailable: Boolean(this.config?.rtk?.available)
+      rtkAvailable: Boolean(this.config?.rtk?.available),
+      productContext: this.config?.productContext || null
     });
     const coderArgs = { prompt, role: "coder" };

package/src/roles/hu-reviewer-role.js CHANGED Viewed

@@ -52,7 +52,7 @@ export class HuReviewerRole extends BaseRole {
     const provider = resolveProvider(this.config);
     const agent = this._createAgent(provider, this.config, this.logger);
-    const prompt = buildHuReviewerPrompt({ stories, instructions: this.instructions, context });
+    const prompt = buildHuReviewerPrompt({ stories, instructions: this.instructions, context, productContext: this.config?.productContext || null });
     const runArgs = { prompt, role: "hu-reviewer" };
     if (onOutput) runArgs.onOutput = onOutput;
     const result = await agent.runTask(runArgs);

package/src/roles/planner-role.js CHANGED Viewed

@@ -63,7 +63,7 @@ function appendArchitectSection(sections, architectContext) {
   sections.push("");
 }
-function buildPrompt({ task, instructions, research, triageDecomposition, architectContext }) {
+function buildPrompt({ task, instructions, research, triageDecomposition, architectContext, productContext = null }) {
   const sections = [];
   if (instructions) {
@@ -76,6 +76,10 @@ function buildPrompt({ task, instructions, research, triageDecomposition, archit
     ""
   );
+  if (productContext) {
+    sections.push("## Product Context", productContext, "");
+  }
   appendDecompositionSection(sections, triageDecomposition);
   appendArchitectSection(sections, architectContext);
   appendResearchSection(sections, research);
@@ -102,7 +106,7 @@ export class PlannerRole extends BaseRole {
     const provider = resolveProvider(this.config);
     const agent = this._createAgent(provider, this.config, this.logger);
-    const prompt = buildPrompt({ task: taskStr, instructions: this.instructions, research, triageDecomposition, architectContext });
+    const prompt = buildPrompt({ task: taskStr, instructions: this.instructions, research, triageDecomposition, architectContext, productContext: this.config?.productContext || null });
     const runArgs = { prompt, role: "planner" };
     if (onOutput) runArgs.onOutput = onOutput;

package/src/roles/reviewer-role.js CHANGED Viewed

@@ -25,7 +25,7 @@ function truncateDiff(diff) {
     : diff;
 }
-function buildPrompt({ task, diff, reviewRules, reviewMode, instructions, rtkAvailable = false }) {
+function buildPrompt({ task, diff, reviewRules, reviewMode, instructions, rtkAvailable = false, productContext = null }) {
   const sections = [];
   sections.push(SUBAGENT_PREAMBLE);
@@ -42,6 +42,10 @@ function buildPrompt({ task, diff, reviewRules, reviewMode, instructions, rtkAva
     `Task context:\n${task}`
   );
+  if (productContext) {
+    sections.push(`## Product Context\n${productContext}`);
+  }
   if (rtkAvailable) {
     sections.push(RTK_INSTRUCTIONS);
   }
@@ -84,7 +88,8 @@ export class ReviewerRole extends BaseRole {
       reviewRules: reviewRules || null,
       reviewMode: this.config?.review_mode || "standard",
       instructions: this.instructions,
-      rtkAvailable: Boolean(this.config?.rtk?.available)
+      rtkAvailable: Boolean(this.config?.rtk?.available),
+      productContext: this.config?.productContext || null
     });
     const reviewArgs = { prompt, role: "reviewer" };

package/src/roles/security-role.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { BaseRole } from "./base-role.js";
 import { createAgent as defaultCreateAgent } from "../agents/index.js";
+import { extractFirstJson } from "../utils/json-extract.js";
 const SUBAGENT_PREAMBLE = [
   "IMPORTANT: You are running as a Karajan sub-agent.",
@@ -38,10 +39,7 @@ function buildPrompt({ task, diff, instructions }) {
 }
 function parseSecurityOutput(raw) {
-  const text = raw?.trim() || "";
-  const jsonMatch = /\{[\s\S]*\}/.exec(text);
-  if (!jsonMatch) return null;
-  return JSON.parse(jsonMatch[0]);
+  return extractFirstJson(raw);
 }
 function buildSummary(parsed) {

package/src/roles/tester-role.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { BaseRole } from "./base-role.js";
 import { createAgent as defaultCreateAgent } from "../agents/index.js";
+import { extractFirstJson } from "../utils/json-extract.js";
 const SUBAGENT_PREAMBLE = [
   "IMPORTANT: You are running as a Karajan sub-agent.",
@@ -42,10 +43,7 @@ function buildPrompt({ task, diff, sonarIssues, instructions }) {
 }
 function parseTesterOutput(raw) {
-  const text = raw?.trim() || "";
-  const jsonMatch = /\{[\s\S]*\}/.exec(text);
-  if (!jsonMatch) return null;
-  return JSON.parse(jsonMatch[0]);
+  return extractFirstJson(raw);
 }
 export class TesterRole extends BaseRole {

package/src/roles/triage-role.js CHANGED Viewed

@@ -2,6 +2,7 @@ import { BaseRole } from "./base-role.js";
 import { createAgent as defaultCreateAgent } from "../agents/index.js";
 import { buildTriagePrompt } from "../prompts/triage.js";
 import { VALID_TASK_TYPES } from "../guards/policy-resolver.js";
+import { extractFirstJson } from "../utils/json-extract.js";
 const VALID_LEVELS = new Set(["trivial", "simple", "medium", "complex"]);
 const VALID_ROLES = new Set(["planner", "researcher", "refactorer", "reviewer", "tester", "security", "impeccable"]);
@@ -16,10 +17,7 @@ function resolveProvider(config) {
 }
 function parseTriageOutput(raw) {
-  const text = raw?.trim() || "";
-  const jsonMatch = /\{[\s\S]*\}/.exec(text);
-  if (!jsonMatch) return null;
-  return JSON.parse(jsonMatch[0]);
+  return extractFirstJson(raw);
 }
 function normalizeRoles(roles) {

package/src/utils/budget.js CHANGED Viewed

@@ -1,5 +1,17 @@
 import { calculateUsageCostUsd, DEFAULT_MODEL_PRICING, mergePricing } from "./pricing.js";
+/**
+ * Estimate token counts from character lengths when CLIs don't report usage.
+ * Rough heuristic: ~4 characters per token for English text.
+ * Each length is divided by 4 and rounded up; a falsy length (undefined,
+ * null, 0, NaN) counts as 0 tokens.
+ * @param {number} promptLength - Prompt size in characters.
+ * @param {number} responseLength - Response size in characters.
+ * @returns {{tokens_in: number, tokens_out: number, estimated: boolean}}
+ *   Estimated counts; `estimated` is always true so callers can tell these apart from reported usage.
+ */
+export function estimateTokens(promptLength, responseLength) {
+  return {
+    tokens_in: Math.ceil((promptLength || 0) / 4),
+    tokens_out: Math.ceil((responseLength || 0) / 4),
+    estimated: true // marks the figures as heuristic
+  };
+}
 export function extractUsageMetrics(result, defaultModel = null) {
   const usage = result?.usage || result?.metrics || {};
   const tokens_in =
@@ -27,7 +39,22 @@ export function extractUsageMetrics(result, defaultModel = null) {
     defaultModel ??
     null;
-  return { tokens_in, tokens_out, cost_usd, model };
+  // If no real token data AND no explicit cost, estimate from prompt/output sizes.
+  // Estimation is opt-in: only triggered when result.promptSize is explicitly provided.
+  let estimated = false;
+  let finalTokensIn = tokens_in;
+  let finalTokensOut = tokens_out;
+  const hasExplicitCost = cost_usd !== undefined && cost_usd !== null && cost_usd !== "";
+  if (!tokens_in && !tokens_out && !hasExplicitCost && result?.promptSize > 0) {
+    const promptSize = result.promptSize;
+    const outputSize = (result?.output || result?.summary || "").length;
+    const est = estimateTokens(promptSize, outputSize);
+    finalTokensIn = est.tokens_in;
+    finalTokensOut = est.tokens_out;
+    estimated = true;
+  }
+  return { tokens_in: finalTokensIn, tokens_out: finalTokensOut, cost_usd, model, estimated };
 }
 function toSafeNumber(value) {
@@ -63,7 +90,7 @@ export class BudgetTracker {
     this.pricing = mergePricing(DEFAULT_MODEL_PRICING, options.pricing || {});
   }
-  record({ role, provider, model, tokens_in, tokens_out, cost_usd, duration_ms, stage_index } = {}) {
+  record({ role, provider, model, tokens_in, tokens_out, cost_usd, duration_ms, stage_index, estimated } = {}) {
     const safeTokensIn = toSafeNumber(tokens_in);
     const safeTokensOut = toSafeNumber(tokens_out);
     const hasExplicitCost = cost_usd !== undefined && cost_usd !== null && cost_usd !== "";
@@ -89,6 +116,9 @@ export class BudgetTracker {
     if (stage_index !== undefined && stage_index !== null) {
       entry.stage_index = Number(stage_index);
     }
+    if (estimated) {
+      entry.estimated = true;
+    }
     this.entries.push(entry);
     return entry;
   }
@@ -133,26 +163,33 @@ export class BudgetTracker {
       addToBreakdown(byRole, entry.role, entry);
     }
-    return {
+    const hasEstimates = this.entries.some(e => e.estimated);
+    const result = {
       total_tokens: totals.tokens_in + totals.tokens_out,
       total_cost_usd: totals.cost_usd,
       breakdown_by_role: byRole,
       entries: [...this.entries],
       usage_available: this.hasUsageData()
     };
+    if (hasEstimates) result.includes_estimates = true;
+    return result;
   }
   trace() {
-    return this.entries.map((entry, index) => ({
-      index: entry.stage_index ?? index,
-      role: entry.role,
-      provider: entry.provider,
-      model: entry.model,
-      timestamp: entry.timestamp,
-      duration_ms: entry.duration_ms ?? null,
-      tokens_in: entry.tokens_in,
-      tokens_out: entry.tokens_out,
-      cost_usd: entry.cost_usd
-    }));
+    return this.entries.map((entry, index) => {
+      const item = {
+        index: entry.stage_index ?? index,
+        role: entry.role,
+        provider: entry.provider,
+        model: entry.model,
+        timestamp: entry.timestamp,
+        duration_ms: entry.duration_ms ?? null,
+        tokens_in: entry.tokens_in,
+        tokens_out: entry.tokens_out,
+        cost_usd: entry.cost_usd
+      };
+      if (entry.estimated) item.estimated = true;
+      return item;
+    });
   }
 }

package/src/utils/display.js CHANGED Viewed

@@ -225,11 +225,13 @@ function printSessionBudget(budget) {
     console.log(`  ${ANSI.dim}\ud83d\udcb0 Budget: N/A (provider does not report usage)${ANSI.reset}`);
     return;
   }
-  console.log(`  ${ANSI.dim}\ud83d\udcb0 Total tokens: ${budget.total_tokens ?? 0}${ANSI.reset}`);
-  console.log(`  ${ANSI.dim}\ud83d\udcb0 Total cost: $${Number(budget.total_cost_usd || 0).toFixed(2)}${ANSI.reset}`);
+  const estPrefix = budget.includes_estimates ? "~" : "";
+  const estNote = budget.includes_estimates ? " (includes estimates)" : "";
+  console.log(`  ${ANSI.dim}\ud83d\udcb0 Total tokens: ${estPrefix}${budget.total_tokens ?? 0}${estNote}${ANSI.reset}`);
+  console.log(`  ${ANSI.dim}\ud83d\udcb0 Total cost: ${estPrefix}$${Number(budget.total_cost_usd || 0).toFixed(2)}${ANSI.reset}`);
   for (const [role, metrics] of Object.entries(budget.breakdown_by_role || {})) {
     console.log(
-      `  ${ANSI.dim}   - ${role}: ${metrics.total_tokens ?? 0} tokens, $${Number(metrics.total_cost_usd || 0).toFixed(2)}${ANSI.reset}`
+      `  ${ANSI.dim}   - ${role}: ${estPrefix}${metrics.total_tokens ?? 0} tokens, ${estPrefix}$${Number(metrics.total_cost_usd || 0).toFixed(2)}${ANSI.reset}`
     );
   }
 }

package/src/utils/json-extract.js ADDED Viewed

@@ -0,0 +1,64 @@
+/**
+ * Robust JSON extraction from agent output.
+ * Extracts the first complete JSON object from a string,
+ * ignoring any trailing text that would cause parse errors.
+ */
+/**
+ * Extract the first valid JSON object from a raw string.
+ * Handles cases where agents output valid JSON followed by extra text.
+ *
+ * Strategy:
+ *  1. Trim the input (non-strings are coerced with String()) and try to parse
+ *     it as a whole.
+ *  2. Otherwise scan from the first '{' to the '}' that balances it, skipping
+ *     braces inside double-quoted strings and characters following a
+ *     backslash, and parse just that slice.
+ *
+ * NOTE(review): the whole-string fast path returns whatever JSON.parse yields,
+ * so input that is itself a JSON array, number, string or boolean is returned
+ * as that value rather than as an object; the literal "null" returns null,
+ * which is indistinguishable from "nothing found".
+ * NOTE(review): only the first balanced {...} span is tried. If it is not
+ * valid JSON (e.g. "{placeholder}" in prose before the real payload) the
+ * function returns null without looking at later '{' characters.
+ *
+ * @param {string} raw - Raw agent output.
+ * @returns {object|null} Parsed JSON object, or null if no valid JSON found.
+ */
+export function extractFirstJson(raw) {
+  if (!raw) return null; // undefined, null, "", 0, false
+  const str = typeof raw === "string" ? raw.trim() : String(raw).trim();
+  if (!str) return null; // whitespace-only input
+  // Fast path: try parsing the whole string first
+  try {
+    return JSON.parse(str);
+  } catch { /* fall through to extraction */ }
+  // Find the first '{' and match to its closing '}'
+  const start = str.indexOf("{");
+  if (start === -1) return null;
+  let depth = 0; // current brace nesting level, counted outside string literals
+  let inString = false; // true while between an opening and closing double quote
+  let escaped = false; // true when the previous character was a backslash
+  for (let i = start; i < str.length; i++) {
+    const ch = str[i];
+    if (escaped) { // character after a backslash is taken literally
+      escaped = false;
+      continue;
+    }
+    if (ch === "\\") { // applied both inside and outside string literals
+      escaped = true;
+      continue;
+    }
+    if (ch === '"') {
+      inString = !inString;
+      continue;
+    }
+    if (inString) continue; // braces inside strings do not affect depth
+    if (ch === "{") depth++;
+    if (ch === "}") depth--;
+    if (depth === 0) { // the '{' at `start` has just been balanced
+      try {
+        return JSON.parse(str.substring(start, i + 1));
+      } catch {
+        return null; // balanced span is not valid JSON; no further candidates are tried
+      }
+    }
+  }
+  return null; // ran off the end without balancing the first '{'
+}

package/templates/roles/architect.md CHANGED Viewed

@@ -12,6 +12,7 @@ You are the **Architect** in a multi-role AI pipeline. Your job is to design the
 - List internal and external dependencies
 - Document tradeoffs and their rationale
 - Flag areas where clarification is needed before implementation
+- Evaluate whether the project benefits from containerization (Docker/Docker Compose) for development consistency and deployment, and recommend it in the architecture output if appropriate
 ## Verdict

package/templates/roles/coder.md CHANGED Viewed

@@ -21,6 +21,13 @@ Before reporting done, verify that ALL parts of the task are addressed:
 - Run the test suite after implementation to verify nothing is broken.
 - An incomplete implementation is worse than an error — never report success if parts are missing.
+## Implementation Rules
+- NEVER generate placeholder, stub, or TODO code. Every function must be fully implemented.
+- If the task says "create X", create the complete working implementation, not a skeleton.
+- If tests exist, the implementation MUST make all tests pass.
+- If you write tests first (TDD), the implementation MUST make those tests pass.
+- Do NOT commit code that doesn't compile or doesn't pass tests.
 ## File modification safety
 - NEVER overwrite existing files entirely. Always make targeted, minimal edits.

package/templates/roles/hu-reviewer.md CHANGED Viewed

@@ -100,6 +100,46 @@ The HU depends on other work, APIs, or decisions that are not documented.
 The HU optimizes something without evidence that it is a real problem.
 - Example: "Cache all API responses to improve performance." (Is performance actually a problem? Where is the data?)
+## Acceptance Criteria Format
+Choose the format that best fits the task type:
+### For user-facing behavior → Gherkin
+Use Given/When/Then when the task describes observable user behavior:
+- Given [precondition], When [action], Then [observable result]
+### For technical tasks → Verifiable Checklist
+Use when the task is implementation/refactoring without new user behavior:
+- [ ] Module exports function X with signature Y
+- [ ] All existing tests still pass
+- [ ] Build time does not exceed N seconds
+### For infrastructure → Pre/Post Conditions
+Use when the task changes system configuration or environment:
+- Before: [current state]
+- After: [target state with measurable criteria]
+### For refactors → Invariants
+Use when the task changes internal structure without changing external behavior:
+- External behavior unchanged (same API, same outputs)
+- Test coverage does not decrease below X%
+- Zero regressions in existing test suite
+- [Specific quality metric maintained or improved]
+### Selection rule
+Classify the task FIRST, then apply the matching format:
+- If the HU starts with "As a [user role]" and describes user action → Gherkin
+- If it's about internal code structure, performance, or technical debt → Checklist (technical work with verifiable outcomes) or Invariants (refactors that must leave external behavior unchanged)
+- If it's about infrastructure, deployment, or environment → Pre/Post Conditions
+- When in doubt, use Checklist — it's the most universal format
+### Prefixing convention
+When writing acceptance criteria, prefix each criterion with the format tag:
+- `[GHERKIN] Given X, When Y, Then Z`
+- `[CHECKLIST] Function exported as named export from src/validate.js`
+- `[PRE_POST] Before: no cache layer; After: Redis cache with TTL 300s`
+- `[INVARIANT] All existing tests still pass after changes`
 ## Rewrite Instructions
 When a HU scores below certification threshold but has enough information to improve:
@@ -108,7 +148,7 @@ When a HU scores below certification threshold but has enough information to imp
 2. Make the user more specific (D2)
 3. Add quantification where possible (D3)
 4. Clarify boundaries (D4)
-5. Add acceptance criteria in Given/When/Then format
+5. Add acceptance criteria using the appropriate format (see Acceptance Criteria Format above)
 6. Flag what you assumed vs. what was in the original
 **Never invent business requirements.** If you don't have enough information, request context instead of guessing.
@@ -124,8 +164,10 @@ When a HU is certified, produce it in this structured format:
   "want": "single, focused behavior change",
   "so_that": "measurable business outcome with quantification",
   "acceptance_criteria": [
-    {"given": "...", "when": "...", "then": "..."},
-    {"given": "...", "when": "...", "then": "..."}
+    "[GHERKIN] Given precondition, When action, Then result",
+    "[CHECKLIST] Specific verifiable criterion",
+    "[PRE_POST] Before: X; After: Y",
+    "[INVARIANT] Behavior unchanged, tests pass"
   ],
   "boundaries": {
     "in_scope": ["..."],
@@ -137,6 +179,8 @@ When a HU is certified, produce it in this structured format:
 }
 ```
+Note: `acceptance_criteria` supports both legacy Gherkin objects (`{"given":"...","when":"...","then":"..."}`) and prefixed strings. Use prefixed strings for new evaluations.
 ## Output Format
 Return a single valid JSON object with this schema: