karajan-code 1.32.1 → 1.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "karajan-code",
3
- "version": "1.32.1",
3
+ "version": "1.34.0",
4
4
  "description": "Local multi-agent coding orchestrator with TDD, SonarQube, and code review pipeline",
5
5
  "type": "module",
6
6
  "license": "AGPL-3.0",
package/src/config.js CHANGED
@@ -420,6 +420,34 @@ export function applyRunOverrides(config, flags) {
420
420
  return out;
421
421
  }
422
422
 
// Substrings that identify a model name as belonging to a given agent provider.
const AGENT_MODEL_SIGNATURES = {
  claude: ["claude", "sonnet", "opus", "haiku"],
  codex: ["o4-", "o3-", "gpt-", "codex"],
  gemini: ["gemini", "flash-"]
};

/**
 * Check if a model string is compatible with an agent provider.
 *
 * Only returns false when the model clearly belongs to a DIFFERENT provider:
 * it matches another provider's signature and none of the agent's own.
 * Returns true when nothing can be determined (missing or non-string input,
 * no signature matches) or when the model is ambiguous (it matches the
 * agent's own signature as well as another provider's).
 *
 * @param {string|null|undefined} agent - Provider name (a key of AGENT_MODEL_SIGNATURES, or any other agent).
 * @param {string|null|undefined} model - Model identifier to check.
 * @returns {boolean} false only for a clear cross-provider mismatch.
 */
export function isModelCompatible(agent, model) {
  if (!model || !agent) return true;
  // A non-string model cannot be matched against signatures — treat as undetermined.
  if (typeof model !== "string") return true;

  const lower = model.toLowerCase();
  let matchesOtherProvider = false;

  for (const [provider, signatures] of Object.entries(AGENT_MODEL_SIGNATURES)) {
    if (!signatures.some((signature) => lower.includes(signature))) continue;
    // A match on the agent's own signature wins over any foreign match.
    if (provider === agent) return true;
    matchesOtherProvider = true;
  }

  return !matchesOtherProvider;
}
450
+
423
451
  export function resolveRole(config, role) {
424
452
  const roles = config?.roles || {};
425
453
  const roleConfig = roles[role] || {};
@@ -434,10 +462,17 @@ export function resolveRole(config, role) {
434
462
  }
435
463
 
436
464
  let model = roleConfig.model ?? null;
465
+ let modelIsInherited = false;
437
466
  if (!model && role === "coder") model = config?.coder_options?.model ?? null;
438
467
  if (!model && role === "reviewer") model = config?.reviewer_options?.model ?? null;
439
468
  if (!model && (role === "planner" || role === "refactorer" || role === "solomon" || role === "researcher" || role === "tester" || role === "security" || role === "impeccable" || role === "triage" || role === "discover" || role === "architect" || role === "hu_reviewer" || role === "hu-reviewer")) {
440
469
  model = config?.coder_options?.model ?? null;
470
+ modelIsInherited = !!model;
471
+ }
472
+
473
+ // Drop inherited model if incompatible with the resolved provider
474
+ if (modelIsInherited && provider && model && !isModelCompatible(provider, model)) {
475
+ model = null;
441
476
  }
442
477
 
443
478
  return { provider, model };
@@ -790,7 +790,10 @@ async function handleRun(a, server, extra) {
790
790
  }
791
791
  }
792
792
  if (!isPreflightAcked()) {
793
- return buildPreflightRequiredResponse("kj_run");
793
+ // Auto-acknowledge with defaults for autonomous operation
794
+ ackPreflight({});
795
+ const logger = createLogger("info", "mcp");
796
+ logger.info("Preflight auto-acknowledged with default agent config");
794
797
  }
795
798
  applySessionOverrides(a, ["coder", "reviewer", "tester", "security", "solomon", "enableTester", "enableSecurity", "enableImpeccable"]);
796
799
  return handleRunDirect(a, server, extra);
@@ -801,7 +804,10 @@ async function handleCode(a, server, extra) {
801
804
  return failPayload("Missing required field: task");
802
805
  }
803
806
  if (!isPreflightAcked()) {
804
- return buildPreflightRequiredResponse("kj_code");
807
+ // Auto-acknowledge with defaults for autonomous operation
808
+ ackPreflight({});
809
+ const logger = createLogger("info", "mcp");
810
+ logger.info("Preflight auto-acknowledged with default agent config");
805
811
  }
806
812
  applySessionOverrides(a, ["coder"]);
807
813
  return handleCodeDirect(a, server, extra);
@@ -37,6 +37,9 @@ export class PipelineContext {
37
37
  this.pgProject = null;
38
38
  this.pgCard = null;
39
39
 
40
+ // Product context (loaded from .karajan/context.md or product-vision.md)
41
+ this.productContext = null;
42
+
40
43
  // Planned task (may differ from original task after planner)
41
44
  this.plannedTask = null;
42
45
 
@@ -1,3 +1,5 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
1
3
  import { createAgent } from "./agents/index.js";
2
4
  import {
3
5
  createSession,
@@ -39,6 +41,29 @@ import { runPreflightChecks } from "./orchestrator/preflight-checks.js";
39
41
  import { detectRtk } from "./utils/rtk-detect.js";
40
42
 
41
43
 
44
+ // --- Product Context loader ---
45
+
/**
 * Look for a product context file in well-known locations and read it.
 *
 * Candidates are tried in order: `<base>/.karajan/context.md`, then
 * `<base>/product-vision.md`, where `<base>` is `projectDir` or, when that
 * is falsy, the current working directory. The first file that can be read
 * wins. Any read error is treated as "not available" and the next candidate
 * is tried.
 *
 * @param {string|null} projectDir - Project root to search; falsy means process.cwd().
 * @returns {Promise<{content: string|null, source: string|null}>}
 *   The file text and its path, or both null when no candidate could be read.
 */
export async function loadProductContext(projectDir) {
  const root = projectDir || process.cwd();
  const relativeCandidates = [
    [".karajan", "context.md"],
    ["product-vision.md"]
  ];

  for (const segments of relativeCandidates) {
    const source = path.join(root, ...segments);
    let content;
    try {
      content = await fs.readFile(source, "utf8");
    } catch {
      // Best effort: unreadable or missing candidate, move on to the next one.
      continue;
    }
    return { content, source };
  }

  return { content: null, source: null };
}
66
+
42
67
  // --- Extracted helper functions (pure refactoring, zero behavior change) ---
43
68
 
44
69
  function resolvePipelineFlags(config) {
@@ -71,8 +96,8 @@ async function handleDryRun({ task, config, flags, emitter, pipelineFlags }) {
71
96
  const projectDir = config.projectDir || process.cwd();
72
97
  const { rules: reviewRules } = await resolveReviewProfile({ mode: config.review_mode, projectDir });
73
98
  const coderRules = await loadFirstExisting(resolveRoleMdPath("coder", projectDir));
74
- const coderPrompt = buildCoderPrompt({ task, coderRules, methodology: config.development?.methodology, serenaEnabled: Boolean(config.serena?.enabled), rtkAvailable: Boolean(config.rtk?.available) });
75
- const reviewerPrompt = buildReviewerPrompt({ task, diff: "(dry-run: no diff)", reviewRules, mode: config.review_mode, serenaEnabled: Boolean(config.serena?.enabled), rtkAvailable: Boolean(config.rtk?.available) });
99
+ const coderPrompt = buildCoderPrompt({ task, coderRules, methodology: config.development?.methodology, serenaEnabled: Boolean(config.serena?.enabled), rtkAvailable: Boolean(config.rtk?.available), productContext: config.productContext || null });
100
+ const reviewerPrompt = buildReviewerPrompt({ task, diff: "(dry-run: no diff)", reviewRules, mode: config.review_mode, serenaEnabled: Boolean(config.serena?.enabled), rtkAvailable: Boolean(config.rtk?.available), productContext: config.productContext || null });
76
101
 
77
102
  const summary = {
78
103
  dry_run: true,
@@ -1099,6 +1124,18 @@ async function initFlowContext({ task, config, logger, emitter, askQuestion, pgT
1099
1124
  }));
1100
1125
  }
1101
1126
 
1127
+ // --- Product Context ---
1128
+ const ctxProjectDir = config.projectDir || process.cwd();
1129
+ const { content: productContext, source: productContextSource } = await loadProductContext(ctxProjectDir);
1130
+ if (productContext) {
1131
+ config = { ...config, productContext };
1132
+ logger.info(`Product context loaded from ${productContextSource}`);
1133
+ emitProgress(emitter, makeEvent("context:loaded", ctx.eventBase, {
1134
+ message: "Product context loaded",
1135
+ detail: { source: productContextSource }
1136
+ }));
1137
+ }
1138
+
1102
1139
  ctx.session = await initializeSession({ task, config, flags, pgTaskId, pgProject });
1103
1140
  ctx.eventBase.sessionId = ctx.session.id;
1104
1141
 
@@ -6,7 +6,7 @@ const SUBAGENT_PREAMBLE = [
6
6
 
7
7
  export const VALID_VERDICTS = new Set(["ready", "needs_clarification"]);
8
8
 
9
- export function buildArchitectPrompt({ task, instructions, researchContext = null }) {
9
+ export function buildArchitectPrompt({ task, instructions, researchContext = null, productContext = null }) {
10
10
  const sections = [SUBAGENT_PREAMBLE];
11
11
 
12
12
  if (instructions) {
@@ -31,6 +31,10 @@ export function buildArchitectPrompt({ task, instructions, researchContext = nul
31
31
  'JSON schema: {"verdict":"ready|needs_clarification","architecture":{"type":string,"layers":[string],"patterns":[string],"dataModel":{"entities":[string]},"apiContracts":[string],"dependencies":[string],"tradeoffs":[string]},"questions":[string],"summary":string}'
32
32
  );
33
33
 
34
+ if (productContext) {
35
+ sections.push(`## Product Context\n${productContext}`);
36
+ }
37
+
34
38
  if (researchContext) {
35
39
  sections.push(`## Research Context\n${researchContext}`);
36
40
  }
@@ -1,3 +1,5 @@
1
+ import { extractFirstJson } from "../utils/json-extract.js";
2
+
1
3
  const SUBAGENT_PREAMBLE = [
2
4
  "IMPORTANT: You are running as a Karajan sub-agent.",
3
5
  "Do NOT ask about using Karajan, do NOT mention Karajan, do NOT suggest orchestration.",
@@ -154,16 +156,8 @@ function parseRecommendation(raw) {
154
156
  }
155
157
 
156
158
  export function parseAuditOutput(raw) {
157
- const text = raw?.trim() || "";
158
- const jsonMatch = /\{[\s\S]*\}/.exec(text);
159
- if (!jsonMatch) return null;
160
-
161
- let parsed;
162
- try {
163
- parsed = JSON.parse(jsonMatch[0]);
164
- } catch {
165
- return null;
166
- }
159
+ const parsed = extractFirstJson(raw);
160
+ if (!parsed) return null;
167
161
 
168
162
  // Handle both wrapped (result.summary) and flat structures
169
163
  const resultObj = parsed.result || parsed;
@@ -31,7 +31,7 @@ const SERENA_INSTRUCTIONS = [
31
31
  "Fall back to reading files only when Serena tools are not sufficient."
32
32
  ].join("\n");
33
33
 
34
- export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary = null, coderRules = null, methodology = "tdd", serenaEnabled = false, rtkAvailable = false, deferredContext = null }) {
34
+ export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary = null, coderRules = null, methodology = "tdd", serenaEnabled = false, rtkAvailable = false, deferredContext = null, productContext = null, plan = null }) {
35
35
  const sections = [
36
36
  serenaEnabled ? SUBAGENT_PREAMBLE_SERENA : SUBAGENT_PREAMBLE,
37
37
  `Task:\n${task}`,
@@ -48,6 +48,14 @@ export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary =
48
48
  sections.push(RTK_INSTRUCTIONS);
49
49
  }
50
50
 
51
+ if (productContext) {
52
+ sections.push(`## Product Context\n${productContext}`);
53
+ }
54
+
55
+ if (plan) {
56
+ sections.push(`## Implementation Plan (from planner)\nFollow these steps:\n${plan}`);
57
+ }
58
+
51
59
  if (coderRules) {
52
60
  sections.push(`Coder rules (MUST follow):\n${coderRules}`);
53
61
  }
@@ -1,3 +1,5 @@
1
+ import { extractFirstJson } from "../utils/json-extract.js";
2
+
1
3
  const SUBAGENT_PREAMBLE = [
2
4
  "IMPORTANT: You are running as a Karajan sub-agent.",
3
5
  "Do NOT ask about using Karajan, do NOT mention Karajan, do NOT suggest orchestration.",
@@ -205,16 +207,8 @@ function parseJtbds(rawJtbds) {
205
207
  }
206
208
 
207
209
  export function parseDiscoverOutput(raw) {
208
- const text = raw?.trim() || "";
209
- const jsonMatch = /\{[\s\S]*\}/.exec(text);
210
- if (!jsonMatch) return null;
211
-
212
- let parsed;
213
- try {
214
- parsed = JSON.parse(jsonMatch[0]);
215
- } catch {
216
- return null;
217
- }
210
+ const parsed = extractFirstJson(raw);
211
+ if (!parsed) return null;
218
212
 
219
213
  return {
220
214
  verdict: VALID_VERDICTS.has(parsed.verdict) ? parsed.verdict : "ready",
@@ -1,3 +1,5 @@
1
+ import { extractFirstJson } from "../utils/json-extract.js";
2
+
1
3
  const SUBAGENT_PREAMBLE = [
2
4
  "IMPORTANT: You are running as a Karajan sub-agent.",
3
5
  "Do NOT ask about using Karajan, do NOT mention Karajan, do NOT suggest orchestration.",
@@ -19,7 +21,7 @@ const DIMENSION_KEYS = [
19
21
  * @param {{stories: Array<{id: string, text: string}>, instructions: string|null, context?: string|null}} params
20
22
  * @returns {string} The assembled prompt.
21
23
  */
22
- export function buildHuReviewerPrompt({ stories, instructions, context = null }) {
24
+ export function buildHuReviewerPrompt({ stories, instructions, context = null, productContext = null }) {
23
25
  const sections = [SUBAGENT_PREAMBLE];
24
26
 
25
27
  if (instructions) {
@@ -37,6 +39,10 @@ export function buildHuReviewerPrompt({ stories, instructions, context = null })
37
39
  `JSON schema: {"evaluations":[{"story_id":string,"scores":{"D1_jtbd_context":number,"D2_user_specificity":number,"D3_behavior_change":number,"D4_control_zone":number,"D5_time_constraints":number,"D6_survivable_experiment":number},"total":number,"antipatterns_detected":[string],"verdict":"certified|needs_rewrite|needs_context","evaluation_notes":string,"rewritten":object|null,"certified_hu":object|null,"context_needed":object|null}],"batch_summary":{"total":number,"certified":number,"needs_rewrite":number,"needs_context":number,"consolidated_questions":string}}`
38
40
  );
39
41
 
42
+ if (productContext) {
43
+ sections.push(`## Product Context\n${productContext}`);
44
+ }
45
+
40
46
  if (context) {
41
47
  sections.push(`## Additional Context\n${context}`);
42
48
  }
@@ -88,22 +94,51 @@ function parseEvaluation(raw) {
88
94
  };
89
95
  }
90
96
 
const VALID_AC_FORMATS = new Set(["gherkin", "checklist", "pre_post", "invariant"]);
const AC_PREFIX_RE = /^\[(GHERKIN|CHECKLIST|PRE_POST|INVARIANT)]\s*/i;

/**
 * Classify one acceptance criterion and reduce it to plain text.
 *
 * - A string starting with a `[GHERKIN]`, `[CHECKLIST]`, `[PRE_POST]` or
 *   `[INVARIANT]` tag (case-insensitive) yields that format in lower case,
 *   with the tag and any whitespace after it stripped from the text.
 * - Any other string is a "checklist" item, returned unchanged.
 * - A legacy Gherkin object (one that has a `given`, `when` or `then` key)
 *   is rendered as "Given ..., When ..., Then ..."; missing or falsy parts
 *   are shown as "...".
 * - Everything else is stringified with String() and treated as "checklist".
 *
 * @param {string|object} criterion
 * @returns {{format: string, text: string}}
 */
export function detectAcFormat(criterion) {
  if (typeof criterion === "string") {
    const tagMatch = AC_PREFIX_RE.exec(criterion);
    if (tagMatch === null) {
      return { format: "checklist", text: criterion };
    }
    const [fullTag, label] = tagMatch;
    return { format: label.toLowerCase(), text: criterion.slice(fullTag.length) };
  }

  const isPlainObject = typeof criterion === "object" && criterion !== null;
  if (isPlainObject && ["given", "when", "then"].some((key) => key in criterion)) {
    const { given, when, then } = criterion;
    return {
      format: "gherkin",
      text: `Given ${given || "..."}, When ${when || "..."}, Then ${then || "..."}`
    };
  }

  return { format: "checklist", text: String(criterion) };
}
122
+
/**
 * Convert an acceptance_criteria array into uniform `{format, text}` entries.
 * Each element is passed through detectAcFormat, so legacy Gherkin objects
 * and prefixed strings are both accepted. Anything that is not an array
 * yields an empty list.
 * @param {Array} criteria
 * @returns {Array<{format: string, text: string}>}
 */
export function normalizeAcceptanceCriteria(criteria) {
  return Array.isArray(criteria)
    ? criteria.map((criterion) => detectAcFormat(criterion))
    : [];
}
133
+
91
134
  /**
92
135
  * Parse the raw output from the HU reviewer agent.
93
136
  * @param {string} raw - Raw text output from the agent.
94
137
  * @returns {object|null} Parsed result with evaluations and batch_summary, or null.
95
138
  */
96
139
  export function parseHuReviewerOutput(raw) {
97
- const text = raw?.trim() || "";
98
- const jsonMatch = /\{[\s\S]*\}/.exec(text);
99
- if (!jsonMatch) return null;
100
-
101
- let parsed;
102
- try {
103
- parsed = JSON.parse(jsonMatch[0]);
104
- } catch {
105
- return null;
106
- }
140
+ const parsed = extractFirstJson(raw);
141
+ if (!parsed) return null;
107
142
 
108
143
  if (!Array.isArray(parsed.evaluations)) return null;
109
144
 
@@ -64,7 +64,7 @@ function formatArchitectContext(architectContext) {
64
64
  return lines.length > 1 ? lines.join("\n") : null;
65
65
  }
66
66
 
67
- export function buildPlannerPrompt({ task, context, architectContext }) {
67
+ export function buildPlannerPrompt({ task, context, architectContext, productContext = null }) {
68
68
  const parts = [
69
69
  "You are an expert software architect. Create an implementation plan for the following task.",
70
70
  "",
@@ -73,6 +73,10 @@ export function buildPlannerPrompt({ task, context, architectContext }) {
73
73
  ""
74
74
  ];
75
75
 
76
+ if (productContext) {
77
+ parts.push("## Product Context", productContext, "");
78
+ }
79
+
76
80
  if (context) {
77
81
  parts.push("## Context", context, "");
78
82
  }
@@ -22,7 +22,7 @@ const SERENA_INSTRUCTIONS = [
22
22
  "Fall back to reading files only when Serena tools are not sufficient."
23
23
  ].join("\n");
24
24
 
25
- export function buildReviewerPrompt({ task, diff, reviewRules, mode, serenaEnabled = false, rtkAvailable = false }) {
25
+ export function buildReviewerPrompt({ task, diff, reviewRules, mode, serenaEnabled = false, rtkAvailable = false, productContext = null }) {
26
26
  const truncatedDiff = diff.length > 12000 ? `${diff.slice(0, 12000)}\n\n[TRUNCATED]` : diff;
27
27
 
28
28
  const sections = [
@@ -43,6 +43,10 @@ export function buildReviewerPrompt({ task, diff, reviewRules, mode, serenaEnabl
43
43
  sections.push(RTK_INSTRUCTIONS);
44
44
  }
45
45
 
46
+ if (productContext) {
47
+ sections.push(`## Product Context\n${productContext}`);
48
+ }
49
+
46
50
  sections.push(
47
51
  `Task context:\n${task}`,
48
52
  `Review rules:\n${reviewRules}`,
@@ -3,23 +3,11 @@
3
3
  * Extracted from orchestrator.js to improve testability and reduce complexity.
4
4
  */
5
5
 
6
+ import { extractFirstJson } from "../utils/json-extract.js";
7
+
6
8
  export function parseMaybeJsonString(value) {
7
9
  if (typeof value !== "string") return null;
8
- try {
9
- return JSON.parse(value);
10
- } catch {
11
- const start = value.indexOf("{");
12
- const end = value.lastIndexOf("}");
13
- if (start >= 0 && end > start) {
14
- const candidate = value.slice(start, end + 1);
15
- try {
16
- return JSON.parse(candidate);
17
- } catch {
18
- return null;
19
- }
20
- }
21
- return null;
22
- }
10
+ return extractFirstJson(value);
23
11
  }
24
12
 
25
13
  function isReviewPayload(obj) {
@@ -59,7 +59,7 @@ export class ArchitectRole extends BaseRole {
59
59
  const provider = resolveProvider(this.config);
60
60
  const agent = this._createAgent(provider, this.config, this.logger);
61
61
 
62
- const prompt = buildArchitectPrompt({ task, instructions: this.instructions, researchContext });
62
+ const prompt = buildArchitectPrompt({ task, instructions: this.instructions, researchContext, productContext: this.config?.productContext || null });
63
63
  const runArgs = { prompt, role: "architect" };
64
64
  if (onOutput) runArgs.onOutput = onOutput;
65
65
  const result = await agent.runTask(runArgs);
@@ -42,7 +42,8 @@ export class CoderRole extends BaseRole {
42
42
  coderRules: this.instructions,
43
43
  methodology: this.config?.development?.methodology || "tdd",
44
44
  serenaEnabled: Boolean(this.config?.serena?.enabled),
45
- rtkAvailable: Boolean(this.config?.rtk?.available)
45
+ rtkAvailable: Boolean(this.config?.rtk?.available),
46
+ productContext: this.config?.productContext || null
46
47
  });
47
48
 
48
49
  const coderArgs = { prompt, role: "coder" };
@@ -52,7 +52,7 @@ export class HuReviewerRole extends BaseRole {
52
52
  const provider = resolveProvider(this.config);
53
53
  const agent = this._createAgent(provider, this.config, this.logger);
54
54
 
55
- const prompt = buildHuReviewerPrompt({ stories, instructions: this.instructions, context });
55
+ const prompt = buildHuReviewerPrompt({ stories, instructions: this.instructions, context, productContext: this.config?.productContext || null });
56
56
  const runArgs = { prompt, role: "hu-reviewer" };
57
57
  if (onOutput) runArgs.onOutput = onOutput;
58
58
  const result = await agent.runTask(runArgs);
@@ -63,7 +63,7 @@ function appendArchitectSection(sections, architectContext) {
63
63
  sections.push("");
64
64
  }
65
65
 
66
- function buildPrompt({ task, instructions, research, triageDecomposition, architectContext }) {
66
+ function buildPrompt({ task, instructions, research, triageDecomposition, architectContext, productContext = null }) {
67
67
  const sections = [];
68
68
 
69
69
  if (instructions) {
@@ -76,6 +76,10 @@ function buildPrompt({ task, instructions, research, triageDecomposition, archit
76
76
  ""
77
77
  );
78
78
 
79
+ if (productContext) {
80
+ sections.push("## Product Context", productContext, "");
81
+ }
82
+
79
83
  appendDecompositionSection(sections, triageDecomposition);
80
84
  appendArchitectSection(sections, architectContext);
81
85
  appendResearchSection(sections, research);
@@ -102,7 +106,7 @@ export class PlannerRole extends BaseRole {
102
106
  const provider = resolveProvider(this.config);
103
107
 
104
108
  const agent = this._createAgent(provider, this.config, this.logger);
105
- const prompt = buildPrompt({ task: taskStr, instructions: this.instructions, research, triageDecomposition, architectContext });
109
+ const prompt = buildPrompt({ task: taskStr, instructions: this.instructions, research, triageDecomposition, architectContext, productContext: this.config?.productContext || null });
106
110
 
107
111
  const runArgs = { prompt, role: "planner" };
108
112
  if (onOutput) runArgs.onOutput = onOutput;
@@ -25,7 +25,7 @@ function truncateDiff(diff) {
25
25
  : diff;
26
26
  }
27
27
 
28
- function buildPrompt({ task, diff, reviewRules, reviewMode, instructions, rtkAvailable = false }) {
28
+ function buildPrompt({ task, diff, reviewRules, reviewMode, instructions, rtkAvailable = false, productContext = null }) {
29
29
  const sections = [];
30
30
 
31
31
  sections.push(SUBAGENT_PREAMBLE);
@@ -42,6 +42,10 @@ function buildPrompt({ task, diff, reviewRules, reviewMode, instructions, rtkAva
42
42
  `Task context:\n${task}`
43
43
  );
44
44
 
45
+ if (productContext) {
46
+ sections.push(`## Product Context\n${productContext}`);
47
+ }
48
+
45
49
  if (rtkAvailable) {
46
50
  sections.push(RTK_INSTRUCTIONS);
47
51
  }
@@ -84,7 +88,8 @@ export class ReviewerRole extends BaseRole {
84
88
  reviewRules: reviewRules || null,
85
89
  reviewMode: this.config?.review_mode || "standard",
86
90
  instructions: this.instructions,
87
- rtkAvailable: Boolean(this.config?.rtk?.available)
91
+ rtkAvailable: Boolean(this.config?.rtk?.available),
92
+ productContext: this.config?.productContext || null
88
93
  });
89
94
 
90
95
  const reviewArgs = { prompt, role: "reviewer" };
@@ -1,5 +1,6 @@
1
1
  import { BaseRole } from "./base-role.js";
2
2
  import { createAgent as defaultCreateAgent } from "../agents/index.js";
3
+ import { extractFirstJson } from "../utils/json-extract.js";
3
4
 
4
5
  const SUBAGENT_PREAMBLE = [
5
6
  "IMPORTANT: You are running as a Karajan sub-agent.",
@@ -38,10 +39,7 @@ function buildPrompt({ task, diff, instructions }) {
38
39
  }
39
40
 
40
41
  function parseSecurityOutput(raw) {
41
- const text = raw?.trim() || "";
42
- const jsonMatch = /\{[\s\S]*\}/.exec(text);
43
- if (!jsonMatch) return null;
44
- return JSON.parse(jsonMatch[0]);
42
+ return extractFirstJson(raw);
45
43
  }
46
44
 
47
45
  function buildSummary(parsed) {
@@ -1,5 +1,6 @@
1
1
  import { BaseRole } from "./base-role.js";
2
2
  import { createAgent as defaultCreateAgent } from "../agents/index.js";
3
+ import { extractFirstJson } from "../utils/json-extract.js";
3
4
 
4
5
  const SUBAGENT_PREAMBLE = [
5
6
  "IMPORTANT: You are running as a Karajan sub-agent.",
@@ -42,10 +43,7 @@ function buildPrompt({ task, diff, sonarIssues, instructions }) {
42
43
  }
43
44
 
44
45
  function parseTesterOutput(raw) {
45
- const text = raw?.trim() || "";
46
- const jsonMatch = /\{[\s\S]*\}/.exec(text);
47
- if (!jsonMatch) return null;
48
- return JSON.parse(jsonMatch[0]);
46
+ return extractFirstJson(raw);
49
47
  }
50
48
 
51
49
  export class TesterRole extends BaseRole {
@@ -2,6 +2,7 @@ import { BaseRole } from "./base-role.js";
2
2
  import { createAgent as defaultCreateAgent } from "../agents/index.js";
3
3
  import { buildTriagePrompt } from "../prompts/triage.js";
4
4
  import { VALID_TASK_TYPES } from "../guards/policy-resolver.js";
5
+ import { extractFirstJson } from "../utils/json-extract.js";
5
6
 
6
7
  const VALID_LEVELS = new Set(["trivial", "simple", "medium", "complex"]);
7
8
  const VALID_ROLES = new Set(["planner", "researcher", "refactorer", "reviewer", "tester", "security", "impeccable"]);
@@ -16,10 +17,7 @@ function resolveProvider(config) {
16
17
  }
17
18
 
18
19
  function parseTriageOutput(raw) {
19
- const text = raw?.trim() || "";
20
- const jsonMatch = /\{[\s\S]*\}/.exec(text);
21
- if (!jsonMatch) return null;
22
- return JSON.parse(jsonMatch[0]);
20
+ return extractFirstJson(raw);
23
21
  }
24
22
 
25
23
  function normalizeRoles(roles) {
@@ -1,5 +1,17 @@
1
1
  import { calculateUsageCostUsd, DEFAULT_MODEL_PRICING, mergePricing } from "./pricing.js";
2
2
 
/**
 * Estimate token counts from character lengths when CLIs don't report usage.
 * Rough heuristic: ~4 characters per token for English text.
 *
 * Lengths that are missing, non-numeric, non-finite or negative count as 0,
 * so the estimate is never negative or NaN.
 *
 * @param {number} promptLength - Prompt size in characters.
 * @param {number} responseLength - Response size in characters.
 * @param {number} [charsPerToken=4] - Characters assumed per token; invalid values fall back to 4.
 * @returns {{tokens_in: number, tokens_out: number, estimated: true}}
 */
export function estimateTokens(promptLength, responseLength, charsPerToken = 4) {
  const ratio = Number.isFinite(charsPerToken) && charsPerToken > 0 ? charsPerToken : 4;

  const toTokens = (length) => {
    const chars = Number(length);
    return Number.isFinite(chars) && chars > 0 ? Math.ceil(chars / ratio) : 0;
  };

  return {
    tokens_in: toTokens(promptLength),
    tokens_out: toTokens(responseLength),
    estimated: true
  };
}
14
+
3
15
  export function extractUsageMetrics(result, defaultModel = null) {
4
16
  const usage = result?.usage || result?.metrics || {};
5
17
  const tokens_in =
@@ -27,7 +39,22 @@ export function extractUsageMetrics(result, defaultModel = null) {
27
39
  defaultModel ??
28
40
  null;
29
41
 
30
- return { tokens_in, tokens_out, cost_usd, model };
42
+ // If no real token data AND no explicit cost, estimate from prompt/output sizes.
43
+ // Estimation is opt-in: only triggered when result.promptSize is explicitly provided.
44
+ let estimated = false;
45
+ let finalTokensIn = tokens_in;
46
+ let finalTokensOut = tokens_out;
47
+ const hasExplicitCost = cost_usd !== undefined && cost_usd !== null && cost_usd !== "";
48
+ if (!tokens_in && !tokens_out && !hasExplicitCost && result?.promptSize > 0) {
49
+ const promptSize = result.promptSize;
50
+ const outputSize = (result?.output || result?.summary || "").length;
51
+ const est = estimateTokens(promptSize, outputSize);
52
+ finalTokensIn = est.tokens_in;
53
+ finalTokensOut = est.tokens_out;
54
+ estimated = true;
55
+ }
56
+
57
+ return { tokens_in: finalTokensIn, tokens_out: finalTokensOut, cost_usd, model, estimated };
31
58
  }
32
59
 
33
60
  function toSafeNumber(value) {
@@ -63,7 +90,7 @@ export class BudgetTracker {
63
90
  this.pricing = mergePricing(DEFAULT_MODEL_PRICING, options.pricing || {});
64
91
  }
65
92
 
66
- record({ role, provider, model, tokens_in, tokens_out, cost_usd, duration_ms, stage_index } = {}) {
93
+ record({ role, provider, model, tokens_in, tokens_out, cost_usd, duration_ms, stage_index, estimated } = {}) {
67
94
  const safeTokensIn = toSafeNumber(tokens_in);
68
95
  const safeTokensOut = toSafeNumber(tokens_out);
69
96
  const hasExplicitCost = cost_usd !== undefined && cost_usd !== null && cost_usd !== "";
@@ -89,6 +116,9 @@ export class BudgetTracker {
89
116
  if (stage_index !== undefined && stage_index !== null) {
90
117
  entry.stage_index = Number(stage_index);
91
118
  }
119
+ if (estimated) {
120
+ entry.estimated = true;
121
+ }
92
122
  this.entries.push(entry);
93
123
  return entry;
94
124
  }
@@ -133,26 +163,33 @@ export class BudgetTracker {
133
163
  addToBreakdown(byRole, entry.role, entry);
134
164
  }
135
165
 
136
- return {
166
+ const hasEstimates = this.entries.some(e => e.estimated);
167
+ const result = {
137
168
  total_tokens: totals.tokens_in + totals.tokens_out,
138
169
  total_cost_usd: totals.cost_usd,
139
170
  breakdown_by_role: byRole,
140
171
  entries: [...this.entries],
141
172
  usage_available: this.hasUsageData()
142
173
  };
174
+ if (hasEstimates) result.includes_estimates = true;
175
+ return result;
143
176
  }
144
177
 
145
178
  trace() {
146
- return this.entries.map((entry, index) => ({
147
- index: entry.stage_index ?? index,
148
- role: entry.role,
149
- provider: entry.provider,
150
- model: entry.model,
151
- timestamp: entry.timestamp,
152
- duration_ms: entry.duration_ms ?? null,
153
- tokens_in: entry.tokens_in,
154
- tokens_out: entry.tokens_out,
155
- cost_usd: entry.cost_usd
156
- }));
179
+ return this.entries.map((entry, index) => {
180
+ const item = {
181
+ index: entry.stage_index ?? index,
182
+ role: entry.role,
183
+ provider: entry.provider,
184
+ model: entry.model,
185
+ timestamp: entry.timestamp,
186
+ duration_ms: entry.duration_ms ?? null,
187
+ tokens_in: entry.tokens_in,
188
+ tokens_out: entry.tokens_out,
189
+ cost_usd: entry.cost_usd
190
+ };
191
+ if (entry.estimated) item.estimated = true;
192
+ return item;
193
+ });
157
194
  }
158
195
  }
@@ -225,11 +225,13 @@ function printSessionBudget(budget) {
225
225
  console.log(` ${ANSI.dim}\ud83d\udcb0 Budget: N/A (provider does not report usage)${ANSI.reset}`);
226
226
  return;
227
227
  }
228
- console.log(` ${ANSI.dim}\ud83d\udcb0 Total tokens: ${budget.total_tokens ?? 0}${ANSI.reset}`);
229
- console.log(` ${ANSI.dim}\ud83d\udcb0 Total cost: $${Number(budget.total_cost_usd || 0).toFixed(2)}${ANSI.reset}`);
228
+ const estPrefix = budget.includes_estimates ? "~" : "";
229
+ const estNote = budget.includes_estimates ? " (includes estimates)" : "";
230
+ console.log(` ${ANSI.dim}\ud83d\udcb0 Total tokens: ${estPrefix}${budget.total_tokens ?? 0}${estNote}${ANSI.reset}`);
231
+ console.log(` ${ANSI.dim}\ud83d\udcb0 Total cost: ${estPrefix}$${Number(budget.total_cost_usd || 0).toFixed(2)}${ANSI.reset}`);
230
232
  for (const [role, metrics] of Object.entries(budget.breakdown_by_role || {})) {
231
233
  console.log(
232
- ` ${ANSI.dim} - ${role}: ${metrics.total_tokens ?? 0} tokens, $${Number(metrics.total_cost_usd || 0).toFixed(2)}${ANSI.reset}`
234
+ ` ${ANSI.dim} - ${role}: ${estPrefix}${metrics.total_tokens ?? 0} tokens, ${estPrefix}$${Number(metrics.total_cost_usd || 0).toFixed(2)}${ANSI.reset}`
233
235
  );
234
236
  }
235
237
  }
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Robust JSON extraction from agent output.
3
+ * Extracts the first complete JSON object from a string,
4
+ * ignoring any trailing text that would cause parse errors.
5
+ */
6
+
/**
 * Extract the first valid JSON object from a raw string.
 *
 * Handles agents that wrap their JSON in extra text. The whole (trimmed)
 * input is tried first. Otherwise each top-level balanced `{...}` group is
 * tried from left to right and the first one that parses is returned, so a
 * non-JSON brace group in the surrounding prose (e.g. "{placeholder}") does
 * not hide a valid object that follows it.
 *
 * The scan never descends into a group that failed to parse, and stops at a
 * group that is never closed, so a fragment nested inside broken or
 * truncated JSON is not returned as if it were the answer.
 *
 * @param {string} raw - Raw agent output (non-strings are stringified).
 * @returns {object|null} Parsed JSON value, or null if no valid JSON found.
 */
export function extractFirstJson(raw) {
  if (!raw) return null;
  const str = typeof raw === "string" ? raw.trim() : String(raw).trim();
  if (!str) return null;

  // Fast path: try parsing the whole string first
  try {
    return JSON.parse(str);
  } catch { /* fall through to extraction */ }

  let start = str.indexOf("{");
  while (start !== -1) {
    const end = findMatchingBrace(str, start);
    // Unclosed group: everything after it is "inside" it, nothing left to try.
    if (end === -1) return null;

    try {
      return JSON.parse(str.substring(start, end + 1));
    } catch {
      // Not JSON — skip the whole group and try the next top-level one.
      start = str.indexOf("{", end + 1);
    }
  }

  return null;
}

/**
 * Find the index of the `}` that closes the `{` at `start`.
 * Braces inside double-quoted strings are ignored, and a backslash escapes
 * the next character only while inside such a string.
 * Returns -1 when the group is never closed.
 */
function findMatchingBrace(str, start) {
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < str.length; i++) {
    const ch = str[i];

    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    if (ch === '"') {
      inString = true;
    } else if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      if (depth === 0) return i;
    }
  }

  return -1;
}
@@ -12,6 +12,7 @@ You are the **Architect** in a multi-role AI pipeline. Your job is to design the
12
12
  - List internal and external dependencies
13
13
  - Document tradeoffs and their rationale
14
14
  - Flag areas where clarification is needed before implementation
15
+ - Evaluate if the project benefits from containerization (Docker/Docker Compose) for development consistency and deployment, and recommend it in the architecture output if appropriate
15
16
 
16
17
  ## Verdict
17
18
 
@@ -21,6 +21,13 @@ Before reporting done, verify that ALL parts of the task are addressed:
21
21
  - Run the test suite after implementation to verify nothing is broken.
22
22
  - An incomplete implementation is worse than an error — never report success if parts are missing.
23
23
 
24
+ ## Implementation Rules
25
+ - NEVER generate placeholder, stub, or TODO code. Every function must be fully implemented.
26
+ - If the task says "create X", create the complete working implementation, not a skeleton.
27
+ - If tests exist, the implementation MUST make all tests pass.
28
+ - If you write tests first (TDD), the implementation MUST make those tests pass.
29
+ - Do NOT commit code that doesn't compile or doesn't pass tests.
30
+
24
31
  ## File modification safety
25
32
 
26
33
  - NEVER overwrite existing files entirely. Always make targeted, minimal edits.
@@ -100,6 +100,46 @@ The HU depends on other work, APIs, or decisions that are not documented.
100
100
  The HU optimizes something without evidence that it is a real problem.
101
101
  - Example: "Cache all API responses to improve performance." (Is performance actually a problem? Where is the data?)
102
102
 
103
+ ## Acceptance Criteria Format
104
+
105
+ Choose the format that best fits the task type:
106
+
107
+ ### For user-facing behavior → Gherkin
108
+ Use Given/When/Then when the task describes observable user behavior:
109
+ - Given [precondition], When [action], Then [observable result]
110
+
111
+ ### For technical tasks → Verifiable Checklist
112
+ Use when the task is implementation/refactoring without new user behavior:
113
+ - [ ] Module exports function X with signature Y
114
+ - [ ] All existing tests still pass
115
+ - [ ] Build time does not exceed N seconds
116
+
117
+ ### For infrastructure → Pre/Post Conditions
118
+ Use when the task changes system configuration or environment:
119
+ - Before: [current state]
120
+ - After: [target state with measurable criteria]
121
+
122
+ ### For refactors → Invariants
123
+ Use when the task changes internal structure without changing external behavior:
124
+ - External behavior unchanged (same API, same outputs)
125
+ - Test coverage does not decrease below X%
126
+ - Zero regressions in existing test suite
127
+ - [Specific quality metric maintained or improved]
128
+
129
+ ### Selection rule
130
+ Classify the task FIRST, then apply the matching format:
131
+ - If the HU starts with "As a [user role]" and describes a user action → Gherkin
132
+ - If it's about internal code structure, performance, or technical debt → Checklist, or Invariants when the change must leave external behavior unchanged (a refactor)
133
+ - If it's about infrastructure, deployment, or environment → Pre/Post Conditions
134
+ - When in doubt, use Checklist — it's the most universal format
135
+
136
+ ### Prefixing convention
137
+ When writing acceptance criteria, prefix each criterion with the format tag:
138
+ - `[GHERKIN] Given X, When Y, Then Z`
139
+ - `[CHECKLIST] Function exported as named export from src/validate.js`
140
+ - `[PRE_POST] Before: no cache layer; After: Redis cache with TTL 300s`
141
+ - `[INVARIANT] All existing tests still pass after changes`
142
+
103
143
  ## Rewrite Instructions
104
144
 
105
145
  When a HU scores below certification threshold but has enough information to improve:
@@ -108,7 +148,7 @@ When a HU scores below certification threshold but has enough information to imp
108
148
  2. Make the user more specific (D2)
109
149
  3. Add quantification where possible (D3)
110
150
  4. Clarify boundaries (D4)
111
- 5. Add acceptance criteria in Given/When/Then format
151
+ 5. Add acceptance criteria using the appropriate format (see Acceptance Criteria Format above)
112
152
  6. Flag what you assumed vs. what was in the original
113
153
 
114
154
  **Never invent business requirements.** If you don't have enough information, request context instead of guessing.
@@ -124,8 +164,10 @@ When a HU is certified, produce it in this structured format:
124
164
  "want": "single, focused behavior change",
125
165
  "so_that": "measurable business outcome with quantification",
126
166
  "acceptance_criteria": [
127
- {"given": "...", "when": "...", "then": "..."},
128
- {"given": "...", "when": "...", "then": "..."}
167
+ "[GHERKIN] Given precondition, When action, Then result",
168
+ "[CHECKLIST] Specific verifiable criterion",
169
+ "[PRE_POST] Before: X; After: Y",
170
+ "[INVARIANT] Behavior unchanged, tests pass"
129
171
  ],
130
172
  "boundaries": {
131
173
  "in_scope": ["..."],
@@ -137,6 +179,8 @@ When a HU is certified, produce it in this structured format:
137
179
  }
138
180
  ```
139
181
 
182
+ Note: `acceptance_criteria` supports both legacy Gherkin objects (`{"given":"...","when":"...","then":"..."}`) and prefixed strings. Use prefixed strings for new evaluations.
183
+
140
184
  ## Output Format
141
185
 
142
186
  Return a single valid JSON object with this schema: