karajan-code 1.32.1 → 1.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "karajan-code",
3
- "version": "1.32.1",
3
+ "version": "1.33.0",
4
4
  "description": "Local multi-agent coding orchestrator with TDD, SonarQube, and code review pipeline",
5
5
  "type": "module",
6
6
  "license": "AGPL-3.0",
@@ -37,6 +37,9 @@ export class PipelineContext {
37
37
  this.pgProject = null;
38
38
  this.pgCard = null;
39
39
 
40
+ // Product context (loaded from .karajan/context.md or product-vision.md)
41
+ this.productContext = null;
42
+
40
43
  // Planned task (may differ from original task after planner)
41
44
  this.plannedTask = null;
42
45
 
@@ -1,3 +1,5 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
1
3
  import { createAgent } from "./agents/index.js";
2
4
  import {
3
5
  createSession,
@@ -39,6 +41,29 @@ import { runPreflightChecks } from "./orchestrator/preflight-checks.js";
39
41
  import { detectRtk } from "./utils/rtk-detect.js";
40
42
 
41
43
 
44
+ // --- Product Context loader ---
45
+
46
/**
 * Load product context from well-known file locations.
 *
 * Candidates are tried in priority order: `<base>/.karajan/context.md`, then
 * `<base>/product-vision.md`. The first candidate that can be read and whose
 * content is not blank wins.
 *
 * @param {string|null} projectDir - Project root; process.cwd() is used when falsy.
 * @returns {Promise<{content: string|null, source: string|null}>} The file text
 *   and the path it was read from, or `{ content: null, source: null }` when no
 *   candidate is usable.
 */
export async function loadProductContext(projectDir) {
  const base = projectDir || process.cwd();
  const candidates = [
    path.join(base, ".karajan", "context.md"),
    path.join(base, "product-vision.md")
  ];
  for (const file of candidates) {
    let content;
    try {
      content = await fs.readFile(file, "utf8");
    } catch {
      // Best-effort lookup: a missing or unreadable candidate is not fatal,
      // so fall through to the next location.
      continue;
    }
    // A blank file carries no context. Skipping it keeps an empty placeholder
    // from shadowing a lower-priority candidate that has real content.
    if (content.trim() !== "") {
      return { content, source: file };
    }
  }
  return { content: null, source: null };
}
66
+
42
67
  // --- Extracted helper functions (pure refactoring, zero behavior change) ---
43
68
 
44
69
  function resolvePipelineFlags(config) {
@@ -71,8 +96,8 @@ async function handleDryRun({ task, config, flags, emitter, pipelineFlags }) {
71
96
  const projectDir = config.projectDir || process.cwd();
72
97
  const { rules: reviewRules } = await resolveReviewProfile({ mode: config.review_mode, projectDir });
73
98
  const coderRules = await loadFirstExisting(resolveRoleMdPath("coder", projectDir));
74
- const coderPrompt = buildCoderPrompt({ task, coderRules, methodology: config.development?.methodology, serenaEnabled: Boolean(config.serena?.enabled), rtkAvailable: Boolean(config.rtk?.available) });
75
- const reviewerPrompt = buildReviewerPrompt({ task, diff: "(dry-run: no diff)", reviewRules, mode: config.review_mode, serenaEnabled: Boolean(config.serena?.enabled), rtkAvailable: Boolean(config.rtk?.available) });
99
+ const coderPrompt = buildCoderPrompt({ task, coderRules, methodology: config.development?.methodology, serenaEnabled: Boolean(config.serena?.enabled), rtkAvailable: Boolean(config.rtk?.available), productContext: config.productContext || null });
100
+ const reviewerPrompt = buildReviewerPrompt({ task, diff: "(dry-run: no diff)", reviewRules, mode: config.review_mode, serenaEnabled: Boolean(config.serena?.enabled), rtkAvailable: Boolean(config.rtk?.available), productContext: config.productContext || null });
76
101
 
77
102
  const summary = {
78
103
  dry_run: true,
@@ -1099,6 +1124,18 @@ async function initFlowContext({ task, config, logger, emitter, askQuestion, pgT
1099
1124
  }));
1100
1125
  }
1101
1126
 
1127
+ // --- Product Context ---
1128
+ const ctxProjectDir = config.projectDir || process.cwd();
1129
+ const { content: productContext, source: productContextSource } = await loadProductContext(ctxProjectDir);
1130
+ if (productContext) {
1131
+ config = { ...config, productContext };
1132
+ logger.info(`Product context loaded from ${productContextSource}`);
1133
+ emitProgress(emitter, makeEvent("context:loaded", ctx.eventBase, {
1134
+ message: "Product context loaded",
1135
+ detail: { source: productContextSource }
1136
+ }));
1137
+ }
1138
+
1102
1139
  ctx.session = await initializeSession({ task, config, flags, pgTaskId, pgProject });
1103
1140
  ctx.eventBase.sessionId = ctx.session.id;
1104
1141
 
@@ -6,7 +6,7 @@ const SUBAGENT_PREAMBLE = [
6
6
 
7
7
  export const VALID_VERDICTS = new Set(["ready", "needs_clarification"]);
8
8
 
9
- export function buildArchitectPrompt({ task, instructions, researchContext = null }) {
9
+ export function buildArchitectPrompt({ task, instructions, researchContext = null, productContext = null }) {
10
10
  const sections = [SUBAGENT_PREAMBLE];
11
11
 
12
12
  if (instructions) {
@@ -31,6 +31,10 @@ export function buildArchitectPrompt({ task, instructions, researchContext = nul
31
31
  'JSON schema: {"verdict":"ready|needs_clarification","architecture":{"type":string,"layers":[string],"patterns":[string],"dataModel":{"entities":[string]},"apiContracts":[string],"dependencies":[string],"tradeoffs":[string]},"questions":[string],"summary":string}'
32
32
  );
33
33
 
34
+ if (productContext) {
35
+ sections.push(`## Product Context\n${productContext}`);
36
+ }
37
+
34
38
  if (researchContext) {
35
39
  sections.push(`## Research Context\n${researchContext}`);
36
40
  }
@@ -31,7 +31,7 @@ const SERENA_INSTRUCTIONS = [
31
31
  "Fall back to reading files only when Serena tools are not sufficient."
32
32
  ].join("\n");
33
33
 
34
- export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary = null, coderRules = null, methodology = "tdd", serenaEnabled = false, rtkAvailable = false, deferredContext = null }) {
34
+ export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary = null, coderRules = null, methodology = "tdd", serenaEnabled = false, rtkAvailable = false, deferredContext = null, productContext = null }) {
35
35
  const sections = [
36
36
  serenaEnabled ? SUBAGENT_PREAMBLE_SERENA : SUBAGENT_PREAMBLE,
37
37
  `Task:\n${task}`,
@@ -48,6 +48,10 @@ export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary =
48
48
  sections.push(RTK_INSTRUCTIONS);
49
49
  }
50
50
 
51
+ if (productContext) {
52
+ sections.push(`## Product Context\n${productContext}`);
53
+ }
54
+
51
55
  if (coderRules) {
52
56
  sections.push(`Coder rules (MUST follow):\n${coderRules}`);
53
57
  }
@@ -19,7 +19,7 @@ const DIMENSION_KEYS = [
19
19
  * @param {{stories: Array<{id: string, text: string}>, instructions: string|null, context?: string|null}} params
20
20
  * @returns {string} The assembled prompt.
21
21
  */
22
- export function buildHuReviewerPrompt({ stories, instructions, context = null }) {
22
+ export function buildHuReviewerPrompt({ stories, instructions, context = null, productContext = null }) {
23
23
  const sections = [SUBAGENT_PREAMBLE];
24
24
 
25
25
  if (instructions) {
@@ -37,6 +37,10 @@ export function buildHuReviewerPrompt({ stories, instructions, context = null })
37
37
  `JSON schema: {"evaluations":[{"story_id":string,"scores":{"D1_jtbd_context":number,"D2_user_specificity":number,"D3_behavior_change":number,"D4_control_zone":number,"D5_time_constraints":number,"D6_survivable_experiment":number},"total":number,"antipatterns_detected":[string],"verdict":"certified|needs_rewrite|needs_context","evaluation_notes":string,"rewritten":object|null,"certified_hu":object|null,"context_needed":object|null}],"batch_summary":{"total":number,"certified":number,"needs_rewrite":number,"needs_context":number,"consolidated_questions":string}}`
38
38
  );
39
39
 
40
+ if (productContext) {
41
+ sections.push(`## Product Context\n${productContext}`);
42
+ }
43
+
40
44
  if (context) {
41
45
  sections.push(`## Additional Context\n${context}`);
42
46
  }
@@ -88,6 +92,43 @@ function parseEvaluation(raw) {
88
92
  };
89
93
  }
90
94
 
95
// Format tags recognised on acceptance criteria (stored lowercase).
const VALID_AC_FORMATS = new Set(["gherkin", "checklist", "pre_post", "invariant"]);
// Matches a leading "[TAG]" (case-insensitive), tolerating whitespace before and
// after it. Built from VALID_AC_FORMATS so the tag list has one source of truth.
const AC_PREFIX_RE = new RegExp(`^\\s*\\[(${[...VALID_AC_FORMATS].join("|")})\\]\\s*`, "i");

/**
 * Detect the format of a single acceptance criterion.
 * Supports both prefixed strings ("[GHERKIN] Given...") and legacy Gherkin objects ({given, when, then}).
 *
 * - Legacy objects are rendered as "Given …, When …, Then …"; missing or empty
 *   parts are shown as "...".
 * - Prefixed strings have the tag (and any whitespace around it) stripped.
 * - Unprefixed strings are returned unchanged as "checklist".
 * - Any other value is stringified and treated as "checklist".
 *
 * @param {string|object} criterion
 * @returns {{format: string, text: string}}
 */
export function detectAcFormat(criterion) {
  if (typeof criterion === "object" && criterion !== null && ("given" in criterion || "when" in criterion || "then" in criterion)) {
    const text = `Given ${criterion.given || "..."}, When ${criterion.when || "..."}, Then ${criterion.then || "..."}`;
    return { format: "gherkin", text };
  }
  if (typeof criterion === "string") {
    const match = AC_PREFIX_RE.exec(criterion);
    if (match) {
      // match[0] spans optional leading whitespace, the tag, and trailing whitespace.
      return { format: match[1].toLowerCase(), text: criterion.slice(match[0].length) };
    }
    return { format: "checklist", text: criterion };
  }
  return { format: "checklist", text: String(criterion) };
}
120
+
121
/**
 * Convert an acceptance_criteria list into uniform {format, text} entries.
 * Each element (legacy Gherkin object or prefixed string) is passed through
 * detectAcFormat; a non-array input yields an empty list.
 * @param {Array} criteria
 * @returns {Array<{format: string, text: string}>}
 */
export function normalizeAcceptanceCriteria(criteria) {
  return Array.isArray(criteria) ? criteria.map(detectAcFormat) : [];
}
131
+
91
132
  /**
92
133
  * Parse the raw output from the HU reviewer agent.
93
134
  * @param {string} raw - Raw text output from the agent.
@@ -64,7 +64,7 @@ function formatArchitectContext(architectContext) {
64
64
  return lines.length > 1 ? lines.join("\n") : null;
65
65
  }
66
66
 
67
- export function buildPlannerPrompt({ task, context, architectContext }) {
67
+ export function buildPlannerPrompt({ task, context, architectContext, productContext = null }) {
68
68
  const parts = [
69
69
  "You are an expert software architect. Create an implementation plan for the following task.",
70
70
  "",
@@ -73,6 +73,10 @@ export function buildPlannerPrompt({ task, context, architectContext }) {
73
73
  ""
74
74
  ];
75
75
 
76
+ if (productContext) {
77
+ parts.push("## Product Context", productContext, "");
78
+ }
79
+
76
80
  if (context) {
77
81
  parts.push("## Context", context, "");
78
82
  }
@@ -22,7 +22,7 @@ const SERENA_INSTRUCTIONS = [
22
22
  "Fall back to reading files only when Serena tools are not sufficient."
23
23
  ].join("\n");
24
24
 
25
- export function buildReviewerPrompt({ task, diff, reviewRules, mode, serenaEnabled = false, rtkAvailable = false }) {
25
+ export function buildReviewerPrompt({ task, diff, reviewRules, mode, serenaEnabled = false, rtkAvailable = false, productContext = null }) {
26
26
  const truncatedDiff = diff.length > 12000 ? `${diff.slice(0, 12000)}\n\n[TRUNCATED]` : diff;
27
27
 
28
28
  const sections = [
@@ -43,6 +43,10 @@ export function buildReviewerPrompt({ task, diff, reviewRules, mode, serenaEnabl
43
43
  sections.push(RTK_INSTRUCTIONS);
44
44
  }
45
45
 
46
+ if (productContext) {
47
+ sections.push(`## Product Context\n${productContext}`);
48
+ }
49
+
46
50
  sections.push(
47
51
  `Task context:\n${task}`,
48
52
  `Review rules:\n${reviewRules}`,
@@ -59,7 +59,7 @@ export class ArchitectRole extends BaseRole {
59
59
  const provider = resolveProvider(this.config);
60
60
  const agent = this._createAgent(provider, this.config, this.logger);
61
61
 
62
- const prompt = buildArchitectPrompt({ task, instructions: this.instructions, researchContext });
62
+ const prompt = buildArchitectPrompt({ task, instructions: this.instructions, researchContext, productContext: this.config?.productContext || null });
63
63
  const runArgs = { prompt, role: "architect" };
64
64
  if (onOutput) runArgs.onOutput = onOutput;
65
65
  const result = await agent.runTask(runArgs);
@@ -42,7 +42,8 @@ export class CoderRole extends BaseRole {
42
42
  coderRules: this.instructions,
43
43
  methodology: this.config?.development?.methodology || "tdd",
44
44
  serenaEnabled: Boolean(this.config?.serena?.enabled),
45
- rtkAvailable: Boolean(this.config?.rtk?.available)
45
+ rtkAvailable: Boolean(this.config?.rtk?.available),
46
+ productContext: this.config?.productContext || null
46
47
  });
47
48
 
48
49
  const coderArgs = { prompt, role: "coder" };
@@ -52,7 +52,7 @@ export class HuReviewerRole extends BaseRole {
52
52
  const provider = resolveProvider(this.config);
53
53
  const agent = this._createAgent(provider, this.config, this.logger);
54
54
 
55
- const prompt = buildHuReviewerPrompt({ stories, instructions: this.instructions, context });
55
+ const prompt = buildHuReviewerPrompt({ stories, instructions: this.instructions, context, productContext: this.config?.productContext || null });
56
56
  const runArgs = { prompt, role: "hu-reviewer" };
57
57
  if (onOutput) runArgs.onOutput = onOutput;
58
58
  const result = await agent.runTask(runArgs);
@@ -63,7 +63,7 @@ function appendArchitectSection(sections, architectContext) {
63
63
  sections.push("");
64
64
  }
65
65
 
66
- function buildPrompt({ task, instructions, research, triageDecomposition, architectContext }) {
66
+ function buildPrompt({ task, instructions, research, triageDecomposition, architectContext, productContext = null }) {
67
67
  const sections = [];
68
68
 
69
69
  if (instructions) {
@@ -76,6 +76,10 @@ function buildPrompt({ task, instructions, research, triageDecomposition, archit
76
76
  ""
77
77
  );
78
78
 
79
+ if (productContext) {
80
+ sections.push("## Product Context", productContext, "");
81
+ }
82
+
79
83
  appendDecompositionSection(sections, triageDecomposition);
80
84
  appendArchitectSection(sections, architectContext);
81
85
  appendResearchSection(sections, research);
@@ -102,7 +106,7 @@ export class PlannerRole extends BaseRole {
102
106
  const provider = resolveProvider(this.config);
103
107
 
104
108
  const agent = this._createAgent(provider, this.config, this.logger);
105
- const prompt = buildPrompt({ task: taskStr, instructions: this.instructions, research, triageDecomposition, architectContext });
109
+ const prompt = buildPrompt({ task: taskStr, instructions: this.instructions, research, triageDecomposition, architectContext, productContext: this.config?.productContext || null });
106
110
 
107
111
  const runArgs = { prompt, role: "planner" };
108
112
  if (onOutput) runArgs.onOutput = onOutput;
@@ -25,7 +25,7 @@ function truncateDiff(diff) {
25
25
  : diff;
26
26
  }
27
27
 
28
- function buildPrompt({ task, diff, reviewRules, reviewMode, instructions, rtkAvailable = false }) {
28
+ function buildPrompt({ task, diff, reviewRules, reviewMode, instructions, rtkAvailable = false, productContext = null }) {
29
29
  const sections = [];
30
30
 
31
31
  sections.push(SUBAGENT_PREAMBLE);
@@ -42,6 +42,10 @@ function buildPrompt({ task, diff, reviewRules, reviewMode, instructions, rtkAva
42
42
  `Task context:\n${task}`
43
43
  );
44
44
 
45
+ if (productContext) {
46
+ sections.push(`## Product Context\n${productContext}`);
47
+ }
48
+
45
49
  if (rtkAvailable) {
46
50
  sections.push(RTK_INSTRUCTIONS);
47
51
  }
@@ -84,7 +88,8 @@ export class ReviewerRole extends BaseRole {
84
88
  reviewRules: reviewRules || null,
85
89
  reviewMode: this.config?.review_mode || "standard",
86
90
  instructions: this.instructions,
87
- rtkAvailable: Boolean(this.config?.rtk?.available)
91
+ rtkAvailable: Boolean(this.config?.rtk?.available),
92
+ productContext: this.config?.productContext || null
88
93
  });
89
94
 
90
95
  const reviewArgs = { prompt, role: "reviewer" };
@@ -12,6 +12,7 @@ You are the **Architect** in a multi-role AI pipeline. Your job is to design the
12
12
  - List internal and external dependencies
13
13
  - Document tradeoffs and their rationale
14
14
  - Flag areas where clarification is needed before implementation
15
+ - Evaluate whether the project would benefit from containerization (Docker/Docker Compose) for development consistency and deployment, and recommend it in the architecture output when appropriate
15
16
 
16
17
  ## Verdict
17
18
 
@@ -100,6 +100,46 @@ The HU depends on other work, APIs, or decisions that are not documented.
100
100
  The HU optimizes something without evidence that it is a real problem.
101
101
  - Example: "Cache all API responses to improve performance." (Is performance actually a problem? Where is the data?)
102
102
 
103
+ ## Acceptance Criteria Format
104
+
105
+ Choose the format that best fits the task type:
106
+
107
+ ### For user-facing behavior → Gherkin
108
+ Use Given/When/Then when the task describes observable user behavior:
109
+ - Given [precondition], When [action], Then [observable result]
110
+
111
+ ### For technical tasks → Verifiable Checklist
112
+ Use when the task is implementation/refactoring without new user behavior:
113
+ - [ ] Module exports function X with signature Y
114
+ - [ ] All existing tests still pass
115
+ - [ ] Build time does not exceed N seconds
116
+
117
+ ### For infrastructure → Pre/Post Conditions
118
+ Use when the task changes system configuration or environment:
119
+ - Before: [current state]
120
+ - After: [target state with measurable criteria]
121
+
122
+ ### For refactors → Invariants
123
+ Use when the task changes internal structure without changing external behavior:
124
+ - External behavior unchanged (same API, same outputs)
125
+ - Test coverage does not decrease below X%
126
+ - Zero regressions in existing test suite
127
+ - [Specific quality metric maintained or improved]
128
+
129
+ ### Selection rule
130
+ Classify the task FIRST, then apply the matching format:
131
+ - If the HU starts with "As a [user role]" and describes user action → Gherkin
132
+ - If it's about internal code structure, performance, or technical debt → Checklist or Invariants
133
+ - If it's about infrastructure, deployment, or environment → Pre/Post Conditions
134
+ - When in doubt, use Checklist — it's the most universal format
135
+
136
+ ### Prefixing convention
137
+ When writing acceptance criteria, prefix each criterion with the format tag:
138
+ - `[GHERKIN] Given X, When Y, Then Z`
139
+ - `[CHECKLIST] Function exported as named export from src/validate.js`
140
+ - `[PRE_POST] Before: no cache layer; After: Redis cache with TTL 300s`
141
+ - `[INVARIANT] All existing tests still pass after changes`
142
+
103
143
  ## Rewrite Instructions
104
144
 
105
145
  When a HU scores below certification threshold but has enough information to improve:
@@ -108,7 +148,7 @@ When a HU scores below certification threshold but has enough information to imp
108
148
  2. Make the user more specific (D2)
109
149
  3. Add quantification where possible (D3)
110
150
  4. Clarify boundaries (D4)
111
- 5. Add acceptance criteria in Given/When/Then format
151
+ 5. Add acceptance criteria using the appropriate format (see Acceptance Criteria Format above)
112
152
  6. Flag what you assumed vs. what was in the original
113
153
 
114
154
  **Never invent business requirements.** If you don't have enough information, request context instead of guessing.
@@ -124,8 +164,10 @@ When a HU is certified, produce it in this structured format:
124
164
  "want": "single, focused behavior change",
125
165
  "so_that": "measurable business outcome with quantification",
126
166
  "acceptance_criteria": [
127
- {"given": "...", "when": "...", "then": "..."},
128
- {"given": "...", "when": "...", "then": "..."}
167
+ "[GHERKIN] Given precondition, When action, Then result",
168
+ "[CHECKLIST] Specific verifiable criterion",
169
+ "[PRE_POST] Before: X; After: Y",
170
+ "[INVARIANT] Behavior unchanged, tests pass"
129
171
  ],
130
172
  "boundaries": {
131
173
  "in_scope": ["..."],
@@ -137,6 +179,8 @@ When a HU is certified, produce it in this structured format:
137
179
  }
138
180
  ```
139
181
 
182
+ Note: `acceptance_criteria` supports both legacy Gherkin objects (`{"given":"...","when":"...","then":"..."}`) and prefixed strings. Use prefixed strings for new evaluations.
183
+
140
184
  ## Output Format
141
185
 
142
186
  Return a single valid JSON object with this schema: