karajan-code 1.11.1 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "karajan-code",
3
- "version": "1.11.1",
3
+ "version": "1.12.0",
4
4
  "description": "Local multi-agent coding orchestrator with TDD, SonarQube, and code review pipeline",
5
5
  "type": "module",
6
6
  "license": "AGPL-3.0",
@@ -6,6 +6,7 @@ import { addCheckpoint, markSessionStatus, saveSession, pauseSession } from "../
6
6
  import { generateDiff } from "../review/diff-generator.js";
7
7
  import { evaluateTddPolicy } from "../review/tdd-policy.js";
8
8
  import { validateReviewResult } from "../review/schema.js";
9
+ import { filterReviewScope, buildDeferredContext } from "../review/scope-filter.js";
9
10
  import { emitProgress, makeEvent } from "../utils/events.js";
10
11
  import { runReviewerWithFallback } from "./reviewer-fallback.js";
11
12
  import { runCoderWithFallback } from "./agent-fallback.js";
@@ -39,6 +40,7 @@ export async function runCoderStage({ coderRoleInstance, coderRole, config, logg
39
40
  task: plannedTask,
40
41
  reviewerFeedback: session.last_reviewer_feedback,
41
42
  sonarSummary: session.last_sonar_summary,
43
+ deferredContext: buildDeferredContext(session.deferred_issues),
42
44
  onOutput: coderStall.onOutput
43
45
  });
44
46
  } finally {
@@ -390,7 +392,7 @@ export async function runSonarStage({ config, logger, emitter, eventBase, sessio
390
392
  return { action: "ok", stageResult };
391
393
  }
392
394
 
393
- export async function runReviewerStage({ reviewerRole, config, logger, emitter, eventBase, session, trackBudget, iteration, reviewRules, task, repeatDetector, budgetSummary }) {
395
+ export async function runReviewerStage({ reviewerRole, config, logger, emitter, eventBase, session, trackBudget, iteration, reviewRules, task, repeatDetector, budgetSummary, askQuestion }) {
394
396
  logger.setContext({ iteration, stage: "reviewer" });
395
397
  emitProgress(
396
398
  emitter,
@@ -489,6 +491,39 @@ export async function runReviewerStage({ reviewerRole, config, logger, emitter,
489
491
  confidence: 0
490
492
  };
491
493
  }
494
+ // --- Scope filter: auto-defer out-of-scope blocking issues ---
495
+ const { review: filteredReview, demoted, deferred, allDemoted } = filterReviewScope(review, diff);
496
+ review = filteredReview;
497
+
498
+ if (demoted.length > 0) {
499
+ logger.info(`Scope filter: deferred ${demoted.length} out-of-scope issue(s)${allDemoted ? " — auto-approved" : ""}`);
500
+
501
+ // Accumulate deferred issues in session for tracking
502
+ if (!session.deferred_issues) session.deferred_issues = [];
503
+ session.deferred_issues.push(...deferred);
504
+ await saveSession(session);
505
+
506
+ emitProgress(
507
+ emitter,
508
+ makeEvent("reviewer:scope_filter", { ...eventBase, stage: "reviewer" }, {
509
+ message: `Scope filter deferred ${demoted.length} out-of-scope issue(s)`,
510
+ detail: {
511
+ demotedCount: demoted.length,
512
+ autoApproved: allDemoted,
513
+ totalDeferred: session.deferred_issues.length,
514
+ deferred: deferred.map(d => ({ file: d.file, id: d.id, description: d.description }))
515
+ }
516
+ })
517
+ );
518
+ await addCheckpoint(session, {
519
+ stage: "reviewer-scope-filter",
520
+ iteration,
521
+ demoted_count: demoted.length,
522
+ auto_approved: allDemoted,
523
+ total_deferred: session.deferred_issues.length
524
+ });
525
+ }
526
+
492
527
  await addCheckpoint(session, {
493
528
  stage: "reviewer",
494
529
  iteration,
@@ -518,8 +553,48 @@ export async function runReviewerStage({ reviewerRole, config, logger, emitter,
518
553
  const repeatState = repeatDetector.isStalled();
519
554
  if (repeatState.stalled) {
520
555
  const repeatCounts = repeatDetector.getRepeatCounts();
556
+
557
+ // --- Solomon mediation for stalled reviewer ---
558
+ logger.warn(`Reviewer stalled (${repeatCounts.reviewer} repeats). Invoking Solomon mediation.`);
559
+ emitProgress(
560
+ emitter,
561
+ makeEvent("solomon:escalate", { ...eventBase, stage: "reviewer" }, {
562
+ message: `Reviewer stalled — Solomon mediating`,
563
+ detail: { repeats: repeatCounts.reviewer, reason: repeatState.reason }
564
+ })
565
+ );
566
+
567
+ const solomonResult = await invokeSolomon({
568
+ config, logger, emitter, eventBase, stage: "reviewer", askQuestion, session, iteration,
569
+ conflict: {
570
+ stage: "reviewer",
571
+ task,
572
+ iterationCount: repeatCounts.reviewer,
573
+ maxIterations: config.session?.fail_fast_repeats ?? 2,
574
+ stalledReason: repeatState.reason,
575
+ blockingIssues: review.blocking_issues,
576
+ history: [{ agent: "reviewer", feedback: review.blocking_issues.map(x => x.description).join("; ") }]
577
+ }
578
+ });
579
+
580
+ if (solomonResult.action === "pause") {
581
+ await markSessionStatus(session, "stalled");
582
+ return { review, stalled: true, stalledResult: { paused: true, sessionId: session.id, question: solomonResult.question, context: "reviewer_stalled" } };
583
+ }
584
+ if (solomonResult.action === "continue") {
585
+ repeatDetector.reviewer = { lastHash: null, repeatCount: 0 };
586
+ if (solomonResult.humanGuidance) {
587
+ session.last_reviewer_feedback = `Solomon/user guidance: ${solomonResult.humanGuidance}`;
588
+ await saveSession(session);
589
+ }
590
+ return { review };
591
+ }
592
+ if (solomonResult.action === "subtask") {
593
+ return { review, stalled: true, stalledResult: { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "reviewer_subtask" } };
594
+ }
595
+
596
+ // Fallback
521
597
  const message = `Manual intervention required: reviewer issues repeated ${repeatCounts.reviewer} times.`;
522
- logger.warn(message);
523
598
  await markSessionStatus(session, "stalled");
524
599
  emitProgress(
525
600
  emitter,
@@ -7,7 +7,8 @@ const DEFAULT_RULES = {
7
7
  max_files_per_iteration: 10,
8
8
  max_stale_iterations: 3,
9
9
  no_new_dependencies_without_task: true,
10
- scope_guard: true
10
+ scope_guard: true,
11
+ reviewer_overreach: true
11
12
  };
12
13
 
13
14
  export function evaluateRules(context, rulesConfig = {}) {
@@ -59,6 +60,17 @@ export function evaluateRules(context, rulesConfig = {}) {
59
60
  });
60
61
  }
61
62
 
63
+ // Rule 5: Reviewer overreach — reviewer consistently flags out-of-scope issues
64
+ if (rules.reviewer_overreach && context.reviewerDemotedCount > 0) {
65
+ const severity = context.reviewerDemotedCount >= 3 ? "critical" : "warn";
66
+ alerts.push({
67
+ rule: "reviewer_overreach",
68
+ severity,
69
+ message: `Reviewer flagged ${context.reviewerDemotedCount} out-of-scope issue(s) that were auto-demoted by scope filter.`,
70
+ detail: { demotedCount: context.reviewerDemotedCount, autoApproved: context.reviewerAutoApproved || false }
71
+ });
72
+ }
73
+
62
74
  return {
63
75
  alerts,
64
76
  hasCritical: alerts.some(a => a.severity === "critical"),
@@ -76,9 +88,20 @@ export async function buildRulesContext({ session, task, iteration }) {
76
88
  filesChanged: 0,
77
89
  staleIterations: 0,
78
90
  newDependencies: [],
79
- outOfScopeFiles: []
91
+ outOfScopeFiles: [],
92
+ reviewerDemotedCount: 0,
93
+ reviewerAutoApproved: false
80
94
  };
81
95
 
96
+ // Count reviewer scope-filter demotions from session checkpoints
97
+ const scopeFilterCheckpoints = (session.checkpoints || [])
98
+ .filter(cp => cp.stage === "reviewer-scope-filter");
99
+ if (scopeFilterCheckpoints.length > 0) {
100
+ const latest = scopeFilterCheckpoints.at(-1);
101
+ context.reviewerDemotedCount = latest.demoted_count || 0;
102
+ context.reviewerAutoApproved = latest.auto_approved || false;
103
+ }
104
+
82
105
  // Count files changed via git
83
106
  try {
84
107
  const { execaCommand } = await import("execa");
@@ -152,7 +152,8 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
152
152
  last_sonar_issue_signature: null,
153
153
  sonar_repeat_count: 0,
154
154
  last_reviewer_issue_signature: null,
155
- reviewer_repeat_count: 0
155
+ reviewer_repeat_count: 0,
156
+ deferred_issues: []
156
157
  };
157
158
  if (pgTaskId) sessionInit.pg_task_id = pgTaskId;
158
159
  if (pgProject) sessionInit.pg_project_id = pgProject;
@@ -496,7 +497,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
496
497
  if (reviewerEnabled) {
497
498
  const reviewerResult = await runReviewerStage({
498
499
  reviewerRole, config, logger, emitter, eventBase, session, trackBudget,
499
- iteration: i, reviewRules, task, repeatDetector, budgetSummary
500
+ iteration: i, reviewRules, task, repeatDetector, budgetSummary, askQuestion
500
501
  });
501
502
  if (reviewerResult.action === "pause") {
502
503
  return reviewerResult.result;
@@ -649,14 +650,17 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
649
650
  }
650
651
  }
651
652
 
653
+ const deferredIssues = session.deferred_issues || [];
652
654
  emitProgress(
653
655
  emitter,
654
656
  makeEvent("session:end", { ...eventBase, stage: "done" }, {
655
- message: "Session approved",
656
- detail: { approved: true, iterations: i, stages: stageResults, git: gitResult, budget: budgetSummary() }
657
+ message: deferredIssues.length > 0
658
+ ? `Session approved (${deferredIssues.length} deferred issue(s) tracked as tech debt)`
659
+ : "Session approved",
660
+ detail: { approved: true, iterations: i, stages: stageResults, git: gitResult, budget: budgetSummary(), deferredIssues }
657
661
  })
658
662
  );
659
- return { approved: true, sessionId: session.id, review, git: gitResult };
663
+ return { approved: true, sessionId: session.id, review, git: gitResult, deferredIssues };
660
664
  }
661
665
 
662
666
  session.last_reviewer_feedback = review.blocking_issues
@@ -29,7 +29,7 @@ const SERENA_INSTRUCTIONS = [
29
29
  "Fall back to reading files only when Serena tools are not sufficient."
30
30
  ].join("\n");
31
31
 
32
- export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary = null, coderRules = null, methodology = "tdd", serenaEnabled = false }) {
32
+ export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary = null, coderRules = null, methodology = "tdd", serenaEnabled = false, deferredContext = null }) {
33
33
  const sections = [
34
34
  serenaEnabled ? SUBAGENT_PREAMBLE_SERENA : SUBAGENT_PREAMBLE,
35
35
  `Task:\n${task}`,
@@ -65,5 +65,9 @@ export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary =
65
65
  sections.push(`Reviewer blocking feedback:\n${reviewerFeedback}`);
66
66
  }
67
67
 
68
+ if (deferredContext) {
69
+ sections.push(deferredContext);
70
+ }
71
+
68
72
  return sections.join("\n\n");
69
73
  }
@@ -26,6 +26,8 @@ export function buildReviewerPrompt({ task, diff, reviewRules, mode, serenaEnabl
26
26
  const sections = [
27
27
  serenaEnabled ? SUBAGENT_PREAMBLE_SERENA : SUBAGENT_PREAMBLE,
28
28
  `You are a code reviewer in ${mode} mode.`,
29
+ "CRITICAL SCOPE RULE: Only review changes that are part of the diff below. Do NOT flag issues in unchanged code, missing features planned for future tasks, or improvements outside the scope of this task. If the diff is correct for what the task asks, approve it — even if the broader codebase has other issues.",
30
+ "Only block approval for issues IN THE DIFF that are bugs, security vulnerabilities, or clear violations of the review rules.",
29
31
  "Return only one valid JSON object and nothing else.",
30
32
  "JSON schema:",
31
33
  '{"approved":boolean,"blocking_issues":[{"id":string,"severity":"critical|high|medium|low","file":string,"line":number,"description":string,"suggested_fix":string}],"non_blocking_suggestions":[string],"summary":string,"confidence":number}'
@@ -0,0 +1,153 @@
1
+ /**
2
+ * Scope filter — auto-defers reviewer blocking issues that reference
3
+ * files NOT present in the diff. This prevents reviewer scope drift
4
+ * (flagging missing features, unchanged code, future tasks) from
5
+ * stalling the pipeline.
6
+ *
7
+ * Deferred issues are NOT forgotten — they are tracked in the session
8
+ * as technical debt that should be resolved in future iterations or
9
+ * follow-up tasks. The coder and planner receive context about what
10
+ * was deferred and why.
11
+ */
12
+
13
/**
 * Collect the paths of every file touched by a unified diff.
 *
 * Both sides of each file header are read:
 *   - "+++ b/<path>" — the new side (added / modified / rename target)
 *   - "--- a/<path>" — the old side, so that deleted files (whose new side
 *     is "/dev/null") and rename sources are still reported as changed.
 *
 * @param {string|null|undefined} diff - Unified diff text; anything that is
 *   not a non-empty string yields an empty set.
 * @returns {Set<string>} Paths with the "a/" / "b/" prefix removed.
 */
export function extractDiffFiles(diff) {
  const files = new Set();
  if (typeof diff !== "string" || diff === "") return files;

  // Split on LF or CRLF so a carriage return never ends up inside a path.
  for (const line of diff.split(/\r?\n/)) {
    // Stop the capture at a TAB: some diff producers append one (optionally
    // followed by a timestamp) after the file name.
    const header = /^(?:\+\+\+ b|--- a)\/([^\t\r]+)/.exec(line);
    if (header) files.add(header[1]);
  }
  return files;
}
25
+
26
/**
 * Decide whether a reviewer blocking issue concerns the diff under review.
 *
 * An issue is IN scope when any of these holds:
 *   - it names no file (a general remark about the change);
 *   - its `file` equals one of the changed paths;
 *   - its `file` and a changed path agree on whole trailing path segments
 *     (e.g. "/repo/src/a.js" vs "src/a.js", or "a.js" vs "src/a.js");
 *   - its `file` string occurs verbatim somewhere in the diff text
 *     (e.g. it is imported by a changed line).
 *
 * Otherwise the issue points at a file the diff does not touch and is OUT
 * of scope.
 *
 * @param {{file?: unknown}|null|undefined} issue - Reviewer issue.
 * @param {Iterable<string> & {has(path: string): boolean}} diffFiles - Changed
 *   paths, typically the Set returned by extractDiffFiles.
 * @param {string|null|undefined} diffContent - Raw diff text.
 * @returns {boolean} true when the issue should stay blocking.
 */
export function isIssueInScope(issue, diffFiles, diffContent) {
  // A missing or non-string `file` is treated as "no file given".
  const rawFile = typeof issue?.file === "string" ? issue.file.trim() : "";

  // No file specified — the reviewer is commenting on the diff generally.
  if (!rawFile) return true;

  // Exact match.
  if (diffFiles.has(rawFile)) return true;

  // Compare with forward slashes and without a leading "./" so that
  // "./src/a.js" and "src\\a.js" line up with "src/a.js".
  const normalize = (p) => p.replace(/\\/g, "/").replace(/^(?:\.\/)+/, "");

  // True when `shorter` equals `longer` or is a tail of it that starts right
  // after a "/". A bare endsWith() would let "a.js" match "data.js".
  const isPathTail = (longer, shorter) => {
    if (shorter.length === 0 || !longer.endsWith(shorter)) return false;
    const cut = longer.length - shorter.length;
    return cut === 0 || shorter.startsWith("/") || longer[cut - 1] === "/";
  };

  const file = normalize(rawFile);
  for (const changed of diffFiles) {
    const changedPath = normalize(changed);
    if (isPathTail(changedPath, file) || isPathTail(file, changedPath)) return true;
  }

  // Deliberately loose heuristic kept from the original design: if the path
  // is mentioned anywhere in the diff, keep the issue blocking.
  if (typeof diffContent === "string" && diffContent.includes(rawFile)) return true;

  return false;
}
57
+
58
/**
 * Split a rejected review's blocking issues into those that concern the diff
 * and those that do not, turning the latter into non-blocking suggestions.
 *
 * The input review is returned untouched when it is missing, already
 * approved, when no changed files can be read from the diff, or when every
 * blocking issue is in scope.
 *
 * @param {object|null|undefined} review - Reviewer result.
 * @param {string} diff - Unified diff the review was made against.
 * @returns {{review: object, demoted: object[], deferred: object[], allDemoted: boolean}}
 *   - review: filtered copy (approved is set to true when nothing in-scope
 *     remains blocking)
 *   - demoted: the original issue objects that were moved out
 *   - deferred: the same issues as records for session tracking
 *   - allDemoted: true when no in-scope blocking issue is left
 */
export function filterReviewScope(review, diff) {
  // Result used whenever the filter decides not to touch the review.
  const passthrough = () => ({ review, demoted: [], deferred: [], allDemoted: false });

  if (!review || review.approved) return passthrough();

  const changedFiles = extractDiffFiles(diff);
  // Safety valve: with no parsable file list, nothing can be judged.
  if (changedFiles.size === 0) return passthrough();

  const kept = [];
  const demoted = [];
  (review.blocking_issues || []).forEach((issue) => {
    const bucket = isIssueInScope(issue, changedFiles, diff) ? kept : demoted;
    bucket.push(issue);
  });

  if (demoted.length === 0) return passthrough();

  const suggestionNotes = demoted.map((issue) => {
    const where = issue.file || "unknown";
    const what = issue.description || issue.id || "no description";
    return `[auto-demoted] ${where}: ${what}`;
  });

  const filtered = {
    ...review,
    blocking_issues: kept,
    non_blocking_suggestions: [...(review.non_blocking_suggestions || []), ...suggestionNotes]
  };

  // With nothing in-scope left to block on, the review is flipped to approved.
  const allDemoted = kept.length === 0;
  if (allDemoted) {
    const baseSummary = review.summary || "";
    filtered.approved = true;
    filtered.summary = `${baseSummary} [Auto-approved: ${demoted.length} out-of-scope issue(s) demoted to suggestions]`.trim();
  }

  // Records stored on the session so the deferred work is not lost.
  const deferred = demoted.map(({ id, file, severity, description, suggested_fix }) => ({
    id: id || null,
    file: file || null,
    severity: severity || "medium",
    description: description || "no description",
    suggested_fix: suggested_fix || null,
    deferred_at: new Date().toISOString(),
    reason: "out_of_scope"
  }));

  return { review: filtered, demoted, deferred, allDemoted };
}
128
+
129
/**
 * Render deferred reviewer issues as a Markdown section for a prompt.
 *
 * The same concern can be stored several times when the list is appended to
 * on every reviewer pass, so entries with the same file and description are
 * listed once (first occurrence wins). Missing `severity` / `description`
 * fall back to "medium" / "no description" instead of printing "undefined".
 *
 * @param {Array<{file?: string|null, severity?: string, description?: string,
 *   suggested_fix?: string|null}>|null|undefined} deferredIssues
 * @returns {string} The section text, or "" when there is nothing to report.
 */
export function buildDeferredContext(deferredIssues) {
  if (!Array.isArray(deferredIssues) || deferredIssues.length === 0) return "";

  const seen = new Set();
  const bullets = [];
  for (const issue of deferredIssues) {
    if (!issue) continue;

    const description = issue.description || "no description";
    // NUL cannot occur in a path, so it is a safe separator for the key.
    const key = `${issue.file || ""}\u0000${description}`;
    if (seen.has(key)) continue;
    seen.add(key);

    const file = issue.file ? `\`${issue.file}\`` : "general";
    const fix = issue.suggested_fix ? ` — Suggestion: ${issue.suggested_fix}` : "";
    bullets.push(`- [${issue.severity || "medium"}] ${file}: ${description}${fix}`);
  }

  if (bullets.length === 0) return "";

  return [
    "## Deferred reviewer concerns (technical debt)",
    "The following issues were flagged by the reviewer but deferred because they are outside the current diff scope.",
    "You do NOT need to fix them now, but be aware of them:",
    "",
    ...bullets,
    "",
    "If your current changes naturally address any of these, great. Otherwise, they will be tracked for future resolution."
  ].join("\n");
}
@@ -17,8 +17,8 @@ export class CoderRole extends BaseRole {
17
17
  }
18
18
 
19
19
  async execute(input) {
20
- const { task, reviewerFeedback, sonarSummary, onOutput } = typeof input === "string"
21
- ? { task: input, reviewerFeedback: null, sonarSummary: null, onOutput: null }
20
+ const { task, reviewerFeedback, sonarSummary, deferredContext, onOutput } = typeof input === "string"
21
+ ? { task: input, reviewerFeedback: null, sonarSummary: null, deferredContext: null, onOutput: null }
22
22
  : input || {};
23
23
 
24
24
  const provider = resolveProvider(this.config);
@@ -28,6 +28,7 @@ export class CoderRole extends BaseRole {
28
28
  task: task || this.context?.task || "",
29
29
  reviewerFeedback: reviewerFeedback || null,
30
30
  sonarSummary: sonarSummary || null,
31
+ deferredContext: deferredContext || null,
31
32
  coderRules: this.instructions,
32
33
  methodology: this.config?.development?.methodology || "tdd",
33
34
  serenaEnabled: Boolean(this.config?.serena?.enabled)