karajan-code 1.11.1 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/docs/README.es.md +2 -1
- package/package.json +1 -1
- package/src/becaria/dispatch.js +99 -0
- package/src/becaria/index.js +3 -0
- package/src/becaria/pr-diff.js +26 -0
- package/src/becaria/repo.js +45 -0
- package/src/cli.js +2 -0
- package/src/commands/doctor.js +56 -1
- package/src/commands/init.js +33 -0
- package/src/commands/review.js +54 -2
- package/src/config.js +11 -0
- package/src/git/automation.js +65 -2
- package/src/mcp/tools.js +1 -0
- package/src/orchestrator/iteration-stages.js +85 -3
- package/src/orchestrator/solomon-rules.js +25 -2
- package/src/orchestrator.js +194 -6
- package/src/prompts/coder.js +5 -1
- package/src/prompts/reviewer.js +2 -0
- package/src/review/scope-filter.js +153 -0
- package/src/roles/coder-role.js +3 -2
- package/templates/roles/coder.md +11 -7
- package/templates/roles/planner.md +2 -0
- package/templates/roles/refactorer.md +1 -1
- package/templates/roles/reviewer.md +11 -4
- package/templates/workflows/automerge.yml +30 -0
- package/templates/workflows/becaria-gateway.yml +58 -0
- package/templates/workflows/houston-override.yml +46 -0
|
@@ -7,7 +7,8 @@ const DEFAULT_RULES = {
|
|
|
7
7
|
max_files_per_iteration: 10,
|
|
8
8
|
max_stale_iterations: 3,
|
|
9
9
|
no_new_dependencies_without_task: true,
|
|
10
|
-
scope_guard: true
|
|
10
|
+
scope_guard: true,
|
|
11
|
+
reviewer_overreach: true
|
|
11
12
|
};
|
|
12
13
|
|
|
13
14
|
export function evaluateRules(context, rulesConfig = {}) {
|
|
@@ -59,6 +60,17 @@ export function evaluateRules(context, rulesConfig = {}) {
|
|
|
59
60
|
});
|
|
60
61
|
}
|
|
61
62
|
|
|
63
|
+
// Rule 5: Reviewer overreach — reviewer consistently flags out-of-scope issues
|
|
64
|
+
if (rules.reviewer_overreach && context.reviewerDemotedCount > 0) {
|
|
65
|
+
const severity = context.reviewerDemotedCount >= 3 ? "critical" : "warn";
|
|
66
|
+
alerts.push({
|
|
67
|
+
rule: "reviewer_overreach",
|
|
68
|
+
severity,
|
|
69
|
+
message: `Reviewer flagged ${context.reviewerDemotedCount} out-of-scope issue(s) that were auto-demoted by scope filter.`,
|
|
70
|
+
detail: { demotedCount: context.reviewerDemotedCount, autoApproved: context.reviewerAutoApproved || false }
|
|
71
|
+
});
|
|
72
|
+
}
|
|
73
|
+
|
|
62
74
|
return {
|
|
63
75
|
alerts,
|
|
64
76
|
hasCritical: alerts.some(a => a.severity === "critical"),
|
|
@@ -76,9 +88,20 @@ export async function buildRulesContext({ session, task, iteration }) {
|
|
|
76
88
|
filesChanged: 0,
|
|
77
89
|
staleIterations: 0,
|
|
78
90
|
newDependencies: [],
|
|
79
|
-
outOfScopeFiles: []
|
|
91
|
+
outOfScopeFiles: [],
|
|
92
|
+
reviewerDemotedCount: 0,
|
|
93
|
+
reviewerAutoApproved: false
|
|
80
94
|
};
|
|
81
95
|
|
|
96
|
+
// Count reviewer scope-filter demotions from session checkpoints
|
|
97
|
+
const scopeFilterCheckpoints = (session.checkpoints || [])
|
|
98
|
+
.filter(cp => cp.stage === "reviewer-scope-filter");
|
|
99
|
+
if (scopeFilterCheckpoints.length > 0) {
|
|
100
|
+
const latest = scopeFilterCheckpoints.at(-1);
|
|
101
|
+
context.reviewerDemotedCount = latest.demoted_count || 0;
|
|
102
|
+
context.reviewerAutoApproved = latest.auto_approved || false;
|
|
103
|
+
}
|
|
104
|
+
|
|
82
105
|
// Count files changed via git
|
|
83
106
|
try {
|
|
84
107
|
const { execaCommand } = await import("execa");
|
package/src/orchestrator.js
CHANGED
|
@@ -17,7 +17,9 @@ import { emitProgress, makeEvent } from "./utils/events.js";
|
|
|
17
17
|
import { BudgetTracker, extractUsageMetrics } from "./utils/budget.js";
|
|
18
18
|
import {
|
|
19
19
|
prepareGitAutomation,
|
|
20
|
-
finalizeGitAutomation
|
|
20
|
+
finalizeGitAutomation,
|
|
21
|
+
earlyPrCreation,
|
|
22
|
+
incrementalPush
|
|
21
23
|
} from "./git/automation.js";
|
|
22
24
|
import { resolveRoleMdPath, loadFirstExisting } from "./roles/base-role.js";
|
|
23
25
|
import { resolveReviewProfile } from "./review/profiles.js";
|
|
@@ -152,7 +154,8 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
152
154
|
last_sonar_issue_signature: null,
|
|
153
155
|
sonar_repeat_count: 0,
|
|
154
156
|
last_reviewer_issue_signature: null,
|
|
155
|
-
reviewer_repeat_count: 0
|
|
157
|
+
reviewer_repeat_count: 0,
|
|
158
|
+
deferred_issues: []
|
|
156
159
|
};
|
|
157
160
|
if (pgTaskId) sessionInit.pg_task_id = pgTaskId;
|
|
158
161
|
if (pgProject) sessionInit.pg_project_id = pgProject;
|
|
@@ -287,6 +290,23 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
287
290
|
const plannerResult = await runPlannerStage({ config, logger, emitter, eventBase, session, plannerRole, researchContext, triageDecomposition, trackBudget });
|
|
288
291
|
plannedTask = plannerResult.plannedTask;
|
|
289
292
|
stageResults.planner = plannerResult.stageResult;
|
|
293
|
+
|
|
294
|
+
// BecarIA: dispatch planner comment (only on resume where PR already exists)
|
|
295
|
+
if (Boolean(config.becaria?.enabled) && session.becaria_pr_number) {
|
|
296
|
+
try {
|
|
297
|
+
const { dispatchComment } = await import("./becaria/dispatch.js");
|
|
298
|
+
const { detectRepo } = await import("./becaria/repo.js");
|
|
299
|
+
const repo = await detectRepo();
|
|
300
|
+
if (repo) {
|
|
301
|
+
const p = plannerResult.stageResult;
|
|
302
|
+
await dispatchComment({
|
|
303
|
+
repo, prNumber: session.becaria_pr_number, agent: "Planner",
|
|
304
|
+
body: `Plan: ${p?.summary || plannedTask}`,
|
|
305
|
+
becariaConfig: config.becaria
|
|
306
|
+
});
|
|
307
|
+
}
|
|
308
|
+
} catch { /* non-blocking */ }
|
|
309
|
+
}
|
|
290
310
|
}
|
|
291
311
|
|
|
292
312
|
const gitCtx = await prepareGitAutomation({ config, task, logger, session });
|
|
@@ -392,6 +412,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
392
412
|
|
|
393
413
|
eventBase.iteration = i;
|
|
394
414
|
const iterStart = Date.now();
|
|
415
|
+
const becariaEnabled = Boolean(config.becaria?.enabled) && gitCtx?.enabled;
|
|
395
416
|
logger.setContext({ iteration: i, stage: "iteration" });
|
|
396
417
|
|
|
397
418
|
emitProgress(
|
|
@@ -482,6 +503,75 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
482
503
|
}
|
|
483
504
|
if (sonarResult.stageResult) {
|
|
484
505
|
stageResults.sonar = sonarResult.stageResult;
|
|
506
|
+
// BecarIA: dispatch sonar comment
|
|
507
|
+
if (becariaEnabled && session.becaria_pr_number) {
|
|
508
|
+
try {
|
|
509
|
+
const { dispatchComment } = await import("./becaria/dispatch.js");
|
|
510
|
+
const { detectRepo } = await import("./becaria/repo.js");
|
|
511
|
+
const repo = await detectRepo();
|
|
512
|
+
if (repo) {
|
|
513
|
+
const s = sonarResult.stageResult;
|
|
514
|
+
await dispatchComment({
|
|
515
|
+
repo, prNumber: session.becaria_pr_number, agent: "Sonar",
|
|
516
|
+
body: `SonarQube scan: ${s.summary || "completed"}`,
|
|
517
|
+
becariaConfig: config.becaria
|
|
518
|
+
});
|
|
519
|
+
}
|
|
520
|
+
} catch { /* non-blocking */ }
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
// --- BecarIA Gateway: early PR or incremental push ---
|
|
526
|
+
if (becariaEnabled) {
|
|
527
|
+
try {
|
|
528
|
+
const { dispatchComment } = await import("./becaria/dispatch.js");
|
|
529
|
+
const { detectRepo } = await import("./becaria/repo.js");
|
|
530
|
+
const repo = await detectRepo();
|
|
531
|
+
|
|
532
|
+
if (!session.becaria_pr_number) {
|
|
533
|
+
// First iteration: commit + push + create PR
|
|
534
|
+
const earlyPr = await earlyPrCreation({ gitCtx, task, logger, session, stageResults });
|
|
535
|
+
if (earlyPr) {
|
|
536
|
+
session.becaria_pr_number = earlyPr.prNumber;
|
|
537
|
+
session.becaria_pr_url = earlyPr.prUrl;
|
|
538
|
+
session.becaria_commits = earlyPr.commits;
|
|
539
|
+
await saveSession(session);
|
|
540
|
+
emitProgress(emitter, makeEvent("becaria:pr-created", { ...eventBase, stage: "becaria" }, {
|
|
541
|
+
message: `Early PR created: #${earlyPr.prNumber}`,
|
|
542
|
+
detail: { prNumber: earlyPr.prNumber, prUrl: earlyPr.prUrl }
|
|
543
|
+
}));
|
|
544
|
+
|
|
545
|
+
// Post coder comment on new PR
|
|
546
|
+
if (repo) {
|
|
547
|
+
const commitList = earlyPr.commits.map((c) => `- \`${c.hash.slice(0, 7)}\` ${c.message}`).join("\n");
|
|
548
|
+
await dispatchComment({
|
|
549
|
+
repo, prNumber: earlyPr.prNumber, agent: "Coder",
|
|
550
|
+
body: `Iteración ${i} completada.\n\nCommits:\n${commitList}`,
|
|
551
|
+
becariaConfig: config.becaria
|
|
552
|
+
});
|
|
553
|
+
}
|
|
554
|
+
}
|
|
555
|
+
} else {
|
|
556
|
+
// Subsequent iterations: incremental push + comment
|
|
557
|
+
const pushResult = await incrementalPush({ gitCtx, task, logger, session });
|
|
558
|
+
if (pushResult) {
|
|
559
|
+
session.becaria_commits = [...(session.becaria_commits || []), ...pushResult.commits];
|
|
560
|
+
await saveSession(session);
|
|
561
|
+
|
|
562
|
+
if (repo) {
|
|
563
|
+
const feedback = session.last_reviewer_feedback || "N/A";
|
|
564
|
+
const commitList = pushResult.commits.map((c) => `- \`${c.hash.slice(0, 7)}\` ${c.message}`).join("\n");
|
|
565
|
+
await dispatchComment({
|
|
566
|
+
repo, prNumber: session.becaria_pr_number, agent: "Coder",
|
|
567
|
+
body: `Issues corregidos:\n${feedback}\n\nCommits:\n${commitList}`,
|
|
568
|
+
becariaConfig: config.becaria
|
|
569
|
+
});
|
|
570
|
+
}
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
} catch (err) {
|
|
574
|
+
logger.warn(`BecarIA early PR/push failed (non-blocking): ${err.message}`);
|
|
485
575
|
}
|
|
486
576
|
}
|
|
487
577
|
|
|
@@ -496,7 +586,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
496
586
|
if (reviewerEnabled) {
|
|
497
587
|
const reviewerResult = await runReviewerStage({
|
|
498
588
|
reviewerRole, config, logger, emitter, eventBase, session, trackBudget,
|
|
499
|
-
iteration: i, reviewRules, task, repeatDetector, budgetSummary
|
|
589
|
+
iteration: i, reviewRules, task, repeatDetector, budgetSummary, askQuestion
|
|
500
590
|
});
|
|
501
591
|
if (reviewerResult.action === "pause") {
|
|
502
592
|
return reviewerResult.result;
|
|
@@ -575,11 +665,74 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
575
665
|
}
|
|
576
666
|
}
|
|
577
667
|
}
|
|
668
|
+
|
|
669
|
+
// BecarIA: dispatch solomon comment
|
|
670
|
+
if (becariaEnabled && session.becaria_pr_number) {
|
|
671
|
+
try {
|
|
672
|
+
const { dispatchComment } = await import("./becaria/dispatch.js");
|
|
673
|
+
const { detectRepo } = await import("./becaria/repo.js");
|
|
674
|
+
const repo = await detectRepo();
|
|
675
|
+
if (repo) {
|
|
676
|
+
const alerts = rulesResult.alerts || [];
|
|
677
|
+
const alertMsg = alerts.length > 0
|
|
678
|
+
? alerts.map(a => `- [${a.severity}] ${a.message}`).join("\n")
|
|
679
|
+
: "No anomalies detected";
|
|
680
|
+
await dispatchComment({
|
|
681
|
+
repo, prNumber: session.becaria_pr_number, agent: "Solomon",
|
|
682
|
+
body: `Supervisor check iteración ${i}: ${alertMsg}`,
|
|
683
|
+
becariaConfig: config.becaria
|
|
684
|
+
});
|
|
685
|
+
}
|
|
686
|
+
} catch { /* non-blocking */ }
|
|
687
|
+
}
|
|
578
688
|
} catch (err) {
|
|
579
689
|
logger.warn(`Solomon rules evaluation failed: ${err.message}`);
|
|
580
690
|
}
|
|
581
691
|
}
|
|
582
692
|
|
|
693
|
+
// --- BecarIA Gateway: dispatch review result ---
|
|
694
|
+
if (becariaEnabled && session.becaria_pr_number) {
|
|
695
|
+
try {
|
|
696
|
+
const { dispatchReview, dispatchComment } = await import("./becaria/dispatch.js");
|
|
697
|
+
const { detectRepo } = await import("./becaria/repo.js");
|
|
698
|
+
const repo = await detectRepo();
|
|
699
|
+
if (repo) {
|
|
700
|
+
const bc = config.becaria;
|
|
701
|
+
// Formal review (APPROVE / REQUEST_CHANGES)
|
|
702
|
+
if (review.approved) {
|
|
703
|
+
await dispatchReview({
|
|
704
|
+
repo, prNumber: session.becaria_pr_number,
|
|
705
|
+
event: "APPROVE", body: review.summary || "Approved", agent: "Reviewer", becariaConfig: bc
|
|
706
|
+
});
|
|
707
|
+
} else {
|
|
708
|
+
const blocking = review.blocking_issues?.map((x) => `- ${x.id || "ISSUE"} [${x.severity || ""}] ${x.description}`).join("\n") || "";
|
|
709
|
+
await dispatchReview({
|
|
710
|
+
repo, prNumber: session.becaria_pr_number,
|
|
711
|
+
event: "REQUEST_CHANGES",
|
|
712
|
+
body: blocking || review.summary || "Changes requested",
|
|
713
|
+
agent: "Reviewer", becariaConfig: bc
|
|
714
|
+
});
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
// Detailed comment
|
|
718
|
+
const status = review.approved ? "APPROVED" : "REQUEST_CHANGES";
|
|
719
|
+
const blocking = review.blocking_issues?.map((x) => `- ${x.id || "ISSUE"} [${x.severity || ""}] ${x.description}`).join("\n") || "";
|
|
720
|
+
const suggestions = review.non_blocking_suggestions?.map((s) => `- ${typeof s === "string" ? s : `${s.id || ""} ${s.description || s}`}`).join("\n") || "";
|
|
721
|
+
let reviewBody = `Review iteración ${i}: ${status}`;
|
|
722
|
+
if (blocking) reviewBody += `\n\n**Blocking:**\n${blocking}`;
|
|
723
|
+
if (suggestions) reviewBody += `\n\n**Suggestions:**\n${suggestions}`;
|
|
724
|
+
await dispatchComment({
|
|
725
|
+
repo, prNumber: session.becaria_pr_number, agent: "Reviewer",
|
|
726
|
+
body: reviewBody, becariaConfig: bc
|
|
727
|
+
});
|
|
728
|
+
|
|
729
|
+
logger.info(`BecarIA: dispatched review for PR #${session.becaria_pr_number}`);
|
|
730
|
+
}
|
|
731
|
+
} catch (err) {
|
|
732
|
+
logger.warn(`BecarIA dispatch failed (non-blocking): ${err.message}`);
|
|
733
|
+
}
|
|
734
|
+
}
|
|
735
|
+
|
|
583
736
|
if (review.approved) {
|
|
584
737
|
session.reviewer_retry_count = 0;
|
|
585
738
|
|
|
@@ -599,6 +752,22 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
599
752
|
}
|
|
600
753
|
if (testerResult.stageResult) {
|
|
601
754
|
stageResults.tester = testerResult.stageResult;
|
|
755
|
+
// BecarIA: dispatch tester comment
|
|
756
|
+
if (becariaEnabled && session.becaria_pr_number) {
|
|
757
|
+
try {
|
|
758
|
+
const { dispatchComment } = await import("./becaria/dispatch.js");
|
|
759
|
+
const { detectRepo } = await import("./becaria/repo.js");
|
|
760
|
+
const repo = await detectRepo();
|
|
761
|
+
if (repo) {
|
|
762
|
+
const t = testerResult.stageResult;
|
|
763
|
+
await dispatchComment({
|
|
764
|
+
repo, prNumber: session.becaria_pr_number, agent: "Tester",
|
|
765
|
+
body: `Tests: ${t.summary || "completed"}`,
|
|
766
|
+
becariaConfig: config.becaria
|
|
767
|
+
});
|
|
768
|
+
}
|
|
769
|
+
} catch { /* non-blocking */ }
|
|
770
|
+
}
|
|
602
771
|
}
|
|
603
772
|
}
|
|
604
773
|
|
|
@@ -615,6 +784,22 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
615
784
|
}
|
|
616
785
|
if (securityResult.stageResult) {
|
|
617
786
|
stageResults.security = securityResult.stageResult;
|
|
787
|
+
// BecarIA: dispatch security comment
|
|
788
|
+
if (becariaEnabled && session.becaria_pr_number) {
|
|
789
|
+
try {
|
|
790
|
+
const { dispatchComment } = await import("./becaria/dispatch.js");
|
|
791
|
+
const { detectRepo } = await import("./becaria/repo.js");
|
|
792
|
+
const repo = await detectRepo();
|
|
793
|
+
if (repo) {
|
|
794
|
+
const s = securityResult.stageResult;
|
|
795
|
+
await dispatchComment({
|
|
796
|
+
repo, prNumber: session.becaria_pr_number, agent: "Security",
|
|
797
|
+
body: `Security scan: ${s.summary || "completed"}`,
|
|
798
|
+
becariaConfig: config.becaria
|
|
799
|
+
});
|
|
800
|
+
}
|
|
801
|
+
} catch { /* non-blocking */ }
|
|
802
|
+
}
|
|
618
803
|
}
|
|
619
804
|
}
|
|
620
805
|
|
|
@@ -649,14 +834,17 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
649
834
|
}
|
|
650
835
|
}
|
|
651
836
|
|
|
837
|
+
const deferredIssues = session.deferred_issues || [];
|
|
652
838
|
emitProgress(
|
|
653
839
|
emitter,
|
|
654
840
|
makeEvent("session:end", { ...eventBase, stage: "done" }, {
|
|
655
|
-
message:
|
|
656
|
-
|
|
841
|
+
message: deferredIssues.length > 0
|
|
842
|
+
? `Session approved (${deferredIssues.length} deferred issue(s) tracked as tech debt)`
|
|
843
|
+
: "Session approved",
|
|
844
|
+
detail: { approved: true, iterations: i, stages: stageResults, git: gitResult, budget: budgetSummary(), deferredIssues }
|
|
657
845
|
})
|
|
658
846
|
);
|
|
659
|
-
return { approved: true, sessionId: session.id, review, git: gitResult };
|
|
847
|
+
return { approved: true, sessionId: session.id, review, git: gitResult, deferredIssues };
|
|
660
848
|
}
|
|
661
849
|
|
|
662
850
|
session.last_reviewer_feedback = review.blocking_issues
|
package/src/prompts/coder.js
CHANGED
|
@@ -29,7 +29,7 @@ const SERENA_INSTRUCTIONS = [
|
|
|
29
29
|
"Fall back to reading files only when Serena tools are not sufficient."
|
|
30
30
|
].join("\n");
|
|
31
31
|
|
|
32
|
-
export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary = null, coderRules = null, methodology = "tdd", serenaEnabled = false }) {
|
|
32
|
+
export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary = null, coderRules = null, methodology = "tdd", serenaEnabled = false, deferredContext = null }) {
|
|
33
33
|
const sections = [
|
|
34
34
|
serenaEnabled ? SUBAGENT_PREAMBLE_SERENA : SUBAGENT_PREAMBLE,
|
|
35
35
|
`Task:\n${task}`,
|
|
@@ -65,5 +65,9 @@ export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary =
|
|
|
65
65
|
sections.push(`Reviewer blocking feedback:\n${reviewerFeedback}`);
|
|
66
66
|
}
|
|
67
67
|
|
|
68
|
+
if (deferredContext) {
|
|
69
|
+
sections.push(deferredContext);
|
|
70
|
+
}
|
|
71
|
+
|
|
68
72
|
return sections.join("\n\n");
|
|
69
73
|
}
|
package/src/prompts/reviewer.js
CHANGED
|
@@ -26,6 +26,8 @@ export function buildReviewerPrompt({ task, diff, reviewRules, mode, serenaEnabl
|
|
|
26
26
|
const sections = [
|
|
27
27
|
serenaEnabled ? SUBAGENT_PREAMBLE_SERENA : SUBAGENT_PREAMBLE,
|
|
28
28
|
`You are a code reviewer in ${mode} mode.`,
|
|
29
|
+
"CRITICAL SCOPE RULE: Only review changes that are part of the diff below. Do NOT flag issues in unchanged code, missing features planned for future tasks, or improvements outside the scope of this task. If the diff is correct for what the task asks, approve it — even if the broader codebase has other issues.",
|
|
30
|
+
"Only block approval for issues IN THE DIFF that are bugs, security vulnerabilities, or clear violations of the review rules.",
|
|
29
31
|
"Return only one valid JSON object and nothing else.",
|
|
30
32
|
"JSON schema:",
|
|
31
33
|
'{"approved":boolean,"blocking_issues":[{"id":string,"severity":"critical|high|medium|low","file":string,"line":number,"description":string,"suggested_fix":string}],"non_blocking_suggestions":[string],"summary":string,"confidence":number}'
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scope filter — auto-defers reviewer blocking issues that reference
|
|
3
|
+
* files NOT present in the diff. This prevents reviewer scope drift
|
|
4
|
+
* (flagging missing features, unchanged code, future tasks) from
|
|
5
|
+
* stalling the pipeline.
|
|
6
|
+
*
|
|
7
|
+
* Deferred issues are NOT forgotten — they are tracked in the session
|
|
8
|
+
* as technical debt that should be resolved in future iterations or
|
|
9
|
+
* follow-up tasks. The coder and planner receive context about what
|
|
10
|
+
* was deferred and why.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Extract the set of changed file paths from a unified diff string.
 *
 * Only "+++ b/<path>" header lines are considered, so a file whose new side
 * is "/dev/null" (a deletion) does not contribute an entry.
 *
 * @param {string} [diff] - Unified diff text. A null, undefined or empty
 *   value yields an empty set.
 * @returns {Set<string>} Paths exactly as they appear after the "b/" prefix,
 *   without any trailing TAB/timestamp decoration.
 */
export function extractDiffFiles(diff) {
  const files = new Set();
  for (const line of (diff || "").split("\n")) {
    // Match "+++ b/path" header lines. The capture stops at the first TAB:
    // diff tools may append a TAB (optionally followed by a timestamp) after
    // the path, and keeping it would make the entry unmatchable by
    // Set#has / endsWith in the callers.
    const m = line.match(/^\+\+\+ b\/([^\t\r\n]+)/);
    if (m) files.add(m[1]);
  }
  return files;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Determine whether a blocking issue is within scope of the diff.
 *
 * An issue is considered IN scope when:
 * - It has no usable `file` field (missing, blank, or not a string) — it is
 *   treated as a general concern about the diff
 * - Its `file` equals one of the changed files
 * - Its `file` and a changed file are the same path up to a leading directory
 *   prefix (e.g. absolute vs. repo-relative); the match must start on a "/"
 *   boundary so "ta.js" does not match "src/data.js"
 * - Its `file` text appears anywhere in the diff content
 *
 * An issue is OUT of scope when it names a file and none of the above hold.
 *
 * @param {{file?: string}} issue - Reviewer issue; only `file` is read.
 * @param {Set<string>} diffFiles - Changed paths (see extractDiffFiles).
 * @param {string} [diffContent] - Raw diff text used for the substring fallback.
 * @returns {boolean} true when the issue should stay blocking.
 */
export function isIssueInScope(issue, diffFiles, diffContent) {
  // Reviewer output is model-generated JSON; a non-string `file` must not crash
  // the pipeline, so it is handled like a missing one.
  const file = typeof issue?.file === "string" ? issue.file.trim() : "";

  // No file specified — the reviewer is commenting on the diff generally
  if (!file) return true;

  // Direct match
  if (diffFiles.has(file)) return true;

  // Suffix match on a path-segment boundary (reviewer might use a full path
  // where the diff has a relative one, or the other way round)
  for (const df of diffFiles) {
    if (df.endsWith(`/${file}`) || file.endsWith(`/${df}`)) return true;
  }

  // Check if the file path appears anywhere in the diff content
  // (covers cases where the file is referenced in imports/requires).
  // This is intentionally a plain substring test.
  if (diffContent && diffContent.includes(file)) return true;

  return false;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Apply the scope filter to a reviewer verdict: blocking issues that fall
 * outside the diff are moved into the non-blocking suggestions.
 *
 * The input review is handed back untouched (with empty demoted/deferred
 * lists and allDemoted=false) when it is missing or already approved, when no
 * changed files can be read from the diff, or when nothing was demoted.
 *
 * Result shape:
 * - review: the filtered review (flipped to approved when nothing in scope remains)
 * - demoted: the original issue objects that were moved out of blocking_issues
 * - deferred: one tracking record per demoted issue, stamped with the current time
 * - allDemoted: true when every blocking issue turned out to be out of scope
 */
export function filterReviewScope(review, diff) {
  // Fresh arrays on every call so callers never share the empty lists.
  const passthrough = () => ({ review, demoted: [], deferred: [], allDemoted: false });

  if (!review || review.approved) return passthrough();

  const changedFiles = extractDiffFiles(diff);

  // Nothing parseable in the diff: refuse to filter rather than demote blindly.
  if (changedFiles.size === 0) return passthrough();

  const kept = [];
  const demoted = [];
  for (const candidate of review.blocking_issues || []) {
    const bucket = isIssueInScope(candidate, changedFiles, diff) ? kept : demoted;
    bucket.push(candidate);
  }

  if (demoted.length === 0) return passthrough();

  const toSuggestion = (entry) => {
    const where = entry.file || "unknown";
    const what = entry.description || entry.id || "no description";
    return `[auto-demoted] ${where}: ${what}`;
  };

  const filtered = {
    ...review,
    blocking_issues: kept,
    non_blocking_suggestions: [
      ...(review.non_blocking_suggestions || []),
      ...demoted.map(toSuggestion)
    ]
  };

  // With no in-scope blocker left, the review is auto-approved and says so.
  const allDemoted = kept.length === 0;
  if (allDemoted) {
    const note = `[Auto-approved: ${demoted.length} out-of-scope issue(s) demoted to suggestions]`;
    filtered.approved = true;
    filtered.summary = `${review.summary || ""} ${note}`.trim();
  }

  // Structured records kept on the session as tech debt.
  const toDeferredRecord = (entry) => ({
    id: entry.id || null,
    file: entry.file || null,
    severity: entry.severity || "medium",
    description: entry.description || "no description",
    suggested_fix: entry.suggested_fix || null,
    deferred_at: new Date().toISOString(),
    reason: "out_of_scope"
  });
  const deferred = demoted.map(toDeferredRecord);

  return { review: filtered, demoted, deferred, allDemoted };
}
|
|
128
|
+
|
|
129
|
+
/**
 * Build a human-readable summary of deferred issues for injection
 * into coder/planner prompts so they are aware of the tech debt.
 *
 * Entries missing `severity` or `description` are rendered with the same
 * fallbacks filterReviewScope uses ("medium" / "no description") instead of
 * the literal text "undefined"; null/undefined entries are ignored.
 *
 * @param {Array<{file?: string, severity?: string, description?: string, suggested_fix?: string}>} deferredIssues
 * @returns {string} Markdown section, or "" when there is nothing to report.
 */
export function buildDeferredContext(deferredIssues) {
  // Deferred issues may be restored from a persisted session, so do not
  // assume every entry is a well-formed object.
  const issues = Array.isArray(deferredIssues) ? deferredIssues.filter(Boolean) : [];
  if (issues.length === 0) return "";

  const lines = [
    "## Deferred reviewer concerns (technical debt)",
    "The following issues were flagged by the reviewer but deferred because they are outside the current diff scope.",
    "You do NOT need to fix them now, but be aware of them:",
    ""
  ];

  for (const issue of issues) {
    const severity = issue.severity || "medium";
    const file = issue.file ? `\`${issue.file}\`` : "general";
    const description = issue.description || "no description";
    const fix = issue.suggested_fix ? ` — Suggestion: ${issue.suggested_fix}` : "";
    lines.push(`- [${severity}] ${file}: ${description}${fix}`);
  }

  lines.push("");
  lines.push("If your current changes naturally address any of these, great. Otherwise, they will be tracked for future resolution.");

  return lines.join("\n");
}
|
package/src/roles/coder-role.js
CHANGED
|
@@ -17,8 +17,8 @@ export class CoderRole extends BaseRole {
|
|
|
17
17
|
}
|
|
18
18
|
|
|
19
19
|
async execute(input) {
|
|
20
|
-
const { task, reviewerFeedback, sonarSummary, onOutput } = typeof input === "string"
|
|
21
|
-
? { task: input, reviewerFeedback: null, sonarSummary: null, onOutput: null }
|
|
20
|
+
const { task, reviewerFeedback, sonarSummary, deferredContext, onOutput } = typeof input === "string"
|
|
21
|
+
? { task: input, reviewerFeedback: null, sonarSummary: null, deferredContext: null, onOutput: null }
|
|
22
22
|
: input || {};
|
|
23
23
|
|
|
24
24
|
const provider = resolveProvider(this.config);
|
|
@@ -28,6 +28,7 @@ export class CoderRole extends BaseRole {
|
|
|
28
28
|
task: task || this.context?.task || "",
|
|
29
29
|
reviewerFeedback: reviewerFeedback || null,
|
|
30
30
|
sonarSummary: sonarSummary || null,
|
|
31
|
+
deferredContext: deferredContext || null,
|
|
31
32
|
coderRules: this.instructions,
|
|
32
33
|
methodology: this.config?.development?.methodology || "tdd",
|
|
33
34
|
serenaEnabled: Boolean(this.config?.serena?.enabled)
|
package/templates/roles/coder.md
CHANGED
|
@@ -7,9 +7,20 @@ You are the **Coder** in a multi-role AI pipeline. Your job is to write code and
|
|
|
7
7
|
- Follow TDD methodology when `methodology=tdd` is configured.
|
|
8
8
|
- Write tests BEFORE implementation when using TDD.
|
|
9
9
|
- Keep changes minimal and focused on the task.
|
|
10
|
+
- "Minimal" means no unnecessary changes — it does NOT mean avoiding new files. If the task requires creating new files (pages, components, modules, tests), you MUST create them. Updating references/links without creating the actual files is an incomplete implementation.
|
|
10
11
|
- Do not modify code unrelated to the task.
|
|
12
|
+
- Before creating a new utility or helper, check if a similar one already exists in the codebase. Reuse existing code over creating duplicates.
|
|
11
13
|
- Follow existing code conventions and patterns in the repository.
|
|
12
14
|
|
|
15
|
+
## Task completeness
|
|
16
|
+
|
|
17
|
+
Before reporting done, verify that ALL parts of the task are addressed:
|
|
18
|
+
- Re-read the task description and acceptance criteria.
|
|
19
|
+
- Check every requirement — if the task says "create pages X and Y", both must exist.
|
|
20
|
+
- If the task lists multiple deliverables, each one must be implemented, not just some.
|
|
21
|
+
- Run the test suite after implementation to verify nothing is broken.
|
|
22
|
+
- An incomplete implementation is worse than an error — never report success if parts are missing.
|
|
23
|
+
|
|
13
24
|
## File modification safety
|
|
14
25
|
|
|
15
26
|
- NEVER overwrite existing files entirely. Always make targeted, minimal edits.
|
|
@@ -18,13 +29,6 @@ You are the **Coder** in a multi-role AI pipeline. Your job is to write code and
|
|
|
18
29
|
- If unintended changes are detected, revert immediately with `git checkout -- <file>`.
|
|
19
30
|
- Pay special attention to CSS, HTML, and config files where full rewrites destroy prior work.
|
|
20
31
|
|
|
21
|
-
## Multi-agent environment
|
|
22
|
-
|
|
23
|
-
- Multiple developers and AI agents may be committing and modifying code simultaneously.
|
|
24
|
-
- ALWAYS run `git fetch origin main` and check recent commits before starting work.
|
|
25
|
-
- Before pushing or merging, rebase on the latest main: `git rebase origin/main`.
|
|
26
|
-
- Create a dedicated branch per task and merge via PR, never push directly to main.
|
|
27
|
-
|
|
28
32
|
## Output format
|
|
29
33
|
|
|
30
34
|
Return a JSON object:
|
|
@@ -20,6 +20,8 @@ You are the **Planner** in a multi-role AI pipeline. Your job is to create an im
|
|
|
20
20
|
## Rules
|
|
21
21
|
|
|
22
22
|
- Each step should be small and independently verifiable.
|
|
23
|
+
- Steps must list ALL files involved: both files to modify AND new files to create. If a step requires creating a new file, list it explicitly in the `files` array.
|
|
24
|
+
- The plan must cover ALL requirements from the task. Re-read the task description before finalizing — if something is mentioned in the task, it must appear in a step.
|
|
23
25
|
- Identify the testing strategy (unit, integration, E2E).
|
|
24
26
|
- Consider backward compatibility.
|
|
25
27
|
- Reference research findings when available.
|
|
@@ -5,7 +5,7 @@ You are the **Refactorer** in a multi-role AI pipeline. Your job is to improve c
|
|
|
5
5
|
## Constraints
|
|
6
6
|
|
|
7
7
|
- Do NOT change any observable behavior or API contracts.
|
|
8
|
-
-
|
|
8
|
+
- Focus on the files that were already modified in this session. You may create new files when extracting code (e.g., extracting a helper to a new module), but do not refactor unrelated parts of the codebase.
|
|
9
9
|
- Keep all existing tests passing — run tests after every change.
|
|
10
10
|
- Follow existing code conventions and patterns in the repository.
|
|
11
11
|
- Do NOT add new features or fix unrelated bugs.
|
|
@@ -2,6 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
You are the **Reviewer** in a multi-role AI pipeline. Your job is to review code changes against task requirements and quality standards.
|
|
4
4
|
|
|
5
|
+
## Scope constraint
|
|
6
|
+
|
|
7
|
+
- **ONLY review files present in the diff.** Do not flag issues in files that were not changed.
|
|
8
|
+
- If you notice problems in untouched files, mention them as `non_blocking_suggestions` with a note that they are outside the current scope — never as `blocking_issues`.
|
|
9
|
+
- Your job is to review THIS change, not audit the entire codebase.
|
|
10
|
+
|
|
5
11
|
## Review priorities (in order)
|
|
6
12
|
|
|
7
13
|
1. **Security** — vulnerabilities, exposed secrets, injection vectors
|
|
@@ -13,9 +19,10 @@ You are the **Reviewer** in a multi-role AI pipeline. Your job is to review code
|
|
|
13
19
|
## Rules
|
|
14
20
|
|
|
15
21
|
- Focus on security, correctness, and tests first.
|
|
16
|
-
- Only raise blocking issues for concrete production risks.
|
|
22
|
+
- Only raise blocking issues for concrete production risks in the changed files.
|
|
17
23
|
- Keep non-blocking suggestions separate.
|
|
18
24
|
- Style preferences NEVER block approval.
|
|
25
|
+
- Confidence threshold: reject only if < 0.70.
|
|
19
26
|
|
|
20
27
|
## File overwrite detection (BLOCKING)
|
|
21
28
|
|
|
@@ -31,7 +38,7 @@ Return a strict JSON object:
|
|
|
31
38
|
"result": {
|
|
32
39
|
"approved": true,
|
|
33
40
|
"blocking_issues": [],
|
|
34
|
-
"
|
|
41
|
+
"non_blocking_suggestions": ["Optional improvement ideas"],
|
|
35
42
|
"confidence": 0.95
|
|
36
43
|
},
|
|
37
44
|
"summary": "Approved: all changes look correct and well-tested"
|
|
@@ -45,9 +52,9 @@ When rejecting:
|
|
|
45
52
|
"result": {
|
|
46
53
|
"approved": false,
|
|
47
54
|
"blocking_issues": [
|
|
48
|
-
{ "file": "src/foo.js", "line": 42, "severity": "critical", "
|
|
55
|
+
{ "id": "R-1", "file": "src/foo.js", "line": 42, "severity": "critical", "description": "SQL injection vulnerability", "suggested_fix": "Use parameterized queries instead of string concatenation" }
|
|
49
56
|
],
|
|
50
|
-
"
|
|
57
|
+
"non_blocking_suggestions": [],
|
|
51
58
|
"confidence": 0.9
|
|
52
59
|
},
|
|
53
60
|
"summary": "Rejected: 1 critical security issue found"
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Auto-merge PRs approved by BecarIA reviewer.
# Squash-merges the PR as soon as a bot account whose login contains
# "becaria-reviewer" (e.g. becaria-reviewer[bot]) submits an approving review.
#
# NOTE(review): this workflow calls the merge API directly and does not inspect
# check results itself. If merges must wait for checks, enforce that with
# required status checks in branch protection — confirm for your repository.

name: Auto-merge (BecarIA)

on:
  pull_request_review:
    types: [submitted]

permissions:
  contents: write
  pull-requests: write

jobs:
  automerge:
    # The user.type check keeps ordinary user accounts that merely have
    # "becaria-reviewer" in their login from triggering a merge.
    if: >
      github.event.review.state == 'approved' &&
      github.event.review.user.type == 'Bot' &&
      contains(github.event.review.user.login, 'becaria-reviewer')
    runs-on: ubuntu-latest
    steps:
      - name: Squash-merge approved PR
        uses: actions/github-script@v7
        with:
          script: |
            await github.rest.pulls.merge({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: context.payload.pull_request.number,
              merge_method: 'squash'
            });
|