karajan-code 1.11.0 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -9
- package/docs/README.es.md +8 -5
- package/package.json +1 -1
- package/src/agents/claude-agent.js +12 -2
- package/src/orchestrator/iteration-stages.js +77 -2
- package/src/orchestrator/solomon-rules.js +25 -2
- package/src/orchestrator.js +9 -5
- package/src/prompts/coder.js +5 -1
- package/src/prompts/reviewer.js +2 -0
- package/src/review/scope-filter.js +153 -0
- package/src/roles/coder-role.js +3 -2
package/README.md
CHANGED
|
@@ -30,7 +30,7 @@ Instead of running one AI agent and manually reviewing its output, `kj` chains a
|
|
|
30
30
|
**Key features:**
|
|
31
31
|
- **Multi-agent pipeline** with 11 configurable roles
|
|
32
32
|
- **4 AI agents supported**: Claude, Codex, Gemini, Aider
|
|
33
|
-
- **MCP server** with
|
|
33
|
+
- **MCP server** with 15 tools — use `kj` from Claude, Codex, or any MCP-compatible host without leaving your agent. [See MCP setup](#mcp-server)
|
|
34
34
|
- **TDD enforcement** — test changes required when source files change
|
|
35
35
|
- **SonarQube integration** — static analysis with quality gate enforcement (requires [Docker](#requirements))
|
|
36
36
|
- **Review profiles** — standard, strict, relaxed, paranoid
|
|
@@ -44,6 +44,9 @@ Instead of running one AI agent and manually reviewing its output, `kj` chains a
|
|
|
44
44
|
- **Retry with backoff** — automatic recovery from transient API errors (429, 5xx) with exponential backoff and jitter
|
|
45
45
|
- **Pipeline stage tracker** — cumulative progress view during `kj_run` showing which stages are done, running, or pending — both in CLI and via MCP events for real-time host rendering
|
|
46
46
|
- **Planner observability guardrails** — continuous heartbeat/stall telemetry, configurable max-silence protection (`session.max_agent_silence_minutes`), and hard runtime cap (`session.max_planner_minutes`) to avoid long stuck planner runs
|
|
47
|
+
- **Rate-limit standby** — when agents hit rate limits, Karajan parses cooldown times, waits with exponential backoff, and auto-resumes instead of failing
|
|
48
|
+
- **Preflight handshake** — `kj_preflight` requires human confirmation of agent assignments before execution, preventing AI from silently overriding your config
|
|
49
|
+
- **3-tier config** — session > project > global config layering with `kj_agents` scoping
|
|
47
50
|
- **Planning Game integration** — optionally pair with [Planning Game](https://github.com/AgenteIA-Geniova/planning-game) for agile project management (tasks, sprints, estimation) — like Jira, but open-source and XP-native
|
|
48
51
|
|
|
49
52
|
> **Best with MCP** — Karajan Code is designed to be used as an MCP server inside your AI agent (Claude, Codex, etc.). The agent sends tasks to `kj_run`, gets real-time progress notifications, and receives structured results — no copy-pasting needed.
|
|
@@ -62,16 +65,16 @@ triage? ─> researcher? ─> planner? ─> coder ─> refactorer? ─> sonar?
|
|
|
62
65
|
|
|
63
66
|
| Role | Description | Default |
|
|
64
67
|
|------|-------------|---------|
|
|
65
|
-
| **triage** |
|
|
68
|
+
| **triage** | Pipeline director — analyzes task complexity and activates roles dynamically | **On** |
|
|
66
69
|
| **researcher** | Investigates codebase context before planning | Off |
|
|
67
70
|
| **planner** | Generates structured implementation plans | Off |
|
|
68
71
|
| **coder** | Writes code and tests following TDD methodology | **Always on** |
|
|
69
72
|
| **refactorer** | Improves code clarity without changing behavior | Off |
|
|
70
73
|
| **sonar** | Runs SonarQube static analysis and quality gate checks | On (if configured) |
|
|
71
74
|
| **reviewer** | Code review with configurable strictness profiles | **Always on** |
|
|
72
|
-
| **tester** | Test quality gate and coverage verification |
|
|
73
|
-
| **security** | OWASP security audit |
|
|
74
|
-
| **solomon** |
|
|
75
|
+
| **tester** | Test quality gate and coverage verification | **On** |
|
|
76
|
+
| **security** | OWASP security audit | **On** |
|
|
77
|
+
| **solomon** | Session supervisor — monitors iteration health with 4 rules, escalates on anomalies | **On** |
|
|
75
78
|
| **commiter** | Git commit, push, and PR automation after approval | Off |
|
|
76
79
|
|
|
77
80
|
Roles marked with `?` are optional and can be enabled per-run or via config.
|
|
@@ -272,6 +275,16 @@ Resume a paused session (e.g., after fail-fast).
|
|
|
272
275
|
kj resume s_2026-02-28T20-47-24-270Z --answer "yes, proceed with the fix"
|
|
273
276
|
```
|
|
274
277
|
|
|
278
|
+
### `kj agents`
|
|
279
|
+
|
|
280
|
+
List or change AI agent assignments per role.
|
|
281
|
+
|
|
282
|
+
```bash
|
|
283
|
+
kj agents # List current agents (with scope column)
|
|
284
|
+
kj agents set coder gemini # Set coder to gemini (project scope)
|
|
285
|
+
kj agents set reviewer claude --global # Set reviewer globally
|
|
286
|
+
```
|
|
287
|
+
|
|
275
288
|
### `kj roles`
|
|
276
289
|
|
|
277
290
|
Inspect pipeline roles and their template instructions.
|
|
@@ -416,9 +429,12 @@ After `npm install -g karajan-code`, the MCP server is auto-registered in Claude
|
|
|
416
429
|
| `kj_resume` | Resume a paused session |
|
|
417
430
|
| `kj_report` | Read session reports (supports `--trace`) |
|
|
418
431
|
| `kj_roles` | List roles or show role templates |
|
|
419
|
-
| `
|
|
420
|
-
| `
|
|
421
|
-
| `
|
|
432
|
+
| `kj_agents` | List or change agent assignments (session/project/global scope) |
|
|
433
|
+
| `kj_preflight` | Human confirms agent config before kj_run/kj_code executes |
|
|
434
|
+
| `kj_code` | Run coder-only mode (with progress notifications) |
|
|
435
|
+
| `kj_review` | Run reviewer-only mode (with progress notifications) |
|
|
436
|
+
| `kj_plan` | Generate implementation plan (with progress notifications) |
|
|
437
|
+
| `kj_status` | Live parsed status of current run (stage, agent, iteration, errors) |
|
|
422
438
|
|
|
423
439
|
### MCP restart after version updates
|
|
424
440
|
|
|
@@ -461,7 +477,7 @@ Use `kj roles show <role>` to inspect any template. Create a project override to
|
|
|
461
477
|
git clone https://github.com/manufosela/karajan-code.git
|
|
462
478
|
cd karajan-code
|
|
463
479
|
npm install
|
|
464
|
-
npm test # Run
|
|
480
|
+
npm test # Run 1180+ tests with Vitest
|
|
465
481
|
npm run test:watch # Watch mode
|
|
466
482
|
npm run validate # Lint + test
|
|
467
483
|
```
|
package/docs/README.es.md
CHANGED
|
@@ -30,7 +30,7 @@ En lugar de ejecutar un agente de IA y revisar manualmente su output, `kj` encad
|
|
|
30
30
|
**Caracteristicas principales:**
|
|
31
31
|
- **Pipeline multi-agente** con 11 roles configurables
|
|
32
32
|
- **4 agentes de IA soportados**: Claude, Codex, Gemini, Aider
|
|
33
|
-
- **Servidor MCP** con
|
|
33
|
+
- **Servidor MCP** con 15 herramientas — usa `kj` desde Claude, Codex o cualquier host compatible con MCP sin salir de tu agente. [Ver configuracion MCP](#servidor-mcp)
|
|
34
34
|
- **TDD obligatorio** — se exigen cambios en tests cuando se modifican ficheros fuente
|
|
35
35
|
- **Integracion con SonarQube** — analisis estatico con quality gates (requiere [Docker](#requisitos))
|
|
36
36
|
- **Perfiles de revision** — standard, strict, relaxed, paranoid
|
|
@@ -43,6 +43,9 @@ En lugar de ejecutar un agente de IA y revisar manualmente su output, `kj` encad
|
|
|
43
43
|
- **Retry con backoff** — recuperacion automatica ante errores transitorios de API (429, 5xx) con backoff exponencial y jitter
|
|
44
44
|
- **Pipeline stage tracker** — vista de progreso acumulativo durante `kj_run` mostrando que stages estan completadas, en ejecucion o pendientes — tanto en CLI como via eventos MCP para renderizado en tiempo real en el host
|
|
45
45
|
- **Guardarrailes de observabilidad del planner** — telemetria continua de heartbeat/stall, proteccion configurable por silencio maximo (`session.max_agent_silence_minutes`) y limite duro de ejecucion (`session.max_planner_minutes`) para evitar bloqueos prolongados en `kj_plan`/planner
|
|
46
|
+
- **Standby por rate-limit** — cuando un agente alcanza limites de uso, Karajan parsea el tiempo de espera, espera con backoff exponencial y reanuda automaticamente en vez de fallar
|
|
47
|
+
- **Preflight handshake** — `kj_preflight` requiere confirmacion humana de la configuracion de agentes antes de ejecutar, previniendo que la IA cambie asignaciones silenciosamente
|
|
48
|
+
- **Config de 3 niveles** — sesion > proyecto > global con scoping de `kj_agents`
|
|
46
49
|
- **Integracion con Planning Game** — combina opcionalmente con [Planning Game](https://github.com/AgenteIA-Geniova/planning-game) para gestion agil de proyectos (tareas, sprints, estimacion) — como Jira, pero open-source y nativo XP
|
|
47
50
|
|
|
48
51
|
> **Mejor con MCP** — Karajan Code esta disenado para usarse como servidor MCP dentro de tu agente de IA (Claude, Codex, etc.). El agente envia tareas a `kj_run`, recibe notificaciones de progreso en tiempo real, y obtiene resultados estructurados — sin copiar y pegar.
|
|
@@ -61,16 +64,16 @@ triage? ─> researcher? ─> planner? ─> coder ─> refactorer? ─> sonar?
|
|
|
61
64
|
|
|
62
65
|
| Rol | Descripcion | Por defecto |
|
|
63
66
|
|-----|-------------|-------------|
|
|
64
|
-
| **triage** |
|
|
67
|
+
| **triage** | Director de pipeline — analiza la complejidad y activa roles dinamicamente | **On** |
|
|
65
68
|
| **researcher** | Investiga el contexto del codebase antes de planificar | Off |
|
|
66
69
|
| **planner** | Genera planes de implementacion estructurados | Off |
|
|
67
70
|
| **coder** | Escribe codigo y tests siguiendo metodologia TDD | **Siempre activo** |
|
|
68
71
|
| **refactorer** | Mejora la claridad del codigo sin cambiar comportamiento | Off |
|
|
69
72
|
| **sonar** | Ejecuta analisis estatico SonarQube y quality gates | On (si configurado) |
|
|
70
73
|
| **reviewer** | Revision de codigo con perfiles de exigencia configurables | **Siempre activo** |
|
|
71
|
-
| **tester** | Quality gate de tests y verificacion de cobertura |
|
|
72
|
-
| **security** | Auditoria de seguridad OWASP |
|
|
73
|
-
| **solomon** |
|
|
74
|
+
| **tester** | Quality gate de tests y verificacion de cobertura | **On** |
|
|
75
|
+
| **security** | Auditoria de seguridad OWASP | **On** |
|
|
76
|
+
| **solomon** | Supervisor de sesion — monitoriza salud de iteraciones con 4 reglas, escala ante anomalias | **On** |
|
|
74
77
|
| **commiter** | Automatizacion de git commit, push y PR tras aprobacion | Off |
|
|
75
78
|
|
|
76
79
|
Los roles marcados con `?` son opcionales y se pueden activar por ejecucion o via config.
|
package/package.json
CHANGED
|
@@ -101,10 +101,20 @@ function pickOutput(res) {
|
|
|
101
101
|
return res.stdout || res.stderr || "";
|
|
102
102
|
}
|
|
103
103
|
|
|
104
|
+
/**
|
|
105
|
+
* Default tools to allow for Claude subprocess.
|
|
106
|
+
* Since claude -p runs non-interactively (stdin: "ignore"), it cannot ask for
|
|
107
|
+
* permission approval. Without --allowedTools, it blocks waiting for approval
|
|
108
|
+
* that never comes.
|
|
109
|
+
*/
|
|
110
|
+
const ALLOWED_TOOLS = [
|
|
111
|
+
"Read", "Write", "Edit", "Bash", "Glob", "Grep"
|
|
112
|
+
];
|
|
113
|
+
|
|
104
114
|
export class ClaudeAgent extends BaseAgent {
|
|
105
115
|
async runTask(task) {
|
|
106
116
|
const role = task.role || "coder";
|
|
107
|
-
const args = ["-p", task.prompt];
|
|
117
|
+
const args = ["-p", task.prompt, "--allowedTools", ...ALLOWED_TOOLS];
|
|
108
118
|
const model = this.getRoleModel(role);
|
|
109
119
|
if (model) args.push("--model", model);
|
|
110
120
|
|
|
@@ -131,7 +141,7 @@ export class ClaudeAgent extends BaseAgent {
|
|
|
131
141
|
}
|
|
132
142
|
|
|
133
143
|
async reviewTask(task) {
|
|
134
|
-
const args = ["-p", task.prompt, "--output-format", "stream-json"];
|
|
144
|
+
const args = ["-p", task.prompt, "--allowedTools", ...ALLOWED_TOOLS, "--output-format", "stream-json"];
|
|
135
145
|
const model = this.getRoleModel(task.role || "reviewer");
|
|
136
146
|
if (model) args.push("--model", model);
|
|
137
147
|
const res = await runCommand(resolveBin("claude"), args, cleanExecaOpts({
|
|
@@ -6,6 +6,7 @@ import { addCheckpoint, markSessionStatus, saveSession, pauseSession } from "../
|
|
|
6
6
|
import { generateDiff } from "../review/diff-generator.js";
|
|
7
7
|
import { evaluateTddPolicy } from "../review/tdd-policy.js";
|
|
8
8
|
import { validateReviewResult } from "../review/schema.js";
|
|
9
|
+
import { filterReviewScope, buildDeferredContext } from "../review/scope-filter.js";
|
|
9
10
|
import { emitProgress, makeEvent } from "../utils/events.js";
|
|
10
11
|
import { runReviewerWithFallback } from "./reviewer-fallback.js";
|
|
11
12
|
import { runCoderWithFallback } from "./agent-fallback.js";
|
|
@@ -39,6 +40,7 @@ export async function runCoderStage({ coderRoleInstance, coderRole, config, logg
|
|
|
39
40
|
task: plannedTask,
|
|
40
41
|
reviewerFeedback: session.last_reviewer_feedback,
|
|
41
42
|
sonarSummary: session.last_sonar_summary,
|
|
43
|
+
deferredContext: buildDeferredContext(session.deferred_issues),
|
|
42
44
|
onOutput: coderStall.onOutput
|
|
43
45
|
});
|
|
44
46
|
} finally {
|
|
@@ -390,7 +392,7 @@ export async function runSonarStage({ config, logger, emitter, eventBase, sessio
|
|
|
390
392
|
return { action: "ok", stageResult };
|
|
391
393
|
}
|
|
392
394
|
|
|
393
|
-
export async function runReviewerStage({ reviewerRole, config, logger, emitter, eventBase, session, trackBudget, iteration, reviewRules, task, repeatDetector, budgetSummary }) {
|
|
395
|
+
export async function runReviewerStage({ reviewerRole, config, logger, emitter, eventBase, session, trackBudget, iteration, reviewRules, task, repeatDetector, budgetSummary, askQuestion }) {
|
|
394
396
|
logger.setContext({ iteration, stage: "reviewer" });
|
|
395
397
|
emitProgress(
|
|
396
398
|
emitter,
|
|
@@ -489,6 +491,39 @@ export async function runReviewerStage({ reviewerRole, config, logger, emitter,
|
|
|
489
491
|
confidence: 0
|
|
490
492
|
};
|
|
491
493
|
}
|
|
494
|
+
// --- Scope filter: auto-defer out-of-scope blocking issues ---
|
|
495
|
+
const { review: filteredReview, demoted, deferred, allDemoted } = filterReviewScope(review, diff);
|
|
496
|
+
review = filteredReview;
|
|
497
|
+
|
|
498
|
+
if (demoted.length > 0) {
|
|
499
|
+
logger.info(`Scope filter: deferred ${demoted.length} out-of-scope issue(s)${allDemoted ? " — auto-approved" : ""}`);
|
|
500
|
+
|
|
501
|
+
// Accumulate deferred issues in session for tracking
|
|
502
|
+
if (!session.deferred_issues) session.deferred_issues = [];
|
|
503
|
+
session.deferred_issues.push(...deferred);
|
|
504
|
+
await saveSession(session);
|
|
505
|
+
|
|
506
|
+
emitProgress(
|
|
507
|
+
emitter,
|
|
508
|
+
makeEvent("reviewer:scope_filter", { ...eventBase, stage: "reviewer" }, {
|
|
509
|
+
message: `Scope filter deferred ${demoted.length} out-of-scope issue(s)`,
|
|
510
|
+
detail: {
|
|
511
|
+
demotedCount: demoted.length,
|
|
512
|
+
autoApproved: allDemoted,
|
|
513
|
+
totalDeferred: session.deferred_issues.length,
|
|
514
|
+
deferred: deferred.map(d => ({ file: d.file, id: d.id, description: d.description }))
|
|
515
|
+
}
|
|
516
|
+
})
|
|
517
|
+
);
|
|
518
|
+
await addCheckpoint(session, {
|
|
519
|
+
stage: "reviewer-scope-filter",
|
|
520
|
+
iteration,
|
|
521
|
+
demoted_count: demoted.length,
|
|
522
|
+
auto_approved: allDemoted,
|
|
523
|
+
total_deferred: session.deferred_issues.length
|
|
524
|
+
});
|
|
525
|
+
}
|
|
526
|
+
|
|
492
527
|
await addCheckpoint(session, {
|
|
493
528
|
stage: "reviewer",
|
|
494
529
|
iteration,
|
|
@@ -518,8 +553,48 @@ export async function runReviewerStage({ reviewerRole, config, logger, emitter,
|
|
|
518
553
|
const repeatState = repeatDetector.isStalled();
|
|
519
554
|
if (repeatState.stalled) {
|
|
520
555
|
const repeatCounts = repeatDetector.getRepeatCounts();
|
|
556
|
+
|
|
557
|
+
// --- Solomon mediation for stalled reviewer ---
|
|
558
|
+
logger.warn(`Reviewer stalled (${repeatCounts.reviewer} repeats). Invoking Solomon mediation.`);
|
|
559
|
+
emitProgress(
|
|
560
|
+
emitter,
|
|
561
|
+
makeEvent("solomon:escalate", { ...eventBase, stage: "reviewer" }, {
|
|
562
|
+
message: `Reviewer stalled — Solomon mediating`,
|
|
563
|
+
detail: { repeats: repeatCounts.reviewer, reason: repeatState.reason }
|
|
564
|
+
})
|
|
565
|
+
);
|
|
566
|
+
|
|
567
|
+
const solomonResult = await invokeSolomon({
|
|
568
|
+
config, logger, emitter, eventBase, stage: "reviewer", askQuestion, session, iteration,
|
|
569
|
+
conflict: {
|
|
570
|
+
stage: "reviewer",
|
|
571
|
+
task,
|
|
572
|
+
iterationCount: repeatCounts.reviewer,
|
|
573
|
+
maxIterations: config.session?.fail_fast_repeats ?? 2,
|
|
574
|
+
stalledReason: repeatState.reason,
|
|
575
|
+
blockingIssues: review.blocking_issues,
|
|
576
|
+
history: [{ agent: "reviewer", feedback: review.blocking_issues.map(x => x.description).join("; ") }]
|
|
577
|
+
}
|
|
578
|
+
});
|
|
579
|
+
|
|
580
|
+
if (solomonResult.action === "pause") {
|
|
581
|
+
await markSessionStatus(session, "stalled");
|
|
582
|
+
return { review, stalled: true, stalledResult: { paused: true, sessionId: session.id, question: solomonResult.question, context: "reviewer_stalled" } };
|
|
583
|
+
}
|
|
584
|
+
if (solomonResult.action === "continue") {
|
|
585
|
+
repeatDetector.reviewer = { lastHash: null, repeatCount: 0 };
|
|
586
|
+
if (solomonResult.humanGuidance) {
|
|
587
|
+
session.last_reviewer_feedback = `Solomon/user guidance: ${solomonResult.humanGuidance}`;
|
|
588
|
+
await saveSession(session);
|
|
589
|
+
}
|
|
590
|
+
return { review };
|
|
591
|
+
}
|
|
592
|
+
if (solomonResult.action === "subtask") {
|
|
593
|
+
return { review, stalled: true, stalledResult: { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "reviewer_subtask" } };
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
// Fallback
|
|
521
597
|
const message = `Manual intervention required: reviewer issues repeated ${repeatCounts.reviewer} times.`;
|
|
522
|
-
logger.warn(message);
|
|
523
598
|
await markSessionStatus(session, "stalled");
|
|
524
599
|
emitProgress(
|
|
525
600
|
emitter,
|
|
@@ -7,7 +7,8 @@ const DEFAULT_RULES = {
|
|
|
7
7
|
max_files_per_iteration: 10,
|
|
8
8
|
max_stale_iterations: 3,
|
|
9
9
|
no_new_dependencies_without_task: true,
|
|
10
|
-
scope_guard: true
|
|
10
|
+
scope_guard: true,
|
|
11
|
+
reviewer_overreach: true
|
|
11
12
|
};
|
|
12
13
|
|
|
13
14
|
export function evaluateRules(context, rulesConfig = {}) {
|
|
@@ -59,6 +60,17 @@ export function evaluateRules(context, rulesConfig = {}) {
|
|
|
59
60
|
});
|
|
60
61
|
}
|
|
61
62
|
|
|
63
|
+
// Rule 5: Reviewer overreach — reviewer consistently flags out-of-scope issues
|
|
64
|
+
if (rules.reviewer_overreach && context.reviewerDemotedCount > 0) {
|
|
65
|
+
const severity = context.reviewerDemotedCount >= 3 ? "critical" : "warn";
|
|
66
|
+
alerts.push({
|
|
67
|
+
rule: "reviewer_overreach",
|
|
68
|
+
severity,
|
|
69
|
+
message: `Reviewer flagged ${context.reviewerDemotedCount} out-of-scope issue(s) that were auto-demoted by scope filter.`,
|
|
70
|
+
detail: { demotedCount: context.reviewerDemotedCount, autoApproved: context.reviewerAutoApproved || false }
|
|
71
|
+
});
|
|
72
|
+
}
|
|
73
|
+
|
|
62
74
|
return {
|
|
63
75
|
alerts,
|
|
64
76
|
hasCritical: alerts.some(a => a.severity === "critical"),
|
|
@@ -76,9 +88,20 @@ export async function buildRulesContext({ session, task, iteration }) {
|
|
|
76
88
|
filesChanged: 0,
|
|
77
89
|
staleIterations: 0,
|
|
78
90
|
newDependencies: [],
|
|
79
|
-
outOfScopeFiles: []
|
|
91
|
+
outOfScopeFiles: [],
|
|
92
|
+
reviewerDemotedCount: 0,
|
|
93
|
+
reviewerAutoApproved: false
|
|
80
94
|
};
|
|
81
95
|
|
|
96
|
+
// Count reviewer scope-filter demotions from session checkpoints
|
|
97
|
+
const scopeFilterCheckpoints = (session.checkpoints || [])
|
|
98
|
+
.filter(cp => cp.stage === "reviewer-scope-filter");
|
|
99
|
+
if (scopeFilterCheckpoints.length > 0) {
|
|
100
|
+
const latest = scopeFilterCheckpoints.at(-1);
|
|
101
|
+
context.reviewerDemotedCount = latest.demoted_count || 0;
|
|
102
|
+
context.reviewerAutoApproved = latest.auto_approved || false;
|
|
103
|
+
}
|
|
104
|
+
|
|
82
105
|
// Count files changed via git
|
|
83
106
|
try {
|
|
84
107
|
const { execaCommand } = await import("execa");
|
package/src/orchestrator.js
CHANGED
|
@@ -152,7 +152,8 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
152
152
|
last_sonar_issue_signature: null,
|
|
153
153
|
sonar_repeat_count: 0,
|
|
154
154
|
last_reviewer_issue_signature: null,
|
|
155
|
-
reviewer_repeat_count: 0
|
|
155
|
+
reviewer_repeat_count: 0,
|
|
156
|
+
deferred_issues: []
|
|
156
157
|
};
|
|
157
158
|
if (pgTaskId) sessionInit.pg_task_id = pgTaskId;
|
|
158
159
|
if (pgProject) sessionInit.pg_project_id = pgProject;
|
|
@@ -496,7 +497,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
496
497
|
if (reviewerEnabled) {
|
|
497
498
|
const reviewerResult = await runReviewerStage({
|
|
498
499
|
reviewerRole, config, logger, emitter, eventBase, session, trackBudget,
|
|
499
|
-
iteration: i, reviewRules, task, repeatDetector, budgetSummary
|
|
500
|
+
iteration: i, reviewRules, task, repeatDetector, budgetSummary, askQuestion
|
|
500
501
|
});
|
|
501
502
|
if (reviewerResult.action === "pause") {
|
|
502
503
|
return reviewerResult.result;
|
|
@@ -649,14 +650,17 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
649
650
|
}
|
|
650
651
|
}
|
|
651
652
|
|
|
653
|
+
const deferredIssues = session.deferred_issues || [];
|
|
652
654
|
emitProgress(
|
|
653
655
|
emitter,
|
|
654
656
|
makeEvent("session:end", { ...eventBase, stage: "done" }, {
|
|
655
|
-
message:
|
|
656
|
-
|
|
657
|
+
message: deferredIssues.length > 0
|
|
658
|
+
? `Session approved (${deferredIssues.length} deferred issue(s) tracked as tech debt)`
|
|
659
|
+
: "Session approved",
|
|
660
|
+
detail: { approved: true, iterations: i, stages: stageResults, git: gitResult, budget: budgetSummary(), deferredIssues }
|
|
657
661
|
})
|
|
658
662
|
);
|
|
659
|
-
return { approved: true, sessionId: session.id, review, git: gitResult };
|
|
663
|
+
return { approved: true, sessionId: session.id, review, git: gitResult, deferredIssues };
|
|
660
664
|
}
|
|
661
665
|
|
|
662
666
|
session.last_reviewer_feedback = review.blocking_issues
|
package/src/prompts/coder.js
CHANGED
|
@@ -29,7 +29,7 @@ const SERENA_INSTRUCTIONS = [
|
|
|
29
29
|
"Fall back to reading files only when Serena tools are not sufficient."
|
|
30
30
|
].join("\n");
|
|
31
31
|
|
|
32
|
-
export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary = null, coderRules = null, methodology = "tdd", serenaEnabled = false }) {
|
|
32
|
+
export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary = null, coderRules = null, methodology = "tdd", serenaEnabled = false, deferredContext = null }) {
|
|
33
33
|
const sections = [
|
|
34
34
|
serenaEnabled ? SUBAGENT_PREAMBLE_SERENA : SUBAGENT_PREAMBLE,
|
|
35
35
|
`Task:\n${task}`,
|
|
@@ -65,5 +65,9 @@ export function buildCoderPrompt({ task, reviewerFeedback = null, sonarSummary =
|
|
|
65
65
|
sections.push(`Reviewer blocking feedback:\n${reviewerFeedback}`);
|
|
66
66
|
}
|
|
67
67
|
|
|
68
|
+
if (deferredContext) {
|
|
69
|
+
sections.push(deferredContext);
|
|
70
|
+
}
|
|
71
|
+
|
|
68
72
|
return sections.join("\n\n");
|
|
69
73
|
}
|
package/src/prompts/reviewer.js
CHANGED
|
@@ -26,6 +26,8 @@ export function buildReviewerPrompt({ task, diff, reviewRules, mode, serenaEnabl
|
|
|
26
26
|
const sections = [
|
|
27
27
|
serenaEnabled ? SUBAGENT_PREAMBLE_SERENA : SUBAGENT_PREAMBLE,
|
|
28
28
|
`You are a code reviewer in ${mode} mode.`,
|
|
29
|
+
"CRITICAL SCOPE RULE: Only review changes that are part of the diff below. Do NOT flag issues in unchanged code, missing features planned for future tasks, or improvements outside the scope of this task. If the diff is correct for what the task asks, approve it — even if the broader codebase has other issues.",
|
|
30
|
+
"Only block approval for issues IN THE DIFF that are bugs, security vulnerabilities, or clear violations of the review rules.",
|
|
29
31
|
"Return only one valid JSON object and nothing else.",
|
|
30
32
|
"JSON schema:",
|
|
31
33
|
'{"approved":boolean,"blocking_issues":[{"id":string,"severity":"critical|high|medium|low","file":string,"line":number,"description":string,"suggested_fix":string}],"non_blocking_suggestions":[string],"summary":string,"confidence":number}'
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scope filter — auto-defers reviewer blocking issues that reference
|
|
3
|
+
* files NOT present in the diff. This prevents reviewer scope drift
|
|
4
|
+
* (flagging missing features, unchanged code, future tasks) from
|
|
5
|
+
* stalling the pipeline.
|
|
6
|
+
*
|
|
7
|
+
* Deferred issues are NOT forgotten — they are tracked in the session
|
|
8
|
+
* as technical debt that should be resolved in future iterations or
|
|
9
|
+
* follow-up tasks. The coder and planner receive context about what
|
|
10
|
+
* was deferred and why.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Extract the set of changed file paths from a unified diff string.
 *
 * Added, modified and renamed files are read from their "+++ b/<path>"
 * header. A deleted file has "+++ /dev/null" as its new-side header, so its
 * path is recovered from the "--- a/<path>" line immediately before it;
 * otherwise deletions would never count as changed files.
 *
 * @param {string|null|undefined} diff - Unified diff text using a/ and b/ path prefixes.
 * @returns {Set<string>} Changed paths with the a/ or b/ prefix stripped.
 */
export function extractDiffFiles(diff) {
  const files = new Set();
  // Path captured from the most recent "--- a/<path>" line; only meaningful
  // for the line that directly follows it.
  let pendingOldPath = null;

  for (const line of (diff || "").split("\n")) {
    // New-side header of an added / modified / renamed file.
    const newSide = line.match(/^\+\+\+ b\/(.+)/);
    if (newSide) {
      files.add(newSide[1]);
      pendingOldPath = null;
      continue;
    }

    // New side is /dev/null: the file was deleted, so use the old-side path.
    if (pendingOldPath !== null && /^\+\+\+ \/dev\/null/.test(line)) {
      files.add(pendingOldPath);
    }

    const oldSide = line.match(/^--- a\/(.+)/);
    pendingOldPath = oldSide ? oldSide[1] : null;
  }

  return files;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Normalise a path for comparison: forward slashes only, no leading "./".
 */
function normalizeScopePath(path) {
  return path.replace(/\\/g, "/").replace(/^\.\//, "");
}

/**
 * True when `longer` ends with `shorter` on a path-segment boundary,
 * e.g. "src/a.js" ends with "a.js" but "src/ba.js" does not.
 */
function endsWithPathSegment(longer, shorter) {
  return shorter.length > 0 && longer.endsWith(`/${shorter.replace(/^\/+/, "")}`);
}

/**
 * Determine whether a blocking issue is within scope of the diff.
 *
 * An issue is considered IN scope when:
 * - It has no usable `file` field (general concern about the diff)
 * - Its `file` equals one of the changed files, or one is a suffix of the
 *   other on a path-segment boundary (absolute vs relative paths)
 * - Its `file` text appears anywhere in the diff content (e.g. in an import)
 *
 * An issue is OUT of scope when it names a file that matches none of the above.
 *
 * @param {{file?: string}} issue - Reviewer blocking issue.
 * @param {Iterable<string>} diffFiles - Changed file paths from the diff.
 * @param {string} [diffContent] - Raw diff text used for the substring fallback.
 * @returns {boolean} true when the issue should stay blocking.
 */
export function isIssueInScope(issue, diffFiles, diffContent) {
  const rawFile = issue?.file;
  // Non-string values are coerced so a malformed review cannot throw here.
  const file = (rawFile ? String(rawFile) : "").trim();

  // No file specified — the reviewer is commenting on the diff generally
  if (!file) return true;

  const wanted = normalizeScopePath(file);

  for (const df of diffFiles) {
    const changed = normalizeScopePath(df);
    if (changed === wanted) return true;
    // Suffix match must respect directory boundaries, otherwise "lib/ab.js"
    // would be treated as the changed file "b.js".
    if (endsWithPathSegment(changed, wanted) || endsWithPathSegment(wanted, changed)) {
      return true;
    }
  }

  // Fallback: the path is mentioned somewhere in the diff text
  // (covers files referenced in imports/requires).
  if (diffContent && (diffContent.includes(file) || diffContent.includes(wanted))) {
    return true;
  }

  return false;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Split a rejected review's blocking issues into in-scope and out-of-scope
 * ones, turning the out-of-scope issues into non-blocking suggestions.
 *
 * The input review is not mutated; a shallow copy is returned when anything
 * was demoted. When no in-scope blocking issue is left, the copy is marked
 * approved and its summary gets an "[Auto-approved: ...]" suffix.
 *
 * @param {object|null|undefined} review - Reviewer result (approved, blocking_issues, ...).
 * @param {string} diff - Unified diff the review was made against.
 * @returns {{review: object, demoted: object[], deferred: object[], allDemoted: boolean}}
 *   - review: filtered review, or the original when nothing was demoted
 *   - demoted: the original issue objects that were taken out of blocking_issues
 *   - deferred: one tracking record per demoted issue (reason "out_of_scope")
 *   - allDemoted: true when every blocking issue was out of scope
 */
export function filterReviewScope(review, diff) {
  const unchanged = { review, demoted: [], deferred: [], allDemoted: false };

  // Nothing to filter for a missing or already-approved review.
  if (!review || review.approved) return unchanged;

  const changedFiles = extractDiffFiles(diff);
  // No parsable file headers: leave the review alone rather than guess.
  if (changedFiles.size === 0) return unchanged;

  const kept = [];
  const demoted = [];
  for (const issue of review.blocking_issues || []) {
    const bucket = isIssueInScope(issue, changedFiles, diff) ? kept : demoted;
    bucket.push(issue);
  }

  if (demoted.length === 0) return unchanged;

  const filtered = {
    ...review,
    blocking_issues: kept,
    non_blocking_suggestions: [
      ...(review.non_blocking_suggestions || []),
      ...demoted.map(
        (issue) => `[auto-demoted] ${issue.file || "unknown"}: ${issue.description || issue.id || "no description"}`
      )
    ]
  };

  // With no in-scope blocker left, the review flips to approved.
  const allDemoted = kept.length === 0;
  if (allDemoted) {
    filtered.approved = true;
    filtered.summary = `${review.summary || ""} [Auto-approved: ${demoted.length} out-of-scope issue(s) demoted to suggestions]`.trim();
  }

  // Tracking records stored by the caller as deferred issues.
  const deferred = demoted.map((issue) => ({
    id: issue.id || null,
    file: issue.file || null,
    severity: issue.severity || "medium",
    description: issue.description || "no description",
    suggested_fix: issue.suggested_fix || null,
    deferred_at: new Date().toISOString(),
    reason: "out_of_scope"
  }));

  return { review: filtered, demoted, deferred, allDemoted };
}
|
|
128
|
+
|
|
129
|
+
/**
 * Render deferred issues as a Markdown section for injection into a prompt.
 *
 * @param {Array<{file: ?string, severity: string, description: string, suggested_fix: ?string}>} deferredIssues
 *   Deferred issue records; may be null, undefined or empty.
 * @returns {string} The section text, or "" when there are no deferred issues.
 */
export function buildDeferredContext(deferredIssues) {
  if (!deferredIssues?.length) return "";

  // One bullet per issue: "- [severity] `file`: description — Suggestion: fix"
  const bullets = [...deferredIssues].map((issue) => {
    const location = issue.file ? `\`${issue.file}\`` : "general";
    const suggestion = issue.suggested_fix ? ` — Suggestion: ${issue.suggested_fix}` : "";
    return `- [${issue.severity}] ${location}: ${issue.description}${suggestion}`;
  });

  return [
    "## Deferred reviewer concerns (technical debt)",
    "The following issues were flagged by the reviewer but deferred because they are outside the current diff scope.",
    "You do NOT need to fix them now, but be aware of them:",
    "",
    ...bullets,
    "",
    "If your current changes naturally address any of these, great. Otherwise, they will be tracked for future resolution."
  ].join("\n");
}
|
package/src/roles/coder-role.js
CHANGED
|
@@ -17,8 +17,8 @@ export class CoderRole extends BaseRole {
|
|
|
17
17
|
}
|
|
18
18
|
|
|
19
19
|
async execute(input) {
|
|
20
|
-
const { task, reviewerFeedback, sonarSummary, onOutput } = typeof input === "string"
|
|
21
|
-
? { task: input, reviewerFeedback: null, sonarSummary: null, onOutput: null }
|
|
20
|
+
const { task, reviewerFeedback, sonarSummary, deferredContext, onOutput } = typeof input === "string"
|
|
21
|
+
? { task: input, reviewerFeedback: null, sonarSummary: null, deferredContext: null, onOutput: null }
|
|
22
22
|
: input || {};
|
|
23
23
|
|
|
24
24
|
const provider = resolveProvider(this.config);
|
|
@@ -28,6 +28,7 @@ export class CoderRole extends BaseRole {
|
|
|
28
28
|
task: task || this.context?.task || "",
|
|
29
29
|
reviewerFeedback: reviewerFeedback || null,
|
|
30
30
|
sonarSummary: sonarSummary || null,
|
|
31
|
+
deferredContext: deferredContext || null,
|
|
31
32
|
coderRules: this.instructions,
|
|
32
33
|
methodology: this.config?.development?.methodology || "tdd",
|
|
33
34
|
serenaEnabled: Boolean(this.config?.serena?.enabled)
|