karajan-code 1.2.2 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/docs/README.es.md +2 -2
- package/package.json +1 -1
- package/src/agents/availability.js +3 -9
- package/src/agents/index.js +32 -11
- package/src/agents/model-registry.js +62 -0
- package/src/mcp/orphan-guard.js +21 -0
- package/src/mcp/server.js +4 -0
- package/src/orchestrator/iteration-stages.js +404 -0
- package/src/orchestrator/post-loop-stages.js +141 -0
- package/src/orchestrator/pre-loop-stages.js +149 -0
- package/src/orchestrator/reviewer-fallback.js +39 -0
- package/src/orchestrator/solomon-escalation.js +84 -0
- package/src/orchestrator.js +80 -883
- package/src/prompts/planner.js +51 -0
- package/src/repeat-detector.js +11 -0
- package/src/roles/coder-role.js +4 -1
- package/src/roles/planner-role.js +2 -2
- package/src/roles/refactorer-role.js +2 -0
- package/src/roles/reviewer-role.js +13 -6
- package/src/utils/budget.js +30 -0
- package/src/utils/pricing.js +3 -13
package/src/orchestrator.js
CHANGED
|
@@ -1,244 +1,31 @@
|
|
|
1
1
|
import { createAgent } from "./agents/index.js";
|
|
2
2
|
import {
|
|
3
|
-
addCheckpoint,
|
|
4
3
|
createSession,
|
|
5
4
|
loadSession,
|
|
6
5
|
markSessionStatus,
|
|
7
|
-
pauseSession,
|
|
8
6
|
resumeSessionWithAnswer,
|
|
9
7
|
saveSession
|
|
10
8
|
} from "./session-store.js";
|
|
11
9
|
import { computeBaseRef, generateDiff } from "./review/diff-generator.js";
|
|
12
|
-
import { parseJsonOutput } from "./review/parser.js";
|
|
13
|
-
import { validateReviewResult } from "./review/schema.js";
|
|
14
|
-
import { evaluateTddPolicy } from "./review/tdd-policy.js";
|
|
15
10
|
import { buildCoderPrompt } from "./prompts/coder.js";
|
|
16
11
|
import { buildReviewerPrompt } from "./prompts/reviewer.js";
|
|
17
12
|
import { resolveRole } from "./config.js";
|
|
18
|
-
import {
|
|
19
|
-
import { RepeatDetector } from "./repeat-detector.js";
|
|
13
|
+
import { RepeatDetector, getRepeatThreshold } from "./repeat-detector.js";
|
|
20
14
|
import { emitProgress, makeEvent } from "./utils/events.js";
|
|
21
|
-
import { BudgetTracker } from "./utils/budget.js";
|
|
15
|
+
import { BudgetTracker, extractUsageMetrics } from "./utils/budget.js";
|
|
22
16
|
import {
|
|
23
|
-
commitMessageFromTask,
|
|
24
17
|
prepareGitAutomation,
|
|
25
18
|
finalizeGitAutomation
|
|
26
19
|
} from "./git/automation.js";
|
|
27
20
|
import { resolveRoleMdPath, loadFirstExisting } from "./roles/base-role.js";
|
|
28
21
|
import { resolveReviewProfile } from "./review/profiles.js";
|
|
29
|
-
import {
|
|
30
|
-
import {
|
|
31
|
-
import {
|
|
32
|
-
import {
|
|
33
|
-
import {
|
|
22
|
+
import { CoderRole } from "./roles/coder-role.js";
|
|
23
|
+
import { invokeSolomon } from "./orchestrator/solomon-escalation.js";
|
|
24
|
+
import { runTriageStage, runResearcherStage, runPlannerStage } from "./orchestrator/pre-loop-stages.js";
|
|
25
|
+
import { runCoderStage, runRefactorerStage, runTddCheckStage, runSonarStage, runReviewerStage } from "./orchestrator/iteration-stages.js";
|
|
26
|
+
import { runTesterStage, runSecurityStage } from "./orchestrator/post-loop-stages.js";
|
|
34
27
|
|
|
35
|
-
function parsePlannerOutput(output) {
|
|
36
|
-
const text = String(output || "").trim();
|
|
37
|
-
if (!text) return null;
|
|
38
28
|
|
|
39
|
-
const lines = text
|
|
40
|
-
.split(/\r?\n/)
|
|
41
|
-
.map((line) => line.trim())
|
|
42
|
-
.filter(Boolean);
|
|
43
|
-
|
|
44
|
-
let title = null;
|
|
45
|
-
let approach = null;
|
|
46
|
-
const steps = [];
|
|
47
|
-
|
|
48
|
-
for (const line of lines) {
|
|
49
|
-
if (!title) {
|
|
50
|
-
const titleMatch = line.match(/^title\s*:\s*(.+)$/i);
|
|
51
|
-
if (titleMatch) {
|
|
52
|
-
title = titleMatch[1].trim();
|
|
53
|
-
continue;
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
if (!approach) {
|
|
58
|
-
const approachMatch = line.match(/^(approach|strategy)\s*:\s*(.+)$/i);
|
|
59
|
-
if (approachMatch) {
|
|
60
|
-
approach = approachMatch[2].trim();
|
|
61
|
-
continue;
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
const numberedStep = line.match(/^\d+[\).:-]\s*(.+)$/);
|
|
66
|
-
if (numberedStep) {
|
|
67
|
-
steps.push(numberedStep[1].trim());
|
|
68
|
-
continue;
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
const bulletStep = line.match(/^[-*]\s+(.+)$/);
|
|
72
|
-
if (bulletStep) {
|
|
73
|
-
steps.push(bulletStep[1].trim());
|
|
74
|
-
continue;
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
if (!title) {
|
|
79
|
-
const firstFreeLine = lines.find((line) => !/^(approach|strategy)\s*:/i.test(line) && !/^\d+[\).:-]\s*/.test(line));
|
|
80
|
-
title = firstFreeLine || null;
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
return { title, approach, steps };
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
function getRepeatThreshold(config) {
|
|
87
|
-
const raw =
|
|
88
|
-
config?.failFast?.repeatThreshold ??
|
|
89
|
-
config?.session?.repeat_detection_threshold ??
|
|
90
|
-
config?.session?.fail_fast_repeats ??
|
|
91
|
-
2;
|
|
92
|
-
const value = Number(raw);
|
|
93
|
-
if (Number.isFinite(value) && value > 0) return value;
|
|
94
|
-
return 2;
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
function extractUsageMetrics(result, defaultModel = null) {
|
|
98
|
-
const usage = result?.usage || result?.metrics || {};
|
|
99
|
-
const tokens_in =
|
|
100
|
-
result?.tokens_in ??
|
|
101
|
-
usage?.tokens_in ??
|
|
102
|
-
usage?.input_tokens ??
|
|
103
|
-
usage?.prompt_tokens ??
|
|
104
|
-
0;
|
|
105
|
-
const tokens_out =
|
|
106
|
-
result?.tokens_out ??
|
|
107
|
-
usage?.tokens_out ??
|
|
108
|
-
usage?.output_tokens ??
|
|
109
|
-
usage?.completion_tokens ??
|
|
110
|
-
0;
|
|
111
|
-
const cost_usd =
|
|
112
|
-
result?.cost_usd ??
|
|
113
|
-
usage?.cost_usd ??
|
|
114
|
-
usage?.usd_cost ??
|
|
115
|
-
usage?.cost;
|
|
116
|
-
const model =
|
|
117
|
-
result?.model ??
|
|
118
|
-
usage?.model ??
|
|
119
|
-
usage?.model_name ??
|
|
120
|
-
usage?.model_id ??
|
|
121
|
-
defaultModel ??
|
|
122
|
-
null;
|
|
123
|
-
|
|
124
|
-
return { tokens_in, tokens_out, cost_usd, model };
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
async function runReviewerWithFallback({ reviewerName, config, logger, prompt, session, iteration, onOutput, onAttemptResult }) {
|
|
128
|
-
const fallbackReviewer = config.reviewer_options?.fallback_reviewer;
|
|
129
|
-
const retries = Math.max(0, Number(config.reviewer_options?.retries ?? 1));
|
|
130
|
-
const candidates = [reviewerName];
|
|
131
|
-
if (fallbackReviewer && fallbackReviewer !== reviewerName) {
|
|
132
|
-
candidates.push(fallbackReviewer);
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
const attempts = [];
|
|
136
|
-
for (const name of candidates) {
|
|
137
|
-
const reviewer = createAgent(name, config, logger);
|
|
138
|
-
for (let attempt = 1; attempt <= retries + 1; attempt += 1) {
|
|
139
|
-
const result = await reviewer.reviewTask({ prompt, onOutput, role: "reviewer" });
|
|
140
|
-
if (onAttemptResult) {
|
|
141
|
-
await onAttemptResult({ reviewer: name, result });
|
|
142
|
-
}
|
|
143
|
-
attempts.push({ reviewer: name, attempt, ok: result.ok, result });
|
|
144
|
-
await addCheckpoint(session, {
|
|
145
|
-
stage: "reviewer-attempt",
|
|
146
|
-
iteration,
|
|
147
|
-
reviewer: name,
|
|
148
|
-
attempt,
|
|
149
|
-
ok: result.ok
|
|
150
|
-
});
|
|
151
|
-
|
|
152
|
-
if (result.ok) {
|
|
153
|
-
return { result, attempts };
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
return { result: null, attempts };
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
async function invokeSolomon({ config, logger, emitter, eventBase, stage, conflict, askQuestion, session, iteration }) {
|
|
162
|
-
const solomonEnabled = Boolean(config.pipeline?.solomon?.enabled);
|
|
163
|
-
|
|
164
|
-
if (!solomonEnabled) {
|
|
165
|
-
return escalateToHuman({ askQuestion, session, emitter, eventBase, stage, conflict, iteration });
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
emitProgress(
|
|
169
|
-
emitter,
|
|
170
|
-
makeEvent("solomon:start", { ...eventBase, stage: "solomon" }, {
|
|
171
|
-
message: `Solomon arbitrating ${stage} conflict`,
|
|
172
|
-
detail: { conflictStage: stage }
|
|
173
|
-
})
|
|
174
|
-
);
|
|
175
|
-
|
|
176
|
-
const solomon = new SolomonRole({ config, logger, emitter });
|
|
177
|
-
await solomon.init({ task: conflict.task || session.task, iteration });
|
|
178
|
-
const ruling = await solomon.run({ conflict });
|
|
179
|
-
|
|
180
|
-
emitProgress(
|
|
181
|
-
emitter,
|
|
182
|
-
makeEvent("solomon:end", { ...eventBase, stage: "solomon" }, {
|
|
183
|
-
message: `Solomon ruling: ${ruling.result?.ruling || "unknown"}`,
|
|
184
|
-
detail: ruling.result
|
|
185
|
-
})
|
|
186
|
-
);
|
|
187
|
-
|
|
188
|
-
await addCheckpoint(session, {
|
|
189
|
-
stage: "solomon",
|
|
190
|
-
iteration,
|
|
191
|
-
ruling: ruling.result?.ruling,
|
|
192
|
-
escalate: ruling.result?.escalate,
|
|
193
|
-
subtask: ruling.result?.subtask?.title || null
|
|
194
|
-
});
|
|
195
|
-
|
|
196
|
-
if (!ruling.ok) {
|
|
197
|
-
// escalate_human
|
|
198
|
-
return escalateToHuman({
|
|
199
|
-
askQuestion, session, emitter, eventBase, stage, iteration,
|
|
200
|
-
conflict: { ...conflict, solomonReason: ruling.result?.escalate_reason }
|
|
201
|
-
});
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
const r = ruling.result?.ruling;
|
|
205
|
-
if (r === "approve" || r === "approve_with_conditions") {
|
|
206
|
-
return { action: "continue", conditions: ruling.result?.conditions || [], ruling };
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
if (r === "create_subtask") {
|
|
210
|
-
return { action: "subtask", subtask: ruling.result?.subtask, ruling };
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
return { action: "continue", conditions: [], ruling };
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
async function escalateToHuman({ askQuestion, session, emitter, eventBase, stage, conflict, iteration }) {
|
|
217
|
-
const reason = conflict?.solomonReason || `${stage} conflict unresolved`;
|
|
218
|
-
const question = `${stage} conflict requires human intervention: ${reason}\nDetails: ${JSON.stringify(conflict?.history?.slice(-2) || [], null, 2)}\n\nHow should we proceed?`;
|
|
219
|
-
|
|
220
|
-
if (askQuestion) {
|
|
221
|
-
const answer = await askQuestion(question, { iteration, stage });
|
|
222
|
-
if (answer) {
|
|
223
|
-
return { action: "continue", humanGuidance: answer };
|
|
224
|
-
}
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
await pauseSession(session, {
|
|
228
|
-
question,
|
|
229
|
-
context: { iteration, stage, conflict }
|
|
230
|
-
});
|
|
231
|
-
emitProgress(
|
|
232
|
-
emitter,
|
|
233
|
-
makeEvent("question", { ...eventBase, stage }, {
|
|
234
|
-
status: "paused",
|
|
235
|
-
message: question,
|
|
236
|
-
detail: { question, sessionId: session.id }
|
|
237
|
-
})
|
|
238
|
-
);
|
|
239
|
-
|
|
240
|
-
return { action: "pause", question };
|
|
241
|
-
}
|
|
242
29
|
|
|
243
30
|
export async function runFlow({ task, config, logger, flags = {}, emitter = null, askQuestion = null }) {
|
|
244
31
|
const plannerRole = resolveRole(config, "planner");
|
|
@@ -309,7 +96,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
309
96
|
}
|
|
310
97
|
|
|
311
98
|
const repeatDetector = new RepeatDetector({ threshold: getRepeatThreshold(config) });
|
|
312
|
-
const
|
|
99
|
+
const coderRoleInstance = new CoderRole({ config, logger, emitter, createAgentFn: createAgent });
|
|
313
100
|
const startedAt = Date.now();
|
|
314
101
|
const eventBase = { sessionId: null, iteration: 0, stage: null, startedAt };
|
|
315
102
|
const budgetTracker = new BudgetTracker({ pricing: config?.budget?.pricing });
|
|
@@ -381,172 +168,47 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
381
168
|
|
|
382
169
|
// Accumulate stage results for final summary
|
|
383
170
|
const stageResults = {};
|
|
384
|
-
|
|
385
|
-
let sonarIssuesFinal = null;
|
|
171
|
+
const sonarState = { issuesInitial: null, issuesFinal: null };
|
|
386
172
|
|
|
387
173
|
if (triageEnabled) {
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
);
|
|
395
|
-
|
|
396
|
-
const triage = new TriageRole({ config, logger, emitter });
|
|
397
|
-
await triage.init({ task, sessionId: session.id, iteration: 0 });
|
|
398
|
-
const triageStart = Date.now();
|
|
399
|
-
const triageOutput = await triage.run({ task });
|
|
400
|
-
trackBudget({
|
|
401
|
-
role: "triage",
|
|
402
|
-
provider: config?.roles?.triage?.provider || coderRole.provider,
|
|
403
|
-
model: config?.roles?.triage?.model || coderRole.model,
|
|
404
|
-
result: triageOutput,
|
|
405
|
-
duration_ms: Date.now() - triageStart
|
|
406
|
-
});
|
|
407
|
-
|
|
408
|
-
await addCheckpoint(session, { stage: "triage", iteration: 0, ok: triageOutput.ok });
|
|
409
|
-
|
|
410
|
-
const recommendedRoles = new Set(triageOutput.result?.roles || []);
|
|
411
|
-
if (triageOutput.ok) {
|
|
412
|
-
plannerEnabled = recommendedRoles.has("planner");
|
|
413
|
-
researcherEnabled = recommendedRoles.has("researcher");
|
|
414
|
-
refactorerEnabled = recommendedRoles.has("refactorer");
|
|
415
|
-
reviewerEnabled = recommendedRoles.has("reviewer");
|
|
416
|
-
testerEnabled = recommendedRoles.has("tester");
|
|
417
|
-
securityEnabled = recommendedRoles.has("security");
|
|
418
|
-
}
|
|
419
|
-
|
|
420
|
-
if (flags.enablePlanner !== undefined) plannerEnabled = Boolean(flags.enablePlanner);
|
|
421
|
-
if (flags.enableResearcher !== undefined) researcherEnabled = Boolean(flags.enableResearcher);
|
|
422
|
-
if (flags.enableRefactorer !== undefined) refactorerEnabled = Boolean(flags.enableRefactorer);
|
|
423
|
-
if (flags.enableReviewer !== undefined) reviewerEnabled = Boolean(flags.enableReviewer);
|
|
424
|
-
if (flags.enableTester !== undefined) testerEnabled = Boolean(flags.enableTester);
|
|
425
|
-
if (flags.enableSecurity !== undefined) securityEnabled = Boolean(flags.enableSecurity);
|
|
426
|
-
|
|
427
|
-
stageResults.triage = {
|
|
428
|
-
ok: triageOutput.ok,
|
|
429
|
-
level: triageOutput.result?.level || null,
|
|
430
|
-
roles: Array.from(recommendedRoles),
|
|
431
|
-
reasoning: triageOutput.result?.reasoning || null
|
|
432
|
-
};
|
|
433
|
-
|
|
434
|
-
emitProgress(
|
|
435
|
-
emitter,
|
|
436
|
-
makeEvent("triage:end", { ...eventBase, stage: "triage" }, {
|
|
437
|
-
status: triageOutput.ok ? "ok" : "fail",
|
|
438
|
-
message: triageOutput.ok ? "Triage completed" : `Triage failed: ${triageOutput.summary}`,
|
|
439
|
-
detail: stageResults.triage
|
|
440
|
-
})
|
|
441
|
-
);
|
|
442
|
-
} else {
|
|
443
|
-
if (flags.enablePlanner !== undefined) plannerEnabled = Boolean(flags.enablePlanner);
|
|
444
|
-
if (flags.enableResearcher !== undefined) researcherEnabled = Boolean(flags.enableResearcher);
|
|
445
|
-
if (flags.enableRefactorer !== undefined) refactorerEnabled = Boolean(flags.enableRefactorer);
|
|
446
|
-
if (flags.enableReviewer !== undefined) reviewerEnabled = Boolean(flags.enableReviewer);
|
|
447
|
-
if (flags.enableTester !== undefined) testerEnabled = Boolean(flags.enableTester);
|
|
448
|
-
if (flags.enableSecurity !== undefined) securityEnabled = Boolean(flags.enableSecurity);
|
|
174
|
+
const triageResult = await runTriageStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget });
|
|
175
|
+
if (triageResult.roleOverrides.plannerEnabled !== undefined) plannerEnabled = triageResult.roleOverrides.plannerEnabled;
|
|
176
|
+
if (triageResult.roleOverrides.researcherEnabled !== undefined) researcherEnabled = triageResult.roleOverrides.researcherEnabled;
|
|
177
|
+
if (triageResult.roleOverrides.refactorerEnabled !== undefined) refactorerEnabled = triageResult.roleOverrides.refactorerEnabled;
|
|
178
|
+
if (triageResult.roleOverrides.reviewerEnabled !== undefined) reviewerEnabled = triageResult.roleOverrides.reviewerEnabled;
|
|
179
|
+
if (triageResult.roleOverrides.testerEnabled !== undefined) testerEnabled = triageResult.roleOverrides.testerEnabled;
|
|
180
|
+
if (triageResult.roleOverrides.securityEnabled !== undefined) securityEnabled = triageResult.roleOverrides.securityEnabled;
|
|
181
|
+
stageResults.triage = triageResult.stageResult;
|
|
449
182
|
}
|
|
450
183
|
|
|
184
|
+
if (flags.enablePlanner !== undefined) plannerEnabled = Boolean(flags.enablePlanner);
|
|
185
|
+
if (flags.enableResearcher !== undefined) researcherEnabled = Boolean(flags.enableResearcher);
|
|
186
|
+
if (flags.enableRefactorer !== undefined) refactorerEnabled = Boolean(flags.enableRefactorer);
|
|
187
|
+
if (flags.enableReviewer !== undefined) reviewerEnabled = Boolean(flags.enableReviewer);
|
|
188
|
+
if (flags.enableTester !== undefined) testerEnabled = Boolean(flags.enableTester);
|
|
189
|
+
if (flags.enableSecurity !== undefined) securityEnabled = Boolean(flags.enableSecurity);
|
|
190
|
+
|
|
451
191
|
// --- Researcher (pre-planning) ---
|
|
452
192
|
let researchContext = null;
|
|
453
193
|
if (researcherEnabled) {
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
makeEvent("researcher:start", { ...eventBase, stage: "researcher" }, {
|
|
458
|
-
message: "Researcher investigating codebase"
|
|
459
|
-
})
|
|
460
|
-
);
|
|
461
|
-
|
|
462
|
-
const researcher = new ResearcherRole({ config, logger, emitter });
|
|
463
|
-
await researcher.init({ task });
|
|
464
|
-
const researchStart = Date.now();
|
|
465
|
-
const researchOutput = await researcher.run({ task });
|
|
466
|
-
trackBudget({
|
|
467
|
-
role: "researcher",
|
|
468
|
-
provider: config?.roles?.researcher?.provider || coderRole.provider,
|
|
469
|
-
model: config?.roles?.researcher?.model || coderRole.model,
|
|
470
|
-
result: researchOutput,
|
|
471
|
-
duration_ms: Date.now() - researchStart
|
|
472
|
-
});
|
|
473
|
-
|
|
474
|
-
await addCheckpoint(session, { stage: "researcher", iteration: 0, ok: researchOutput.ok });
|
|
475
|
-
|
|
476
|
-
emitProgress(
|
|
477
|
-
emitter,
|
|
478
|
-
makeEvent("researcher:end", { ...eventBase, stage: "researcher" }, {
|
|
479
|
-
status: researchOutput.ok ? "ok" : "fail",
|
|
480
|
-
message: researchOutput.ok ? "Research completed" : `Research failed: ${researchOutput.summary}`
|
|
481
|
-
})
|
|
482
|
-
);
|
|
483
|
-
|
|
484
|
-
stageResults.researcher = { ok: researchOutput.ok, summary: researchOutput.summary || null };
|
|
485
|
-
if (researchOutput.ok) {
|
|
486
|
-
researchContext = researchOutput.result;
|
|
487
|
-
}
|
|
194
|
+
const researcherResult = await runResearcherStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget });
|
|
195
|
+
researchContext = researcherResult.researchContext;
|
|
196
|
+
stageResults.researcher = researcherResult.stageResult;
|
|
488
197
|
}
|
|
489
198
|
|
|
490
199
|
// --- Planner ---
|
|
491
200
|
let plannedTask = task;
|
|
492
201
|
if (plannerEnabled) {
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
makeEvent("planner:start", { ...eventBase, stage: "planner" }, {
|
|
497
|
-
message: `Planner (${plannerRole.provider}) running`,
|
|
498
|
-
detail: { planner: plannerRole.provider }
|
|
499
|
-
})
|
|
500
|
-
);
|
|
501
|
-
const planner = createAgent(plannerRole.provider, config, logger);
|
|
502
|
-
const plannerStart = Date.now();
|
|
503
|
-
const plannerPromptParts = [
|
|
504
|
-
"Create an implementation plan for this task.",
|
|
505
|
-
"Return concise numbered steps focused on execution order and risk.",
|
|
506
|
-
"",
|
|
507
|
-
task
|
|
508
|
-
];
|
|
509
|
-
if (researchContext) {
|
|
510
|
-
plannerPromptParts.push("", "## Research findings", JSON.stringify(researchContext, null, 2));
|
|
511
|
-
}
|
|
512
|
-
const plannerResult = await planner.runTask({ prompt: plannerPromptParts.join("\n"), role: "planner" });
|
|
513
|
-
trackBudget({ role: "planner", provider: plannerRole.provider, model: plannerRole.model, result: plannerResult, duration_ms: Date.now() - plannerStart });
|
|
514
|
-
if (!plannerResult.ok) {
|
|
515
|
-
await markSessionStatus(session, "failed");
|
|
516
|
-
const details = plannerResult.error || plannerResult.output || `exitCode=${plannerResult.exitCode ?? "unknown"}`;
|
|
517
|
-
emitProgress(
|
|
518
|
-
emitter,
|
|
519
|
-
makeEvent("planner:end", { ...eventBase, stage: "planner" }, {
|
|
520
|
-
status: "fail",
|
|
521
|
-
message: `Planner failed: ${details}`
|
|
522
|
-
})
|
|
523
|
-
);
|
|
524
|
-
throw new Error(`Planner failed: ${details}`);
|
|
525
|
-
}
|
|
526
|
-
if (plannerResult.output?.trim()) {
|
|
527
|
-
plannedTask = `${task}\n\nExecution plan:\n${plannerResult.output.trim()}`;
|
|
528
|
-
}
|
|
529
|
-
const parsedPlan = parsePlannerOutput(plannerResult.output);
|
|
530
|
-
stageResults.planner = {
|
|
531
|
-
ok: true,
|
|
532
|
-
title: parsedPlan?.title || null,
|
|
533
|
-
approach: parsedPlan?.approach || null,
|
|
534
|
-
steps: parsedPlan?.steps || [],
|
|
535
|
-
completedSteps: []
|
|
536
|
-
};
|
|
537
|
-
emitProgress(
|
|
538
|
-
emitter,
|
|
539
|
-
makeEvent("planner:end", { ...eventBase, stage: "planner" }, {
|
|
540
|
-
message: "Planner completed"
|
|
541
|
-
})
|
|
542
|
-
);
|
|
202
|
+
const plannerResult = await runPlannerStage({ config, logger, emitter, eventBase, session, plannerRole, researchContext, trackBudget });
|
|
203
|
+
plannedTask = plannerResult.plannedTask;
|
|
204
|
+
stageResults.planner = plannerResult.stageResult;
|
|
543
205
|
}
|
|
544
206
|
|
|
545
207
|
const gitCtx = await prepareGitAutomation({ config, task, logger, session });
|
|
546
208
|
|
|
547
209
|
const projectDir = config.projectDir || process.cwd();
|
|
548
210
|
const { rules: reviewRules } = await resolveReviewProfile({ mode: config.review_mode, projectDir });
|
|
549
|
-
|
|
211
|
+
await coderRoleInstance.init();
|
|
550
212
|
|
|
551
213
|
for (let i = 1; i <= config.max_iterations; i += 1) {
|
|
552
214
|
const elapsedMinutes = (Date.now() - startedAt) / 60000;
|
|
@@ -593,295 +255,41 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
593
255
|
logger.info(`Iteration ${i}/${config.max_iterations}`);
|
|
594
256
|
|
|
595
257
|
// --- Coder ---
|
|
596
|
-
|
|
597
|
-
emitProgress(
|
|
598
|
-
emitter,
|
|
599
|
-
makeEvent("coder:start", { ...eventBase, stage: "coder" }, {
|
|
600
|
-
message: `Coder (${coderRole.provider}) running`,
|
|
601
|
-
detail: { coder: coderRole.provider }
|
|
602
|
-
})
|
|
603
|
-
);
|
|
604
|
-
|
|
605
|
-
const coderPrompt = buildCoderPrompt({
|
|
606
|
-
task: plannedTask,
|
|
607
|
-
reviewerFeedback: session.last_reviewer_feedback,
|
|
608
|
-
sonarSummary: session.last_sonar_summary,
|
|
609
|
-
coderRules,
|
|
610
|
-
methodology: config.development?.methodology || "tdd",
|
|
611
|
-
serenaEnabled: Boolean(config.serena?.enabled)
|
|
612
|
-
});
|
|
613
|
-
const coderOnOutput = ({ stream, line }) => {
|
|
614
|
-
emitProgress(emitter, makeEvent("agent:output", { ...eventBase, stage: "coder" }, {
|
|
615
|
-
message: line,
|
|
616
|
-
detail: { stream, agent: coderRole.provider }
|
|
617
|
-
}));
|
|
618
|
-
};
|
|
619
|
-
const coderStart = Date.now();
|
|
620
|
-
const coderResult = await coder.runTask({ prompt: coderPrompt, onOutput: coderOnOutput, role: "coder" });
|
|
621
|
-
trackBudget({ role: "coder", provider: coderRole.provider, model: coderRole.model, result: coderResult, duration_ms: Date.now() - coderStart });
|
|
622
|
-
|
|
623
|
-
if (!coderResult.ok) {
|
|
624
|
-
await markSessionStatus(session, "failed");
|
|
625
|
-
const details = coderResult.error || coderResult.output || `exitCode=${coderResult.exitCode ?? "unknown"}`;
|
|
626
|
-
emitProgress(
|
|
627
|
-
emitter,
|
|
628
|
-
makeEvent("coder:end", { ...eventBase, stage: "coder" }, {
|
|
629
|
-
status: "fail",
|
|
630
|
-
message: `Coder failed: ${details}`
|
|
631
|
-
})
|
|
632
|
-
);
|
|
633
|
-
throw new Error(`Coder failed: ${details}`);
|
|
634
|
-
}
|
|
635
|
-
|
|
636
|
-
await addCheckpoint(session, { stage: "coder", iteration: i, note: "Coder applied changes" });
|
|
637
|
-
emitProgress(
|
|
638
|
-
emitter,
|
|
639
|
-
makeEvent("coder:end", { ...eventBase, stage: "coder" }, {
|
|
640
|
-
message: "Coder completed"
|
|
641
|
-
})
|
|
642
|
-
);
|
|
258
|
+
await runCoderStage({ coderRoleInstance, coderRole, config, logger, emitter, eventBase, session, plannedTask, trackBudget, iteration: i });
|
|
643
259
|
|
|
260
|
+
// --- Refactorer ---
|
|
644
261
|
if (refactorerEnabled) {
|
|
645
|
-
|
|
646
|
-
emitProgress(
|
|
647
|
-
emitter,
|
|
648
|
-
makeEvent("refactorer:start", { ...eventBase, stage: "refactorer" }, {
|
|
649
|
-
message: `Refactorer (${refactorerRole.provider}) running`,
|
|
650
|
-
detail: { refactorer: refactorerRole.provider }
|
|
651
|
-
})
|
|
652
|
-
);
|
|
653
|
-
const refactorer = createAgent(refactorerRole.provider, config, logger);
|
|
654
|
-
const refactorPrompt = [
|
|
655
|
-
`Task context:\n${plannedTask}`,
|
|
656
|
-
"",
|
|
657
|
-
"Refactor the current changes for clarity and maintainability without changing behavior.",
|
|
658
|
-
"Do not expand scope and keep tests green."
|
|
659
|
-
].join("\n");
|
|
660
|
-
const refactorerOnOutput = ({ stream, line }) => {
|
|
661
|
-
emitProgress(emitter, makeEvent("agent:output", { ...eventBase, stage: "refactorer" }, {
|
|
662
|
-
message: line,
|
|
663
|
-
detail: { stream, agent: refactorerRole.provider }
|
|
664
|
-
}));
|
|
665
|
-
};
|
|
666
|
-
const refactorerStart = Date.now();
|
|
667
|
-
const refactorResult = await refactorer.runTask({
|
|
668
|
-
prompt: refactorPrompt,
|
|
669
|
-
onOutput: refactorerOnOutput,
|
|
670
|
-
role: "refactorer"
|
|
671
|
-
});
|
|
672
|
-
trackBudget({ role: "refactorer", provider: refactorerRole.provider, model: refactorerRole.model, result: refactorResult, duration_ms: Date.now() - refactorerStart });
|
|
673
|
-
if (!refactorResult.ok) {
|
|
674
|
-
await markSessionStatus(session, "failed");
|
|
675
|
-
const details = refactorResult.error || refactorResult.output || `exitCode=${refactorResult.exitCode ?? "unknown"}`;
|
|
676
|
-
emitProgress(
|
|
677
|
-
emitter,
|
|
678
|
-
makeEvent("refactorer:end", { ...eventBase, stage: "refactorer" }, {
|
|
679
|
-
status: "fail",
|
|
680
|
-
message: `Refactorer failed: ${details}`
|
|
681
|
-
})
|
|
682
|
-
);
|
|
683
|
-
throw new Error(`Refactorer failed: ${details}`);
|
|
684
|
-
}
|
|
685
|
-
await addCheckpoint(session, { stage: "refactorer", iteration: i, note: "Refactorer applied cleanups" });
|
|
686
|
-
emitProgress(
|
|
687
|
-
emitter,
|
|
688
|
-
makeEvent("refactorer:end", { ...eventBase, stage: "refactorer" }, {
|
|
689
|
-
message: "Refactorer completed"
|
|
690
|
-
})
|
|
691
|
-
);
|
|
262
|
+
await runRefactorerStage({ refactorerRole, config, logger, emitter, eventBase, session, plannedTask, trackBudget, iteration: i });
|
|
692
263
|
}
|
|
693
264
|
|
|
694
265
|
// --- TDD Policy ---
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
iteration: i,
|
|
701
|
-
ok: tddEval.ok,
|
|
702
|
-
reason: tddEval.reason,
|
|
703
|
-
source_files: tddEval.sourceFiles?.length || 0,
|
|
704
|
-
test_files: tddEval.testFiles?.length || 0
|
|
705
|
-
});
|
|
706
|
-
|
|
707
|
-
emitProgress(
|
|
708
|
-
emitter,
|
|
709
|
-
makeEvent("tdd:result", { ...eventBase, stage: "tdd" }, {
|
|
710
|
-
status: tddEval.ok ? "ok" : "fail",
|
|
711
|
-
message: tddEval.ok ? "TDD policy passed" : `TDD policy failed: ${tddEval.reason}`,
|
|
712
|
-
detail: {
|
|
713
|
-
ok: tddEval.ok,
|
|
714
|
-
reason: tddEval.reason,
|
|
715
|
-
sourceFiles: tddEval.sourceFiles?.length || 0,
|
|
716
|
-
testFiles: tddEval.testFiles?.length || 0
|
|
717
|
-
}
|
|
718
|
-
})
|
|
719
|
-
);
|
|
720
|
-
|
|
721
|
-
if (!tddEval.ok) {
|
|
722
|
-
session.last_reviewer_feedback = tddEval.message;
|
|
723
|
-
session.repeated_issue_count += 1;
|
|
724
|
-
await saveSession(session);
|
|
725
|
-
if (session.repeated_issue_count >= config.session.fail_fast_repeats) {
|
|
726
|
-
const question = `TDD policy has failed ${session.repeated_issue_count} times. The coder is not creating tests. How should we proceed? Issue: ${tddEval.reason}`;
|
|
727
|
-
if (askQuestion) {
|
|
728
|
-
const answer = await askQuestion(question, { iteration: i, stage: "tdd" });
|
|
729
|
-
if (answer) {
|
|
730
|
-
session.last_reviewer_feedback += `\nUser guidance: ${answer}`;
|
|
731
|
-
session.repeated_issue_count = 0;
|
|
732
|
-
await saveSession(session);
|
|
733
|
-
continue;
|
|
734
|
-
}
|
|
735
|
-
}
|
|
736
|
-
await pauseSession(session, {
|
|
737
|
-
question,
|
|
738
|
-
context: {
|
|
739
|
-
iteration: i,
|
|
740
|
-
stage: "tdd",
|
|
741
|
-
lastFeedback: tddEval.message,
|
|
742
|
-
repeatedCount: session.repeated_issue_count
|
|
743
|
-
}
|
|
744
|
-
});
|
|
745
|
-
emitProgress(
|
|
746
|
-
emitter,
|
|
747
|
-
makeEvent("question", { ...eventBase, stage: "tdd" }, {
|
|
748
|
-
status: "paused",
|
|
749
|
-
message: question,
|
|
750
|
-
detail: { question, sessionId: session.id }
|
|
751
|
-
})
|
|
752
|
-
);
|
|
753
|
-
return { paused: true, sessionId: session.id, question, context: "tdd_fail_fast" };
|
|
754
|
-
}
|
|
266
|
+
const tddResult = await runTddCheckStage({ config, logger, emitter, eventBase, session, trackBudget, iteration: i, askQuestion });
|
|
267
|
+
if (tddResult.action === "pause") {
|
|
268
|
+
return tddResult.result;
|
|
269
|
+
}
|
|
270
|
+
if (tddResult.action === "continue") {
|
|
755
271
|
continue;
|
|
756
272
|
}
|
|
757
273
|
|
|
758
|
-
// --- SonarQube
|
|
274
|
+
// --- SonarQube ---
|
|
759
275
|
if (config.sonarqube.enabled) {
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
message: "SonarQube scanning"
|
|
765
|
-
})
|
|
766
|
-
);
|
|
767
|
-
|
|
768
|
-
const sonarRole = new SonarRole({ config, logger, emitter });
|
|
769
|
-
await sonarRole.init({ iteration: i });
|
|
770
|
-
const sonarStart = Date.now();
|
|
771
|
-
const sonarOutput = await sonarRole.run();
|
|
772
|
-
trackBudget({ role: "sonar", provider: "sonar", result: sonarOutput, duration_ms: Date.now() - sonarStart });
|
|
773
|
-
const sonarResult = sonarOutput.result;
|
|
774
|
-
|
|
775
|
-
if (!sonarResult.gateStatus && sonarResult.error) {
|
|
776
|
-
await markSessionStatus(session, "failed");
|
|
777
|
-
emitProgress(
|
|
778
|
-
emitter,
|
|
779
|
-
makeEvent("sonar:end", { ...eventBase, stage: "sonar" }, {
|
|
780
|
-
status: "fail",
|
|
781
|
-
message: `Sonar scan failed: ${sonarResult.error}`
|
|
782
|
-
})
|
|
783
|
-
);
|
|
784
|
-
throw new Error(`Sonar scan failed: ${sonarResult.error}`);
|
|
785
|
-
}
|
|
786
|
-
|
|
787
|
-
session.last_sonar_summary = sonarOutput.summary;
|
|
788
|
-
if (typeof sonarResult.openIssuesTotal === "number") {
|
|
789
|
-
if (sonarIssuesInitial === null) {
|
|
790
|
-
sonarIssuesInitial = sonarResult.openIssuesTotal;
|
|
791
|
-
}
|
|
792
|
-
sonarIssuesFinal = sonarResult.openIssuesTotal;
|
|
793
|
-
}
|
|
794
|
-
await addCheckpoint(session, {
|
|
795
|
-
stage: "sonar",
|
|
796
|
-
iteration: i,
|
|
797
|
-
project_key: sonarResult.projectKey,
|
|
798
|
-
quality_gate: sonarResult.gateStatus,
|
|
799
|
-
open_issues: sonarResult.openIssuesTotal
|
|
276
|
+
const sonarResult = await runSonarStage({
|
|
277
|
+
config, logger, emitter, eventBase, session, trackBudget, iteration: i,
|
|
278
|
+
repeatDetector, budgetSummary, sonarState,
|
|
279
|
+
askQuestion, task
|
|
800
280
|
});
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
status: sonarResult.blocking ? "fail" : "ok",
|
|
806
|
-
message: `Quality gate: ${sonarResult.gateStatus}`,
|
|
807
|
-
detail: { projectKey: sonarResult.projectKey, gateStatus: sonarResult.gateStatus, openIssues: sonarResult.openIssuesTotal }
|
|
808
|
-
})
|
|
809
|
-
);
|
|
810
|
-
|
|
811
|
-
if (sonarResult.blocking) {
|
|
812
|
-
repeatDetector.addIteration(sonarResult.issues, []);
|
|
813
|
-
const repeatState = repeatDetector.isStalled();
|
|
814
|
-
if (repeatState.stalled) {
|
|
815
|
-
const repeatCounts = repeatDetector.getRepeatCounts();
|
|
816
|
-
const message = `No progress: SonarQube issues repeated ${repeatCounts.sonar} times.`;
|
|
817
|
-
logger.warn(message);
|
|
818
|
-
await markSessionStatus(session, "stalled");
|
|
819
|
-
emitProgress(
|
|
820
|
-
emitter,
|
|
821
|
-
makeEvent("session:end", { ...eventBase, stage: "sonar" }, {
|
|
822
|
-
status: "stalled",
|
|
823
|
-
message,
|
|
824
|
-
detail: { reason: repeatState.reason, repeats: repeatCounts.sonar, budget: budgetSummary() }
|
|
825
|
-
})
|
|
826
|
-
);
|
|
827
|
-
return { approved: false, sessionId: session.id, reason: "stalled" };
|
|
828
|
-
}
|
|
829
|
-
|
|
830
|
-
session.last_reviewer_feedback = `Sonar gate blocking (${sonarResult.gateStatus}). Resolve critical findings first.`;
|
|
831
|
-
session.sonar_retry_count = (session.sonar_retry_count || 0) + 1;
|
|
832
|
-
await saveSession(session);
|
|
833
|
-
const maxSonarRetries = config.session.max_sonar_retries ?? config.session.fail_fast_repeats;
|
|
834
|
-
if (session.sonar_retry_count >= maxSonarRetries) {
|
|
835
|
-
emitProgress(
|
|
836
|
-
emitter,
|
|
837
|
-
makeEvent("solomon:escalate", { ...eventBase, stage: "sonar" }, {
|
|
838
|
-
message: `Sonar sub-loop limit reached (${session.sonar_retry_count}/${maxSonarRetries})`,
|
|
839
|
-
detail: { subloop: "sonar", retryCount: session.sonar_retry_count, limit: maxSonarRetries, gateStatus: sonarResult.gateStatus }
|
|
840
|
-
})
|
|
841
|
-
);
|
|
842
|
-
|
|
843
|
-
const solomonResult = await invokeSolomon({
|
|
844
|
-
config, logger, emitter, eventBase, stage: "sonar", askQuestion, session, iteration: i,
|
|
845
|
-
conflict: {
|
|
846
|
-
stage: "sonar",
|
|
847
|
-
task,
|
|
848
|
-
iterationCount: session.sonar_retry_count,
|
|
849
|
-
maxIterations: maxSonarRetries,
|
|
850
|
-
history: [{ agent: "sonar", feedback: session.last_sonar_summary }]
|
|
851
|
-
}
|
|
852
|
-
});
|
|
853
|
-
|
|
854
|
-
if (solomonResult.action === "pause") {
|
|
855
|
-
return { paused: true, sessionId: session.id, question: solomonResult.question, context: "sonar_fail_fast" };
|
|
856
|
-
}
|
|
857
|
-
if (solomonResult.action === "continue") {
|
|
858
|
-
if (solomonResult.humanGuidance) {
|
|
859
|
-
session.last_reviewer_feedback += `\nUser guidance: ${solomonResult.humanGuidance}`;
|
|
860
|
-
}
|
|
861
|
-
session.sonar_retry_count = 0;
|
|
862
|
-
await saveSession(session);
|
|
863
|
-
continue;
|
|
864
|
-
}
|
|
865
|
-
if (solomonResult.action === "subtask") {
|
|
866
|
-
return { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "sonar_subtask" };
|
|
867
|
-
}
|
|
868
|
-
}
|
|
281
|
+
if (sonarResult.action === "stalled" || sonarResult.action === "pause") {
|
|
282
|
+
return sonarResult.result;
|
|
283
|
+
}
|
|
284
|
+
if (sonarResult.action === "continue") {
|
|
869
285
|
continue;
|
|
870
286
|
}
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
const issuesInitial = sonarIssuesInitial ?? sonarResult.openIssuesTotal ?? 0;
|
|
875
|
-
const issuesFinal = sonarIssuesFinal ?? sonarResult.openIssuesTotal ?? 0;
|
|
876
|
-
stageResults.sonar = {
|
|
877
|
-
gateStatus: sonarResult.gateStatus,
|
|
878
|
-
openIssues: sonarResult.openIssuesTotal,
|
|
879
|
-
issuesInitial,
|
|
880
|
-
issuesFinal,
|
|
881
|
-
issuesResolved: Math.max(issuesInitial - issuesFinal, 0)
|
|
882
|
-
};
|
|
287
|
+
if (sonarResult.stageResult) {
|
|
288
|
+
stageResults.sonar = sonarResult.stageResult;
|
|
289
|
+
}
|
|
883
290
|
}
|
|
884
291
|
|
|
292
|
+
// --- Reviewer ---
|
|
885
293
|
let review = {
|
|
886
294
|
approved: true,
|
|
887
295
|
blocking_issues: [],
|
|
@@ -890,120 +298,13 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
890
298
|
confidence: 1
|
|
891
299
|
};
|
|
892
300
|
if (reviewerEnabled) {
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
makeEvent("reviewer:start", { ...eventBase, stage: "reviewer" }, {
|
|
897
|
-
message: `Reviewer (${reviewerRole.provider}) running`,
|
|
898
|
-
detail: { reviewer: reviewerRole.provider }
|
|
899
|
-
})
|
|
900
|
-
);
|
|
901
|
-
|
|
902
|
-
const diff = await generateDiff({ baseRef: session.session_start_sha });
|
|
903
|
-
const reviewerPrompt = buildReviewerPrompt({
|
|
904
|
-
task,
|
|
905
|
-
diff,
|
|
906
|
-
reviewRules,
|
|
907
|
-
mode: config.review_mode,
|
|
908
|
-
serenaEnabled: Boolean(config.serena?.enabled)
|
|
301
|
+
const reviewerResult = await runReviewerStage({
|
|
302
|
+
reviewerRole, config, logger, emitter, eventBase, session, trackBudget,
|
|
303
|
+
iteration: i, reviewRules, task, repeatDetector, budgetSummary
|
|
909
304
|
});
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
detail: { stream, agent: reviewerRole.provider }
|
|
914
|
-
}));
|
|
915
|
-
};
|
|
916
|
-
const reviewerStart = Date.now();
|
|
917
|
-
const reviewerExec = await runReviewerWithFallback({
|
|
918
|
-
reviewerName: reviewerRole.provider,
|
|
919
|
-
config,
|
|
920
|
-
logger,
|
|
921
|
-
prompt: reviewerPrompt,
|
|
922
|
-
session,
|
|
923
|
-
iteration: i,
|
|
924
|
-
onOutput: reviewerOnOutput,
|
|
925
|
-
onAttemptResult: ({ reviewer, result }) => {
|
|
926
|
-
trackBudget({ role: "reviewer", provider: reviewer, model: reviewerRole.model, result, duration_ms: Date.now() - reviewerStart });
|
|
927
|
-
}
|
|
928
|
-
});
|
|
929
|
-
|
|
930
|
-
if (!reviewerExec.result || !reviewerExec.result.ok) {
|
|
931
|
-
await markSessionStatus(session, "failed");
|
|
932
|
-
const lastAttempt = reviewerExec.attempts.at(-1);
|
|
933
|
-
const details =
|
|
934
|
-
lastAttempt?.result?.error ||
|
|
935
|
-
lastAttempt?.result?.output ||
|
|
936
|
-
`reviewer=${lastAttempt?.reviewer || "unknown"} exitCode=${lastAttempt?.result?.exitCode ?? "unknown"}`;
|
|
937
|
-
emitProgress(
|
|
938
|
-
emitter,
|
|
939
|
-
makeEvent("reviewer:end", { ...eventBase, stage: "reviewer" }, {
|
|
940
|
-
status: "fail",
|
|
941
|
-
message: `Reviewer failed: ${details}`
|
|
942
|
-
})
|
|
943
|
-
);
|
|
944
|
-
throw new Error(`Reviewer failed: ${details}`);
|
|
945
|
-
}
|
|
946
|
-
|
|
947
|
-
try {
|
|
948
|
-
const parsed = parseJsonOutput(reviewerExec.result.output);
|
|
949
|
-
if (!parsed) {
|
|
950
|
-
throw new Error("Reviewer output is not valid JSON");
|
|
951
|
-
}
|
|
952
|
-
review = validateReviewResult(parsed);
|
|
953
|
-
} catch (parseErr) {
|
|
954
|
-
logger.warn(`Reviewer output parse/validation failed: ${parseErr.message}`);
|
|
955
|
-
review = {
|
|
956
|
-
approved: false,
|
|
957
|
-
blocking_issues: [{
|
|
958
|
-
id: "PARSE_ERROR",
|
|
959
|
-
severity: "high",
|
|
960
|
-
description: `Reviewer output could not be parsed: ${parseErr.message}`
|
|
961
|
-
}],
|
|
962
|
-
non_blocking_suggestions: [],
|
|
963
|
-
summary: `Parse error: ${parseErr.message}`,
|
|
964
|
-
confidence: 0
|
|
965
|
-
};
|
|
966
|
-
}
|
|
967
|
-
await addCheckpoint(session, {
|
|
968
|
-
stage: "reviewer",
|
|
969
|
-
iteration: i,
|
|
970
|
-
approved: review.approved,
|
|
971
|
-
blocking_issues: review.blocking_issues.length
|
|
972
|
-
});
|
|
973
|
-
|
|
974
|
-
emitProgress(
|
|
975
|
-
emitter,
|
|
976
|
-
makeEvent("reviewer:end", { ...eventBase, stage: "reviewer" }, {
|
|
977
|
-
status: review.approved ? "ok" : "fail",
|
|
978
|
-
message: review.approved ? "Review approved" : `Review rejected (${review.blocking_issues.length} blocking)`,
|
|
979
|
-
detail: {
|
|
980
|
-
approved: review.approved,
|
|
981
|
-
blockingCount: review.blocking_issues.length,
|
|
982
|
-
issues: review.blocking_issues.map(
|
|
983
|
-
(x) => `${x.id || "ISSUE"}: ${x.description || "Missing description"}`
|
|
984
|
-
)
|
|
985
|
-
}
|
|
986
|
-
})
|
|
987
|
-
);
|
|
988
|
-
|
|
989
|
-
if (!review.approved) {
|
|
990
|
-
repeatDetector.addIteration([], review.blocking_issues);
|
|
991
|
-
const repeatState = repeatDetector.isStalled();
|
|
992
|
-
if (repeatState.stalled) {
|
|
993
|
-
const repeatCounts = repeatDetector.getRepeatCounts();
|
|
994
|
-
const message = `Manual intervention required: reviewer issues repeated ${repeatCounts.reviewer} times.`;
|
|
995
|
-
logger.warn(message);
|
|
996
|
-
await markSessionStatus(session, "stalled");
|
|
997
|
-
emitProgress(
|
|
998
|
-
emitter,
|
|
999
|
-
makeEvent("session:end", { ...eventBase, stage: "reviewer" }, {
|
|
1000
|
-
status: "stalled",
|
|
1001
|
-
message,
|
|
1002
|
-
detail: { reason: repeatState.reason, repeats: repeatCounts.reviewer, budget: budgetSummary() }
|
|
1003
|
-
})
|
|
1004
|
-
);
|
|
1005
|
-
return { approved: false, sessionId: session.id, reason: "stalled" };
|
|
1006
|
-
}
|
|
305
|
+
review = reviewerResult.review;
|
|
306
|
+
if (reviewerResult.stalled) {
|
|
307
|
+
return reviewerResult.stalledResult;
|
|
1007
308
|
}
|
|
1008
309
|
}
|
|
1009
310
|
|
|
@@ -1023,139 +324,35 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
1023
324
|
// --- Post-loop stages: Tester → Security ---
|
|
1024
325
|
const postLoopDiff = await generateDiff({ baseRef: session.session_start_sha });
|
|
1025
326
|
|
|
1026
|
-
// --- Tester ---
|
|
1027
327
|
if (testerEnabled) {
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
makeEvent("tester:start", { ...eventBase, stage: "tester" }, {
|
|
1032
|
-
message: "Tester evaluating test quality"
|
|
1033
|
-
})
|
|
1034
|
-
);
|
|
1035
|
-
|
|
1036
|
-
const tester = new TesterRole({ config, logger, emitter });
|
|
1037
|
-
await tester.init({ task, iteration: i });
|
|
1038
|
-
const testerStart = Date.now();
|
|
1039
|
-
const testerOutput = await tester.run({ task, diff: postLoopDiff });
|
|
1040
|
-
trackBudget({
|
|
1041
|
-
role: "tester",
|
|
1042
|
-
provider: config?.roles?.tester?.provider || coderRole.provider,
|
|
1043
|
-
model: config?.roles?.tester?.model || coderRole.model,
|
|
1044
|
-
result: testerOutput,
|
|
1045
|
-
duration_ms: Date.now() - testerStart
|
|
328
|
+
const testerResult = await runTesterStage({
|
|
329
|
+
config, logger, emitter, eventBase, session, coderRole, trackBudget,
|
|
330
|
+
iteration: i, task, diff: postLoopDiff, askQuestion
|
|
1046
331
|
});
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
})
|
|
1056
|
-
);
|
|
1057
|
-
|
|
1058
|
-
if (!testerOutput.ok) {
|
|
1059
|
-
const maxTesterRetries = config.session?.max_tester_retries ?? 1;
|
|
1060
|
-
session.tester_retry_count = (session.tester_retry_count || 0) + 1;
|
|
1061
|
-
await saveSession(session);
|
|
1062
|
-
|
|
1063
|
-
if (session.tester_retry_count >= maxTesterRetries) {
|
|
1064
|
-
const solomonResult = await invokeSolomon({
|
|
1065
|
-
config, logger, emitter, eventBase, stage: "tester", askQuestion, session, iteration: i,
|
|
1066
|
-
conflict: {
|
|
1067
|
-
stage: "tester",
|
|
1068
|
-
task,
|
|
1069
|
-
diff: postLoopDiff,
|
|
1070
|
-
iterationCount: session.tester_retry_count,
|
|
1071
|
-
maxIterations: maxTesterRetries,
|
|
1072
|
-
history: [{ agent: "tester", feedback: testerOutput.summary }]
|
|
1073
|
-
}
|
|
1074
|
-
});
|
|
1075
|
-
|
|
1076
|
-
if (solomonResult.action === "pause") {
|
|
1077
|
-
return { paused: true, sessionId: session.id, question: solomonResult.question, context: "tester_fail_fast" };
|
|
1078
|
-
}
|
|
1079
|
-
if (solomonResult.action === "subtask") {
|
|
1080
|
-
return { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "tester_subtask" };
|
|
1081
|
-
}
|
|
1082
|
-
// continue = Solomon approved, proceed to next stage
|
|
1083
|
-
} else {
|
|
1084
|
-
session.last_reviewer_feedback = `Tester feedback: ${testerOutput.summary}`;
|
|
1085
|
-
await saveSession(session);
|
|
1086
|
-
continue;
|
|
1087
|
-
}
|
|
1088
|
-
} else {
|
|
1089
|
-
session.tester_retry_count = 0;
|
|
1090
|
-
stageResults.tester = { ok: true, summary: testerOutput.summary || "All tests passed" };
|
|
332
|
+
if (testerResult.action === "pause") {
|
|
333
|
+
return testerResult.result;
|
|
334
|
+
}
|
|
335
|
+
if (testerResult.action === "continue") {
|
|
336
|
+
continue;
|
|
337
|
+
}
|
|
338
|
+
if (testerResult.stageResult) {
|
|
339
|
+
stageResults.tester = testerResult.stageResult;
|
|
1091
340
|
}
|
|
1092
341
|
}
|
|
1093
342
|
|
|
1094
|
-
// --- Security ---
|
|
1095
343
|
if (securityEnabled) {
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
makeEvent("security:start", { ...eventBase, stage: "security" }, {
|
|
1100
|
-
message: "Security auditing code"
|
|
1101
|
-
})
|
|
1102
|
-
);
|
|
1103
|
-
|
|
1104
|
-
const security = new SecurityRole({ config, logger, emitter });
|
|
1105
|
-
await security.init({ task, iteration: i });
|
|
1106
|
-
const securityStart = Date.now();
|
|
1107
|
-
const securityOutput = await security.run({ task, diff: postLoopDiff });
|
|
1108
|
-
trackBudget({
|
|
1109
|
-
role: "security",
|
|
1110
|
-
provider: config?.roles?.security?.provider || coderRole.provider,
|
|
1111
|
-
model: config?.roles?.security?.model || coderRole.model,
|
|
1112
|
-
result: securityOutput,
|
|
1113
|
-
duration_ms: Date.now() - securityStart
|
|
344
|
+
const securityResult = await runSecurityStage({
|
|
345
|
+
config, logger, emitter, eventBase, session, coderRole, trackBudget,
|
|
346
|
+
iteration: i, task, diff: postLoopDiff, askQuestion
|
|
1114
347
|
});
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
})
|
|
1124
|
-
);
|
|
1125
|
-
|
|
1126
|
-
if (!securityOutput.ok) {
|
|
1127
|
-
const maxSecurityRetries = config.session?.max_security_retries ?? 1;
|
|
1128
|
-
session.security_retry_count = (session.security_retry_count || 0) + 1;
|
|
1129
|
-
await saveSession(session);
|
|
1130
|
-
|
|
1131
|
-
if (session.security_retry_count >= maxSecurityRetries) {
|
|
1132
|
-
const solomonResult = await invokeSolomon({
|
|
1133
|
-
config, logger, emitter, eventBase, stage: "security", askQuestion, session, iteration: i,
|
|
1134
|
-
conflict: {
|
|
1135
|
-
stage: "security",
|
|
1136
|
-
task,
|
|
1137
|
-
diff: postLoopDiff,
|
|
1138
|
-
iterationCount: session.security_retry_count,
|
|
1139
|
-
maxIterations: maxSecurityRetries,
|
|
1140
|
-
history: [{ agent: "security", feedback: securityOutput.summary }]
|
|
1141
|
-
}
|
|
1142
|
-
});
|
|
1143
|
-
|
|
1144
|
-
if (solomonResult.action === "pause") {
|
|
1145
|
-
return { paused: true, sessionId: session.id, question: solomonResult.question, context: "security_fail_fast" };
|
|
1146
|
-
}
|
|
1147
|
-
if (solomonResult.action === "subtask") {
|
|
1148
|
-
return { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "security_subtask" };
|
|
1149
|
-
}
|
|
1150
|
-
// continue = Solomon approved, proceed
|
|
1151
|
-
} else {
|
|
1152
|
-
session.last_reviewer_feedback = `Security feedback: ${securityOutput.summary}`;
|
|
1153
|
-
await saveSession(session);
|
|
1154
|
-
continue;
|
|
1155
|
-
}
|
|
1156
|
-
} else {
|
|
1157
|
-
session.security_retry_count = 0;
|
|
1158
|
-
stageResults.security = { ok: true, summary: securityOutput.summary || "No vulnerabilities found" };
|
|
348
|
+
if (securityResult.action === "pause") {
|
|
349
|
+
return securityResult.result;
|
|
350
|
+
}
|
|
351
|
+
if (securityResult.action === "continue") {
|
|
352
|
+
continue;
|
|
353
|
+
}
|
|
354
|
+
if (securityResult.stageResult) {
|
|
355
|
+
stageResults.security = securityResult.stageResult;
|
|
1159
356
|
}
|
|
1160
357
|
}
|
|
1161
358
|
|