karajan-code 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +441 -0
  3. package/docs/karajan-code-logo-small.png +0 -0
  4. package/package.json +60 -0
  5. package/scripts/install.js +898 -0
  6. package/scripts/install.sh +7 -0
  7. package/scripts/postinstall.js +117 -0
  8. package/scripts/setup-multi-instance.sh +150 -0
  9. package/src/activity-log.js +59 -0
  10. package/src/agents/aider-agent.js +25 -0
  11. package/src/agents/availability.js +32 -0
  12. package/src/agents/base-agent.js +27 -0
  13. package/src/agents/claude-agent.js +24 -0
  14. package/src/agents/codex-agent.js +27 -0
  15. package/src/agents/gemini-agent.js +25 -0
  16. package/src/agents/index.js +19 -0
  17. package/src/agents/resolve-bin.js +60 -0
  18. package/src/cli.js +200 -0
  19. package/src/commands/code.js +32 -0
  20. package/src/commands/config.js +74 -0
  21. package/src/commands/doctor.js +155 -0
  22. package/src/commands/init.js +181 -0
  23. package/src/commands/plan.js +67 -0
  24. package/src/commands/report.js +340 -0
  25. package/src/commands/resume.js +39 -0
  26. package/src/commands/review.js +26 -0
  27. package/src/commands/roles.js +117 -0
  28. package/src/commands/run.js +91 -0
  29. package/src/commands/scan.js +18 -0
  30. package/src/commands/sonar.js +53 -0
  31. package/src/config.js +322 -0
  32. package/src/git/automation.js +100 -0
  33. package/src/mcp/progress.js +69 -0
  34. package/src/mcp/run-kj.js +87 -0
  35. package/src/mcp/server-handlers.js +259 -0
  36. package/src/mcp/server.js +37 -0
  37. package/src/mcp/tool-arg-normalizers.js +16 -0
  38. package/src/mcp/tools.js +184 -0
  39. package/src/orchestrator.js +1277 -0
  40. package/src/planning-game/adapter.js +105 -0
  41. package/src/planning-game/client.js +81 -0
  42. package/src/prompts/coder.js +60 -0
  43. package/src/prompts/planner.js +26 -0
  44. package/src/prompts/reviewer.js +45 -0
  45. package/src/repeat-detector.js +77 -0
  46. package/src/review/diff-generator.js +22 -0
  47. package/src/review/parser.js +93 -0
  48. package/src/review/profiles.js +66 -0
  49. package/src/review/schema.js +31 -0
  50. package/src/review/tdd-policy.js +57 -0
  51. package/src/roles/base-role.js +127 -0
  52. package/src/roles/coder-role.js +60 -0
  53. package/src/roles/commiter-role.js +94 -0
  54. package/src/roles/index.js +12 -0
  55. package/src/roles/planner-role.js +81 -0
  56. package/src/roles/refactorer-role.js +66 -0
  57. package/src/roles/researcher-role.js +134 -0
  58. package/src/roles/reviewer-role.js +132 -0
  59. package/src/roles/security-role.js +128 -0
  60. package/src/roles/solomon-role.js +199 -0
  61. package/src/roles/sonar-role.js +65 -0
  62. package/src/roles/tester-role.js +114 -0
  63. package/src/roles/triage-role.js +128 -0
  64. package/src/session-store.js +80 -0
  65. package/src/sonar/api.js +78 -0
  66. package/src/sonar/enforcer.js +19 -0
  67. package/src/sonar/manager.js +163 -0
  68. package/src/sonar/project-key.js +83 -0
  69. package/src/sonar/scanner.js +267 -0
  70. package/src/utils/agent-detect.js +32 -0
  71. package/src/utils/budget.js +123 -0
  72. package/src/utils/display.js +346 -0
  73. package/src/utils/events.js +23 -0
  74. package/src/utils/fs.js +19 -0
  75. package/src/utils/git.js +101 -0
  76. package/src/utils/logger.js +86 -0
  77. package/src/utils/paths.js +18 -0
  78. package/src/utils/pricing.js +28 -0
  79. package/src/utils/process.js +67 -0
  80. package/src/utils/wizard.js +41 -0
  81. package/templates/coder-rules.md +24 -0
  82. package/templates/docker-compose.sonar.yml +60 -0
  83. package/templates/kj.config.yml +82 -0
  84. package/templates/review-rules.md +11 -0
  85. package/templates/roles/coder.md +42 -0
  86. package/templates/roles/commiter.md +44 -0
  87. package/templates/roles/planner.md +45 -0
  88. package/templates/roles/refactorer.md +39 -0
  89. package/templates/roles/researcher.md +37 -0
  90. package/templates/roles/reviewer-paranoid.md +38 -0
  91. package/templates/roles/reviewer-relaxed.md +34 -0
  92. package/templates/roles/reviewer-strict.md +37 -0
  93. package/templates/roles/reviewer.md +55 -0
  94. package/templates/roles/security.md +54 -0
  95. package/templates/roles/solomon.md +106 -0
  96. package/templates/roles/sonar.md +49 -0
  97. package/templates/roles/tester.md +41 -0
  98. package/templates/roles/triage.md +25 -0
@@ -0,0 +1,1277 @@
1
+ import { createAgent } from "./agents/index.js";
2
+ import {
3
+ addCheckpoint,
4
+ createSession,
5
+ loadSession,
6
+ markSessionStatus,
7
+ pauseSession,
8
+ resumeSessionWithAnswer,
9
+ saveSession
10
+ } from "./session-store.js";
11
+ import { computeBaseRef, generateDiff } from "./review/diff-generator.js";
12
+ import { parseJsonOutput } from "./review/parser.js";
13
+ import { validateReviewResult } from "./review/schema.js";
14
+ import { evaluateTddPolicy } from "./review/tdd-policy.js";
15
+ import { buildCoderPrompt } from "./prompts/coder.js";
16
+ import { buildReviewerPrompt } from "./prompts/reviewer.js";
17
+ import { resolveRole } from "./config.js";
18
+ import { SonarRole } from "./roles/sonar-role.js";
19
+ import { RepeatDetector } from "./repeat-detector.js";
20
+ import { emitProgress, makeEvent } from "./utils/events.js";
21
+ import { BudgetTracker } from "./utils/budget.js";
22
+ import {
23
+ commitMessageFromTask,
24
+ prepareGitAutomation,
25
+ finalizeGitAutomation
26
+ } from "./git/automation.js";
27
+ import { resolveRoleMdPath, loadFirstExisting } from "./roles/base-role.js";
28
+ import { resolveReviewProfile } from "./review/profiles.js";
29
+ import { ResearcherRole } from "./roles/researcher-role.js";
30
+ import { TriageRole } from "./roles/triage-role.js";
31
+ import { TesterRole } from "./roles/tester-role.js";
32
+ import { SecurityRole } from "./roles/security-role.js";
33
+ import { SolomonRole } from "./roles/solomon-role.js";
34
+
35
/**
 * Parse the planner agent's free-form text output into a structured plan.
 *
 * Every line is trimmed and blank lines are dropped. Recognized line shapes:
 *   - `Title: ...`                      -> title (first occurrence only)
 *   - `Approach: ...` / `Strategy: ...` -> approach (first occurrence only)
 *   - `1. ...`, `2) ...`, `3: ...`, `4- ...` -> numbered step
 *   - `- ...` or `* ...`                -> bullet step
 *
 * When no explicit `Title:` line exists, the first line that is neither an
 * approach/strategy line nor a step (numbered OR bullet) becomes the title.
 *
 * @param {unknown} output Raw planner output; falsy values are treated as empty.
 * @returns {{ title: string|null, approach: string|null, steps: string[] }|null}
 *   The parsed plan, or null when the output is empty or whitespace-only.
 */
function parsePlannerOutput(output) {
  const text = String(output || "").trim();
  if (!text) return null;

  const titlePattern = /^title\s*:\s*(.+)$/i;
  const approachPattern = /^(?:approach|strategy)\s*:\s*(.+)$/i;
  const numberedStepPattern = /^\d+[\).:-]\s*(.+)$/;
  const bulletStepPattern = /^[-*]\s+(.+)$/;

  const lines = text
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter(Boolean);

  let title = null;
  let approach = null;
  const steps = [];

  for (const line of lines) {
    if (!title) {
      const titleMatch = line.match(titlePattern);
      if (titleMatch) {
        title = titleMatch[1].trim();
        continue;
      }
    }

    if (!approach) {
      const approachMatch = line.match(approachPattern);
      if (approachMatch) {
        approach = approachMatch[1].trim();
        continue;
      }
    }

    const stepMatch = line.match(numberedStepPattern) || line.match(bulletStepPattern);
    if (stepMatch) {
      steps.push(stepMatch[1].trim());
    }
  }

  if (!title) {
    // Bullet lines are steps just like numbered lines, so they must not be
    // promoted to the title (a bullet-only plan previously got "- first step").
    const firstFreeLine = lines.find(
      (line) =>
        !/^(?:approach|strategy)\s*:/i.test(line) &&
        !/^\d+[\).:-]\s*/.test(line) &&
        !/^[-*]\s+/.test(line)
    );
    title = firstFreeLine || null;
  }

  return { title, approach, steps };
}
85
+
86
/**
 * Resolve how many repeats trigger fail-fast repeat detection.
 *
 * Candidates are checked in priority order:
 *   1. `config.failFast.repeatThreshold`
 *   2. `config.session.repeat_detection_threshold`
 *   3. `config.session.fail_fast_repeats`
 *
 * The first candidate that coerces to a finite number greater than zero wins.
 * An unusable higher-priority value (nullish, non-numeric, zero, negative,
 * infinite) no longer masks a valid lower-priority one.
 *
 * @param {object} [config] Runtime configuration; may be null/undefined.
 * @returns {number} The configured threshold, or 2 when none is usable.
 */
function getRepeatThreshold(config) {
  const DEFAULT_THRESHOLD = 2;
  const candidates = [
    config?.failFast?.repeatThreshold,
    config?.session?.repeat_detection_threshold,
    config?.session?.fail_fast_repeats
  ];

  for (const candidate of candidates) {
    if (candidate === null || candidate === undefined) continue;
    const value = Number(candidate);
    if (Number.isFinite(value) && value > 0) return value;
  }

  return DEFAULT_THRESHOLD;
}
96
+
97
/**
 * Normalize token/cost/model usage data reported by an agent result.
 *
 * Each metric is read first from the top level of `result`, then from the
 * usage container (`result.usage`, or `result.metrics` when `usage` is falsy)
 * under several alternative key names. Only null/undefined count as missing,
 * so an explicit 0 is preserved.
 *
 * @param {object} [result] Agent result; may be null/undefined.
 * @param {string|null} [defaultModel=null] Model name used when the result names none.
 * @returns {{ tokens_in: *, tokens_out: *, cost_usd: *, model: * }}
 *   Token counts default to 0, `model` defaults to `defaultModel` and then null,
 *   and `cost_usd` is left as-is (possibly undefined) when no cost was reported.
 */
function extractUsageMetrics(result, defaultModel = null) {
  const usage = result?.usage || result?.metrics || {};

  // Returns the first non-nullish value: result[resultKey] first, then each
  // usage[key] in the given order; undefined when nothing is present.
  const firstPresent = (resultKey, usageKeys) => {
    const direct = result?.[resultKey];
    if (direct !== null && direct !== undefined) return direct;
    for (const key of usageKeys) {
      const nested = usage?.[key];
      if (nested !== null && nested !== undefined) return nested;
    }
    return undefined;
  };

  return {
    tokens_in: firstPresent("tokens_in", ["tokens_in", "input_tokens", "prompt_tokens"]) ?? 0,
    tokens_out: firstPresent("tokens_out", ["tokens_out", "output_tokens", "completion_tokens"]) ?? 0,
    // The last alternative is passed through untouched, even when nullish.
    cost_usd: firstPresent("cost_usd", ["cost_usd", "usd_cost"]) ?? usage?.cost,
    model: firstPresent("model", ["model", "model_name", "model_id"]) ?? defaultModel ?? null
  };
}
126
+
127
/**
 * Run the reviewer agent, retrying and then falling back to a secondary
 * reviewer until one attempt reports `ok`.
 *
 * Candidates are tried in order: `reviewerName`, then
 * `config.reviewer_options.fallback_reviewer` when set and different. Each
 * candidate gets `retries + 1` attempts, where `retries` comes from
 * `config.reviewer_options.retries` (default 1). A value that is not a finite
 * number falls back to that default: previously NaN produced zero attempts
 * and Infinity could retry forever. Negative values are clamped to 0 and
 * fractions are floored.
 *
 * Every attempt is reported to `onAttemptResult` (when provided) and recorded
 * through `addCheckpoint` with stage "reviewer-attempt".
 *
 * @param {object} params
 * @param {string} params.reviewerName Primary reviewer agent name.
 * @param {object} params.config Runtime configuration.
 * @param {object} params.logger Logger handed to `createAgent`.
 * @param {string} params.prompt Review prompt sent to the agent.
 * @param {object} params.session Session that receives the checkpoints.
 * @param {number} params.iteration Current iteration number (recorded in checkpoints).
 * @param {Function} [params.onOutput] Output callback forwarded to the agent.
 * @param {Function} [params.onAttemptResult] Awaited after each attempt with `{ reviewer, result }`.
 * @returns {Promise<{ result: object|null, attempts: Array<object> }>}
 *   The first successful result (or null when every attempt failed) plus the
 *   full attempt log.
 */
async function runReviewerWithFallback({ reviewerName, config, logger, prompt, session, iteration, onOutput, onAttemptResult }) {
  const DEFAULT_RETRIES = 1;
  const fallbackReviewer = config.reviewer_options?.fallback_reviewer;
  const configuredRetries = Number(config.reviewer_options?.retries ?? DEFAULT_RETRIES);
  const retries = Number.isFinite(configuredRetries)
    ? Math.max(0, Math.floor(configuredRetries))
    : DEFAULT_RETRIES;

  const candidates = [reviewerName];
  if (fallbackReviewer && fallbackReviewer !== reviewerName) {
    candidates.push(fallbackReviewer);
  }

  const attempts = [];
  for (const name of candidates) {
    const reviewer = createAgent(name, config, logger);
    for (let attempt = 1; attempt <= retries + 1; attempt += 1) {
      const result = await reviewer.reviewTask({ prompt, onOutput, role: "reviewer" });
      if (onAttemptResult) {
        await onAttemptResult({ reviewer: name, result });
      }
      attempts.push({ reviewer: name, attempt, ok: result.ok, result });
      await addCheckpoint(session, {
        stage: "reviewer-attempt",
        iteration,
        reviewer: name,
        attempt,
        ok: result.ok
      });

      if (result.ok) {
        return { result, attempts };
      }
    }
  }

  return { result: null, attempts };
}
160
+
161
/**
 * Resolve a pipeline conflict through the Solomon arbiter role, or hand it to
 * a human when Solomon is disabled or does not return an ok ruling.
 *
 * Flow when `config.pipeline.solomon.enabled` is truthy:
 *   1. emit "solomon:start"
 *   2. init and run a SolomonRole with the conflict
 *   3. emit "solomon:end" and record a "solomon" checkpoint
 *   4. translate the ruling into an action for the caller
 *
 * @param {object} params
 * @param {object} params.config Runtime configuration.
 * @param {object} params.logger Logger passed to the role.
 * @param {object|null} params.emitter Progress emitter.
 * @param {object} params.eventBase Base fields spread into every emitted event.
 * @param {string} params.stage Name of the stage where the conflict arose.
 * @param {object} params.conflict Conflict description handed to Solomon.
 * @param {Function|null} params.askQuestion Interactive question callback, if any.
 * @param {object} params.session Current session (its `task` is the fallback task).
 * @param {number} params.iteration Current iteration number.
 * @returns {Promise<object>} The result of `escalateToHuman` when escalating;
 *   otherwise `{ action: "continue", conditions, ruling }` or
 *   `{ action: "subtask", subtask, ruling }`.
 */
async function invokeSolomon({ config, logger, emitter, eventBase, stage, conflict, askQuestion, session, iteration }) {
  // No arbiter configured: the conflict goes straight to a human.
  if (!config.pipeline?.solomon?.enabled) {
    return escalateToHuman({ askQuestion, session, emitter, eventBase, stage, conflict, iteration });
  }

  // eventBase is re-read on every call so each event carries its current fields.
  const announce = (type, payload) =>
    emitProgress(emitter, makeEvent(type, { ...eventBase, stage: "solomon" }, payload));

  announce("solomon:start", {
    message: `Solomon arbitrating ${stage} conflict`,
    detail: { conflictStage: stage }
  });

  const arbiter = new SolomonRole({ config, logger, emitter });
  await arbiter.init({ task: conflict.task || session.task, iteration });
  const ruling = await arbiter.run({ conflict });
  const outcome = ruling.result;

  announce("solomon:end", {
    message: `Solomon ruling: ${outcome?.ruling || "unknown"}`,
    detail: outcome
  });

  await addCheckpoint(session, {
    stage: "solomon",
    iteration,
    ruling: outcome?.ruling,
    escalate: outcome?.escalate,
    subtask: outcome?.subtask?.title || null
  });

  if (!ruling.ok) {
    // A non-ok ruling means Solomon wants a human; forward its reason along.
    const escalatedConflict = { ...conflict, solomonReason: outcome?.escalate_reason };
    return escalateToHuman({
      askQuestion,
      session,
      emitter,
      eventBase,
      stage,
      iteration,
      conflict: escalatedConflict
    });
  }

  switch (outcome?.ruling) {
    case "approve":
    case "approve_with_conditions":
      return { action: "continue", conditions: outcome?.conditions || [], ruling };
    case "create_subtask":
      return { action: "subtask", subtask: outcome?.subtask, ruling };
    default:
      // Any other ok ruling lets the pipeline continue without conditions.
      return { action: "continue", conditions: [], ruling };
  }
}
215
+
216
/**
 * Ask a human how to resolve a conflict, pausing the session when no answer
 * is available.
 *
 * The question text contains the stage, the reason (`conflict.solomonReason`
 * when present) and the last two entries of `conflict.history` as JSON.
 *
 * @param {object} params
 * @param {Function|null} params.askQuestion Called with the question and
 *   `{ iteration, stage }` when provided; a truthy answer resolves the conflict.
 * @param {object} params.session Session passed to `pauseSession` when unanswered.
 * @param {object|null} params.emitter Progress emitter for the "question" event.
 * @param {object} params.eventBase Base fields spread into the emitted event.
 * @param {string} params.stage Name of the stage where the conflict arose.
 * @param {object} [params.conflict] Conflict details; may be undefined.
 * @param {number} params.iteration Current iteration number.
 * @returns {Promise<{ action: "continue", humanGuidance: * }|{ action: "pause", question: string }>}
 */
async function escalateToHuman({ askQuestion, session, emitter, eventBase, stage, conflict, iteration }) {
  const reason = conflict?.solomonReason || `${stage} conflict unresolved`;
  const recentHistory = JSON.stringify(conflict?.history?.slice(-2) || [], null, 2);
  const question = `${stage} conflict requires human intervention: ${reason}\nDetails: ${recentHistory}\n\nHow should we proceed?`;

  // Interactive path: a truthy answer lets the pipeline continue immediately.
  const answer = askQuestion ? await askQuestion(question, { iteration, stage }) : null;
  if (answer) {
    return { action: "continue", humanGuidance: answer };
  }

  // No callback or no answer: pause the session and surface the question.
  await pauseSession(session, {
    question,
    context: { iteration, stage, conflict }
  });

  const pausedEvent = makeEvent("question", { ...eventBase, stage }, {
    status: "paused",
    message: question,
    detail: { question, sessionId: session.id }
  });
  emitProgress(emitter, pausedEvent);

  return { action: "pause", question };
}
242
+
243
+ export async function runFlow({ task, config, logger, flags = {}, emitter = null, askQuestion = null }) {
244
+ const plannerRole = resolveRole(config, "planner");
245
+ const coderRole = resolveRole(config, "coder");
246
+ const reviewerRole = resolveRole(config, "reviewer");
247
+ const refactorerRole = resolveRole(config, "refactorer");
248
+ let plannerEnabled = Boolean(config.pipeline?.planner?.enabled);
249
+ let refactorerEnabled = Boolean(config.pipeline?.refactorer?.enabled);
250
+ let researcherEnabled = Boolean(config.pipeline?.researcher?.enabled);
251
+ let testerEnabled = Boolean(config.pipeline?.tester?.enabled);
252
+ let securityEnabled = Boolean(config.pipeline?.security?.enabled);
253
+ let reviewerEnabled = config.pipeline?.reviewer?.enabled !== false;
254
+ const triageEnabled = Boolean(config.pipeline?.triage?.enabled);
255
+
256
+ // --- Dry-run: return summary without executing anything ---
257
+ if (flags.dryRun) {
258
+ const projectDir = config.projectDir || process.cwd();
259
+ const { rules: reviewRules } = await resolveReviewProfile({ mode: config.review_mode, projectDir });
260
+ const coderRules = await loadFirstExisting(resolveRoleMdPath("coder", projectDir));
261
+ const coderPrompt = buildCoderPrompt({ task, coderRules, methodology: config.development?.methodology, serenaEnabled: Boolean(config.serena?.enabled) });
262
+ const reviewerPrompt = buildReviewerPrompt({ task, diff: "(dry-run: no diff)", reviewRules, mode: config.review_mode, serenaEnabled: Boolean(config.serena?.enabled) });
263
+
264
+ const summary = {
265
+ dry_run: true,
266
+ task,
267
+ roles: {
268
+ planner: plannerRole,
269
+ coder: coderRole,
270
+ reviewer: reviewerRole,
271
+ refactorer: refactorerRole
272
+ },
273
+ pipeline: {
274
+ triage_enabled: triageEnabled,
275
+ planner_enabled: plannerEnabled,
276
+ refactorer_enabled: refactorerEnabled,
277
+ sonar_enabled: Boolean(config.sonarqube?.enabled),
278
+ reviewer_enabled: reviewerEnabled,
279
+ researcher_enabled: researcherEnabled,
280
+ tester_enabled: testerEnabled,
281
+ security_enabled: securityEnabled,
282
+ solomon_enabled: Boolean(config.pipeline?.solomon?.enabled)
283
+ },
284
+ limits: {
285
+ max_iterations: config.max_iterations,
286
+ max_iteration_minutes: config.session?.max_iteration_minutes,
287
+ max_total_minutes: config.session?.max_total_minutes,
288
+ max_sonar_retries: config.session?.max_sonar_retries,
289
+ max_reviewer_retries: config.session?.max_reviewer_retries,
290
+ max_tester_retries: config.session?.max_tester_retries,
291
+ max_security_retries: config.session?.max_security_retries
292
+ },
293
+ prompts: {
294
+ coder: coderPrompt,
295
+ reviewer: reviewerPrompt
296
+ },
297
+ git: config.git
298
+ };
299
+
300
+ emitProgress(
301
+ emitter,
302
+ makeEvent("dry-run:summary", { sessionId: null, iteration: 0, stage: "dry-run", startedAt: Date.now() }, {
303
+ message: "Dry-run complete — no changes made",
304
+ detail: summary
305
+ })
306
+ );
307
+
308
+ return summary;
309
+ }
310
+
311
+ const repeatDetector = new RepeatDetector({ threshold: getRepeatThreshold(config) });
312
+ const coder = createAgent(coderRole.provider, config, logger);
313
+ const startedAt = Date.now();
314
+ const eventBase = { sessionId: null, iteration: 0, stage: null, startedAt };
315
+ const budgetTracker = new BudgetTracker({ pricing: config?.budget?.pricing });
316
+ const budgetLimit = Number(config?.max_budget_usd);
317
+ const hasBudgetLimit = Number.isFinite(budgetLimit) && budgetLimit >= 0;
318
+ const warnThresholdPct = Number(config?.budget?.warn_threshold_pct ?? 80);
319
+
320
+ function budgetSummary() {
321
+ const s = budgetTracker.summary();
322
+ s.trace = budgetTracker.trace();
323
+ return s;
324
+ }
325
+
326
+ let stageCounter = 0;
327
+ function trackBudget({ role, provider, model, result, duration_ms }) {
328
+ const metrics = extractUsageMetrics(result, model);
329
+ budgetTracker.record({ role, provider, ...metrics, duration_ms, stage_index: stageCounter++ });
330
+
331
+ if (!hasBudgetLimit) return;
332
+ const totalCost = budgetTracker.total().cost_usd;
333
+ const pctUsed = budgetLimit === 0 ? 100 : (totalCost / budgetLimit) * 100;
334
+ const status = totalCost > budgetLimit ? "fail" : pctUsed >= warnThresholdPct ? "paused" : "ok";
335
+ emitProgress(
336
+ emitter,
337
+ makeEvent("budget:update", { ...eventBase, stage: role }, {
338
+ status,
339
+ message: `Budget: $${totalCost.toFixed(2)} / $${budgetLimit.toFixed(2)}`,
340
+ detail: {
341
+ ...budgetSummary(),
342
+ max_budget_usd: budgetLimit,
343
+ warn_threshold_pct: warnThresholdPct,
344
+ pct_used: Number(pctUsed.toFixed(2)),
345
+ remaining_usd: budgetTracker.remaining(budgetLimit)
346
+ }
347
+ })
348
+ );
349
+ }
350
+
351
+ const baseRef = await computeBaseRef({ baseBranch: config.base_branch, baseRef: flags.baseRef || null });
352
+ const session = await createSession({
353
+ task,
354
+ config_snapshot: config,
355
+ base_ref: baseRef,
356
+ session_start_sha: baseRef,
357
+ last_reviewer_feedback: null,
358
+ repeated_issue_count: 0,
359
+ sonar_retry_count: 0,
360
+ reviewer_retry_count: 0,
361
+ last_sonar_issue_signature: null,
362
+ sonar_repeat_count: 0,
363
+ last_reviewer_issue_signature: null,
364
+ reviewer_repeat_count: 0
365
+ });
366
+
367
+ eventBase.sessionId = session.id;
368
+
369
+ emitProgress(
370
+ emitter,
371
+ makeEvent("session:start", eventBase, {
372
+ message: "Session started",
373
+ detail: {
374
+ task,
375
+ coder: coderRole.provider,
376
+ reviewer: reviewerRole.provider,
377
+ maxIterations: config.max_iterations
378
+ }
379
+ })
380
+ );
381
+
382
+ // Accumulate stage results for final summary
383
+ const stageResults = {};
384
+ let sonarIssuesInitial = null;
385
+ let sonarIssuesFinal = null;
386
+
387
+ if (triageEnabled) {
388
+ logger.setContext({ iteration: 0, stage: "triage" });
389
+ emitProgress(
390
+ emitter,
391
+ makeEvent("triage:start", { ...eventBase, stage: "triage" }, {
392
+ message: "Triage classifying task complexity"
393
+ })
394
+ );
395
+
396
+ const triage = new TriageRole({ config, logger, emitter });
397
+ await triage.init({ task, sessionId: session.id, iteration: 0 });
398
+ const triageStart = Date.now();
399
+ const triageOutput = await triage.run({ task });
400
+ trackBudget({
401
+ role: "triage",
402
+ provider: config?.roles?.triage?.provider || coderRole.provider,
403
+ model: config?.roles?.triage?.model || coderRole.model,
404
+ result: triageOutput,
405
+ duration_ms: Date.now() - triageStart
406
+ });
407
+
408
+ await addCheckpoint(session, { stage: "triage", iteration: 0, ok: triageOutput.ok });
409
+
410
+ const recommendedRoles = new Set(triageOutput.result?.roles || []);
411
+ if (triageOutput.ok) {
412
+ plannerEnabled = recommendedRoles.has("planner");
413
+ researcherEnabled = recommendedRoles.has("researcher");
414
+ refactorerEnabled = recommendedRoles.has("refactorer");
415
+ reviewerEnabled = recommendedRoles.has("reviewer");
416
+ testerEnabled = recommendedRoles.has("tester");
417
+ securityEnabled = recommendedRoles.has("security");
418
+ }
419
+
420
+ if (flags.enablePlanner !== undefined) plannerEnabled = Boolean(flags.enablePlanner);
421
+ if (flags.enableResearcher !== undefined) researcherEnabled = Boolean(flags.enableResearcher);
422
+ if (flags.enableRefactorer !== undefined) refactorerEnabled = Boolean(flags.enableRefactorer);
423
+ if (flags.enableReviewer !== undefined) reviewerEnabled = Boolean(flags.enableReviewer);
424
+ if (flags.enableTester !== undefined) testerEnabled = Boolean(flags.enableTester);
425
+ if (flags.enableSecurity !== undefined) securityEnabled = Boolean(flags.enableSecurity);
426
+
427
+ stageResults.triage = {
428
+ ok: triageOutput.ok,
429
+ level: triageOutput.result?.level || null,
430
+ roles: Array.from(recommendedRoles),
431
+ reasoning: triageOutput.result?.reasoning || null
432
+ };
433
+
434
+ emitProgress(
435
+ emitter,
436
+ makeEvent("triage:end", { ...eventBase, stage: "triage" }, {
437
+ status: triageOutput.ok ? "ok" : "fail",
438
+ message: triageOutput.ok ? "Triage completed" : `Triage failed: ${triageOutput.summary}`,
439
+ detail: stageResults.triage
440
+ })
441
+ );
442
+ } else {
443
+ if (flags.enablePlanner !== undefined) plannerEnabled = Boolean(flags.enablePlanner);
444
+ if (flags.enableResearcher !== undefined) researcherEnabled = Boolean(flags.enableResearcher);
445
+ if (flags.enableRefactorer !== undefined) refactorerEnabled = Boolean(flags.enableRefactorer);
446
+ if (flags.enableReviewer !== undefined) reviewerEnabled = Boolean(flags.enableReviewer);
447
+ if (flags.enableTester !== undefined) testerEnabled = Boolean(flags.enableTester);
448
+ if (flags.enableSecurity !== undefined) securityEnabled = Boolean(flags.enableSecurity);
449
+ }
450
+
451
+ // --- Researcher (pre-planning) ---
452
+ let researchContext = null;
453
+ if (researcherEnabled) {
454
+ logger.setContext({ iteration: 0, stage: "researcher" });
455
+ emitProgress(
456
+ emitter,
457
+ makeEvent("researcher:start", { ...eventBase, stage: "researcher" }, {
458
+ message: "Researcher investigating codebase"
459
+ })
460
+ );
461
+
462
+ const researcher = new ResearcherRole({ config, logger, emitter });
463
+ await researcher.init({ task });
464
+ const researchStart = Date.now();
465
+ const researchOutput = await researcher.run({ task });
466
+ trackBudget({
467
+ role: "researcher",
468
+ provider: config?.roles?.researcher?.provider || coderRole.provider,
469
+ model: config?.roles?.researcher?.model || coderRole.model,
470
+ result: researchOutput,
471
+ duration_ms: Date.now() - researchStart
472
+ });
473
+
474
+ await addCheckpoint(session, { stage: "researcher", iteration: 0, ok: researchOutput.ok });
475
+
476
+ emitProgress(
477
+ emitter,
478
+ makeEvent("researcher:end", { ...eventBase, stage: "researcher" }, {
479
+ status: researchOutput.ok ? "ok" : "fail",
480
+ message: researchOutput.ok ? "Research completed" : `Research failed: ${researchOutput.summary}`
481
+ })
482
+ );
483
+
484
+ stageResults.researcher = { ok: researchOutput.ok, summary: researchOutput.summary || null };
485
+ if (researchOutput.ok) {
486
+ researchContext = researchOutput.result;
487
+ }
488
+ }
489
+
490
+ // --- Planner ---
491
+ let plannedTask = task;
492
+ if (plannerEnabled) {
493
+ logger.setContext({ iteration: 0, stage: "planner" });
494
+ emitProgress(
495
+ emitter,
496
+ makeEvent("planner:start", { ...eventBase, stage: "planner" }, {
497
+ message: `Planner (${plannerRole.provider}) running`,
498
+ detail: { planner: plannerRole.provider }
499
+ })
500
+ );
501
+ const planner = createAgent(plannerRole.provider, config, logger);
502
+ const plannerStart = Date.now();
503
+ const plannerPromptParts = [
504
+ "Create an implementation plan for this task.",
505
+ "Return concise numbered steps focused on execution order and risk.",
506
+ "",
507
+ task
508
+ ];
509
+ if (researchContext) {
510
+ plannerPromptParts.push("", "## Research findings", JSON.stringify(researchContext, null, 2));
511
+ }
512
+ const plannerResult = await planner.runTask({ prompt: plannerPromptParts.join("\n"), role: "planner" });
513
+ trackBudget({ role: "planner", provider: plannerRole.provider, model: plannerRole.model, result: plannerResult, duration_ms: Date.now() - plannerStart });
514
+ if (!plannerResult.ok) {
515
+ await markSessionStatus(session, "failed");
516
+ const details = plannerResult.error || plannerResult.output || `exitCode=${plannerResult.exitCode ?? "unknown"}`;
517
+ emitProgress(
518
+ emitter,
519
+ makeEvent("planner:end", { ...eventBase, stage: "planner" }, {
520
+ status: "fail",
521
+ message: `Planner failed: ${details}`
522
+ })
523
+ );
524
+ throw new Error(`Planner failed: ${details}`);
525
+ }
526
+ if (plannerResult.output?.trim()) {
527
+ plannedTask = `${task}\n\nExecution plan:\n${plannerResult.output.trim()}`;
528
+ }
529
+ const parsedPlan = parsePlannerOutput(plannerResult.output);
530
+ stageResults.planner = {
531
+ ok: true,
532
+ title: parsedPlan?.title || null,
533
+ approach: parsedPlan?.approach || null,
534
+ steps: parsedPlan?.steps || [],
535
+ completedSteps: []
536
+ };
537
+ emitProgress(
538
+ emitter,
539
+ makeEvent("planner:end", { ...eventBase, stage: "planner" }, {
540
+ message: "Planner completed"
541
+ })
542
+ );
543
+ }
544
+
545
+ const gitCtx = await prepareGitAutomation({ config, task, logger, session });
546
+
547
+ const projectDir = config.projectDir || process.cwd();
548
+ const { rules: reviewRules } = await resolveReviewProfile({ mode: config.review_mode, projectDir });
549
+ const coderRules = await loadFirstExisting(resolveRoleMdPath("coder", projectDir));
550
+
551
+ for (let i = 1; i <= config.max_iterations; i += 1) {
552
+ const elapsedMinutes = (Date.now() - startedAt) / 60000;
553
+ if (elapsedMinutes > config.session.max_total_minutes) {
554
+ await markSessionStatus(session, "failed");
555
+ emitProgress(
556
+ emitter,
557
+ makeEvent("session:end", { ...eventBase, iteration: i, stage: "timeout" }, {
558
+ status: "fail",
559
+ message: "Session timed out",
560
+ detail: { approved: false, reason: "timeout", budget: budgetSummary() }
561
+ })
562
+ );
563
+ throw new Error("Session timed out");
564
+ }
565
+
566
+ if (budgetTracker.isOverBudget(config?.max_budget_usd)) {
567
+ await markSessionStatus(session, "failed");
568
+ const totalCost = budgetTracker.total().cost_usd;
569
+ const message = `Budget exceeded: $${totalCost.toFixed(2)} > $${budgetLimit.toFixed(2)}`;
570
+ emitProgress(
571
+ emitter,
572
+ makeEvent("session:end", { ...eventBase, iteration: i, stage: "budget" }, {
573
+ status: "fail",
574
+ message,
575
+ detail: { approved: false, reason: "budget_exceeded", budget: budgetSummary(), max_budget_usd: budgetLimit }
576
+ })
577
+ );
578
+ throw new Error(message);
579
+ }
580
+
581
+ eventBase.iteration = i;
582
+ const iterStart = Date.now();
583
+ logger.setContext({ iteration: i, stage: "iteration" });
584
+
585
+ emitProgress(
586
+ emitter,
587
+ makeEvent("iteration:start", { ...eventBase, stage: "iteration" }, {
588
+ message: `Iteration ${i}/${config.max_iterations}`,
589
+ detail: { iteration: i, maxIterations: config.max_iterations }
590
+ })
591
+ );
592
+
593
+ logger.info(`Iteration ${i}/${config.max_iterations}`);
594
+
595
+ // --- Coder ---
596
+ logger.setContext({ iteration: i, stage: "coder" });
597
+ emitProgress(
598
+ emitter,
599
+ makeEvent("coder:start", { ...eventBase, stage: "coder" }, {
600
+ message: `Coder (${coderRole.provider}) running`,
601
+ detail: { coder: coderRole.provider }
602
+ })
603
+ );
604
+
605
+ const coderPrompt = buildCoderPrompt({
606
+ task: plannedTask,
607
+ reviewerFeedback: session.last_reviewer_feedback,
608
+ sonarSummary: session.last_sonar_summary,
609
+ coderRules,
610
+ methodology: config.development?.methodology || "tdd",
611
+ serenaEnabled: Boolean(config.serena?.enabled)
612
+ });
613
+ const coderOnOutput = ({ stream, line }) => {
614
+ emitProgress(emitter, makeEvent("agent:output", { ...eventBase, stage: "coder" }, {
615
+ message: line,
616
+ detail: { stream, agent: coderRole.provider }
617
+ }));
618
+ };
619
+ const coderStart = Date.now();
620
+ const coderResult = await coder.runTask({ prompt: coderPrompt, onOutput: coderOnOutput, role: "coder" });
621
+ trackBudget({ role: "coder", provider: coderRole.provider, model: coderRole.model, result: coderResult, duration_ms: Date.now() - coderStart });
622
+
623
+ if (!coderResult.ok) {
624
+ await markSessionStatus(session, "failed");
625
+ const details = coderResult.error || coderResult.output || `exitCode=${coderResult.exitCode ?? "unknown"}`;
626
+ emitProgress(
627
+ emitter,
628
+ makeEvent("coder:end", { ...eventBase, stage: "coder" }, {
629
+ status: "fail",
630
+ message: `Coder failed: ${details}`
631
+ })
632
+ );
633
+ throw new Error(`Coder failed: ${details}`);
634
+ }
635
+
636
+ await addCheckpoint(session, { stage: "coder", iteration: i, note: "Coder applied changes" });
637
+ emitProgress(
638
+ emitter,
639
+ makeEvent("coder:end", { ...eventBase, stage: "coder" }, {
640
+ message: "Coder completed"
641
+ })
642
+ );
643
+
644
+ if (refactorerEnabled) {
645
+ logger.setContext({ iteration: i, stage: "refactorer" });
646
+ emitProgress(
647
+ emitter,
648
+ makeEvent("refactorer:start", { ...eventBase, stage: "refactorer" }, {
649
+ message: `Refactorer (${refactorerRole.provider}) running`,
650
+ detail: { refactorer: refactorerRole.provider }
651
+ })
652
+ );
653
+ const refactorer = createAgent(refactorerRole.provider, config, logger);
654
+ const refactorPrompt = [
655
+ `Task context:\n${plannedTask}`,
656
+ "",
657
+ "Refactor the current changes for clarity and maintainability without changing behavior.",
658
+ "Do not expand scope and keep tests green."
659
+ ].join("\n");
660
+ const refactorerOnOutput = ({ stream, line }) => {
661
+ emitProgress(emitter, makeEvent("agent:output", { ...eventBase, stage: "refactorer" }, {
662
+ message: line,
663
+ detail: { stream, agent: refactorerRole.provider }
664
+ }));
665
+ };
666
+ const refactorerStart = Date.now();
667
+ const refactorResult = await refactorer.runTask({
668
+ prompt: refactorPrompt,
669
+ onOutput: refactorerOnOutput,
670
+ role: "refactorer"
671
+ });
672
+ trackBudget({ role: "refactorer", provider: refactorerRole.provider, model: refactorerRole.model, result: refactorResult, duration_ms: Date.now() - refactorerStart });
673
+ if (!refactorResult.ok) {
674
+ await markSessionStatus(session, "failed");
675
+ const details = refactorResult.error || refactorResult.output || `exitCode=${refactorResult.exitCode ?? "unknown"}`;
676
+ emitProgress(
677
+ emitter,
678
+ makeEvent("refactorer:end", { ...eventBase, stage: "refactorer" }, {
679
+ status: "fail",
680
+ message: `Refactorer failed: ${details}`
681
+ })
682
+ );
683
+ throw new Error(`Refactorer failed: ${details}`);
684
+ }
685
+ await addCheckpoint(session, { stage: "refactorer", iteration: i, note: "Refactorer applied cleanups" });
686
+ emitProgress(
687
+ emitter,
688
+ makeEvent("refactorer:end", { ...eventBase, stage: "refactorer" }, {
689
+ message: "Refactorer completed"
690
+ })
691
+ );
692
+ }
693
+
694
+ // --- TDD Policy ---
695
+ logger.setContext({ iteration: i, stage: "tdd" });
696
+ const tddDiff = await generateDiff({ baseRef: session.session_start_sha });
697
+ const tddEval = evaluateTddPolicy(tddDiff, config.development);
698
+ await addCheckpoint(session, {
699
+ stage: "tdd-policy",
700
+ iteration: i,
701
+ ok: tddEval.ok,
702
+ reason: tddEval.reason,
703
+ source_files: tddEval.sourceFiles?.length || 0,
704
+ test_files: tddEval.testFiles?.length || 0
705
+ });
706
+
707
+ emitProgress(
708
+ emitter,
709
+ makeEvent("tdd:result", { ...eventBase, stage: "tdd" }, {
710
+ status: tddEval.ok ? "ok" : "fail",
711
+ message: tddEval.ok ? "TDD policy passed" : `TDD policy failed: ${tddEval.reason}`,
712
+ detail: {
713
+ ok: tddEval.ok,
714
+ reason: tddEval.reason,
715
+ sourceFiles: tddEval.sourceFiles?.length || 0,
716
+ testFiles: tddEval.testFiles?.length || 0
717
+ }
718
+ })
719
+ );
720
+
721
+ if (!tddEval.ok) {
722
+ session.last_reviewer_feedback = tddEval.message;
723
+ session.repeated_issue_count += 1;
724
+ await saveSession(session);
725
+ if (session.repeated_issue_count >= config.session.fail_fast_repeats) {
726
+ const question = `TDD policy has failed ${session.repeated_issue_count} times. The coder is not creating tests. How should we proceed? Issue: ${tddEval.reason}`;
727
+ if (askQuestion) {
728
+ const answer = await askQuestion(question, { iteration: i, stage: "tdd" });
729
+ if (answer) {
730
+ session.last_reviewer_feedback += `\nUser guidance: ${answer}`;
731
+ session.repeated_issue_count = 0;
732
+ await saveSession(session);
733
+ continue;
734
+ }
735
+ }
736
+ await pauseSession(session, {
737
+ question,
738
+ context: {
739
+ iteration: i,
740
+ stage: "tdd",
741
+ lastFeedback: tddEval.message,
742
+ repeatedCount: session.repeated_issue_count
743
+ }
744
+ });
745
+ emitProgress(
746
+ emitter,
747
+ makeEvent("question", { ...eventBase, stage: "tdd" }, {
748
+ status: "paused",
749
+ message: question,
750
+ detail: { question, sessionId: session.id }
751
+ })
752
+ );
753
+ return { paused: true, sessionId: session.id, question, context: "tdd_fail_fast" };
754
+ }
755
+ continue;
756
+ }
757
+
758
+ // --- SonarQube (via SonarRole) ---
759
+ if (config.sonarqube.enabled) {
760
+ logger.setContext({ iteration: i, stage: "sonar" });
761
+ emitProgress(
762
+ emitter,
763
+ makeEvent("sonar:start", { ...eventBase, stage: "sonar" }, {
764
+ message: "SonarQube scanning"
765
+ })
766
+ );
767
+
768
+ const sonarRole = new SonarRole({ config, logger, emitter });
769
+ await sonarRole.init({ iteration: i });
770
+ const sonarStart = Date.now();
771
+ const sonarOutput = await sonarRole.run();
772
+ trackBudget({ role: "sonar", provider: "sonar", result: sonarOutput, duration_ms: Date.now() - sonarStart });
773
+ const sonarResult = sonarOutput.result;
774
+
775
+ if (!sonarResult.gateStatus && sonarResult.error) {
776
+ await markSessionStatus(session, "failed");
777
+ emitProgress(
778
+ emitter,
779
+ makeEvent("sonar:end", { ...eventBase, stage: "sonar" }, {
780
+ status: "fail",
781
+ message: `Sonar scan failed: ${sonarResult.error}`
782
+ })
783
+ );
784
+ throw new Error(`Sonar scan failed: ${sonarResult.error}`);
785
+ }
786
+
787
+ session.last_sonar_summary = sonarOutput.summary;
788
+ if (typeof sonarResult.openIssuesTotal === "number") {
789
+ if (sonarIssuesInitial === null) {
790
+ sonarIssuesInitial = sonarResult.openIssuesTotal;
791
+ }
792
+ sonarIssuesFinal = sonarResult.openIssuesTotal;
793
+ }
794
+ await addCheckpoint(session, {
795
+ stage: "sonar",
796
+ iteration: i,
797
+ project_key: sonarResult.projectKey,
798
+ quality_gate: sonarResult.gateStatus,
799
+ open_issues: sonarResult.openIssuesTotal
800
+ });
801
+
802
+ emitProgress(
803
+ emitter,
804
+ makeEvent("sonar:end", { ...eventBase, stage: "sonar" }, {
805
+ status: sonarResult.blocking ? "fail" : "ok",
806
+ message: `Quality gate: ${sonarResult.gateStatus}`,
807
+ detail: { projectKey: sonarResult.projectKey, gateStatus: sonarResult.gateStatus, openIssues: sonarResult.openIssuesTotal }
808
+ })
809
+ );
810
+
811
+ if (sonarResult.blocking) {
812
+ repeatDetector.addIteration(sonarResult.issues, []);
813
+ const repeatState = repeatDetector.isStalled();
814
+ if (repeatState.stalled) {
815
+ const repeatCounts = repeatDetector.getRepeatCounts();
816
+ const message = `No progress: SonarQube issues repeated ${repeatCounts.sonar} times.`;
817
+ logger.warn(message);
818
+ await markSessionStatus(session, "stalled");
819
+ emitProgress(
820
+ emitter,
821
+ makeEvent("session:end", { ...eventBase, stage: "sonar" }, {
822
+ status: "stalled",
823
+ message,
824
+ detail: { reason: repeatState.reason, repeats: repeatCounts.sonar, budget: budgetSummary() }
825
+ })
826
+ );
827
+ return { approved: false, sessionId: session.id, reason: "stalled" };
828
+ }
829
+
830
+ session.last_reviewer_feedback = `Sonar gate blocking (${sonarResult.gateStatus}). Resolve critical findings first.`;
831
+ session.sonar_retry_count = (session.sonar_retry_count || 0) + 1;
832
+ await saveSession(session);
833
+ const maxSonarRetries = config.session.max_sonar_retries ?? config.session.fail_fast_repeats;
834
+ if (session.sonar_retry_count >= maxSonarRetries) {
835
+ emitProgress(
836
+ emitter,
837
+ makeEvent("solomon:escalate", { ...eventBase, stage: "sonar" }, {
838
+ message: `Sonar sub-loop limit reached (${session.sonar_retry_count}/${maxSonarRetries})`,
839
+ detail: { subloop: "sonar", retryCount: session.sonar_retry_count, limit: maxSonarRetries, gateStatus: sonarResult.gateStatus }
840
+ })
841
+ );
842
+
843
+ const solomonResult = await invokeSolomon({
844
+ config, logger, emitter, eventBase, stage: "sonar", askQuestion, session, iteration: i,
845
+ conflict: {
846
+ stage: "sonar",
847
+ task,
848
+ iterationCount: session.sonar_retry_count,
849
+ maxIterations: maxSonarRetries,
850
+ history: [{ agent: "sonar", feedback: session.last_sonar_summary }]
851
+ }
852
+ });
853
+
854
+ if (solomonResult.action === "pause") {
855
+ return { paused: true, sessionId: session.id, question: solomonResult.question, context: "sonar_fail_fast" };
856
+ }
857
+ if (solomonResult.action === "continue") {
858
+ if (solomonResult.humanGuidance) {
859
+ session.last_reviewer_feedback += `\nUser guidance: ${solomonResult.humanGuidance}`;
860
+ }
861
+ session.sonar_retry_count = 0;
862
+ await saveSession(session);
863
+ continue;
864
+ }
865
+ if (solomonResult.action === "subtask") {
866
+ return { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "sonar_subtask" };
867
+ }
868
+ }
869
+ continue;
870
+ }
871
+
872
+ // Sonar passed — reset retry counter
873
+ session.sonar_retry_count = 0;
874
+ const issuesInitial = sonarIssuesInitial ?? sonarResult.openIssuesTotal ?? 0;
875
+ const issuesFinal = sonarIssuesFinal ?? sonarResult.openIssuesTotal ?? 0;
876
+ stageResults.sonar = {
877
+ gateStatus: sonarResult.gateStatus,
878
+ openIssues: sonarResult.openIssuesTotal,
879
+ issuesInitial,
880
+ issuesFinal,
881
+ issuesResolved: Math.max(issuesInitial - issuesFinal, 0)
882
+ };
883
+ }
884
+
885
+ let review = {
886
+ approved: true,
887
+ blocking_issues: [],
888
+ non_blocking_suggestions: [],
889
+ summary: "Reviewer disabled by pipeline",
890
+ confidence: 1
891
+ };
892
+ if (reviewerEnabled) {
893
+ logger.setContext({ iteration: i, stage: "reviewer" });
894
+ emitProgress(
895
+ emitter,
896
+ makeEvent("reviewer:start", { ...eventBase, stage: "reviewer" }, {
897
+ message: `Reviewer (${reviewerRole.provider}) running`,
898
+ detail: { reviewer: reviewerRole.provider }
899
+ })
900
+ );
901
+
902
+ const diff = await generateDiff({ baseRef: session.session_start_sha });
903
+ const reviewerPrompt = buildReviewerPrompt({
904
+ task,
905
+ diff,
906
+ reviewRules,
907
+ mode: config.review_mode,
908
+ serenaEnabled: Boolean(config.serena?.enabled)
909
+ });
910
+ const reviewerOnOutput = ({ stream, line }) => {
911
+ emitProgress(emitter, makeEvent("agent:output", { ...eventBase, stage: "reviewer" }, {
912
+ message: line,
913
+ detail: { stream, agent: reviewerRole.provider }
914
+ }));
915
+ };
916
+ const reviewerStart = Date.now();
917
+ const reviewerExec = await runReviewerWithFallback({
918
+ reviewerName: reviewerRole.provider,
919
+ config,
920
+ logger,
921
+ prompt: reviewerPrompt,
922
+ session,
923
+ iteration: i,
924
+ onOutput: reviewerOnOutput,
925
+ onAttemptResult: ({ reviewer, result }) => {
926
+ trackBudget({ role: "reviewer", provider: reviewer, model: reviewerRole.model, result, duration_ms: Date.now() - reviewerStart });
927
+ }
928
+ });
929
+
930
+ if (!reviewerExec.result || !reviewerExec.result.ok) {
931
+ await markSessionStatus(session, "failed");
932
+ const lastAttempt = reviewerExec.attempts.at(-1);
933
+ const details =
934
+ lastAttempt?.result?.error ||
935
+ lastAttempt?.result?.output ||
936
+ `reviewer=${lastAttempt?.reviewer || "unknown"} exitCode=${lastAttempt?.result?.exitCode ?? "unknown"}`;
937
+ emitProgress(
938
+ emitter,
939
+ makeEvent("reviewer:end", { ...eventBase, stage: "reviewer" }, {
940
+ status: "fail",
941
+ message: `Reviewer failed: ${details}`
942
+ })
943
+ );
944
+ throw new Error(`Reviewer failed: ${details}`);
945
+ }
946
+
947
+ try {
948
+ const parsed = parseJsonOutput(reviewerExec.result.output);
949
+ if (!parsed) {
950
+ throw new Error("Reviewer output is not valid JSON");
951
+ }
952
+ review = validateReviewResult(parsed);
953
+ } catch (parseErr) {
954
+ logger.warn(`Reviewer output parse/validation failed: ${parseErr.message}`);
955
+ review = {
956
+ approved: false,
957
+ blocking_issues: [{
958
+ id: "PARSE_ERROR",
959
+ severity: "high",
960
+ description: `Reviewer output could not be parsed: ${parseErr.message}`
961
+ }],
962
+ non_blocking_suggestions: [],
963
+ summary: `Parse error: ${parseErr.message}`,
964
+ confidence: 0
965
+ };
966
+ }
967
+ await addCheckpoint(session, {
968
+ stage: "reviewer",
969
+ iteration: i,
970
+ approved: review.approved,
971
+ blocking_issues: review.blocking_issues.length
972
+ });
973
+
974
+ emitProgress(
975
+ emitter,
976
+ makeEvent("reviewer:end", { ...eventBase, stage: "reviewer" }, {
977
+ status: review.approved ? "ok" : "fail",
978
+ message: review.approved ? "Review approved" : `Review rejected (${review.blocking_issues.length} blocking)`,
979
+ detail: {
980
+ approved: review.approved,
981
+ blockingCount: review.blocking_issues.length,
982
+ issues: review.blocking_issues.map(
983
+ (x) => `${x.id || "ISSUE"}: ${x.description || "Missing description"}`
984
+ )
985
+ }
986
+ })
987
+ );
988
+
989
+ if (!review.approved) {
990
+ repeatDetector.addIteration([], review.blocking_issues);
991
+ const repeatState = repeatDetector.isStalled();
992
+ if (repeatState.stalled) {
993
+ const repeatCounts = repeatDetector.getRepeatCounts();
994
+ const message = `Manual intervention required: reviewer issues repeated ${repeatCounts.reviewer} times.`;
995
+ logger.warn(message);
996
+ await markSessionStatus(session, "stalled");
997
+ emitProgress(
998
+ emitter,
999
+ makeEvent("session:end", { ...eventBase, stage: "reviewer" }, {
1000
+ status: "stalled",
1001
+ message,
1002
+ detail: { reason: repeatState.reason, repeats: repeatCounts.reviewer, budget: budgetSummary() }
1003
+ })
1004
+ );
1005
+ return { approved: false, sessionId: session.id, reason: "stalled" };
1006
+ }
1007
+ }
1008
+ }
1009
+
1010
+ // --- Iteration end ---
1011
+ const iterDuration = Date.now() - iterStart;
1012
+ emitProgress(
1013
+ emitter,
1014
+ makeEvent("iteration:end", { ...eventBase, stage: "iteration" }, {
1015
+ message: `Iteration ${i} completed`,
1016
+ detail: { duration: iterDuration }
1017
+ })
1018
+ );
1019
+
1020
+ if (review.approved) {
1021
+ session.reviewer_retry_count = 0;
1022
+
1023
+ // --- Post-loop stages: Tester → Security ---
1024
+ const postLoopDiff = await generateDiff({ baseRef: session.session_start_sha });
1025
+
1026
+ // --- Tester ---
1027
+ if (testerEnabled) {
1028
+ logger.setContext({ iteration: i, stage: "tester" });
1029
+ emitProgress(
1030
+ emitter,
1031
+ makeEvent("tester:start", { ...eventBase, stage: "tester" }, {
1032
+ message: "Tester evaluating test quality"
1033
+ })
1034
+ );
1035
+
1036
+ const tester = new TesterRole({ config, logger, emitter });
1037
+ await tester.init({ task, iteration: i });
1038
+ const testerStart = Date.now();
1039
+ const testerOutput = await tester.run({ task, diff: postLoopDiff });
1040
+ trackBudget({
1041
+ role: "tester",
1042
+ provider: config?.roles?.tester?.provider || coderRole.provider,
1043
+ model: config?.roles?.tester?.model || coderRole.model,
1044
+ result: testerOutput,
1045
+ duration_ms: Date.now() - testerStart
1046
+ });
1047
+
1048
+ await addCheckpoint(session, { stage: "tester", iteration: i, ok: testerOutput.ok });
1049
+
1050
+ emitProgress(
1051
+ emitter,
1052
+ makeEvent("tester:end", { ...eventBase, stage: "tester" }, {
1053
+ status: testerOutput.ok ? "ok" : "fail",
1054
+ message: testerOutput.ok ? "Tester passed" : `Tester: ${testerOutput.summary}`
1055
+ })
1056
+ );
1057
+
1058
+ if (!testerOutput.ok) {
1059
+ const maxTesterRetries = config.session?.max_tester_retries ?? 1;
1060
+ session.tester_retry_count = (session.tester_retry_count || 0) + 1;
1061
+ await saveSession(session);
1062
+
1063
+ if (session.tester_retry_count >= maxTesterRetries) {
1064
+ const solomonResult = await invokeSolomon({
1065
+ config, logger, emitter, eventBase, stage: "tester", askQuestion, session, iteration: i,
1066
+ conflict: {
1067
+ stage: "tester",
1068
+ task,
1069
+ diff: postLoopDiff,
1070
+ iterationCount: session.tester_retry_count,
1071
+ maxIterations: maxTesterRetries,
1072
+ history: [{ agent: "tester", feedback: testerOutput.summary }]
1073
+ }
1074
+ });
1075
+
1076
+ if (solomonResult.action === "pause") {
1077
+ return { paused: true, sessionId: session.id, question: solomonResult.question, context: "tester_fail_fast" };
1078
+ }
1079
+ if (solomonResult.action === "subtask") {
1080
+ return { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "tester_subtask" };
1081
+ }
1082
+ // continue = Solomon approved, proceed to next stage
1083
+ } else {
1084
+ session.last_reviewer_feedback = `Tester feedback: ${testerOutput.summary}`;
1085
+ await saveSession(session);
1086
+ continue;
1087
+ }
1088
+ } else {
1089
+ session.tester_retry_count = 0;
1090
+ stageResults.tester = { ok: true, summary: testerOutput.summary || "All tests passed" };
1091
+ }
1092
+ }
1093
+
1094
+ // --- Security ---
1095
+ if (securityEnabled) {
1096
+ logger.setContext({ iteration: i, stage: "security" });
1097
+ emitProgress(
1098
+ emitter,
1099
+ makeEvent("security:start", { ...eventBase, stage: "security" }, {
1100
+ message: "Security auditing code"
1101
+ })
1102
+ );
1103
+
1104
+ const security = new SecurityRole({ config, logger, emitter });
1105
+ await security.init({ task, iteration: i });
1106
+ const securityStart = Date.now();
1107
+ const securityOutput = await security.run({ task, diff: postLoopDiff });
1108
+ trackBudget({
1109
+ role: "security",
1110
+ provider: config?.roles?.security?.provider || coderRole.provider,
1111
+ model: config?.roles?.security?.model || coderRole.model,
1112
+ result: securityOutput,
1113
+ duration_ms: Date.now() - securityStart
1114
+ });
1115
+
1116
+ await addCheckpoint(session, { stage: "security", iteration: i, ok: securityOutput.ok });
1117
+
1118
+ emitProgress(
1119
+ emitter,
1120
+ makeEvent("security:end", { ...eventBase, stage: "security" }, {
1121
+ status: securityOutput.ok ? "ok" : "fail",
1122
+ message: securityOutput.ok ? "Security audit passed" : `Security: ${securityOutput.summary}`
1123
+ })
1124
+ );
1125
+
1126
+ if (!securityOutput.ok) {
1127
+ const maxSecurityRetries = config.session?.max_security_retries ?? 1;
1128
+ session.security_retry_count = (session.security_retry_count || 0) + 1;
1129
+ await saveSession(session);
1130
+
1131
+ if (session.security_retry_count >= maxSecurityRetries) {
1132
+ const solomonResult = await invokeSolomon({
1133
+ config, logger, emitter, eventBase, stage: "security", askQuestion, session, iteration: i,
1134
+ conflict: {
1135
+ stage: "security",
1136
+ task,
1137
+ diff: postLoopDiff,
1138
+ iterationCount: session.security_retry_count,
1139
+ maxIterations: maxSecurityRetries,
1140
+ history: [{ agent: "security", feedback: securityOutput.summary }]
1141
+ }
1142
+ });
1143
+
1144
+ if (solomonResult.action === "pause") {
1145
+ return { paused: true, sessionId: session.id, question: solomonResult.question, context: "security_fail_fast" };
1146
+ }
1147
+ if (solomonResult.action === "subtask") {
1148
+ return { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "security_subtask" };
1149
+ }
1150
+ // continue = Solomon approved, proceed
1151
+ } else {
1152
+ session.last_reviewer_feedback = `Security feedback: ${securityOutput.summary}`;
1153
+ await saveSession(session);
1154
+ continue;
1155
+ }
1156
+ } else {
1157
+ session.security_retry_count = 0;
1158
+ stageResults.security = { ok: true, summary: securityOutput.summary || "No vulnerabilities found" };
1159
+ }
1160
+ }
1161
+
1162
+ // --- All post-loop checks passed → finalize ---
1163
+ const gitResult = await finalizeGitAutomation({ config, gitCtx, task, logger, session });
1164
+ if (stageResults.planner?.ok) {
1165
+ stageResults.planner.completedSteps = [...(stageResults.planner.steps || [])];
1166
+ }
1167
+ session.budget = budgetSummary();
1168
+ await markSessionStatus(session, "approved");
1169
+ emitProgress(
1170
+ emitter,
1171
+ makeEvent("session:end", { ...eventBase, stage: "done" }, {
1172
+ message: "Session approved",
1173
+ detail: { approved: true, iterations: i, stages: stageResults, git: gitResult, budget: budgetSummary() }
1174
+ })
1175
+ );
1176
+ return { approved: true, sessionId: session.id, review, git: gitResult };
1177
+ }
1178
+
1179
+ session.last_reviewer_feedback = review.blocking_issues
1180
+ .map((x) => `${x.id || "ISSUE"}: ${x.description || "Missing description"}`)
1181
+ .join("\n");
1182
+ session.reviewer_retry_count = (session.reviewer_retry_count || 0) + 1;
1183
+ await saveSession(session);
1184
+
1185
+ const maxReviewerRetries = config.session.max_reviewer_retries ?? config.session.fail_fast_repeats;
1186
+ if (session.reviewer_retry_count >= maxReviewerRetries) {
1187
+ emitProgress(
1188
+ emitter,
1189
+ makeEvent("solomon:escalate", { ...eventBase, stage: "reviewer" }, {
1190
+ message: `Reviewer sub-loop limit reached (${session.reviewer_retry_count}/${maxReviewerRetries})`,
1191
+ detail: { subloop: "reviewer", retryCount: session.reviewer_retry_count, limit: maxReviewerRetries }
1192
+ })
1193
+ );
1194
+
1195
+ const solomonResult = await invokeSolomon({
1196
+ config, logger, emitter, eventBase, stage: "reviewer", askQuestion, session, iteration: i,
1197
+ conflict: {
1198
+ stage: "reviewer",
1199
+ task,
1200
+ iterationCount: session.reviewer_retry_count,
1201
+ maxIterations: maxReviewerRetries,
1202
+ history: [{ agent: "reviewer", feedback: session.last_reviewer_feedback }]
1203
+ }
1204
+ });
1205
+
1206
+ if (solomonResult.action === "pause") {
1207
+ return { paused: true, sessionId: session.id, question: solomonResult.question, context: "reviewer_fail_fast" };
1208
+ }
1209
+ if (solomonResult.action === "continue") {
1210
+ if (solomonResult.humanGuidance) {
1211
+ session.last_reviewer_feedback += `\nUser guidance: ${solomonResult.humanGuidance}`;
1212
+ }
1213
+ session.reviewer_retry_count = 0;
1214
+ await saveSession(session);
1215
+ continue;
1216
+ }
1217
+ if (solomonResult.action === "subtask") {
1218
+ return { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "reviewer_subtask" };
1219
+ }
1220
+ }
1221
+ }
1222
+
1223
+ session.budget = budgetSummary();
1224
+ await markSessionStatus(session, "failed");
1225
+ emitProgress(
1226
+ emitter,
1227
+ makeEvent("session:end", { ...eventBase, stage: "done" }, {
1228
+ status: "fail",
1229
+ message: "Max iterations reached",
1230
+ detail: { approved: false, reason: "max_iterations", iterations: config.max_iterations, stages: stageResults, budget: budgetSummary() }
1231
+ })
1232
+ );
1233
+ return { approved: false, sessionId: session.id, reason: "max_iterations" };
1234
+ }
1235
+
1236
/**
 * Resume an existing session and re-run the flow for its task.
 *
 * When `answer` is truthy the session is obtained through
 * `resumeSessionWithAnswer`; otherwise it is read with `loadSession`.
 * Only a session whose status is "running" is re-run. Any other status is
 * logged and the session object is returned as-is.
 *
 * Before re-running, every retry/repeat counter on the session is reset and
 * the session is saved.
 *
 * @param {object} options
 * @param options.sessionId - Identifier of the session to resume.
 * @param [options.answer] - User reply to the question the session paused on.
 * @param {object} [options.config] - Config to run with; falls back to
 *   `session.config_snapshot` when falsy.
 * @param {object} options.logger - Logger; only `info` is called here.
 * @param {object} [options.flags={}] - Forwarded unchanged to `runFlow`.
 * @param {object|null} [options.emitter=null] - Forwarded unchanged to `runFlow`.
 * @param {Function|null} [options.askQuestion=null] - Forwarded unchanged to `runFlow`.
 * @returns {Promise<object>} The loaded session when nothing is re-run,
 *   otherwise whatever `runFlow` resolves to.
 * @throws {Error} When neither `config` nor `session.config_snapshot` is set.
 */
export async function resumeFlow({ sessionId, answer, config, logger, flags = {}, emitter = null, askQuestion = null }) {
  // Same truthiness rule is used for loading, logging and feedback injection,
  // so an absent answer is never rendered as the literal text "undefined".
  const hasAnswer = Boolean(answer);

  const session = hasAnswer
    ? await resumeSessionWithAnswer(sessionId, answer)
    : await loadSession(sessionId);

  if (session.status === "paused" && !hasAnswer) {
    logger.info(`Session ${sessionId} is paused. Provide --answer to resume.`);
    return session;
  }

  if (session.status !== "running") {
    logger.info(`Session ${sessionId} has status ${session.status}`);
    return session;
  }

  // Session is in the running state - re-run the flow
  const task = session.task;
  const sessionConfig = config || session.config_snapshot;
  if (!sessionConfig) {
    throw new Error("No config available to resume session");
  }

  logger.info(
    hasAnswer
      ? `Resuming session ${sessionId} with answer: ${answer}`
      : `Resuming session ${sessionId}`
  );

  // Carry the feedback recorded at pause time forward for the coder, adding
  // the user's guidance only when an answer was actually supplied.
  const previousFeedback = session.paused_state?.context?.lastFeedback;
  if (previousFeedback) {
    session.last_reviewer_feedback = hasAnswer
      ? `Previous feedback: ${previousFeedback}\nUser guidance: ${answer}`
      : `Previous feedback: ${previousFeedback}`;
  }

  // Reset every retry/repeat tracker before the flow is run again.
  session.repeated_issue_count = 0;
  session.sonar_retry_count = 0;
  session.reviewer_retry_count = 0;
  session.tester_retry_count = 0;
  session.security_retry_count = 0;
  session.last_sonar_issue_signature = null;
  session.sonar_repeat_count = 0;
  session.last_reviewer_issue_signature = null;
  session.reviewer_repeat_count = 0;
  await saveSession(session);

  // Re-run the flow with the existing session context
  return runFlow({ task, config: sessionConfig, logger, flags, emitter, askQuestion });
}