karajan-code 1.2.2 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,244 +1,31 @@
1
1
  import { createAgent } from "./agents/index.js";
2
2
  import {
3
- addCheckpoint,
4
3
  createSession,
5
4
  loadSession,
6
5
  markSessionStatus,
7
- pauseSession,
8
6
  resumeSessionWithAnswer,
9
7
  saveSession
10
8
  } from "./session-store.js";
11
9
  import { computeBaseRef, generateDiff } from "./review/diff-generator.js";
12
- import { parseJsonOutput } from "./review/parser.js";
13
- import { validateReviewResult } from "./review/schema.js";
14
- import { evaluateTddPolicy } from "./review/tdd-policy.js";
15
10
  import { buildCoderPrompt } from "./prompts/coder.js";
16
11
  import { buildReviewerPrompt } from "./prompts/reviewer.js";
17
12
  import { resolveRole } from "./config.js";
18
- import { SonarRole } from "./roles/sonar-role.js";
19
- import { RepeatDetector } from "./repeat-detector.js";
13
+ import { RepeatDetector, getRepeatThreshold } from "./repeat-detector.js";
20
14
  import { emitProgress, makeEvent } from "./utils/events.js";
21
- import { BudgetTracker } from "./utils/budget.js";
15
+ import { BudgetTracker, extractUsageMetrics } from "./utils/budget.js";
22
16
  import {
23
- commitMessageFromTask,
24
17
  prepareGitAutomation,
25
18
  finalizeGitAutomation
26
19
  } from "./git/automation.js";
27
20
  import { resolveRoleMdPath, loadFirstExisting } from "./roles/base-role.js";
28
21
  import { resolveReviewProfile } from "./review/profiles.js";
29
- import { ResearcherRole } from "./roles/researcher-role.js";
30
- import { TriageRole } from "./roles/triage-role.js";
31
- import { TesterRole } from "./roles/tester-role.js";
32
- import { SecurityRole } from "./roles/security-role.js";
33
- import { SolomonRole } from "./roles/solomon-role.js";
22
+ import { CoderRole } from "./roles/coder-role.js";
23
+ import { invokeSolomon } from "./orchestrator/solomon-escalation.js";
24
+ import { runTriageStage, runResearcherStage, runPlannerStage } from "./orchestrator/pre-loop-stages.js";
25
+ import { runCoderStage, runRefactorerStage, runTddCheckStage, runSonarStage, runReviewerStage } from "./orchestrator/iteration-stages.js";
26
+ import { runTesterStage, runSecurityStage } from "./orchestrator/post-loop-stages.js";
34
27
 
35
- function parsePlannerOutput(output) {
36
- const text = String(output || "").trim();
37
- if (!text) return null;
38
28
 
39
- const lines = text
40
- .split(/\r?\n/)
41
- .map((line) => line.trim())
42
- .filter(Boolean);
43
-
44
- let title = null;
45
- let approach = null;
46
- const steps = [];
47
-
48
- for (const line of lines) {
49
- if (!title) {
50
- const titleMatch = line.match(/^title\s*:\s*(.+)$/i);
51
- if (titleMatch) {
52
- title = titleMatch[1].trim();
53
- continue;
54
- }
55
- }
56
-
57
- if (!approach) {
58
- const approachMatch = line.match(/^(approach|strategy)\s*:\s*(.+)$/i);
59
- if (approachMatch) {
60
- approach = approachMatch[2].trim();
61
- continue;
62
- }
63
- }
64
-
65
- const numberedStep = line.match(/^\d+[\).:-]\s*(.+)$/);
66
- if (numberedStep) {
67
- steps.push(numberedStep[1].trim());
68
- continue;
69
- }
70
-
71
- const bulletStep = line.match(/^[-*]\s+(.+)$/);
72
- if (bulletStep) {
73
- steps.push(bulletStep[1].trim());
74
- continue;
75
- }
76
- }
77
-
78
- if (!title) {
79
- const firstFreeLine = lines.find((line) => !/^(approach|strategy)\s*:/i.test(line) && !/^\d+[\).:-]\s*/.test(line));
80
- title = firstFreeLine || null;
81
- }
82
-
83
- return { title, approach, steps };
84
- }
85
-
86
- function getRepeatThreshold(config) {
87
- const raw =
88
- config?.failFast?.repeatThreshold ??
89
- config?.session?.repeat_detection_threshold ??
90
- config?.session?.fail_fast_repeats ??
91
- 2;
92
- const value = Number(raw);
93
- if (Number.isFinite(value) && value > 0) return value;
94
- return 2;
95
- }
96
-
97
- function extractUsageMetrics(result, defaultModel = null) {
98
- const usage = result?.usage || result?.metrics || {};
99
- const tokens_in =
100
- result?.tokens_in ??
101
- usage?.tokens_in ??
102
- usage?.input_tokens ??
103
- usage?.prompt_tokens ??
104
- 0;
105
- const tokens_out =
106
- result?.tokens_out ??
107
- usage?.tokens_out ??
108
- usage?.output_tokens ??
109
- usage?.completion_tokens ??
110
- 0;
111
- const cost_usd =
112
- result?.cost_usd ??
113
- usage?.cost_usd ??
114
- usage?.usd_cost ??
115
- usage?.cost;
116
- const model =
117
- result?.model ??
118
- usage?.model ??
119
- usage?.model_name ??
120
- usage?.model_id ??
121
- defaultModel ??
122
- null;
123
-
124
- return { tokens_in, tokens_out, cost_usd, model };
125
- }
126
-
127
- async function runReviewerWithFallback({ reviewerName, config, logger, prompt, session, iteration, onOutput, onAttemptResult }) {
128
- const fallbackReviewer = config.reviewer_options?.fallback_reviewer;
129
- const retries = Math.max(0, Number(config.reviewer_options?.retries ?? 1));
130
- const candidates = [reviewerName];
131
- if (fallbackReviewer && fallbackReviewer !== reviewerName) {
132
- candidates.push(fallbackReviewer);
133
- }
134
-
135
- const attempts = [];
136
- for (const name of candidates) {
137
- const reviewer = createAgent(name, config, logger);
138
- for (let attempt = 1; attempt <= retries + 1; attempt += 1) {
139
- const result = await reviewer.reviewTask({ prompt, onOutput, role: "reviewer" });
140
- if (onAttemptResult) {
141
- await onAttemptResult({ reviewer: name, result });
142
- }
143
- attempts.push({ reviewer: name, attempt, ok: result.ok, result });
144
- await addCheckpoint(session, {
145
- stage: "reviewer-attempt",
146
- iteration,
147
- reviewer: name,
148
- attempt,
149
- ok: result.ok
150
- });
151
-
152
- if (result.ok) {
153
- return { result, attempts };
154
- }
155
- }
156
- }
157
-
158
- return { result: null, attempts };
159
- }
160
-
161
- async function invokeSolomon({ config, logger, emitter, eventBase, stage, conflict, askQuestion, session, iteration }) {
162
- const solomonEnabled = Boolean(config.pipeline?.solomon?.enabled);
163
-
164
- if (!solomonEnabled) {
165
- return escalateToHuman({ askQuestion, session, emitter, eventBase, stage, conflict, iteration });
166
- }
167
-
168
- emitProgress(
169
- emitter,
170
- makeEvent("solomon:start", { ...eventBase, stage: "solomon" }, {
171
- message: `Solomon arbitrating ${stage} conflict`,
172
- detail: { conflictStage: stage }
173
- })
174
- );
175
-
176
- const solomon = new SolomonRole({ config, logger, emitter });
177
- await solomon.init({ task: conflict.task || session.task, iteration });
178
- const ruling = await solomon.run({ conflict });
179
-
180
- emitProgress(
181
- emitter,
182
- makeEvent("solomon:end", { ...eventBase, stage: "solomon" }, {
183
- message: `Solomon ruling: ${ruling.result?.ruling || "unknown"}`,
184
- detail: ruling.result
185
- })
186
- );
187
-
188
- await addCheckpoint(session, {
189
- stage: "solomon",
190
- iteration,
191
- ruling: ruling.result?.ruling,
192
- escalate: ruling.result?.escalate,
193
- subtask: ruling.result?.subtask?.title || null
194
- });
195
-
196
- if (!ruling.ok) {
197
- // escalate_human
198
- return escalateToHuman({
199
- askQuestion, session, emitter, eventBase, stage, iteration,
200
- conflict: { ...conflict, solomonReason: ruling.result?.escalate_reason }
201
- });
202
- }
203
-
204
- const r = ruling.result?.ruling;
205
- if (r === "approve" || r === "approve_with_conditions") {
206
- return { action: "continue", conditions: ruling.result?.conditions || [], ruling };
207
- }
208
-
209
- if (r === "create_subtask") {
210
- return { action: "subtask", subtask: ruling.result?.subtask, ruling };
211
- }
212
-
213
- return { action: "continue", conditions: [], ruling };
214
- }
215
-
216
- async function escalateToHuman({ askQuestion, session, emitter, eventBase, stage, conflict, iteration }) {
217
- const reason = conflict?.solomonReason || `${stage} conflict unresolved`;
218
- const question = `${stage} conflict requires human intervention: ${reason}\nDetails: ${JSON.stringify(conflict?.history?.slice(-2) || [], null, 2)}\n\nHow should we proceed?`;
219
-
220
- if (askQuestion) {
221
- const answer = await askQuestion(question, { iteration, stage });
222
- if (answer) {
223
- return { action: "continue", humanGuidance: answer };
224
- }
225
- }
226
-
227
- await pauseSession(session, {
228
- question,
229
- context: { iteration, stage, conflict }
230
- });
231
- emitProgress(
232
- emitter,
233
- makeEvent("question", { ...eventBase, stage }, {
234
- status: "paused",
235
- message: question,
236
- detail: { question, sessionId: session.id }
237
- })
238
- );
239
-
240
- return { action: "pause", question };
241
- }
242
29
 
243
30
  export async function runFlow({ task, config, logger, flags = {}, emitter = null, askQuestion = null }) {
244
31
  const plannerRole = resolveRole(config, "planner");
@@ -309,7 +96,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
309
96
  }
310
97
 
311
98
  const repeatDetector = new RepeatDetector({ threshold: getRepeatThreshold(config) });
312
- const coder = createAgent(coderRole.provider, config, logger);
99
+ const coderRoleInstance = new CoderRole({ config, logger, emitter, createAgentFn: createAgent });
313
100
  const startedAt = Date.now();
314
101
  const eventBase = { sessionId: null, iteration: 0, stage: null, startedAt };
315
102
  const budgetTracker = new BudgetTracker({ pricing: config?.budget?.pricing });
@@ -381,172 +168,47 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
381
168
 
382
169
  // Accumulate stage results for final summary
383
170
  const stageResults = {};
384
- let sonarIssuesInitial = null;
385
- let sonarIssuesFinal = null;
171
+ const sonarState = { issuesInitial: null, issuesFinal: null };
386
172
 
387
173
  if (triageEnabled) {
388
- logger.setContext({ iteration: 0, stage: "triage" });
389
- emitProgress(
390
- emitter,
391
- makeEvent("triage:start", { ...eventBase, stage: "triage" }, {
392
- message: "Triage classifying task complexity"
393
- })
394
- );
395
-
396
- const triage = new TriageRole({ config, logger, emitter });
397
- await triage.init({ task, sessionId: session.id, iteration: 0 });
398
- const triageStart = Date.now();
399
- const triageOutput = await triage.run({ task });
400
- trackBudget({
401
- role: "triage",
402
- provider: config?.roles?.triage?.provider || coderRole.provider,
403
- model: config?.roles?.triage?.model || coderRole.model,
404
- result: triageOutput,
405
- duration_ms: Date.now() - triageStart
406
- });
407
-
408
- await addCheckpoint(session, { stage: "triage", iteration: 0, ok: triageOutput.ok });
409
-
410
- const recommendedRoles = new Set(triageOutput.result?.roles || []);
411
- if (triageOutput.ok) {
412
- plannerEnabled = recommendedRoles.has("planner");
413
- researcherEnabled = recommendedRoles.has("researcher");
414
- refactorerEnabled = recommendedRoles.has("refactorer");
415
- reviewerEnabled = recommendedRoles.has("reviewer");
416
- testerEnabled = recommendedRoles.has("tester");
417
- securityEnabled = recommendedRoles.has("security");
418
- }
419
-
420
- if (flags.enablePlanner !== undefined) plannerEnabled = Boolean(flags.enablePlanner);
421
- if (flags.enableResearcher !== undefined) researcherEnabled = Boolean(flags.enableResearcher);
422
- if (flags.enableRefactorer !== undefined) refactorerEnabled = Boolean(flags.enableRefactorer);
423
- if (flags.enableReviewer !== undefined) reviewerEnabled = Boolean(flags.enableReviewer);
424
- if (flags.enableTester !== undefined) testerEnabled = Boolean(flags.enableTester);
425
- if (flags.enableSecurity !== undefined) securityEnabled = Boolean(flags.enableSecurity);
426
-
427
- stageResults.triage = {
428
- ok: triageOutput.ok,
429
- level: triageOutput.result?.level || null,
430
- roles: Array.from(recommendedRoles),
431
- reasoning: triageOutput.result?.reasoning || null
432
- };
433
-
434
- emitProgress(
435
- emitter,
436
- makeEvent("triage:end", { ...eventBase, stage: "triage" }, {
437
- status: triageOutput.ok ? "ok" : "fail",
438
- message: triageOutput.ok ? "Triage completed" : `Triage failed: ${triageOutput.summary}`,
439
- detail: stageResults.triage
440
- })
441
- );
442
- } else {
443
- if (flags.enablePlanner !== undefined) plannerEnabled = Boolean(flags.enablePlanner);
444
- if (flags.enableResearcher !== undefined) researcherEnabled = Boolean(flags.enableResearcher);
445
- if (flags.enableRefactorer !== undefined) refactorerEnabled = Boolean(flags.enableRefactorer);
446
- if (flags.enableReviewer !== undefined) reviewerEnabled = Boolean(flags.enableReviewer);
447
- if (flags.enableTester !== undefined) testerEnabled = Boolean(flags.enableTester);
448
- if (flags.enableSecurity !== undefined) securityEnabled = Boolean(flags.enableSecurity);
174
+ const triageResult = await runTriageStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget });
175
+ if (triageResult.roleOverrides.plannerEnabled !== undefined) plannerEnabled = triageResult.roleOverrides.plannerEnabled;
176
+ if (triageResult.roleOverrides.researcherEnabled !== undefined) researcherEnabled = triageResult.roleOverrides.researcherEnabled;
177
+ if (triageResult.roleOverrides.refactorerEnabled !== undefined) refactorerEnabled = triageResult.roleOverrides.refactorerEnabled;
178
+ if (triageResult.roleOverrides.reviewerEnabled !== undefined) reviewerEnabled = triageResult.roleOverrides.reviewerEnabled;
179
+ if (triageResult.roleOverrides.testerEnabled !== undefined) testerEnabled = triageResult.roleOverrides.testerEnabled;
180
+ if (triageResult.roleOverrides.securityEnabled !== undefined) securityEnabled = triageResult.roleOverrides.securityEnabled;
181
+ stageResults.triage = triageResult.stageResult;
449
182
  }
450
183
 
184
+ if (flags.enablePlanner !== undefined) plannerEnabled = Boolean(flags.enablePlanner);
185
+ if (flags.enableResearcher !== undefined) researcherEnabled = Boolean(flags.enableResearcher);
186
+ if (flags.enableRefactorer !== undefined) refactorerEnabled = Boolean(flags.enableRefactorer);
187
+ if (flags.enableReviewer !== undefined) reviewerEnabled = Boolean(flags.enableReviewer);
188
+ if (flags.enableTester !== undefined) testerEnabled = Boolean(flags.enableTester);
189
+ if (flags.enableSecurity !== undefined) securityEnabled = Boolean(flags.enableSecurity);
190
+
451
191
  // --- Researcher (pre-planning) ---
452
192
  let researchContext = null;
453
193
  if (researcherEnabled) {
454
- logger.setContext({ iteration: 0, stage: "researcher" });
455
- emitProgress(
456
- emitter,
457
- makeEvent("researcher:start", { ...eventBase, stage: "researcher" }, {
458
- message: "Researcher investigating codebase"
459
- })
460
- );
461
-
462
- const researcher = new ResearcherRole({ config, logger, emitter });
463
- await researcher.init({ task });
464
- const researchStart = Date.now();
465
- const researchOutput = await researcher.run({ task });
466
- trackBudget({
467
- role: "researcher",
468
- provider: config?.roles?.researcher?.provider || coderRole.provider,
469
- model: config?.roles?.researcher?.model || coderRole.model,
470
- result: researchOutput,
471
- duration_ms: Date.now() - researchStart
472
- });
473
-
474
- await addCheckpoint(session, { stage: "researcher", iteration: 0, ok: researchOutput.ok });
475
-
476
- emitProgress(
477
- emitter,
478
- makeEvent("researcher:end", { ...eventBase, stage: "researcher" }, {
479
- status: researchOutput.ok ? "ok" : "fail",
480
- message: researchOutput.ok ? "Research completed" : `Research failed: ${researchOutput.summary}`
481
- })
482
- );
483
-
484
- stageResults.researcher = { ok: researchOutput.ok, summary: researchOutput.summary || null };
485
- if (researchOutput.ok) {
486
- researchContext = researchOutput.result;
487
- }
194
+ const researcherResult = await runResearcherStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget });
195
+ researchContext = researcherResult.researchContext;
196
+ stageResults.researcher = researcherResult.stageResult;
488
197
  }
489
198
 
490
199
  // --- Planner ---
491
200
  let plannedTask = task;
492
201
  if (plannerEnabled) {
493
- logger.setContext({ iteration: 0, stage: "planner" });
494
- emitProgress(
495
- emitter,
496
- makeEvent("planner:start", { ...eventBase, stage: "planner" }, {
497
- message: `Planner (${plannerRole.provider}) running`,
498
- detail: { planner: plannerRole.provider }
499
- })
500
- );
501
- const planner = createAgent(plannerRole.provider, config, logger);
502
- const plannerStart = Date.now();
503
- const plannerPromptParts = [
504
- "Create an implementation plan for this task.",
505
- "Return concise numbered steps focused on execution order and risk.",
506
- "",
507
- task
508
- ];
509
- if (researchContext) {
510
- plannerPromptParts.push("", "## Research findings", JSON.stringify(researchContext, null, 2));
511
- }
512
- const plannerResult = await planner.runTask({ prompt: plannerPromptParts.join("\n"), role: "planner" });
513
- trackBudget({ role: "planner", provider: plannerRole.provider, model: plannerRole.model, result: plannerResult, duration_ms: Date.now() - plannerStart });
514
- if (!plannerResult.ok) {
515
- await markSessionStatus(session, "failed");
516
- const details = plannerResult.error || plannerResult.output || `exitCode=${plannerResult.exitCode ?? "unknown"}`;
517
- emitProgress(
518
- emitter,
519
- makeEvent("planner:end", { ...eventBase, stage: "planner" }, {
520
- status: "fail",
521
- message: `Planner failed: ${details}`
522
- })
523
- );
524
- throw new Error(`Planner failed: ${details}`);
525
- }
526
- if (plannerResult.output?.trim()) {
527
- plannedTask = `${task}\n\nExecution plan:\n${plannerResult.output.trim()}`;
528
- }
529
- const parsedPlan = parsePlannerOutput(plannerResult.output);
530
- stageResults.planner = {
531
- ok: true,
532
- title: parsedPlan?.title || null,
533
- approach: parsedPlan?.approach || null,
534
- steps: parsedPlan?.steps || [],
535
- completedSteps: []
536
- };
537
- emitProgress(
538
- emitter,
539
- makeEvent("planner:end", { ...eventBase, stage: "planner" }, {
540
- message: "Planner completed"
541
- })
542
- );
202
+ const plannerResult = await runPlannerStage({ config, logger, emitter, eventBase, session, plannerRole, researchContext, trackBudget });
203
+ plannedTask = plannerResult.plannedTask;
204
+ stageResults.planner = plannerResult.stageResult;
543
205
  }
544
206
 
545
207
  const gitCtx = await prepareGitAutomation({ config, task, logger, session });
546
208
 
547
209
  const projectDir = config.projectDir || process.cwd();
548
210
  const { rules: reviewRules } = await resolveReviewProfile({ mode: config.review_mode, projectDir });
549
- const coderRules = await loadFirstExisting(resolveRoleMdPath("coder", projectDir));
211
+ await coderRoleInstance.init();
550
212
 
551
213
  for (let i = 1; i <= config.max_iterations; i += 1) {
552
214
  const elapsedMinutes = (Date.now() - startedAt) / 60000;
@@ -593,295 +255,41 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
593
255
  logger.info(`Iteration ${i}/${config.max_iterations}`);
594
256
 
595
257
  // --- Coder ---
596
- logger.setContext({ iteration: i, stage: "coder" });
597
- emitProgress(
598
- emitter,
599
- makeEvent("coder:start", { ...eventBase, stage: "coder" }, {
600
- message: `Coder (${coderRole.provider}) running`,
601
- detail: { coder: coderRole.provider }
602
- })
603
- );
604
-
605
- const coderPrompt = buildCoderPrompt({
606
- task: plannedTask,
607
- reviewerFeedback: session.last_reviewer_feedback,
608
- sonarSummary: session.last_sonar_summary,
609
- coderRules,
610
- methodology: config.development?.methodology || "tdd",
611
- serenaEnabled: Boolean(config.serena?.enabled)
612
- });
613
- const coderOnOutput = ({ stream, line }) => {
614
- emitProgress(emitter, makeEvent("agent:output", { ...eventBase, stage: "coder" }, {
615
- message: line,
616
- detail: { stream, agent: coderRole.provider }
617
- }));
618
- };
619
- const coderStart = Date.now();
620
- const coderResult = await coder.runTask({ prompt: coderPrompt, onOutput: coderOnOutput, role: "coder" });
621
- trackBudget({ role: "coder", provider: coderRole.provider, model: coderRole.model, result: coderResult, duration_ms: Date.now() - coderStart });
622
-
623
- if (!coderResult.ok) {
624
- await markSessionStatus(session, "failed");
625
- const details = coderResult.error || coderResult.output || `exitCode=${coderResult.exitCode ?? "unknown"}`;
626
- emitProgress(
627
- emitter,
628
- makeEvent("coder:end", { ...eventBase, stage: "coder" }, {
629
- status: "fail",
630
- message: `Coder failed: ${details}`
631
- })
632
- );
633
- throw new Error(`Coder failed: ${details}`);
634
- }
635
-
636
- await addCheckpoint(session, { stage: "coder", iteration: i, note: "Coder applied changes" });
637
- emitProgress(
638
- emitter,
639
- makeEvent("coder:end", { ...eventBase, stage: "coder" }, {
640
- message: "Coder completed"
641
- })
642
- );
258
+ await runCoderStage({ coderRoleInstance, coderRole, config, logger, emitter, eventBase, session, plannedTask, trackBudget, iteration: i });
643
259
 
260
+ // --- Refactorer ---
644
261
  if (refactorerEnabled) {
645
- logger.setContext({ iteration: i, stage: "refactorer" });
646
- emitProgress(
647
- emitter,
648
- makeEvent("refactorer:start", { ...eventBase, stage: "refactorer" }, {
649
- message: `Refactorer (${refactorerRole.provider}) running`,
650
- detail: { refactorer: refactorerRole.provider }
651
- })
652
- );
653
- const refactorer = createAgent(refactorerRole.provider, config, logger);
654
- const refactorPrompt = [
655
- `Task context:\n${plannedTask}`,
656
- "",
657
- "Refactor the current changes for clarity and maintainability without changing behavior.",
658
- "Do not expand scope and keep tests green."
659
- ].join("\n");
660
- const refactorerOnOutput = ({ stream, line }) => {
661
- emitProgress(emitter, makeEvent("agent:output", { ...eventBase, stage: "refactorer" }, {
662
- message: line,
663
- detail: { stream, agent: refactorerRole.provider }
664
- }));
665
- };
666
- const refactorerStart = Date.now();
667
- const refactorResult = await refactorer.runTask({
668
- prompt: refactorPrompt,
669
- onOutput: refactorerOnOutput,
670
- role: "refactorer"
671
- });
672
- trackBudget({ role: "refactorer", provider: refactorerRole.provider, model: refactorerRole.model, result: refactorResult, duration_ms: Date.now() - refactorerStart });
673
- if (!refactorResult.ok) {
674
- await markSessionStatus(session, "failed");
675
- const details = refactorResult.error || refactorResult.output || `exitCode=${refactorResult.exitCode ?? "unknown"}`;
676
- emitProgress(
677
- emitter,
678
- makeEvent("refactorer:end", { ...eventBase, stage: "refactorer" }, {
679
- status: "fail",
680
- message: `Refactorer failed: ${details}`
681
- })
682
- );
683
- throw new Error(`Refactorer failed: ${details}`);
684
- }
685
- await addCheckpoint(session, { stage: "refactorer", iteration: i, note: "Refactorer applied cleanups" });
686
- emitProgress(
687
- emitter,
688
- makeEvent("refactorer:end", { ...eventBase, stage: "refactorer" }, {
689
- message: "Refactorer completed"
690
- })
691
- );
262
+ await runRefactorerStage({ refactorerRole, config, logger, emitter, eventBase, session, plannedTask, trackBudget, iteration: i });
692
263
  }
693
264
 
694
265
  // --- TDD Policy ---
695
- logger.setContext({ iteration: i, stage: "tdd" });
696
- const tddDiff = await generateDiff({ baseRef: session.session_start_sha });
697
- const tddEval = evaluateTddPolicy(tddDiff, config.development);
698
- await addCheckpoint(session, {
699
- stage: "tdd-policy",
700
- iteration: i,
701
- ok: tddEval.ok,
702
- reason: tddEval.reason,
703
- source_files: tddEval.sourceFiles?.length || 0,
704
- test_files: tddEval.testFiles?.length || 0
705
- });
706
-
707
- emitProgress(
708
- emitter,
709
- makeEvent("tdd:result", { ...eventBase, stage: "tdd" }, {
710
- status: tddEval.ok ? "ok" : "fail",
711
- message: tddEval.ok ? "TDD policy passed" : `TDD policy failed: ${tddEval.reason}`,
712
- detail: {
713
- ok: tddEval.ok,
714
- reason: tddEval.reason,
715
- sourceFiles: tddEval.sourceFiles?.length || 0,
716
- testFiles: tddEval.testFiles?.length || 0
717
- }
718
- })
719
- );
720
-
721
- if (!tddEval.ok) {
722
- session.last_reviewer_feedback = tddEval.message;
723
- session.repeated_issue_count += 1;
724
- await saveSession(session);
725
- if (session.repeated_issue_count >= config.session.fail_fast_repeats) {
726
- const question = `TDD policy has failed ${session.repeated_issue_count} times. The coder is not creating tests. How should we proceed? Issue: ${tddEval.reason}`;
727
- if (askQuestion) {
728
- const answer = await askQuestion(question, { iteration: i, stage: "tdd" });
729
- if (answer) {
730
- session.last_reviewer_feedback += `\nUser guidance: ${answer}`;
731
- session.repeated_issue_count = 0;
732
- await saveSession(session);
733
- continue;
734
- }
735
- }
736
- await pauseSession(session, {
737
- question,
738
- context: {
739
- iteration: i,
740
- stage: "tdd",
741
- lastFeedback: tddEval.message,
742
- repeatedCount: session.repeated_issue_count
743
- }
744
- });
745
- emitProgress(
746
- emitter,
747
- makeEvent("question", { ...eventBase, stage: "tdd" }, {
748
- status: "paused",
749
- message: question,
750
- detail: { question, sessionId: session.id }
751
- })
752
- );
753
- return { paused: true, sessionId: session.id, question, context: "tdd_fail_fast" };
754
- }
266
+ const tddResult = await runTddCheckStage({ config, logger, emitter, eventBase, session, trackBudget, iteration: i, askQuestion });
267
+ if (tddResult.action === "pause") {
268
+ return tddResult.result;
269
+ }
270
+ if (tddResult.action === "continue") {
755
271
  continue;
756
272
  }
757
273
 
758
- // --- SonarQube (via SonarRole) ---
274
+ // --- SonarQube ---
759
275
  if (config.sonarqube.enabled) {
760
- logger.setContext({ iteration: i, stage: "sonar" });
761
- emitProgress(
762
- emitter,
763
- makeEvent("sonar:start", { ...eventBase, stage: "sonar" }, {
764
- message: "SonarQube scanning"
765
- })
766
- );
767
-
768
- const sonarRole = new SonarRole({ config, logger, emitter });
769
- await sonarRole.init({ iteration: i });
770
- const sonarStart = Date.now();
771
- const sonarOutput = await sonarRole.run();
772
- trackBudget({ role: "sonar", provider: "sonar", result: sonarOutput, duration_ms: Date.now() - sonarStart });
773
- const sonarResult = sonarOutput.result;
774
-
775
- if (!sonarResult.gateStatus && sonarResult.error) {
776
- await markSessionStatus(session, "failed");
777
- emitProgress(
778
- emitter,
779
- makeEvent("sonar:end", { ...eventBase, stage: "sonar" }, {
780
- status: "fail",
781
- message: `Sonar scan failed: ${sonarResult.error}`
782
- })
783
- );
784
- throw new Error(`Sonar scan failed: ${sonarResult.error}`);
785
- }
786
-
787
- session.last_sonar_summary = sonarOutput.summary;
788
- if (typeof sonarResult.openIssuesTotal === "number") {
789
- if (sonarIssuesInitial === null) {
790
- sonarIssuesInitial = sonarResult.openIssuesTotal;
791
- }
792
- sonarIssuesFinal = sonarResult.openIssuesTotal;
793
- }
794
- await addCheckpoint(session, {
795
- stage: "sonar",
796
- iteration: i,
797
- project_key: sonarResult.projectKey,
798
- quality_gate: sonarResult.gateStatus,
799
- open_issues: sonarResult.openIssuesTotal
276
+ const sonarResult = await runSonarStage({
277
+ config, logger, emitter, eventBase, session, trackBudget, iteration: i,
278
+ repeatDetector, budgetSummary, sonarState,
279
+ askQuestion, task
800
280
  });
801
-
802
- emitProgress(
803
- emitter,
804
- makeEvent("sonar:end", { ...eventBase, stage: "sonar" }, {
805
- status: sonarResult.blocking ? "fail" : "ok",
806
- message: `Quality gate: ${sonarResult.gateStatus}`,
807
- detail: { projectKey: sonarResult.projectKey, gateStatus: sonarResult.gateStatus, openIssues: sonarResult.openIssuesTotal }
808
- })
809
- );
810
-
811
- if (sonarResult.blocking) {
812
- repeatDetector.addIteration(sonarResult.issues, []);
813
- const repeatState = repeatDetector.isStalled();
814
- if (repeatState.stalled) {
815
- const repeatCounts = repeatDetector.getRepeatCounts();
816
- const message = `No progress: SonarQube issues repeated ${repeatCounts.sonar} times.`;
817
- logger.warn(message);
818
- await markSessionStatus(session, "stalled");
819
- emitProgress(
820
- emitter,
821
- makeEvent("session:end", { ...eventBase, stage: "sonar" }, {
822
- status: "stalled",
823
- message,
824
- detail: { reason: repeatState.reason, repeats: repeatCounts.sonar, budget: budgetSummary() }
825
- })
826
- );
827
- return { approved: false, sessionId: session.id, reason: "stalled" };
828
- }
829
-
830
- session.last_reviewer_feedback = `Sonar gate blocking (${sonarResult.gateStatus}). Resolve critical findings first.`;
831
- session.sonar_retry_count = (session.sonar_retry_count || 0) + 1;
832
- await saveSession(session);
833
- const maxSonarRetries = config.session.max_sonar_retries ?? config.session.fail_fast_repeats;
834
- if (session.sonar_retry_count >= maxSonarRetries) {
835
- emitProgress(
836
- emitter,
837
- makeEvent("solomon:escalate", { ...eventBase, stage: "sonar" }, {
838
- message: `Sonar sub-loop limit reached (${session.sonar_retry_count}/${maxSonarRetries})`,
839
- detail: { subloop: "sonar", retryCount: session.sonar_retry_count, limit: maxSonarRetries, gateStatus: sonarResult.gateStatus }
840
- })
841
- );
842
-
843
- const solomonResult = await invokeSolomon({
844
- config, logger, emitter, eventBase, stage: "sonar", askQuestion, session, iteration: i,
845
- conflict: {
846
- stage: "sonar",
847
- task,
848
- iterationCount: session.sonar_retry_count,
849
- maxIterations: maxSonarRetries,
850
- history: [{ agent: "sonar", feedback: session.last_sonar_summary }]
851
- }
852
- });
853
-
854
- if (solomonResult.action === "pause") {
855
- return { paused: true, sessionId: session.id, question: solomonResult.question, context: "sonar_fail_fast" };
856
- }
857
- if (solomonResult.action === "continue") {
858
- if (solomonResult.humanGuidance) {
859
- session.last_reviewer_feedback += `\nUser guidance: ${solomonResult.humanGuidance}`;
860
- }
861
- session.sonar_retry_count = 0;
862
- await saveSession(session);
863
- continue;
864
- }
865
- if (solomonResult.action === "subtask") {
866
- return { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "sonar_subtask" };
867
- }
868
- }
281
+ if (sonarResult.action === "stalled" || sonarResult.action === "pause") {
282
+ return sonarResult.result;
283
+ }
284
+ if (sonarResult.action === "continue") {
869
285
  continue;
870
286
  }
871
-
872
- // Sonar passed — reset retry counter
873
- session.sonar_retry_count = 0;
874
- const issuesInitial = sonarIssuesInitial ?? sonarResult.openIssuesTotal ?? 0;
875
- const issuesFinal = sonarIssuesFinal ?? sonarResult.openIssuesTotal ?? 0;
876
- stageResults.sonar = {
877
- gateStatus: sonarResult.gateStatus,
878
- openIssues: sonarResult.openIssuesTotal,
879
- issuesInitial,
880
- issuesFinal,
881
- issuesResolved: Math.max(issuesInitial - issuesFinal, 0)
882
- };
287
+ if (sonarResult.stageResult) {
288
+ stageResults.sonar = sonarResult.stageResult;
289
+ }
883
290
  }
884
291
 
292
+ // --- Reviewer ---
885
293
  let review = {
886
294
  approved: true,
887
295
  blocking_issues: [],
@@ -890,120 +298,13 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
890
298
  confidence: 1
891
299
  };
892
300
  if (reviewerEnabled) {
893
- logger.setContext({ iteration: i, stage: "reviewer" });
894
- emitProgress(
895
- emitter,
896
- makeEvent("reviewer:start", { ...eventBase, stage: "reviewer" }, {
897
- message: `Reviewer (${reviewerRole.provider}) running`,
898
- detail: { reviewer: reviewerRole.provider }
899
- })
900
- );
901
-
902
- const diff = await generateDiff({ baseRef: session.session_start_sha });
903
- const reviewerPrompt = buildReviewerPrompt({
904
- task,
905
- diff,
906
- reviewRules,
907
- mode: config.review_mode,
908
- serenaEnabled: Boolean(config.serena?.enabled)
301
+ const reviewerResult = await runReviewerStage({
302
+ reviewerRole, config, logger, emitter, eventBase, session, trackBudget,
303
+ iteration: i, reviewRules, task, repeatDetector, budgetSummary
909
304
  });
910
- const reviewerOnOutput = ({ stream, line }) => {
911
- emitProgress(emitter, makeEvent("agent:output", { ...eventBase, stage: "reviewer" }, {
912
- message: line,
913
- detail: { stream, agent: reviewerRole.provider }
914
- }));
915
- };
916
- const reviewerStart = Date.now();
917
- const reviewerExec = await runReviewerWithFallback({
918
- reviewerName: reviewerRole.provider,
919
- config,
920
- logger,
921
- prompt: reviewerPrompt,
922
- session,
923
- iteration: i,
924
- onOutput: reviewerOnOutput,
925
- onAttemptResult: ({ reviewer, result }) => {
926
- trackBudget({ role: "reviewer", provider: reviewer, model: reviewerRole.model, result, duration_ms: Date.now() - reviewerStart });
927
- }
928
- });
929
-
930
- if (!reviewerExec.result || !reviewerExec.result.ok) {
931
- await markSessionStatus(session, "failed");
932
- const lastAttempt = reviewerExec.attempts.at(-1);
933
- const details =
934
- lastAttempt?.result?.error ||
935
- lastAttempt?.result?.output ||
936
- `reviewer=${lastAttempt?.reviewer || "unknown"} exitCode=${lastAttempt?.result?.exitCode ?? "unknown"}`;
937
- emitProgress(
938
- emitter,
939
- makeEvent("reviewer:end", { ...eventBase, stage: "reviewer" }, {
940
- status: "fail",
941
- message: `Reviewer failed: ${details}`
942
- })
943
- );
944
- throw new Error(`Reviewer failed: ${details}`);
945
- }
946
-
947
- try {
948
- const parsed = parseJsonOutput(reviewerExec.result.output);
949
- if (!parsed) {
950
- throw new Error("Reviewer output is not valid JSON");
951
- }
952
- review = validateReviewResult(parsed);
953
- } catch (parseErr) {
954
- logger.warn(`Reviewer output parse/validation failed: ${parseErr.message}`);
955
- review = {
956
- approved: false,
957
- blocking_issues: [{
958
- id: "PARSE_ERROR",
959
- severity: "high",
960
- description: `Reviewer output could not be parsed: ${parseErr.message}`
961
- }],
962
- non_blocking_suggestions: [],
963
- summary: `Parse error: ${parseErr.message}`,
964
- confidence: 0
965
- };
966
- }
967
- await addCheckpoint(session, {
968
- stage: "reviewer",
969
- iteration: i,
970
- approved: review.approved,
971
- blocking_issues: review.blocking_issues.length
972
- });
973
-
974
- emitProgress(
975
- emitter,
976
- makeEvent("reviewer:end", { ...eventBase, stage: "reviewer" }, {
977
- status: review.approved ? "ok" : "fail",
978
- message: review.approved ? "Review approved" : `Review rejected (${review.blocking_issues.length} blocking)`,
979
- detail: {
980
- approved: review.approved,
981
- blockingCount: review.blocking_issues.length,
982
- issues: review.blocking_issues.map(
983
- (x) => `${x.id || "ISSUE"}: ${x.description || "Missing description"}`
984
- )
985
- }
986
- })
987
- );
988
-
989
- if (!review.approved) {
990
- repeatDetector.addIteration([], review.blocking_issues);
991
- const repeatState = repeatDetector.isStalled();
992
- if (repeatState.stalled) {
993
- const repeatCounts = repeatDetector.getRepeatCounts();
994
- const message = `Manual intervention required: reviewer issues repeated ${repeatCounts.reviewer} times.`;
995
- logger.warn(message);
996
- await markSessionStatus(session, "stalled");
997
- emitProgress(
998
- emitter,
999
- makeEvent("session:end", { ...eventBase, stage: "reviewer" }, {
1000
- status: "stalled",
1001
- message,
1002
- detail: { reason: repeatState.reason, repeats: repeatCounts.reviewer, budget: budgetSummary() }
1003
- })
1004
- );
1005
- return { approved: false, sessionId: session.id, reason: "stalled" };
1006
- }
305
+ review = reviewerResult.review;
306
+ if (reviewerResult.stalled) {
307
+ return reviewerResult.stalledResult;
1007
308
  }
1008
309
  }
1009
310
 
@@ -1023,139 +324,35 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
1023
324
  // --- Post-loop stages: Tester → Security ---
1024
325
  const postLoopDiff = await generateDiff({ baseRef: session.session_start_sha });
1025
326
 
1026
- // --- Tester ---
1027
327
  if (testerEnabled) {
1028
- logger.setContext({ iteration: i, stage: "tester" });
1029
- emitProgress(
1030
- emitter,
1031
- makeEvent("tester:start", { ...eventBase, stage: "tester" }, {
1032
- message: "Tester evaluating test quality"
1033
- })
1034
- );
1035
-
1036
- const tester = new TesterRole({ config, logger, emitter });
1037
- await tester.init({ task, iteration: i });
1038
- const testerStart = Date.now();
1039
- const testerOutput = await tester.run({ task, diff: postLoopDiff });
1040
- trackBudget({
1041
- role: "tester",
1042
- provider: config?.roles?.tester?.provider || coderRole.provider,
1043
- model: config?.roles?.tester?.model || coderRole.model,
1044
- result: testerOutput,
1045
- duration_ms: Date.now() - testerStart
328
+ const testerResult = await runTesterStage({
329
+ config, logger, emitter, eventBase, session, coderRole, trackBudget,
330
+ iteration: i, task, diff: postLoopDiff, askQuestion
1046
331
  });
1047
-
1048
- await addCheckpoint(session, { stage: "tester", iteration: i, ok: testerOutput.ok });
1049
-
1050
- emitProgress(
1051
- emitter,
1052
- makeEvent("tester:end", { ...eventBase, stage: "tester" }, {
1053
- status: testerOutput.ok ? "ok" : "fail",
1054
- message: testerOutput.ok ? "Tester passed" : `Tester: ${testerOutput.summary}`
1055
- })
1056
- );
1057
-
1058
- if (!testerOutput.ok) {
1059
- const maxTesterRetries = config.session?.max_tester_retries ?? 1;
1060
- session.tester_retry_count = (session.tester_retry_count || 0) + 1;
1061
- await saveSession(session);
1062
-
1063
- if (session.tester_retry_count >= maxTesterRetries) {
1064
- const solomonResult = await invokeSolomon({
1065
- config, logger, emitter, eventBase, stage: "tester", askQuestion, session, iteration: i,
1066
- conflict: {
1067
- stage: "tester",
1068
- task,
1069
- diff: postLoopDiff,
1070
- iterationCount: session.tester_retry_count,
1071
- maxIterations: maxTesterRetries,
1072
- history: [{ agent: "tester", feedback: testerOutput.summary }]
1073
- }
1074
- });
1075
-
1076
- if (solomonResult.action === "pause") {
1077
- return { paused: true, sessionId: session.id, question: solomonResult.question, context: "tester_fail_fast" };
1078
- }
1079
- if (solomonResult.action === "subtask") {
1080
- return { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "tester_subtask" };
1081
- }
1082
- // continue = Solomon approved, proceed to next stage
1083
- } else {
1084
- session.last_reviewer_feedback = `Tester feedback: ${testerOutput.summary}`;
1085
- await saveSession(session);
1086
- continue;
1087
- }
1088
- } else {
1089
- session.tester_retry_count = 0;
1090
- stageResults.tester = { ok: true, summary: testerOutput.summary || "All tests passed" };
332
+ if (testerResult.action === "pause") {
333
+ return testerResult.result;
334
+ }
335
+ if (testerResult.action === "continue") {
336
+ continue;
337
+ }
338
+ if (testerResult.stageResult) {
339
+ stageResults.tester = testerResult.stageResult;
1091
340
  }
1092
341
  }
1093
342
 
1094
- // --- Security ---
1095
343
  if (securityEnabled) {
1096
- logger.setContext({ iteration: i, stage: "security" });
1097
- emitProgress(
1098
- emitter,
1099
- makeEvent("security:start", { ...eventBase, stage: "security" }, {
1100
- message: "Security auditing code"
1101
- })
1102
- );
1103
-
1104
- const security = new SecurityRole({ config, logger, emitter });
1105
- await security.init({ task, iteration: i });
1106
- const securityStart = Date.now();
1107
- const securityOutput = await security.run({ task, diff: postLoopDiff });
1108
- trackBudget({
1109
- role: "security",
1110
- provider: config?.roles?.security?.provider || coderRole.provider,
1111
- model: config?.roles?.security?.model || coderRole.model,
1112
- result: securityOutput,
1113
- duration_ms: Date.now() - securityStart
344
+ const securityResult = await runSecurityStage({
345
+ config, logger, emitter, eventBase, session, coderRole, trackBudget,
346
+ iteration: i, task, diff: postLoopDiff, askQuestion
1114
347
  });
1115
-
1116
- await addCheckpoint(session, { stage: "security", iteration: i, ok: securityOutput.ok });
1117
-
1118
- emitProgress(
1119
- emitter,
1120
- makeEvent("security:end", { ...eventBase, stage: "security" }, {
1121
- status: securityOutput.ok ? "ok" : "fail",
1122
- message: securityOutput.ok ? "Security audit passed" : `Security: ${securityOutput.summary}`
1123
- })
1124
- );
1125
-
1126
- if (!securityOutput.ok) {
1127
- const maxSecurityRetries = config.session?.max_security_retries ?? 1;
1128
- session.security_retry_count = (session.security_retry_count || 0) + 1;
1129
- await saveSession(session);
1130
-
1131
- if (session.security_retry_count >= maxSecurityRetries) {
1132
- const solomonResult = await invokeSolomon({
1133
- config, logger, emitter, eventBase, stage: "security", askQuestion, session, iteration: i,
1134
- conflict: {
1135
- stage: "security",
1136
- task,
1137
- diff: postLoopDiff,
1138
- iterationCount: session.security_retry_count,
1139
- maxIterations: maxSecurityRetries,
1140
- history: [{ agent: "security", feedback: securityOutput.summary }]
1141
- }
1142
- });
1143
-
1144
- if (solomonResult.action === "pause") {
1145
- return { paused: true, sessionId: session.id, question: solomonResult.question, context: "security_fail_fast" };
1146
- }
1147
- if (solomonResult.action === "subtask") {
1148
- return { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "security_subtask" };
1149
- }
1150
- // continue = Solomon approved, proceed
1151
- } else {
1152
- session.last_reviewer_feedback = `Security feedback: ${securityOutput.summary}`;
1153
- await saveSession(session);
1154
- continue;
1155
- }
1156
- } else {
1157
- session.security_retry_count = 0;
1158
- stageResults.security = { ok: true, summary: securityOutput.summary || "No vulnerabilities found" };
348
+ if (securityResult.action === "pause") {
349
+ return securityResult.result;
350
+ }
351
+ if (securityResult.action === "continue") {
352
+ continue;
353
+ }
354
+ if (securityResult.stageResult) {
355
+ stageResults.security = securityResult.stageResult;
1159
356
  }
1160
357
  }
1161
358