popeye-cli 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. package/.env.example +24 -1
  2. package/CONTRIBUTING.md +275 -0
  3. package/OPEN_SOURCE_MANIFESTO.md +172 -0
  4. package/README.md +832 -123
  5. package/dist/adapters/claude.d.ts +19 -4
  6. package/dist/adapters/claude.d.ts.map +1 -1
  7. package/dist/adapters/claude.js +908 -42
  8. package/dist/adapters/claude.js.map +1 -1
  9. package/dist/adapters/gemini.d.ts +55 -0
  10. package/dist/adapters/gemini.d.ts.map +1 -0
  11. package/dist/adapters/gemini.js +318 -0
  12. package/dist/adapters/gemini.js.map +1 -0
  13. package/dist/adapters/grok.d.ts +73 -0
  14. package/dist/adapters/grok.d.ts.map +1 -0
  15. package/dist/adapters/grok.js +430 -0
  16. package/dist/adapters/grok.js.map +1 -0
  17. package/dist/adapters/openai.d.ts +1 -1
  18. package/dist/adapters/openai.d.ts.map +1 -1
  19. package/dist/adapters/openai.js +47 -8
  20. package/dist/adapters/openai.js.map +1 -1
  21. package/dist/auth/claude.d.ts +11 -9
  22. package/dist/auth/claude.d.ts.map +1 -1
  23. package/dist/auth/claude.js +107 -71
  24. package/dist/auth/claude.js.map +1 -1
  25. package/dist/auth/gemini.d.ts +58 -0
  26. package/dist/auth/gemini.d.ts.map +1 -0
  27. package/dist/auth/gemini.js +172 -0
  28. package/dist/auth/gemini.js.map +1 -0
  29. package/dist/auth/grok.d.ts +73 -0
  30. package/dist/auth/grok.d.ts.map +1 -0
  31. package/dist/auth/grok.js +211 -0
  32. package/dist/auth/grok.js.map +1 -0
  33. package/dist/auth/index.d.ts +14 -7
  34. package/dist/auth/index.d.ts.map +1 -1
  35. package/dist/auth/index.js +41 -6
  36. package/dist/auth/index.js.map +1 -1
  37. package/dist/auth/keychain.d.ts +20 -7
  38. package/dist/auth/keychain.d.ts.map +1 -1
  39. package/dist/auth/keychain.js +85 -29
  40. package/dist/auth/keychain.js.map +1 -1
  41. package/dist/auth/openai.d.ts +2 -2
  42. package/dist/auth/openai.d.ts.map +1 -1
  43. package/dist/auth/openai.js +30 -32
  44. package/dist/auth/openai.js.map +1 -1
  45. package/dist/cli/commands/auth.d.ts +1 -1
  46. package/dist/cli/commands/auth.d.ts.map +1 -1
  47. package/dist/cli/commands/auth.js +79 -8
  48. package/dist/cli/commands/auth.js.map +1 -1
  49. package/dist/cli/commands/create.d.ts.map +1 -1
  50. package/dist/cli/commands/create.js +15 -4
  51. package/dist/cli/commands/create.js.map +1 -1
  52. package/dist/cli/interactive.d.ts.map +1 -1
  53. package/dist/cli/interactive.js +1494 -114
  54. package/dist/cli/interactive.js.map +1 -1
  55. package/dist/config/defaults.d.ts +9 -1
  56. package/dist/config/defaults.d.ts.map +1 -1
  57. package/dist/config/defaults.js +19 -2
  58. package/dist/config/defaults.js.map +1 -1
  59. package/dist/config/index.d.ts +19 -0
  60. package/dist/config/index.d.ts.map +1 -1
  61. package/dist/config/index.js +33 -1
  62. package/dist/config/index.js.map +1 -1
  63. package/dist/config/schema.d.ts +47 -0
  64. package/dist/config/schema.d.ts.map +1 -1
  65. package/dist/config/schema.js +29 -1
  66. package/dist/config/schema.js.map +1 -1
  67. package/dist/generators/fullstack.d.ts +32 -0
  68. package/dist/generators/fullstack.d.ts.map +1 -0
  69. package/dist/generators/fullstack.js +497 -0
  70. package/dist/generators/fullstack.js.map +1 -0
  71. package/dist/generators/index.d.ts +4 -3
  72. package/dist/generators/index.d.ts.map +1 -1
  73. package/dist/generators/index.js +15 -1
  74. package/dist/generators/index.js.map +1 -1
  75. package/dist/generators/python.d.ts +17 -1
  76. package/dist/generators/python.d.ts.map +1 -1
  77. package/dist/generators/python.js +34 -20
  78. package/dist/generators/python.js.map +1 -1
  79. package/dist/generators/templates/fullstack.d.ts +113 -0
  80. package/dist/generators/templates/fullstack.d.ts.map +1 -0
  81. package/dist/generators/templates/fullstack.js +1004 -0
  82. package/dist/generators/templates/fullstack.js.map +1 -0
  83. package/dist/generators/typescript.d.ts +19 -1
  84. package/dist/generators/typescript.d.ts.map +1 -1
  85. package/dist/generators/typescript.js +37 -20
  86. package/dist/generators/typescript.js.map +1 -1
  87. package/dist/state/index.d.ts +108 -0
  88. package/dist/state/index.d.ts.map +1 -1
  89. package/dist/state/index.js +551 -4
  90. package/dist/state/index.js.map +1 -1
  91. package/dist/state/registry.d.ts +52 -0
  92. package/dist/state/registry.d.ts.map +1 -0
  93. package/dist/state/registry.js +215 -0
  94. package/dist/state/registry.js.map +1 -0
  95. package/dist/types/cli.d.ts +8 -0
  96. package/dist/types/cli.d.ts.map +1 -1
  97. package/dist/types/cli.js.map +1 -1
  98. package/dist/types/consensus.d.ts +186 -4
  99. package/dist/types/consensus.d.ts.map +1 -1
  100. package/dist/types/consensus.js +35 -3
  101. package/dist/types/consensus.js.map +1 -1
  102. package/dist/types/project.d.ts +76 -0
  103. package/dist/types/project.d.ts.map +1 -1
  104. package/dist/types/project.js +1 -1
  105. package/dist/types/project.js.map +1 -1
  106. package/dist/types/workflow.d.ts +217 -16
  107. package/dist/types/workflow.d.ts.map +1 -1
  108. package/dist/types/workflow.js +40 -1
  109. package/dist/types/workflow.js.map +1 -1
  110. package/dist/workflow/auto-fix.d.ts +45 -0
  111. package/dist/workflow/auto-fix.d.ts.map +1 -0
  112. package/dist/workflow/auto-fix.js +274 -0
  113. package/dist/workflow/auto-fix.js.map +1 -0
  114. package/dist/workflow/consensus.d.ts +70 -2
  115. package/dist/workflow/consensus.d.ts.map +1 -1
  116. package/dist/workflow/consensus.js +872 -17
  117. package/dist/workflow/consensus.js.map +1 -1
  118. package/dist/workflow/execution-mode.d.ts +10 -4
  119. package/dist/workflow/execution-mode.d.ts.map +1 -1
  120. package/dist/workflow/execution-mode.js +547 -58
  121. package/dist/workflow/execution-mode.js.map +1 -1
  122. package/dist/workflow/index.d.ts +14 -2
  123. package/dist/workflow/index.d.ts.map +1 -1
  124. package/dist/workflow/index.js +69 -6
  125. package/dist/workflow/index.js.map +1 -1
  126. package/dist/workflow/milestone-workflow.d.ts +34 -0
  127. package/dist/workflow/milestone-workflow.d.ts.map +1 -0
  128. package/dist/workflow/milestone-workflow.js +414 -0
  129. package/dist/workflow/milestone-workflow.js.map +1 -0
  130. package/dist/workflow/plan-mode.d.ts +80 -3
  131. package/dist/workflow/plan-mode.d.ts.map +1 -1
  132. package/dist/workflow/plan-mode.js +767 -49
  133. package/dist/workflow/plan-mode.js.map +1 -1
  134. package/dist/workflow/plan-storage.d.ts +386 -0
  135. package/dist/workflow/plan-storage.d.ts.map +1 -0
  136. package/dist/workflow/plan-storage.js +878 -0
  137. package/dist/workflow/plan-storage.js.map +1 -0
  138. package/dist/workflow/project-verification.d.ts +37 -0
  139. package/dist/workflow/project-verification.d.ts.map +1 -0
  140. package/dist/workflow/project-verification.js +381 -0
  141. package/dist/workflow/project-verification.js.map +1 -0
  142. package/dist/workflow/task-workflow.d.ts +37 -0
  143. package/dist/workflow/task-workflow.d.ts.map +1 -0
  144. package/dist/workflow/task-workflow.js +386 -0
  145. package/dist/workflow/task-workflow.js.map +1 -0
  146. package/dist/workflow/test-runner.d.ts +9 -0
  147. package/dist/workflow/test-runner.d.ts.map +1 -1
  148. package/dist/workflow/test-runner.js +101 -5
  149. package/dist/workflow/test-runner.js.map +1 -1
  150. package/dist/workflow/ui-designer.d.ts +82 -0
  151. package/dist/workflow/ui-designer.d.ts.map +1 -0
  152. package/dist/workflow/ui-designer.js +234 -0
  153. package/dist/workflow/ui-designer.js.map +1 -0
  154. package/dist/workflow/ui-setup.d.ts +58 -0
  155. package/dist/workflow/ui-setup.d.ts.map +1 -0
  156. package/dist/workflow/ui-setup.js +685 -0
  157. package/dist/workflow/ui-setup.js.map +1 -0
  158. package/dist/workflow/ui-verification.d.ts +114 -0
  159. package/dist/workflow/ui-verification.d.ts.map +1 -0
  160. package/dist/workflow/ui-verification.js +258 -0
  161. package/dist/workflow/ui-verification.js.map +1 -0
  162. package/dist/workflow/workflow-logger.d.ts +110 -0
  163. package/dist/workflow/workflow-logger.d.ts.map +1 -0
  164. package/dist/workflow/workflow-logger.js +267 -0
  165. package/dist/workflow/workflow-logger.js.map +1 -0
  166. package/dist/workflow/workspace-manager.d.ts +342 -0
  167. package/dist/workflow/workspace-manager.d.ts.map +1 -0
  168. package/dist/workflow/workspace-manager.js +733 -0
  169. package/dist/workflow/workspace-manager.js.map +1 -0
  170. package/package.json +2 -2
  171. package/src/adapters/claude.ts +1067 -47
  172. package/src/adapters/gemini.ts +373 -0
  173. package/src/adapters/grok.ts +492 -0
  174. package/src/adapters/openai.ts +48 -9
  175. package/src/auth/claude.ts +120 -78
  176. package/src/auth/gemini.ts +207 -0
  177. package/src/auth/grok.ts +255 -0
  178. package/src/auth/index.ts +47 -9
  179. package/src/auth/keychain.ts +95 -28
  180. package/src/auth/openai.ts +29 -36
  181. package/src/cli/commands/auth.ts +89 -10
  182. package/src/cli/commands/create.ts +13 -4
  183. package/src/cli/interactive.ts +1774 -142
  184. package/src/config/defaults.ts +19 -2
  185. package/src/config/index.ts +36 -1
  186. package/src/config/schema.ts +30 -1
  187. package/src/generators/fullstack.ts +551 -0
  188. package/src/generators/index.ts +25 -1
  189. package/src/generators/python.ts +65 -20
  190. package/src/generators/templates/fullstack.ts +1047 -0
  191. package/src/generators/typescript.ts +69 -20
  192. package/src/state/index.ts +713 -4
  193. package/src/state/registry.ts +278 -0
  194. package/src/types/cli.ts +8 -0
  195. package/src/types/consensus.ts +197 -6
  196. package/src/types/project.ts +82 -1
  197. package/src/types/workflow.ts +90 -1
  198. package/src/workflow/auto-fix.ts +340 -0
  199. package/src/workflow/consensus.ts +1180 -16
  200. package/src/workflow/execution-mode.ts +673 -74
  201. package/src/workflow/index.ts +95 -6
  202. package/src/workflow/milestone-workflow.ts +576 -0
  203. package/src/workflow/plan-mode.ts +924 -50
  204. package/src/workflow/plan-storage.ts +1282 -0
  205. package/src/workflow/project-verification.ts +471 -0
  206. package/src/workflow/task-workflow.ts +528 -0
  207. package/src/workflow/test-runner.ts +120 -5
  208. package/src/workflow/ui-designer.ts +337 -0
  209. package/src/workflow/ui-setup.ts +797 -0
  210. package/src/workflow/ui-verification.ts +357 -0
  211. package/src/workflow/workflow-logger.ts +353 -0
  212. package/src/workflow/workspace-manager.ts +912 -0
  213. package/tests/config/config.test.ts +1 -1
  214. package/tests/types/consensus.test.ts +3 -3
  215. package/tests/workflow/plan-mode.test.ts +213 -0
  216. package/tests/workflow/test-runner.test.ts +5 -3
@@ -1,13 +1,31 @@
1
1
  /**
2
2
  * Consensus workflow module
3
- * Handles the iterative consensus-building process between Claude and OpenAI
3
+ * Handles the iterative consensus-building process between Claude and a reviewer (OpenAI, Gemini, or Grok)
4
+ * with arbitration support when consensus cannot be reached
4
5
  */
5
6
 
6
- import type { ConsensusResult, ConsensusIteration, ConsensusConfig } from '../types/consensus.js';
7
+ import type {
8
+ ConsensusResult,
9
+ ConsensusIteration,
10
+ ConsensusConfig,
11
+ ArbitrationResult,
12
+ AIProvider,
13
+ TaggedItem,
14
+ AppConsensusScores,
15
+ CorrectionRecord,
16
+ } from '../types/consensus.js';
7
17
  import { DEFAULT_CONSENSUS_CONFIG } from '../types/consensus.js';
8
- import { requestConsensus } from '../adapters/openai.js';
18
+ import { requestConsensus as requestOpenAIConsensus } from '../adapters/openai.js';
19
+ import { requestConsensus as requestGeminiConsensus, requestArbitration as requestGeminiArbitration } from '../adapters/gemini.js';
20
+ import { requestConsensus as requestGrokConsensus, requestArbitration as requestGrokArbitration } from '../adapters/grok.js';
9
21
  import { revisePlan } from '../adapters/claude.js';
10
22
  import { recordConsensusIteration } from '../state/index.js';
23
+ import {
24
+ createPlanStorage,
25
+ type ReviewerFeedback,
26
+ type FullstackReviewerFeedback,
27
+ type FeedbackAppTarget,
28
+ } from './plan-storage.js';
11
29
 
12
30
  /**
13
31
  * Options for consensus iteration
@@ -15,8 +33,15 @@ import { recordConsensusIteration } from '../state/index.js';
15
33
  export interface ConsensusOptions {
16
34
  projectDir: string;
17
35
  config?: Partial<ConsensusConfig>;
36
+ /** Whether this is a fullstack project (enables per-app tracking) */
37
+ isFullstack?: boolean;
38
+ /** Project language for revision prompts */
39
+ language?: 'python' | 'typescript' | 'fullstack';
18
40
  onIteration?: (iteration: number, result: ConsensusResult) => void;
19
41
  onRevision?: (iteration: number, revisedPlan: string) => void;
42
+ onConcerns?: (concerns: string[], recommendations: string[]) => void;
43
+ onArbitration?: (result: ArbitrationResult) => void;
44
+ onProgress?: (phase: string, message: string) => void;
20
45
  }
21
46
 
22
47
  /**
@@ -26,10 +51,109 @@ export interface ConsensusProcessResult {
26
51
  approved: boolean;
27
52
  finalPlan: string;
28
53
  finalScore: number;
54
+ bestPlan: string;
55
+ bestScore: number;
56
+ bestIteration: number;
29
57
  iterations: ConsensusIteration[];
30
58
  totalIterations: number;
59
+ finalConcerns: string[];
60
+ finalRecommendations: string[];
61
+ arbitrated: boolean;
62
+ arbitrationResult?: ArbitrationResult;
63
+ /** True if consensus timed out and we accepted the best available plan */
64
+ timedOut?: boolean;
31
65
  }
32
66
 
67
+ /**
68
+ * Request consensus from the configured reviewer (OpenAI, Gemini, or Grok)
69
+ */
70
+ async function requestReviewerConsensus(
71
+ plan: string,
72
+ context: string,
73
+ reviewer: AIProvider,
74
+ config: Partial<ConsensusConfig>
75
+ ): Promise<ConsensusResult> {
76
+ if (reviewer === 'gemini') {
77
+ return requestGeminiConsensus(plan, context, {
78
+ model: config.geminiModel,
79
+ temperature: config.temperature,
80
+ maxTokens: config.maxTokens,
81
+ });
82
+ }
83
+ if (reviewer === 'grok') {
84
+ return requestGrokConsensus(plan, context, {
85
+ model: config.grokModel,
86
+ temperature: config.temperature,
87
+ maxTokens: config.maxTokens,
88
+ });
89
+ }
90
+ return requestOpenAIConsensus(plan, context, config);
91
+ }
92
+
93
+ /**
94
+ * Request arbitration from the configured arbitrator (Grok when selected; any other value falls back to Gemini)
95
+ */
96
+ async function requestArbitratorDecision(
97
+ plan: string,
98
+ reviewerFeedback: string,
99
+ claudeFeedback: string,
100
+ iterations: number,
101
+ scores: number[],
102
+ arbitrator: AIProvider
103
+ ): Promise<ArbitrationResult> {
104
+ if (arbitrator === 'grok') {
105
+ return requestGrokArbitration(plan, reviewerFeedback, claudeFeedback, iterations, scores);
106
+ }
107
+ // Default to Gemini for arbitration (most capable at reasoning)
108
+ return requestGeminiArbitration(plan, reviewerFeedback, claudeFeedback, iterations, scores);
109
+ }
110
+
111
+ /**
112
+ * Check if the consensus process is "stuck" (not improving)
113
+ * Detects both:
114
+ * 1. Stagnation: scores within 5% of each other
115
+ * 2. Oscillation: scores going up and down without progress
116
+ */
117
+ function isStuck(scores: number[], stuckIterations: number): boolean {
118
+ if (scores.length < stuckIterations) return false;
119
+
120
+ const recentScores = scores.slice(-stuckIterations);
121
+ const maxRecent = Math.max(...recentScores);
122
+ const minRecent = Math.min(...recentScores);
123
+
124
+ // Check 1: Stagnation - all recent scores are within 5% of each other
125
+ if ((maxRecent - minRecent) <= 5) {
126
+ return true;
127
+ }
128
+
129
+ // Check 2: Oscillation - detect if we're going up and down without making progress
130
+ // e.g., 70 -> 85 -> 75 -> 80 (oscillating around ~77.5)
131
+ if (recentScores.length >= 3) {
132
+ const avg = recentScores.reduce((a, b) => a + b, 0) / recentScores.length;
133
+ const deviations = recentScores.map(s => Math.abs(s - avg));
134
+ const avgDeviation = deviations.reduce((a, b) => a + b, 0) / deviations.length;
135
+
136
+ // If scores are oscillating around an average (avg deviation > 3% but range < 20%)
137
+ // and we're not trending upward, consider it stuck
138
+ if (avgDeviation > 3 && (maxRecent - minRecent) < 20) {
139
+ // Check if we're trending upward (last score should be close to max)
140
+ const lastScore = recentScores[recentScores.length - 1];
141
+ const firstScore = recentScores[0];
142
+ // Not improving if last score is not better than first
143
+ if (lastScore <= firstScore + 2) {
144
+ return true;
145
+ }
146
+ }
147
+ }
148
+
149
+ return false;
150
+ }
151
+
152
+ /**
153
+ * Default consensus timeout (15 minutes total)
154
+ */
155
+ const DEFAULT_CONSENSUS_TIMEOUT_MS = 15 * 60 * 1000;
156
+
33
157
  /**
34
158
  * Format a plan for consensus review
35
159
  * Structures the plan in a way that's optimal for review
@@ -88,6 +212,7 @@ export function meetsThreshold(
88
212
 
89
213
  /**
90
214
  * Iterate until consensus is reached
215
+ * Supports configurable reviewer and arbitration when stuck
91
216
  *
92
217
  * @param initialPlan - The initial plan to review
93
218
  * @param context - Project context
@@ -102,24 +227,130 @@ export async function iterateUntilConsensus(
102
227
  const {
103
228
  projectDir,
104
229
  config = {},
230
+ isFullstack = false,
231
+ language: providedLanguage,
105
232
  onIteration,
106
233
  onRevision,
234
+ onConcerns,
235
+ onArbitration,
236
+ onProgress,
107
237
  } = options;
108
238
 
239
+ // Derive language from isFullstack if not explicitly provided
240
+ const language = providedLanguage || (isFullstack ? 'fullstack' : 'python');
241
+
109
242
  const {
110
243
  threshold = DEFAULT_CONSENSUS_CONFIG.threshold,
111
244
  maxIterations = DEFAULT_CONSENSUS_CONFIG.maxIterations,
245
+ reviewer = DEFAULT_CONSENSUS_CONFIG.reviewer,
246
+ arbitrator = DEFAULT_CONSENSUS_CONFIG.arbitrator,
247
+ enableArbitration = DEFAULT_CONSENSUS_CONFIG.enableArbitration,
248
+ arbitrationThreshold = DEFAULT_CONSENSUS_CONFIG.arbitrationThreshold,
249
+ stuckIterations = DEFAULT_CONSENSUS_CONFIG.stuckIterations,
112
250
  } = config;
113
251
 
114
252
  const iterations: ConsensusIteration[] = [];
253
+ const scores: number[] = [];
115
254
  let currentPlan = initialPlan;
116
255
  let iteration = 0;
117
256
 
257
+ // Track the best plan throughout the process
258
+ let bestPlan = initialPlan;
259
+ let bestScore = 0;
260
+ let bestIteration = 0;
261
+ let lastConcerns: string[] = [];
262
+ let lastRecommendations: string[] = [];
263
+ let lastAnalysis = '';
264
+
265
+ // Track arbitration attempts to prevent infinite loops
266
+ let arbitrationAttempts = 0;
267
+
268
+ // Track elapsed time to detect stuck processes
269
+ const startTime = Date.now();
270
+ const maxArbitrationAttempts = 2;
271
+
272
+ onProgress?.('consensus', `Using ${reviewer} as reviewer${enableArbitration ? `, ${arbitrator} as arbitrator` : ''}`);
273
+
118
274
  while (iteration < maxIterations) {
119
275
  iteration++;
120
276
 
121
- // Request consensus review from OpenAI
122
- const consensusResult = await requestConsensus(currentPlan, context, config);
277
+ // Check total elapsed time - if timing out, try arbitration before giving up
278
+ const totalElapsed = Date.now() - startTime;
279
+ if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS && enableArbitration && arbitrationAttempts < maxArbitrationAttempts) {
280
+ onProgress?.('consensus', `Consensus timeout after ${Math.round(totalElapsed / 60000)} minutes - invoking arbitrator before accepting`);
281
+
282
+ try {
283
+ arbitrationAttempts++;
284
+ const arbitrationResult = await requestArbitratorDecision(
285
+ bestPlan,
286
+ lastAnalysis,
287
+ `Consensus timed out after ${Math.round(totalElapsed / 60000)} minutes. Best score: ${bestScore}%. Main concerns: ${lastConcerns.slice(0, 3).join('; ')}`,
288
+ iteration,
289
+ scores,
290
+ arbitrator
291
+ );
292
+
293
+ if (onArbitration) {
294
+ onArbitration(arbitrationResult);
295
+ }
296
+
297
+ // Accept arbitration result (we're out of time)
298
+ onProgress?.('arbitration', `Arbitrator decision: ${arbitrationResult.approved ? 'APPROVED' : 'REVISE'} with ${arbitrationResult.score}%`);
299
+
300
+ return {
301
+ approved: arbitrationResult.approved || arbitrationResult.score >= 80,
302
+ finalPlan: bestPlan,
303
+ finalScore: arbitrationResult.score,
304
+ bestPlan,
305
+ bestScore: arbitrationResult.score,
306
+ bestIteration,
307
+ iterations,
308
+ totalIterations: iteration - 1,
309
+ finalConcerns: arbitrationResult.minorConcerns || lastConcerns,
310
+ finalRecommendations: arbitrationResult.suggestedChanges || lastRecommendations,
311
+ arbitrated: true,
312
+ arbitrationResult,
313
+ timedOut: true,
314
+ };
315
+ } catch (arbError) {
316
+ onProgress?.('arbitration', `Arbitration failed on timeout: ${arbError instanceof Error ? arbError.message : 'Unknown error'}`);
317
+ // Fall through to accept best plan
318
+ }
319
+ }
320
+
321
+ // Hard timeout - no more arbitration attempts left
322
+ if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS) {
323
+ onProgress?.('consensus', `Consensus timeout - accepting best plan with ${bestScore}%`);
324
+ return {
325
+ approved: bestScore >= arbitrationThreshold,
326
+ finalPlan: bestPlan,
327
+ finalScore: bestScore,
328
+ bestPlan,
329
+ bestScore,
330
+ bestIteration,
331
+ iterations,
332
+ totalIterations: iteration - 1,
333
+ finalConcerns: lastConcerns,
334
+ finalRecommendations: lastRecommendations,
335
+ arbitrated: false,
336
+ timedOut: true,
337
+ };
338
+ }
339
+
340
+ // Log iteration timing
341
+ const iterationStart = Date.now();
342
+ const elapsedMinutes = Math.round((iterationStart - startTime) / 60000);
343
+ onProgress?.('consensus', `Iteration ${iteration} starting (${elapsedMinutes}min elapsed)`);
344
+
345
+ // Request consensus review from configured reviewer
346
+ onProgress?.('consensus', `Requesting review from ${reviewer}...`);
347
+ const consensusResult = await requestReviewerConsensus(currentPlan, context, reviewer, config);
348
+
349
+ // Log iteration duration
350
+ const iterationDuration = Math.round((Date.now() - iterationStart) / 1000);
351
+ onProgress?.('consensus', `Review completed in ${iterationDuration}s - score: ${consensusResult.score}%`);
352
+
353
+ scores.push(consensusResult.score);
123
354
 
124
355
  // Record the iteration
125
356
  const iterationRecord: ConsensusIteration = {
@@ -134,54 +365,186 @@ export async function iterateUntilConsensus(
134
365
  // Save to project state
135
366
  await recordConsensusIteration(projectDir, iterationRecord);
136
367
 
137
- // Notify callback
368
+ // Track best plan - only update if this score is better
369
+ if (consensusResult.score > bestScore) {
370
+ bestPlan = currentPlan;
371
+ bestScore = consensusResult.score;
372
+ bestIteration = iteration;
373
+ }
374
+
375
+ // Track concerns for output
376
+ lastConcerns = consensusResult.concerns || [];
377
+ lastRecommendations = consensusResult.recommendations || [];
378
+ lastAnalysis = consensusResult.analysis || '';
379
+
380
+ // Notify callbacks
138
381
  if (onIteration) {
139
382
  onIteration(iteration, consensusResult);
140
383
  }
141
384
 
385
+ if (onConcerns && (lastConcerns.length > 0 || lastRecommendations.length > 0)) {
386
+ onConcerns(lastConcerns, lastRecommendations);
387
+ }
388
+
142
389
  // Check if we've reached consensus
143
390
  if (meetsThreshold(consensusResult.score, threshold)) {
144
391
  return {
145
392
  approved: true,
146
393
  finalPlan: currentPlan,
147
394
  finalScore: consensusResult.score,
395
+ bestPlan: currentPlan,
396
+ bestScore: consensusResult.score,
397
+ bestIteration: iteration,
148
398
  iterations,
149
399
  totalIterations: iteration,
400
+ finalConcerns: [],
401
+ finalRecommendations: [],
402
+ arbitrated: false,
150
403
  };
151
404
  }
152
405
 
406
+ // Check if we're stuck and should trigger arbitration
407
+ if (enableArbitration &&
408
+ bestScore >= arbitrationThreshold &&
409
+ isStuck(scores, stuckIterations) &&
410
+ arbitrationAttempts < maxArbitrationAttempts) {
411
+
412
+ arbitrationAttempts++;
413
+ onProgress?.('arbitration', `Consensus stuck at ${bestScore}%, invoking ${arbitrator} arbitrator (attempt ${arbitrationAttempts}/${maxArbitrationAttempts})...`);
414
+
415
+ try {
416
+ const arbitrationResult = await requestArbitratorDecision(
417
+ bestPlan,
418
+ lastAnalysis,
419
+ `The plan has been revised ${iteration} times. Best score achieved: ${bestScore}%. The reviewer's main concerns are: ${lastConcerns.slice(0, 3).join('; ')}`,
420
+ iteration,
421
+ scores,
422
+ arbitrator
423
+ );
424
+
425
+ if (onArbitration) {
426
+ onArbitration(arbitrationResult);
427
+ }
428
+
429
+ // Accept if arbitrator approves OR if arbitrator gives a high score (>= 88%)
430
+ // This prevents infinite REVISE loops when the arbitrator is happy enough
431
+ const acceptArbitration = arbitrationResult.approved ||
432
+ arbitrationResult.score >= 88 ||
433
+ (arbitrationAttempts >= maxArbitrationAttempts && arbitrationResult.score >= 80);
434
+
435
+ if (acceptArbitration) {
436
+ const reason = arbitrationResult.approved
437
+ ? `Arbitrator approved plan with ${arbitrationResult.score}% confidence`
438
+ : `Arbitrator score ${arbitrationResult.score}% is acceptable - proceeding with best plan`;
439
+ onProgress?.('arbitration', reason);
440
+
441
+ return {
442
+ approved: true,
443
+ finalPlan: bestPlan,
444
+ finalScore: arbitrationResult.score,
445
+ bestPlan,
446
+ bestScore: arbitrationResult.score,
447
+ bestIteration,
448
+ iterations,
449
+ totalIterations: iteration,
450
+ finalConcerns: arbitrationResult.minorConcerns || [],
451
+ finalRecommendations: arbitrationResult.suggestedChanges || [],
452
+ arbitrated: true,
453
+ arbitrationResult,
454
+ };
455
+ } else {
456
+ onProgress?.('arbitration', `Arbitrator requests changes: ${arbitrationResult.suggestedChanges.slice(0, 2).join('; ')}`);
457
+ // Apply arbitrator's suggested changes
458
+ if (arbitrationResult.suggestedChanges.length > 0) {
459
+ onProgress?.('consensus', 'Applying arbitrator suggestions...');
460
+ const revisionResult = await revisePlan(
461
+ bestPlan,
462
+ arbitrationResult.reasoning,
463
+ arbitrationResult.suggestedChanges,
464
+ language
465
+ );
466
+ if (revisionResult.success && revisionResult.response) {
467
+ currentPlan = revisionResult.response;
468
+ // Reset stuck detection after arbitration revision
469
+ scores.length = 0;
470
+ scores.push(arbitrationResult.score);
471
+ onProgress?.('consensus', 'Plan revised based on arbitrator feedback');
472
+ } else {
473
+ onProgress?.('consensus', 'Revision failed, continuing with current plan');
474
+ }
475
+ }
476
+ }
477
+ } catch (error) {
478
+ onProgress?.('arbitration', `Arbitration failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
479
+ // If we've tried arbitration and it failed, accept the best plan we have
480
+ if (arbitrationAttempts >= maxArbitrationAttempts && bestScore >= arbitrationThreshold) {
481
+ onProgress?.('arbitration', `Max arbitration attempts reached, accepting best plan with ${bestScore}%`);
482
+ return {
483
+ approved: true,
484
+ finalPlan: bestPlan,
485
+ finalScore: bestScore,
486
+ bestPlan,
487
+ bestScore,
488
+ bestIteration,
489
+ iterations,
490
+ totalIterations: iteration,
491
+ finalConcerns: lastConcerns,
492
+ finalRecommendations: lastRecommendations,
493
+ arbitrated: true,
494
+ };
495
+ }
496
+ }
497
+ }
498
+
153
499
  // If not at max iterations, revise the plan
154
500
  if (iteration < maxIterations) {
155
501
  const concerns = extractConcerns(consensusResult);
502
+ onProgress?.('consensus', 'Revising plan based on feedback...');
503
+
504
+ // Create a progress handler for revision
505
+ const revisionProgress = onProgress
506
+ ? (msg: string) => onProgress('consensus', `[revision] ${msg}`)
507
+ : undefined;
156
508
 
157
509
  // Use Claude to revise the plan
158
510
  const revisionResult = await revisePlan(
159
511
  currentPlan,
160
512
  consensusResult.analysis,
161
- concerns
513
+ concerns,
514
+ language,
515
+ revisionProgress
162
516
  );
163
517
 
164
518
  if (revisionResult.success && revisionResult.response) {
519
+ // Only use the revised plan for the next iteration
520
+ // The best plan tracking above will decide if it's actually better
165
521
  currentPlan = revisionResult.response;
166
522
 
167
523
  if (onRevision) {
168
524
  onRevision(iteration, currentPlan);
169
525
  }
170
526
  } else {
171
- // If revision fails, try to continue with current plan
527
+ // If revision fails, try to continue with best plan
172
528
  console.warn(`Plan revision failed at iteration ${iteration}:`, revisionResult.error);
529
+ currentPlan = bestPlan;
173
530
  }
174
531
  }
175
532
  }
176
533
 
177
534
  // Max iterations reached without consensus
178
- const lastIteration = iterations[iterations.length - 1];
535
+ // Return the BEST plan we found, not the last one
179
536
  return {
180
537
  approved: false,
181
- finalPlan: currentPlan,
182
- finalScore: lastIteration?.result.score || 0,
538
+ finalPlan: bestPlan,
539
+ finalScore: bestScore,
540
+ bestPlan,
541
+ bestScore,
542
+ bestIteration,
183
543
  iterations,
184
544
  totalIterations: iteration,
545
+ finalConcerns: lastConcerns,
546
+ finalRecommendations: lastRecommendations,
547
+ arbitrated: false,
185
548
  };
186
549
  }
187
550
 
@@ -196,16 +559,31 @@ export function summarizeConsensusProcess(result: ConsensusProcessResult): strin
196
559
 
197
560
  lines.push(`## Consensus Summary`);
198
561
  lines.push('');
199
- lines.push(`**Status:** ${result.approved ? 'APPROVED' : 'NOT APPROVED'}`);
562
+ lines.push(`**Status:** ${result.approved ? 'APPROVED' : 'NOT APPROVED'}${result.arbitrated ? ' (via arbitration)' : ''}`);
200
563
  lines.push(`**Final Score:** ${result.finalScore}%`);
564
+ lines.push(`**Best Score:** ${result.bestScore}% (iteration ${result.bestIteration})`);
201
565
  lines.push(`**Total Iterations:** ${result.totalIterations}`);
566
+
567
+ if (result.arbitrated && result.arbitrationResult) {
568
+ lines.push('');
569
+ lines.push(`### Arbitration Decision`);
570
+ lines.push(`- Decision: ${result.arbitrationResult.approved ? 'APPROVED' : 'REVISE'}`);
571
+ lines.push(`- Confidence: ${result.arbitrationResult.score}%`);
572
+ if (result.arbitrationResult.criticalConcerns.length > 0) {
573
+ lines.push(`- Critical Concerns: ${result.arbitrationResult.criticalConcerns.length}`);
574
+ }
575
+ if (result.arbitrationResult.minorConcerns.length > 0) {
576
+ lines.push(`- Minor Concerns: ${result.arbitrationResult.minorConcerns.length}`);
577
+ }
578
+ }
202
579
  lines.push('');
203
580
 
204
581
  lines.push(`### Iteration History`);
205
582
  lines.push('');
206
583
 
207
584
  for (const iteration of result.iterations) {
208
- lines.push(`#### Iteration ${iteration.iteration}`);
585
+ const isBest = iteration.iteration === result.bestIteration;
586
+ lines.push(`#### Iteration ${iteration.iteration}${isBest ? ' (BEST)' : ''}`);
209
587
  lines.push(`- Score: ${iteration.result.score}%`);
210
588
  lines.push(`- Strengths: ${iteration.result.strengths?.length || 0}`);
211
589
  lines.push(`- Concerns: ${iteration.result.concerns?.length || 0}`);
@@ -213,13 +591,21 @@ export function summarizeConsensusProcess(result: ConsensusProcessResult): strin
213
591
  }
214
592
 
215
593
  if (!result.approved) {
216
- const lastResult = result.iterations[result.iterations.length - 1]?.result;
217
- if (lastResult?.concerns && lastResult.concerns.length > 0) {
594
+ if (result.finalConcerns && result.finalConcerns.length > 0) {
218
595
  lines.push(`### Remaining Concerns`);
219
596
  lines.push('');
220
- for (const concern of lastResult.concerns) {
597
+ for (const concern of result.finalConcerns) {
221
598
  lines.push(`- ${concern}`);
222
599
  }
600
+ lines.push('');
601
+ }
602
+
603
+ if (result.finalRecommendations && result.finalRecommendations.length > 0) {
604
+ lines.push(`### Recommendations`);
605
+ lines.push('');
606
+ for (const rec of result.finalRecommendations) {
607
+ lines.push(`- ${rec}`);
608
+ }
223
609
  }
224
610
  }
225
611
 
@@ -297,3 +683,781 @@ export function getScoreTrend(
297
683
  if (diff < -5) return 'declining';
298
684
  return 'stable';
299
685
  }
686
+
687
/**
 * Settings accepted by the optimized consensus flow, layered on top of the
 * base {@link ConsensusOptions}.
 */
export interface OptimizedConsensusOptions extends ConsensusOptions {
  /** Identifier of the milestone whose plan is under review (required). */
  milestoneId: string;
  /** Optional display name of that milestone, stored with the plan. */
  milestoneName?: string;
  /** Identifier of the task; when present the plan is handled as a task plan. */
  taskId?: string;
  /** Optional display name of the task, stored with the plan. */
  taskName?: string;
  /** Ask all reviewers concurrently instead of one after another. */
  parallelReviews?: boolean;
  /** Extra reviewers consulted in addition to the primary reviewer. */
  additionalReviewers?: AIProvider[];
  /** Marks a fullstack project, which turns on per-app tracking. */
  isFullstack?: boolean;
}
702
+
703
/**
 * Consensus outcome for fullstack projects: the regular
 * {@link ConsensusProcessResult} plus a per-app breakdown.
 */
export interface FullstackConsensusProcessResult extends ConsensusProcessResult {
  /** Scores split by app target. */
  appScores: AppConsensusScores;
  /** Whether each app target counts as approved. */
  appApproved: {
    frontend?: boolean;
    backend?: boolean;
    unified: boolean;
  };
  /** Reviewer concerns, each labelled with the app it was attributed to. */
  taggedConcerns: TaggedItem[];
  /** Reviewer recommendations, each labelled with the app it was attributed to. */
  taggedRecommendations: TaggedItem[];
  /** Correction records gathered while the consensus loop was running. */
  corrections: CorrectionRecord[];
}
722
+
723
/**
 * Ask one reviewer for its opinion on the plan and normalise the answer
 * into a `ReviewerFeedback` record.
 *
 * Progress is reported (phase `consensus`) before the request and again
 * after it completes, including the elapsed whole seconds and the score.
 *
 * @param plan - Plan text to review
 * @param context - Project context handed to the reviewer
 * @param reviewer - Provider that should perform the review
 * @param config - Consensus configuration forwarded to the reviewer request
 * @param onProgress - Optional progress callback
 * @returns The reviewer's feedback; missing lists/analysis become empty values
 */
async function collectReviewerFeedback(
  plan: string,
  context: string,
  reviewer: AIProvider,
  config: Partial<ConsensusConfig>,
  onProgress?: (phase: string, message: string) => void
): Promise<ReviewerFeedback> {
  onProgress?.('consensus', `Requesting review from ${reviewer}...`);

  const requestedAt = Date.now();
  const review = await requestReviewerConsensus(plan, context, reviewer, config);
  const elapsedSeconds = Math.round((Date.now() - requestedAt) / 1000);

  onProgress?.(
    'consensus',
    `${reviewer} review completed in ${elapsedSeconds}s - score: ${review.score}%`
  );

  const feedback: ReviewerFeedback = {
    reviewer,
    score: review.score,
    timestamp: new Date().toISOString(),
    concerns: review.concerns || [],
    recommendations: review.recommendations || [],
    analysis: review.analysis || '',
  };
  return feedback;
}
750
+
751
/**
 * Query every reviewer concurrently and gather whatever feedback comes back.
 *
 * A reviewer whose request fails is reported through `onProgress` and left
 * out of the result; the remaining reviewers are unaffected.
 *
 * @param plan - Plan text to review
 * @param context - Project context handed to each reviewer
 * @param reviewers - Providers to consult
 * @param config - Consensus configuration forwarded to each request
 * @param onProgress - Optional progress callback
 * @returns Feedback from the reviewers that answered, in reviewer order
 */
async function collectAllFeedback(
  plan: string,
  context: string,
  reviewers: AIProvider[],
  config: Partial<ConsensusConfig>,
  onProgress?: (phase: string, message: string) => void
): Promise<ReviewerFeedback[]> {
  onProgress?.('consensus', `Collecting feedback from ${reviewers.length} reviewer(s) in parallel...`);

  // Every request is started up front; failures are turned into null markers.
  const pending = reviewers.map(async (reviewer): Promise<ReviewerFeedback | null> => {
    try {
      return await collectReviewerFeedback(plan, context, reviewer, config, onProgress);
    } catch (error) {
      onProgress?.('consensus', `${reviewer} review failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
      return null;
    }
  });

  const settled = await Promise.all(pending);

  const collected: ReviewerFeedback[] = [];
  for (const entry of settled) {
    if (entry !== null) {
      collected.push(entry);
    }
  }
  return collected;
}
774
+
775
/** Lower-case terms that point at the frontend app. */
const FRONTEND_CATEGORY_KEYWORDS: readonly string[] = [
  'react', 'component', 'jsx', 'tsx', 'css', 'tailwind', 'ui', 'user interface',
  'button', 'form', 'input', 'modal', 'page', 'router', 'navigation', 'state management',
  'redux', 'zustand', 'vite', 'frontend', 'front-end', 'client', 'browser', 'dom',
  'styling', 'layout', 'responsive', 'animation', 'hook', 'usestate', 'useeffect',
  'shadcn', 'radix', 'tailwindcss', 'vitest', 'jest', 'testing-library', 'playwright',
];

/** Lower-case terms that point at the backend app. */
const BACKEND_CATEGORY_KEYWORDS: readonly string[] = [
  'fastapi', 'api', 'endpoint', 'route', 'database', 'sql', 'postgresql', 'neon',
  'model', 'schema', 'migration', 'orm', 'sqlalchemy', 'pydantic', 'validation',
  'authentication', 'authorization', 'jwt', 'token', 'middleware', 'backend', 'back-end',
  'server', 'python', 'pytest', 'alembic', 'celery', 'redis', 'cache', 'queue',
  'repository', 'service', 'crud', 'rest', 'graphql', 'websocket',
];

/**
 * Turn keywords into whole-word patterns (an optional plural "s" is allowed).
 * Regex metacharacters in a keyword are escaped so it is matched literally.
 */
function compileCategoryKeywordPatterns(keywords: readonly string[]): RegExp[] {
  return keywords.map(keyword => {
    const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp(`\\b${escaped}s?\\b`);
  });
}

// Compiled once at module load rather than on every categorization call.
const FRONTEND_CATEGORY_PATTERNS = compileCategoryKeywordPatterns(FRONTEND_CATEGORY_KEYWORDS);
const BACKEND_CATEGORY_PATTERNS = compileCategoryKeywordPatterns(BACKEND_CATEGORY_KEYWORDS);

/** Number of distinct keyword patterns that occur in the (lower-cased) text. */
function countCategoryKeywordHits(text: string, patterns: readonly RegExp[]): number {
  let hits = 0;
  for (const pattern of patterns) {
    if (pattern.test(text)) {
      hits++;
    }
  }
  return hits;
}

/**
 * Categorize a concern or recommendation by app target.
 *
 * The text is lower-cased and checked against a frontend and a backend
 * keyword list. Keywords must appear as whole words (optionally pluralised):
 * plain substring matching misfires on everyday words, e.g. 'ui' inside
 * "build", 'dom' inside "random", 'api' inside "rapid", 'rest' inside
 * "restore", and counts 'router' as the backend keyword 'route'.
 *
 * @param content - Free-text concern or recommendation
 * @returns 'frontend' or 'backend' when that side has strictly more distinct
 *          keyword hits than the other and at least two of them; otherwise
 *          'unified'
 */
function categorizeByContent(content: string): FeedbackAppTarget {
  const lowerContent = content.toLowerCase();

  const frontendMatches = countCategoryKeywordHits(lowerContent, FRONTEND_CATEGORY_PATTERNS);
  const backendMatches = countCategoryKeywordHits(lowerContent, BACKEND_CATEGORY_PATTERNS);

  // A single keyword is too weak a signal, and ties stay 'unified'.
  if (frontendMatches > backendMatches && frontendMatches >= 2) {
    return 'frontend';
  }
  if (backendMatches > frontendMatches && backendMatches >= 2) {
    return 'backend';
  }
  return 'unified';
}
825
+
826
/**
 * Label every concern and recommendation with an app target and derive a
 * rough per-app score from how many items landed on each target.
 *
 * Scoring: each target starts from a base of 100 when at least one item
 * exists (0 when there are none at all) and loses 5 points per concern or
 * recommendation attributed to it, never dropping below 0.
 *
 * @param concerns - Reviewer concerns as plain text
 * @param recommendations - Reviewer recommendations as plain text
 * @returns The tagged items together with the per-app scores
 */
function categorizeFeedbackItems(
  concerns: string[],
  recommendations: string[]
): {
  taggedConcerns: TaggedItem[];
  taggedRecommendations: TaggedItem[];
  appScores: { frontend: number; backend: number; unified: number };
} {
  const tag = (content: string): TaggedItem => ({
    app: categorizeByContent(content),
    content,
  });

  const taggedConcerns: TaggedItem[] = concerns.map(text => tag(text));
  const taggedRecommendations: TaggedItem[] = recommendations.map(text => tag(text));

  // Concerns and recommendations weigh the same for this score.
  const itemsFor = (app: TaggedItem['app']): number => {
    let count = 0;
    for (const item of taggedConcerns) {
      if (item.app === app) count++;
    }
    for (const item of taggedRecommendations) {
      if (item.app === app) count++;
    }
    return count;
  };

  const hasItems = taggedConcerns.length + taggedRecommendations.length > 0;
  const baseScore = hasItems ? 100 : 0;
  const scoreFor = (app: TaggedItem['app']): number =>
    Math.max(0, baseScore - itemsFor(app) * 5);

  return {
    taggedConcerns,
    taggedRecommendations,
    appScores: {
      frontend: scoreFor('frontend'),
      backend: scoreFor('backend'),
      unified: scoreFor('unified'),
    },
  };
}
870
+
871
/**
 * Derive per-app scores from the reviewers' scores and the tagged feedback.
 *
 * The starting point is the rounded mean of all reviewer scores (0 when no
 * feedback exists). Frontend and backend each lose 2 points per concern and
 * 1 point per recommendation attributed to them; the unified score loses
 * 2 points per unified concern (unified recommendations carry no penalty).
 * Penalised scores are clamped at 0.
 *
 * @param allFeedback - Feedback from every reviewer in this round
 * @param taggedConcerns - Concerns labelled by app target
 * @param taggedRecommendations - Recommendations labelled by app target
 * @returns Scores for frontend, backend and unified
 */
function calculateAppScores(
  allFeedback: ReviewerFeedback[],
  taggedConcerns: TaggedItem[],
  taggedRecommendations: TaggedItem[]
): AppConsensusScores {
  let scoreSum = 0;
  for (const feedback of allFeedback) {
    scoreSum += feedback.score;
  }
  const baseScore = allFeedback.length > 0 ? Math.round(scoreSum / allFeedback.length) : 0;

  const countFor = (items: TaggedItem[], app: TaggedItem['app']): number =>
    items.reduce((total, item) => (item.app === app ? total + 1 : total), 0);

  // An app without any attributed item keeps the base score untouched.
  const penalised = (concernCount: number, recCount: number): number => {
    if (concernCount === 0 && recCount === 0) {
      return baseScore;
    }
    return Math.max(0, baseScore - (concernCount * 2 + recCount));
  };

  const frontend = penalised(
    countFor(taggedConcerns, 'frontend'),
    countFor(taggedRecommendations, 'frontend')
  );
  const backend = penalised(
    countFor(taggedConcerns, 'backend'),
    countFor(taggedRecommendations, 'backend')
  );
  const unified = Math.max(0, baseScore - countFor(taggedConcerns, 'unified') * 2);

  return { frontend, backend, unified };
}
911
+
912
/**
 * Optimized consensus process that batches feedback and reduces API calls.
 *
 * Key optimizations:
 * 1. Plans are stored in files, not regenerated from scratch
 * 2. ALL reviewer feedback is collected before a revision is requested
 * 3. The plan is revised ONCE per round with the combined feedback
 * 4. Reviews run in parallel when several reviewers are configured
 * 5. Per-app tracking for fullstack projects (frontend/backend/unified)
 *
 * Each round: clear stored feedback, collect reviews, average the reviewer
 * scores, record the iteration, then either return (threshold reached or the
 * arbitrator approved a stuck process) or revise the plan. The loop also ends
 * when the overall timeout elapses or `maxIterations` is reached.
 *
 * For fullstack projects every exit path, including the timeout paths,
 * returns the fullstack-specific fields. On timeout the arbitrator's verdict
 * is judged against the configured `arbitrationThreshold`, the same limit
 * the stuck-arbitration path uses.
 *
 * @param initialPlan - The initial plan to seek consensus on
 * @param context - Project context for review
 * @param options - Consensus options including tracking info
 * @returns Consensus process result (FullstackConsensusProcessResult for fullstack projects)
 */
export async function runOptimizedConsensusProcess(
  initialPlan: string,
  context: string,
  options: OptimizedConsensusOptions
): Promise<ConsensusProcessResult | FullstackConsensusProcessResult> {
  const {
    projectDir,
    config = {},
    onIteration,
    onRevision,
    onConcerns,
    onArbitration,
    onProgress,
    milestoneId,
    milestoneName,
    taskId,
    taskName,
    parallelReviews = true,
    additionalReviewers = [],
    isFullstack = false,
  } = options;

  // Language handed to the revision prompt.
  // NOTE(review): every non-fullstack project is revised as 'python' here, even
  // though the type also allows 'typescript' — confirm this is intended.
  const language: 'python' | 'typescript' | 'fullstack' = isFullstack ? 'fullstack' : 'python';

  const {
    threshold = DEFAULT_CONSENSUS_CONFIG.threshold,
    maxIterations = DEFAULT_CONSENSUS_CONFIG.maxIterations,
    reviewer = DEFAULT_CONSENSUS_CONFIG.reviewer,
    arbitrator = DEFAULT_CONSENSUS_CONFIG.arbitrator,
    enableArbitration = DEFAULT_CONSENSUS_CONFIG.enableArbitration,
    arbitrationThreshold = DEFAULT_CONSENSUS_CONFIG.arbitrationThreshold,
    stuckIterations = DEFAULT_CONSENSUS_CONFIG.stuckIterations,
  } = config;

  // A task id means the plan under review is a task plan, otherwise a milestone plan.
  const planType = taskId ? 'task' : 'milestone';
  const planMeta = { milestoneId, milestoneName, taskId, taskName };

  // Initialize plan storage with fullstack support
  const planStorage = createPlanStorage(projectDir, isFullstack);
  await planStorage.initialize();

  // Per-app bookkeeping (only filled for fullstack projects)
  const appScoresHistory: { frontend: number[]; backend: number[]; unified: number[] } = {
    frontend: [],
    backend: [],
    unified: [],
  };
  const allTaggedConcerns: TaggedItem[] = [];
  const allTaggedRecommendations: TaggedItem[] = [];
  const corrections: CorrectionRecord[] = [];

  // Primary reviewer first, then the additional ones without duplicating it
  const allReviewers: AIProvider[] = [reviewer, ...additionalReviewers.filter(r => r !== reviewer)];

  const iterations: ConsensusIteration[] = [];
  const scores: number[] = [];
  let currentPlan = initialPlan;
  let iteration = 0;

  // Best plan seen so far and the most recent feedback
  let bestPlan = initialPlan;
  let bestScore = 0;
  let bestIteration = 0;
  let lastConcerns: string[] = [];
  let lastRecommendations: string[] = [];
  let lastAnalysis = '';

  /** Rounded mean of the reviewer scores, 0 when nobody answered. */
  const averageScore = (feedback: ReviewerFeedback[]): number =>
    feedback.length > 0
      ? Math.round(feedback.reduce((sum, f) => sum + f.score, 0) / feedback.length)
      : 0;

  /** Most recently recorded per-app scores; falls back to the best score. */
  const latestAppScores = (): AppConsensusScores => {
    if (!isFullstack) {
      return { unified: bestScore };
    }
    const lastOf = (history: number[]): number | undefined =>
      history.length > 0 ? history[history.length - 1] : undefined;
    return {
      frontend: lastOf(appScoresHistory.frontend),
      backend: lastOf(appScoresHistory.backend),
      unified: lastOf(appScoresHistory.unified) ?? bestScore,
    };
  };

  /** Per-app approval flags: frontend/backend are measured against `limit`. */
  const perAppApproval = (
    appScores: AppConsensusScores,
    limit: number,
    unified: boolean
  ): FullstackConsensusProcessResult['appApproved'] => ({
    frontend: (appScores.frontend ?? 0) >= limit,
    backend: (appScores.backend ?? 0) >= limit,
    unified,
  });

  /** Attach the fullstack-specific fields when running in fullstack mode. */
  const finalize = (
    base: ConsensusProcessResult,
    appScores: AppConsensusScores,
    appApproved: FullstackConsensusProcessResult['appApproved']
  ): ConsensusProcessResult | FullstackConsensusProcessResult => {
    if (!isFullstack) {
      return base;
    }
    return {
      ...base,
      appScores,
      appApproved,
      taggedConcerns: allTaggedConcerns,
      taggedRecommendations: allTaggedRecommendations,
      corrections,
    };
  };

  const startTime = Date.now();

  onProgress?.('consensus', `Using optimized consensus with ${allReviewers.join(', ')} as reviewer(s)`);
  onProgress?.('consensus', `Plan tracking: milestone=${milestoneId}${taskId ? `, task=${taskId}` : ''}`);
  if (isFullstack) {
    onProgress?.('consensus', `Fullstack mode enabled - tracking per-app consensus (frontend/backend/unified)`);
  }

  // Save initial plan to storage
  await planStorage.savePlan(currentPlan, planType, { ...planMeta });

  while (iteration < maxIterations) {
    iteration++;

    // ---- Timeout handling ----
    const totalElapsed = Date.now() - startTime;
    if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS) {
      onProgress?.('consensus', `Consensus timeout after ${Math.round(totalElapsed / 60000)} minutes`);

      const timeoutAppScores = latestAppScores();

      if (enableArbitration) {
        try {
          const arbitrationResult = await requestArbitratorDecision(
            bestPlan,
            lastAnalysis,
            `Timeout. Best score: ${bestScore}%. Concerns: ${lastConcerns.slice(0, 3).join('; ')}`,
            iteration,
            scores,
            arbitrator
          );

          if (onArbitration) onArbitration(arbitrationResult);

          // Same limit as the stuck-arbitration path (was a hard-coded 80).
          const approved =
            arbitrationResult.approved || arbitrationResult.score >= arbitrationThreshold;

          return finalize(
            {
              approved,
              finalPlan: bestPlan,
              finalScore: arbitrationResult.score,
              bestPlan,
              bestScore: arbitrationResult.score,
              bestIteration,
              iterations,
              // The round that hit the timeout never ran.
              totalIterations: iteration - 1,
              finalConcerns: arbitrationResult.minorConcerns || lastConcerns,
              finalRecommendations: arbitrationResult.suggestedChanges || lastRecommendations,
              arbitrated: true,
              arbitrationResult,
              timedOut: true,
            },
            timeoutAppScores,
            perAppApproval(timeoutAppScores, arbitrationThreshold, approved)
          );
        } catch {
          // Arbitration is best-effort on timeout: fall through and judge the
          // best plan seen so far on its own score.
        }
      }

      const approvedOnTimeout = bestScore >= arbitrationThreshold;
      return finalize(
        {
          approved: approvedOnTimeout,
          finalPlan: bestPlan,
          finalScore: bestScore,
          bestPlan,
          bestScore,
          bestIteration,
          iterations,
          totalIterations: iteration - 1,
          finalConcerns: lastConcerns,
          finalRecommendations: lastRecommendations,
          arbitrated: false,
          timedOut: true,
        },
        timeoutAppScores,
        perAppApproval(timeoutAppScores, arbitrationThreshold, approvedOnTimeout)
      );
    }

    const elapsedMinutes = Math.round((Date.now() - startTime) / 60000);
    onProgress?.('consensus', `Iteration ${iteration} starting (${elapsedMinutes}min elapsed)`);

    // Clear previous feedback for this round
    await planStorage.clearFeedback(milestoneId, taskId);

    // ============================================
    // OPTIMIZATION: Collect ALL feedback in parallel
    // ============================================
    let allFeedback: ReviewerFeedback[];

    if (parallelReviews && allReviewers.length > 1) {
      allFeedback = await collectAllFeedback(currentPlan, context, allReviewers, config, onProgress);
    } else {
      // Sequential fallback; a reviewer failure propagates to the caller here.
      allFeedback = [];
      for (const rev of allReviewers) {
        allFeedback.push(await collectReviewerFeedback(currentPlan, context, rev, config, onProgress));
      }
    }

    // Combine all concerns and recommendations (exact duplicates removed)
    const allConcerns = [...new Set(allFeedback.flatMap(f => f.concerns))];
    const allRecommendations = [...new Set(allFeedback.flatMap(f => f.recommendations))];
    const combinedAnalysis = allFeedback.map(f => `[${f.reviewer}] ${f.analysis}`).join('\n\n');

    lastConcerns = allConcerns;
    lastRecommendations = allRecommendations;

    // ============================================
    // FULLSTACK: Categorize feedback by app target
    // ============================================
    let currentAppScores: AppConsensusScores;

    if (isFullstack) {
      onProgress?.('consensus', 'Categorizing feedback by app (frontend/backend/unified)...');

      const categorized = categorizeFeedbackItems(allConcerns, allRecommendations);
      const iterationTaggedConcerns = categorized.taggedConcerns;
      const iterationTaggedRecs = categorized.taggedRecommendations;

      // Calculate per-app scores and remember them
      currentAppScores = calculateAppScores(allFeedback, iterationTaggedConcerns, iterationTaggedRecs);
      appScoresHistory.frontend.push(currentAppScores.frontend ?? 0);
      appScoresHistory.backend.push(currentAppScores.backend ?? 0);
      appScoresHistory.unified.push(currentAppScores.unified);

      // Accumulate tagged items for the final result
      allTaggedConcerns.push(...iterationTaggedConcerns);
      allTaggedRecommendations.push(...iterationTaggedRecs);

      // Log per-app breakdown
      const concernsFor = (app: TaggedItem['app']): number =>
        iterationTaggedConcerns.filter(c => c.app === app).length;
      const frontendConcerns = concernsFor('frontend');
      const backendConcerns = concernsFor('backend');
      const unifiedConcerns = concernsFor('unified');

      onProgress?.('consensus', `Per-app concerns: FE=${frontendConcerns}, BE=${backendConcerns}, Unified=${unifiedConcerns}`);
      onProgress?.('consensus', `Per-app scores: FE=${currentAppScores.frontend}%, BE=${currentAppScores.backend}%, Unified=${currentAppScores.unified}%`);

      // Save each reviewer's feedback together with the tags of its own items
      for (const feedback of allFeedback) {
        const fullstackFeedback: FullstackReviewerFeedback = {
          ...feedback,
          appScores: currentAppScores,
          taggedConcerns: iterationTaggedConcerns.filter(c => feedback.concerns.includes(c.content)),
          taggedRecommendations: iterationTaggedRecs.filter(r => feedback.recommendations.includes(r.content)),
          isFullstack: true,
        };

        await planStorage.saveFullstackFeedback(fullstackFeedback, planType, milestoneId, taskId);
      }
    } else {
      // Non-fullstack: save feedback without app categorization
      for (const feedback of allFeedback) {
        await planStorage.saveFeedback(feedback, milestoneId, taskId);
      }
      currentAppScores = { unified: averageScore(allFeedback) };
    }

    // Combined score is the average of all reviewers that answered
    const combinedScore = averageScore(allFeedback);

    scores.push(combinedScore);
    lastAnalysis = combinedAnalysis;

    // Create consensus result for tracking
    const consensusResult: ConsensusResult = {
      score: combinedScore,
      analysis: combinedAnalysis,
      concerns: allConcerns,
      recommendations: allRecommendations,
      approved: combinedScore >= threshold,
      strengths: [],
      rawResponse: combinedAnalysis,
    };

    // Record iteration
    const iterationRecord: ConsensusIteration = {
      iteration,
      plan: currentPlan,
      timestamp: new Date().toISOString(),
      result: consensusResult,
    };
    iterations.push(iterationRecord);

    if (onIteration) onIteration(iteration, consensusResult);
    if (onConcerns) onConcerns(allConcerns, allRecommendations);

    // Update best plan tracking
    if (combinedScore > bestScore) {
      bestScore = combinedScore;
      bestPlan = currentPlan;
      bestIteration = iteration;
    }

    // Save plan with updated score (including per-app scores for fullstack)
    await planStorage.savePlan(currentPlan, planType, {
      ...planMeta,
      score: combinedScore,
      frontendScore: isFullstack ? currentAppScores.frontend : undefined,
      backendScore: isFullstack ? currentAppScores.backend : undefined,
      unifiedScore: isFullstack ? currentAppScores.unified : undefined,
    });

    // Record correction for fullstack tracking (from the second round on)
    if (isFullstack && iteration > 1) {
      const previousScore = scores.length >= 2 ? scores[scores.length - 2] : 0;
      const correction: CorrectionRecord = {
        id: `correction-${iteration}`,
        timestamp: new Date().toISOString(),
        app: 'unified', // Top-level correction
        previousScore,
        newScore: combinedScore,
        concerns: lastConcerns.slice(0, 5),
        changes: lastRecommendations.slice(0, 3),
        reviewer,
      };
      corrections.push(correction);

      await planStorage.recordCorrection(planType, correction, milestoneId, taskId);
    }

    // Record in project state
    await recordConsensusIteration(projectDir, iterationRecord);

    onProgress?.('consensus', `Combined score: ${combinedScore}% (from ${allFeedback.length} reviewer(s))`);

    // ---- Consensus reached ----
    if (combinedScore >= threshold) {
      onProgress?.('consensus', `Consensus reached at ${combinedScore}%`);
      await planStorage.updateStatus('approved', planType, milestoneId, taskId);

      const approval = perAppApproval(currentAppScores, threshold, true);

      // Update per-app approval status for fullstack
      if (isFullstack) {
        const feApproved = approval.frontend;
        const beApproved = approval.backend;

        await planStorage.updateAppApproval(planType, 'frontend', feApproved, currentAppScores.frontend ?? 0, milestoneId, taskId);
        await planStorage.updateAppApproval(planType, 'backend', beApproved, currentAppScores.backend ?? 0, milestoneId, taskId);
        await planStorage.updateAppApproval(planType, 'unified', true, currentAppScores.unified, milestoneId, taskId);

        onProgress?.('consensus', `Per-app approval: FE=${feApproved}, BE=${beApproved}, Unified=true`);
      }

      return finalize(
        {
          approved: true,
          finalPlan: currentPlan,
          finalScore: combinedScore,
          bestPlan: currentPlan,
          bestScore: combinedScore,
          bestIteration: iteration,
          iterations,
          totalIterations: iteration,
          finalConcerns: allConcerns,
          finalRecommendations: allRecommendations,
          arbitrated: false,
        },
        currentAppScores,
        approval
      );
    }

    // ---- Stuck: let the arbitrator decide ----
    if (isStuck(scores, stuckIterations) && enableArbitration) {
      onProgress?.('consensus', `Consensus stuck - invoking ${arbitrator} for arbitration`);

      try {
        const arbitrationResult = await requestArbitratorDecision(
          bestPlan,
          combinedAnalysis,
          `Stuck after ${iteration} iterations. Scores: ${scores.slice(-stuckIterations).join(', ')}`,
          iteration,
          scores,
          arbitrator
        );

        if (onArbitration) onArbitration(arbitrationResult);

        if (arbitrationResult.approved || arbitrationResult.score >= arbitrationThreshold) {
          onProgress?.('arbitration', `Arbitrator approved with ${arbitrationResult.score}%`);
          await planStorage.updateStatus('approved', planType, milestoneId, taskId);

          return finalize(
            {
              approved: true,
              finalPlan: bestPlan,
              finalScore: arbitrationResult.score,
              bestPlan,
              bestScore: arbitrationResult.score,
              bestIteration,
              iterations,
              totalIterations: iteration,
              finalConcerns: arbitrationResult.minorConcerns || allConcerns,
              finalRecommendations: arbitrationResult.suggestedChanges || allRecommendations,
              arbitrated: true,
              arbitrationResult,
            },
            currentAppScores,
            perAppApproval(currentAppScores, arbitrationThreshold, true)
          );
        }
        // Not approved: keep iterating with another revision.
      } catch (arbError) {
        onProgress?.('arbitration', `Arbitration failed: ${arbError instanceof Error ? arbError.message : 'Unknown error'}`);
      }
    }

    // ============================================
    // OPTIMIZATION: Single revision with ALL feedback
    // ============================================
    // No revision after the last round: nobody would review it.
    if (iteration < maxIterations) {
      onProgress?.('consensus', `Revising plan with combined feedback from ${allFeedback.length} reviewer(s)...`);

      const revisionProgress = onProgress
        ? (msg: string) => onProgress('consensus', `[revision] ${msg}`)
        : undefined;

      // One revision call carrying the combined feedback of all reviewers
      const revisionResult = await revisePlan(
        currentPlan,
        combinedAnalysis,
        allConcerns,
        language,
        revisionProgress
      );

      if (revisionResult.success && revisionResult.response) {
        currentPlan = revisionResult.response;

        // Save revised plan
        await planStorage.savePlan(currentPlan, planType, { ...planMeta });

        if (onRevision) onRevision(iteration, currentPlan);
      } else {
        onProgress?.('consensus', `Revision failed, continuing with best plan`);
        currentPlan = bestPlan;
      }
    }
  }

  // ---- Max iterations reached without consensus ----
  await planStorage.updateStatus('reviewing', planType, milestoneId, taskId);

  const finalAppScores = latestAppScores();

  return finalize(
    {
      approved: false,
      finalPlan: bestPlan,
      finalScore: bestScore,
      bestPlan,
      bestScore,
      bestIteration,
      iterations,
      totalIterations: iteration,
      finalConcerns: lastConcerns,
      finalRecommendations: lastRecommendations,
      arbitrated: false,
    },
    finalAppScores,
    perAppApproval(finalAppScores, threshold, bestScore >= threshold)
  );
}