popeye-cli 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. package/.env.example +24 -1
  2. package/CONTRIBUTING.md +275 -0
  3. package/OPEN_SOURCE_MANIFESTO.md +172 -0
  4. package/README.md +832 -123
  5. package/dist/adapters/claude.d.ts +19 -4
  6. package/dist/adapters/claude.d.ts.map +1 -1
  7. package/dist/adapters/claude.js +908 -42
  8. package/dist/adapters/claude.js.map +1 -1
  9. package/dist/adapters/gemini.d.ts +55 -0
  10. package/dist/adapters/gemini.d.ts.map +1 -0
  11. package/dist/adapters/gemini.js +318 -0
  12. package/dist/adapters/gemini.js.map +1 -0
  13. package/dist/adapters/grok.d.ts +73 -0
  14. package/dist/adapters/grok.d.ts.map +1 -0
  15. package/dist/adapters/grok.js +430 -0
  16. package/dist/adapters/grok.js.map +1 -0
  17. package/dist/adapters/openai.d.ts +1 -1
  18. package/dist/adapters/openai.d.ts.map +1 -1
  19. package/dist/adapters/openai.js +47 -8
  20. package/dist/adapters/openai.js.map +1 -1
  21. package/dist/auth/claude.d.ts +11 -9
  22. package/dist/auth/claude.d.ts.map +1 -1
  23. package/dist/auth/claude.js +107 -71
  24. package/dist/auth/claude.js.map +1 -1
  25. package/dist/auth/gemini.d.ts +58 -0
  26. package/dist/auth/gemini.d.ts.map +1 -0
  27. package/dist/auth/gemini.js +172 -0
  28. package/dist/auth/gemini.js.map +1 -0
  29. package/dist/auth/grok.d.ts +73 -0
  30. package/dist/auth/grok.d.ts.map +1 -0
  31. package/dist/auth/grok.js +211 -0
  32. package/dist/auth/grok.js.map +1 -0
  33. package/dist/auth/index.d.ts +14 -7
  34. package/dist/auth/index.d.ts.map +1 -1
  35. package/dist/auth/index.js +41 -6
  36. package/dist/auth/index.js.map +1 -1
  37. package/dist/auth/keychain.d.ts +20 -7
  38. package/dist/auth/keychain.d.ts.map +1 -1
  39. package/dist/auth/keychain.js +85 -29
  40. package/dist/auth/keychain.js.map +1 -1
  41. package/dist/auth/openai.d.ts +2 -2
  42. package/dist/auth/openai.d.ts.map +1 -1
  43. package/dist/auth/openai.js +30 -32
  44. package/dist/auth/openai.js.map +1 -1
  45. package/dist/cli/commands/auth.d.ts +1 -1
  46. package/dist/cli/commands/auth.d.ts.map +1 -1
  47. package/dist/cli/commands/auth.js +79 -8
  48. package/dist/cli/commands/auth.js.map +1 -1
  49. package/dist/cli/commands/create.d.ts.map +1 -1
  50. package/dist/cli/commands/create.js +15 -4
  51. package/dist/cli/commands/create.js.map +1 -1
  52. package/dist/cli/interactive.d.ts.map +1 -1
  53. package/dist/cli/interactive.js +1494 -114
  54. package/dist/cli/interactive.js.map +1 -1
  55. package/dist/config/defaults.d.ts +9 -1
  56. package/dist/config/defaults.d.ts.map +1 -1
  57. package/dist/config/defaults.js +19 -2
  58. package/dist/config/defaults.js.map +1 -1
  59. package/dist/config/index.d.ts +19 -0
  60. package/dist/config/index.d.ts.map +1 -1
  61. package/dist/config/index.js +33 -1
  62. package/dist/config/index.js.map +1 -1
  63. package/dist/config/schema.d.ts +47 -0
  64. package/dist/config/schema.d.ts.map +1 -1
  65. package/dist/config/schema.js +29 -1
  66. package/dist/config/schema.js.map +1 -1
  67. package/dist/generators/fullstack.d.ts +32 -0
  68. package/dist/generators/fullstack.d.ts.map +1 -0
  69. package/dist/generators/fullstack.js +497 -0
  70. package/dist/generators/fullstack.js.map +1 -0
  71. package/dist/generators/index.d.ts +4 -3
  72. package/dist/generators/index.d.ts.map +1 -1
  73. package/dist/generators/index.js +15 -1
  74. package/dist/generators/index.js.map +1 -1
  75. package/dist/generators/python.d.ts +17 -1
  76. package/dist/generators/python.d.ts.map +1 -1
  77. package/dist/generators/python.js +34 -20
  78. package/dist/generators/python.js.map +1 -1
  79. package/dist/generators/templates/fullstack.d.ts +113 -0
  80. package/dist/generators/templates/fullstack.d.ts.map +1 -0
  81. package/dist/generators/templates/fullstack.js +1004 -0
  82. package/dist/generators/templates/fullstack.js.map +1 -0
  83. package/dist/generators/typescript.d.ts +19 -1
  84. package/dist/generators/typescript.d.ts.map +1 -1
  85. package/dist/generators/typescript.js +37 -20
  86. package/dist/generators/typescript.js.map +1 -1
  87. package/dist/state/index.d.ts +108 -0
  88. package/dist/state/index.d.ts.map +1 -1
  89. package/dist/state/index.js +551 -4
  90. package/dist/state/index.js.map +1 -1
  91. package/dist/state/registry.d.ts +52 -0
  92. package/dist/state/registry.d.ts.map +1 -0
  93. package/dist/state/registry.js +215 -0
  94. package/dist/state/registry.js.map +1 -0
  95. package/dist/types/cli.d.ts +8 -0
  96. package/dist/types/cli.d.ts.map +1 -1
  97. package/dist/types/cli.js.map +1 -1
  98. package/dist/types/consensus.d.ts +186 -4
  99. package/dist/types/consensus.d.ts.map +1 -1
  100. package/dist/types/consensus.js +35 -3
  101. package/dist/types/consensus.js.map +1 -1
  102. package/dist/types/project.d.ts +76 -0
  103. package/dist/types/project.d.ts.map +1 -1
  104. package/dist/types/project.js +1 -1
  105. package/dist/types/project.js.map +1 -1
  106. package/dist/types/workflow.d.ts +217 -16
  107. package/dist/types/workflow.d.ts.map +1 -1
  108. package/dist/types/workflow.js +40 -1
  109. package/dist/types/workflow.js.map +1 -1
  110. package/dist/workflow/auto-fix.d.ts +45 -0
  111. package/dist/workflow/auto-fix.d.ts.map +1 -0
  112. package/dist/workflow/auto-fix.js +274 -0
  113. package/dist/workflow/auto-fix.js.map +1 -0
  114. package/dist/workflow/consensus.d.ts +70 -2
  115. package/dist/workflow/consensus.d.ts.map +1 -1
  116. package/dist/workflow/consensus.js +872 -17
  117. package/dist/workflow/consensus.js.map +1 -1
  118. package/dist/workflow/execution-mode.d.ts +10 -4
  119. package/dist/workflow/execution-mode.d.ts.map +1 -1
  120. package/dist/workflow/execution-mode.js +547 -58
  121. package/dist/workflow/execution-mode.js.map +1 -1
  122. package/dist/workflow/index.d.ts +14 -2
  123. package/dist/workflow/index.d.ts.map +1 -1
  124. package/dist/workflow/index.js +69 -6
  125. package/dist/workflow/index.js.map +1 -1
  126. package/dist/workflow/milestone-workflow.d.ts +34 -0
  127. package/dist/workflow/milestone-workflow.d.ts.map +1 -0
  128. package/dist/workflow/milestone-workflow.js +414 -0
  129. package/dist/workflow/milestone-workflow.js.map +1 -0
  130. package/dist/workflow/plan-mode.d.ts +80 -3
  131. package/dist/workflow/plan-mode.d.ts.map +1 -1
  132. package/dist/workflow/plan-mode.js +767 -49
  133. package/dist/workflow/plan-mode.js.map +1 -1
  134. package/dist/workflow/plan-storage.d.ts +386 -0
  135. package/dist/workflow/plan-storage.d.ts.map +1 -0
  136. package/dist/workflow/plan-storage.js +878 -0
  137. package/dist/workflow/plan-storage.js.map +1 -0
  138. package/dist/workflow/project-verification.d.ts +37 -0
  139. package/dist/workflow/project-verification.d.ts.map +1 -0
  140. package/dist/workflow/project-verification.js +381 -0
  141. package/dist/workflow/project-verification.js.map +1 -0
  142. package/dist/workflow/task-workflow.d.ts +37 -0
  143. package/dist/workflow/task-workflow.d.ts.map +1 -0
  144. package/dist/workflow/task-workflow.js +386 -0
  145. package/dist/workflow/task-workflow.js.map +1 -0
  146. package/dist/workflow/test-runner.d.ts +9 -0
  147. package/dist/workflow/test-runner.d.ts.map +1 -1
  148. package/dist/workflow/test-runner.js +101 -5
  149. package/dist/workflow/test-runner.js.map +1 -1
  150. package/dist/workflow/ui-designer.d.ts +82 -0
  151. package/dist/workflow/ui-designer.d.ts.map +1 -0
  152. package/dist/workflow/ui-designer.js +234 -0
  153. package/dist/workflow/ui-designer.js.map +1 -0
  154. package/dist/workflow/ui-setup.d.ts +58 -0
  155. package/dist/workflow/ui-setup.d.ts.map +1 -0
  156. package/dist/workflow/ui-setup.js +685 -0
  157. package/dist/workflow/ui-setup.js.map +1 -0
  158. package/dist/workflow/ui-verification.d.ts +114 -0
  159. package/dist/workflow/ui-verification.d.ts.map +1 -0
  160. package/dist/workflow/ui-verification.js +258 -0
  161. package/dist/workflow/ui-verification.js.map +1 -0
  162. package/dist/workflow/workflow-logger.d.ts +110 -0
  163. package/dist/workflow/workflow-logger.d.ts.map +1 -0
  164. package/dist/workflow/workflow-logger.js +267 -0
  165. package/dist/workflow/workflow-logger.js.map +1 -0
  166. package/dist/workflow/workspace-manager.d.ts +342 -0
  167. package/dist/workflow/workspace-manager.d.ts.map +1 -0
  168. package/dist/workflow/workspace-manager.js +733 -0
  169. package/dist/workflow/workspace-manager.js.map +1 -0
  170. package/package.json +2 -2
  171. package/src/adapters/claude.ts +1067 -47
  172. package/src/adapters/gemini.ts +373 -0
  173. package/src/adapters/grok.ts +492 -0
  174. package/src/adapters/openai.ts +48 -9
  175. package/src/auth/claude.ts +120 -78
  176. package/src/auth/gemini.ts +207 -0
  177. package/src/auth/grok.ts +255 -0
  178. package/src/auth/index.ts +47 -9
  179. package/src/auth/keychain.ts +95 -28
  180. package/src/auth/openai.ts +29 -36
  181. package/src/cli/commands/auth.ts +89 -10
  182. package/src/cli/commands/create.ts +13 -4
  183. package/src/cli/interactive.ts +1774 -142
  184. package/src/config/defaults.ts +19 -2
  185. package/src/config/index.ts +36 -1
  186. package/src/config/schema.ts +30 -1
  187. package/src/generators/fullstack.ts +551 -0
  188. package/src/generators/index.ts +25 -1
  189. package/src/generators/python.ts +65 -20
  190. package/src/generators/templates/fullstack.ts +1047 -0
  191. package/src/generators/typescript.ts +69 -20
  192. package/src/state/index.ts +713 -4
  193. package/src/state/registry.ts +278 -0
  194. package/src/types/cli.ts +8 -0
  195. package/src/types/consensus.ts +197 -6
  196. package/src/types/project.ts +82 -1
  197. package/src/types/workflow.ts +90 -1
  198. package/src/workflow/auto-fix.ts +340 -0
  199. package/src/workflow/consensus.ts +1180 -16
  200. package/src/workflow/execution-mode.ts +673 -74
  201. package/src/workflow/index.ts +95 -6
  202. package/src/workflow/milestone-workflow.ts +576 -0
  203. package/src/workflow/plan-mode.ts +924 -50
  204. package/src/workflow/plan-storage.ts +1282 -0
  205. package/src/workflow/project-verification.ts +471 -0
  206. package/src/workflow/task-workflow.ts +528 -0
  207. package/src/workflow/test-runner.ts +120 -5
  208. package/src/workflow/ui-designer.ts +337 -0
  209. package/src/workflow/ui-setup.ts +797 -0
  210. package/src/workflow/ui-verification.ts +357 -0
  211. package/src/workflow/workflow-logger.ts +353 -0
  212. package/src/workflow/workspace-manager.ts +912 -0
  213. package/tests/config/config.test.ts +1 -1
  214. package/tests/types/consensus.test.ts +3 -3
  215. package/tests/workflow/plan-mode.test.ts +213 -0
  216. package/tests/workflow/test-runner.test.ts +5 -3
@@ -1,11 +1,85 @@
1
1
  /**
2
2
  * Consensus workflow module
3
- * Handles the iterative consensus-building process between Claude and OpenAI
3
+ * Handles the iterative consensus-building process between Claude and OpenAI/Gemini
4
+ * with arbitration support when consensus cannot be reached
4
5
  */
5
6
  import { DEFAULT_CONSENSUS_CONFIG } from '../types/consensus.js';
6
- import { requestConsensus } from '../adapters/openai.js';
7
+ import { requestConsensus as requestOpenAIConsensus } from '../adapters/openai.js';
8
+ import { requestConsensus as requestGeminiConsensus, requestArbitration as requestGeminiArbitration } from '../adapters/gemini.js';
9
+ import { requestConsensus as requestGrokConsensus, requestArbitration as requestGrokArbitration } from '../adapters/grok.js';
7
10
  import { revisePlan } from '../adapters/claude.js';
8
11
  import { recordConsensusIteration } from '../state/index.js';
12
+ import { createPlanStorage, } from './plan-storage.js';
13
+ /**
14
+ * Request consensus from the configured reviewer (OpenAI, Gemini, or Grok)
15
+ */
16
+ async function requestReviewerConsensus(plan, context, reviewer, config) {
17
+ if (reviewer === 'gemini') {
18
+ return requestGeminiConsensus(plan, context, {
19
+ model: config.geminiModel,
20
+ temperature: config.temperature,
21
+ maxTokens: config.maxTokens,
22
+ });
23
+ }
24
+ if (reviewer === 'grok') {
25
+ return requestGrokConsensus(plan, context, {
26
+ model: config.grokModel,
27
+ temperature: config.temperature,
28
+ maxTokens: config.maxTokens,
29
+ });
30
+ }
31
+ return requestOpenAIConsensus(plan, context, config);
32
+ }
33
+ /**
34
+ * Request arbitration from the configured arbitrator (OpenAI, Gemini, or Grok)
35
+ */
36
+ async function requestArbitratorDecision(plan, reviewerFeedback, claudeFeedback, iterations, scores, arbitrator) {
37
+ if (arbitrator === 'grok') {
38
+ return requestGrokArbitration(plan, reviewerFeedback, claudeFeedback, iterations, scores);
39
+ }
40
+ // Default to Gemini for arbitration (most capable at reasoning)
41
+ return requestGeminiArbitration(plan, reviewerFeedback, claudeFeedback, iterations, scores);
42
+ }
43
+ /**
44
+ * Check if the consensus process is "stuck" (not improving)
45
+ * Detects both:
46
+ * 1. Stagnation: scores within 5% of each other
47
+ * 2. Oscillation: scores going up and down without progress
48
+ */
49
+ function isStuck(scores, stuckIterations) {
50
+ if (scores.length < stuckIterations)
51
+ return false;
52
+ const recentScores = scores.slice(-stuckIterations);
53
+ const maxRecent = Math.max(...recentScores);
54
+ const minRecent = Math.min(...recentScores);
55
+ // Check 1: Stagnation - all recent scores are within 5% of each other
56
+ if ((maxRecent - minRecent) <= 5) {
57
+ return true;
58
+ }
59
+ // Check 2: Oscillation - detect if we're going up and down without making progress
60
+ // e.g., 70 -> 85 -> 75 -> 80 (oscillating around ~77.5)
61
+ if (recentScores.length >= 3) {
62
+ const avg = recentScores.reduce((a, b) => a + b, 0) / recentScores.length;
63
+ const deviations = recentScores.map(s => Math.abs(s - avg));
64
+ const avgDeviation = deviations.reduce((a, b) => a + b, 0) / deviations.length;
65
+ // If scores are oscillating around an average (avg deviation > 3% but range < 20%)
66
+ // and we're not trending upward, consider it stuck
67
+ if (avgDeviation > 3 && (maxRecent - minRecent) < 20) {
68
+ // Check if we're trending upward (last score should be close to max)
69
+ const lastScore = recentScores[recentScores.length - 1];
70
+ const firstScore = recentScores[0];
71
+ // Not improving if last score is not better than first
72
+ if (lastScore <= firstScore + 2) {
73
+ return true;
74
+ }
75
+ }
76
+ }
77
+ return false;
78
+ }
79
+ /**
80
+ * Default consensus timeout (15 minutes total)
81
+ */
82
+ const DEFAULT_CONSENSUS_TIMEOUT_MS = 15 * 60 * 1000;
9
83
  /**
10
84
  * Format a plan for consensus review
11
85
  * Structures the plan in a way that's optimal for review
@@ -55,6 +129,7 @@ export function meetsThreshold(score, threshold = DEFAULT_CONSENSUS_CONFIG.thres
55
129
  }
56
130
  /**
57
131
  * Iterate until consensus is reached
132
+ * Supports configurable reviewer and arbitration when stuck
58
133
  *
59
134
  * @param initialPlan - The initial plan to review
60
135
  * @param context - Project context
@@ -62,15 +137,91 @@ export function meetsThreshold(score, threshold = DEFAULT_CONSENSUS_CONFIG.thres
62
137
  * @returns The consensus process result
63
138
  */
64
139
  export async function iterateUntilConsensus(initialPlan, context, options) {
65
- const { projectDir, config = {}, onIteration, onRevision, } = options;
66
- const { threshold = DEFAULT_CONSENSUS_CONFIG.threshold, maxIterations = DEFAULT_CONSENSUS_CONFIG.maxIterations, } = config;
140
+ const { projectDir, config = {}, isFullstack = false, language: providedLanguage, onIteration, onRevision, onConcerns, onArbitration, onProgress, } = options;
141
+ // Derive language from isFullstack if not explicitly provided
142
+ const language = providedLanguage || (isFullstack ? 'fullstack' : 'python');
143
+ const { threshold = DEFAULT_CONSENSUS_CONFIG.threshold, maxIterations = DEFAULT_CONSENSUS_CONFIG.maxIterations, reviewer = DEFAULT_CONSENSUS_CONFIG.reviewer, arbitrator = DEFAULT_CONSENSUS_CONFIG.arbitrator, enableArbitration = DEFAULT_CONSENSUS_CONFIG.enableArbitration, arbitrationThreshold = DEFAULT_CONSENSUS_CONFIG.arbitrationThreshold, stuckIterations = DEFAULT_CONSENSUS_CONFIG.stuckIterations, } = config;
67
144
  const iterations = [];
145
+ const scores = [];
68
146
  let currentPlan = initialPlan;
69
147
  let iteration = 0;
148
+ // Track the best plan throughout the process
149
+ let bestPlan = initialPlan;
150
+ let bestScore = 0;
151
+ let bestIteration = 0;
152
+ let lastConcerns = [];
153
+ let lastRecommendations = [];
154
+ let lastAnalysis = '';
155
+ // Track arbitration attempts to prevent infinite loops
156
+ let arbitrationAttempts = 0;
157
+ // Track elapsed time to detect stuck processes
158
+ const startTime = Date.now();
159
+ const maxArbitrationAttempts = 2;
160
+ onProgress?.('consensus', `Using ${reviewer} as reviewer${enableArbitration ? `, ${arbitrator} as arbitrator` : ''}`);
70
161
  while (iteration < maxIterations) {
71
162
  iteration++;
72
- // Request consensus review from OpenAI
73
- const consensusResult = await requestConsensus(currentPlan, context, config);
163
+ // Check total elapsed time - if timing out, try arbitration before giving up
164
+ const totalElapsed = Date.now() - startTime;
165
+ if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS && enableArbitration && arbitrationAttempts < maxArbitrationAttempts) {
166
+ onProgress?.('consensus', `Consensus timeout after ${Math.round(totalElapsed / 60000)} minutes - invoking arbitrator before accepting`);
167
+ try {
168
+ arbitrationAttempts++;
169
+ const arbitrationResult = await requestArbitratorDecision(bestPlan, lastAnalysis, `Consensus timed out after ${Math.round(totalElapsed / 60000)} minutes. Best score: ${bestScore}%. Main concerns: ${lastConcerns.slice(0, 3).join('; ')}`, iteration, scores, arbitrator);
170
+ if (onArbitration) {
171
+ onArbitration(arbitrationResult);
172
+ }
173
+ // Accept arbitration result (we're out of time)
174
+ onProgress?.('arbitration', `Arbitrator decision: ${arbitrationResult.approved ? 'APPROVED' : 'REVISE'} with ${arbitrationResult.score}%`);
175
+ return {
176
+ approved: arbitrationResult.approved || arbitrationResult.score >= 80,
177
+ finalPlan: bestPlan,
178
+ finalScore: arbitrationResult.score,
179
+ bestPlan,
180
+ bestScore: arbitrationResult.score,
181
+ bestIteration,
182
+ iterations,
183
+ totalIterations: iteration - 1,
184
+ finalConcerns: arbitrationResult.minorConcerns || lastConcerns,
185
+ finalRecommendations: arbitrationResult.suggestedChanges || lastRecommendations,
186
+ arbitrated: true,
187
+ arbitrationResult,
188
+ timedOut: true,
189
+ };
190
+ }
191
+ catch (arbError) {
192
+ onProgress?.('arbitration', `Arbitration failed on timeout: ${arbError instanceof Error ? arbError.message : 'Unknown error'}`);
193
+ // Fall through to accept best plan
194
+ }
195
+ }
196
+ // Hard timeout - no more arbitration attempts left
197
+ if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS) {
198
+ onProgress?.('consensus', `Consensus timeout - accepting best plan with ${bestScore}%`);
199
+ return {
200
+ approved: bestScore >= arbitrationThreshold,
201
+ finalPlan: bestPlan,
202
+ finalScore: bestScore,
203
+ bestPlan,
204
+ bestScore,
205
+ bestIteration,
206
+ iterations,
207
+ totalIterations: iteration - 1,
208
+ finalConcerns: lastConcerns,
209
+ finalRecommendations: lastRecommendations,
210
+ arbitrated: false,
211
+ timedOut: true,
212
+ };
213
+ }
214
+ // Log iteration timing
215
+ const iterationStart = Date.now();
216
+ const elapsedMinutes = Math.round((iterationStart - startTime) / 60000);
217
+ onProgress?.('consensus', `Iteration ${iteration} starting (${elapsedMinutes}min elapsed)`);
218
+ // Request consensus review from configured reviewer
219
+ onProgress?.('consensus', `Requesting review from ${reviewer}...`);
220
+ const consensusResult = await requestReviewerConsensus(currentPlan, context, reviewer, config);
221
+ // Log iteration duration
222
+ const iterationDuration = Math.round((Date.now() - iterationStart) / 1000);
223
+ onProgress?.('consensus', `Review completed in ${iterationDuration}s - score: ${consensusResult.score}%`);
224
+ scores.push(consensusResult.score);
74
225
  // Record the iteration
75
226
  const iterationRecord = {
76
227
  iteration,
@@ -81,45 +232,155 @@ export async function iterateUntilConsensus(initialPlan, context, options) {
81
232
  iterations.push(iterationRecord);
82
233
  // Save to project state
83
234
  await recordConsensusIteration(projectDir, iterationRecord);
84
- // Notify callback
235
+ // Track best plan - only update if this score is better
236
+ if (consensusResult.score > bestScore) {
237
+ bestPlan = currentPlan;
238
+ bestScore = consensusResult.score;
239
+ bestIteration = iteration;
240
+ }
241
+ // Track concerns for output
242
+ lastConcerns = consensusResult.concerns || [];
243
+ lastRecommendations = consensusResult.recommendations || [];
244
+ lastAnalysis = consensusResult.analysis || '';
245
+ // Notify callbacks
85
246
  if (onIteration) {
86
247
  onIteration(iteration, consensusResult);
87
248
  }
249
+ if (onConcerns && (lastConcerns.length > 0 || lastRecommendations.length > 0)) {
250
+ onConcerns(lastConcerns, lastRecommendations);
251
+ }
88
252
  // Check if we've reached consensus
89
253
  if (meetsThreshold(consensusResult.score, threshold)) {
90
254
  return {
91
255
  approved: true,
92
256
  finalPlan: currentPlan,
93
257
  finalScore: consensusResult.score,
258
+ bestPlan: currentPlan,
259
+ bestScore: consensusResult.score,
260
+ bestIteration: iteration,
94
261
  iterations,
95
262
  totalIterations: iteration,
263
+ finalConcerns: [],
264
+ finalRecommendations: [],
265
+ arbitrated: false,
96
266
  };
97
267
  }
268
+ // Check if we're stuck and should trigger arbitration
269
+ if (enableArbitration &&
270
+ bestScore >= arbitrationThreshold &&
271
+ isStuck(scores, stuckIterations) &&
272
+ arbitrationAttempts < maxArbitrationAttempts) {
273
+ arbitrationAttempts++;
274
+ onProgress?.('arbitration', `Consensus stuck at ${bestScore}%, invoking ${arbitrator} arbitrator (attempt ${arbitrationAttempts}/${maxArbitrationAttempts})...`);
275
+ try {
276
+ const arbitrationResult = await requestArbitratorDecision(bestPlan, lastAnalysis, `The plan has been revised ${iteration} times. Best score achieved: ${bestScore}%. The reviewer's main concerns are: ${lastConcerns.slice(0, 3).join('; ')}`, iteration, scores, arbitrator);
277
+ if (onArbitration) {
278
+ onArbitration(arbitrationResult);
279
+ }
280
+ // Accept if arbitrator approves OR if arbitrator gives a high score (>= 88%)
281
+ // This prevents infinite REVISE loops when the arbitrator is happy enough
282
+ const acceptArbitration = arbitrationResult.approved ||
283
+ arbitrationResult.score >= 88 ||
284
+ (arbitrationAttempts >= maxArbitrationAttempts && arbitrationResult.score >= 80);
285
+ if (acceptArbitration) {
286
+ const reason = arbitrationResult.approved
287
+ ? `Arbitrator approved plan with ${arbitrationResult.score}% confidence`
288
+ : `Arbitrator score ${arbitrationResult.score}% is acceptable - proceeding with best plan`;
289
+ onProgress?.('arbitration', reason);
290
+ return {
291
+ approved: true,
292
+ finalPlan: bestPlan,
293
+ finalScore: arbitrationResult.score,
294
+ bestPlan,
295
+ bestScore: arbitrationResult.score,
296
+ bestIteration,
297
+ iterations,
298
+ totalIterations: iteration,
299
+ finalConcerns: arbitrationResult.minorConcerns || [],
300
+ finalRecommendations: arbitrationResult.suggestedChanges || [],
301
+ arbitrated: true,
302
+ arbitrationResult,
303
+ };
304
+ }
305
+ else {
306
+ onProgress?.('arbitration', `Arbitrator requests changes: ${arbitrationResult.suggestedChanges.slice(0, 2).join('; ')}`);
307
+ // Apply arbitrator's suggested changes
308
+ if (arbitrationResult.suggestedChanges.length > 0) {
309
+ onProgress?.('consensus', 'Applying arbitrator suggestions...');
310
+ const revisionResult = await revisePlan(bestPlan, arbitrationResult.reasoning, arbitrationResult.suggestedChanges, language);
311
+ if (revisionResult.success && revisionResult.response) {
312
+ currentPlan = revisionResult.response;
313
+ // Reset stuck detection after arbitration revision
314
+ scores.length = 0;
315
+ scores.push(arbitrationResult.score);
316
+ onProgress?.('consensus', 'Plan revised based on arbitrator feedback');
317
+ }
318
+ else {
319
+ onProgress?.('consensus', 'Revision failed, continuing with current plan');
320
+ }
321
+ }
322
+ }
323
+ }
324
+ catch (error) {
325
+ onProgress?.('arbitration', `Arbitration failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
326
+ // If we've tried arbitration and it failed, accept the best plan we have
327
+ if (arbitrationAttempts >= maxArbitrationAttempts && bestScore >= arbitrationThreshold) {
328
+ onProgress?.('arbitration', `Max arbitration attempts reached, accepting best plan with ${bestScore}%`);
329
+ return {
330
+ approved: true,
331
+ finalPlan: bestPlan,
332
+ finalScore: bestScore,
333
+ bestPlan,
334
+ bestScore,
335
+ bestIteration,
336
+ iterations,
337
+ totalIterations: iteration,
338
+ finalConcerns: lastConcerns,
339
+ finalRecommendations: lastRecommendations,
340
+ arbitrated: true,
341
+ };
342
+ }
343
+ }
344
+ }
98
345
  // If not at max iterations, revise the plan
99
346
  if (iteration < maxIterations) {
100
347
  const concerns = extractConcerns(consensusResult);
348
+ onProgress?.('consensus', 'Revising plan based on feedback...');
349
+ // Create a progress handler for revision
350
+ const revisionProgress = onProgress
351
+ ? (msg) => onProgress('consensus', `[revision] ${msg}`)
352
+ : undefined;
101
353
  // Use Claude to revise the plan
102
- const revisionResult = await revisePlan(currentPlan, consensusResult.analysis, concerns);
354
+ const revisionResult = await revisePlan(currentPlan, consensusResult.analysis, concerns, language, revisionProgress);
103
355
  if (revisionResult.success && revisionResult.response) {
356
+ // Only use the revised plan for the next iteration
357
+ // The best plan tracking above will decide if it's actually better
104
358
  currentPlan = revisionResult.response;
105
359
  if (onRevision) {
106
360
  onRevision(iteration, currentPlan);
107
361
  }
108
362
  }
109
363
  else {
110
- // If revision fails, try to continue with current plan
364
+ // If revision fails, try to continue with best plan
111
365
  console.warn(`Plan revision failed at iteration ${iteration}:`, revisionResult.error);
366
+ currentPlan = bestPlan;
112
367
  }
113
368
  }
114
369
  }
115
370
  // Max iterations reached without consensus
116
- const lastIteration = iterations[iterations.length - 1];
371
+ // Return the BEST plan we found, not the last one
117
372
  return {
118
373
  approved: false,
119
- finalPlan: currentPlan,
120
- finalScore: lastIteration?.result.score || 0,
374
+ finalPlan: bestPlan,
375
+ finalScore: bestScore,
376
+ bestPlan,
377
+ bestScore,
378
+ bestIteration,
121
379
  iterations,
122
380
  totalIterations: iteration,
381
+ finalConcerns: lastConcerns,
382
+ finalRecommendations: lastRecommendations,
383
+ arbitrated: false,
123
384
  };
124
385
  }
125
386
  /**
@@ -132,27 +393,48 @@ export function summarizeConsensusProcess(result) {
132
393
  const lines = [];
133
394
  lines.push(`## Consensus Summary`);
134
395
  lines.push('');
135
- lines.push(`**Status:** ${result.approved ? 'APPROVED' : 'NOT APPROVED'}`);
396
+ lines.push(`**Status:** ${result.approved ? 'APPROVED' : 'NOT APPROVED'}${result.arbitrated ? ' (via arbitration)' : ''}`);
136
397
  lines.push(`**Final Score:** ${result.finalScore}%`);
398
+ lines.push(`**Best Score:** ${result.bestScore}% (iteration ${result.bestIteration})`);
137
399
  lines.push(`**Total Iterations:** ${result.totalIterations}`);
400
+ if (result.arbitrated && result.arbitrationResult) {
401
+ lines.push('');
402
+ lines.push(`### Arbitration Decision`);
403
+ lines.push(`- Decision: ${result.arbitrationResult.approved ? 'APPROVED' : 'REVISE'}`);
404
+ lines.push(`- Confidence: ${result.arbitrationResult.score}%`);
405
+ if (result.arbitrationResult.criticalConcerns.length > 0) {
406
+ lines.push(`- Critical Concerns: ${result.arbitrationResult.criticalConcerns.length}`);
407
+ }
408
+ if (result.arbitrationResult.minorConcerns.length > 0) {
409
+ lines.push(`- Minor Concerns: ${result.arbitrationResult.minorConcerns.length}`);
410
+ }
411
+ }
138
412
  lines.push('');
139
413
  lines.push(`### Iteration History`);
140
414
  lines.push('');
141
415
  for (const iteration of result.iterations) {
142
- lines.push(`#### Iteration ${iteration.iteration}`);
416
+ const isBest = iteration.iteration === result.bestIteration;
417
+ lines.push(`#### Iteration ${iteration.iteration}${isBest ? ' (BEST)' : ''}`);
143
418
  lines.push(`- Score: ${iteration.result.score}%`);
144
419
  lines.push(`- Strengths: ${iteration.result.strengths?.length || 0}`);
145
420
  lines.push(`- Concerns: ${iteration.result.concerns?.length || 0}`);
146
421
  lines.push('');
147
422
  }
148
423
  if (!result.approved) {
149
- const lastResult = result.iterations[result.iterations.length - 1]?.result;
150
- if (lastResult?.concerns && lastResult.concerns.length > 0) {
424
+ if (result.finalConcerns && result.finalConcerns.length > 0) {
151
425
  lines.push(`### Remaining Concerns`);
152
426
  lines.push('');
153
- for (const concern of lastResult.concerns) {
427
+ for (const concern of result.finalConcerns) {
154
428
  lines.push(`- ${concern}`);
155
429
  }
430
+ lines.push('');
431
+ }
432
+ if (result.finalRecommendations && result.finalRecommendations.length > 0) {
433
+ lines.push(`### Recommendations`);
434
+ lines.push('');
435
+ for (const rec of result.finalRecommendations) {
436
+ lines.push(`- ${rec}`);
437
+ }
156
438
  }
157
439
  }
158
440
  return lines.join('\n');
@@ -217,4 +499,577 @@ export function getScoreTrend(iterations) {
217
499
  return 'declining';
218
500
  return 'stable';
219
501
  }
502
/**
 * Ask one reviewer to score the plan and normalise the answer into a
 * feedback record.
 *
 * Progress is reported (phase 'consensus') before the request and again
 * afterwards with the elapsed whole seconds and the returned score.
 *
 * @param plan - Plan text forwarded to the reviewer
 * @param context - Project context forwarded to the reviewer
 * @param reviewer - Reviewer identifier; echoed into the returned record
 * @param config - Consensus configuration forwarded to the reviewer call
 * @param onProgress - Optional `(phase, message)` progress callback
 * @returns `{ reviewer, score, timestamp, concerns, recommendations, analysis }`
 *          where missing lists default to `[]` and missing analysis to `''`
 */
async function collectReviewerFeedback(plan, context, reviewer, config, onProgress) {
    onProgress?.('consensus', `Requesting review from ${reviewer}...`);
    const requestedAt = Date.now();
    const review = await requestReviewerConsensus(plan, context, reviewer, config);
    const elapsedSeconds = Math.round((Date.now() - requestedAt) / 1000);
    onProgress?.('consensus', `${reviewer} review completed in ${elapsedSeconds}s - score: ${review.score}%`);
    const feedback = {
        reviewer,
        score: review.score,
        timestamp: new Date().toISOString(),
    };
    // Reviewers may omit these fields; downstream code expects concrete values.
    feedback.concerns = review.concerns || [];
    feedback.recommendations = review.recommendations || [];
    feedback.analysis = review.analysis || '';
    return feedback;
}
520
/**
 * Request feedback from every reviewer concurrently.
 *
 * A reviewer whose request rejects is reported through `onProgress` and left
 * out of the result, so one failing reviewer does not fail the whole round.
 *
 * @param plan - Plan text forwarded to each reviewer
 * @param context - Project context forwarded to each reviewer
 * @param reviewers - Reviewer identifiers to query
 * @param config - Consensus configuration forwarded to each reviewer call
 * @param onProgress - Optional `(phase, message)` progress callback
 * @returns Feedback records of the reviewers that succeeded, in reviewer order
 */
async function collectAllFeedback(plan, context, reviewers, config, onProgress) {
    onProgress?.('consensus', `Collecting feedback from ${reviewers.length} reviewer(s) in parallel...`);
    const reviewOrNull = async (reviewer) => {
        try {
            return await collectReviewerFeedback(plan, context, reviewer, config, onProgress);
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown error';
            onProgress?.('consensus', `${reviewer} review failed: ${reason}`);
            return null;
        }
    };
    const pending = [];
    for (const reviewer of reviewers) {
        // Start every request before awaiting any of them.
        pending.push(reviewOrNull(reviewer));
    }
    const settled = await Promise.all(pending);
    return settled.filter((entry) => entry !== null);
}
533
/**
 * Categorize a concern or recommendation by app target.
 *
 * The text is lower-cased and scanned for frontend and backend indicator
 * keywords. Keywords are matched as whole words (an optional plural "s"/"es"
 * suffix is accepted) so that short keywords such as "ui", "api", "orm",
 * "dom" or "rest" do not fire inside unrelated words like "build", "rapid",
 * "perform", "random" or "interest", and nested keywords ("css" inside
 * "tailwindcss", "route" inside "router") are not counted twice.
 *
 * @param content - Concern or recommendation text; null/undefined is treated
 *                  as an empty string
 * @returns 'frontend' or 'backend' when that side has strictly more distinct
 *          keyword hits than the other and at least two; otherwise 'unified'
 */
function categorizeByContent(content) {
    const lowerContent = String(content ?? '').toLowerCase();
    // Frontend indicators
    const frontendKeywords = [
        'react', 'component', 'jsx', 'tsx', 'css', 'tailwind', 'ui', 'user interface',
        'button', 'form', 'input', 'modal', 'page', 'router', 'navigation', 'state management',
        'redux', 'zustand', 'vite', 'frontend', 'front-end', 'client', 'browser', 'dom',
        'styling', 'layout', 'responsive', 'animation', 'hook', 'usestate', 'useeffect',
        'shadcn', 'radix', 'tailwindcss', 'vitest', 'jest', 'testing-library', 'playwright',
    ];
    // Backend indicators
    const backendKeywords = [
        'fastapi', 'api', 'endpoint', 'route', 'database', 'sql', 'postgresql', 'neon',
        'model', 'schema', 'migration', 'orm', 'sqlalchemy', 'pydantic', 'validation',
        'authentication', 'authorization', 'jwt', 'token', 'middleware', 'backend', 'back-end',
        'server', 'python', 'pytest', 'alembic', 'celery', 'redis', 'cache', 'queue',
        'repository', 'service', 'crud', 'rest', 'graphql', 'websocket',
    ];
    // Keywords are plain text; escape them so they are safe inside a pattern.
    const escapeForRegex = (keyword) => keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Each keyword counts at most once, however often it occurs in the text.
    const countMatches = (keywords) => keywords.filter((keyword) => new RegExp(`\\b${escapeForRegex(keyword)}(?:s|es)?\\b`).test(lowerContent)).length;
    const frontendMatches = countMatches(frontendKeywords);
    const backendMatches = countMatches(backendKeywords);
    // A side must clearly dominate (strictly more hits, and at least two).
    if (frontendMatches > backendMatches && frontendMatches >= 2) {
        return 'frontend';
    }
    if (backendMatches > frontendMatches && backendMatches >= 2) {
        return 'backend';
    }
    return 'unified';
}
579
/**
 * Tag every concern and recommendation with the app it appears to target and
 * derive a coarse per-app score from how many items landed on each app.
 *
 * @param concerns - Concern strings
 * @param recommendations - Recommendation strings
 * @returns `{ taggedConcerns, taggedRecommendations, appScores }` where each
 *          tagged item is `{ app, content }` and each app score starts at 100
 *          (0 when there are no items at all) and loses 5 points per item
 *          tagged with that app, never dropping below 0
 */
function categorizeFeedbackItems(concerns, recommendations) {
    const tag = (text) => ({ app: categorizeByContent(text), content: text });
    const taggedConcerns = concerns.map((text) => tag(text));
    const taggedRecommendations = recommendations.map((text) => tag(text));
    // Tally concerns and recommendations together per app in a single pass.
    const itemsPerApp = { frontend: 0, backend: 0, unified: 0 };
    for (const { app } of [...taggedConcerns, ...taggedRecommendations]) {
        if (Object.prototype.hasOwnProperty.call(itemsPerApp, app)) {
            itemsPerApp[app] += 1;
        }
    }
    const hasItems = taggedConcerns.length + taggedRecommendations.length > 0;
    const startingScore = hasItems ? 100 : 0;
    const scoreFor = (app) => Math.max(0, startingScore - itemsPerApp[app] * 5);
    return {
        taggedConcerns,
        taggedRecommendations,
        appScores: {
            frontend: scoreFor('frontend'),
            backend: scoreFor('backend'),
            unified: scoreFor('unified'),
        },
    };
}
611
/**
 * Derive per-app scores from reviewer feedback and tagged feedback items.
 *
 * The base score is the rounded mean of the reviewers' scores (0 when there
 * is no feedback). For frontend and backend, each tagged concern costs
 * 2 points and each tagged recommendation 1 point. The unified score only
 * deducts 2 points per unified concern; unified recommendations are not
 * counted. Deducted scores are floored at 0.
 *
 * @param allFeedback - Feedback records; only `score` is read
 * @param taggedConcerns - Items of shape `{ app, ... }`
 * @param taggedRecommendations - Items of shape `{ app, ... }`
 * @returns `{ frontend, backend, unified }` scores
 */
function calculateAppScores(allFeedback, taggedConcerns, taggedRecommendations) {
    let baseScore = 0;
    if (allFeedback.length > 0) {
        let total = 0;
        for (const feedback of allFeedback) {
            total += feedback.score;
        }
        baseScore = Math.round(total / allFeedback.length);
    }
    const countFor = (items, app) => items.reduce((count, item) => (item.app === app ? count + 1 : count), 0);
    const scoreFor = (app) => {
        const concernCount = countFor(taggedConcerns, app);
        const recCount = countFor(taggedRecommendations, app);
        if (concernCount === 0 && recCount === 0) {
            // Nothing tagged for this app: report the base score untouched.
            return baseScore;
        }
        return Math.max(0, baseScore - (concernCount * 2 + recCount));
    };
    const unifiedPenalty = countFor(taggedConcerns, 'unified') * 2;
    return {
        frontend: scoreFor('frontend'),
        backend: scoreFor('backend'),
        unified: Math.max(0, baseScore - unifiedPenalty),
    };
}
641
/**
 * Optimized consensus process that batches feedback and reduces API calls.
 *
 * Key optimizations:
 * 1. Plans are stored through plan storage, not regenerated from scratch
 * 2. ALL reviewer feedback is collected before a revision is requested
 * 3. The plan is revised ONCE per round with the combined feedback
 * 4. Reviews run in parallel when more than one reviewer is configured
 * 5. Per-app tracking for fullstack projects (frontend/backend/unified)
 *
 * Each round: collect feedback, average the reviewer scores, record the
 * iteration, then either return (score >= threshold), ask the arbitrator
 * (scores are stuck and arbitration is enabled) or revise the plan. When the
 * overall time budget is exceeded, the best plan so far is returned, via the
 * arbitrator if arbitration is enabled.
 *
 * @param initialPlan - The initial plan to seek consensus on
 * @param context - Project context for review
 * @param options - Consensus options: `projectDir`, optional `config`
 *   (threshold, maxIterations, reviewer, arbitrator, enableArbitration,
 *   arbitrationThreshold, stuckIterations), tracking ids/names
 *   (`milestoneId`, `milestoneName`, `taskId`, `taskName`), callbacks
 *   (`onIteration`, `onRevision`, `onConcerns`, `onArbitration`,
 *   `onProgress`), `parallelReviews` (default true), `additionalReviewers`
 *   (default []) and `isFullstack` (default false)
 * @returns Consensus process result. Results returned on consensus, on
 *   arbitrator approval after a stall, and on exhausting the iterations also
 *   carry `appScores`, `appApproved`, `taggedConcerns`,
 *   `taggedRecommendations` and `corrections` when `isFullstack` is set.
 *   Timeout results carry `timedOut: true`.
 */
export async function runOptimizedConsensusProcess(initialPlan, context, options) {
    const { projectDir, config = {}, onIteration, onRevision, onConcerns, onArbitration, onProgress, milestoneId, milestoneName, taskId, taskName, parallelReviews = true, additionalReviewers = [], isFullstack = false, } = options;
    // Derive language from isFullstack for revision prompts
    const language = isFullstack ? 'fullstack' : 'python';
    const { threshold = DEFAULT_CONSENSUS_CONFIG.threshold, maxIterations = DEFAULT_CONSENSUS_CONFIG.maxIterations, reviewer = DEFAULT_CONSENSUS_CONFIG.reviewer, arbitrator = DEFAULT_CONSENSUS_CONFIG.arbitrator, enableArbitration = DEFAULT_CONSENSUS_CONFIG.enableArbitration, arbitrationThreshold = DEFAULT_CONSENSUS_CONFIG.arbitrationThreshold, stuckIterations = DEFAULT_CONSENSUS_CONFIG.stuckIterations, } = config;
    // Plans are tracked per task when a task id is given, else per milestone.
    const planType = taskId ? 'task' : 'milestone';
    // Fresh metadata object per call so storage never shares one instance.
    const planMeta = () => ({ milestoneId, milestoneName, taskId, taskName });
    // Rounded mean of the reviewer scores; 0 when no reviewer answered.
    const averageScore = (feedbackList) => (feedbackList.length > 0
        ? Math.round(feedbackList.reduce((sum, f) => sum + f.score, 0) / feedbackList.length)
        : 0);
    const describeError = (error) => (error instanceof Error ? error.message : 'Unknown error');
    // Initialize plan storage with fullstack support
    const planStorage = createPlanStorage(projectDir, isFullstack);
    await planStorage.initialize();
    // Track per-app consensus for fullstack projects
    const appScoresHistory = {
        frontend: [],
        backend: [],
        unified: [],
    };
    const allTaggedConcerns = [];
    const allTaggedRecommendations = [];
    const corrections = [];
    // Primary reviewer first, then any additional reviewers that differ from it
    const allReviewers = [reviewer, ...additionalReviewers.filter(r => r !== reviewer)];
    const iterations = [];
    const scores = [];
    let currentPlan = initialPlan;
    let iteration = 0;
    // Track the best plan
    let bestPlan = initialPlan;
    let bestScore = 0;
    let bestIteration = 0;
    let lastConcerns = [];
    let lastRecommendations = [];
    let lastAnalysis = '';
    const startTime = Date.now();
    onProgress?.('consensus', `Using optimized consensus with ${allReviewers.join(', ')} as reviewer(s)`);
    onProgress?.('consensus', `Plan tracking: milestone=${milestoneId}${taskId ? `, task=${taskId}` : ''}`);
    if (isFullstack) {
        onProgress?.('consensus', `Fullstack mode enabled - tracking per-app consensus (frontend/backend/unified)`);
    }
    // Save initial plan to storage
    await planStorage.savePlan(currentPlan, planType, planMeta());
    while (iteration < maxIterations) {
        iteration++;
        // Check timeout
        const totalElapsed = Date.now() - startTime;
        if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS) {
            onProgress?.('consensus', `Consensus timeout after ${Math.round(totalElapsed / 60000)} minutes`);
            if (enableArbitration) {
                try {
                    const arbitrationResult = await requestArbitratorDecision(bestPlan, lastAnalysis, `Timeout. Best score: ${bestScore}%. Concerns: ${lastConcerns.slice(0, 3).join('; ')}`, iteration, scores, arbitrator);
                    if (onArbitration)
                        onArbitration(arbitrationResult);
                    return {
                        // Use the configured arbitration threshold, as the stuck path
                        // and the non-arbitrated timeout fallback below do.
                        approved: arbitrationResult.approved || arbitrationResult.score >= arbitrationThreshold,
                        finalPlan: bestPlan,
                        finalScore: arbitrationResult.score,
                        bestPlan,
                        bestScore: arbitrationResult.score,
                        bestIteration,
                        iterations,
                        // This round was never run, so it is not counted.
                        totalIterations: iteration - 1,
                        finalConcerns: arbitrationResult.minorConcerns || lastConcerns,
                        finalRecommendations: arbitrationResult.suggestedChanges || lastRecommendations,
                        arbitrated: true,
                        arbitrationResult,
                        timedOut: true,
                    };
                }
                catch (arbError) {
                    // Report the failure, then fall through to accept the best plan.
                    onProgress?.('arbitration', `Arbitration failed: ${describeError(arbError)}`);
                }
            }
            return {
                approved: bestScore >= arbitrationThreshold,
                finalPlan: bestPlan,
                finalScore: bestScore,
                bestPlan,
                bestScore,
                bestIteration,
                iterations,
                totalIterations: iteration - 1,
                finalConcerns: lastConcerns,
                finalRecommendations: lastRecommendations,
                arbitrated: false,
                timedOut: true,
            };
        }
        const elapsedMinutes = Math.round((Date.now() - startTime) / 60000);
        onProgress?.('consensus', `Iteration ${iteration} starting (${elapsedMinutes}min elapsed)`);
        // Clear previous feedback for this round
        await planStorage.clearFeedback(milestoneId, taskId);
        // ============================================
        // OPTIMIZATION: Collect ALL feedback in parallel
        // ============================================
        let allFeedback;
        if (parallelReviews && allReviewers.length > 1) {
            allFeedback = await collectAllFeedback(currentPlan, context, allReviewers, config, onProgress);
        }
        else {
            // Sequential fallback.
            // NOTE(review): unlike the parallel path, a failing reviewer here
            // rejects the whole process - confirm this is intended.
            allFeedback = [];
            for (const rev of allReviewers) {
                const feedback = await collectReviewerFeedback(currentPlan, context, rev, config, onProgress);
                allFeedback.push(feedback);
            }
        }
        // Combine all concerns and recommendations (exact duplicates removed)
        const allConcerns = [...new Set(allFeedback.flatMap(f => f.concerns))];
        const allRecommendations = [...new Set(allFeedback.flatMap(f => f.recommendations))];
        const combinedAnalysis = allFeedback.map(f => `[${f.reviewer}] ${f.analysis}`).join('\n\n');
        lastConcerns = allConcerns;
        lastRecommendations = allRecommendations;
        // ============================================
        // FULLSTACK: Categorize feedback by app target
        // ============================================
        let currentAppScores = { unified: 0 };
        let iterationTaggedConcerns = [];
        let iterationTaggedRecs = [];
        if (isFullstack) {
            onProgress?.('consensus', 'Categorizing feedback by app (frontend/backend/unified)...');
            // Categorize concerns and recommendations
            const categorized = categorizeFeedbackItems(allConcerns, allRecommendations);
            iterationTaggedConcerns = categorized.taggedConcerns;
            iterationTaggedRecs = categorized.taggedRecommendations;
            // Calculate per-app scores
            currentAppScores = calculateAppScores(allFeedback, iterationTaggedConcerns, iterationTaggedRecs);
            // Track scores history
            appScoresHistory.frontend.push(currentAppScores.frontend || 0);
            appScoresHistory.backend.push(currentAppScores.backend || 0);
            appScoresHistory.unified.push(currentAppScores.unified);
            // Accumulate tagged items for final result
            allTaggedConcerns.push(...iterationTaggedConcerns);
            allTaggedRecommendations.push(...iterationTaggedRecs);
            // Log per-app breakdown
            const frontendConcerns = iterationTaggedConcerns.filter(c => c.app === 'frontend').length;
            const backendConcerns = iterationTaggedConcerns.filter(c => c.app === 'backend').length;
            const unifiedConcerns = iterationTaggedConcerns.filter(c => c.app === 'unified').length;
            onProgress?.('consensus', `Per-app concerns: FE=${frontendConcerns}, BE=${backendConcerns}, Unified=${unifiedConcerns}`);
            onProgress?.('consensus', `Per-app scores: FE=${currentAppScores.frontend}%, BE=${currentAppScores.backend}%, Unified=${currentAppScores.unified}%`);
            // Save feedback to per-app directories
            for (const feedback of allFeedback) {
                // Attach only the tagged items this reviewer actually raised
                const fullstackFeedback = {
                    ...feedback,
                    appScores: currentAppScores,
                    taggedConcerns: iterationTaggedConcerns.filter(c => feedback.concerns.some(fc => fc === c.content)),
                    taggedRecommendations: iterationTaggedRecs.filter(r => feedback.recommendations.some(fr => fr === r.content)),
                    isFullstack: true,
                };
                // Save to all app directories
                await planStorage.saveFullstackFeedback(fullstackFeedback, planType, milestoneId, taskId);
            }
        }
        else {
            // Non-fullstack: save feedback without app categorization
            for (const feedback of allFeedback) {
                await planStorage.saveFeedback(feedback, milestoneId, taskId);
            }
            currentAppScores = { unified: averageScore(allFeedback) };
        }
        // Calculate combined score (average of all reviewers)
        const combinedScore = averageScore(allFeedback);
        scores.push(combinedScore);
        lastAnalysis = combinedAnalysis;
        // Create consensus result for tracking
        const consensusResult = {
            score: combinedScore,
            analysis: combinedAnalysis,
            concerns: allConcerns,
            recommendations: allRecommendations,
            approved: combinedScore >= threshold,
            strengths: [],
            rawResponse: combinedAnalysis,
        };
        // Record iteration
        const iterationRecord = {
            iteration,
            plan: currentPlan,
            timestamp: new Date().toISOString(),
            result: consensusResult,
        };
        iterations.push(iterationRecord);
        if (onIteration)
            onIteration(iteration, consensusResult);
        if (onConcerns)
            onConcerns(allConcerns, allRecommendations);
        // Update best plan tracking
        if (combinedScore > bestScore) {
            bestScore = combinedScore;
            bestPlan = currentPlan;
            bestIteration = iteration;
        }
        // Save plan with updated score (including per-app scores for fullstack)
        await planStorage.savePlan(currentPlan, planType, {
            ...planMeta(),
            score: combinedScore,
            frontendScore: isFullstack ? currentAppScores.frontend : undefined,
            backendScore: isFullstack ? currentAppScores.backend : undefined,
            unifiedScore: isFullstack ? currentAppScores.unified : undefined,
        });
        // Record correction for fullstack tracking
        if (isFullstack && iteration > 1) {
            const previousScore = scores.length >= 2 ? scores[scores.length - 2] : 0;
            const correction = {
                id: `correction-${iteration}`,
                timestamp: new Date().toISOString(),
                app: 'unified', // Top-level correction
                previousScore,
                newScore: combinedScore,
                concerns: lastConcerns.slice(0, 5),
                changes: lastRecommendations.slice(0, 3),
                reviewer,
            };
            corrections.push(correction);
            await planStorage.recordCorrection(planType, correction, milestoneId, taskId);
        }
        // Record in project state
        await recordConsensusIteration(projectDir, iterationRecord);
        onProgress?.('consensus', `Combined score: ${combinedScore}% (from ${allFeedback.length} reviewer(s))`);
        // Check if consensus reached
        if (combinedScore >= threshold) {
            onProgress?.('consensus', `Consensus reached at ${combinedScore}%`);
            await planStorage.updateStatus('approved', planType, milestoneId, taskId);
            const consensusOutcome = {
                approved: true,
                finalPlan: currentPlan,
                finalScore: combinedScore,
                bestPlan: currentPlan,
                bestScore: combinedScore,
                bestIteration: iteration,
                iterations,
                totalIterations: iteration,
                finalConcerns: allConcerns,
                finalRecommendations: allRecommendations,
                arbitrated: false,
            };
            // Update per-app approval status for fullstack
            if (isFullstack) {
                const feApproved = (currentAppScores.frontend || 0) >= threshold;
                const beApproved = (currentAppScores.backend || 0) >= threshold;
                await planStorage.updateAppApproval(planType, 'frontend', feApproved, currentAppScores.frontend || 0, milestoneId, taskId);
                await planStorage.updateAppApproval(planType, 'backend', beApproved, currentAppScores.backend || 0, milestoneId, taskId);
                await planStorage.updateAppApproval(planType, 'unified', true, currentAppScores.unified, milestoneId, taskId);
                onProgress?.('consensus', `Per-app approval: FE=${feApproved}, BE=${beApproved}, Unified=true`);
                return {
                    ...consensusOutcome,
                    // Fullstack-specific fields
                    appScores: currentAppScores,
                    appApproved: {
                        frontend: feApproved,
                        backend: beApproved,
                        unified: true,
                    },
                    taggedConcerns: allTaggedConcerns,
                    taggedRecommendations: allTaggedRecommendations,
                    corrections,
                };
            }
            return consensusOutcome;
        }
        // Check if stuck
        if (isStuck(scores, stuckIterations) && enableArbitration) {
            onProgress?.('consensus', `Consensus stuck - invoking ${arbitrator} for arbitration`);
            try {
                const arbitrationResult = await requestArbitratorDecision(bestPlan, combinedAnalysis, `Stuck after ${iteration} iterations. Scores: ${scores.slice(-stuckIterations).join(', ')}`, iteration, scores, arbitrator);
                if (onArbitration)
                    onArbitration(arbitrationResult);
                if (arbitrationResult.approved || arbitrationResult.score >= arbitrationThreshold) {
                    onProgress?.('arbitration', `Arbitrator approved with ${arbitrationResult.score}%`);
                    await planStorage.updateStatus('approved', planType, milestoneId, taskId);
                    const arbitratedOutcome = {
                        approved: true,
                        finalPlan: bestPlan,
                        finalScore: arbitrationResult.score,
                        bestPlan,
                        bestScore: arbitrationResult.score,
                        bestIteration,
                        iterations,
                        totalIterations: iteration,
                        finalConcerns: arbitrationResult.minorConcerns || allConcerns,
                        finalRecommendations: arbitrationResult.suggestedChanges || allRecommendations,
                        arbitrated: true,
                        arbitrationResult,
                    };
                    if (isFullstack) {
                        const feApproved = (currentAppScores.frontend || 0) >= arbitrationThreshold;
                        const beApproved = (currentAppScores.backend || 0) >= arbitrationThreshold;
                        return {
                            ...arbitratedOutcome,
                            // Fullstack-specific fields
                            appScores: currentAppScores,
                            appApproved: {
                                frontend: feApproved,
                                backend: beApproved,
                                unified: true,
                            },
                            taggedConcerns: allTaggedConcerns,
                            taggedRecommendations: allTaggedRecommendations,
                            corrections,
                        };
                    }
                    return arbitratedOutcome;
                }
                // Arbitrator did not approve: keep iterating with a revision.
            }
            catch (arbError) {
                onProgress?.('arbitration', `Arbitration failed: ${describeError(arbError)}`);
            }
        }
        // ============================================
        // OPTIMIZATION: Single revision with ALL feedback
        // ============================================
        if (iteration < maxIterations) {
            onProgress?.('consensus', `Revising plan with combined feedback from ${allFeedback.length} reviewer(s)...`);
            const revisionProgress = onProgress
                ? (msg) => onProgress('consensus', `[revision] ${msg}`)
                : undefined;
            // Revise with ALL combined feedback (single call per round)
            const revisionResult = await revisePlan(currentPlan, combinedAnalysis, allConcerns, language, revisionProgress);
            if (revisionResult.success && revisionResult.response) {
                currentPlan = revisionResult.response;
                // Save revised plan
                await planStorage.savePlan(currentPlan, planType, planMeta());
                if (onRevision)
                    onRevision(iteration, currentPlan);
            }
            else {
                onProgress?.('consensus', `Revision failed, continuing with best plan`);
                currentPlan = bestPlan;
            }
        }
    }
    // Max iterations reached
    await planStorage.updateStatus('reviewing', planType, milestoneId, taskId);
    const exhaustedOutcome = {
        approved: false,
        finalPlan: bestPlan,
        finalScore: bestScore,
        bestPlan,
        bestScore,
        bestIteration,
        iterations,
        totalIterations: iteration,
        finalConcerns: lastConcerns,
        finalRecommendations: lastRecommendations,
        arbitrated: false,
    };
    if (!isFullstack) {
        return exhaustedOutcome;
    }
    // Final per-app scores: the most recent entry of each history, if any
    const latest = (history, fallback) => (history.length > 0 ? history[history.length - 1] : fallback);
    const finalAppScores = {
        frontend: latest(appScoresHistory.frontend, undefined),
        backend: latest(appScoresHistory.backend, undefined),
        unified: latest(appScoresHistory.unified, bestScore),
    };
    const feApproved = (finalAppScores.frontend || 0) >= threshold;
    const beApproved = (finalAppScores.backend || 0) >= threshold;
    return {
        ...exhaustedOutcome,
        // Fullstack-specific fields
        appScores: finalAppScores,
        appApproved: {
            frontend: feApproved,
            backend: beApproved,
            unified: bestScore >= threshold,
        },
        taggedConcerns: allTaggedConcerns,
        taggedRecommendations: allTaggedRecommendations,
        corrections,
    };
}
220
1075
  //# sourceMappingURL=consensus.js.map