popeye-cli 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. package/README.md +521 -125
  2. package/dist/adapters/claude.d.ts +16 -4
  3. package/dist/adapters/claude.d.ts.map +1 -1
  4. package/dist/adapters/claude.js +679 -33
  5. package/dist/adapters/claude.js.map +1 -1
  6. package/dist/adapters/gemini.d.ts +55 -0
  7. package/dist/adapters/gemini.d.ts.map +1 -0
  8. package/dist/adapters/gemini.js +318 -0
  9. package/dist/adapters/gemini.js.map +1 -0
  10. package/dist/adapters/openai.d.ts.map +1 -1
  11. package/dist/adapters/openai.js +41 -7
  12. package/dist/adapters/openai.js.map +1 -1
  13. package/dist/auth/claude.d.ts +11 -9
  14. package/dist/auth/claude.d.ts.map +1 -1
  15. package/dist/auth/claude.js +107 -71
  16. package/dist/auth/claude.js.map +1 -1
  17. package/dist/auth/gemini.d.ts +58 -0
  18. package/dist/auth/gemini.d.ts.map +1 -0
  19. package/dist/auth/gemini.js +172 -0
  20. package/dist/auth/gemini.js.map +1 -0
  21. package/dist/auth/index.d.ts +11 -7
  22. package/dist/auth/index.d.ts.map +1 -1
  23. package/dist/auth/index.js +23 -5
  24. package/dist/auth/index.js.map +1 -1
  25. package/dist/auth/keychain.d.ts +20 -7
  26. package/dist/auth/keychain.d.ts.map +1 -1
  27. package/dist/auth/keychain.js +85 -29
  28. package/dist/auth/keychain.js.map +1 -1
  29. package/dist/auth/openai.d.ts +2 -2
  30. package/dist/auth/openai.d.ts.map +1 -1
  31. package/dist/auth/openai.js +30 -32
  32. package/dist/auth/openai.js.map +1 -1
  33. package/dist/cli/index.d.ts.map +1 -1
  34. package/dist/cli/index.js +4 -7
  35. package/dist/cli/index.js.map +1 -1
  36. package/dist/cli/interactive.d.ts +2 -2
  37. package/dist/cli/interactive.d.ts.map +1 -1
  38. package/dist/cli/interactive.js +1380 -183
  39. package/dist/cli/interactive.js.map +1 -1
  40. package/dist/config/defaults.d.ts +6 -1
  41. package/dist/config/defaults.d.ts.map +1 -1
  42. package/dist/config/defaults.js +10 -2
  43. package/dist/config/defaults.js.map +1 -1
  44. package/dist/config/index.d.ts +10 -0
  45. package/dist/config/index.d.ts.map +1 -1
  46. package/dist/config/index.js +19 -0
  47. package/dist/config/index.js.map +1 -1
  48. package/dist/config/schema.d.ts +20 -0
  49. package/dist/config/schema.d.ts.map +1 -1
  50. package/dist/config/schema.js +7 -0
  51. package/dist/config/schema.js.map +1 -1
  52. package/dist/generators/python.d.ts.map +1 -1
  53. package/dist/generators/python.js +1 -0
  54. package/dist/generators/python.js.map +1 -1
  55. package/dist/generators/typescript.d.ts.map +1 -1
  56. package/dist/generators/typescript.js +1 -0
  57. package/dist/generators/typescript.js.map +1 -1
  58. package/dist/state/index.d.ts +108 -0
  59. package/dist/state/index.d.ts.map +1 -1
  60. package/dist/state/index.js +551 -4
  61. package/dist/state/index.js.map +1 -1
  62. package/dist/state/registry.d.ts +52 -0
  63. package/dist/state/registry.d.ts.map +1 -0
  64. package/dist/state/registry.js +215 -0
  65. package/dist/state/registry.js.map +1 -0
  66. package/dist/types/cli.d.ts +4 -0
  67. package/dist/types/cli.d.ts.map +1 -1
  68. package/dist/types/cli.js.map +1 -1
  69. package/dist/types/consensus.d.ts +69 -4
  70. package/dist/types/consensus.d.ts.map +1 -1
  71. package/dist/types/consensus.js +24 -3
  72. package/dist/types/consensus.js.map +1 -1
  73. package/dist/types/workflow.d.ts +55 -0
  74. package/dist/types/workflow.d.ts.map +1 -1
  75. package/dist/types/workflow.js +16 -0
  76. package/dist/types/workflow.js.map +1 -1
  77. package/dist/workflow/auto-fix.d.ts +45 -0
  78. package/dist/workflow/auto-fix.d.ts.map +1 -0
  79. package/dist/workflow/auto-fix.js +274 -0
  80. package/dist/workflow/auto-fix.js.map +1 -0
  81. package/dist/workflow/consensus.d.ts +44 -2
  82. package/dist/workflow/consensus.d.ts.map +1 -1
  83. package/dist/workflow/consensus.js +565 -17
  84. package/dist/workflow/consensus.js.map +1 -1
  85. package/dist/workflow/execution-mode.d.ts +10 -4
  86. package/dist/workflow/execution-mode.d.ts.map +1 -1
  87. package/dist/workflow/execution-mode.js +547 -58
  88. package/dist/workflow/execution-mode.js.map +1 -1
  89. package/dist/workflow/index.d.ts +14 -2
  90. package/dist/workflow/index.d.ts.map +1 -1
  91. package/dist/workflow/index.js +69 -6
  92. package/dist/workflow/index.js.map +1 -1
  93. package/dist/workflow/milestone-workflow.d.ts +34 -0
  94. package/dist/workflow/milestone-workflow.d.ts.map +1 -0
  95. package/dist/workflow/milestone-workflow.js +414 -0
  96. package/dist/workflow/milestone-workflow.js.map +1 -0
  97. package/dist/workflow/plan-mode.d.ts +14 -1
  98. package/dist/workflow/plan-mode.d.ts.map +1 -1
  99. package/dist/workflow/plan-mode.js +589 -47
  100. package/dist/workflow/plan-mode.js.map +1 -1
  101. package/dist/workflow/plan-storage.d.ts +142 -0
  102. package/dist/workflow/plan-storage.d.ts.map +1 -0
  103. package/dist/workflow/plan-storage.js +331 -0
  104. package/dist/workflow/plan-storage.js.map +1 -0
  105. package/dist/workflow/project-verification.d.ts +37 -0
  106. package/dist/workflow/project-verification.d.ts.map +1 -0
  107. package/dist/workflow/project-verification.js +381 -0
  108. package/dist/workflow/project-verification.js.map +1 -0
  109. package/dist/workflow/task-workflow.d.ts +37 -0
  110. package/dist/workflow/task-workflow.d.ts.map +1 -0
  111. package/dist/workflow/task-workflow.js +383 -0
  112. package/dist/workflow/task-workflow.js.map +1 -0
  113. package/dist/workflow/test-runner.d.ts +1 -0
  114. package/dist/workflow/test-runner.d.ts.map +1 -1
  115. package/dist/workflow/test-runner.js +9 -5
  116. package/dist/workflow/test-runner.js.map +1 -1
  117. package/dist/workflow/ui-designer.d.ts +82 -0
  118. package/dist/workflow/ui-designer.d.ts.map +1 -0
  119. package/dist/workflow/ui-designer.js +234 -0
  120. package/dist/workflow/ui-designer.js.map +1 -0
  121. package/dist/workflow/ui-setup.d.ts +58 -0
  122. package/dist/workflow/ui-setup.d.ts.map +1 -0
  123. package/dist/workflow/ui-setup.js +685 -0
  124. package/dist/workflow/ui-setup.js.map +1 -0
  125. package/dist/workflow/ui-verification.d.ts +114 -0
  126. package/dist/workflow/ui-verification.d.ts.map +1 -0
  127. package/dist/workflow/ui-verification.js +258 -0
  128. package/dist/workflow/ui-verification.js.map +1 -0
  129. package/dist/workflow/workflow-logger.d.ts +110 -0
  130. package/dist/workflow/workflow-logger.d.ts.map +1 -0
  131. package/dist/workflow/workflow-logger.js +267 -0
  132. package/dist/workflow/workflow-logger.js.map +1 -0
  133. package/package.json +2 -2
  134. package/src/adapters/claude.ts +815 -34
  135. package/src/adapters/gemini.ts +373 -0
  136. package/src/adapters/openai.ts +40 -7
  137. package/src/auth/claude.ts +120 -78
  138. package/src/auth/gemini.ts +207 -0
  139. package/src/auth/index.ts +28 -8
  140. package/src/auth/keychain.ts +95 -28
  141. package/src/auth/openai.ts +29 -36
  142. package/src/cli/index.ts +4 -7
  143. package/src/cli/interactive.ts +1641 -216
  144. package/src/config/defaults.ts +10 -2
  145. package/src/config/index.ts +21 -0
  146. package/src/config/schema.ts +7 -0
  147. package/src/generators/python.ts +1 -0
  148. package/src/generators/typescript.ts +1 -0
  149. package/src/state/index.ts +713 -4
  150. package/src/state/registry.ts +278 -0
  151. package/src/types/cli.ts +4 -0
  152. package/src/types/consensus.ts +65 -6
  153. package/src/types/workflow.ts +35 -0
  154. package/src/workflow/auto-fix.ts +340 -0
  155. package/src/workflow/consensus.ts +750 -16
  156. package/src/workflow/execution-mode.ts +673 -74
  157. package/src/workflow/index.ts +95 -6
  158. package/src/workflow/milestone-workflow.ts +576 -0
  159. package/src/workflow/plan-mode.ts +696 -50
  160. package/src/workflow/plan-storage.ts +482 -0
  161. package/src/workflow/project-verification.ts +471 -0
  162. package/src/workflow/task-workflow.ts +525 -0
  163. package/src/workflow/test-runner.ts +10 -5
  164. package/src/workflow/ui-designer.ts +337 -0
  165. package/src/workflow/ui-setup.ts +797 -0
  166. package/src/workflow/ui-verification.ts +357 -0
  167. package/src/workflow/workflow-logger.ts +353 -0
  168. package/tests/config/config.test.ts +1 -1
  169. package/tests/types/consensus.test.ts +3 -3
  170. package/tests/workflow/plan-mode.test.ts +213 -0
  171. package/tests/workflow/test-runner.test.ts +5 -3
@@ -1,13 +1,16 @@
1
1
  /**
2
2
  * Consensus workflow module
3
- * Handles the iterative consensus-building process between Claude and OpenAI
3
+ * Handles the iterative consensus-building process between Claude and OpenAI/Gemini
4
+ * with arbitration support when consensus cannot be reached
4
5
  */
5
6
 
6
- import type { ConsensusResult, ConsensusIteration, ConsensusConfig } from '../types/consensus.js';
7
+ import type { ConsensusResult, ConsensusIteration, ConsensusConfig, ArbitrationResult, AIProvider } from '../types/consensus.js';
7
8
  import { DEFAULT_CONSENSUS_CONFIG } from '../types/consensus.js';
8
- import { requestConsensus } from '../adapters/openai.js';
9
+ import { requestConsensus as requestOpenAIConsensus } from '../adapters/openai.js';
10
+ import { requestConsensus as requestGeminiConsensus, requestArbitration as requestGeminiArbitration } from '../adapters/gemini.js';
9
11
  import { revisePlan } from '../adapters/claude.js';
10
12
  import { recordConsensusIteration } from '../state/index.js';
13
+ import { createPlanStorage, type ReviewerFeedback } from './plan-storage.js';
11
14
 
12
15
  /**
13
16
  * Options for consensus iteration
@@ -17,6 +20,9 @@ export interface ConsensusOptions {
17
20
  config?: Partial<ConsensusConfig>;
18
21
  onIteration?: (iteration: number, result: ConsensusResult) => void;
19
22
  onRevision?: (iteration: number, revisedPlan: string) => void;
23
+ onConcerns?: (concerns: string[], recommendations: string[]) => void;
24
+ onArbitration?: (result: ArbitrationResult) => void;
25
+ onProgress?: (phase: string, message: string) => void;
20
26
  }
21
27
 
22
28
  /**
@@ -26,10 +32,84 @@ export interface ConsensusProcessResult {
26
32
  approved: boolean;
27
33
  finalPlan: string;
28
34
  finalScore: number;
35
+ bestPlan: string;
36
+ bestScore: number;
37
+ bestIteration: number;
29
38
  iterations: ConsensusIteration[];
30
39
  totalIterations: number;
40
+ finalConcerns: string[];
41
+ finalRecommendations: string[];
42
+ arbitrated: boolean;
43
+ arbitrationResult?: ArbitrationResult;
44
+ /** True if consensus timed out and we accepted the best available plan */
45
+ timedOut?: boolean;
31
46
  }
32
47
 
/**
 * Send a plan to the selected reviewer and return that reviewer's consensus result.
 *
 * Only the `'gemini'` reviewer gets special handling: its adapter receives a reduced
 * options object built from `geminiModel`, `temperature` and `maxTokens`. Every other
 * reviewer value is routed to the OpenAI adapter, which receives `config` unchanged.
 *
 * @param plan - Plan text to be reviewed.
 * @param context - Project context passed alongside the plan.
 * @param reviewer - Provider selected to perform the review.
 * @param config - Partial consensus configuration supplying model and sampling options.
 * @returns The consensus result produced by the chosen adapter.
 */
async function requestReviewerConsensus(
  plan: string,
  context: string,
  reviewer: AIProvider,
  config: Partial<ConsensusConfig>
): Promise<ConsensusResult> {
  if (reviewer !== 'gemini') {
    // Anything that is not Gemini falls back to the OpenAI adapter.
    return requestOpenAIConsensus(plan, context, config);
  }

  // The Gemini adapter takes its own option names, so map them explicitly.
  const geminiOptions = {
    model: config.geminiModel,
    temperature: config.temperature,
    maxTokens: config.maxTokens,
  };
  return requestGeminiConsensus(plan, context, geminiOptions);
}
66
+
67
+ /**
68
+ * Check if the consensus process is "stuck" (not improving)
69
+ * Detects both:
70
+ * 1. Stagnation: scores within 5% of each other
71
+ * 2. Oscillation: scores going up and down without progress
72
+ */
73
+ function isStuck(scores: number[], stuckIterations: number): boolean {
74
+ if (scores.length < stuckIterations) return false;
75
+
76
+ const recentScores = scores.slice(-stuckIterations);
77
+ const maxRecent = Math.max(...recentScores);
78
+ const minRecent = Math.min(...recentScores);
79
+
80
+ // Check 1: Stagnation - all recent scores are within 5% of each other
81
+ if ((maxRecent - minRecent) <= 5) {
82
+ return true;
83
+ }
84
+
85
+ // Check 2: Oscillation - detect if we're going up and down without making progress
86
+ // e.g., 70 -> 85 -> 75 -> 80 (oscillating around ~77.5)
87
+ if (recentScores.length >= 3) {
88
+ const avg = recentScores.reduce((a, b) => a + b, 0) / recentScores.length;
89
+ const deviations = recentScores.map(s => Math.abs(s - avg));
90
+ const avgDeviation = deviations.reduce((a, b) => a + b, 0) / deviations.length;
91
+
92
+ // If scores are oscillating around an average (avg deviation > 3% but range < 20%)
93
+ // and we're not trending upward, consider it stuck
94
+ if (avgDeviation > 3 && (maxRecent - minRecent) < 20) {
95
+ // Check if we're trending upward (last score should be close to max)
96
+ const lastScore = recentScores[recentScores.length - 1];
97
+ const firstScore = recentScores[0];
98
+ // Not improving if last score is not better than first
99
+ if (lastScore <= firstScore + 2) {
100
+ return true;
101
+ }
102
+ }
103
+ }
104
+
105
+ return false;
106
+ }
107
+
/**
 * Time budget for a whole consensus run, in milliseconds (15 minutes).
 */
const DEFAULT_CONSENSUS_TIMEOUT_MS = 1000 * 60 * 15;
112
+
33
113
  /**
34
114
  * Format a plan for consensus review
35
115
  * Structures the plan in a way that's optimal for review
@@ -88,6 +168,7 @@ export function meetsThreshold(
88
168
 
89
169
  /**
90
170
  * Iterate until consensus is reached
171
+ * Supports configurable reviewer and arbitration when stuck
91
172
  *
92
173
  * @param initialPlan - The initial plan to review
93
174
  * @param context - Project context
@@ -104,22 +185,122 @@ export async function iterateUntilConsensus(
104
185
  config = {},
105
186
  onIteration,
106
187
  onRevision,
188
+ onConcerns,
189
+ onArbitration,
190
+ onProgress,
107
191
  } = options;
108
192
 
109
193
  const {
110
194
  threshold = DEFAULT_CONSENSUS_CONFIG.threshold,
111
195
  maxIterations = DEFAULT_CONSENSUS_CONFIG.maxIterations,
196
+ reviewer = DEFAULT_CONSENSUS_CONFIG.reviewer,
197
+ arbitrator = DEFAULT_CONSENSUS_CONFIG.arbitrator,
198
+ enableArbitration = DEFAULT_CONSENSUS_CONFIG.enableArbitration,
199
+ arbitrationThreshold = DEFAULT_CONSENSUS_CONFIG.arbitrationThreshold,
200
+ stuckIterations = DEFAULT_CONSENSUS_CONFIG.stuckIterations,
112
201
  } = config;
113
202
 
114
203
  const iterations: ConsensusIteration[] = [];
204
+ const scores: number[] = [];
115
205
  let currentPlan = initialPlan;
116
206
  let iteration = 0;
117
207
 
208
+ // Track the best plan throughout the process
209
+ let bestPlan = initialPlan;
210
+ let bestScore = 0;
211
+ let bestIteration = 0;
212
+ let lastConcerns: string[] = [];
213
+ let lastRecommendations: string[] = [];
214
+ let lastAnalysis = '';
215
+
216
+ // Track arbitration attempts to prevent infinite loops
217
+ let arbitrationAttempts = 0;
218
+
219
+ // Track elapsed time to detect stuck processes
220
+ const startTime = Date.now();
221
+ const maxArbitrationAttempts = 2;
222
+
223
+ onProgress?.('consensus', `Using ${reviewer} as reviewer${enableArbitration ? `, ${arbitrator} as arbitrator` : ''}`);
224
+
118
225
  while (iteration < maxIterations) {
119
226
  iteration++;
120
227
 
121
- // Request consensus review from OpenAI
122
- const consensusResult = await requestConsensus(currentPlan, context, config);
228
+ // Check total elapsed time - if timing out, try arbitration before giving up
229
+ const totalElapsed = Date.now() - startTime;
230
+ if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS && enableArbitration && arbitrationAttempts < maxArbitrationAttempts) {
231
+ onProgress?.('consensus', `Consensus timeout after ${Math.round(totalElapsed / 60000)} minutes - invoking arbitrator before accepting`);
232
+
233
+ try {
234
+ arbitrationAttempts++;
235
+ const arbitrationResult = await requestGeminiArbitration(
236
+ bestPlan,
237
+ lastAnalysis,
238
+ `Consensus timed out after ${Math.round(totalElapsed / 60000)} minutes. Best score: ${bestScore}%. Main concerns: ${lastConcerns.slice(0, 3).join('; ')}`,
239
+ iteration,
240
+ scores
241
+ );
242
+
243
+ if (onArbitration) {
244
+ onArbitration(arbitrationResult);
245
+ }
246
+
247
+ // Accept arbitration result (we're out of time)
248
+ onProgress?.('arbitration', `Arbitrator decision: ${arbitrationResult.approved ? 'APPROVED' : 'REVISE'} with ${arbitrationResult.score}%`);
249
+
250
+ return {
251
+ approved: arbitrationResult.approved || arbitrationResult.score >= 80,
252
+ finalPlan: bestPlan,
253
+ finalScore: arbitrationResult.score,
254
+ bestPlan,
255
+ bestScore: arbitrationResult.score,
256
+ bestIteration,
257
+ iterations,
258
+ totalIterations: iteration - 1,
259
+ finalConcerns: arbitrationResult.minorConcerns || lastConcerns,
260
+ finalRecommendations: arbitrationResult.suggestedChanges || lastRecommendations,
261
+ arbitrated: true,
262
+ arbitrationResult,
263
+ timedOut: true,
264
+ };
265
+ } catch (arbError) {
266
+ onProgress?.('arbitration', `Arbitration failed on timeout: ${arbError instanceof Error ? arbError.message : 'Unknown error'}`);
267
+ // Fall through to accept best plan
268
+ }
269
+ }
270
+
271
+ // Hard timeout - no more arbitration attempts left
272
+ if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS) {
273
+ onProgress?.('consensus', `Consensus timeout - accepting best plan with ${bestScore}%`);
274
+ return {
275
+ approved: bestScore >= arbitrationThreshold,
276
+ finalPlan: bestPlan,
277
+ finalScore: bestScore,
278
+ bestPlan,
279
+ bestScore,
280
+ bestIteration,
281
+ iterations,
282
+ totalIterations: iteration - 1,
283
+ finalConcerns: lastConcerns,
284
+ finalRecommendations: lastRecommendations,
285
+ arbitrated: false,
286
+ timedOut: true,
287
+ };
288
+ }
289
+
290
+ // Log iteration timing
291
+ const iterationStart = Date.now();
292
+ const elapsedMinutes = Math.round((iterationStart - startTime) / 60000);
293
+ onProgress?.('consensus', `Iteration ${iteration} starting (${elapsedMinutes}min elapsed)`);
294
+
295
+ // Request consensus review from configured reviewer
296
+ onProgress?.('consensus', `Requesting review from ${reviewer}...`);
297
+ const consensusResult = await requestReviewerConsensus(currentPlan, context, reviewer, config);
298
+
299
+ // Log iteration duration
300
+ const iterationDuration = Math.round((Date.now() - iterationStart) / 1000);
301
+ onProgress?.('consensus', `Review completed in ${iterationDuration}s - score: ${consensusResult.score}%`);
302
+
303
+ scores.push(consensusResult.score);
123
304
 
124
305
  // Record the iteration
125
306
  const iterationRecord: ConsensusIteration = {
@@ -134,54 +315,183 @@ export async function iterateUntilConsensus(
134
315
  // Save to project state
135
316
  await recordConsensusIteration(projectDir, iterationRecord);
136
317
 
137
- // Notify callback
318
+ // Track best plan - only update if this score is better
319
+ if (consensusResult.score > bestScore) {
320
+ bestPlan = currentPlan;
321
+ bestScore = consensusResult.score;
322
+ bestIteration = iteration;
323
+ }
324
+
325
+ // Track concerns for output
326
+ lastConcerns = consensusResult.concerns || [];
327
+ lastRecommendations = consensusResult.recommendations || [];
328
+ lastAnalysis = consensusResult.analysis || '';
329
+
330
+ // Notify callbacks
138
331
  if (onIteration) {
139
332
  onIteration(iteration, consensusResult);
140
333
  }
141
334
 
335
+ if (onConcerns && (lastConcerns.length > 0 || lastRecommendations.length > 0)) {
336
+ onConcerns(lastConcerns, lastRecommendations);
337
+ }
338
+
142
339
  // Check if we've reached consensus
143
340
  if (meetsThreshold(consensusResult.score, threshold)) {
144
341
  return {
145
342
  approved: true,
146
343
  finalPlan: currentPlan,
147
344
  finalScore: consensusResult.score,
345
+ bestPlan: currentPlan,
346
+ bestScore: consensusResult.score,
347
+ bestIteration: iteration,
148
348
  iterations,
149
349
  totalIterations: iteration,
350
+ finalConcerns: [],
351
+ finalRecommendations: [],
352
+ arbitrated: false,
150
353
  };
151
354
  }
152
355
 
356
+ // Check if we're stuck and should trigger arbitration
357
+ if (enableArbitration &&
358
+ bestScore >= arbitrationThreshold &&
359
+ isStuck(scores, stuckIterations) &&
360
+ arbitrationAttempts < maxArbitrationAttempts) {
361
+
362
+ arbitrationAttempts++;
363
+ onProgress?.('arbitration', `Consensus stuck at ${bestScore}%, invoking ${arbitrator} arbitrator (attempt ${arbitrationAttempts}/${maxArbitrationAttempts})...`);
364
+
365
+ try {
366
+ const arbitrationResult = await requestGeminiArbitration(
367
+ bestPlan,
368
+ lastAnalysis,
369
+ `The plan has been revised ${iteration} times. Best score achieved: ${bestScore}%. The reviewer's main concerns are: ${lastConcerns.slice(0, 3).join('; ')}`,
370
+ iteration,
371
+ scores
372
+ );
373
+
374
+ if (onArbitration) {
375
+ onArbitration(arbitrationResult);
376
+ }
377
+
378
+ // Accept if arbitrator approves OR if arbitrator gives a high score (>= 88%)
379
+ // This prevents infinite REVISE loops when the arbitrator is happy enough
380
+ const acceptArbitration = arbitrationResult.approved ||
381
+ arbitrationResult.score >= 88 ||
382
+ (arbitrationAttempts >= maxArbitrationAttempts && arbitrationResult.score >= 80);
383
+
384
+ if (acceptArbitration) {
385
+ const reason = arbitrationResult.approved
386
+ ? `Arbitrator approved plan with ${arbitrationResult.score}% confidence`
387
+ : `Arbitrator score ${arbitrationResult.score}% is acceptable - proceeding with best plan`;
388
+ onProgress?.('arbitration', reason);
389
+
390
+ return {
391
+ approved: true,
392
+ finalPlan: bestPlan,
393
+ finalScore: arbitrationResult.score,
394
+ bestPlan,
395
+ bestScore: arbitrationResult.score,
396
+ bestIteration,
397
+ iterations,
398
+ totalIterations: iteration,
399
+ finalConcerns: arbitrationResult.minorConcerns || [],
400
+ finalRecommendations: arbitrationResult.suggestedChanges || [],
401
+ arbitrated: true,
402
+ arbitrationResult,
403
+ };
404
+ } else {
405
+ onProgress?.('arbitration', `Arbitrator requests changes: ${arbitrationResult.suggestedChanges.slice(0, 2).join('; ')}`);
406
+ // Apply arbitrator's suggested changes
407
+ if (arbitrationResult.suggestedChanges.length > 0) {
408
+ onProgress?.('consensus', 'Applying arbitrator suggestions...');
409
+ const revisionResult = await revisePlan(
410
+ bestPlan,
411
+ arbitrationResult.reasoning,
412
+ arbitrationResult.suggestedChanges
413
+ );
414
+ if (revisionResult.success && revisionResult.response) {
415
+ currentPlan = revisionResult.response;
416
+ // Reset stuck detection after arbitration revision
417
+ scores.length = 0;
418
+ scores.push(arbitrationResult.score);
419
+ onProgress?.('consensus', 'Plan revised based on arbitrator feedback');
420
+ } else {
421
+ onProgress?.('consensus', 'Revision failed, continuing with current plan');
422
+ }
423
+ }
424
+ }
425
+ } catch (error) {
426
+ onProgress?.('arbitration', `Arbitration failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
427
+ // If we've tried arbitration and it failed, accept the best plan we have
428
+ if (arbitrationAttempts >= maxArbitrationAttempts && bestScore >= arbitrationThreshold) {
429
+ onProgress?.('arbitration', `Max arbitration attempts reached, accepting best plan with ${bestScore}%`);
430
+ return {
431
+ approved: true,
432
+ finalPlan: bestPlan,
433
+ finalScore: bestScore,
434
+ bestPlan,
435
+ bestScore,
436
+ bestIteration,
437
+ iterations,
438
+ totalIterations: iteration,
439
+ finalConcerns: lastConcerns,
440
+ finalRecommendations: lastRecommendations,
441
+ arbitrated: true,
442
+ };
443
+ }
444
+ }
445
+ }
446
+
153
447
  // If not at max iterations, revise the plan
154
448
  if (iteration < maxIterations) {
155
449
  const concerns = extractConcerns(consensusResult);
450
+ onProgress?.('consensus', 'Revising plan based on feedback...');
451
+
452
+ // Create a progress handler for revision
453
+ const revisionProgress = onProgress
454
+ ? (msg: string) => onProgress('consensus', `[revision] ${msg}`)
455
+ : undefined;
156
456
 
157
457
  // Use Claude to revise the plan
158
458
  const revisionResult = await revisePlan(
159
459
  currentPlan,
160
460
  consensusResult.analysis,
161
- concerns
461
+ concerns,
462
+ revisionProgress
162
463
  );
163
464
 
164
465
  if (revisionResult.success && revisionResult.response) {
466
+ // Only use the revised plan for the next iteration
467
+ // The best plan tracking above will decide if it's actually better
165
468
  currentPlan = revisionResult.response;
166
469
 
167
470
  if (onRevision) {
168
471
  onRevision(iteration, currentPlan);
169
472
  }
170
473
  } else {
171
- // If revision fails, try to continue with current plan
474
+ // If revision fails, try to continue with best plan
172
475
  console.warn(`Plan revision failed at iteration ${iteration}:`, revisionResult.error);
476
+ currentPlan = bestPlan;
173
477
  }
174
478
  }
175
479
  }
176
480
 
177
481
  // Max iterations reached without consensus
178
- const lastIteration = iterations[iterations.length - 1];
482
+ // Return the BEST plan we found, not the last one
179
483
  return {
180
484
  approved: false,
181
- finalPlan: currentPlan,
182
- finalScore: lastIteration?.result.score || 0,
485
+ finalPlan: bestPlan,
486
+ finalScore: bestScore,
487
+ bestPlan,
488
+ bestScore,
489
+ bestIteration,
183
490
  iterations,
184
491
  totalIterations: iteration,
492
+ finalConcerns: lastConcerns,
493
+ finalRecommendations: lastRecommendations,
494
+ arbitrated: false,
185
495
  };
186
496
  }
187
497
 
@@ -196,16 +506,31 @@ export function summarizeConsensusProcess(result: ConsensusProcessResult): strin
196
506
 
197
507
  lines.push(`## Consensus Summary`);
198
508
  lines.push('');
199
- lines.push(`**Status:** ${result.approved ? 'APPROVED' : 'NOT APPROVED'}`);
509
+ lines.push(`**Status:** ${result.approved ? 'APPROVED' : 'NOT APPROVED'}${result.arbitrated ? ' (via arbitration)' : ''}`);
200
510
  lines.push(`**Final Score:** ${result.finalScore}%`);
511
+ lines.push(`**Best Score:** ${result.bestScore}% (iteration ${result.bestIteration})`);
201
512
  lines.push(`**Total Iterations:** ${result.totalIterations}`);
513
+
514
+ if (result.arbitrated && result.arbitrationResult) {
515
+ lines.push('');
516
+ lines.push(`### Arbitration Decision`);
517
+ lines.push(`- Decision: ${result.arbitrationResult.approved ? 'APPROVED' : 'REVISE'}`);
518
+ lines.push(`- Confidence: ${result.arbitrationResult.score}%`);
519
+ if (result.arbitrationResult.criticalConcerns.length > 0) {
520
+ lines.push(`- Critical Concerns: ${result.arbitrationResult.criticalConcerns.length}`);
521
+ }
522
+ if (result.arbitrationResult.minorConcerns.length > 0) {
523
+ lines.push(`- Minor Concerns: ${result.arbitrationResult.minorConcerns.length}`);
524
+ }
525
+ }
202
526
  lines.push('');
203
527
 
204
528
  lines.push(`### Iteration History`);
205
529
  lines.push('');
206
530
 
207
531
  for (const iteration of result.iterations) {
208
- lines.push(`#### Iteration ${iteration.iteration}`);
532
+ const isBest = iteration.iteration === result.bestIteration;
533
+ lines.push(`#### Iteration ${iteration.iteration}${isBest ? ' (BEST)' : ''}`);
209
534
  lines.push(`- Score: ${iteration.result.score}%`);
210
535
  lines.push(`- Strengths: ${iteration.result.strengths?.length || 0}`);
211
536
  lines.push(`- Concerns: ${iteration.result.concerns?.length || 0}`);
@@ -213,13 +538,21 @@ export function summarizeConsensusProcess(result: ConsensusProcessResult): strin
213
538
  }
214
539
 
215
540
  if (!result.approved) {
216
- const lastResult = result.iterations[result.iterations.length - 1]?.result;
217
- if (lastResult?.concerns && lastResult.concerns.length > 0) {
541
+ if (result.finalConcerns && result.finalConcerns.length > 0) {
218
542
  lines.push(`### Remaining Concerns`);
219
543
  lines.push('');
220
- for (const concern of lastResult.concerns) {
544
+ for (const concern of result.finalConcerns) {
221
545
  lines.push(`- ${concern}`);
222
546
  }
547
+ lines.push('');
548
+ }
549
+
550
+ if (result.finalRecommendations && result.finalRecommendations.length > 0) {
551
+ lines.push(`### Recommendations`);
552
+ lines.push('');
553
+ for (const rec of result.finalRecommendations) {
554
+ lines.push(`- ${rec}`);
555
+ }
223
556
  }
224
557
  }
225
558
 
@@ -297,3 +630,404 @@ export function getScoreTrend(
297
630
  if (diff < -5) return 'declining';
298
631
  return 'stable';
299
632
  }
633
+
634
/**
 * Options accepted by the optimized consensus process.
 *
 * Adds plan-tracking identifiers and review-collection switches on top of the
 * base consensus options.
 */
export interface OptimizedConsensusOptions extends ConsensusOptions {
  /** Identifier of the milestone the plan is tracked under. */
  milestoneId: string;
  /** Optional display name of the milestone, stored with the plan. */
  milestoneName?: string;
  /** Optional task identifier; when present the plan is stored as a task plan instead of a milestone plan. */
  taskId?: string;
  /** Optional display name of the task, stored with the plan. */
  taskName?: string;
  /** Request reviews from all configured providers concurrently (the consensus process defaults this to true). */
  parallelReviews?: boolean;
  /** Extra reviewers consulted in addition to the primary reviewer from the consensus config. */
  additionalReviewers?: AIProvider[];
}
647
+
648
/**
 * Ask a single reviewer for its verdict on a plan and package the answer as a
 * ReviewerFeedback record.
 *
 * Progress is reported before the request and again once it completes
 * (including the elapsed whole seconds and the returned score). Errors raised
 * by the underlying review request are not caught here.
 *
 * @param plan - Plan text to be reviewed
 * @param context - Project context forwarded to the reviewer
 * @param reviewer - Provider that performs the review
 * @param config - Consensus configuration forwarded to the review request
 * @param onProgress - Optional progress callback
 * @returns The reviewer's score, concerns, recommendations and analysis
 */
async function collectReviewerFeedback(
  plan: string,
  context: string,
  reviewer: AIProvider,
  config: Partial<ConsensusConfig>,
  onProgress?: (phase: string, message: string) => void
): Promise<ReviewerFeedback> {
  if (onProgress) {
    onProgress('consensus', `Requesting review from ${reviewer}...`);
  }
  const requestedAt = Date.now();

  const review = await requestReviewerConsensus(plan, context, reviewer, config);

  const elapsedSeconds = Math.round((Date.now() - requestedAt) / 1000);
  if (onProgress) {
    onProgress('consensus', `${reviewer} review completed in ${elapsedSeconds}s - score: ${review.score}%`);
  }

  // Missing optional fields are normalised to empty values so consumers can
  // iterate/concatenate without null checks.
  const feedback: ReviewerFeedback = {
    reviewer,
    score: review.score,
    timestamp: new Date().toISOString(),
    concerns: review.concerns || [],
    recommendations: review.recommendations || [],
    analysis: review.analysis || '',
  };
  return feedback;
}
675
+
676
/**
 * Request reviews from every listed reviewer concurrently.
 *
 * A reviewer whose request fails is reported through `onProgress` and left out
 * of the result, so the returned array may be shorter than `reviewers` (and
 * empty when every request fails).
 *
 * @param plan - Plan text to be reviewed
 * @param context - Project context forwarded to each reviewer
 * @param reviewers - Providers to consult
 * @param config - Consensus configuration forwarded to each review request
 * @param onProgress - Optional progress callback
 * @returns Feedback from the reviewers that answered successfully, in reviewer order
 */
async function collectAllFeedback(
  plan: string,
  context: string,
  reviewers: AIProvider[],
  config: Partial<ConsensusConfig>,
  onProgress?: (phase: string, message: string) => void
): Promise<ReviewerFeedback[]> {
  onProgress?.('consensus', `Collecting feedback from ${reviewers.length} reviewer(s) in parallel...`);

  // Wrap each request so that a failure resolves to null instead of rejecting
  // the whole batch.
  const reviewSafely = async (candidate: AIProvider): Promise<ReviewerFeedback | null> => {
    try {
      return await collectReviewerFeedback(plan, context, candidate, config, onProgress);
    } catch (error) {
      onProgress?.('consensus', `${candidate} review failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
      return null;
    }
  };

  const outcomes = await Promise.all(reviewers.map(candidate => reviewSafely(candidate)));

  const successful: ReviewerFeedback[] = [];
  for (const outcome of outcomes) {
    if (outcome !== null) {
      successful.push(outcome);
    }
  }
  return successful;
}
699
+
700
/**
 * Optimized consensus process that batches feedback and reduces API calls.
 *
 * Key optimizations:
 * 1. Plans are stored in files, not regenerated from scratch
 * 2. ALL reviewer feedback is collected before a revision is requested
 * 3. The plan is revised ONCE per round with the combined feedback
 * 4. Reviews run in parallel when multiple reviewers are configured
 *
 * Each round: collect feedback, average the scores, record the iteration,
 * return if the combined score reaches `threshold`, otherwise optionally
 * arbitrate when the scores are stuck, then revise the plan. The loop also
 * ends when the overall timeout elapses or `maxIterations` is reached; in
 * both cases the best-scoring plan seen so far is returned.
 *
 * NOTE(review): the configured `arbitrator` is only used in the progress
 * message; arbitration itself always goes through requestGeminiArbitration.
 *
 * @param initialPlan - The initial plan to seek consensus on
 * @param context - Project context for review
 * @param options - Consensus options including tracking info
 * @returns Consensus process result
 * @throws Re-throws the reviewer error when reviews run sequentially and no
 *         reviewer produced feedback; storage and revision errors propagate
 */
export async function runOptimizedConsensusProcess(
  initialPlan: string,
  context: string,
  options: OptimizedConsensusOptions
): Promise<ConsensusProcessResult> {
  const {
    projectDir,
    config = {},
    onIteration,
    onRevision,
    onConcerns,
    onArbitration,
    onProgress,
    milestoneId,
    milestoneName,
    taskId,
    taskName,
    parallelReviews = true,
    additionalReviewers = [],
  } = options;

  const {
    threshold = DEFAULT_CONSENSUS_CONFIG.threshold,
    maxIterations = DEFAULT_CONSENSUS_CONFIG.maxIterations,
    reviewer = DEFAULT_CONSENSUS_CONFIG.reviewer,
    arbitrator = DEFAULT_CONSENSUS_CONFIG.arbitrator,
    enableArbitration = DEFAULT_CONSENSUS_CONFIG.enableArbitration,
    arbitrationThreshold = DEFAULT_CONSENSUS_CONFIG.arbitrationThreshold,
    stuckIterations = DEFAULT_CONSENSUS_CONFIG.stuckIterations,
  } = config;

  // Initialize plan storage
  const planStorage = createPlanStorage(projectDir);
  await planStorage.initialize();

  // Primary reviewer first, followed by any additional reviewers (the primary
  // is not repeated if it also appears in the additional list).
  const allReviewers: AIProvider[] = [reviewer, ...additionalReviewers.filter(r => r !== reviewer)];

  // Arguments shared by every savePlan call.
  const planKind = taskId ? 'task' : 'milestone';
  const planMeta = { milestoneId, milestoneName, taskId, taskName };

  const iterations: ConsensusIteration[] = [];
  const scores: number[] = [];
  let currentPlan = initialPlan;
  let iteration = 0;

  // Track the best plan
  let bestPlan = initialPlan;
  let bestScore = 0;
  let bestIteration = 0;
  let lastConcerns: string[] = [];
  let lastRecommendations: string[] = [];
  let lastAnalysis = '';

  const startTime = Date.now();

  onProgress?.('consensus', `Using optimized consensus with ${allReviewers.join(', ')} as reviewer(s)`);
  onProgress?.('consensus', `Plan tracking: milestone=${milestoneId}${taskId ? `, task=${taskId}` : ''}`);

  // Save initial plan to storage
  await planStorage.savePlan(currentPlan, planKind, { ...planMeta });

  while (iteration < maxIterations) {
    iteration++;

    // Check timeout
    const totalElapsed = Date.now() - startTime;
    if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS) {
      onProgress?.('consensus', `Consensus timeout after ${Math.round(totalElapsed / 60000)} minutes`);

      // The counter was advanced above, but this round never ran.
      const completedIterations = iteration - 1;

      if (enableArbitration) {
        let arbitrationResult: Awaited<ReturnType<typeof requestGeminiArbitration>> | undefined;

        try {
          const verdict = await requestGeminiArbitration(
            bestPlan,
            lastAnalysis,
            `Timeout. Best score: ${bestScore}%. Concerns: ${lastConcerns.slice(0, 3).join('; ')}`,
            completedIterations,
            scores
          );

          if (onArbitration) onArbitration(verdict);
          arbitrationResult = verdict;
        } catch (arbError) {
          // Report the failure, then fall through to accept the best plan.
          onProgress?.('arbitration', `Arbitration failed: ${arbError instanceof Error ? arbError.message : 'Unknown error'}`);
        }

        if (arbitrationResult) {
          // Same approval rule as the stuck-arbitration path: the configured
          // arbitration threshold, not a hard-coded score.
          const arbitratedApproval =
            arbitrationResult.approved || arbitrationResult.score >= arbitrationThreshold;

          // Persisted outside the try block so a storage error is not
          // mistaken for an arbitration failure.
          await planStorage.updateStatus(arbitratedApproval ? 'approved' : 'reviewing', milestoneId, taskId);

          return {
            approved: arbitratedApproval,
            finalPlan: bestPlan,
            finalScore: arbitrationResult.score,
            bestPlan,
            bestScore: arbitrationResult.score,
            bestIteration,
            iterations,
            totalIterations: completedIterations,
            finalConcerns: arbitrationResult.minorConcerns ?? lastConcerns,
            finalRecommendations: arbitrationResult.suggestedChanges ?? lastRecommendations,
            arbitrated: true,
            arbitrationResult,
            timedOut: true,
          };
        }
      }

      const fallbackApproval = bestScore >= arbitrationThreshold;
      await planStorage.updateStatus(fallbackApproval ? 'approved' : 'reviewing', milestoneId, taskId);

      return {
        approved: fallbackApproval,
        finalPlan: bestPlan,
        finalScore: bestScore,
        bestPlan,
        bestScore,
        bestIteration,
        iterations,
        totalIterations: completedIterations,
        finalConcerns: lastConcerns,
        finalRecommendations: lastRecommendations,
        arbitrated: false,
        timedOut: true,
      };
    }

    const elapsedMinutes = Math.round((Date.now() - startTime) / 60000);
    onProgress?.('consensus', `Iteration ${iteration} starting (${elapsedMinutes}min elapsed)`);

    // Clear previous feedback for this round
    await planStorage.clearFeedback(milestoneId, taskId);

    // ============================================
    // OPTIMIZATION: Collect ALL feedback in parallel
    // ============================================
    let allFeedback: ReviewerFeedback[];

    if (parallelReviews && allReviewers.length > 1) {
      // Individual failures are reported and dropped by collectAllFeedback;
      // if every reviewer fails the round proceeds with a combined score of 0.
      allFeedback = await collectAllFeedback(currentPlan, context, allReviewers, config, onProgress);
    } else {
      // Sequential fallback. Like the parallel path, one failing reviewer does
      // not discard feedback from the others; the error is only re-thrown when
      // nobody answered (which keeps the single-reviewer behaviour).
      allFeedback = [];
      let lastFailure: unknown;
      for (const rev of allReviewers) {
        try {
          allFeedback.push(await collectReviewerFeedback(currentPlan, context, rev, config, onProgress));
        } catch (error) {
          lastFailure = error;
          onProgress?.('consensus', `${rev} review failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
      }
      if (allFeedback.length === 0) {
        throw lastFailure;
      }
    }

    // Save all feedback
    for (const feedback of allFeedback) {
      await planStorage.saveFeedback(feedback, milestoneId, taskId);
    }

    // Calculate combined score (average of all reviewers)
    const combinedScore = allFeedback.length > 0
      ? Math.round(allFeedback.reduce((sum, f) => sum + f.score, 0) / allFeedback.length)
      : 0;

    scores.push(combinedScore);

    // Combine all concerns and recommendations (exact duplicates removed)
    const allConcerns = [...new Set(allFeedback.flatMap(f => f.concerns))];
    const allRecommendations = [...new Set(allFeedback.flatMap(f => f.recommendations))];
    const combinedAnalysis = allFeedback.map(f => `[${f.reviewer}] ${f.analysis}`).join('\n\n');

    lastConcerns = allConcerns;
    lastRecommendations = allRecommendations;
    lastAnalysis = combinedAnalysis;

    // Create consensus result for tracking
    const consensusResult: ConsensusResult = {
      score: combinedScore,
      analysis: combinedAnalysis,
      concerns: allConcerns,
      recommendations: allRecommendations,
      approved: combinedScore >= threshold,
      strengths: [],
      rawResponse: combinedAnalysis,
    };

    // Record iteration
    const iterationRecord: ConsensusIteration = {
      iteration,
      plan: currentPlan,
      timestamp: new Date().toISOString(),
      result: consensusResult,
    };
    iterations.push(iterationRecord);

    if (onIteration) onIteration(iteration, consensusResult);
    if (onConcerns) onConcerns(allConcerns, allRecommendations);

    // Update best plan tracking
    if (combinedScore > bestScore) {
      bestScore = combinedScore;
      bestPlan = currentPlan;
      bestIteration = iteration;
    }

    // Save plan with updated score
    await planStorage.savePlan(currentPlan, planKind, { ...planMeta, score: combinedScore });

    // Record in project state
    await recordConsensusIteration(projectDir, iterationRecord);

    onProgress?.('consensus', `Combined score: ${combinedScore}% (from ${allFeedback.length} reviewer(s))`);

    // Check if consensus reached
    if (combinedScore >= threshold) {
      onProgress?.('consensus', `Consensus reached at ${combinedScore}%`);
      await planStorage.updateStatus('approved', milestoneId, taskId);

      return {
        approved: true,
        finalPlan: currentPlan,
        finalScore: combinedScore,
        bestPlan: currentPlan,
        bestScore: combinedScore,
        bestIteration: iteration,
        iterations,
        totalIterations: iteration,
        finalConcerns: allConcerns,
        finalRecommendations: allRecommendations,
        arbitrated: false,
      };
    }

    // Check if stuck
    if (isStuck(scores, stuckIterations) && enableArbitration) {
      onProgress?.('consensus', `Consensus stuck - invoking ${arbitrator} for arbitration`);

      try {
        const arbitrationResult = await requestGeminiArbitration(
          bestPlan,
          combinedAnalysis,
          `Stuck after ${iteration} iterations. Scores: ${scores.slice(-stuckIterations).join(', ')}`,
          iteration,
          scores
        );

        if (onArbitration) onArbitration(arbitrationResult);

        if (arbitrationResult.approved || arbitrationResult.score >= arbitrationThreshold) {
          onProgress?.('arbitration', `Arbitrator approved with ${arbitrationResult.score}%`);
          await planStorage.updateStatus('approved', milestoneId, taskId);

          return {
            approved: true,
            finalPlan: bestPlan,
            finalScore: arbitrationResult.score,
            bestPlan,
            bestScore: arbitrationResult.score,
            bestIteration,
            iterations,
            totalIterations: iteration,
            finalConcerns: arbitrationResult.minorConcerns ?? allConcerns,
            finalRecommendations: arbitrationResult.suggestedChanges ?? allRecommendations,
            arbitrated: true,
            arbitrationResult,
          };
        }
        // Not approved by the arbitrator: continue with another revision.
      } catch (arbError) {
        onProgress?.('arbitration', `Arbitration failed: ${arbError instanceof Error ? arbError.message : 'Unknown error'}`);
      }
    }

    // ============================================
    // OPTIMIZATION: Single revision with ALL feedback
    // ============================================
    // No revision after the final round: nobody would review it.
    if (iteration < maxIterations) {
      onProgress?.('consensus', `Revising plan with combined feedback from ${allFeedback.length} reviewer(s)...`);

      const revisionProgress = onProgress
        ? (msg: string) => onProgress('consensus', `[revision] ${msg}`)
        : undefined;

      // Revise with ALL combined feedback (single API call)
      const revisionResult = await revisePlan(
        currentPlan,
        combinedAnalysis,
        allConcerns,
        revisionProgress
      );

      if (revisionResult.success && revisionResult.response) {
        currentPlan = revisionResult.response;

        // Save revised plan
        await planStorage.savePlan(currentPlan, planKind, { ...planMeta });

        if (onRevision) onRevision(iteration, currentPlan);
      } else {
        onProgress?.('consensus', `Revision failed, continuing with best plan`);
        currentPlan = bestPlan;
      }
    }
  }

  // Max iterations reached
  await planStorage.updateStatus('reviewing', milestoneId, taskId);

  return {
    approved: false,
    finalPlan: bestPlan,
    finalScore: bestScore,
    bestPlan,
    bestScore,
    bestIteration,
    iterations,
    totalIterations: iteration,
    finalConcerns: lastConcerns,
    finalRecommendations: lastRecommendations,
    arbitrated: false,
  };
}