popeye-cli 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +521 -125
- package/dist/adapters/claude.d.ts +16 -4
- package/dist/adapters/claude.d.ts.map +1 -1
- package/dist/adapters/claude.js +679 -33
- package/dist/adapters/claude.js.map +1 -1
- package/dist/adapters/gemini.d.ts +55 -0
- package/dist/adapters/gemini.d.ts.map +1 -0
- package/dist/adapters/gemini.js +318 -0
- package/dist/adapters/gemini.js.map +1 -0
- package/dist/adapters/openai.d.ts.map +1 -1
- package/dist/adapters/openai.js +41 -7
- package/dist/adapters/openai.js.map +1 -1
- package/dist/auth/claude.d.ts +11 -9
- package/dist/auth/claude.d.ts.map +1 -1
- package/dist/auth/claude.js +107 -71
- package/dist/auth/claude.js.map +1 -1
- package/dist/auth/gemini.d.ts +58 -0
- package/dist/auth/gemini.d.ts.map +1 -0
- package/dist/auth/gemini.js +172 -0
- package/dist/auth/gemini.js.map +1 -0
- package/dist/auth/index.d.ts +11 -7
- package/dist/auth/index.d.ts.map +1 -1
- package/dist/auth/index.js +23 -5
- package/dist/auth/index.js.map +1 -1
- package/dist/auth/keychain.d.ts +20 -7
- package/dist/auth/keychain.d.ts.map +1 -1
- package/dist/auth/keychain.js +85 -29
- package/dist/auth/keychain.js.map +1 -1
- package/dist/auth/openai.d.ts +2 -2
- package/dist/auth/openai.d.ts.map +1 -1
- package/dist/auth/openai.js +30 -32
- package/dist/auth/openai.js.map +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +4 -7
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/interactive.d.ts +2 -2
- package/dist/cli/interactive.d.ts.map +1 -1
- package/dist/cli/interactive.js +1380 -183
- package/dist/cli/interactive.js.map +1 -1
- package/dist/config/defaults.d.ts +6 -1
- package/dist/config/defaults.d.ts.map +1 -1
- package/dist/config/defaults.js +10 -2
- package/dist/config/defaults.js.map +1 -1
- package/dist/config/index.d.ts +10 -0
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +19 -0
- package/dist/config/index.js.map +1 -1
- package/dist/config/schema.d.ts +20 -0
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +7 -0
- package/dist/config/schema.js.map +1 -1
- package/dist/generators/python.d.ts.map +1 -1
- package/dist/generators/python.js +1 -0
- package/dist/generators/python.js.map +1 -1
- package/dist/generators/typescript.d.ts.map +1 -1
- package/dist/generators/typescript.js +1 -0
- package/dist/generators/typescript.js.map +1 -1
- package/dist/state/index.d.ts +108 -0
- package/dist/state/index.d.ts.map +1 -1
- package/dist/state/index.js +551 -4
- package/dist/state/index.js.map +1 -1
- package/dist/state/registry.d.ts +52 -0
- package/dist/state/registry.d.ts.map +1 -0
- package/dist/state/registry.js +215 -0
- package/dist/state/registry.js.map +1 -0
- package/dist/types/cli.d.ts +4 -0
- package/dist/types/cli.d.ts.map +1 -1
- package/dist/types/cli.js.map +1 -1
- package/dist/types/consensus.d.ts +69 -4
- package/dist/types/consensus.d.ts.map +1 -1
- package/dist/types/consensus.js +24 -3
- package/dist/types/consensus.js.map +1 -1
- package/dist/types/workflow.d.ts +55 -0
- package/dist/types/workflow.d.ts.map +1 -1
- package/dist/types/workflow.js +16 -0
- package/dist/types/workflow.js.map +1 -1
- package/dist/workflow/auto-fix.d.ts +45 -0
- package/dist/workflow/auto-fix.d.ts.map +1 -0
- package/dist/workflow/auto-fix.js +274 -0
- package/dist/workflow/auto-fix.js.map +1 -0
- package/dist/workflow/consensus.d.ts +44 -2
- package/dist/workflow/consensus.d.ts.map +1 -1
- package/dist/workflow/consensus.js +565 -17
- package/dist/workflow/consensus.js.map +1 -1
- package/dist/workflow/execution-mode.d.ts +10 -4
- package/dist/workflow/execution-mode.d.ts.map +1 -1
- package/dist/workflow/execution-mode.js +547 -58
- package/dist/workflow/execution-mode.js.map +1 -1
- package/dist/workflow/index.d.ts +14 -2
- package/dist/workflow/index.d.ts.map +1 -1
- package/dist/workflow/index.js +69 -6
- package/dist/workflow/index.js.map +1 -1
- package/dist/workflow/milestone-workflow.d.ts +34 -0
- package/dist/workflow/milestone-workflow.d.ts.map +1 -0
- package/dist/workflow/milestone-workflow.js +414 -0
- package/dist/workflow/milestone-workflow.js.map +1 -0
- package/dist/workflow/plan-mode.d.ts +14 -1
- package/dist/workflow/plan-mode.d.ts.map +1 -1
- package/dist/workflow/plan-mode.js +589 -47
- package/dist/workflow/plan-mode.js.map +1 -1
- package/dist/workflow/plan-storage.d.ts +142 -0
- package/dist/workflow/plan-storage.d.ts.map +1 -0
- package/dist/workflow/plan-storage.js +331 -0
- package/dist/workflow/plan-storage.js.map +1 -0
- package/dist/workflow/project-verification.d.ts +37 -0
- package/dist/workflow/project-verification.d.ts.map +1 -0
- package/dist/workflow/project-verification.js +381 -0
- package/dist/workflow/project-verification.js.map +1 -0
- package/dist/workflow/task-workflow.d.ts +37 -0
- package/dist/workflow/task-workflow.d.ts.map +1 -0
- package/dist/workflow/task-workflow.js +383 -0
- package/dist/workflow/task-workflow.js.map +1 -0
- package/dist/workflow/test-runner.d.ts +1 -0
- package/dist/workflow/test-runner.d.ts.map +1 -1
- package/dist/workflow/test-runner.js +9 -5
- package/dist/workflow/test-runner.js.map +1 -1
- package/dist/workflow/ui-designer.d.ts +82 -0
- package/dist/workflow/ui-designer.d.ts.map +1 -0
- package/dist/workflow/ui-designer.js +234 -0
- package/dist/workflow/ui-designer.js.map +1 -0
- package/dist/workflow/ui-setup.d.ts +58 -0
- package/dist/workflow/ui-setup.d.ts.map +1 -0
- package/dist/workflow/ui-setup.js +685 -0
- package/dist/workflow/ui-setup.js.map +1 -0
- package/dist/workflow/ui-verification.d.ts +114 -0
- package/dist/workflow/ui-verification.d.ts.map +1 -0
- package/dist/workflow/ui-verification.js +258 -0
- package/dist/workflow/ui-verification.js.map +1 -0
- package/dist/workflow/workflow-logger.d.ts +110 -0
- package/dist/workflow/workflow-logger.d.ts.map +1 -0
- package/dist/workflow/workflow-logger.js +267 -0
- package/dist/workflow/workflow-logger.js.map +1 -0
- package/package.json +2 -2
- package/src/adapters/claude.ts +815 -34
- package/src/adapters/gemini.ts +373 -0
- package/src/adapters/openai.ts +40 -7
- package/src/auth/claude.ts +120 -78
- package/src/auth/gemini.ts +207 -0
- package/src/auth/index.ts +28 -8
- package/src/auth/keychain.ts +95 -28
- package/src/auth/openai.ts +29 -36
- package/src/cli/index.ts +4 -7
- package/src/cli/interactive.ts +1641 -216
- package/src/config/defaults.ts +10 -2
- package/src/config/index.ts +21 -0
- package/src/config/schema.ts +7 -0
- package/src/generators/python.ts +1 -0
- package/src/generators/typescript.ts +1 -0
- package/src/state/index.ts +713 -4
- package/src/state/registry.ts +278 -0
- package/src/types/cli.ts +4 -0
- package/src/types/consensus.ts +65 -6
- package/src/types/workflow.ts +35 -0
- package/src/workflow/auto-fix.ts +340 -0
- package/src/workflow/consensus.ts +750 -16
- package/src/workflow/execution-mode.ts +673 -74
- package/src/workflow/index.ts +95 -6
- package/src/workflow/milestone-workflow.ts +576 -0
- package/src/workflow/plan-mode.ts +696 -50
- package/src/workflow/plan-storage.ts +482 -0
- package/src/workflow/project-verification.ts +471 -0
- package/src/workflow/task-workflow.ts +525 -0
- package/src/workflow/test-runner.ts +10 -5
- package/src/workflow/ui-designer.ts +337 -0
- package/src/workflow/ui-setup.ts +797 -0
- package/src/workflow/ui-verification.ts +357 -0
- package/src/workflow/workflow-logger.ts +353 -0
- package/tests/config/config.test.ts +1 -1
- package/tests/types/consensus.test.ts +3 -3
- package/tests/workflow/plan-mode.test.ts +213 -0
- package/tests/workflow/test-runner.test.ts +5 -3
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Consensus workflow module
|
|
3
|
-
* Handles the iterative consensus-building process between Claude and OpenAI
|
|
3
|
+
* Handles the iterative consensus-building process between Claude and OpenAI/Gemini
|
|
4
|
+
* with arbitration support when consensus cannot be reached
|
|
4
5
|
*/
|
|
5
6
|
|
|
6
|
-
import type { ConsensusResult, ConsensusIteration, ConsensusConfig } from '../types/consensus.js';
|
|
7
|
+
import type { ConsensusResult, ConsensusIteration, ConsensusConfig, ArbitrationResult, AIProvider } from '../types/consensus.js';
|
|
7
8
|
import { DEFAULT_CONSENSUS_CONFIG } from '../types/consensus.js';
|
|
8
|
-
import { requestConsensus } from '../adapters/openai.js';
|
|
9
|
+
import { requestConsensus as requestOpenAIConsensus } from '../adapters/openai.js';
|
|
10
|
+
import { requestConsensus as requestGeminiConsensus, requestArbitration as requestGeminiArbitration } from '../adapters/gemini.js';
|
|
9
11
|
import { revisePlan } from '../adapters/claude.js';
|
|
10
12
|
import { recordConsensusIteration } from '../state/index.js';
|
|
13
|
+
import { createPlanStorage, type ReviewerFeedback } from './plan-storage.js';
|
|
11
14
|
|
|
12
15
|
/**
|
|
13
16
|
* Options for consensus iteration
|
|
@@ -17,6 +20,9 @@ export interface ConsensusOptions {
|
|
|
17
20
|
config?: Partial<ConsensusConfig>;
|
|
18
21
|
onIteration?: (iteration: number, result: ConsensusResult) => void;
|
|
19
22
|
onRevision?: (iteration: number, revisedPlan: string) => void;
|
|
23
|
+
onConcerns?: (concerns: string[], recommendations: string[]) => void;
|
|
24
|
+
onArbitration?: (result: ArbitrationResult) => void;
|
|
25
|
+
onProgress?: (phase: string, message: string) => void;
|
|
20
26
|
}
|
|
21
27
|
|
|
22
28
|
/**
|
|
@@ -26,10 +32,84 @@ export interface ConsensusProcessResult {
|
|
|
26
32
|
approved: boolean;
|
|
27
33
|
finalPlan: string;
|
|
28
34
|
finalScore: number;
|
|
35
|
+
bestPlan: string;
|
|
36
|
+
bestScore: number;
|
|
37
|
+
bestIteration: number;
|
|
29
38
|
iterations: ConsensusIteration[];
|
|
30
39
|
totalIterations: number;
|
|
40
|
+
finalConcerns: string[];
|
|
41
|
+
finalRecommendations: string[];
|
|
42
|
+
arbitrated: boolean;
|
|
43
|
+
arbitrationResult?: ArbitrationResult;
|
|
44
|
+
/** True if consensus timed out and we accepted the best available plan */
|
|
45
|
+
timedOut?: boolean;
|
|
31
46
|
}
|
|
32
47
|
|
|
48
|
+
/**
 * Dispatch a plan review to the provider selected as reviewer.
 *
 * When `reviewer` is `'gemini'`, only the Gemini model, temperature and token
 * limit are picked out of `config` and forwarded. Every other reviewer value
 * is routed to the OpenAI adapter with `config` passed through unchanged.
 *
 * @param plan - Plan text to be reviewed
 * @param context - Project context handed to the reviewer
 * @param reviewer - Provider that should perform the review
 * @param config - Partial consensus configuration
 * @returns The consensus result produced by the chosen adapter
 */
async function requestReviewerConsensus(
  plan: string,
  context: string,
  reviewer: AIProvider,
  config: Partial<ConsensusConfig>
): Promise<ConsensusResult> {
  // Anything that is not Gemini goes to the OpenAI adapter.
  if (reviewer !== 'gemini') {
    return requestOpenAIConsensus(plan, context, config);
  }

  const { geminiModel, temperature, maxTokens } = config;
  return requestGeminiConsensus(plan, context, {
    model: geminiModel,
    temperature,
    maxTokens,
  });
}
|
|
66
|
+
|
|
67
|
+
/**
 * Decide whether the consensus process has stopped making progress.
 *
 * Only the last `stuckIterations` scores are examined. The process counts as
 * stuck when either of the following holds for that window:
 * 1. Stagnation: the highest and lowest score differ by at most 5 points.
 * 2. Oscillation: the window holds at least 3 scores, their mean absolute
 *    deviation exceeds 3 points, the range is under 20 points, and the last
 *    score is no more than 2 points above the first one.
 *
 * @param scores - Review scores in chronological order
 * @param stuckIterations - Number of most recent scores to inspect
 * @returns `true` if the recent scores show stagnation or oscillation;
 *   `false` otherwise, including when the window size is below 1 or there
 *   are fewer scores than the window requires
 */
function isStuck(scores: number[], stuckIterations: number): boolean {
  // A window smaller than one score cannot show a trend. Without this guard
  // `slice(-0)` selects the whole history, and an empty window makes
  // Math.max() - Math.min() equal -Infinity, which passes the stagnation test.
  if (!(stuckIterations >= 1)) return false;
  if (scores.length < stuckIterations) return false;

  const recentScores = scores.slice(-stuckIterations);
  const maxRecent = Math.max(...recentScores);
  const minRecent = Math.min(...recentScores);
  const range = maxRecent - minRecent;

  // Check 1: Stagnation - all recent scores lie within 5 points of each other
  if (range <= 5) {
    return true;
  }

  // Check 2: Oscillation - scores move up and down without net progress,
  // e.g. 70 -> 85 -> 75 -> 72
  if (recentScores.length >= 3) {
    const avg = recentScores.reduce((a, b) => a + b, 0) / recentScores.length;
    const avgDeviation =
      recentScores.reduce((sum, s) => sum + Math.abs(s - avg), 0) / recentScores.length;

    // Noticeable scatter (mean deviation > 3) inside a bounded band (range < 20)
    if (avgDeviation > 3 && range < 20) {
      const lastScore = recentScores[recentScores.length - 1];
      const firstScore = recentScores[0];
      // Not improving: the window ends no more than 2 points above its start
      if (lastScore <= firstScore + 2) {
        return true;
      }
    }
  }

  return false;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Default limit on total consensus time, in milliseconds (15 minutes).
 */
const DEFAULT_CONSENSUS_TIMEOUT_MS = 1000 * 60 * 15;
|
|
112
|
+
|
|
33
113
|
/**
|
|
34
114
|
* Format a plan for consensus review
|
|
35
115
|
* Structures the plan in a way that's optimal for review
|
|
@@ -88,6 +168,7 @@ export function meetsThreshold(
|
|
|
88
168
|
|
|
89
169
|
/**
|
|
90
170
|
* Iterate until consensus is reached
|
|
171
|
+
* Supports configurable reviewer and arbitration when stuck
|
|
91
172
|
*
|
|
92
173
|
* @param initialPlan - The initial plan to review
|
|
93
174
|
* @param context - Project context
|
|
@@ -104,22 +185,122 @@ export async function iterateUntilConsensus(
|
|
|
104
185
|
config = {},
|
|
105
186
|
onIteration,
|
|
106
187
|
onRevision,
|
|
188
|
+
onConcerns,
|
|
189
|
+
onArbitration,
|
|
190
|
+
onProgress,
|
|
107
191
|
} = options;
|
|
108
192
|
|
|
109
193
|
const {
|
|
110
194
|
threshold = DEFAULT_CONSENSUS_CONFIG.threshold,
|
|
111
195
|
maxIterations = DEFAULT_CONSENSUS_CONFIG.maxIterations,
|
|
196
|
+
reviewer = DEFAULT_CONSENSUS_CONFIG.reviewer,
|
|
197
|
+
arbitrator = DEFAULT_CONSENSUS_CONFIG.arbitrator,
|
|
198
|
+
enableArbitration = DEFAULT_CONSENSUS_CONFIG.enableArbitration,
|
|
199
|
+
arbitrationThreshold = DEFAULT_CONSENSUS_CONFIG.arbitrationThreshold,
|
|
200
|
+
stuckIterations = DEFAULT_CONSENSUS_CONFIG.stuckIterations,
|
|
112
201
|
} = config;
|
|
113
202
|
|
|
114
203
|
const iterations: ConsensusIteration[] = [];
|
|
204
|
+
const scores: number[] = [];
|
|
115
205
|
let currentPlan = initialPlan;
|
|
116
206
|
let iteration = 0;
|
|
117
207
|
|
|
208
|
+
// Track the best plan throughout the process
|
|
209
|
+
let bestPlan = initialPlan;
|
|
210
|
+
let bestScore = 0;
|
|
211
|
+
let bestIteration = 0;
|
|
212
|
+
let lastConcerns: string[] = [];
|
|
213
|
+
let lastRecommendations: string[] = [];
|
|
214
|
+
let lastAnalysis = '';
|
|
215
|
+
|
|
216
|
+
// Track arbitration attempts to prevent infinite loops
|
|
217
|
+
let arbitrationAttempts = 0;
|
|
218
|
+
|
|
219
|
+
// Track elapsed time to detect stuck processes
|
|
220
|
+
const startTime = Date.now();
|
|
221
|
+
const maxArbitrationAttempts = 2;
|
|
222
|
+
|
|
223
|
+
onProgress?.('consensus', `Using ${reviewer} as reviewer${enableArbitration ? `, ${arbitrator} as arbitrator` : ''}`);
|
|
224
|
+
|
|
118
225
|
while (iteration < maxIterations) {
|
|
119
226
|
iteration++;
|
|
120
227
|
|
|
121
|
-
//
|
|
122
|
-
const
|
|
228
|
+
// Check total elapsed time - if timing out, try arbitration before giving up
|
|
229
|
+
const totalElapsed = Date.now() - startTime;
|
|
230
|
+
if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS && enableArbitration && arbitrationAttempts < maxArbitrationAttempts) {
|
|
231
|
+
onProgress?.('consensus', `Consensus timeout after ${Math.round(totalElapsed / 60000)} minutes - invoking arbitrator before accepting`);
|
|
232
|
+
|
|
233
|
+
try {
|
|
234
|
+
arbitrationAttempts++;
|
|
235
|
+
const arbitrationResult = await requestGeminiArbitration(
|
|
236
|
+
bestPlan,
|
|
237
|
+
lastAnalysis,
|
|
238
|
+
`Consensus timed out after ${Math.round(totalElapsed / 60000)} minutes. Best score: ${bestScore}%. Main concerns: ${lastConcerns.slice(0, 3).join('; ')}`,
|
|
239
|
+
iteration,
|
|
240
|
+
scores
|
|
241
|
+
);
|
|
242
|
+
|
|
243
|
+
if (onArbitration) {
|
|
244
|
+
onArbitration(arbitrationResult);
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
// Accept arbitration result (we're out of time)
|
|
248
|
+
onProgress?.('arbitration', `Arbitrator decision: ${arbitrationResult.approved ? 'APPROVED' : 'REVISE'} with ${arbitrationResult.score}%`);
|
|
249
|
+
|
|
250
|
+
return {
|
|
251
|
+
approved: arbitrationResult.approved || arbitrationResult.score >= 80,
|
|
252
|
+
finalPlan: bestPlan,
|
|
253
|
+
finalScore: arbitrationResult.score,
|
|
254
|
+
bestPlan,
|
|
255
|
+
bestScore: arbitrationResult.score,
|
|
256
|
+
bestIteration,
|
|
257
|
+
iterations,
|
|
258
|
+
totalIterations: iteration - 1,
|
|
259
|
+
finalConcerns: arbitrationResult.minorConcerns || lastConcerns,
|
|
260
|
+
finalRecommendations: arbitrationResult.suggestedChanges || lastRecommendations,
|
|
261
|
+
arbitrated: true,
|
|
262
|
+
arbitrationResult,
|
|
263
|
+
timedOut: true,
|
|
264
|
+
};
|
|
265
|
+
} catch (arbError) {
|
|
266
|
+
onProgress?.('arbitration', `Arbitration failed on timeout: ${arbError instanceof Error ? arbError.message : 'Unknown error'}`);
|
|
267
|
+
// Fall through to accept best plan
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
// Hard timeout - no more arbitration attempts left
|
|
272
|
+
if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS) {
|
|
273
|
+
onProgress?.('consensus', `Consensus timeout - accepting best plan with ${bestScore}%`);
|
|
274
|
+
return {
|
|
275
|
+
approved: bestScore >= arbitrationThreshold,
|
|
276
|
+
finalPlan: bestPlan,
|
|
277
|
+
finalScore: bestScore,
|
|
278
|
+
bestPlan,
|
|
279
|
+
bestScore,
|
|
280
|
+
bestIteration,
|
|
281
|
+
iterations,
|
|
282
|
+
totalIterations: iteration - 1,
|
|
283
|
+
finalConcerns: lastConcerns,
|
|
284
|
+
finalRecommendations: lastRecommendations,
|
|
285
|
+
arbitrated: false,
|
|
286
|
+
timedOut: true,
|
|
287
|
+
};
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
// Log iteration timing
|
|
291
|
+
const iterationStart = Date.now();
|
|
292
|
+
const elapsedMinutes = Math.round((iterationStart - startTime) / 60000);
|
|
293
|
+
onProgress?.('consensus', `Iteration ${iteration} starting (${elapsedMinutes}min elapsed)`);
|
|
294
|
+
|
|
295
|
+
// Request consensus review from configured reviewer
|
|
296
|
+
onProgress?.('consensus', `Requesting review from ${reviewer}...`);
|
|
297
|
+
const consensusResult = await requestReviewerConsensus(currentPlan, context, reviewer, config);
|
|
298
|
+
|
|
299
|
+
// Log iteration duration
|
|
300
|
+
const iterationDuration = Math.round((Date.now() - iterationStart) / 1000);
|
|
301
|
+
onProgress?.('consensus', `Review completed in ${iterationDuration}s - score: ${consensusResult.score}%`);
|
|
302
|
+
|
|
303
|
+
scores.push(consensusResult.score);
|
|
123
304
|
|
|
124
305
|
// Record the iteration
|
|
125
306
|
const iterationRecord: ConsensusIteration = {
|
|
@@ -134,54 +315,183 @@ export async function iterateUntilConsensus(
|
|
|
134
315
|
// Save to project state
|
|
135
316
|
await recordConsensusIteration(projectDir, iterationRecord);
|
|
136
317
|
|
|
137
|
-
//
|
|
318
|
+
// Track best plan - only update if this score is better
|
|
319
|
+
if (consensusResult.score > bestScore) {
|
|
320
|
+
bestPlan = currentPlan;
|
|
321
|
+
bestScore = consensusResult.score;
|
|
322
|
+
bestIteration = iteration;
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
// Track concerns for output
|
|
326
|
+
lastConcerns = consensusResult.concerns || [];
|
|
327
|
+
lastRecommendations = consensusResult.recommendations || [];
|
|
328
|
+
lastAnalysis = consensusResult.analysis || '';
|
|
329
|
+
|
|
330
|
+
// Notify callbacks
|
|
138
331
|
if (onIteration) {
|
|
139
332
|
onIteration(iteration, consensusResult);
|
|
140
333
|
}
|
|
141
334
|
|
|
335
|
+
if (onConcerns && (lastConcerns.length > 0 || lastRecommendations.length > 0)) {
|
|
336
|
+
onConcerns(lastConcerns, lastRecommendations);
|
|
337
|
+
}
|
|
338
|
+
|
|
142
339
|
// Check if we've reached consensus
|
|
143
340
|
if (meetsThreshold(consensusResult.score, threshold)) {
|
|
144
341
|
return {
|
|
145
342
|
approved: true,
|
|
146
343
|
finalPlan: currentPlan,
|
|
147
344
|
finalScore: consensusResult.score,
|
|
345
|
+
bestPlan: currentPlan,
|
|
346
|
+
bestScore: consensusResult.score,
|
|
347
|
+
bestIteration: iteration,
|
|
148
348
|
iterations,
|
|
149
349
|
totalIterations: iteration,
|
|
350
|
+
finalConcerns: [],
|
|
351
|
+
finalRecommendations: [],
|
|
352
|
+
arbitrated: false,
|
|
150
353
|
};
|
|
151
354
|
}
|
|
152
355
|
|
|
356
|
+
// Check if we're stuck and should trigger arbitration
|
|
357
|
+
if (enableArbitration &&
|
|
358
|
+
bestScore >= arbitrationThreshold &&
|
|
359
|
+
isStuck(scores, stuckIterations) &&
|
|
360
|
+
arbitrationAttempts < maxArbitrationAttempts) {
|
|
361
|
+
|
|
362
|
+
arbitrationAttempts++;
|
|
363
|
+
onProgress?.('arbitration', `Consensus stuck at ${bestScore}%, invoking ${arbitrator} arbitrator (attempt ${arbitrationAttempts}/${maxArbitrationAttempts})...`);
|
|
364
|
+
|
|
365
|
+
try {
|
|
366
|
+
const arbitrationResult = await requestGeminiArbitration(
|
|
367
|
+
bestPlan,
|
|
368
|
+
lastAnalysis,
|
|
369
|
+
`The plan has been revised ${iteration} times. Best score achieved: ${bestScore}%. The reviewer's main concerns are: ${lastConcerns.slice(0, 3).join('; ')}`,
|
|
370
|
+
iteration,
|
|
371
|
+
scores
|
|
372
|
+
);
|
|
373
|
+
|
|
374
|
+
if (onArbitration) {
|
|
375
|
+
onArbitration(arbitrationResult);
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
// Accept if arbitrator approves OR if arbitrator gives a high score (>= 88%)
|
|
379
|
+
// This prevents infinite REVISE loops when the arbitrator is happy enough
|
|
380
|
+
const acceptArbitration = arbitrationResult.approved ||
|
|
381
|
+
arbitrationResult.score >= 88 ||
|
|
382
|
+
(arbitrationAttempts >= maxArbitrationAttempts && arbitrationResult.score >= 80);
|
|
383
|
+
|
|
384
|
+
if (acceptArbitration) {
|
|
385
|
+
const reason = arbitrationResult.approved
|
|
386
|
+
? `Arbitrator approved plan with ${arbitrationResult.score}% confidence`
|
|
387
|
+
: `Arbitrator score ${arbitrationResult.score}% is acceptable - proceeding with best plan`;
|
|
388
|
+
onProgress?.('arbitration', reason);
|
|
389
|
+
|
|
390
|
+
return {
|
|
391
|
+
approved: true,
|
|
392
|
+
finalPlan: bestPlan,
|
|
393
|
+
finalScore: arbitrationResult.score,
|
|
394
|
+
bestPlan,
|
|
395
|
+
bestScore: arbitrationResult.score,
|
|
396
|
+
bestIteration,
|
|
397
|
+
iterations,
|
|
398
|
+
totalIterations: iteration,
|
|
399
|
+
finalConcerns: arbitrationResult.minorConcerns || [],
|
|
400
|
+
finalRecommendations: arbitrationResult.suggestedChanges || [],
|
|
401
|
+
arbitrated: true,
|
|
402
|
+
arbitrationResult,
|
|
403
|
+
};
|
|
404
|
+
} else {
|
|
405
|
+
onProgress?.('arbitration', `Arbitrator requests changes: ${arbitrationResult.suggestedChanges.slice(0, 2).join('; ')}`);
|
|
406
|
+
// Apply arbitrator's suggested changes
|
|
407
|
+
if (arbitrationResult.suggestedChanges.length > 0) {
|
|
408
|
+
onProgress?.('consensus', 'Applying arbitrator suggestions...');
|
|
409
|
+
const revisionResult = await revisePlan(
|
|
410
|
+
bestPlan,
|
|
411
|
+
arbitrationResult.reasoning,
|
|
412
|
+
arbitrationResult.suggestedChanges
|
|
413
|
+
);
|
|
414
|
+
if (revisionResult.success && revisionResult.response) {
|
|
415
|
+
currentPlan = revisionResult.response;
|
|
416
|
+
// Reset stuck detection after arbitration revision
|
|
417
|
+
scores.length = 0;
|
|
418
|
+
scores.push(arbitrationResult.score);
|
|
419
|
+
onProgress?.('consensus', 'Plan revised based on arbitrator feedback');
|
|
420
|
+
} else {
|
|
421
|
+
onProgress?.('consensus', 'Revision failed, continuing with current plan');
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
} catch (error) {
|
|
426
|
+
onProgress?.('arbitration', `Arbitration failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
427
|
+
// If we've tried arbitration and it failed, accept the best plan we have
|
|
428
|
+
if (arbitrationAttempts >= maxArbitrationAttempts && bestScore >= arbitrationThreshold) {
|
|
429
|
+
onProgress?.('arbitration', `Max arbitration attempts reached, accepting best plan with ${bestScore}%`);
|
|
430
|
+
return {
|
|
431
|
+
approved: true,
|
|
432
|
+
finalPlan: bestPlan,
|
|
433
|
+
finalScore: bestScore,
|
|
434
|
+
bestPlan,
|
|
435
|
+
bestScore,
|
|
436
|
+
bestIteration,
|
|
437
|
+
iterations,
|
|
438
|
+
totalIterations: iteration,
|
|
439
|
+
finalConcerns: lastConcerns,
|
|
440
|
+
finalRecommendations: lastRecommendations,
|
|
441
|
+
arbitrated: true,
|
|
442
|
+
};
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
|
|
153
447
|
// If not at max iterations, revise the plan
|
|
154
448
|
if (iteration < maxIterations) {
|
|
155
449
|
const concerns = extractConcerns(consensusResult);
|
|
450
|
+
onProgress?.('consensus', 'Revising plan based on feedback...');
|
|
451
|
+
|
|
452
|
+
// Create a progress handler for revision
|
|
453
|
+
const revisionProgress = onProgress
|
|
454
|
+
? (msg: string) => onProgress('consensus', `[revision] ${msg}`)
|
|
455
|
+
: undefined;
|
|
156
456
|
|
|
157
457
|
// Use Claude to revise the plan
|
|
158
458
|
const revisionResult = await revisePlan(
|
|
159
459
|
currentPlan,
|
|
160
460
|
consensusResult.analysis,
|
|
161
|
-
concerns
|
|
461
|
+
concerns,
|
|
462
|
+
revisionProgress
|
|
162
463
|
);
|
|
163
464
|
|
|
164
465
|
if (revisionResult.success && revisionResult.response) {
|
|
466
|
+
// Only use the revised plan for the next iteration
|
|
467
|
+
// The best plan tracking above will decide if it's actually better
|
|
165
468
|
currentPlan = revisionResult.response;
|
|
166
469
|
|
|
167
470
|
if (onRevision) {
|
|
168
471
|
onRevision(iteration, currentPlan);
|
|
169
472
|
}
|
|
170
473
|
} else {
|
|
171
|
-
// If revision fails, try to continue with
|
|
474
|
+
// If revision fails, try to continue with best plan
|
|
172
475
|
console.warn(`Plan revision failed at iteration ${iteration}:`, revisionResult.error);
|
|
476
|
+
currentPlan = bestPlan;
|
|
173
477
|
}
|
|
174
478
|
}
|
|
175
479
|
}
|
|
176
480
|
|
|
177
481
|
// Max iterations reached without consensus
|
|
178
|
-
|
|
482
|
+
// Return the BEST plan we found, not the last one
|
|
179
483
|
return {
|
|
180
484
|
approved: false,
|
|
181
|
-
finalPlan:
|
|
182
|
-
finalScore:
|
|
485
|
+
finalPlan: bestPlan,
|
|
486
|
+
finalScore: bestScore,
|
|
487
|
+
bestPlan,
|
|
488
|
+
bestScore,
|
|
489
|
+
bestIteration,
|
|
183
490
|
iterations,
|
|
184
491
|
totalIterations: iteration,
|
|
492
|
+
finalConcerns: lastConcerns,
|
|
493
|
+
finalRecommendations: lastRecommendations,
|
|
494
|
+
arbitrated: false,
|
|
185
495
|
};
|
|
186
496
|
}
|
|
187
497
|
|
|
@@ -196,16 +506,31 @@ export function summarizeConsensusProcess(result: ConsensusProcessResult): strin
|
|
|
196
506
|
|
|
197
507
|
lines.push(`## Consensus Summary`);
|
|
198
508
|
lines.push('');
|
|
199
|
-
lines.push(`**Status:** ${result.approved ? 'APPROVED' : 'NOT APPROVED'}`);
|
|
509
|
+
lines.push(`**Status:** ${result.approved ? 'APPROVED' : 'NOT APPROVED'}${result.arbitrated ? ' (via arbitration)' : ''}`);
|
|
200
510
|
lines.push(`**Final Score:** ${result.finalScore}%`);
|
|
511
|
+
lines.push(`**Best Score:** ${result.bestScore}% (iteration ${result.bestIteration})`);
|
|
201
512
|
lines.push(`**Total Iterations:** ${result.totalIterations}`);
|
|
513
|
+
|
|
514
|
+
if (result.arbitrated && result.arbitrationResult) {
|
|
515
|
+
lines.push('');
|
|
516
|
+
lines.push(`### Arbitration Decision`);
|
|
517
|
+
lines.push(`- Decision: ${result.arbitrationResult.approved ? 'APPROVED' : 'REVISE'}`);
|
|
518
|
+
lines.push(`- Confidence: ${result.arbitrationResult.score}%`);
|
|
519
|
+
if (result.arbitrationResult.criticalConcerns.length > 0) {
|
|
520
|
+
lines.push(`- Critical Concerns: ${result.arbitrationResult.criticalConcerns.length}`);
|
|
521
|
+
}
|
|
522
|
+
if (result.arbitrationResult.minorConcerns.length > 0) {
|
|
523
|
+
lines.push(`- Minor Concerns: ${result.arbitrationResult.minorConcerns.length}`);
|
|
524
|
+
}
|
|
525
|
+
}
|
|
202
526
|
lines.push('');
|
|
203
527
|
|
|
204
528
|
lines.push(`### Iteration History`);
|
|
205
529
|
lines.push('');
|
|
206
530
|
|
|
207
531
|
for (const iteration of result.iterations) {
|
|
208
|
-
|
|
532
|
+
const isBest = iteration.iteration === result.bestIteration;
|
|
533
|
+
lines.push(`#### Iteration ${iteration.iteration}${isBest ? ' (BEST)' : ''}`);
|
|
209
534
|
lines.push(`- Score: ${iteration.result.score}%`);
|
|
210
535
|
lines.push(`- Strengths: ${iteration.result.strengths?.length || 0}`);
|
|
211
536
|
lines.push(`- Concerns: ${iteration.result.concerns?.length || 0}`);
|
|
@@ -213,13 +538,21 @@ export function summarizeConsensusProcess(result: ConsensusProcessResult): strin
|
|
|
213
538
|
}
|
|
214
539
|
|
|
215
540
|
if (!result.approved) {
|
|
216
|
-
|
|
217
|
-
if (lastResult?.concerns && lastResult.concerns.length > 0) {
|
|
541
|
+
if (result.finalConcerns && result.finalConcerns.length > 0) {
|
|
218
542
|
lines.push(`### Remaining Concerns`);
|
|
219
543
|
lines.push('');
|
|
220
|
-
for (const concern of
|
|
544
|
+
for (const concern of result.finalConcerns) {
|
|
221
545
|
lines.push(`- ${concern}`);
|
|
222
546
|
}
|
|
547
|
+
lines.push('');
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
if (result.finalRecommendations && result.finalRecommendations.length > 0) {
|
|
551
|
+
lines.push(`### Recommendations`);
|
|
552
|
+
lines.push('');
|
|
553
|
+
for (const rec of result.finalRecommendations) {
|
|
554
|
+
lines.push(`- ${rec}`);
|
|
555
|
+
}
|
|
223
556
|
}
|
|
224
557
|
}
|
|
225
558
|
|
|
@@ -297,3 +630,404 @@ export function getScoreTrend(
|
|
|
297
630
|
if (diff < -5) return 'declining';
|
|
298
631
|
return 'stable';
|
|
299
632
|
}
|
|
633
|
+
|
|
634
|
+
/**
 * Options accepted by the optimized consensus process.
 *
 * Extends the base consensus options with milestone/task identifiers and
 * switches for multi-reviewer operation.
 */
export interface OptimizedConsensusOptions extends ConsensusOptions {
  /** Identifier of the milestone (required). */
  milestoneId: string;
  /** Optional milestone name. */
  milestoneName?: string;
  /** Optional task identifier. */
  taskId?: string;
  /** Optional task name. */
  taskName?: string;
  /** Run reviews from multiple providers in parallel */
  parallelReviews?: boolean;
  /** Extra reviewers to use in addition to the primary one */
  additionalReviewers?: AIProvider[];
}
|
|
647
|
+
|
|
648
|
+
/**
 * Obtain one reviewer's feedback on a plan and time the request.
 *
 * Reports progress before and after the review, then converts the consensus
 * result into a `ReviewerFeedback` record stamped with the current time.
 * Missing concerns, recommendations or analysis fall back to empty values.
 *
 * @param plan - Plan text to be reviewed
 * @param context - Project context handed to the reviewer
 * @param reviewer - Provider that should perform the review
 * @param config - Partial consensus configuration
 * @param onProgress - Optional progress callback (phase, message)
 * @returns Feedback record for this reviewer
 */
async function collectReviewerFeedback(
  plan: string,
  context: string,
  reviewer: AIProvider,
  config: Partial<ConsensusConfig>,
  onProgress?: (phase: string, message: string) => void
): Promise<ReviewerFeedback> {
  onProgress?.('consensus', `Requesting review from ${reviewer}...`);

  const requestedAt = Date.now();
  const review = await requestReviewerConsensus(plan, context, reviewer, config);
  const elapsedSeconds = Math.round((Date.now() - requestedAt) / 1000);

  onProgress?.('consensus', `${reviewer} review completed in ${elapsedSeconds}s - score: ${review.score}%`);

  const feedback: ReviewerFeedback = {
    reviewer,
    score: review.score,
    timestamp: new Date().toISOString(),
    concerns: review.concerns || [],
    recommendations: review.recommendations || [],
    analysis: review.analysis || '',
  };
  return feedback;
}
|
|
675
|
+
|
|
676
|
+
/**
 * Ask every listed reviewer for feedback concurrently.
 *
 * A reviewer whose request fails is reported through `onProgress` and left
 * out of the result; the remaining reviewers are unaffected.
 *
 * @param plan - Plan text to be reviewed
 * @param context - Project context handed to each reviewer
 * @param reviewers - Providers to query
 * @param config - Partial consensus configuration
 * @param onProgress - Optional progress callback (phase, message)
 * @returns Feedback from the reviewers that succeeded, in `reviewers` order
 */
async function collectAllFeedback(
  plan: string,
  context: string,
  reviewers: AIProvider[],
  config: Partial<ConsensusConfig>,
  onProgress?: (phase: string, message: string) => void
): Promise<ReviewerFeedback[]> {
  onProgress?.('consensus', `Collecting feedback from ${reviewers.length} reviewer(s) in parallel...`);

  // Start all reviews up front; each one turns its own failure into null.
  const pending = reviewers.map(async (reviewer): Promise<ReviewerFeedback | null> => {
    try {
      return await collectReviewerFeedback(plan, context, reviewer, config, onProgress);
    } catch (error) {
      onProgress?.('consensus', `${reviewer} review failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
      return null;
    }
  });

  const settled = await Promise.all(pending);

  const collected: ReviewerFeedback[] = [];
  for (const feedback of settled) {
    if (feedback !== null) {
      collected.push(feedback);
    }
  }
  return collected;
}
|
|
699
|
+
|
|
700
|
+
/**
 * Optimized consensus process that batches reviewer feedback and revises the
 * plan at most once per round.
 *
 * Per iteration the function:
 * 1. stops early if the overall time budget (`DEFAULT_CONSENSUS_TIMEOUT_MS`)
 *    is exhausted, optionally asking the arbitrator for a final verdict;
 * 2. clears stored feedback and collects feedback from every reviewer
 *    (concurrently when `parallelReviews` is set and there is more than one
 *    reviewer, otherwise one after another);
 * 3. averages the reviewer scores, merges de-duplicated concerns and
 *    recommendations, and records the iteration;
 * 4. returns immediately when the combined score reaches `threshold`;
 * 5. invokes arbitration when the scores are stuck and arbitration is enabled;
 * 6. otherwise revises the plan once with the combined feedback (skipped on
 *    the last iteration).
 *
 * The highest-scoring plan seen so far is tracked and is what gets returned
 * whenever the loop ends without consensus.
 *
 * @param initialPlan - The initial plan to seek consensus on
 * @param context - Project context for review
 * @param options - Consensus options including plan-tracking identifiers
 * @returns Consensus process result
 */
export async function runOptimizedConsensusProcess(
  initialPlan: string,
  context: string,
  options: OptimizedConsensusOptions
): Promise<ConsensusProcessResult> {
  const {
    projectDir,
    config = {},
    onIteration,
    onRevision,
    onConcerns,
    onArbitration,
    onProgress,
    milestoneId,
    milestoneName,
    taskId,
    taskName,
    parallelReviews = true,
    additionalReviewers = [],
  } = options;

  const {
    threshold = DEFAULT_CONSENSUS_CONFIG.threshold,
    maxIterations = DEFAULT_CONSENSUS_CONFIG.maxIterations,
    reviewer = DEFAULT_CONSENSUS_CONFIG.reviewer,
    arbitrator = DEFAULT_CONSENSUS_CONFIG.arbitrator,
    enableArbitration = DEFAULT_CONSENSUS_CONFIG.enableArbitration,
    arbitrationThreshold = DEFAULT_CONSENSUS_CONFIG.arbitrationThreshold,
    stuckIterations = DEFAULT_CONSENSUS_CONFIG.stuckIterations,
  } = config;

  // Initialize plan storage
  const planStorage = createPlanStorage(projectDir);
  await planStorage.initialize();

  // Plan identity shared by every savePlan call below.
  const planKind = taskId ? 'task' : 'milestone';
  const planMeta = { milestoneId, milestoneName, taskId, taskName };

  // Primary reviewer first, then any additional reviewers that differ from it.
  const allReviewers: AIProvider[] = [reviewer, ...additionalReviewers.filter(r => r !== reviewer)];

  const iterations: ConsensusIteration[] = [];
  const scores: number[] = [];
  let currentPlan = initialPlan;
  let iteration = 0;

  // Track the best plan seen so far and the most recent combined feedback.
  let bestPlan = initialPlan;
  let bestScore = 0;
  let bestIteration = 0;
  let lastConcerns: string[] = [];
  let lastRecommendations: string[] = [];
  let lastAnalysis = '';

  const startTime = Date.now();

  onProgress?.('consensus', `Using optimized consensus with ${allReviewers.join(', ')} as reviewer(s)`);
  onProgress?.('consensus', `Plan tracking: milestone=${milestoneId}${taskId ? `, task=${taskId}` : ''}`);

  // Save initial plan to storage
  await planStorage.savePlan(currentPlan, planKind, { ...planMeta });

  while (iteration < maxIterations) {
    iteration++;

    // Check timeout
    const totalElapsed = Date.now() - startTime;
    if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS) {
      onProgress?.('consensus', `Consensus timeout after ${Math.round(totalElapsed / 60000)} minutes`);

      if (enableArbitration) {
        try {
          // NOTE(review): `arbitrator` from the config is only used in a log
          // message further down; the arbitration request itself always goes
          // through requestGeminiArbitration.
          const arbitrationResult = await requestGeminiArbitration(
            bestPlan,
            lastAnalysis,
            `Timeout. Best score: ${bestScore}%. Concerns: ${lastConcerns.slice(0, 3).join('; ')}`,
            iteration,
            scores
          );

          onArbitration?.(arbitrationResult);

          return {
            // Use the configured arbitration threshold, matching the
            // stuck-arbitration path and the non-arbitrated timeout fallback.
            approved: arbitrationResult.approved || arbitrationResult.score >= arbitrationThreshold,
            finalPlan: bestPlan,
            finalScore: arbitrationResult.score,
            bestPlan,
            bestScore: arbitrationResult.score,
            bestIteration,
            iterations,
            // This iteration was aborted before any review took place.
            totalIterations: iteration - 1,
            finalConcerns: arbitrationResult.minorConcerns || lastConcerns,
            finalRecommendations: arbitrationResult.suggestedChanges || lastRecommendations,
            arbitrated: true,
            arbitrationResult,
            timedOut: true,
          };
        } catch (arbError) {
          // Report the failure, then fall through to accept the best plan.
          onProgress?.('arbitration', `Arbitration failed: ${arbError instanceof Error ? arbError.message : 'Unknown error'}`);
        }
      }

      return {
        approved: bestScore >= arbitrationThreshold,
        finalPlan: bestPlan,
        finalScore: bestScore,
        bestPlan,
        bestScore,
        bestIteration,
        iterations,
        totalIterations: iteration - 1,
        finalConcerns: lastConcerns,
        finalRecommendations: lastRecommendations,
        arbitrated: false,
        timedOut: true,
      };
    }

    const elapsedMinutes = Math.round((Date.now() - startTime) / 60000);
    onProgress?.('consensus', `Iteration ${iteration} starting (${elapsedMinutes}min elapsed)`);

    // Clear previous feedback for this round
    await planStorage.clearFeedback(milestoneId, taskId);

    // ============================================
    // OPTIMIZATION: Collect ALL feedback in parallel
    // ============================================
    let allFeedback: ReviewerFeedback[];

    if (parallelReviews && allReviewers.length > 1) {
      // Failed reviewers are dropped by collectAllFeedback.
      allFeedback = await collectAllFeedback(currentPlan, context, allReviewers, config, onProgress);
    } else {
      // Sequential fallback: a failing reviewer rejects and aborts the process.
      allFeedback = [];
      for (const rev of allReviewers) {
        const feedback = await collectReviewerFeedback(currentPlan, context, rev, config, onProgress);
        allFeedback.push(feedback);
      }
    }

    // Save all feedback
    for (const feedback of allFeedback) {
      await planStorage.saveFeedback(feedback, milestoneId, taskId);
    }

    // Calculate combined score (rounded average of all reviewers; 0 when none answered)
    const combinedScore = allFeedback.length > 0
      ? Math.round(allFeedback.reduce((sum, f) => sum + f.score, 0) / allFeedback.length)
      : 0;

    scores.push(combinedScore);

    // Combine all concerns and recommendations, dropping exact duplicates
    const allConcerns = [...new Set(allFeedback.flatMap(f => f.concerns))];
    const allRecommendations = [...new Set(allFeedback.flatMap(f => f.recommendations))];
    const combinedAnalysis = allFeedback.map(f => `[${f.reviewer}] ${f.analysis}`).join('\n\n');

    lastConcerns = allConcerns;
    lastRecommendations = allRecommendations;
    lastAnalysis = combinedAnalysis;

    // Create consensus result for tracking
    const consensusResult: ConsensusResult = {
      score: combinedScore,
      analysis: combinedAnalysis,
      concerns: allConcerns,
      recommendations: allRecommendations,
      approved: combinedScore >= threshold,
      strengths: [],
      rawResponse: combinedAnalysis,
    };

    // Record iteration
    const iterationRecord: ConsensusIteration = {
      iteration,
      plan: currentPlan,
      timestamp: new Date().toISOString(),
      result: consensusResult,
    };
    iterations.push(iterationRecord);

    onIteration?.(iteration, consensusResult);
    onConcerns?.(allConcerns, allRecommendations);

    // Update best plan tracking (strictly better scores only)
    if (combinedScore > bestScore) {
      bestScore = combinedScore;
      bestPlan = currentPlan;
      bestIteration = iteration;
    }

    // Save plan with updated score
    await planStorage.savePlan(currentPlan, planKind, { ...planMeta, score: combinedScore });

    // Record in project state
    await recordConsensusIteration(projectDir, iterationRecord);

    onProgress?.('consensus', `Combined score: ${combinedScore}% (from ${allFeedback.length} reviewer(s))`);

    // Check if consensus reached
    if (combinedScore >= threshold) {
      onProgress?.('consensus', `Consensus reached at ${combinedScore}%`);
      await planStorage.updateStatus('approved', milestoneId, taskId);

      return {
        approved: true,
        finalPlan: currentPlan,
        finalScore: combinedScore,
        bestPlan: currentPlan,
        bestScore: combinedScore,
        bestIteration: iteration,
        iterations,
        totalIterations: iteration,
        finalConcerns: allConcerns,
        finalRecommendations: allRecommendations,
        arbitrated: false,
      };
    }

    // Check if stuck
    if (isStuck(scores, stuckIterations) && enableArbitration) {
      onProgress?.('consensus', `Consensus stuck - invoking ${arbitrator} for arbitration`);

      try {
        const arbitrationResult = await requestGeminiArbitration(
          bestPlan,
          combinedAnalysis,
          `Stuck after ${iteration} iterations. Scores: ${scores.slice(-stuckIterations).join(', ')}`,
          iteration,
          scores
        );

        onArbitration?.(arbitrationResult);

        if (arbitrationResult.approved || arbitrationResult.score >= arbitrationThreshold) {
          onProgress?.('arbitration', `Arbitrator approved with ${arbitrationResult.score}%`);
          await planStorage.updateStatus('approved', milestoneId, taskId);

          return {
            approved: true,
            finalPlan: bestPlan,
            finalScore: arbitrationResult.score,
            bestPlan,
            bestScore: arbitrationResult.score,
            bestIteration,
            iterations,
            totalIterations: iteration,
            finalConcerns: arbitrationResult.minorConcerns || allConcerns,
            finalRecommendations: arbitrationResult.suggestedChanges || allRecommendations,
            arbitrated: true,
            arbitrationResult,
          };
        }
        // Arbitrator did not approve: continue with another revision round.
      } catch (arbError) {
        onProgress?.('arbitration', `Arbitration failed: ${arbError instanceof Error ? arbError.message : 'Unknown error'}`);
      }
    }

    // ============================================
    // OPTIMIZATION: Single revision with ALL feedback
    // ============================================
    // No revision after the final iteration: it would never be reviewed.
    if (iteration < maxIterations) {
      onProgress?.('consensus', `Revising plan with combined feedback from ${allFeedback.length} reviewer(s)...`);

      const revisionProgress = onProgress
        ? (msg: string) => onProgress('consensus', `[revision] ${msg}`)
        : undefined;

      // One revisePlan call per round, fed with the combined feedback
      const revisionResult = await revisePlan(
        currentPlan,
        combinedAnalysis,
        allConcerns,
        revisionProgress
      );

      if (revisionResult.success && revisionResult.response) {
        currentPlan = revisionResult.response;

        // Save revised plan (no score yet: it has not been reviewed)
        await planStorage.savePlan(currentPlan, planKind, { ...planMeta });

        onRevision?.(iteration, currentPlan);
      } else {
        onProgress?.('consensus', `Revision failed, continuing with best plan`);
        currentPlan = bestPlan;
      }
    }
  }

  // Max iterations reached
  await planStorage.updateStatus('reviewing', milestoneId, taskId);

  return {
    approved: false,
    finalPlan: bestPlan,
    finalScore: bestScore,
    bestPlan,
    bestScore,
    bestIteration,
    iterations,
    totalIterations: iteration,
    finalConcerns: lastConcerns,
    finalRecommendations: lastRecommendations,
    arbitrated: false,
  };
}
|