popeye-cli 1.0.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +24 -1
- package/CONTRIBUTING.md +275 -0
- package/OPEN_SOURCE_MANIFESTO.md +172 -0
- package/README.md +832 -123
- package/dist/adapters/claude.d.ts +19 -4
- package/dist/adapters/claude.d.ts.map +1 -1
- package/dist/adapters/claude.js +908 -42
- package/dist/adapters/claude.js.map +1 -1
- package/dist/adapters/gemini.d.ts +55 -0
- package/dist/adapters/gemini.d.ts.map +1 -0
- package/dist/adapters/gemini.js +318 -0
- package/dist/adapters/gemini.js.map +1 -0
- package/dist/adapters/grok.d.ts +73 -0
- package/dist/adapters/grok.d.ts.map +1 -0
- package/dist/adapters/grok.js +430 -0
- package/dist/adapters/grok.js.map +1 -0
- package/dist/adapters/openai.d.ts +1 -1
- package/dist/adapters/openai.d.ts.map +1 -1
- package/dist/adapters/openai.js +47 -8
- package/dist/adapters/openai.js.map +1 -1
- package/dist/auth/claude.d.ts +11 -9
- package/dist/auth/claude.d.ts.map +1 -1
- package/dist/auth/claude.js +107 -71
- package/dist/auth/claude.js.map +1 -1
- package/dist/auth/gemini.d.ts +58 -0
- package/dist/auth/gemini.d.ts.map +1 -0
- package/dist/auth/gemini.js +172 -0
- package/dist/auth/gemini.js.map +1 -0
- package/dist/auth/grok.d.ts +73 -0
- package/dist/auth/grok.d.ts.map +1 -0
- package/dist/auth/grok.js +211 -0
- package/dist/auth/grok.js.map +1 -0
- package/dist/auth/index.d.ts +14 -7
- package/dist/auth/index.d.ts.map +1 -1
- package/dist/auth/index.js +41 -6
- package/dist/auth/index.js.map +1 -1
- package/dist/auth/keychain.d.ts +20 -7
- package/dist/auth/keychain.d.ts.map +1 -1
- package/dist/auth/keychain.js +85 -29
- package/dist/auth/keychain.js.map +1 -1
- package/dist/auth/openai.d.ts +2 -2
- package/dist/auth/openai.d.ts.map +1 -1
- package/dist/auth/openai.js +30 -32
- package/dist/auth/openai.js.map +1 -1
- package/dist/cli/commands/auth.d.ts +1 -1
- package/dist/cli/commands/auth.d.ts.map +1 -1
- package/dist/cli/commands/auth.js +79 -8
- package/dist/cli/commands/auth.js.map +1 -1
- package/dist/cli/commands/create.d.ts.map +1 -1
- package/dist/cli/commands/create.js +15 -4
- package/dist/cli/commands/create.js.map +1 -1
- package/dist/cli/interactive.d.ts.map +1 -1
- package/dist/cli/interactive.js +1494 -114
- package/dist/cli/interactive.js.map +1 -1
- package/dist/config/defaults.d.ts +9 -1
- package/dist/config/defaults.d.ts.map +1 -1
- package/dist/config/defaults.js +19 -2
- package/dist/config/defaults.js.map +1 -1
- package/dist/config/index.d.ts +19 -0
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +33 -1
- package/dist/config/index.js.map +1 -1
- package/dist/config/schema.d.ts +47 -0
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +29 -1
- package/dist/config/schema.js.map +1 -1
- package/dist/generators/fullstack.d.ts +32 -0
- package/dist/generators/fullstack.d.ts.map +1 -0
- package/dist/generators/fullstack.js +497 -0
- package/dist/generators/fullstack.js.map +1 -0
- package/dist/generators/index.d.ts +4 -3
- package/dist/generators/index.d.ts.map +1 -1
- package/dist/generators/index.js +15 -1
- package/dist/generators/index.js.map +1 -1
- package/dist/generators/python.d.ts +17 -1
- package/dist/generators/python.d.ts.map +1 -1
- package/dist/generators/python.js +34 -20
- package/dist/generators/python.js.map +1 -1
- package/dist/generators/templates/fullstack.d.ts +113 -0
- package/dist/generators/templates/fullstack.d.ts.map +1 -0
- package/dist/generators/templates/fullstack.js +1004 -0
- package/dist/generators/templates/fullstack.js.map +1 -0
- package/dist/generators/typescript.d.ts +19 -1
- package/dist/generators/typescript.d.ts.map +1 -1
- package/dist/generators/typescript.js +37 -20
- package/dist/generators/typescript.js.map +1 -1
- package/dist/state/index.d.ts +108 -0
- package/dist/state/index.d.ts.map +1 -1
- package/dist/state/index.js +551 -4
- package/dist/state/index.js.map +1 -1
- package/dist/state/registry.d.ts +52 -0
- package/dist/state/registry.d.ts.map +1 -0
- package/dist/state/registry.js +215 -0
- package/dist/state/registry.js.map +1 -0
- package/dist/types/cli.d.ts +8 -0
- package/dist/types/cli.d.ts.map +1 -1
- package/dist/types/cli.js.map +1 -1
- package/dist/types/consensus.d.ts +186 -4
- package/dist/types/consensus.d.ts.map +1 -1
- package/dist/types/consensus.js +35 -3
- package/dist/types/consensus.js.map +1 -1
- package/dist/types/project.d.ts +76 -0
- package/dist/types/project.d.ts.map +1 -1
- package/dist/types/project.js +1 -1
- package/dist/types/project.js.map +1 -1
- package/dist/types/workflow.d.ts +217 -16
- package/dist/types/workflow.d.ts.map +1 -1
- package/dist/types/workflow.js +40 -1
- package/dist/types/workflow.js.map +1 -1
- package/dist/workflow/auto-fix.d.ts +45 -0
- package/dist/workflow/auto-fix.d.ts.map +1 -0
- package/dist/workflow/auto-fix.js +274 -0
- package/dist/workflow/auto-fix.js.map +1 -0
- package/dist/workflow/consensus.d.ts +70 -2
- package/dist/workflow/consensus.d.ts.map +1 -1
- package/dist/workflow/consensus.js +872 -17
- package/dist/workflow/consensus.js.map +1 -1
- package/dist/workflow/execution-mode.d.ts +10 -4
- package/dist/workflow/execution-mode.d.ts.map +1 -1
- package/dist/workflow/execution-mode.js +547 -58
- package/dist/workflow/execution-mode.js.map +1 -1
- package/dist/workflow/index.d.ts +14 -2
- package/dist/workflow/index.d.ts.map +1 -1
- package/dist/workflow/index.js +69 -6
- package/dist/workflow/index.js.map +1 -1
- package/dist/workflow/milestone-workflow.d.ts +34 -0
- package/dist/workflow/milestone-workflow.d.ts.map +1 -0
- package/dist/workflow/milestone-workflow.js +414 -0
- package/dist/workflow/milestone-workflow.js.map +1 -0
- package/dist/workflow/plan-mode.d.ts +80 -3
- package/dist/workflow/plan-mode.d.ts.map +1 -1
- package/dist/workflow/plan-mode.js +767 -49
- package/dist/workflow/plan-mode.js.map +1 -1
- package/dist/workflow/plan-storage.d.ts +386 -0
- package/dist/workflow/plan-storage.d.ts.map +1 -0
- package/dist/workflow/plan-storage.js +878 -0
- package/dist/workflow/plan-storage.js.map +1 -0
- package/dist/workflow/project-verification.d.ts +37 -0
- package/dist/workflow/project-verification.d.ts.map +1 -0
- package/dist/workflow/project-verification.js +381 -0
- package/dist/workflow/project-verification.js.map +1 -0
- package/dist/workflow/task-workflow.d.ts +37 -0
- package/dist/workflow/task-workflow.d.ts.map +1 -0
- package/dist/workflow/task-workflow.js +386 -0
- package/dist/workflow/task-workflow.js.map +1 -0
- package/dist/workflow/test-runner.d.ts +9 -0
- package/dist/workflow/test-runner.d.ts.map +1 -1
- package/dist/workflow/test-runner.js +101 -5
- package/dist/workflow/test-runner.js.map +1 -1
- package/dist/workflow/ui-designer.d.ts +82 -0
- package/dist/workflow/ui-designer.d.ts.map +1 -0
- package/dist/workflow/ui-designer.js +234 -0
- package/dist/workflow/ui-designer.js.map +1 -0
- package/dist/workflow/ui-setup.d.ts +58 -0
- package/dist/workflow/ui-setup.d.ts.map +1 -0
- package/dist/workflow/ui-setup.js +685 -0
- package/dist/workflow/ui-setup.js.map +1 -0
- package/dist/workflow/ui-verification.d.ts +114 -0
- package/dist/workflow/ui-verification.d.ts.map +1 -0
- package/dist/workflow/ui-verification.js +258 -0
- package/dist/workflow/ui-verification.js.map +1 -0
- package/dist/workflow/workflow-logger.d.ts +110 -0
- package/dist/workflow/workflow-logger.d.ts.map +1 -0
- package/dist/workflow/workflow-logger.js +267 -0
- package/dist/workflow/workflow-logger.js.map +1 -0
- package/dist/workflow/workspace-manager.d.ts +342 -0
- package/dist/workflow/workspace-manager.d.ts.map +1 -0
- package/dist/workflow/workspace-manager.js +733 -0
- package/dist/workflow/workspace-manager.js.map +1 -0
- package/package.json +2 -2
- package/src/adapters/claude.ts +1067 -47
- package/src/adapters/gemini.ts +373 -0
- package/src/adapters/grok.ts +492 -0
- package/src/adapters/openai.ts +48 -9
- package/src/auth/claude.ts +120 -78
- package/src/auth/gemini.ts +207 -0
- package/src/auth/grok.ts +255 -0
- package/src/auth/index.ts +47 -9
- package/src/auth/keychain.ts +95 -28
- package/src/auth/openai.ts +29 -36
- package/src/cli/commands/auth.ts +89 -10
- package/src/cli/commands/create.ts +13 -4
- package/src/cli/interactive.ts +1774 -142
- package/src/config/defaults.ts +19 -2
- package/src/config/index.ts +36 -1
- package/src/config/schema.ts +30 -1
- package/src/generators/fullstack.ts +551 -0
- package/src/generators/index.ts +25 -1
- package/src/generators/python.ts +65 -20
- package/src/generators/templates/fullstack.ts +1047 -0
- package/src/generators/typescript.ts +69 -20
- package/src/state/index.ts +713 -4
- package/src/state/registry.ts +278 -0
- package/src/types/cli.ts +8 -0
- package/src/types/consensus.ts +197 -6
- package/src/types/project.ts +82 -1
- package/src/types/workflow.ts +90 -1
- package/src/workflow/auto-fix.ts +340 -0
- package/src/workflow/consensus.ts +1180 -16
- package/src/workflow/execution-mode.ts +673 -74
- package/src/workflow/index.ts +95 -6
- package/src/workflow/milestone-workflow.ts +576 -0
- package/src/workflow/plan-mode.ts +924 -50
- package/src/workflow/plan-storage.ts +1282 -0
- package/src/workflow/project-verification.ts +471 -0
- package/src/workflow/task-workflow.ts +528 -0
- package/src/workflow/test-runner.ts +120 -5
- package/src/workflow/ui-designer.ts +337 -0
- package/src/workflow/ui-setup.ts +797 -0
- package/src/workflow/ui-verification.ts +357 -0
- package/src/workflow/workflow-logger.ts +353 -0
- package/src/workflow/workspace-manager.ts +912 -0
- package/tests/config/config.test.ts +1 -1
- package/tests/types/consensus.test.ts +3 -3
- package/tests/workflow/plan-mode.test.ts +213 -0
- package/tests/workflow/test-runner.test.ts +5 -3
|
@@ -1,13 +1,31 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Consensus workflow module
|
|
3
|
-
* Handles the iterative consensus-building process between Claude and OpenAI
|
|
3
|
+
* Handles the iterative consensus-building process between Claude and OpenAI/Gemini
|
|
4
|
+
* with arbitration support when consensus cannot be reached
|
|
4
5
|
*/
|
|
5
6
|
|
|
6
|
-
import type {
|
|
7
|
+
import type {
|
|
8
|
+
ConsensusResult,
|
|
9
|
+
ConsensusIteration,
|
|
10
|
+
ConsensusConfig,
|
|
11
|
+
ArbitrationResult,
|
|
12
|
+
AIProvider,
|
|
13
|
+
TaggedItem,
|
|
14
|
+
AppConsensusScores,
|
|
15
|
+
CorrectionRecord,
|
|
16
|
+
} from '../types/consensus.js';
|
|
7
17
|
import { DEFAULT_CONSENSUS_CONFIG } from '../types/consensus.js';
|
|
8
|
-
import { requestConsensus } from '../adapters/openai.js';
|
|
18
|
+
import { requestConsensus as requestOpenAIConsensus } from '../adapters/openai.js';
|
|
19
|
+
import { requestConsensus as requestGeminiConsensus, requestArbitration as requestGeminiArbitration } from '../adapters/gemini.js';
|
|
20
|
+
import { requestConsensus as requestGrokConsensus, requestArbitration as requestGrokArbitration } from '../adapters/grok.js';
|
|
9
21
|
import { revisePlan } from '../adapters/claude.js';
|
|
10
22
|
import { recordConsensusIteration } from '../state/index.js';
|
|
23
|
+
import {
|
|
24
|
+
createPlanStorage,
|
|
25
|
+
type ReviewerFeedback,
|
|
26
|
+
type FullstackReviewerFeedback,
|
|
27
|
+
type FeedbackAppTarget,
|
|
28
|
+
} from './plan-storage.js';
|
|
11
29
|
|
|
12
30
|
/**
|
|
13
31
|
* Options for consensus iteration
|
|
@@ -15,8 +33,15 @@ import { recordConsensusIteration } from '../state/index.js';
|
|
|
15
33
|
export interface ConsensusOptions {
|
|
16
34
|
projectDir: string;
|
|
17
35
|
config?: Partial<ConsensusConfig>;
|
|
36
|
+
/** Whether this is a fullstack project (enables per-app tracking) */
|
|
37
|
+
isFullstack?: boolean;
|
|
38
|
+
/** Project language for revision prompts */
|
|
39
|
+
language?: 'python' | 'typescript' | 'fullstack';
|
|
18
40
|
onIteration?: (iteration: number, result: ConsensusResult) => void;
|
|
19
41
|
onRevision?: (iteration: number, revisedPlan: string) => void;
|
|
42
|
+
onConcerns?: (concerns: string[], recommendations: string[]) => void;
|
|
43
|
+
onArbitration?: (result: ArbitrationResult) => void;
|
|
44
|
+
onProgress?: (phase: string, message: string) => void;
|
|
20
45
|
}
|
|
21
46
|
|
|
22
47
|
/**
|
|
@@ -26,10 +51,109 @@ export interface ConsensusProcessResult {
|
|
|
26
51
|
approved: boolean;
|
|
27
52
|
finalPlan: string;
|
|
28
53
|
finalScore: number;
|
|
54
|
+
bestPlan: string;
|
|
55
|
+
bestScore: number;
|
|
56
|
+
bestIteration: number;
|
|
29
57
|
iterations: ConsensusIteration[];
|
|
30
58
|
totalIterations: number;
|
|
59
|
+
finalConcerns: string[];
|
|
60
|
+
finalRecommendations: string[];
|
|
61
|
+
arbitrated: boolean;
|
|
62
|
+
arbitrationResult?: ArbitrationResult;
|
|
63
|
+
/** True if consensus timed out and we accepted the best available plan */
|
|
64
|
+
timedOut?: boolean;
|
|
31
65
|
}
|
|
32
66
|
|
|
67
|
+
/**
|
|
68
|
+
* Request consensus from the configured reviewer (OpenAI, Gemini, or Grok)
|
|
69
|
+
*/
|
|
70
|
+
async function requestReviewerConsensus(
|
|
71
|
+
plan: string,
|
|
72
|
+
context: string,
|
|
73
|
+
reviewer: AIProvider,
|
|
74
|
+
config: Partial<ConsensusConfig>
|
|
75
|
+
): Promise<ConsensusResult> {
|
|
76
|
+
if (reviewer === 'gemini') {
|
|
77
|
+
return requestGeminiConsensus(plan, context, {
|
|
78
|
+
model: config.geminiModel,
|
|
79
|
+
temperature: config.temperature,
|
|
80
|
+
maxTokens: config.maxTokens,
|
|
81
|
+
});
|
|
82
|
+
}
|
|
83
|
+
if (reviewer === 'grok') {
|
|
84
|
+
return requestGrokConsensus(plan, context, {
|
|
85
|
+
model: config.grokModel,
|
|
86
|
+
temperature: config.temperature,
|
|
87
|
+
maxTokens: config.maxTokens,
|
|
88
|
+
});
|
|
89
|
+
}
|
|
90
|
+
return requestOpenAIConsensus(plan, context, config);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
/**
|
|
94
|
+
* Request arbitration from the configured arbitrator (OpenAI, Gemini, or Grok)
|
|
95
|
+
*/
|
|
96
|
+
async function requestArbitratorDecision(
|
|
97
|
+
plan: string,
|
|
98
|
+
reviewerFeedback: string,
|
|
99
|
+
claudeFeedback: string,
|
|
100
|
+
iterations: number,
|
|
101
|
+
scores: number[],
|
|
102
|
+
arbitrator: AIProvider
|
|
103
|
+
): Promise<ArbitrationResult> {
|
|
104
|
+
if (arbitrator === 'grok') {
|
|
105
|
+
return requestGrokArbitration(plan, reviewerFeedback, claudeFeedback, iterations, scores);
|
|
106
|
+
}
|
|
107
|
+
// Default to Gemini for arbitration (most capable at reasoning)
|
|
108
|
+
return requestGeminiArbitration(plan, reviewerFeedback, claudeFeedback, iterations, scores);
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Check if the consensus process is "stuck" (not improving)
|
|
113
|
+
* Detects both:
|
|
114
|
+
* 1. Stagnation: scores within 5% of each other
|
|
115
|
+
* 2. Oscillation: scores going up and down without progress
|
|
116
|
+
*/
|
|
117
|
+
function isStuck(scores: number[], stuckIterations: number): boolean {
|
|
118
|
+
if (scores.length < stuckIterations) return false;
|
|
119
|
+
|
|
120
|
+
const recentScores = scores.slice(-stuckIterations);
|
|
121
|
+
const maxRecent = Math.max(...recentScores);
|
|
122
|
+
const minRecent = Math.min(...recentScores);
|
|
123
|
+
|
|
124
|
+
// Check 1: Stagnation - all recent scores are within 5% of each other
|
|
125
|
+
if ((maxRecent - minRecent) <= 5) {
|
|
126
|
+
return true;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
// Check 2: Oscillation - detect if we're going up and down without making progress
|
|
130
|
+
// e.g., 70 -> 85 -> 75 -> 80 (oscillating around ~77.5)
|
|
131
|
+
if (recentScores.length >= 3) {
|
|
132
|
+
const avg = recentScores.reduce((a, b) => a + b, 0) / recentScores.length;
|
|
133
|
+
const deviations = recentScores.map(s => Math.abs(s - avg));
|
|
134
|
+
const avgDeviation = deviations.reduce((a, b) => a + b, 0) / deviations.length;
|
|
135
|
+
|
|
136
|
+
// If scores are oscillating around an average (avg deviation > 3% but range < 20%)
|
|
137
|
+
// and we're not trending upward, consider it stuck
|
|
138
|
+
if (avgDeviation > 3 && (maxRecent - minRecent) < 20) {
|
|
139
|
+
// Check if we're trending upward (last score should be close to max)
|
|
140
|
+
const lastScore = recentScores[recentScores.length - 1];
|
|
141
|
+
const firstScore = recentScores[0];
|
|
142
|
+
// Not improving if last score is not better than first
|
|
143
|
+
if (lastScore <= firstScore + 2) {
|
|
144
|
+
return true;
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
return false;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
/**
|
|
153
|
+
* Default consensus timeout (15 minutes total)
|
|
154
|
+
*/
|
|
155
|
+
const DEFAULT_CONSENSUS_TIMEOUT_MS = 15 * 60 * 1000;
|
|
156
|
+
|
|
33
157
|
/**
|
|
34
158
|
* Format a plan for consensus review
|
|
35
159
|
* Structures the plan in a way that's optimal for review
|
|
@@ -88,6 +212,7 @@ export function meetsThreshold(
|
|
|
88
212
|
|
|
89
213
|
/**
|
|
90
214
|
* Iterate until consensus is reached
|
|
215
|
+
* Supports configurable reviewer and arbitration when stuck
|
|
91
216
|
*
|
|
92
217
|
* @param initialPlan - The initial plan to review
|
|
93
218
|
* @param context - Project context
|
|
@@ -102,24 +227,130 @@ export async function iterateUntilConsensus(
|
|
|
102
227
|
const {
|
|
103
228
|
projectDir,
|
|
104
229
|
config = {},
|
|
230
|
+
isFullstack = false,
|
|
231
|
+
language: providedLanguage,
|
|
105
232
|
onIteration,
|
|
106
233
|
onRevision,
|
|
234
|
+
onConcerns,
|
|
235
|
+
onArbitration,
|
|
236
|
+
onProgress,
|
|
107
237
|
} = options;
|
|
108
238
|
|
|
239
|
+
// Derive language from isFullstack if not explicitly provided
|
|
240
|
+
const language = providedLanguage || (isFullstack ? 'fullstack' : 'python');
|
|
241
|
+
|
|
109
242
|
const {
|
|
110
243
|
threshold = DEFAULT_CONSENSUS_CONFIG.threshold,
|
|
111
244
|
maxIterations = DEFAULT_CONSENSUS_CONFIG.maxIterations,
|
|
245
|
+
reviewer = DEFAULT_CONSENSUS_CONFIG.reviewer,
|
|
246
|
+
arbitrator = DEFAULT_CONSENSUS_CONFIG.arbitrator,
|
|
247
|
+
enableArbitration = DEFAULT_CONSENSUS_CONFIG.enableArbitration,
|
|
248
|
+
arbitrationThreshold = DEFAULT_CONSENSUS_CONFIG.arbitrationThreshold,
|
|
249
|
+
stuckIterations = DEFAULT_CONSENSUS_CONFIG.stuckIterations,
|
|
112
250
|
} = config;
|
|
113
251
|
|
|
114
252
|
const iterations: ConsensusIteration[] = [];
|
|
253
|
+
const scores: number[] = [];
|
|
115
254
|
let currentPlan = initialPlan;
|
|
116
255
|
let iteration = 0;
|
|
117
256
|
|
|
257
|
+
// Track the best plan throughout the process
|
|
258
|
+
let bestPlan = initialPlan;
|
|
259
|
+
let bestScore = 0;
|
|
260
|
+
let bestIteration = 0;
|
|
261
|
+
let lastConcerns: string[] = [];
|
|
262
|
+
let lastRecommendations: string[] = [];
|
|
263
|
+
let lastAnalysis = '';
|
|
264
|
+
|
|
265
|
+
// Track arbitration attempts to prevent infinite loops
|
|
266
|
+
let arbitrationAttempts = 0;
|
|
267
|
+
|
|
268
|
+
// Track elapsed time to detect stuck processes
|
|
269
|
+
const startTime = Date.now();
|
|
270
|
+
const maxArbitrationAttempts = 2;
|
|
271
|
+
|
|
272
|
+
onProgress?.('consensus', `Using ${reviewer} as reviewer${enableArbitration ? `, ${arbitrator} as arbitrator` : ''}`);
|
|
273
|
+
|
|
118
274
|
while (iteration < maxIterations) {
|
|
119
275
|
iteration++;
|
|
120
276
|
|
|
121
|
-
//
|
|
122
|
-
const
|
|
277
|
+
// Check total elapsed time - if timing out, try arbitration before giving up
|
|
278
|
+
const totalElapsed = Date.now() - startTime;
|
|
279
|
+
if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS && enableArbitration && arbitrationAttempts < maxArbitrationAttempts) {
|
|
280
|
+
onProgress?.('consensus', `Consensus timeout after ${Math.round(totalElapsed / 60000)} minutes - invoking arbitrator before accepting`);
|
|
281
|
+
|
|
282
|
+
try {
|
|
283
|
+
arbitrationAttempts++;
|
|
284
|
+
const arbitrationResult = await requestArbitratorDecision(
|
|
285
|
+
bestPlan,
|
|
286
|
+
lastAnalysis,
|
|
287
|
+
`Consensus timed out after ${Math.round(totalElapsed / 60000)} minutes. Best score: ${bestScore}%. Main concerns: ${lastConcerns.slice(0, 3).join('; ')}`,
|
|
288
|
+
iteration,
|
|
289
|
+
scores,
|
|
290
|
+
arbitrator
|
|
291
|
+
);
|
|
292
|
+
|
|
293
|
+
if (onArbitration) {
|
|
294
|
+
onArbitration(arbitrationResult);
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
// Accept arbitration result (we're out of time)
|
|
298
|
+
onProgress?.('arbitration', `Arbitrator decision: ${arbitrationResult.approved ? 'APPROVED' : 'REVISE'} with ${arbitrationResult.score}%`);
|
|
299
|
+
|
|
300
|
+
return {
|
|
301
|
+
approved: arbitrationResult.approved || arbitrationResult.score >= 80,
|
|
302
|
+
finalPlan: bestPlan,
|
|
303
|
+
finalScore: arbitrationResult.score,
|
|
304
|
+
bestPlan,
|
|
305
|
+
bestScore: arbitrationResult.score,
|
|
306
|
+
bestIteration,
|
|
307
|
+
iterations,
|
|
308
|
+
totalIterations: iteration - 1,
|
|
309
|
+
finalConcerns: arbitrationResult.minorConcerns || lastConcerns,
|
|
310
|
+
finalRecommendations: arbitrationResult.suggestedChanges || lastRecommendations,
|
|
311
|
+
arbitrated: true,
|
|
312
|
+
arbitrationResult,
|
|
313
|
+
timedOut: true,
|
|
314
|
+
};
|
|
315
|
+
} catch (arbError) {
|
|
316
|
+
onProgress?.('arbitration', `Arbitration failed on timeout: ${arbError instanceof Error ? arbError.message : 'Unknown error'}`);
|
|
317
|
+
// Fall through to accept best plan
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
// Hard timeout - no more arbitration attempts left
|
|
322
|
+
if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS) {
|
|
323
|
+
onProgress?.('consensus', `Consensus timeout - accepting best plan with ${bestScore}%`);
|
|
324
|
+
return {
|
|
325
|
+
approved: bestScore >= arbitrationThreshold,
|
|
326
|
+
finalPlan: bestPlan,
|
|
327
|
+
finalScore: bestScore,
|
|
328
|
+
bestPlan,
|
|
329
|
+
bestScore,
|
|
330
|
+
bestIteration,
|
|
331
|
+
iterations,
|
|
332
|
+
totalIterations: iteration - 1,
|
|
333
|
+
finalConcerns: lastConcerns,
|
|
334
|
+
finalRecommendations: lastRecommendations,
|
|
335
|
+
arbitrated: false,
|
|
336
|
+
timedOut: true,
|
|
337
|
+
};
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
// Log iteration timing
|
|
341
|
+
const iterationStart = Date.now();
|
|
342
|
+
const elapsedMinutes = Math.round((iterationStart - startTime) / 60000);
|
|
343
|
+
onProgress?.('consensus', `Iteration ${iteration} starting (${elapsedMinutes}min elapsed)`);
|
|
344
|
+
|
|
345
|
+
// Request consensus review from configured reviewer
|
|
346
|
+
onProgress?.('consensus', `Requesting review from ${reviewer}...`);
|
|
347
|
+
const consensusResult = await requestReviewerConsensus(currentPlan, context, reviewer, config);
|
|
348
|
+
|
|
349
|
+
// Log iteration duration
|
|
350
|
+
const iterationDuration = Math.round((Date.now() - iterationStart) / 1000);
|
|
351
|
+
onProgress?.('consensus', `Review completed in ${iterationDuration}s - score: ${consensusResult.score}%`);
|
|
352
|
+
|
|
353
|
+
scores.push(consensusResult.score);
|
|
123
354
|
|
|
124
355
|
// Record the iteration
|
|
125
356
|
const iterationRecord: ConsensusIteration = {
|
|
@@ -134,54 +365,186 @@ export async function iterateUntilConsensus(
|
|
|
134
365
|
// Save to project state
|
|
135
366
|
await recordConsensusIteration(projectDir, iterationRecord);
|
|
136
367
|
|
|
137
|
-
//
|
|
368
|
+
// Track best plan - only update if this score is better
|
|
369
|
+
if (consensusResult.score > bestScore) {
|
|
370
|
+
bestPlan = currentPlan;
|
|
371
|
+
bestScore = consensusResult.score;
|
|
372
|
+
bestIteration = iteration;
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
// Track concerns for output
|
|
376
|
+
lastConcerns = consensusResult.concerns || [];
|
|
377
|
+
lastRecommendations = consensusResult.recommendations || [];
|
|
378
|
+
lastAnalysis = consensusResult.analysis || '';
|
|
379
|
+
|
|
380
|
+
// Notify callbacks
|
|
138
381
|
if (onIteration) {
|
|
139
382
|
onIteration(iteration, consensusResult);
|
|
140
383
|
}
|
|
141
384
|
|
|
385
|
+
if (onConcerns && (lastConcerns.length > 0 || lastRecommendations.length > 0)) {
|
|
386
|
+
onConcerns(lastConcerns, lastRecommendations);
|
|
387
|
+
}
|
|
388
|
+
|
|
142
389
|
// Check if we've reached consensus
|
|
143
390
|
if (meetsThreshold(consensusResult.score, threshold)) {
|
|
144
391
|
return {
|
|
145
392
|
approved: true,
|
|
146
393
|
finalPlan: currentPlan,
|
|
147
394
|
finalScore: consensusResult.score,
|
|
395
|
+
bestPlan: currentPlan,
|
|
396
|
+
bestScore: consensusResult.score,
|
|
397
|
+
bestIteration: iteration,
|
|
148
398
|
iterations,
|
|
149
399
|
totalIterations: iteration,
|
|
400
|
+
finalConcerns: [],
|
|
401
|
+
finalRecommendations: [],
|
|
402
|
+
arbitrated: false,
|
|
150
403
|
};
|
|
151
404
|
}
|
|
152
405
|
|
|
406
|
+
// Check if we're stuck and should trigger arbitration
|
|
407
|
+
if (enableArbitration &&
|
|
408
|
+
bestScore >= arbitrationThreshold &&
|
|
409
|
+
isStuck(scores, stuckIterations) &&
|
|
410
|
+
arbitrationAttempts < maxArbitrationAttempts) {
|
|
411
|
+
|
|
412
|
+
arbitrationAttempts++;
|
|
413
|
+
onProgress?.('arbitration', `Consensus stuck at ${bestScore}%, invoking ${arbitrator} arbitrator (attempt ${arbitrationAttempts}/${maxArbitrationAttempts})...`);
|
|
414
|
+
|
|
415
|
+
try {
|
|
416
|
+
const arbitrationResult = await requestArbitratorDecision(
|
|
417
|
+
bestPlan,
|
|
418
|
+
lastAnalysis,
|
|
419
|
+
`The plan has been revised ${iteration} times. Best score achieved: ${bestScore}%. The reviewer's main concerns are: ${lastConcerns.slice(0, 3).join('; ')}`,
|
|
420
|
+
iteration,
|
|
421
|
+
scores,
|
|
422
|
+
arbitrator
|
|
423
|
+
);
|
|
424
|
+
|
|
425
|
+
if (onArbitration) {
|
|
426
|
+
onArbitration(arbitrationResult);
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
// Accept if arbitrator approves OR if arbitrator gives a high score (>= 88%)
|
|
430
|
+
// This prevents infinite REVISE loops when the arbitrator is happy enough
|
|
431
|
+
const acceptArbitration = arbitrationResult.approved ||
|
|
432
|
+
arbitrationResult.score >= 88 ||
|
|
433
|
+
(arbitrationAttempts >= maxArbitrationAttempts && arbitrationResult.score >= 80);
|
|
434
|
+
|
|
435
|
+
if (acceptArbitration) {
|
|
436
|
+
const reason = arbitrationResult.approved
|
|
437
|
+
? `Arbitrator approved plan with ${arbitrationResult.score}% confidence`
|
|
438
|
+
: `Arbitrator score ${arbitrationResult.score}% is acceptable - proceeding with best plan`;
|
|
439
|
+
onProgress?.('arbitration', reason);
|
|
440
|
+
|
|
441
|
+
return {
|
|
442
|
+
approved: true,
|
|
443
|
+
finalPlan: bestPlan,
|
|
444
|
+
finalScore: arbitrationResult.score,
|
|
445
|
+
bestPlan,
|
|
446
|
+
bestScore: arbitrationResult.score,
|
|
447
|
+
bestIteration,
|
|
448
|
+
iterations,
|
|
449
|
+
totalIterations: iteration,
|
|
450
|
+
finalConcerns: arbitrationResult.minorConcerns || [],
|
|
451
|
+
finalRecommendations: arbitrationResult.suggestedChanges || [],
|
|
452
|
+
arbitrated: true,
|
|
453
|
+
arbitrationResult,
|
|
454
|
+
};
|
|
455
|
+
} else {
|
|
456
|
+
onProgress?.('arbitration', `Arbitrator requests changes: ${arbitrationResult.suggestedChanges.slice(0, 2).join('; ')}`);
|
|
457
|
+
// Apply arbitrator's suggested changes
|
|
458
|
+
if (arbitrationResult.suggestedChanges.length > 0) {
|
|
459
|
+
onProgress?.('consensus', 'Applying arbitrator suggestions...');
|
|
460
|
+
const revisionResult = await revisePlan(
|
|
461
|
+
bestPlan,
|
|
462
|
+
arbitrationResult.reasoning,
|
|
463
|
+
arbitrationResult.suggestedChanges,
|
|
464
|
+
language
|
|
465
|
+
);
|
|
466
|
+
if (revisionResult.success && revisionResult.response) {
|
|
467
|
+
currentPlan = revisionResult.response;
|
|
468
|
+
// Reset stuck detection after arbitration revision
|
|
469
|
+
scores.length = 0;
|
|
470
|
+
scores.push(arbitrationResult.score);
|
|
471
|
+
onProgress?.('consensus', 'Plan revised based on arbitrator feedback');
|
|
472
|
+
} else {
|
|
473
|
+
onProgress?.('consensus', 'Revision failed, continuing with current plan');
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
} catch (error) {
|
|
478
|
+
onProgress?.('arbitration', `Arbitration failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
479
|
+
// If we've tried arbitration and it failed, accept the best plan we have
|
|
480
|
+
if (arbitrationAttempts >= maxArbitrationAttempts && bestScore >= arbitrationThreshold) {
|
|
481
|
+
onProgress?.('arbitration', `Max arbitration attempts reached, accepting best plan with ${bestScore}%`);
|
|
482
|
+
return {
|
|
483
|
+
approved: true,
|
|
484
|
+
finalPlan: bestPlan,
|
|
485
|
+
finalScore: bestScore,
|
|
486
|
+
bestPlan,
|
|
487
|
+
bestScore,
|
|
488
|
+
bestIteration,
|
|
489
|
+
iterations,
|
|
490
|
+
totalIterations: iteration,
|
|
491
|
+
finalConcerns: lastConcerns,
|
|
492
|
+
finalRecommendations: lastRecommendations,
|
|
493
|
+
arbitrated: true,
|
|
494
|
+
};
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
}
|
|
498
|
+
|
|
153
499
|
// If not at max iterations, revise the plan
|
|
154
500
|
if (iteration < maxIterations) {
|
|
155
501
|
const concerns = extractConcerns(consensusResult);
|
|
502
|
+
onProgress?.('consensus', 'Revising plan based on feedback...');
|
|
503
|
+
|
|
504
|
+
// Create a progress handler for revision
|
|
505
|
+
const revisionProgress = onProgress
|
|
506
|
+
? (msg: string) => onProgress('consensus', `[revision] ${msg}`)
|
|
507
|
+
: undefined;
|
|
156
508
|
|
|
157
509
|
// Use Claude to revise the plan
|
|
158
510
|
const revisionResult = await revisePlan(
|
|
159
511
|
currentPlan,
|
|
160
512
|
consensusResult.analysis,
|
|
161
|
-
concerns
|
|
513
|
+
concerns,
|
|
514
|
+
language,
|
|
515
|
+
revisionProgress
|
|
162
516
|
);
|
|
163
517
|
|
|
164
518
|
if (revisionResult.success && revisionResult.response) {
|
|
519
|
+
// Only use the revised plan for the next iteration
|
|
520
|
+
// The best plan tracking above will decide if it's actually better
|
|
165
521
|
currentPlan = revisionResult.response;
|
|
166
522
|
|
|
167
523
|
if (onRevision) {
|
|
168
524
|
onRevision(iteration, currentPlan);
|
|
169
525
|
}
|
|
170
526
|
} else {
|
|
171
|
-
// If revision fails, try to continue with
|
|
527
|
+
// If revision fails, try to continue with best plan
|
|
172
528
|
console.warn(`Plan revision failed at iteration ${iteration}:`, revisionResult.error);
|
|
529
|
+
currentPlan = bestPlan;
|
|
173
530
|
}
|
|
174
531
|
}
|
|
175
532
|
}
|
|
176
533
|
|
|
177
534
|
// Max iterations reached without consensus
|
|
178
|
-
|
|
535
|
+
// Return the BEST plan we found, not the last one
|
|
179
536
|
return {
|
|
180
537
|
approved: false,
|
|
181
|
-
finalPlan:
|
|
182
|
-
finalScore:
|
|
538
|
+
finalPlan: bestPlan,
|
|
539
|
+
finalScore: bestScore,
|
|
540
|
+
bestPlan,
|
|
541
|
+
bestScore,
|
|
542
|
+
bestIteration,
|
|
183
543
|
iterations,
|
|
184
544
|
totalIterations: iteration,
|
|
545
|
+
finalConcerns: lastConcerns,
|
|
546
|
+
finalRecommendations: lastRecommendations,
|
|
547
|
+
arbitrated: false,
|
|
185
548
|
};
|
|
186
549
|
}
|
|
187
550
|
|
|
@@ -196,16 +559,31 @@ export function summarizeConsensusProcess(result: ConsensusProcessResult): strin
|
|
|
196
559
|
|
|
197
560
|
lines.push(`## Consensus Summary`);
|
|
198
561
|
lines.push('');
|
|
199
|
-
lines.push(`**Status:** ${result.approved ? 'APPROVED' : 'NOT APPROVED'}`);
|
|
562
|
+
lines.push(`**Status:** ${result.approved ? 'APPROVED' : 'NOT APPROVED'}${result.arbitrated ? ' (via arbitration)' : ''}`);
|
|
200
563
|
lines.push(`**Final Score:** ${result.finalScore}%`);
|
|
564
|
+
lines.push(`**Best Score:** ${result.bestScore}% (iteration ${result.bestIteration})`);
|
|
201
565
|
lines.push(`**Total Iterations:** ${result.totalIterations}`);
|
|
566
|
+
|
|
567
|
+
if (result.arbitrated && result.arbitrationResult) {
|
|
568
|
+
lines.push('');
|
|
569
|
+
lines.push(`### Arbitration Decision`);
|
|
570
|
+
lines.push(`- Decision: ${result.arbitrationResult.approved ? 'APPROVED' : 'REVISE'}`);
|
|
571
|
+
lines.push(`- Confidence: ${result.arbitrationResult.score}%`);
|
|
572
|
+
if (result.arbitrationResult.criticalConcerns.length > 0) {
|
|
573
|
+
lines.push(`- Critical Concerns: ${result.arbitrationResult.criticalConcerns.length}`);
|
|
574
|
+
}
|
|
575
|
+
if (result.arbitrationResult.minorConcerns.length > 0) {
|
|
576
|
+
lines.push(`- Minor Concerns: ${result.arbitrationResult.minorConcerns.length}`);
|
|
577
|
+
}
|
|
578
|
+
}
|
|
202
579
|
lines.push('');
|
|
203
580
|
|
|
204
581
|
lines.push(`### Iteration History`);
|
|
205
582
|
lines.push('');
|
|
206
583
|
|
|
207
584
|
for (const iteration of result.iterations) {
|
|
208
|
-
|
|
585
|
+
const isBest = iteration.iteration === result.bestIteration;
|
|
586
|
+
lines.push(`#### Iteration ${iteration.iteration}${isBest ? ' (BEST)' : ''}`);
|
|
209
587
|
lines.push(`- Score: ${iteration.result.score}%`);
|
|
210
588
|
lines.push(`- Strengths: ${iteration.result.strengths?.length || 0}`);
|
|
211
589
|
lines.push(`- Concerns: ${iteration.result.concerns?.length || 0}`);
|
|
@@ -213,13 +591,21 @@ export function summarizeConsensusProcess(result: ConsensusProcessResult): strin
|
|
|
213
591
|
}
|
|
214
592
|
|
|
215
593
|
if (!result.approved) {
|
|
216
|
-
|
|
217
|
-
if (lastResult?.concerns && lastResult.concerns.length > 0) {
|
|
594
|
+
if (result.finalConcerns && result.finalConcerns.length > 0) {
|
|
218
595
|
lines.push(`### Remaining Concerns`);
|
|
219
596
|
lines.push('');
|
|
220
|
-
for (const concern of
|
|
597
|
+
for (const concern of result.finalConcerns) {
|
|
221
598
|
lines.push(`- ${concern}`);
|
|
222
599
|
}
|
|
600
|
+
lines.push('');
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
if (result.finalRecommendations && result.finalRecommendations.length > 0) {
|
|
604
|
+
lines.push(`### Recommendations`);
|
|
605
|
+
lines.push('');
|
|
606
|
+
for (const rec of result.finalRecommendations) {
|
|
607
|
+
lines.push(`- ${rec}`);
|
|
608
|
+
}
|
|
223
609
|
}
|
|
224
610
|
}
|
|
225
611
|
|
|
@@ -297,3 +683,781 @@ export function getScoreTrend(
|
|
|
297
683
|
if (diff < -5) return 'declining';
|
|
298
684
|
return 'stable';
|
|
299
685
|
}
|
|
686
|
+
|
|
687
|
+
/**
 * Options for the optimized consensus process.
 *
 * Extends the base consensus options with the identifiers used for plan
 * tracking and with switches controlling how reviews are gathered.
 */
export interface OptimizedConsensusOptions extends ConsensusOptions {
  /** Identifier of the milestone the plan belongs to (required for plan tracking). */
  milestoneId: string;

  /** Display name of the milestone, passed along when the plan is saved. */
  milestoneName?: string;

  /** Task identifier; when provided, the plan is tracked as a task plan instead of a milestone plan. */
  taskId?: string;

  /** Display name of the task, passed along when the plan is saved. */
  taskName?: string;

  /** Request reviews from all reviewers concurrently when more than one reviewer is configured. */
  parallelReviews?: boolean;

  /** Extra reviewers to consult in addition to the primary reviewer. */
  additionalReviewers?: AIProvider[];

  /** Marks the project as fullstack, which turns on per-app (frontend/backend/unified) tracking. */
  isFullstack?: boolean;
}
|
|
702
|
+
|
|
703
|
+
/**
 * Consensus result for fullstack projects.
 *
 * Carries everything in the regular consensus result plus the per-app
 * breakdown gathered while the process ran.
 */
export interface FullstackConsensusProcessResult extends ConsensusProcessResult {
  /** Scores broken down per app. */
  appScores: AppConsensusScores;

  /** Whether each app reached approval; only the unified flag is always present. */
  appApproved: {
    frontend?: boolean;
    backend?: boolean;
    unified: boolean;
  };

  /** Reviewer concerns, each tagged with the app it was attributed to. */
  taggedConcerns: TaggedItem[];

  /** Reviewer recommendations, each tagged with the app it was attributed to. */
  taggedRecommendations: TaggedItem[];

  /** Corrections recorded while consensus was being sought. */
  corrections: CorrectionRecord[];
}
|
|
722
|
+
|
|
723
|
+
/**
 * Ask a single reviewer to evaluate a plan and package the answer as feedback.
 *
 * A progress message is emitted before the request and again once the
 * reviewer has answered; the second message reports the elapsed time in
 * whole seconds together with the returned score.
 *
 * @param plan - Plan text submitted for review
 * @param context - Project context forwarded to the reviewer
 * @param reviewer - Provider that performs the review
 * @param config - Consensus configuration forwarded to the review request
 * @param onProgress - Optional progress callback (phase, message)
 * @returns Feedback record; missing concerns/recommendations become empty
 *          arrays and a missing analysis becomes an empty string
 */
async function collectReviewerFeedback(
  plan: string,
  context: string,
  reviewer: AIProvider,
  config: Partial<ConsensusConfig>,
  onProgress?: (phase: string, message: string) => void
): Promise<ReviewerFeedback> {
  onProgress?.('consensus', `Requesting review from ${reviewer}...`);

  // Wall-clock timing of the review request, reported in whole seconds.
  const requestedAt = Date.now();
  const review = await requestReviewerConsensus(plan, context, reviewer, config);
  const elapsedSeconds = Math.round((Date.now() - requestedAt) / 1000);

  onProgress?.('consensus', `${reviewer} review completed in ${elapsedSeconds}s - score: ${review.score}%`);

  const feedback: ReviewerFeedback = {
    reviewer,
    score: review.score,
    timestamp: new Date().toISOString(),
    concerns: review.concerns || [],
    recommendations: review.recommendations || [],
    analysis: review.analysis || '',
  };
  return feedback;
}
|
|
750
|
+
|
|
751
|
+
/**
 * Gather feedback from several reviewers concurrently.
 *
 * Every reviewer is queried at the same time. A reviewer whose request fails
 * is reported through the progress callback and left out of the result, so
 * one failing provider does not abort the whole round.
 *
 * @param plan - Plan text submitted for review
 * @param context - Project context forwarded to each reviewer
 * @param reviewers - Providers to query
 * @param config - Consensus configuration forwarded to each review request
 * @param onProgress - Optional progress callback (phase, message)
 * @returns Feedback from the reviewers that succeeded, in reviewer order
 */
async function collectAllFeedback(
  plan: string,
  context: string,
  reviewers: AIProvider[],
  config: Partial<ConsensusConfig>,
  onProgress?: (phase: string, message: string) => void
): Promise<ReviewerFeedback[]> {
  onProgress?.('consensus', `Collecting feedback from ${reviewers.length} reviewer(s) in parallel...`);

  // Wraps a single review so that a failure yields null instead of rejecting.
  const reviewSafely = async (reviewer: AIProvider): Promise<ReviewerFeedback | null> => {
    try {
      return await collectReviewerFeedback(plan, context, reviewer, config, onProgress);
    } catch (error) {
      onProgress?.('consensus', `${reviewer} review failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
      return null;
    }
  };

  const outcomes = await Promise.all(reviewers.map(reviewer => reviewSafely(reviewer)));

  // Keep only the reviews that actually produced feedback.
  const collected: ReviewerFeedback[] = [];
  for (const outcome of outcomes) {
    if (outcome !== null) {
      collected.push(outcome);
    }
  }
  return collected;
}
|
|
774
|
+
|
|
775
|
+
/** Terms whose presence suggests a piece of feedback is about the frontend app. */
const FRONTEND_KEYWORDS: readonly string[] = [
  'react', 'component', 'jsx', 'tsx', 'css', 'tailwind', 'ui', 'user interface',
  'button', 'form', 'input', 'modal', 'page', 'router', 'navigation', 'state management',
  'redux', 'zustand', 'vite', 'frontend', 'front-end', 'client', 'browser', 'dom',
  'styling', 'layout', 'responsive', 'animation', 'hook', 'usestate', 'useeffect',
  'shadcn', 'radix', 'tailwindcss', 'vitest', 'jest', 'testing-library', 'playwright',
];

/** Terms whose presence suggests a piece of feedback is about the backend app. */
const BACKEND_KEYWORDS: readonly string[] = [
  'fastapi', 'api', 'endpoint', 'route', 'database', 'sql', 'postgresql', 'neon',
  'model', 'schema', 'migration', 'orm', 'sqlalchemy', 'pydantic', 'validation',
  'authentication', 'authorization', 'jwt', 'token', 'middleware', 'backend', 'back-end',
  'server', 'python', 'pytest', 'alembic', 'celery', 'redis', 'cache', 'queue',
  'repository', 'service', 'crud', 'rest', 'graphql', 'websocket',
];

/**
 * Build a whole-word pattern for a lower-case keyword.
 *
 * The keyword must not be glued to other letters or digits on either side,
 * but a trailing plural ("s" / "es") is accepted so that "components" or
 * "caches" still count.
 */
function compileKeywordPattern(keyword: string): RegExp {
  const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return new RegExp(`(?:^|[^a-z0-9])${escaped}(?:e?s)?(?![a-z0-9])`);
}

// Compiled once at module load so categorization does not rebuild patterns per call.
const FRONTEND_KEYWORD_PATTERNS: readonly RegExp[] = FRONTEND_KEYWORDS.map(compileKeywordPattern);
const BACKEND_KEYWORD_PATTERNS: readonly RegExp[] = BACKEND_KEYWORDS.map(compileKeywordPattern);

/** Count how many distinct keyword patterns occur in the (lower-cased) text. */
function countKeywordHits(patterns: readonly RegExp[], lowerText: string): number {
  return patterns.filter(pattern => pattern.test(lowerText)).length;
}

/**
 * Categorize a concern or recommendation by app target.
 *
 * The text is lower-cased and checked against a frontend and a backend keyword
 * list. Each keyword counts at most once, however often it appears. The side
 * with strictly more hits wins, provided it has at least two hits; anything
 * else (ties, fewer than two hits, empty text) is 'unified'.
 *
 * Keywords are matched as whole words rather than raw substrings. Substring
 * matching let short keywords fire inside unrelated words ('ui' in "build",
 * 'dom' in "random", 'orm' in "information") and counted one word for several
 * nested keywords ('router' also scoring backend 'route', 'postgresql' also
 * scoring 'sql'), which skewed the two-hit threshold.
 *
 * @param content - Free-form feedback text
 * @returns 'frontend', 'backend', or 'unified'
 */
function categorizeByContent(content: string): FeedbackAppTarget {
  const lowerContent = content.toLowerCase();

  const frontendMatches = countKeywordHits(FRONTEND_KEYWORD_PATTERNS, lowerContent);
  const backendMatches = countKeywordHits(BACKEND_KEYWORD_PATTERNS, lowerContent);

  if (frontendMatches > backendMatches && frontendMatches >= 2) {
    return 'frontend';
  }
  if (backendMatches > frontendMatches && backendMatches >= 2) {
    return 'backend';
  }
  return 'unified';
}
|
|
825
|
+
|
|
826
|
+
/**
 * Tag every concern and recommendation with the app it most likely targets
 * and derive a rough per-app score from how many items landed on each app.
 *
 * Scoring: the base is 100 when at least one item exists and 0 when there are
 * none. Each item attributed to an app (concern or recommendation alike)
 * takes 5 points off that app's score, never going below 0.
 *
 * @param concerns - Concern texts to tag
 * @param recommendations - Recommendation texts to tag
 * @returns The tagged concerns, tagged recommendations and per-app scores
 */
function categorizeFeedbackItems(
  concerns: string[],
  recommendations: string[]
): {
  taggedConcerns: TaggedItem[];
  taggedRecommendations: TaggedItem[];
  appScores: { frontend: number; backend: number; unified: number };
} {
  const tag = (text: string): TaggedItem => ({
    app: categorizeByContent(text),
    content: text,
  });

  const taggedConcerns: TaggedItem[] = concerns.map(text => tag(text));
  const taggedRecommendations: TaggedItem[] = recommendations.map(text => tag(text));

  // Single pass over both lists: how many items were attributed to each app.
  const itemsPerApp = { frontend: 0, backend: 0, unified: 0 };
  for (const item of [...taggedConcerns, ...taggedRecommendations]) {
    switch (item.app) {
      case 'frontend':
        itemsPerApp.frontend += 1;
        break;
      case 'backend':
        itemsPerApp.backend += 1;
        break;
      case 'unified':
        itemsPerApp.unified += 1;
        break;
    }
  }

  // With no feedback items at all the base (and therefore every score) is 0.
  const hasItems = taggedConcerns.length + taggedRecommendations.length > 0;
  const baseScore = hasItems ? 100 : 0;
  const scoreFor = (itemCount: number): number => Math.max(0, baseScore - itemCount * 5);

  return {
    taggedConcerns,
    taggedRecommendations,
    appScores: {
      frontend: scoreFor(itemsPerApp.frontend),
      backend: scoreFor(itemsPerApp.backend),
      unified: scoreFor(itemsPerApp.unified),
    },
  };
}
|
|
870
|
+
|
|
871
|
+
/**
 * Derive per-app scores from reviewer feedback and tagged items.
 *
 * The base score is the rounded mean of all reviewer scores (0 when there is
 * no feedback). Penalties are then applied per app:
 * - frontend / backend: 2 points per concern plus 1 point per recommendation
 *   attributed to that app, floored at 0; an app with no attributed items
 *   keeps the base score unchanged.
 * - unified: 2 points per unified concern, floored at 0. Unified
 *   recommendations carry no penalty.
 *
 * @param allFeedback - Feedback from every reviewer in this round
 * @param taggedConcerns - Concerns tagged by app
 * @param taggedRecommendations - Recommendations tagged by app
 * @returns Scores for frontend, backend and unified
 */
function calculateAppScores(
  allFeedback: ReviewerFeedback[],
  taggedConcerns: TaggedItem[],
  taggedRecommendations: TaggedItem[]
): AppConsensusScores {
  // Rounded average of the reviewer scores.
  let scoreSum = 0;
  for (const feedback of allFeedback) {
    scoreSum += feedback.score;
  }
  const baseScore = allFeedback.length > 0 ? Math.round(scoreSum / allFeedback.length) : 0;

  const countFor = (items: TaggedItem[], target: string): number => {
    let count = 0;
    for (const item of items) {
      if (item.app === target) {
        count += 1;
      }
    }
    return count;
  };

  // Frontend and backend share one rule; only the attributed item counts differ.
  const appScore = (target: string): number => {
    const concernCount = countFor(taggedConcerns, target);
    const recCount = countFor(taggedRecommendations, target);
    if (concernCount === 0 && recCount === 0) {
      return baseScore;
    }
    return Math.max(0, baseScore - (concernCount * 2 + recCount));
  };

  const unifiedConcernCount = countFor(taggedConcerns, 'unified');

  return {
    frontend: appScore('frontend'),
    backend: appScore('backend'),
    unified: Math.max(0, baseScore - unifiedConcernCount * 2),
  };
}
|
|
911
|
+
|
|
912
|
+
/**
|
|
913
|
+
* Optimized consensus process that batches feedback and reduces API calls
|
|
914
|
+
*
|
|
915
|
+
* Key optimizations:
|
|
916
|
+
* 1. Plans stored in files, not regenerated from scratch
|
|
917
|
+
* 2. Collects ALL reviewer feedback before revision
|
|
918
|
+
* 3. Claude revises ONCE per round with combined feedback
|
|
919
|
+
* 4. Parallel reviews when multiple reviewers configured
|
|
920
|
+
* 5. Per-app tracking for fullstack projects (frontend/backend/unified)
|
|
921
|
+
*
|
|
922
|
+
* @param initialPlan - The initial plan to seek consensus on
|
|
923
|
+
* @param context - Project context for review
|
|
924
|
+
* @param options - Consensus options including tracking info
|
|
925
|
+
* @returns Consensus process result (FullstackConsensusProcessResult for fullstack projects)
|
|
926
|
+
*/
|
|
927
|
+
export async function runOptimizedConsensusProcess(
|
|
928
|
+
initialPlan: string,
|
|
929
|
+
context: string,
|
|
930
|
+
options: OptimizedConsensusOptions
|
|
931
|
+
): Promise<ConsensusProcessResult | FullstackConsensusProcessResult> {
|
|
932
|
+
const {
|
|
933
|
+
projectDir,
|
|
934
|
+
config = {},
|
|
935
|
+
onIteration,
|
|
936
|
+
onRevision,
|
|
937
|
+
onConcerns,
|
|
938
|
+
onArbitration,
|
|
939
|
+
onProgress,
|
|
940
|
+
milestoneId,
|
|
941
|
+
milestoneName,
|
|
942
|
+
taskId,
|
|
943
|
+
taskName,
|
|
944
|
+
parallelReviews = true,
|
|
945
|
+
additionalReviewers = [],
|
|
946
|
+
isFullstack = false,
|
|
947
|
+
} = options;
|
|
948
|
+
|
|
949
|
+
// Derive language from isFullstack for revision prompts
|
|
950
|
+
const language: 'python' | 'typescript' | 'fullstack' = isFullstack ? 'fullstack' : 'python';
|
|
951
|
+
|
|
952
|
+
const {
|
|
953
|
+
threshold = DEFAULT_CONSENSUS_CONFIG.threshold,
|
|
954
|
+
maxIterations = DEFAULT_CONSENSUS_CONFIG.maxIterations,
|
|
955
|
+
reviewer = DEFAULT_CONSENSUS_CONFIG.reviewer,
|
|
956
|
+
arbitrator = DEFAULT_CONSENSUS_CONFIG.arbitrator,
|
|
957
|
+
enableArbitration = DEFAULT_CONSENSUS_CONFIG.enableArbitration,
|
|
958
|
+
arbitrationThreshold = DEFAULT_CONSENSUS_CONFIG.arbitrationThreshold,
|
|
959
|
+
stuckIterations = DEFAULT_CONSENSUS_CONFIG.stuckIterations,
|
|
960
|
+
} = config;
|
|
961
|
+
|
|
962
|
+
// Initialize plan storage with fullstack support
|
|
963
|
+
const planStorage = createPlanStorage(projectDir, isFullstack);
|
|
964
|
+
await planStorage.initialize();
|
|
965
|
+
|
|
966
|
+
// Track per-app consensus for fullstack projects
|
|
967
|
+
const appScoresHistory: { frontend: number[]; backend: number[]; unified: number[] } = {
|
|
968
|
+
frontend: [],
|
|
969
|
+
backend: [],
|
|
970
|
+
unified: [],
|
|
971
|
+
};
|
|
972
|
+
const allTaggedConcerns: TaggedItem[] = [];
|
|
973
|
+
const allTaggedRecommendations: TaggedItem[] = [];
|
|
974
|
+
const corrections: CorrectionRecord[] = [];
|
|
975
|
+
|
|
976
|
+
// Determine all reviewers
|
|
977
|
+
const allReviewers: AIProvider[] = [reviewer, ...additionalReviewers.filter(r => r !== reviewer)];
|
|
978
|
+
|
|
979
|
+
const iterations: ConsensusIteration[] = [];
|
|
980
|
+
const scores: number[] = [];
|
|
981
|
+
let currentPlan = initialPlan;
|
|
982
|
+
let iteration = 0;
|
|
983
|
+
|
|
984
|
+
// Track the best plan
|
|
985
|
+
let bestPlan = initialPlan;
|
|
986
|
+
let bestScore = 0;
|
|
987
|
+
let bestIteration = 0;
|
|
988
|
+
let lastConcerns: string[] = [];
|
|
989
|
+
let lastRecommendations: string[] = [];
|
|
990
|
+
let lastAnalysis = '';
|
|
991
|
+
|
|
992
|
+
const startTime = Date.now();
|
|
993
|
+
|
|
994
|
+
onProgress?.('consensus', `Using optimized consensus with ${allReviewers.join(', ')} as reviewer(s)`);
|
|
995
|
+
onProgress?.('consensus', `Plan tracking: milestone=${milestoneId}${taskId ? `, task=${taskId}` : ''}`);
|
|
996
|
+
if (isFullstack) {
|
|
997
|
+
onProgress?.('consensus', `Fullstack mode enabled - tracking per-app consensus (frontend/backend/unified)`);
|
|
998
|
+
}
|
|
999
|
+
|
|
1000
|
+
// Save initial plan to storage
|
|
1001
|
+
await planStorage.savePlan(currentPlan, taskId ? 'task' : 'milestone', {
|
|
1002
|
+
milestoneId,
|
|
1003
|
+
milestoneName,
|
|
1004
|
+
taskId,
|
|
1005
|
+
taskName,
|
|
1006
|
+
});
|
|
1007
|
+
|
|
1008
|
+
while (iteration < maxIterations) {
|
|
1009
|
+
iteration++;
|
|
1010
|
+
|
|
1011
|
+
// Check timeout
|
|
1012
|
+
const totalElapsed = Date.now() - startTime;
|
|
1013
|
+
if (totalElapsed > DEFAULT_CONSENSUS_TIMEOUT_MS) {
|
|
1014
|
+
onProgress?.('consensus', `Consensus timeout after ${Math.round(totalElapsed / 60000)} minutes`);
|
|
1015
|
+
|
|
1016
|
+
if (enableArbitration) {
|
|
1017
|
+
try {
|
|
1018
|
+
const arbitrationResult = await requestArbitratorDecision(
|
|
1019
|
+
bestPlan,
|
|
1020
|
+
lastAnalysis,
|
|
1021
|
+
`Timeout. Best score: ${bestScore}%. Concerns: ${lastConcerns.slice(0, 3).join('; ')}`,
|
|
1022
|
+
iteration,
|
|
1023
|
+
scores,
|
|
1024
|
+
arbitrator
|
|
1025
|
+
);
|
|
1026
|
+
|
|
1027
|
+
if (onArbitration) onArbitration(arbitrationResult);
|
|
1028
|
+
|
|
1029
|
+
return {
|
|
1030
|
+
approved: arbitrationResult.approved || arbitrationResult.score >= 80,
|
|
1031
|
+
finalPlan: bestPlan,
|
|
1032
|
+
finalScore: arbitrationResult.score,
|
|
1033
|
+
bestPlan,
|
|
1034
|
+
bestScore: arbitrationResult.score,
|
|
1035
|
+
bestIteration,
|
|
1036
|
+
iterations,
|
|
1037
|
+
totalIterations: iteration - 1,
|
|
1038
|
+
finalConcerns: arbitrationResult.minorConcerns || lastConcerns,
|
|
1039
|
+
finalRecommendations: arbitrationResult.suggestedChanges || lastRecommendations,
|
|
1040
|
+
arbitrated: true,
|
|
1041
|
+
arbitrationResult,
|
|
1042
|
+
timedOut: true,
|
|
1043
|
+
};
|
|
1044
|
+
} catch {
|
|
1045
|
+
// Fall through to accept best plan
|
|
1046
|
+
}
|
|
1047
|
+
}
|
|
1048
|
+
|
|
1049
|
+
return {
|
|
1050
|
+
approved: bestScore >= arbitrationThreshold,
|
|
1051
|
+
finalPlan: bestPlan,
|
|
1052
|
+
finalScore: bestScore,
|
|
1053
|
+
bestPlan,
|
|
1054
|
+
bestScore,
|
|
1055
|
+
bestIteration,
|
|
1056
|
+
iterations,
|
|
1057
|
+
totalIterations: iteration - 1,
|
|
1058
|
+
finalConcerns: lastConcerns,
|
|
1059
|
+
finalRecommendations: lastRecommendations,
|
|
1060
|
+
arbitrated: false,
|
|
1061
|
+
timedOut: true,
|
|
1062
|
+
};
|
|
1063
|
+
}
|
|
1064
|
+
|
|
1065
|
+
const elapsedMinutes = Math.round((Date.now() - startTime) / 60000);
|
|
1066
|
+
onProgress?.('consensus', `Iteration ${iteration} starting (${elapsedMinutes}min elapsed)`);
|
|
1067
|
+
|
|
1068
|
+
// Clear previous feedback for this round
|
|
1069
|
+
await planStorage.clearFeedback(milestoneId, taskId);
|
|
1070
|
+
|
|
1071
|
+
// ============================================
|
|
1072
|
+
// OPTIMIZATION: Collect ALL feedback in parallel
|
|
1073
|
+
// ============================================
|
|
1074
|
+
let allFeedback: ReviewerFeedback[];
|
|
1075
|
+
|
|
1076
|
+
if (parallelReviews && allReviewers.length > 1) {
|
|
1077
|
+
allFeedback = await collectAllFeedback(currentPlan, context, allReviewers, config, onProgress);
|
|
1078
|
+
} else {
|
|
1079
|
+
// Sequential fallback
|
|
1080
|
+
allFeedback = [];
|
|
1081
|
+
for (const rev of allReviewers) {
|
|
1082
|
+
const feedback = await collectReviewerFeedback(currentPlan, context, rev, config, onProgress);
|
|
1083
|
+
allFeedback.push(feedback);
|
|
1084
|
+
}
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1087
|
+
// Combine all concerns and recommendations
|
|
1088
|
+
const allConcerns = [...new Set(allFeedback.flatMap(f => f.concerns))];
|
|
1089
|
+
const allRecommendations = [...new Set(allFeedback.flatMap(f => f.recommendations))];
|
|
1090
|
+
const combinedAnalysis = allFeedback.map(f => `[${f.reviewer}] ${f.analysis}`).join('\n\n');
|
|
1091
|
+
|
|
1092
|
+
lastConcerns = allConcerns;
|
|
1093
|
+
lastRecommendations = allRecommendations;
|
|
1094
|
+
|
|
1095
|
+
// ============================================
|
|
1096
|
+
// FULLSTACK: Categorize feedback by app target
|
|
1097
|
+
// ============================================
|
|
1098
|
+
let currentAppScores: AppConsensusScores = { unified: 0 };
|
|
1099
|
+
let iterationTaggedConcerns: TaggedItem[] = [];
|
|
1100
|
+
let iterationTaggedRecs: TaggedItem[] = [];
|
|
1101
|
+
|
|
1102
|
+
if (isFullstack) {
|
|
1103
|
+
onProgress?.('consensus', 'Categorizing feedback by app (frontend/backend/unified)...');
|
|
1104
|
+
|
|
1105
|
+
// Categorize concerns and recommendations
|
|
1106
|
+
const categorized = categorizeFeedbackItems(allConcerns, allRecommendations);
|
|
1107
|
+
iterationTaggedConcerns = categorized.taggedConcerns;
|
|
1108
|
+
iterationTaggedRecs = categorized.taggedRecommendations;
|
|
1109
|
+
|
|
1110
|
+
// Calculate per-app scores
|
|
1111
|
+
currentAppScores = calculateAppScores(allFeedback, iterationTaggedConcerns, iterationTaggedRecs);
|
|
1112
|
+
|
|
1113
|
+
// Track scores history
|
|
1114
|
+
appScoresHistory.frontend.push(currentAppScores.frontend || 0);
|
|
1115
|
+
appScoresHistory.backend.push(currentAppScores.backend || 0);
|
|
1116
|
+
appScoresHistory.unified.push(currentAppScores.unified);
|
|
1117
|
+
|
|
1118
|
+
// Accumulate tagged items for final result
|
|
1119
|
+
allTaggedConcerns.push(...iterationTaggedConcerns);
|
|
1120
|
+
allTaggedRecommendations.push(...iterationTaggedRecs);
|
|
1121
|
+
|
|
1122
|
+
// Log per-app breakdown
|
|
1123
|
+
const frontendConcerns = iterationTaggedConcerns.filter(c => c.app === 'frontend').length;
|
|
1124
|
+
const backendConcerns = iterationTaggedConcerns.filter(c => c.app === 'backend').length;
|
|
1125
|
+
const unifiedConcerns = iterationTaggedConcerns.filter(c => c.app === 'unified').length;
|
|
1126
|
+
|
|
1127
|
+
onProgress?.('consensus', `Per-app concerns: FE=${frontendConcerns}, BE=${backendConcerns}, Unified=${unifiedConcerns}`);
|
|
1128
|
+
onProgress?.('consensus', `Per-app scores: FE=${currentAppScores.frontend}%, BE=${currentAppScores.backend}%, Unified=${currentAppScores.unified}%`);
|
|
1129
|
+
|
|
1130
|
+
// Save feedback to per-app directories
|
|
1131
|
+
for (const feedback of allFeedback) {
|
|
1132
|
+
// Create fullstack feedback with tagged items
|
|
1133
|
+
const fullstackFeedback: FullstackReviewerFeedback = {
|
|
1134
|
+
...feedback,
|
|
1135
|
+
appScores: currentAppScores,
|
|
1136
|
+
taggedConcerns: iterationTaggedConcerns.filter(c =>
|
|
1137
|
+
feedback.concerns.some(fc => fc === c.content)
|
|
1138
|
+
),
|
|
1139
|
+
taggedRecommendations: iterationTaggedRecs.filter(r =>
|
|
1140
|
+
feedback.recommendations.some(fr => fr === r.content)
|
|
1141
|
+
),
|
|
1142
|
+
isFullstack: true,
|
|
1143
|
+
};
|
|
1144
|
+
|
|
1145
|
+
// Save to all app directories
|
|
1146
|
+
await planStorage.saveFullstackFeedback(
|
|
1147
|
+
fullstackFeedback,
|
|
1148
|
+
taskId ? 'task' : 'milestone',
|
|
1149
|
+
milestoneId,
|
|
1150
|
+
taskId
|
|
1151
|
+
);
|
|
1152
|
+
}
|
|
1153
|
+
} else {
|
|
1154
|
+
// Non-fullstack: save feedback without app categorization
|
|
1155
|
+
for (const feedback of allFeedback) {
|
|
1156
|
+
await planStorage.saveFeedback(feedback, milestoneId, taskId);
|
|
1157
|
+
}
|
|
1158
|
+
currentAppScores = { unified: allFeedback.length > 0
|
|
1159
|
+
? Math.round(allFeedback.reduce((sum, f) => sum + f.score, 0) / allFeedback.length)
|
|
1160
|
+
: 0 };
|
|
1161
|
+
}
|
|
1162
|
+
|
|
1163
|
+
// Calculate combined score (average of all reviewers)
|
|
1164
|
+
const combinedScore = allFeedback.length > 0
|
|
1165
|
+
? Math.round(allFeedback.reduce((sum, f) => sum + f.score, 0) / allFeedback.length)
|
|
1166
|
+
: 0;
|
|
1167
|
+
|
|
1168
|
+
scores.push(combinedScore);
|
|
1169
|
+
lastAnalysis = combinedAnalysis;
|
|
1170
|
+
|
|
1171
|
+
// Create consensus result for tracking
|
|
1172
|
+
const consensusResult: ConsensusResult = {
|
|
1173
|
+
score: combinedScore,
|
|
1174
|
+
analysis: combinedAnalysis,
|
|
1175
|
+
concerns: allConcerns,
|
|
1176
|
+
recommendations: allRecommendations,
|
|
1177
|
+
approved: combinedScore >= threshold,
|
|
1178
|
+
strengths: [],
|
|
1179
|
+
rawResponse: combinedAnalysis,
|
|
1180
|
+
};
|
|
1181
|
+
|
|
1182
|
+
// Record iteration
|
|
1183
|
+
const iterationRecord: ConsensusIteration = {
|
|
1184
|
+
iteration,
|
|
1185
|
+
plan: currentPlan,
|
|
1186
|
+
timestamp: new Date().toISOString(),
|
|
1187
|
+
result: consensusResult,
|
|
1188
|
+
};
|
|
1189
|
+
iterations.push(iterationRecord);
|
|
1190
|
+
|
|
1191
|
+
if (onIteration) onIteration(iteration, consensusResult);
|
|
1192
|
+
if (onConcerns) onConcerns(allConcerns, allRecommendations);
|
|
1193
|
+
|
|
1194
|
+
// Update best plan tracking
|
|
1195
|
+
if (combinedScore > bestScore) {
|
|
1196
|
+
bestScore = combinedScore;
|
|
1197
|
+
bestPlan = currentPlan;
|
|
1198
|
+
bestIteration = iteration;
|
|
1199
|
+
}
|
|
1200
|
+
|
|
1201
|
+
// Save plan with updated score (including per-app scores for fullstack)
|
|
1202
|
+
await planStorage.savePlan(currentPlan, taskId ? 'task' : 'milestone', {
|
|
1203
|
+
milestoneId,
|
|
1204
|
+
milestoneName,
|
|
1205
|
+
taskId,
|
|
1206
|
+
taskName,
|
|
1207
|
+
score: combinedScore,
|
|
1208
|
+
frontendScore: isFullstack ? currentAppScores.frontend : undefined,
|
|
1209
|
+
backendScore: isFullstack ? currentAppScores.backend : undefined,
|
|
1210
|
+
unifiedScore: isFullstack ? currentAppScores.unified : undefined,
|
|
1211
|
+
});
|
|
1212
|
+
|
|
1213
|
+
// Record correction for fullstack tracking
|
|
1214
|
+
if (isFullstack && iteration > 1) {
|
|
1215
|
+
const previousScore = scores.length >= 2 ? scores[scores.length - 2] : 0;
|
|
1216
|
+
const correction: CorrectionRecord = {
|
|
1217
|
+
id: `correction-${iteration}`,
|
|
1218
|
+
timestamp: new Date().toISOString(),
|
|
1219
|
+
app: 'unified', // Top-level correction
|
|
1220
|
+
previousScore,
|
|
1221
|
+
newScore: combinedScore,
|
|
1222
|
+
concerns: lastConcerns.slice(0, 5),
|
|
1223
|
+
changes: lastRecommendations.slice(0, 3),
|
|
1224
|
+
reviewer,
|
|
1225
|
+
};
|
|
1226
|
+
corrections.push(correction);
|
|
1227
|
+
|
|
1228
|
+
await planStorage.recordCorrection(
|
|
1229
|
+
taskId ? 'task' : 'milestone',
|
|
1230
|
+
correction,
|
|
1231
|
+
milestoneId,
|
|
1232
|
+
taskId
|
|
1233
|
+
);
|
|
1234
|
+
}
|
|
1235
|
+
|
|
1236
|
+
// Record in project state
|
|
1237
|
+
await recordConsensusIteration(projectDir, iterationRecord);
|
|
1238
|
+
|
|
1239
|
+
onProgress?.('consensus', `Combined score: ${combinedScore}% (from ${allFeedback.length} reviewer(s))`);
|
|
1240
|
+
|
|
1241
|
+
// Check if consensus reached
|
|
1242
|
+
if (combinedScore >= threshold) {
|
|
1243
|
+
onProgress?.('consensus', `Consensus reached at ${combinedScore}%`);
|
|
1244
|
+
await planStorage.updateStatus('approved', taskId ? 'task' : 'milestone', milestoneId, taskId);
|
|
1245
|
+
|
|
1246
|
+
// Update per-app approval status for fullstack
|
|
1247
|
+
if (isFullstack) {
|
|
1248
|
+
const feApproved = (currentAppScores.frontend || 0) >= threshold;
|
|
1249
|
+
const beApproved = (currentAppScores.backend || 0) >= threshold;
|
|
1250
|
+
|
|
1251
|
+
await planStorage.updateAppApproval(taskId ? 'task' : 'milestone', 'frontend', feApproved, currentAppScores.frontend || 0, milestoneId, taskId);
|
|
1252
|
+
await planStorage.updateAppApproval(taskId ? 'task' : 'milestone', 'backend', beApproved, currentAppScores.backend || 0, milestoneId, taskId);
|
|
1253
|
+
await planStorage.updateAppApproval(taskId ? 'task' : 'milestone', 'unified', true, currentAppScores.unified, milestoneId, taskId);
|
|
1254
|
+
|
|
1255
|
+
onProgress?.('consensus', `Per-app approval: FE=${feApproved}, BE=${beApproved}, Unified=true`);
|
|
1256
|
+
|
|
1257
|
+
return {
|
|
1258
|
+
approved: true,
|
|
1259
|
+
finalPlan: currentPlan,
|
|
1260
|
+
finalScore: combinedScore,
|
|
1261
|
+
bestPlan: currentPlan,
|
|
1262
|
+
bestScore: combinedScore,
|
|
1263
|
+
bestIteration: iteration,
|
|
1264
|
+
iterations,
|
|
1265
|
+
totalIterations: iteration,
|
|
1266
|
+
finalConcerns: allConcerns,
|
|
1267
|
+
finalRecommendations: allRecommendations,
|
|
1268
|
+
arbitrated: false,
|
|
1269
|
+
// Fullstack-specific fields
|
|
1270
|
+
appScores: currentAppScores,
|
|
1271
|
+
appApproved: {
|
|
1272
|
+
frontend: feApproved,
|
|
1273
|
+
backend: beApproved,
|
|
1274
|
+
unified: true,
|
|
1275
|
+
},
|
|
1276
|
+
taggedConcerns: allTaggedConcerns,
|
|
1277
|
+
taggedRecommendations: allTaggedRecommendations,
|
|
1278
|
+
corrections,
|
|
1279
|
+
} as FullstackConsensusProcessResult;
|
|
1280
|
+
}
|
|
1281
|
+
|
|
1282
|
+
return {
|
|
1283
|
+
approved: true,
|
|
1284
|
+
finalPlan: currentPlan,
|
|
1285
|
+
finalScore: combinedScore,
|
|
1286
|
+
bestPlan: currentPlan,
|
|
1287
|
+
bestScore: combinedScore,
|
|
1288
|
+
bestIteration: iteration,
|
|
1289
|
+
iterations,
|
|
1290
|
+
totalIterations: iteration,
|
|
1291
|
+
finalConcerns: allConcerns,
|
|
1292
|
+
finalRecommendations: allRecommendations,
|
|
1293
|
+
arbitrated: false,
|
|
1294
|
+
};
|
|
1295
|
+
}
|
|
1296
|
+
|
|
1297
|
+
// Check if stuck
|
|
1298
|
+
if (isStuck(scores, stuckIterations) && enableArbitration) {
|
|
1299
|
+
onProgress?.('consensus', `Consensus stuck - invoking ${arbitrator} for arbitration`);
|
|
1300
|
+
|
|
1301
|
+
try {
|
|
1302
|
+
const arbitrationResult = await requestArbitratorDecision(
|
|
1303
|
+
bestPlan,
|
|
1304
|
+
combinedAnalysis,
|
|
1305
|
+
`Stuck after ${iteration} iterations. Scores: ${scores.slice(-stuckIterations).join(', ')}`,
|
|
1306
|
+
iteration,
|
|
1307
|
+
scores,
|
|
1308
|
+
arbitrator
|
|
1309
|
+
);
|
|
1310
|
+
|
|
1311
|
+
if (onArbitration) onArbitration(arbitrationResult);
|
|
1312
|
+
|
|
1313
|
+
if (arbitrationResult.approved || arbitrationResult.score >= arbitrationThreshold) {
|
|
1314
|
+
onProgress?.('arbitration', `Arbitrator approved with ${arbitrationResult.score}%`);
|
|
1315
|
+
await planStorage.updateStatus('approved', taskId ? 'task' : 'milestone', milestoneId, taskId);
|
|
1316
|
+
|
|
1317
|
+
if (isFullstack) {
|
|
1318
|
+
const feApproved = (currentAppScores.frontend || 0) >= arbitrationThreshold;
|
|
1319
|
+
const beApproved = (currentAppScores.backend || 0) >= arbitrationThreshold;
|
|
1320
|
+
|
|
1321
|
+
return {
|
|
1322
|
+
approved: true,
|
|
1323
|
+
finalPlan: bestPlan,
|
|
1324
|
+
finalScore: arbitrationResult.score,
|
|
1325
|
+
bestPlan,
|
|
1326
|
+
bestScore: arbitrationResult.score,
|
|
1327
|
+
bestIteration,
|
|
1328
|
+
iterations,
|
|
1329
|
+
totalIterations: iteration,
|
|
1330
|
+
finalConcerns: arbitrationResult.minorConcerns || allConcerns,
|
|
1331
|
+
finalRecommendations: arbitrationResult.suggestedChanges || allRecommendations,
|
|
1332
|
+
arbitrated: true,
|
|
1333
|
+
arbitrationResult,
|
|
1334
|
+
// Fullstack-specific fields
|
|
1335
|
+
appScores: currentAppScores,
|
|
1336
|
+
appApproved: {
|
|
1337
|
+
frontend: feApproved,
|
|
1338
|
+
backend: beApproved,
|
|
1339
|
+
unified: true,
|
|
1340
|
+
},
|
|
1341
|
+
taggedConcerns: allTaggedConcerns,
|
|
1342
|
+
taggedRecommendations: allTaggedRecommendations,
|
|
1343
|
+
corrections,
|
|
1344
|
+
} as FullstackConsensusProcessResult;
|
|
1345
|
+
}
|
|
1346
|
+
|
|
1347
|
+
return {
|
|
1348
|
+
approved: true,
|
|
1349
|
+
finalPlan: bestPlan,
|
|
1350
|
+
finalScore: arbitrationResult.score,
|
|
1351
|
+
bestPlan,
|
|
1352
|
+
bestScore: arbitrationResult.score,
|
|
1353
|
+
bestIteration,
|
|
1354
|
+
iterations,
|
|
1355
|
+
totalIterations: iteration,
|
|
1356
|
+
finalConcerns: arbitrationResult.minorConcerns || allConcerns,
|
|
1357
|
+
finalRecommendations: arbitrationResult.suggestedChanges || allRecommendations,
|
|
1358
|
+
arbitrated: true,
|
|
1359
|
+
arbitrationResult,
|
|
1360
|
+
};
|
|
1361
|
+
}
|
|
1362
|
+
} catch (arbError) {
|
|
1363
|
+
onProgress?.('arbitration', `Arbitration failed: ${arbError instanceof Error ? arbError.message : 'Unknown error'}`);
|
|
1364
|
+
}
|
|
1365
|
+
}
|
|
1366
|
+
|
|
1367
|
+
// ============================================
|
|
1368
|
+
// OPTIMIZATION: Single revision with ALL feedback
|
|
1369
|
+
// ============================================
|
|
1370
|
+
if (iteration < maxIterations) {
|
|
1371
|
+
onProgress?.('consensus', `Revising plan with combined feedback from ${allFeedback.length} reviewer(s)...`);
|
|
1372
|
+
|
|
1373
|
+
const revisionProgress = onProgress
|
|
1374
|
+
? (msg: string) => onProgress('consensus', `[revision] ${msg}`)
|
|
1375
|
+
: undefined;
|
|
1376
|
+
|
|
1377
|
+
// Use Claude to revise with ALL combined feedback (single API call)
|
|
1378
|
+
const revisionResult = await revisePlan(
|
|
1379
|
+
currentPlan,
|
|
1380
|
+
combinedAnalysis,
|
|
1381
|
+
allConcerns,
|
|
1382
|
+
language,
|
|
1383
|
+
revisionProgress
|
|
1384
|
+
);
|
|
1385
|
+
|
|
1386
|
+
if (revisionResult.success && revisionResult.response) {
|
|
1387
|
+
currentPlan = revisionResult.response;
|
|
1388
|
+
|
|
1389
|
+
// Save revised plan
|
|
1390
|
+
await planStorage.savePlan(currentPlan, taskId ? 'task' : 'milestone', {
|
|
1391
|
+
milestoneId,
|
|
1392
|
+
milestoneName,
|
|
1393
|
+
taskId,
|
|
1394
|
+
taskName,
|
|
1395
|
+
});
|
|
1396
|
+
|
|
1397
|
+
if (onRevision) onRevision(iteration, currentPlan);
|
|
1398
|
+
} else {
|
|
1399
|
+
onProgress?.('consensus', `Revision failed, continuing with best plan`);
|
|
1400
|
+
currentPlan = bestPlan;
|
|
1401
|
+
}
|
|
1402
|
+
}
|
|
1403
|
+
}
|
|
1404
|
+
|
|
1405
|
+
// Max iterations reached
|
|
1406
|
+
await planStorage.updateStatus('reviewing', taskId ? 'task' : 'milestone', milestoneId, taskId);
|
|
1407
|
+
|
|
1408
|
+
// Final per-app scores from history
|
|
1409
|
+
const finalAppScores: AppConsensusScores = isFullstack ? {
|
|
1410
|
+
frontend: appScoresHistory.frontend.length > 0
|
|
1411
|
+
? appScoresHistory.frontend[appScoresHistory.frontend.length - 1]
|
|
1412
|
+
: undefined,
|
|
1413
|
+
backend: appScoresHistory.backend.length > 0
|
|
1414
|
+
? appScoresHistory.backend[appScoresHistory.backend.length - 1]
|
|
1415
|
+
: undefined,
|
|
1416
|
+
unified: appScoresHistory.unified.length > 0
|
|
1417
|
+
? appScoresHistory.unified[appScoresHistory.unified.length - 1]
|
|
1418
|
+
: bestScore,
|
|
1419
|
+
} : { unified: bestScore };
|
|
1420
|
+
|
|
1421
|
+
if (isFullstack) {
|
|
1422
|
+
const feApproved = (finalAppScores.frontend || 0) >= threshold;
|
|
1423
|
+
const beApproved = (finalAppScores.backend || 0) >= threshold;
|
|
1424
|
+
|
|
1425
|
+
return {
|
|
1426
|
+
approved: false,
|
|
1427
|
+
finalPlan: bestPlan,
|
|
1428
|
+
finalScore: bestScore,
|
|
1429
|
+
bestPlan,
|
|
1430
|
+
bestScore,
|
|
1431
|
+
bestIteration,
|
|
1432
|
+
iterations,
|
|
1433
|
+
totalIterations: iteration,
|
|
1434
|
+
finalConcerns: lastConcerns,
|
|
1435
|
+
finalRecommendations: lastRecommendations,
|
|
1436
|
+
arbitrated: false,
|
|
1437
|
+
// Fullstack-specific fields
|
|
1438
|
+
appScores: finalAppScores,
|
|
1439
|
+
appApproved: {
|
|
1440
|
+
frontend: feApproved,
|
|
1441
|
+
backend: beApproved,
|
|
1442
|
+
unified: bestScore >= threshold,
|
|
1443
|
+
},
|
|
1444
|
+
taggedConcerns: allTaggedConcerns,
|
|
1445
|
+
taggedRecommendations: allTaggedRecommendations,
|
|
1446
|
+
corrections,
|
|
1447
|
+
} as FullstackConsensusProcessResult;
|
|
1448
|
+
}
|
|
1449
|
+
|
|
1450
|
+
return {
|
|
1451
|
+
approved: false,
|
|
1452
|
+
finalPlan: bestPlan,
|
|
1453
|
+
finalScore: bestScore,
|
|
1454
|
+
bestPlan,
|
|
1455
|
+
bestScore,
|
|
1456
|
+
bestIteration,
|
|
1457
|
+
iterations,
|
|
1458
|
+
totalIterations: iteration,
|
|
1459
|
+
finalConcerns: lastConcerns,
|
|
1460
|
+
finalRecommendations: lastRecommendations,
|
|
1461
|
+
arbitrated: false,
|
|
1462
|
+
};
|
|
1463
|
+
}
|