popeye-cli 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/.env.example +24 -1
  2. package/CONTRIBUTING.md +275 -0
  3. package/OPEN_SOURCE_MANIFESTO.md +172 -0
  4. package/README.md +340 -27
  5. package/dist/adapters/claude.d.ts +28 -2
  6. package/dist/adapters/claude.d.ts.map +1 -1
  7. package/dist/adapters/claude.js +273 -20
  8. package/dist/adapters/claude.js.map +1 -1
  9. package/dist/adapters/grok.d.ts +73 -0
  10. package/dist/adapters/grok.d.ts.map +1 -0
  11. package/dist/adapters/grok.js +430 -0
  12. package/dist/adapters/grok.js.map +1 -0
  13. package/dist/adapters/openai.d.ts +1 -1
  14. package/dist/adapters/openai.d.ts.map +1 -1
  15. package/dist/adapters/openai.js +6 -1
  16. package/dist/adapters/openai.js.map +1 -1
  17. package/dist/auth/grok.d.ts +73 -0
  18. package/dist/auth/grok.d.ts.map +1 -0
  19. package/dist/auth/grok.js +211 -0
  20. package/dist/auth/grok.js.map +1 -0
  21. package/dist/auth/index.d.ts +9 -6
  22. package/dist/auth/index.d.ts.map +1 -1
  23. package/dist/auth/index.js +23 -6
  24. package/dist/auth/index.js.map +1 -1
  25. package/dist/cli/commands/auth.d.ts +1 -1
  26. package/dist/cli/commands/auth.d.ts.map +1 -1
  27. package/dist/cli/commands/auth.js +79 -8
  28. package/dist/cli/commands/auth.js.map +1 -1
  29. package/dist/cli/commands/create.d.ts.map +1 -1
  30. package/dist/cli/commands/create.js +15 -4
  31. package/dist/cli/commands/create.js.map +1 -1
  32. package/dist/cli/interactive.d.ts.map +1 -1
  33. package/dist/cli/interactive.js +406 -35
  34. package/dist/cli/interactive.js.map +1 -1
  35. package/dist/config/defaults.d.ts +3 -0
  36. package/dist/config/defaults.d.ts.map +1 -1
  37. package/dist/config/defaults.js +9 -0
  38. package/dist/config/defaults.js.map +1 -1
  39. package/dist/config/index.d.ts +9 -0
  40. package/dist/config/index.d.ts.map +1 -1
  41. package/dist/config/index.js +16 -3
  42. package/dist/config/index.js.map +1 -1
  43. package/dist/config/schema.d.ts +27 -0
  44. package/dist/config/schema.d.ts.map +1 -1
  45. package/dist/config/schema.js +24 -3
  46. package/dist/config/schema.js.map +1 -1
  47. package/dist/generators/fullstack.d.ts +32 -0
  48. package/dist/generators/fullstack.d.ts.map +1 -0
  49. package/dist/generators/fullstack.js +497 -0
  50. package/dist/generators/fullstack.js.map +1 -0
  51. package/dist/generators/index.d.ts +4 -3
  52. package/dist/generators/index.d.ts.map +1 -1
  53. package/dist/generators/index.js +15 -1
  54. package/dist/generators/index.js.map +1 -1
  55. package/dist/generators/python.d.ts +17 -1
  56. package/dist/generators/python.d.ts.map +1 -1
  57. package/dist/generators/python.js +34 -21
  58. package/dist/generators/python.js.map +1 -1
  59. package/dist/generators/templates/fullstack.d.ts +113 -0
  60. package/dist/generators/templates/fullstack.d.ts.map +1 -0
  61. package/dist/generators/templates/fullstack.js +1004 -0
  62. package/dist/generators/templates/fullstack.js.map +1 -0
  63. package/dist/generators/typescript.d.ts +19 -1
  64. package/dist/generators/typescript.d.ts.map +1 -1
  65. package/dist/generators/typescript.js +37 -21
  66. package/dist/generators/typescript.js.map +1 -1
  67. package/dist/types/cli.d.ts +4 -0
  68. package/dist/types/cli.d.ts.map +1 -1
  69. package/dist/types/cli.js.map +1 -1
  70. package/dist/types/consensus.d.ts +119 -2
  71. package/dist/types/consensus.d.ts.map +1 -1
  72. package/dist/types/consensus.js +12 -1
  73. package/dist/types/consensus.js.map +1 -1
  74. package/dist/types/project.d.ts +76 -0
  75. package/dist/types/project.d.ts.map +1 -1
  76. package/dist/types/project.js +1 -1
  77. package/dist/types/project.js.map +1 -1
  78. package/dist/types/workflow.d.ts +170 -16
  79. package/dist/types/workflow.d.ts.map +1 -1
  80. package/dist/types/workflow.js +26 -3
  81. package/dist/types/workflow.js.map +1 -1
  82. package/dist/workflow/consensus.d.ts +29 -3
  83. package/dist/workflow/consensus.d.ts.map +1 -1
  84. package/dist/workflow/consensus.js +334 -27
  85. package/dist/workflow/consensus.js.map +1 -1
  86. package/dist/workflow/execution-mode.d.ts +2 -0
  87. package/dist/workflow/execution-mode.d.ts.map +1 -1
  88. package/dist/workflow/execution-mode.js +20 -0
  89. package/dist/workflow/execution-mode.js.map +1 -1
  90. package/dist/workflow/index.d.ts +2 -0
  91. package/dist/workflow/index.d.ts.map +1 -1
  92. package/dist/workflow/index.js +11 -0
  93. package/dist/workflow/index.js.map +1 -1
  94. package/dist/workflow/milestone-workflow.d.ts +2 -0
  95. package/dist/workflow/milestone-workflow.d.ts.map +1 -1
  96. package/dist/workflow/milestone-workflow.js +19 -2
  97. package/dist/workflow/milestone-workflow.js.map +1 -1
  98. package/dist/workflow/plan-mode.d.ts +66 -2
  99. package/dist/workflow/plan-mode.d.ts.map +1 -1
  100. package/dist/workflow/plan-mode.js +187 -11
  101. package/dist/workflow/plan-mode.js.map +1 -1
  102. package/dist/workflow/plan-storage.d.ts +252 -8
  103. package/dist/workflow/plan-storage.d.ts.map +1 -1
  104. package/dist/workflow/plan-storage.js +580 -33
  105. package/dist/workflow/plan-storage.js.map +1 -1
  106. package/dist/workflow/project-verification.js +1 -1
  107. package/dist/workflow/project-verification.js.map +1 -1
  108. package/dist/workflow/task-workflow.d.ts +2 -0
  109. package/dist/workflow/task-workflow.d.ts.map +1 -1
  110. package/dist/workflow/task-workflow.js +23 -1
  111. package/dist/workflow/task-workflow.js.map +1 -1
  112. package/dist/workflow/test-runner.d.ts +8 -0
  113. package/dist/workflow/test-runner.d.ts.map +1 -1
  114. package/dist/workflow/test-runner.js +92 -0
  115. package/dist/workflow/test-runner.js.map +1 -1
  116. package/dist/workflow/workspace-manager.d.ts +342 -0
  117. package/dist/workflow/workspace-manager.d.ts.map +1 -0
  118. package/dist/workflow/workspace-manager.js +733 -0
  119. package/dist/workflow/workspace-manager.js.map +1 -0
  120. package/package.json +1 -1
  121. package/src/adapters/claude.ts +322 -25
  122. package/src/adapters/grok.ts +492 -0
  123. package/src/adapters/openai.ts +8 -2
  124. package/src/auth/grok.ts +255 -0
  125. package/src/auth/index.ts +27 -9
  126. package/src/cli/commands/auth.ts +89 -10
  127. package/src/cli/commands/create.ts +13 -4
  128. package/src/cli/interactive.ts +453 -34
  129. package/src/config/defaults.ts +9 -0
  130. package/src/config/index.ts +17 -3
  131. package/src/config/schema.ts +25 -3
  132. package/src/generators/fullstack.ts +551 -0
  133. package/src/generators/index.ts +25 -1
  134. package/src/generators/python.ts +65 -21
  135. package/src/generators/templates/fullstack.ts +1047 -0
  136. package/src/generators/typescript.ts +69 -21
  137. package/src/types/cli.ts +4 -0
  138. package/src/types/consensus.ts +135 -3
  139. package/src/types/project.ts +82 -1
  140. package/src/types/workflow.ts +58 -4
  141. package/src/workflow/consensus.ts +461 -31
  142. package/src/workflow/execution-mode.ts +32 -0
  143. package/src/workflow/index.ts +12 -0
  144. package/src/workflow/milestone-workflow.ts +24 -2
  145. package/src/workflow/plan-mode.ts +238 -10
  146. package/src/workflow/plan-storage.ts +835 -35
  147. package/src/workflow/project-verification.ts +1 -1
  148. package/src/workflow/task-workflow.ts +29 -1
  149. package/src/workflow/test-runner.ts +110 -0
  150. package/src/workflow/workspace-manager.ts +912 -0
@@ -4,13 +4,28 @@
4
4
  * with arbitration support when consensus cannot be reached
5
5
  */
6
6
 
7
- import type { ConsensusResult, ConsensusIteration, ConsensusConfig, ArbitrationResult, AIProvider } from '../types/consensus.js';
7
+ import type {
8
+ ConsensusResult,
9
+ ConsensusIteration,
10
+ ConsensusConfig,
11
+ ArbitrationResult,
12
+ AIProvider,
13
+ TaggedItem,
14
+ AppConsensusScores,
15
+ CorrectionRecord,
16
+ } from '../types/consensus.js';
8
17
  import { DEFAULT_CONSENSUS_CONFIG } from '../types/consensus.js';
9
18
  import { requestConsensus as requestOpenAIConsensus } from '../adapters/openai.js';
10
19
  import { requestConsensus as requestGeminiConsensus, requestArbitration as requestGeminiArbitration } from '../adapters/gemini.js';
20
+ import { requestConsensus as requestGrokConsensus, requestArbitration as requestGrokArbitration } from '../adapters/grok.js';
11
21
  import { revisePlan } from '../adapters/claude.js';
12
22
  import { recordConsensusIteration } from '../state/index.js';
13
- import { createPlanStorage, type ReviewerFeedback } from './plan-storage.js';
23
+ import {
24
+ createPlanStorage,
25
+ type ReviewerFeedback,
26
+ type FullstackReviewerFeedback,
27
+ type FeedbackAppTarget,
28
+ } from './plan-storage.js';
14
29
 
15
30
  /**
16
31
  * Options for consensus iteration
@@ -18,6 +33,10 @@ import { createPlanStorage, type ReviewerFeedback } from './plan-storage.js';
18
33
  export interface ConsensusOptions {
19
34
  projectDir: string;
20
35
  config?: Partial<ConsensusConfig>;
36
+ /** Whether this is a fullstack project (enables per-app tracking) */
37
+ isFullstack?: boolean;
38
+ /** Project language for revision prompts */
39
+ language?: 'python' | 'typescript' | 'fullstack';
21
40
  onIteration?: (iteration: number, result: ConsensusResult) => void;
22
41
  onRevision?: (iteration: number, revisedPlan: string) => void;
23
42
  onConcerns?: (concerns: string[], recommendations: string[]) => void;
@@ -46,7 +65,7 @@ export interface ConsensusProcessResult {
46
65
  }
47
66
 
48
67
  /**
49
- * Request consensus from the configured reviewer (OpenAI or Gemini)
68
+ * Request consensus from the configured reviewer (OpenAI, Gemini, or Grok)
50
69
  */
51
70
  async function requestReviewerConsensus(
52
71
  plan: string,
@@ -61,9 +80,34 @@ async function requestReviewerConsensus(
61
80
  maxTokens: config.maxTokens,
62
81
  });
63
82
  }
83
+ if (reviewer === 'grok') {
84
+ return requestGrokConsensus(plan, context, {
85
+ model: config.grokModel,
86
+ temperature: config.temperature,
87
+ maxTokens: config.maxTokens,
88
+ });
89
+ }
64
90
  return requestOpenAIConsensus(plan, context, config);
65
91
  }
66
92
 
93
/**
 * Ask the configured arbitrator to rule on a plan that failed to reach consensus.
 *
 * Only two arbitration back-ends are wired up here: Grok and Gemini. Any
 * `arbitrator` value other than 'grok' (including 'openai') is routed to Gemini.
 *
 * @param plan - The plan under dispute
 * @param reviewerFeedback - Feedback text from the reviewer
 * @param claudeFeedback - Claude's side of the disagreement
 * @param iterations - Number of consensus iterations completed so far
 * @param scores - Score recorded for each iteration
 * @param arbitrator - Provider selected to arbitrate
 * @returns The arbitration result produced by the selected adapter
 */
async function requestArbitratorDecision(
  plan: string,
  reviewerFeedback: string,
  claudeFeedback: string,
  iterations: number,
  scores: number[],
  arbitrator: AIProvider
): Promise<ArbitrationResult> {
  switch (arbitrator) {
    case 'grok':
      return requestGrokArbitration(plan, reviewerFeedback, claudeFeedback, iterations, scores);
    default:
      // Gemini is the fallback arbitrator for every other provider value.
      return requestGeminiArbitration(plan, reviewerFeedback, claudeFeedback, iterations, scores);
  }
}
110
+
67
111
  /**
68
112
  * Check if the consensus process is "stuck" (not improving)
69
113
  * Detects both:
@@ -183,6 +227,8 @@ export async function iterateUntilConsensus(
183
227
  const {
184
228
  projectDir,
185
229
  config = {},
230
+ isFullstack = false,
231
+ language: providedLanguage,
186
232
  onIteration,
187
233
  onRevision,
188
234
  onConcerns,
@@ -190,6 +236,9 @@ export async function iterateUntilConsensus(
190
236
  onProgress,
191
237
  } = options;
192
238
 
239
+ // Derive language from isFullstack if not explicitly provided
240
+ const language = providedLanguage || (isFullstack ? 'fullstack' : 'python');
241
+
193
242
  const {
194
243
  threshold = DEFAULT_CONSENSUS_CONFIG.threshold,
195
244
  maxIterations = DEFAULT_CONSENSUS_CONFIG.maxIterations,
@@ -232,12 +281,13 @@ export async function iterateUntilConsensus(
232
281
 
233
282
  try {
234
283
  arbitrationAttempts++;
235
- const arbitrationResult = await requestGeminiArbitration(
284
+ const arbitrationResult = await requestArbitratorDecision(
236
285
  bestPlan,
237
286
  lastAnalysis,
238
287
  `Consensus timed out after ${Math.round(totalElapsed / 60000)} minutes. Best score: ${bestScore}%. Main concerns: ${lastConcerns.slice(0, 3).join('; ')}`,
239
288
  iteration,
240
- scores
289
+ scores,
290
+ arbitrator
241
291
  );
242
292
 
243
293
  if (onArbitration) {
@@ -363,12 +413,13 @@ export async function iterateUntilConsensus(
363
413
  onProgress?.('arbitration', `Consensus stuck at ${bestScore}%, invoking ${arbitrator} arbitrator (attempt ${arbitrationAttempts}/${maxArbitrationAttempts})...`);
364
414
 
365
415
  try {
366
- const arbitrationResult = await requestGeminiArbitration(
416
+ const arbitrationResult = await requestArbitratorDecision(
367
417
  bestPlan,
368
418
  lastAnalysis,
369
419
  `The plan has been revised ${iteration} times. Best score achieved: ${bestScore}%. The reviewer's main concerns are: ${lastConcerns.slice(0, 3).join('; ')}`,
370
420
  iteration,
371
- scores
421
+ scores,
422
+ arbitrator
372
423
  );
373
424
 
374
425
  if (onArbitration) {
@@ -409,7 +460,8 @@ export async function iterateUntilConsensus(
409
460
  const revisionResult = await revisePlan(
410
461
  bestPlan,
411
462
  arbitrationResult.reasoning,
412
- arbitrationResult.suggestedChanges
463
+ arbitrationResult.suggestedChanges,
464
+ language
413
465
  );
414
466
  if (revisionResult.success && revisionResult.response) {
415
467
  currentPlan = revisionResult.response;
@@ -459,6 +511,7 @@ export async function iterateUntilConsensus(
459
511
  currentPlan,
460
512
  consensusResult.analysis,
461
513
  concerns,
514
+ language,
462
515
  revisionProgress
463
516
  );
464
517
 
@@ -643,6 +696,28 @@ export interface OptimizedConsensusOptions extends ConsensusOptions {
643
696
  parallelReviews?: boolean;
644
697
  /** Additional reviewers beyond primary */
645
698
  additionalReviewers?: AIProvider[];
699
+ /** Whether this is a fullstack project (enables per-app tracking) */
700
+ isFullstack?: boolean;
701
+ }
702
+
703
/**
 * Consensus outcome for fullstack projects.
 *
 * Extends the regular process result with the per-app (frontend / backend /
 * unified) breakdown collected while iterating.
 */
export interface FullstackConsensusProcessResult extends ConsensusProcessResult {
  /** Scores broken down by app target */
  appScores: AppConsensusScores;

  /** Approval flag per app target; only `unified` is always present */
  appApproved: {
    frontend?: boolean;
    backend?: boolean;
    unified: boolean;
  };

  /** Concerns, each tagged with the app it was categorized under */
  taggedConcerns: TaggedItem[];

  /** Recommendations, each tagged with the app it was categorized under */
  taggedRecommendations: TaggedItem[];

  /** Correction records accumulated during the consensus run */
  corrections: CorrectionRecord[];
}
647
722
 
648
723
  /**
@@ -697,6 +772,143 @@ async function collectAllFeedback(
697
772
  return results.filter((f): f is ReviewerFeedback => f !== null);
698
773
  }
699
774
 
775
/**
 * Categorize a concern or recommendation by app target.
 *
 * The text is lower-cased and scanned for frontend and backend keywords. The
 * side with strictly more keyword hits wins, provided it has at least two
 * hits; otherwise the item is 'unified'.
 *
 * Keywords are matched as whole words or phrases (with an optional plural
 * "s"/"es"), not as raw substrings. Substring matching let short keywords
 * fire inside unrelated words ('ui' in "build", 'dom' in "random", 'rest' in
 * "restore", 'route' in "router", 'api' in "fastapi"), which mis-tagged
 * feedback and double-counted single terms.
 *
 * @param content - Free-text concern or recommendation
 * @returns 'frontend', 'backend', or 'unified'
 */
function categorizeByContent(content: string): FeedbackAppTarget {
  const lowerContent = content.toLowerCase();

  // Frontend indicators
  const frontendKeywords = [
    'react', 'component', 'jsx', 'tsx', 'css', 'tailwind', 'ui', 'user interface',
    'button', 'form', 'input', 'modal', 'page', 'router', 'navigation', 'state management',
    'redux', 'zustand', 'vite', 'frontend', 'front-end', 'client', 'browser', 'dom',
    'styling', 'layout', 'responsive', 'animation', 'hook', 'usestate', 'useeffect',
    'shadcn', 'radix', 'tailwindcss', 'vitest', 'jest', 'testing-library', 'playwright',
  ];

  // Backend indicators
  const backendKeywords = [
    'fastapi', 'api', 'endpoint', 'route', 'database', 'sql', 'postgresql', 'neon',
    'model', 'schema', 'migration', 'orm', 'sqlalchemy', 'pydantic', 'validation',
    'authentication', 'authorization', 'jwt', 'token', 'middleware', 'backend', 'back-end',
    'server', 'python', 'pytest', 'alembic', 'celery', 'redis', 'cache', 'queue',
    'repository', 'service', 'crud', 'rest', 'graphql', 'websocket',
  ];

  // Count how many keywords appear as standalone words/phrases in the text.
  const countMatches = (keywords: string[]): number =>
    keywords.filter(keyword => {
      // Escape regex metacharacters so keywords are treated literally.
      const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      // No letter/digit directly before or after; allow a simple plural suffix.
      const pattern = new RegExp(`(?<![a-z0-9])${escaped}(?:s|es)?(?![a-z0-9])`);
      return pattern.test(lowerContent);
    }).length;

  const frontendMatches = countMatches(frontendKeywords);
  const backendMatches = countMatches(backendKeywords);

  // A side must both lead and reach two hits to claim the item.
  if (frontendMatches > backendMatches && frontendMatches >= 2) {
    return 'frontend';
  }
  if (backendMatches > frontendMatches && backendMatches >= 2) {
    return 'backend';
  }
  return 'unified';
}
825
+
826
/**
 * Tag every concern and recommendation with an app target and derive a rough
 * per-app score from how many items landed on each app.
 *
 * Scoring: if there is at least one item overall, each app starts at 100 and
 * loses 5 points per concern or recommendation tagged to it (floored at 0).
 * With no items at all, every app score is 0.
 *
 * @param concerns - Concern texts to categorize
 * @param recommendations - Recommendation texts to categorize
 * @returns The tagged concerns, tagged recommendations, and per-app scores
 */
function categorizeFeedbackItems(
  concerns: string[],
  recommendations: string[]
): {
  taggedConcerns: TaggedItem[];
  taggedRecommendations: TaggedItem[];
  appScores: { frontend: number; backend: number; unified: number };
} {
  // Attach the app target determined from the item's own text.
  const tag = (text: string): TaggedItem => ({
    app: categorizeByContent(text),
    content: text,
  });

  const taggedConcerns: TaggedItem[] = concerns.map(text => tag(text));
  const taggedRecommendations: TaggedItem[] = recommendations.map(text => tag(text));

  const everyItem = [...taggedConcerns, ...taggedRecommendations];
  const startingScore = everyItem.length > 0 ? 100 : 0;

  // Score for one app: starting score minus 5 per item tagged to that app.
  const scoreFor = (app: TaggedItem['app']): number => {
    const hits = everyItem.filter(item => item.app === app).length;
    return Math.max(0, startingScore - hits * 5);
  };

  return {
    taggedConcerns,
    taggedRecommendations,
    appScores: {
      frontend: scoreFor('frontend'),
      backend: scoreFor('backend'),
      unified: scoreFor('unified'),
    },
  };
}
870
+
871
/**
 * Derive per-app consensus scores from reviewer feedback and tagged items.
 *
 * The base score is the rounded mean of all reviewer scores (0 when there is
 * no feedback). Each app then takes a penalty:
 *   - frontend / backend: 2 points per concern plus 1 point per recommendation
 *   - unified: 2 points per concern (recommendations are not counted)
 * Penalized scores are floored at 0.
 *
 * @param allFeedback - Feedback from every reviewer for this round
 * @param taggedConcerns - Concerns already tagged by app
 * @param taggedRecommendations - Recommendations already tagged by app
 * @returns Scores for frontend, backend, and unified
 */
function calculateAppScores(
  allFeedback: ReviewerFeedback[],
  taggedConcerns: TaggedItem[],
  taggedRecommendations: TaggedItem[]
): AppConsensusScores {
  // Rounded average of reviewer scores; 0 when nobody reviewed.
  let baseScore = 0;
  if (allFeedback.length > 0) {
    let total = 0;
    for (const entry of allFeedback) {
      total += entry.score;
    }
    baseScore = Math.round(total / allFeedback.length);
  }

  const countFor = (items: TaggedItem[], app: TaggedItem['app']): number =>
    items.filter(item => item.app === app).length;

  // Frontend/backend: untouched base score when nothing is tagged to the app,
  // otherwise base minus (2 per concern + 1 per recommendation), floored at 0.
  const appSpecificScore = (app: TaggedItem['app']): number => {
    const penalty = countFor(taggedConcerns, app) * 2 + countFor(taggedRecommendations, app);
    return penalty === 0 ? baseScore : Math.max(0, baseScore - penalty);
  };

  const frontend = appSpecificScore('frontend');
  const backend = appSpecificScore('backend');

  // Unified only pays for its concerns and is always floored at 0.
  const unified = Math.max(0, baseScore - countFor(taggedConcerns, 'unified') * 2);

  return { frontend, backend, unified };
}
911
+
700
912
  /**
701
913
  * Optimized consensus process that batches feedback and reduces API calls
702
914
  *
@@ -705,17 +917,18 @@ async function collectAllFeedback(
705
917
  * 2. Collects ALL reviewer feedback before revision
706
918
  * 3. Claude revises ONCE per round with combined feedback
707
919
  * 4. Parallel reviews when multiple reviewers configured
920
+ * 5. Per-app tracking for fullstack projects (frontend/backend/unified)
708
921
  *
709
922
  * @param initialPlan - The initial plan to seek consensus on
710
923
  * @param context - Project context for review
711
924
  * @param options - Consensus options including tracking info
712
- * @returns Consensus process result
925
+ * @returns Consensus process result (FullstackConsensusProcessResult for fullstack projects)
713
926
  */
714
927
  export async function runOptimizedConsensusProcess(
715
928
  initialPlan: string,
716
929
  context: string,
717
930
  options: OptimizedConsensusOptions
718
- ): Promise<ConsensusProcessResult> {
931
+ ): Promise<ConsensusProcessResult | FullstackConsensusProcessResult> {
719
932
  const {
720
933
  projectDir,
721
934
  config = {},
@@ -730,8 +943,12 @@ export async function runOptimizedConsensusProcess(
730
943
  taskName,
731
944
  parallelReviews = true,
732
945
  additionalReviewers = [],
946
+ isFullstack = false,
733
947
  } = options;
734
948
 
949
+ // Derive language from isFullstack for revision prompts
950
+ const language: 'python' | 'typescript' | 'fullstack' = isFullstack ? 'fullstack' : 'python';
951
+
735
952
  const {
736
953
  threshold = DEFAULT_CONSENSUS_CONFIG.threshold,
737
954
  maxIterations = DEFAULT_CONSENSUS_CONFIG.maxIterations,
@@ -742,10 +959,20 @@ export async function runOptimizedConsensusProcess(
742
959
  stuckIterations = DEFAULT_CONSENSUS_CONFIG.stuckIterations,
743
960
  } = config;
744
961
 
745
- // Initialize plan storage
746
- const planStorage = createPlanStorage(projectDir);
962
+ // Initialize plan storage with fullstack support
963
+ const planStorage = createPlanStorage(projectDir, isFullstack);
747
964
  await planStorage.initialize();
748
965
 
966
+ // Track per-app consensus for fullstack projects
967
+ const appScoresHistory: { frontend: number[]; backend: number[]; unified: number[] } = {
968
+ frontend: [],
969
+ backend: [],
970
+ unified: [],
971
+ };
972
+ const allTaggedConcerns: TaggedItem[] = [];
973
+ const allTaggedRecommendations: TaggedItem[] = [];
974
+ const corrections: CorrectionRecord[] = [];
975
+
749
976
  // Determine all reviewers
750
977
  const allReviewers: AIProvider[] = [reviewer, ...additionalReviewers.filter(r => r !== reviewer)];
751
978
 
@@ -766,6 +993,9 @@ export async function runOptimizedConsensusProcess(
766
993
 
767
994
  onProgress?.('consensus', `Using optimized consensus with ${allReviewers.join(', ')} as reviewer(s)`);
768
995
  onProgress?.('consensus', `Plan tracking: milestone=${milestoneId}${taskId ? `, task=${taskId}` : ''}`);
996
+ if (isFullstack) {
997
+ onProgress?.('consensus', `Fullstack mode enabled - tracking per-app consensus (frontend/backend/unified)`);
998
+ }
769
999
 
770
1000
  // Save initial plan to storage
771
1001
  await planStorage.savePlan(currentPlan, taskId ? 'task' : 'milestone', {
@@ -785,12 +1015,13 @@ export async function runOptimizedConsensusProcess(
785
1015
 
786
1016
  if (enableArbitration) {
787
1017
  try {
788
- const arbitrationResult = await requestGeminiArbitration(
1018
+ const arbitrationResult = await requestArbitratorDecision(
789
1019
  bestPlan,
790
1020
  lastAnalysis,
791
1021
  `Timeout. Best score: ${bestScore}%. Concerns: ${lastConcerns.slice(0, 3).join('; ')}`,
792
1022
  iteration,
793
- scores
1023
+ scores,
1024
+ arbitrator
794
1025
  );
795
1026
 
796
1027
  if (onArbitration) onArbitration(arbitrationResult);
@@ -853,9 +1084,80 @@ export async function runOptimizedConsensusProcess(
853
1084
  }
854
1085
  }
855
1086
 
856
- // Save all feedback
857
- for (const feedback of allFeedback) {
858
- await planStorage.saveFeedback(feedback, milestoneId, taskId);
1087
+ // Combine all concerns and recommendations
1088
+ const allConcerns = [...new Set(allFeedback.flatMap(f => f.concerns))];
1089
+ const allRecommendations = [...new Set(allFeedback.flatMap(f => f.recommendations))];
1090
+ const combinedAnalysis = allFeedback.map(f => `[${f.reviewer}] ${f.analysis}`).join('\n\n');
1091
+
1092
+ lastConcerns = allConcerns;
1093
+ lastRecommendations = allRecommendations;
1094
+
1095
+ // ============================================
1096
+ // FULLSTACK: Categorize feedback by app target
1097
+ // ============================================
1098
+ let currentAppScores: AppConsensusScores = { unified: 0 };
1099
+ let iterationTaggedConcerns: TaggedItem[] = [];
1100
+ let iterationTaggedRecs: TaggedItem[] = [];
1101
+
1102
+ if (isFullstack) {
1103
+ onProgress?.('consensus', 'Categorizing feedback by app (frontend/backend/unified)...');
1104
+
1105
+ // Categorize concerns and recommendations
1106
+ const categorized = categorizeFeedbackItems(allConcerns, allRecommendations);
1107
+ iterationTaggedConcerns = categorized.taggedConcerns;
1108
+ iterationTaggedRecs = categorized.taggedRecommendations;
1109
+
1110
+ // Calculate per-app scores
1111
+ currentAppScores = calculateAppScores(allFeedback, iterationTaggedConcerns, iterationTaggedRecs);
1112
+
1113
+ // Track scores history
1114
+ appScoresHistory.frontend.push(currentAppScores.frontend || 0);
1115
+ appScoresHistory.backend.push(currentAppScores.backend || 0);
1116
+ appScoresHistory.unified.push(currentAppScores.unified);
1117
+
1118
+ // Accumulate tagged items for final result
1119
+ allTaggedConcerns.push(...iterationTaggedConcerns);
1120
+ allTaggedRecommendations.push(...iterationTaggedRecs);
1121
+
1122
+ // Log per-app breakdown
1123
+ const frontendConcerns = iterationTaggedConcerns.filter(c => c.app === 'frontend').length;
1124
+ const backendConcerns = iterationTaggedConcerns.filter(c => c.app === 'backend').length;
1125
+ const unifiedConcerns = iterationTaggedConcerns.filter(c => c.app === 'unified').length;
1126
+
1127
+ onProgress?.('consensus', `Per-app concerns: FE=${frontendConcerns}, BE=${backendConcerns}, Unified=${unifiedConcerns}`);
1128
+ onProgress?.('consensus', `Per-app scores: FE=${currentAppScores.frontend}%, BE=${currentAppScores.backend}%, Unified=${currentAppScores.unified}%`);
1129
+
1130
+ // Save feedback to per-app directories
1131
+ for (const feedback of allFeedback) {
1132
+ // Create fullstack feedback with tagged items
1133
+ const fullstackFeedback: FullstackReviewerFeedback = {
1134
+ ...feedback,
1135
+ appScores: currentAppScores,
1136
+ taggedConcerns: iterationTaggedConcerns.filter(c =>
1137
+ feedback.concerns.some(fc => fc === c.content)
1138
+ ),
1139
+ taggedRecommendations: iterationTaggedRecs.filter(r =>
1140
+ feedback.recommendations.some(fr => fr === r.content)
1141
+ ),
1142
+ isFullstack: true,
1143
+ };
1144
+
1145
+ // Save to all app directories
1146
+ await planStorage.saveFullstackFeedback(
1147
+ fullstackFeedback,
1148
+ taskId ? 'task' : 'milestone',
1149
+ milestoneId,
1150
+ taskId
1151
+ );
1152
+ }
1153
+ } else {
1154
+ // Non-fullstack: save feedback without app categorization
1155
+ for (const feedback of allFeedback) {
1156
+ await planStorage.saveFeedback(feedback, milestoneId, taskId);
1157
+ }
1158
+ currentAppScores = { unified: allFeedback.length > 0
1159
+ ? Math.round(allFeedback.reduce((sum, f) => sum + f.score, 0) / allFeedback.length)
1160
+ : 0 };
859
1161
  }
860
1162
 
861
1163
  // Calculate combined score (average of all reviewers)
@@ -864,14 +1166,6 @@ export async function runOptimizedConsensusProcess(
864
1166
  : 0;
865
1167
 
866
1168
  scores.push(combinedScore);
867
-
868
- // Combine all concerns and recommendations
869
- const allConcerns = [...new Set(allFeedback.flatMap(f => f.concerns))];
870
- const allRecommendations = [...new Set(allFeedback.flatMap(f => f.recommendations))];
871
- const combinedAnalysis = allFeedback.map(f => `[${f.reviewer}] ${f.analysis}`).join('\n\n');
872
-
873
- lastConcerns = allConcerns;
874
- lastRecommendations = allRecommendations;
875
1169
  lastAnalysis = combinedAnalysis;
876
1170
 
877
1171
  // Create consensus result for tracking
@@ -904,15 +1198,41 @@ export async function runOptimizedConsensusProcess(
904
1198
  bestIteration = iteration;
905
1199
  }
906
1200
 
907
- // Save plan with updated score
1201
+ // Save plan with updated score (including per-app scores for fullstack)
908
1202
  await planStorage.savePlan(currentPlan, taskId ? 'task' : 'milestone', {
909
1203
  milestoneId,
910
1204
  milestoneName,
911
1205
  taskId,
912
1206
  taskName,
913
1207
  score: combinedScore,
1208
+ frontendScore: isFullstack ? currentAppScores.frontend : undefined,
1209
+ backendScore: isFullstack ? currentAppScores.backend : undefined,
1210
+ unifiedScore: isFullstack ? currentAppScores.unified : undefined,
914
1211
  });
915
1212
 
1213
+ // Record correction for fullstack tracking
1214
+ if (isFullstack && iteration > 1) {
1215
+ const previousScore = scores.length >= 2 ? scores[scores.length - 2] : 0;
1216
+ const correction: CorrectionRecord = {
1217
+ id: `correction-${iteration}`,
1218
+ timestamp: new Date().toISOString(),
1219
+ app: 'unified', // Top-level correction
1220
+ previousScore,
1221
+ newScore: combinedScore,
1222
+ concerns: lastConcerns.slice(0, 5),
1223
+ changes: lastRecommendations.slice(0, 3),
1224
+ reviewer,
1225
+ };
1226
+ corrections.push(correction);
1227
+
1228
+ await planStorage.recordCorrection(
1229
+ taskId ? 'task' : 'milestone',
1230
+ correction,
1231
+ milestoneId,
1232
+ taskId
1233
+ );
1234
+ }
1235
+
916
1236
  // Record in project state
917
1237
  await recordConsensusIteration(projectDir, iterationRecord);
918
1238
 
@@ -921,7 +1241,43 @@ export async function runOptimizedConsensusProcess(
921
1241
  // Check if consensus reached
922
1242
  if (combinedScore >= threshold) {
923
1243
  onProgress?.('consensus', `Consensus reached at ${combinedScore}%`);
924
- await planStorage.updateStatus('approved', milestoneId, taskId);
1244
+ await planStorage.updateStatus('approved', taskId ? 'task' : 'milestone', milestoneId, taskId);
1245
+
1246
+ // Update per-app approval status for fullstack
1247
+ if (isFullstack) {
1248
+ const feApproved = (currentAppScores.frontend || 0) >= threshold;
1249
+ const beApproved = (currentAppScores.backend || 0) >= threshold;
1250
+
1251
+ await planStorage.updateAppApproval(taskId ? 'task' : 'milestone', 'frontend', feApproved, currentAppScores.frontend || 0, milestoneId, taskId);
1252
+ await planStorage.updateAppApproval(taskId ? 'task' : 'milestone', 'backend', beApproved, currentAppScores.backend || 0, milestoneId, taskId);
1253
+ await planStorage.updateAppApproval(taskId ? 'task' : 'milestone', 'unified', true, currentAppScores.unified, milestoneId, taskId);
1254
+
1255
+ onProgress?.('consensus', `Per-app approval: FE=${feApproved}, BE=${beApproved}, Unified=true`);
1256
+
1257
+ return {
1258
+ approved: true,
1259
+ finalPlan: currentPlan,
1260
+ finalScore: combinedScore,
1261
+ bestPlan: currentPlan,
1262
+ bestScore: combinedScore,
1263
+ bestIteration: iteration,
1264
+ iterations,
1265
+ totalIterations: iteration,
1266
+ finalConcerns: allConcerns,
1267
+ finalRecommendations: allRecommendations,
1268
+ arbitrated: false,
1269
+ // Fullstack-specific fields
1270
+ appScores: currentAppScores,
1271
+ appApproved: {
1272
+ frontend: feApproved,
1273
+ backend: beApproved,
1274
+ unified: true,
1275
+ },
1276
+ taggedConcerns: allTaggedConcerns,
1277
+ taggedRecommendations: allTaggedRecommendations,
1278
+ corrections,
1279
+ } as FullstackConsensusProcessResult;
1280
+ }
925
1281
 
926
1282
  return {
927
1283
  approved: true,
@@ -943,19 +1299,50 @@ export async function runOptimizedConsensusProcess(
943
1299
  onProgress?.('consensus', `Consensus stuck - invoking ${arbitrator} for arbitration`);
944
1300
 
945
1301
  try {
946
- const arbitrationResult = await requestGeminiArbitration(
1302
+ const arbitrationResult = await requestArbitratorDecision(
947
1303
  bestPlan,
948
1304
  combinedAnalysis,
949
1305
  `Stuck after ${iteration} iterations. Scores: ${scores.slice(-stuckIterations).join(', ')}`,
950
1306
  iteration,
951
- scores
1307
+ scores,
1308
+ arbitrator
952
1309
  );
953
1310
 
954
1311
  if (onArbitration) onArbitration(arbitrationResult);
955
1312
 
956
1313
  if (arbitrationResult.approved || arbitrationResult.score >= arbitrationThreshold) {
957
1314
  onProgress?.('arbitration', `Arbitrator approved with ${arbitrationResult.score}%`);
958
- await planStorage.updateStatus('approved', milestoneId, taskId);
1315
+ await planStorage.updateStatus('approved', taskId ? 'task' : 'milestone', milestoneId, taskId);
1316
+
1317
+ if (isFullstack) {
1318
+ const feApproved = (currentAppScores.frontend || 0) >= arbitrationThreshold;
1319
+ const beApproved = (currentAppScores.backend || 0) >= arbitrationThreshold;
1320
+
1321
+ return {
1322
+ approved: true,
1323
+ finalPlan: bestPlan,
1324
+ finalScore: arbitrationResult.score,
1325
+ bestPlan,
1326
+ bestScore: arbitrationResult.score,
1327
+ bestIteration,
1328
+ iterations,
1329
+ totalIterations: iteration,
1330
+ finalConcerns: arbitrationResult.minorConcerns || allConcerns,
1331
+ finalRecommendations: arbitrationResult.suggestedChanges || allRecommendations,
1332
+ arbitrated: true,
1333
+ arbitrationResult,
1334
+ // Fullstack-specific fields
1335
+ appScores: currentAppScores,
1336
+ appApproved: {
1337
+ frontend: feApproved,
1338
+ backend: beApproved,
1339
+ unified: true,
1340
+ },
1341
+ taggedConcerns: allTaggedConcerns,
1342
+ taggedRecommendations: allTaggedRecommendations,
1343
+ corrections,
1344
+ } as FullstackConsensusProcessResult;
1345
+ }
959
1346
 
960
1347
  return {
961
1348
  approved: true,
@@ -992,6 +1379,7 @@ export async function runOptimizedConsensusProcess(
992
1379
  currentPlan,
993
1380
  combinedAnalysis,
994
1381
  allConcerns,
1382
+ language,
995
1383
  revisionProgress
996
1384
  );
997
1385
 
@@ -1015,7 +1403,49 @@ export async function runOptimizedConsensusProcess(
1015
1403
  }
1016
1404
 
1017
1405
  // Max iterations reached
1018
- await planStorage.updateStatus('reviewing', milestoneId, taskId);
1406
+ await planStorage.updateStatus('reviewing', taskId ? 'task' : 'milestone', milestoneId, taskId);
1407
+
1408
+ // Final per-app scores from history
1409
+ const finalAppScores: AppConsensusScores = isFullstack ? {
1410
+ frontend: appScoresHistory.frontend.length > 0
1411
+ ? appScoresHistory.frontend[appScoresHistory.frontend.length - 1]
1412
+ : undefined,
1413
+ backend: appScoresHistory.backend.length > 0
1414
+ ? appScoresHistory.backend[appScoresHistory.backend.length - 1]
1415
+ : undefined,
1416
+ unified: appScoresHistory.unified.length > 0
1417
+ ? appScoresHistory.unified[appScoresHistory.unified.length - 1]
1418
+ : bestScore,
1419
+ } : { unified: bestScore };
1420
+
1421
+ if (isFullstack) {
1422
+ const feApproved = (finalAppScores.frontend || 0) >= threshold;
1423
+ const beApproved = (finalAppScores.backend || 0) >= threshold;
1424
+
1425
+ return {
1426
+ approved: false,
1427
+ finalPlan: bestPlan,
1428
+ finalScore: bestScore,
1429
+ bestPlan,
1430
+ bestScore,
1431
+ bestIteration,
1432
+ iterations,
1433
+ totalIterations: iteration,
1434
+ finalConcerns: lastConcerns,
1435
+ finalRecommendations: lastRecommendations,
1436
+ arbitrated: false,
1437
+ // Fullstack-specific fields
1438
+ appScores: finalAppScores,
1439
+ appApproved: {
1440
+ frontend: feApproved,
1441
+ backend: beApproved,
1442
+ unified: bestScore >= threshold,
1443
+ },
1444
+ taggedConcerns: allTaggedConcerns,
1445
+ taggedRecommendations: allTaggedRecommendations,
1446
+ corrections,
1447
+ } as FullstackConsensusProcessResult;
1448
+ }
1019
1449
 
1020
1450
  return {
1021
1451
  approved: false,