@agile-vibe-coding/avc 0.1.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/agent-loader.js +21 -0
- package/cli/agents/agent-selector.md +152 -0
- package/cli/agents/architecture-recommender.md +418 -0
- package/cli/agents/code-implementer.md +117 -0
- package/cli/agents/code-validator.md +80 -0
- package/cli/agents/context-reviewer-epic.md +101 -0
- package/cli/agents/context-reviewer-story.md +92 -0
- package/cli/agents/context-writer-epic.md +145 -0
- package/cli/agents/context-writer-story.md +111 -0
- package/cli/agents/database-deep-dive.md +470 -0
- package/cli/agents/database-recommender.md +634 -0
- package/cli/agents/doc-distributor.md +176 -0
- package/cli/agents/doc-writer-epic.md +42 -0
- package/cli/agents/doc-writer-story.md +43 -0
- package/cli/agents/documentation-updater.md +203 -0
- package/cli/agents/duplicate-detector.md +110 -0
- package/cli/agents/epic-story-decomposer.md +559 -0
- package/cli/agents/feature-context-generator.md +91 -0
- package/cli/agents/gap-checker-epic.md +52 -0
- package/cli/agents/impact-checker-story.md +51 -0
- package/cli/agents/migration-guide-generator.md +305 -0
- package/cli/agents/mission-scope-generator.md +143 -0
- package/cli/agents/mission-scope-validator.md +146 -0
- package/cli/agents/project-context-extractor.md +122 -0
- package/cli/agents/project-documentation-creator.json +226 -0
- package/cli/agents/project-documentation-creator.md +595 -0
- package/cli/agents/question-prefiller.md +269 -0
- package/cli/agents/refiner-epic.md +39 -0
- package/cli/agents/refiner-story.md +42 -0
- package/cli/agents/scaffolding-generator.md +99 -0
- package/cli/agents/seed-validator.md +71 -0
- package/cli/agents/story-doc-enricher.md +133 -0
- package/cli/agents/story-scope-reviewer.md +147 -0
- package/cli/agents/story-splitter.md +83 -0
- package/cli/agents/suggestion-business-analyst.md +88 -0
- package/cli/agents/suggestion-deployment-architect.md +263 -0
- package/cli/agents/suggestion-product-manager.md +129 -0
- package/cli/agents/suggestion-security-specialist.md +156 -0
- package/cli/agents/suggestion-technical-architect.md +269 -0
- package/cli/agents/suggestion-ux-researcher.md +93 -0
- package/cli/agents/task-subtask-decomposer.md +188 -0
- package/cli/agents/validator-documentation.json +183 -0
- package/cli/agents/validator-documentation.md +455 -0
- package/cli/agents/validator-selector.md +211 -0
- package/cli/ansi-colors.js +21 -0
- package/cli/api-reference-tool.js +368 -0
- package/cli/build-docs.js +29 -8
- package/cli/ceremony-history.js +369 -0
- package/cli/checks/catalog.json +76 -0
- package/cli/checks/code/quality.json +26 -0
- package/cli/checks/code/testing.json +14 -0
- package/cli/checks/code/traceability.json +26 -0
- package/cli/checks/cross-refs/epic.json +171 -0
- package/cli/checks/cross-refs/story.json +149 -0
- package/cli/checks/epic/api.json +114 -0
- package/cli/checks/epic/backend.json +126 -0
- package/cli/checks/epic/cloud.json +126 -0
- package/cli/checks/epic/data.json +102 -0
- package/cli/checks/epic/database.json +114 -0
- package/cli/checks/epic/developer.json +182 -0
- package/cli/checks/epic/devops.json +174 -0
- package/cli/checks/epic/frontend.json +162 -0
- package/cli/checks/epic/mobile.json +102 -0
- package/cli/checks/epic/qa.json +90 -0
- package/cli/checks/epic/security.json +184 -0
- package/cli/checks/epic/solution-architect.json +192 -0
- package/cli/checks/epic/test-architect.json +90 -0
- package/cli/checks/epic/ui.json +102 -0
- package/cli/checks/epic/ux.json +90 -0
- package/cli/checks/fixes/epic-fix-template.md +10 -0
- package/cli/checks/fixes/story-fix-template.md +10 -0
- package/cli/checks/story/api.json +186 -0
- package/cli/checks/story/backend.json +102 -0
- package/cli/checks/story/cloud.json +102 -0
- package/cli/checks/story/data.json +210 -0
- package/cli/checks/story/database.json +102 -0
- package/cli/checks/story/developer.json +168 -0
- package/cli/checks/story/devops.json +102 -0
- package/cli/checks/story/frontend.json +174 -0
- package/cli/checks/story/mobile.json +102 -0
- package/cli/checks/story/qa.json +210 -0
- package/cli/checks/story/security.json +198 -0
- package/cli/checks/story/solution-architect.json +230 -0
- package/cli/checks/story/test-architect.json +210 -0
- package/cli/checks/story/ui.json +102 -0
- package/cli/checks/story/ux.json +102 -0
- package/cli/coding-order.js +401 -0
- package/cli/command-logger.js +49 -12
- package/cli/components/static-output.js +63 -0
- package/cli/console-output-manager.js +94 -0
- package/cli/dependency-checker.js +72 -0
- package/cli/docs-sync.js +306 -0
- package/cli/epic-story-validator.js +659 -0
- package/cli/evaluation-prompts.js +1008 -0
- package/cli/execution-context.js +195 -0
- package/cli/generate-summary-table.js +340 -0
- package/cli/init-model-config.js +704 -0
- package/cli/init.js +1737 -278
- package/cli/kanban-server-manager.js +227 -0
- package/cli/llm-claude.js +150 -1
- package/cli/llm-gemini.js +109 -0
- package/cli/llm-local.js +493 -0
- package/cli/llm-mock.js +233 -0
- package/cli/llm-openai.js +454 -0
- package/cli/llm-provider.js +379 -3
- package/cli/llm-token-limits.js +211 -0
- package/cli/llm-verifier.js +662 -0
- package/cli/llm-xiaomi.js +143 -0
- package/cli/message-constants.js +49 -0
- package/cli/message-manager.js +334 -0
- package/cli/message-types.js +96 -0
- package/cli/messaging-api.js +291 -0
- package/cli/micro-check-fixer.js +335 -0
- package/cli/micro-check-runner.js +449 -0
- package/cli/micro-check-scorer.js +148 -0
- package/cli/micro-check-validator.js +538 -0
- package/cli/model-pricing.js +192 -0
- package/cli/model-query-engine.js +468 -0
- package/cli/model-recommendation-analyzer.js +495 -0
- package/cli/model-selector.js +270 -0
- package/cli/output-buffer.js +107 -0
- package/cli/process-manager.js +73 -2
- package/cli/prompt-logger.js +57 -0
- package/cli/repl-ink.js +4625 -1094
- package/cli/repl-old.js +3 -4
- package/cli/seed-processor.js +962 -0
- package/cli/sprint-planning-processor.js +4162 -0
- package/cli/template-processor.js +2149 -105
- package/cli/templates/project.md +25 -8
- package/cli/templates/vitepress-config.mts.template +5 -4
- package/cli/token-tracker.js +547 -0
- package/cli/tools/generate-story-validators.js +317 -0
- package/cli/tools/generate-validators.js +669 -0
- package/cli/update-checker.js +19 -17
- package/cli/update-notifier.js +4 -4
- package/cli/validation-router.js +667 -0
- package/cli/verification-tracker.js +563 -0
- package/cli/worktree-runner.js +654 -0
- package/kanban/README.md +386 -0
- package/kanban/client/README.md +205 -0
- package/kanban/client/components.json +20 -0
- package/kanban/client/dist/assets/index-D_KC5EQT.css +1 -0
- package/kanban/client/dist/assets/index-DjY5zqW7.js +351 -0
- package/kanban/client/dist/index.html +16 -0
- package/kanban/client/dist/vite.svg +1 -0
- package/kanban/client/index.html +15 -0
- package/kanban/client/package-lock.json +9442 -0
- package/kanban/client/package.json +44 -0
- package/kanban/client/postcss.config.js +6 -0
- package/kanban/client/public/vite.svg +1 -0
- package/kanban/client/src/App.jsx +651 -0
- package/kanban/client/src/components/ProjectFileEditorPopup.jsx +117 -0
- package/kanban/client/src/components/ceremony/AskArchPopup.jsx +420 -0
- package/kanban/client/src/components/ceremony/AskModelPopup.jsx +629 -0
- package/kanban/client/src/components/ceremony/CeremonyWorkflowModal.jsx +1133 -0
- package/kanban/client/src/components/ceremony/EpicStorySelectionModal.jsx +254 -0
- package/kanban/client/src/components/ceremony/ProviderSwitcherButton.jsx +290 -0
- package/kanban/client/src/components/ceremony/SponsorCallModal.jsx +686 -0
- package/kanban/client/src/components/ceremony/SprintPlanningModal.jsx +838 -0
- package/kanban/client/src/components/ceremony/steps/ArchitectureStep.jsx +150 -0
- package/kanban/client/src/components/ceremony/steps/CompleteStep.jsx +136 -0
- package/kanban/client/src/components/ceremony/steps/DatabaseStep.jsx +202 -0
- package/kanban/client/src/components/ceremony/steps/DeploymentStep.jsx +123 -0
- package/kanban/client/src/components/ceremony/steps/MissionStep.jsx +106 -0
- package/kanban/client/src/components/ceremony/steps/ReviewAnswersStep.jsx +329 -0
- package/kanban/client/src/components/ceremony/steps/RunningStep.jsx +249 -0
- package/kanban/client/src/components/kanban/CardDetailModal.jsx +646 -0
- package/kanban/client/src/components/kanban/EpicSection.jsx +146 -0
- package/kanban/client/src/components/kanban/FilterToolbar.jsx +222 -0
- package/kanban/client/src/components/kanban/GroupingSelector.jsx +63 -0
- package/kanban/client/src/components/kanban/KanbanBoard.jsx +211 -0
- package/kanban/client/src/components/kanban/KanbanCard.jsx +147 -0
- package/kanban/client/src/components/kanban/KanbanColumn.jsx +90 -0
- package/kanban/client/src/components/kanban/RefineWorkItemPopup.jsx +784 -0
- package/kanban/client/src/components/kanban/RunButton.jsx +162 -0
- package/kanban/client/src/components/kanban/SeedButton.jsx +176 -0
- package/kanban/client/src/components/layout/LoadingScreen.jsx +82 -0
- package/kanban/client/src/components/process/ProcessMonitorBar.jsx +80 -0
- package/kanban/client/src/components/settings/AgentEditorPopup.jsx +171 -0
- package/kanban/client/src/components/settings/AgentsTab.jsx +381 -0
- package/kanban/client/src/components/settings/ApiKeysTab.jsx +142 -0
- package/kanban/client/src/components/settings/CeremonyModelsTab.jsx +105 -0
- package/kanban/client/src/components/settings/CheckEditorPopup.jsx +507 -0
- package/kanban/client/src/components/settings/CostThresholdsTab.jsx +95 -0
- package/kanban/client/src/components/settings/ModelPricingTab.jsx +269 -0
- package/kanban/client/src/components/settings/OpenAIAuthSection.jsx +412 -0
- package/kanban/client/src/components/settings/ServersTab.jsx +121 -0
- package/kanban/client/src/components/settings/SettingsModal.jsx +84 -0
- package/kanban/client/src/components/stats/CostModal.jsx +384 -0
- package/kanban/client/src/components/ui/badge.jsx +27 -0
- package/kanban/client/src/components/ui/dialog.jsx +121 -0
- package/kanban/client/src/components/ui/tabs.jsx +85 -0
- package/kanban/client/src/hooks/__tests__/useGrouping.test.js +232 -0
- package/kanban/client/src/hooks/useGrouping.js +177 -0
- package/kanban/client/src/hooks/useWebSocket.js +120 -0
- package/kanban/client/src/lib/__tests__/api.test.js +196 -0
- package/kanban/client/src/lib/__tests__/status-grouping.test.js +94 -0
- package/kanban/client/src/lib/api.js +515 -0
- package/kanban/client/src/lib/status-grouping.js +154 -0
- package/kanban/client/src/lib/utils.js +11 -0
- package/kanban/client/src/main.jsx +10 -0
- package/kanban/client/src/store/__tests__/kanbanStore.test.js +164 -0
- package/kanban/client/src/store/ceremonyStore.js +172 -0
- package/kanban/client/src/store/filterStore.js +201 -0
- package/kanban/client/src/store/kanbanStore.js +123 -0
- package/kanban/client/src/store/processStore.js +65 -0
- package/kanban/client/src/store/sprintPlanningStore.js +33 -0
- package/kanban/client/src/styles/globals.css +59 -0
- package/kanban/client/tailwind.config.js +77 -0
- package/kanban/client/vite.config.js +28 -0
- package/kanban/client/vitest.config.js +28 -0
- package/kanban/dev-start.sh +47 -0
- package/kanban/package.json +12 -0
- package/kanban/server/index.js +537 -0
- package/kanban/server/routes/ceremony.js +454 -0
- package/kanban/server/routes/costs.js +163 -0
- package/kanban/server/routes/openai-oauth.js +366 -0
- package/kanban/server/routes/processes.js +50 -0
- package/kanban/server/routes/settings.js +736 -0
- package/kanban/server/routes/websocket.js +281 -0
- package/kanban/server/routes/work-items.js +487 -0
- package/kanban/server/services/CeremonyService.js +1441 -0
- package/kanban/server/services/FileSystemScanner.js +95 -0
- package/kanban/server/services/FileWatcher.js +144 -0
- package/kanban/server/services/HierarchyBuilder.js +196 -0
- package/kanban/server/services/ProcessRegistry.js +122 -0
- package/kanban/server/services/TaskRunnerService.js +261 -0
- package/kanban/server/services/WorkItemReader.js +123 -0
- package/kanban/server/services/WorkItemRefineService.js +510 -0
- package/kanban/server/start.js +49 -0
- package/kanban/server/utils/kanban-logger.js +132 -0
- package/kanban/server/utils/markdown.js +91 -0
- package/kanban/server/utils/status-grouping.js +107 -0
- package/kanban/server/workers/run-task-worker.js +121 -0
- package/kanban/server/workers/seed-worker.js +94 -0
- package/kanban/server/workers/sponsor-call-worker.js +92 -0
- package/kanban/server/workers/sprint-planning-worker.js +212 -0
- package/package.json +19 -7
- package/cli/agents/documentation.md +0 -302
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model Recommendation Analyzer
|
|
3
|
+
* Analyzes multi-provider recommendations and generates insights
|
|
4
|
+
*
|
|
5
|
+
* Compares provider recommendations, detects consensus patterns,
|
|
6
|
+
* and generates human-readable analysis reports.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/**
 * Debug logging helper.
 *
 * Writes `[DEBUG][<ISO timestamp>] <message>` to the console via
 * `console.log`. When `data` is supplied it is pretty-printed as JSON and
 * passed as a second argument.
 *
 * @param {string} message - Text to log.
 * @param {*} [data=null] - Optional payload. Only `null`/`undefined` mean
 *   "no payload"; falsy values such as `0`, `''` and `false` are logged.
 * @returns {void}
 */
function debug(message, data = null) {
  const prefix = `[DEBUG][${new Date().toISOString()}] ${message}`;

  if (data === null) {
    console.log(prefix);
    return;
  }

  let serialized;
  try {
    serialized = JSON.stringify(data, null, 2);
  } catch (err) {
    // JSON.stringify throws on circular structures and BigInt values; a log
    // helper must not take the caller down, so log a placeholder instead.
    serialized = `[unserializable: ${err.message}]`;
  }

  // JSON.stringify returns undefined for functions and symbols; fall back to
  // the plain string form so the payload is still visible.
  console.log(prefix, serialized === undefined ? String(data) : serialized);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Compares the model recommendations that several providers returned for
 * each evaluation prompt, and derives consensus levels, a comparison with
 * the current default model, insight strings and a markdown report.
 */
export class ModelRecommendationAnalyzer {
  /**
   * @param {Iterable<Object>} evaluationResults - One entry per prompt; each
   *   entry is read for `promptId`, `ceremony`, `stage`, `stageName` and a
   *   `recommendations` object keyed by provider name.
   */
  constructor(evaluationResults) {
    this.evaluationResults = evaluationResults;
    this.analysis = null;
  }

  /**
   * Analyze all evaluation results.
   * Runs the per-evaluation analysis, aggregates statistics and caches the
   * result on `this.analysis`.
   * A missing (`null`/`undefined`) result set is treated as empty.
   * @returns {Object} `{ analyses, statistics, timestamp }`
   */
  analyze() {
    debug('Starting analysis of evaluation results...');

    const evaluations = this.evaluationResults == null
      ? []
      : Array.from(this.evaluationResults);
    const analyses = evaluations.map((evaluation) => this._analyzeEvaluation(evaluation));

    // Generate overall statistics
    const statistics = this._generateStatistics(analyses);

    this.analysis = {
      analyses,
      statistics,
      timestamp: new Date().toISOString()
    };

    debug(`Analysis complete: ${analyses.length} evaluations analyzed`);

    return this.analysis;
  }

  /**
   * Analyze a single evaluation (one prompt, multiple provider responses).
   * Only recommendations whose `status` is `'success'` are considered.
   * @param {Object} evaluation - Evaluation result with recommendations from providers
   * @returns {Object} Analysis result with consensus, comparison and insights
   */
  _analyzeEvaluation(evaluation) {
    const { promptId, ceremony, stage, stageName, recommendations } = evaluation;

    debug(`Analyzing evaluation: ${promptId}`);

    // Keep only successful recommendations, reduced to the fields used below.
    const modelsByProvider = {};
    for (const [provider, rec] of Object.entries(recommendations || {})) {
      if (rec && rec.status === 'success') {
        modelsByProvider[provider] = {
          model: rec.model,
          reasoning: rec.reasoning,
          confidence: rec.confidence,
          cost: rec.cost
        };
      }
    }

    const consensus = this._detectConsensus(modelsByProvider);
    const currentDefault = this._getCurrentDefault(promptId);
    const comparison = this._compareWithDefault(modelsByProvider, currentDefault);
    const insights = this._generateInsights(modelsByProvider, consensus, comparison);

    return {
      promptId,
      ceremony,
      stage,
      stageName,
      currentDefault,
      recommendations: modelsByProvider,
      consensus,
      comparison,
      insights
    };
  }

  /**
   * Detect consensus level across provider recommendations.
   *
   * Models are compared by normalized tier (see `_normalizeModelName`).
   * Levels: `'none'` (no providers, or no tier backed by more than half of
   * them), `'single'` (exactly one provider), `'partial'` (one tier backed by
   * a strict majority) and `'full'` (every provider on the same tier).
   *
   * @param {Object} modelsByProvider - Models recommended by each provider
   * @returns {Object} Consensus information
   */
  _detectConsensus(modelsByProvider) {
    const providers = Object.keys(modelsByProvider);
    const totalProviders = providers.length;

    if (totalProviders === 0) {
      return { level: 'none', agreement: '0/0', details: 'No provider recommendations available' };
    }

    if (totalProviders === 1) {
      return {
        level: 'single',
        agreement: '1/1',
        model: modelsByProvider[providers[0]].model,
        details: `Only ${providers[0]} available`
      };
    }

    // Count how many providers recommended each normalized tier. A Map keeps
    // insertion order and cannot collide with Object.prototype members.
    const tally = new Map();
    for (const provider of providers) {
      const tier = this._normalizeModelName(modelsByProvider[provider].model);
      tally.set(tier, (tally.get(tier) || 0) + 1);
    }

    // Most common tier first; the sort is stable, so ties keep first-seen order.
    const ranked = Array.from(tally.entries()).sort((a, b) => b[1] - a[1]);
    const [topModel, topCount] = ranked[0];

    const distribution = {};
    for (const [tier, count] of tally) {
      distribution[tier] = count;
    }

    let level;
    let agreement;
    let details;
    if (topCount === totalProviders) {
      level = 'full';
      agreement = `${topCount}/${totalProviders}`;
      details = `All providers recommend same model tier`;
    } else if (topCount * 2 > totalProviders) {
      // Strict majority: a 1/2 or 2/4 split is not "majority agreement".
      level = 'partial';
      agreement = `${topCount}/${totalProviders}`;
      details = `Majority agreement (${topCount}/${totalProviders})`;
    } else {
      level = 'none';
      agreement = `0/${totalProviders}`;
      details = topCount === 1
        ? 'No consensus - all providers recommend different models'
        : `No consensus - no model tier has majority support (largest group: ${topCount}/${totalProviders})`;
    }

    return {
      level,
      agreement,
      majority: topModel,
      minorityCount: totalProviders - topCount,
      details,
      distribution
    };
  }

  /**
   * Normalize model name for comparison.
   * Lowercases the name and maps it to a tier keyword by substring match;
   * the first matching rule wins. Names that match no rule are returned
   * lowercased. Non-string input yields an empty string.
   * @param {string} modelName - Original model name
   * @returns {string} Normalized model name
   */
  _normalizeModelName(modelName) {
    if (typeof modelName !== 'string') return '';

    const lower = modelName.toLowerCase();

    // Identify tier
    if (lower.includes('opus')) return 'opus';
    if (lower.includes('sonnet')) return 'sonnet';
    if (lower.includes('haiku')) return 'haiku';
    if (lower.includes('flash')) return 'flash';
    if (lower.includes('pro')) return 'pro';
    if (lower.includes('gpt-5') || lower.includes('gpt-4')) return 'premium';
    if (lower.includes('gpt-3')) return 'standard';

    // Return the lowercased original if no pattern matched
    return lower;
  }

  /**
   * Get current default model for a stage.
   * @param {string} promptId - Prompt ID
   * @returns {string} Current default model name, or `'Unknown'` when the
   *   prompt ID is not in the table
   */
  _getCurrentDefault(promptId) {
    // Map prompt IDs to current defaults (from AVC_DEFAULT_LLMS.md metadata)
    const defaults = {
      'sponsor-call-suggestions': 'Claude Sonnet 4.5',
      'sponsor-call-documentation': 'Claude Sonnet 4.5',
      'sponsor-call-validation': 'Claude Sonnet 4.5',
      'sprint-planning-decomposition': 'Claude Opus 4.6',
      'sprint-planning-validation-universal': 'Claude Sonnet 4.5',
      'sprint-planning-validation-domain': 'Claude Sonnet 4.5',
      'sprint-planning-validation-feature': 'Claude Sonnet 4.5',
      'sprint-planning-doc-distribution': 'Claude Sonnet 4.6',
      'seed-decomposition': 'Claude Opus 4.6',
      'seed-validation': 'Claude Sonnet 4.5',
      'seed-context-generation': 'Claude Sonnet 4.5'
    };

    // Own-property check so IDs such as "constructor" do not resolve to
    // members inherited from Object.prototype.
    return Object.prototype.hasOwnProperty.call(defaults, promptId)
      ? defaults[promptId]
      : 'Unknown';
  }

  /**
   * Compare provider recommendations with current default.
   *
   * Each recommendation is classified as a match, an upgrade, a downgrade or
   * uncomparable (one of the two tiers is not in the tier ranking used by
   * `_isUpgrade`). `alignment` is `'perfect'` when there is at least one
   * recommendation and all match, `'partial'` when some match, else `'none'`.
   *
   * @param {Object} modelsByProvider - Models recommended by each provider
   * @param {string} currentDefault - Current default model
   * @returns {Object} Comparison result
   */
  _compareWithDefault(modelsByProvider, currentDefault) {
    const normalizedDefault = this._normalizeModelName(currentDefault);

    let matchCount = 0;
    let upgradeCount = 0;
    let downgradeCount = 0;
    let uncomparableCount = 0;

    for (const rec of Object.values(modelsByProvider)) {
      const normalized = this._normalizeModelName(rec.model);

      if (normalized === normalizedDefault) {
        matchCount++;
      } else if (this._isUpgrade(normalized, normalizedDefault)) {
        upgradeCount++;
      } else if (this._isUpgrade(normalizedDefault, normalized)) {
        downgradeCount++;
      } else {
        // Unknown tier on either side: neither an upgrade nor a downgrade.
        uncomparableCount++;
      }
    }

    const totalRecommendations = Object.keys(modelsByProvider).length;

    let alignment = 'none';
    if (totalRecommendations > 0 && matchCount === totalRecommendations) {
      alignment = 'perfect';
    } else if (matchCount > 0) {
      alignment = 'partial';
    }

    return {
      matchCount,
      upgradeCount,
      downgradeCount,
      uncomparableCount,
      totalRecommendations,
      alignment
    };
  }

  /**
   * Determine if model is an upgrade from current.
   * @param {string} recommended - Normalized recommended model
   * @param {string} current - Normalized current model
   * @returns {boolean} True if recommended ranks above current; false when
   *   either tier is not in the ranking
   */
  _isUpgrade(recommended, current) {
    // Ranking used for comparison, lowest tier first.
    const tiers = ['flash', 'haiku', 'standard', 'sonnet', 'pro', 'opus', 'premium'];
    const recIndex = tiers.indexOf(recommended);
    const curIndex = tiers.indexOf(current);

    if (recIndex === -1 || curIndex === -1) return false;
    return recIndex > curIndex;
  }

  /**
   * Generate insights from analysis.
   * @param {Object} modelsByProvider - Models recommended by each provider
   * @param {Object} consensus - Consensus information
   * @param {Object} comparison - Comparison with current default
   * @returns {Array} Array of insight strings
   */
  _generateInsights(modelsByProvider, consensus, comparison) {
    const insights = [];
    const recs = Object.values(modelsByProvider);

    // Consensus insights
    if (consensus.level === 'full') {
      insights.push(`[OK] Full consensus: All providers agree on ${consensus.majority} tier`);
    } else if (consensus.level === 'partial') {
      insights.push(`[WARN] Partial consensus: ${consensus.agreement} providers agree on ${consensus.majority} tier`);
    } else if (consensus.level === 'none') {
      insights.push(`[INFO] No consensus: Providers recommend different model tiers`);
    }

    // Default comparison insights. Upgrades and downgrades are reported
    // independently so a mixed result does not hide the downgrades.
    if (comparison.alignment === 'perfect') {
      insights.push(`[OK] All provider recommendations match current default`);
    } else {
      if (comparison.upgradeCount > 0) {
        insights.push(`[UP] ${comparison.upgradeCount} provider(s) recommend upgrading from current default`);
      }
      if (comparison.downgradeCount > 0) {
        insights.push(`[DOWN] ${comparison.downgradeCount} provider(s) recommend downgrading from current default`);
      }
    }

    // Cost insights: average only the costs that are actually numeric, so a
    // single missing value does not turn the average into NaN.
    const costs = recs
      .map((r) => r.cost)
      .filter((c) => c != null && c !== '')
      .map(Number)
      .filter((c) => Number.isFinite(c));
    if (costs.length > 0) {
      const avgCost = costs.reduce((sum, c) => sum + c, 0) / costs.length;
      if (avgCost > 0.01) {
        insights.push(`💰 Higher pricing tier recommended (avg cost: $${avgCost.toFixed(4)} per query)`);
      } else if (avgCost < 0.001) {
        insights.push(`💰 Budget-friendly tier recommended (avg cost: $${avgCost.toFixed(4)} per query)`);
      }
    }

    // Confidence insights: skipped when there are no recommendations, so an
    // empty set is not reported as "all high confidence".
    if (recs.length > 0) {
      const highConfidence = recs
        .filter((r) => String(r.confidence).toLowerCase() === 'high')
        .length;
      if (highConfidence === recs.length) {
        insights.push(`[OK] All providers express high confidence in recommendations`);
      } else if (highConfidence === 0) {
        insights.push(`[WARN] Low provider confidence - recommendations may need review`);
      }
    }

    return insights;
  }

  /**
   * Generate overall statistics from all analyses.
   * Percentages are `'0.0%'` when there are no analyses.
   * @param {Array} analyses - Array of analysis results
   * @returns {Object} Overall statistics
   */
  _generateStatistics(analyses) {
    const total = analyses.length;

    const countLevel = (level) => analyses.filter((a) => a.consensus.level === level).length;
    const countAlignment = (value) => analyses.filter((a) => a.comparison.alignment === value).length;
    const toRate = (count) => `${(total > 0 ? (count / total) * 100 : 0).toFixed(1)}%`;

    const consensus = {
      full: countLevel('full'),
      partial: countLevel('partial'),
      none: countLevel('none'),
      // Evaluations with exactly one successful provider; counted so the
      // rates add up to 100%.
      single: countLevel('single')
    };

    const defaultAlignment = {
      perfect: countAlignment('perfect'),
      partial: countAlignment('partial'),
      none: countAlignment('none')
    };

    return {
      totalEvaluations: total,
      consensus,
      consensusRate: {
        full: toRate(consensus.full),
        partial: toRate(consensus.partial),
        none: toRate(consensus.none),
        single: toRate(consensus.single)
      },
      defaultAlignment,
      upgradeRecommendations: analyses.filter((a) => a.comparison.upgradeCount > 0).length,
      downgradeRecommendations: analyses.filter((a) => a.comparison.downgradeCount > 0).length
    };
  }

  /**
   * Format analysis as markdown table.
   * Cell text has line breaks replaced by spaces and `|` escaped so provider
   * output cannot break the table; reasoning longer than 100 characters is
   * cut to 97 characters plus `...`; missing values are shown as `N/A`.
   * @param {Object} analysis - Analysis result for one evaluation
   * @returns {string} Markdown table
   */
  formatAsMarkdownTable(analysis) {
    const { ceremony, stage, stageName, currentDefault, recommendations, consensus, insights } = analysis;

    const flatten = (value) => (value == null ? 'N/A' : String(value).replace(/\s*[\r\n]+\s*/g, ' '));
    const escapePipes = (text) => text.replace(/\|/g, '\\|');

    let md = `### ${stageName} (${ceremony}/${stage})\n\n`;
    md += `**Current Default:** ${currentDefault}\n\n`;
    md += `**Consensus:** ${consensus.level} (${consensus.agreement})\n\n`;

    // Provider recommendations table
    md += '| Provider | Recommended Model | Confidence | Reasoning |\n';
    md += '|----------|-------------------|------------|----------|\n';

    for (const [provider, rec] of Object.entries(recommendations)) {
      const fullReasoning = flatten(rec.reasoning);
      // Truncate before escaping so an escape sequence is never cut in half.
      const reasoning = fullReasoning.length > 100
        ? `${fullReasoning.substring(0, 97)}...`
        : fullReasoning;
      md += `| ${escapePipes(flatten(provider))} | ${escapePipes(flatten(rec.model))} | ${escapePipes(flatten(rec.confidence))} | ${escapePipes(reasoning)} |\n`;
    }

    // Insights
    if (insights.length > 0) {
      md += '\n**Insights:**\n';
      for (const insight of insights) {
        md += `- ${insight}\n`;
      }
    }

    md += '\n---\n\n';
    return md;
  }

  /**
   * Generate complete markdown report.
   * Runs `analyze()` first when no analysis has been cached yet.
   * @returns {string} Complete markdown report
   */
  generateMarkdownReport() {
    if (!this.analysis) {
      this.analyze();
    }

    const { analyses, statistics } = this.analysis;

    let report = '# Model Recommendations Report\n\n';
    report += `Generated: ${new Date().toISOString()}\n\n`;

    // Executive Summary
    report += '## Executive Summary\n\n';
    report += `Total Evaluations: ${statistics.totalEvaluations}\n\n`;
    report += '### Consensus Statistics\n\n';
    report += `- **Full Consensus:** ${statistics.consensus.full} (${statistics.consensusRate.full})\n`;
    report += `- **Partial Consensus:** ${statistics.consensus.partial} (${statistics.consensusRate.partial})\n`;
    if (statistics.consensus.single > 0) {
      // Only listed when present, so reports without single-provider
      // evaluations keep their previous layout.
      report += `- **Single Provider:** ${statistics.consensus.single} (${statistics.consensusRate.single})\n`;
    }
    report += `- **No Consensus:** ${statistics.consensus.none} (${statistics.consensusRate.none})\n\n`;
    report += '### Default Alignment\n\n';
    report += `- **Perfect Alignment:** ${statistics.defaultAlignment.perfect} stages\n`;
    report += `- **Partial Alignment:** ${statistics.defaultAlignment.partial} stages\n`;
    report += `- **No Alignment:** ${statistics.defaultAlignment.none} stages\n\n`;
    report += `- **Upgrade Recommendations:** ${statistics.upgradeRecommendations} stages\n`;
    report += `- **Downgrade Recommendations:** ${statistics.downgradeRecommendations} stages\n\n`;

    report += '---\n\n';

    // Per-stage analysis
    report += '## Per-Stage Analysis\n\n';

    // Group by ceremony, keeping first-seen order.
    const byCeremony = new Map();
    for (const analysis of analyses) {
      if (!byCeremony.has(analysis.ceremony)) {
        byCeremony.set(analysis.ceremony, []);
      }
      byCeremony.get(analysis.ceremony).push(analysis);
    }

    for (const [ceremony, ceremonyAnalyses] of byCeremony) {
      report += `## ${ceremony}\n\n`;
      for (const analysis of ceremonyAnalyses) {
        report += this.formatAsMarkdownTable(analysis);
      }
    }

    // Key Findings
    report += '## Key Findings\n\n';
    report += this._generateKeyFindings(analyses, statistics);

    return report;
  }

  /**
   * Generate key findings section.
   * @param {Array} analyses - All analysis results
   * @param {Object} statistics - Overall statistics (currently not read)
   * @returns {string} Key findings markdown
   */
  _generateKeyFindings(analyses, statistics) {
    let findings = '';

    // Find stages with full consensus
    const fullConsensus = analyses.filter((a) => a.consensus.level === 'full');
    if (fullConsensus.length > 0) {
      findings += '### Strong Agreement\n\n';
      findings += 'Stages where all providers agree:\n\n';
      for (const analysis of fullConsensus) {
        findings += `- **${analysis.stageName}**: ${analysis.consensus.majority}\n`;
      }
      findings += '\n';
    }

    // Find stages with no consensus
    const noConsensus = analyses.filter((a) => a.consensus.level === 'none');
    if (noConsensus.length > 0) {
      findings += '### Disagreement Areas\n\n';
      findings += 'Stages where providers recommend different models:\n\n';
      for (const analysis of noConsensus) {
        findings += `- **${analysis.stageName}**: ${Object.keys(analysis.recommendations).length} different recommendations\n`;
      }
      findings += '\n';
    }

    // Find upgrade recommendations
    const upgrades = analyses.filter((a) => a.comparison.upgradeCount > 0);
    if (upgrades.length > 0) {
      findings += '### Suggested Upgrades\n\n';
      findings += 'Stages where providers recommend higher-tier models:\n\n';
      for (const analysis of upgrades) {
        findings += `- **${analysis.stageName}**: ${analysis.comparison.upgradeCount} provider(s) recommend upgrade from ${analysis.currentDefault}\n`;
      }
      findings += '\n';
    }

    return findings;
  }

  /**
   * Get analysis results, running `analyze()` first if nothing is cached.
   * @returns {Object} Analysis results
   */
  getAnalysis() {
    if (!this.analysis) {
      this.analyze();
    }
    return this.analysis;
  }
}
|