@agile-vibe-coding/avc 0.1.0 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/cli/agent-loader.js +21 -0
- package/cli/agents/agent-selector.md +129 -0
- package/cli/agents/architecture-recommender.md +418 -0
- package/cli/agents/database-deep-dive.md +470 -0
- package/cli/agents/database-recommender.md +634 -0
- package/cli/agents/doc-distributor.md +176 -0
- package/cli/agents/documentation-updater.md +203 -0
- package/cli/agents/epic-story-decomposer.md +280 -0
- package/cli/agents/feature-context-generator.md +91 -0
- package/cli/agents/gap-checker-epic.md +52 -0
- package/cli/agents/impact-checker-story.md +51 -0
- package/cli/agents/migration-guide-generator.md +305 -0
- package/cli/agents/mission-scope-generator.md +79 -0
- package/cli/agents/mission-scope-validator.md +112 -0
- package/cli/agents/project-context-extractor.md +107 -0
- package/cli/agents/project-documentation-creator.json +226 -0
- package/cli/agents/project-documentation-creator.md +595 -0
- package/cli/agents/question-prefiller.md +269 -0
- package/cli/agents/refiner-epic.md +39 -0
- package/cli/agents/refiner-story.md +42 -0
- package/cli/agents/solver-epic-api.json +15 -0
- package/cli/agents/solver-epic-api.md +39 -0
- package/cli/agents/solver-epic-backend.json +15 -0
- package/cli/agents/solver-epic-backend.md +39 -0
- package/cli/agents/solver-epic-cloud.json +15 -0
- package/cli/agents/solver-epic-cloud.md +39 -0
- package/cli/agents/solver-epic-data.json +15 -0
- package/cli/agents/solver-epic-data.md +39 -0
- package/cli/agents/solver-epic-database.json +15 -0
- package/cli/agents/solver-epic-database.md +39 -0
- package/cli/agents/solver-epic-developer.json +15 -0
- package/cli/agents/solver-epic-developer.md +39 -0
- package/cli/agents/solver-epic-devops.json +15 -0
- package/cli/agents/solver-epic-devops.md +39 -0
- package/cli/agents/solver-epic-frontend.json +15 -0
- package/cli/agents/solver-epic-frontend.md +39 -0
- package/cli/agents/solver-epic-mobile.json +15 -0
- package/cli/agents/solver-epic-mobile.md +39 -0
- package/cli/agents/solver-epic-qa.json +15 -0
- package/cli/agents/solver-epic-qa.md +39 -0
- package/cli/agents/solver-epic-security.json +15 -0
- package/cli/agents/solver-epic-security.md +39 -0
- package/cli/agents/solver-epic-solution-architect.json +15 -0
- package/cli/agents/solver-epic-solution-architect.md +39 -0
- package/cli/agents/solver-epic-test-architect.json +15 -0
- package/cli/agents/solver-epic-test-architect.md +39 -0
- package/cli/agents/solver-epic-ui.json +15 -0
- package/cli/agents/solver-epic-ui.md +39 -0
- package/cli/agents/solver-epic-ux.json +15 -0
- package/cli/agents/solver-epic-ux.md +39 -0
- package/cli/agents/solver-story-api.json +15 -0
- package/cli/agents/solver-story-api.md +39 -0
- package/cli/agents/solver-story-backend.json +15 -0
- package/cli/agents/solver-story-backend.md +39 -0
- package/cli/agents/solver-story-cloud.json +15 -0
- package/cli/agents/solver-story-cloud.md +39 -0
- package/cli/agents/solver-story-data.json +15 -0
- package/cli/agents/solver-story-data.md +39 -0
- package/cli/agents/solver-story-database.json +15 -0
- package/cli/agents/solver-story-database.md +39 -0
- package/cli/agents/solver-story-developer.json +15 -0
- package/cli/agents/solver-story-developer.md +39 -0
- package/cli/agents/solver-story-devops.json +15 -0
- package/cli/agents/solver-story-devops.md +39 -0
- package/cli/agents/solver-story-frontend.json +15 -0
- package/cli/agents/solver-story-frontend.md +39 -0
- package/cli/agents/solver-story-mobile.json +15 -0
- package/cli/agents/solver-story-mobile.md +39 -0
- package/cli/agents/solver-story-qa.json +15 -0
- package/cli/agents/solver-story-qa.md +39 -0
- package/cli/agents/solver-story-security.json +15 -0
- package/cli/agents/solver-story-security.md +39 -0
- package/cli/agents/solver-story-solution-architect.json +15 -0
- package/cli/agents/solver-story-solution-architect.md +39 -0
- package/cli/agents/solver-story-test-architect.json +15 -0
- package/cli/agents/solver-story-test-architect.md +39 -0
- package/cli/agents/solver-story-ui.json +15 -0
- package/cli/agents/solver-story-ui.md +39 -0
- package/cli/agents/solver-story-ux.json +15 -0
- package/cli/agents/solver-story-ux.md +39 -0
- package/cli/agents/story-doc-enricher.md +133 -0
- package/cli/agents/suggestion-business-analyst.md +88 -0
- package/cli/agents/suggestion-deployment-architect.md +263 -0
- package/cli/agents/suggestion-product-manager.md +129 -0
- package/cli/agents/suggestion-security-specialist.md +156 -0
- package/cli/agents/suggestion-technical-architect.md +269 -0
- package/cli/agents/suggestion-ux-researcher.md +93 -0
- package/cli/agents/task-subtask-decomposer.md +188 -0
- package/cli/agents/validator-documentation.json +152 -0
- package/cli/agents/validator-documentation.md +453 -0
- package/cli/agents/validator-epic-api.json +93 -0
- package/cli/agents/validator-epic-api.md +137 -0
- package/cli/agents/validator-epic-backend.json +93 -0
- package/cli/agents/validator-epic-backend.md +130 -0
- package/cli/agents/validator-epic-cloud.json +93 -0
- package/cli/agents/validator-epic-cloud.md +137 -0
- package/cli/agents/validator-epic-data.json +93 -0
- package/cli/agents/validator-epic-data.md +130 -0
- package/cli/agents/validator-epic-database.json +93 -0
- package/cli/agents/validator-epic-database.md +137 -0
- package/cli/agents/validator-epic-developer.json +74 -0
- package/cli/agents/validator-epic-developer.md +153 -0
- package/cli/agents/validator-epic-devops.json +74 -0
- package/cli/agents/validator-epic-devops.md +153 -0
- package/cli/agents/validator-epic-frontend.json +74 -0
- package/cli/agents/validator-epic-frontend.md +153 -0
- package/cli/agents/validator-epic-mobile.json +93 -0
- package/cli/agents/validator-epic-mobile.md +130 -0
- package/cli/agents/validator-epic-qa.json +93 -0
- package/cli/agents/validator-epic-qa.md +130 -0
- package/cli/agents/validator-epic-security.json +74 -0
- package/cli/agents/validator-epic-security.md +154 -0
- package/cli/agents/validator-epic-solution-architect.json +74 -0
- package/cli/agents/validator-epic-solution-architect.md +156 -0
- package/cli/agents/validator-epic-test-architect.json +93 -0
- package/cli/agents/validator-epic-test-architect.md +130 -0
- package/cli/agents/validator-epic-ui.json +93 -0
- package/cli/agents/validator-epic-ui.md +130 -0
- package/cli/agents/validator-epic-ux.json +93 -0
- package/cli/agents/validator-epic-ux.md +130 -0
- package/cli/agents/validator-selector.md +211 -0
- package/cli/agents/validator-story-api.json +104 -0
- package/cli/agents/validator-story-api.md +152 -0
- package/cli/agents/validator-story-backend.json +104 -0
- package/cli/agents/validator-story-backend.md +152 -0
- package/cli/agents/validator-story-cloud.json +104 -0
- package/cli/agents/validator-story-cloud.md +152 -0
- package/cli/agents/validator-story-data.json +104 -0
- package/cli/agents/validator-story-data.md +152 -0
- package/cli/agents/validator-story-database.json +104 -0
- package/cli/agents/validator-story-database.md +152 -0
- package/cli/agents/validator-story-developer.json +104 -0
- package/cli/agents/validator-story-developer.md +152 -0
- package/cli/agents/validator-story-devops.json +104 -0
- package/cli/agents/validator-story-devops.md +152 -0
- package/cli/agents/validator-story-frontend.json +104 -0
- package/cli/agents/validator-story-frontend.md +152 -0
- package/cli/agents/validator-story-mobile.json +104 -0
- package/cli/agents/validator-story-mobile.md +152 -0
- package/cli/agents/validator-story-qa.json +104 -0
- package/cli/agents/validator-story-qa.md +152 -0
- package/cli/agents/validator-story-security.json +104 -0
- package/cli/agents/validator-story-security.md +152 -0
- package/cli/agents/validator-story-solution-architect.json +104 -0
- package/cli/agents/validator-story-solution-architect.md +152 -0
- package/cli/agents/validator-story-test-architect.json +104 -0
- package/cli/agents/validator-story-test-architect.md +152 -0
- package/cli/agents/validator-story-ui.json +104 -0
- package/cli/agents/validator-story-ui.md +152 -0
- package/cli/agents/validator-story-ux.json +104 -0
- package/cli/agents/validator-story-ux.md +152 -0
- package/cli/ansi-colors.js +21 -0
- package/cli/build-docs.js +298 -0
- package/cli/ceremony-history.js +369 -0
- package/cli/command-logger.js +245 -0
- package/cli/components/static-output.js +63 -0
- package/cli/console-output-manager.js +94 -0
- package/cli/docs-sync.js +306 -0
- package/cli/epic-story-validator.js +1174 -0
- package/cli/evaluation-prompts.js +1008 -0
- package/cli/execution-context.js +195 -0
- package/cli/generate-summary-table.js +340 -0
- package/cli/index.js +3 -25
- package/cli/init-model-config.js +697 -0
- package/cli/init.js +1765 -100
- package/cli/kanban-server-manager.js +228 -0
- package/cli/llm-claude.js +109 -0
- package/cli/llm-gemini.js +115 -0
- package/cli/llm-mock.js +233 -0
- package/cli/llm-openai.js +233 -0
- package/cli/llm-provider.js +300 -0
- package/cli/llm-token-limits.js +102 -0
- package/cli/llm-verifier.js +454 -0
- package/cli/logger.js +32 -5
- package/cli/message-constants.js +58 -0
- package/cli/message-manager.js +334 -0
- package/cli/message-types.js +96 -0
- package/cli/messaging-api.js +297 -0
- package/cli/model-pricing.js +169 -0
- package/cli/model-query-engine.js +468 -0
- package/cli/model-recommendation-analyzer.js +495 -0
- package/cli/model-selector.js +269 -0
- package/cli/output-buffer.js +107 -0
- package/cli/process-manager.js +332 -0
- package/cli/repl-ink.js +5840 -504
- package/cli/repl-old.js +4 -4
- package/cli/seed-processor.js +792 -0
- package/cli/sprint-planning-processor.js +1813 -0
- package/cli/template-processor.js +2306 -108
- package/cli/templates/project.md +25 -8
- package/cli/templates/vitepress-config.mts.template +34 -0
- package/cli/token-tracker.js +520 -0
- package/cli/tools/generate-story-validators.js +317 -0
- package/cli/tools/generate-validators.js +669 -0
- package/cli/update-checker.js +19 -17
- package/cli/update-notifier.js +4 -4
- package/cli/validation-router.js +605 -0
- package/cli/verification-tracker.js +563 -0
- package/kanban/README.md +386 -0
- package/kanban/client/README.md +205 -0
- package/kanban/client/components.json +20 -0
- package/kanban/client/dist/assets/index-CiD8PS2e.js +306 -0
- package/kanban/client/dist/assets/index-nLh0m82Q.css +1 -0
- package/kanban/client/dist/index.html +16 -0
- package/kanban/client/dist/vite.svg +1 -0
- package/kanban/client/index.html +15 -0
- package/kanban/client/package-lock.json +9442 -0
- package/kanban/client/package.json +44 -0
- package/kanban/client/postcss.config.js +6 -0
- package/kanban/client/public/vite.svg +1 -0
- package/kanban/client/src/App.jsx +622 -0
- package/kanban/client/src/components/ProjectFileEditorPopup.jsx +117 -0
- package/kanban/client/src/components/ceremony/AskArchPopup.jsx +416 -0
- package/kanban/client/src/components/ceremony/AskModelPopup.jsx +616 -0
- package/kanban/client/src/components/ceremony/CeremonyWorkflowModal.jsx +946 -0
- package/kanban/client/src/components/ceremony/EpicStorySelectionModal.jsx +254 -0
- package/kanban/client/src/components/ceremony/SponsorCallModal.jsx +619 -0
- package/kanban/client/src/components/ceremony/SprintPlanningModal.jsx +704 -0
- package/kanban/client/src/components/ceremony/steps/ArchitectureStep.jsx +150 -0
- package/kanban/client/src/components/ceremony/steps/CompleteStep.jsx +154 -0
- package/kanban/client/src/components/ceremony/steps/DatabaseStep.jsx +202 -0
- package/kanban/client/src/components/ceremony/steps/DeploymentStep.jsx +123 -0
- package/kanban/client/src/components/ceremony/steps/MissionStep.jsx +106 -0
- package/kanban/client/src/components/ceremony/steps/ReviewAnswersStep.jsx +125 -0
- package/kanban/client/src/components/ceremony/steps/RunningStep.jsx +228 -0
- package/kanban/client/src/components/kanban/CardDetailModal.jsx +559 -0
- package/kanban/client/src/components/kanban/EpicSection.jsx +146 -0
- package/kanban/client/src/components/kanban/FilterToolbar.jsx +222 -0
- package/kanban/client/src/components/kanban/GroupingSelector.jsx +57 -0
- package/kanban/client/src/components/kanban/KanbanBoard.jsx +211 -0
- package/kanban/client/src/components/kanban/KanbanCard.jsx +138 -0
- package/kanban/client/src/components/kanban/KanbanColumn.jsx +90 -0
- package/kanban/client/src/components/kanban/RefineWorkItemPopup.jsx +789 -0
- package/kanban/client/src/components/layout/LoadingScreen.jsx +82 -0
- package/kanban/client/src/components/process/ProcessMonitorBar.jsx +80 -0
- package/kanban/client/src/components/settings/AgentEditorPopup.jsx +171 -0
- package/kanban/client/src/components/settings/AgentsTab.jsx +353 -0
- package/kanban/client/src/components/settings/ApiKeysTab.jsx +113 -0
- package/kanban/client/src/components/settings/CeremonyModelsTab.jsx +98 -0
- package/kanban/client/src/components/settings/CostThresholdsTab.jsx +94 -0
- package/kanban/client/src/components/settings/ModelPricingTab.jsx +204 -0
- package/kanban/client/src/components/settings/ServersTab.jsx +121 -0
- package/kanban/client/src/components/settings/SettingsModal.jsx +84 -0
- package/kanban/client/src/components/stats/CostModal.jsx +353 -0
- package/kanban/client/src/components/ui/badge.jsx +27 -0
- package/kanban/client/src/components/ui/dialog.jsx +121 -0
- package/kanban/client/src/components/ui/tabs.jsx +85 -0
- package/kanban/client/src/hooks/__tests__/useGrouping.test.js +232 -0
- package/kanban/client/src/hooks/useGrouping.js +118 -0
- package/kanban/client/src/hooks/useWebSocket.js +120 -0
- package/kanban/client/src/lib/__tests__/api.test.js +196 -0
- package/kanban/client/src/lib/__tests__/status-grouping.test.js +94 -0
- package/kanban/client/src/lib/api.js +401 -0
- package/kanban/client/src/lib/status-grouping.js +144 -0
- package/kanban/client/src/lib/utils.js +11 -0
- package/kanban/client/src/main.jsx +10 -0
- package/kanban/client/src/store/__tests__/kanbanStore.test.js +164 -0
- package/kanban/client/src/store/ceremonyStore.js +172 -0
- package/kanban/client/src/store/filterStore.js +201 -0
- package/kanban/client/src/store/kanbanStore.js +115 -0
- package/kanban/client/src/store/processStore.js +65 -0
- package/kanban/client/src/store/sprintPlanningStore.js +33 -0
- package/kanban/client/src/styles/globals.css +59 -0
- package/kanban/client/tailwind.config.js +77 -0
- package/kanban/client/vite.config.js +28 -0
- package/kanban/client/vitest.config.js +28 -0
- package/kanban/dev-start.sh +47 -0
- package/kanban/package.json +12 -0
- package/kanban/server/index.js +516 -0
- package/kanban/server/routes/ceremony.js +305 -0
- package/kanban/server/routes/costs.js +157 -0
- package/kanban/server/routes/processes.js +50 -0
- package/kanban/server/routes/settings.js +303 -0
- package/kanban/server/routes/websocket.js +276 -0
- package/kanban/server/routes/work-items.js +347 -0
- package/kanban/server/services/CeremonyService.js +1190 -0
- package/kanban/server/services/FileSystemScanner.js +95 -0
- package/kanban/server/services/FileWatcher.js +144 -0
- package/kanban/server/services/HierarchyBuilder.js +196 -0
- package/kanban/server/services/ProcessRegistry.js +122 -0
- package/kanban/server/services/WorkItemReader.js +123 -0
- package/kanban/server/services/WorkItemRefineService.js +510 -0
- package/kanban/server/start.js +49 -0
- package/kanban/server/utils/kanban-logger.js +132 -0
- package/kanban/server/utils/markdown.js +91 -0
- package/kanban/server/utils/status-grouping.js +107 -0
- package/kanban/server/workers/sponsor-call-worker.js +84 -0
- package/kanban/server/workers/sprint-planning-worker.js +130 -0
- package/package.json +34 -7
|
@@ -0,0 +1,1008 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluation prompts for LLM model selection across all AVC ceremonies and stages
|
|
3
|
+
* Extracted from AVC_DEFAULT_LLMS.md
|
|
4
|
+
*
|
|
5
|
+
* Each prompt is used to query LLM providers about which of their models
|
|
6
|
+
* is best suited for the specific stage's requirements.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
export const EVALUATION_PROMPTS = [
|
|
10
|
+
{
|
|
11
|
+
id: 'sponsor-call-suggestions',
|
|
12
|
+
ceremony: 'sponsor-call',
|
|
13
|
+
stage: 'suggestions',
|
|
14
|
+
stageName: 'Questionnaire Suggestions',
|
|
15
|
+
prompt: `TASK: Select optimal LLM model for questionnaire suggestions in sponsor-call ceremony
|
|
16
|
+
PRIORITY: Best possible output quality
|
|
17
|
+
|
|
18
|
+
CONTEXT:
|
|
19
|
+
- Stage: suggestions (Questionnaire Suggestions)
|
|
20
|
+
- Ceremony: sponsor-call
|
|
21
|
+
- Purpose: AI analyzes project name and suggests intelligent, contextually appropriate answers for 6 questionnaire fields
|
|
22
|
+
- Input: Project name only (10-50 tokens)
|
|
23
|
+
- Output: Suggested answers for Mission Statement, Target Users, Initial Scope, Deployment Target, Technical Considerations, Security Requirements (500-1500 tokens total)
|
|
24
|
+
- Call frequency: 1 per ceremony
|
|
25
|
+
- User interaction: Real-time (user waiting for suggestions while viewing questionnaire)
|
|
26
|
+
- Impact: Sets initial project direction and quality tone for all downstream artifacts
|
|
27
|
+
|
|
28
|
+
EVALUATION CRITERIA (BEST OUTPUT FIRST):
|
|
29
|
+
|
|
30
|
+
1. Output Quality Requirements: HIGH
|
|
31
|
+
- Must infer project domain from minimal context (just project name)
|
|
32
|
+
- Suggestions must be relevant, specific, and actionable
|
|
33
|
+
- Must demonstrate domain knowledge and industry best practices
|
|
34
|
+
- Quality directly impacts user's project definition experience
|
|
35
|
+
→ Requires appropriate model capabilities
|
|
36
|
+
|
|
37
|
+
2. Task Complexity: 6/10 (Moderate reasoning with domain inference)
|
|
38
|
+
- Analyze project name for domain clues
|
|
39
|
+
- Infer appropriate technology stack and architecture
|
|
40
|
+
- Generate contextually relevant suggestions across 6 different categories
|
|
41
|
+
- Balance between generic and specific recommendations
|
|
42
|
+
|
|
43
|
+
3. Context Understanding:
|
|
44
|
+
- Extremely limited input context (just project name)
|
|
45
|
+
- Must leverage broad domain knowledge to compensate
|
|
46
|
+
- Must infer user intent and project type
|
|
47
|
+
→ Requires appropriate model capabilities
|
|
48
|
+
|
|
49
|
+
4. Consistency & Reliability:
|
|
50
|
+
- Suggestions must be coherent across all 6 fields
|
|
51
|
+
- Must avoid contradictory recommendations
|
|
52
|
+
- Critical for user's first impression of AVC
|
|
53
|
+
→ Requires appropriate model capabilities
|
|
54
|
+
|
|
55
|
+
5. Speed Requirements: IMPORTANT (Secondary)
|
|
56
|
+
- User is actively waiting for suggestions
|
|
57
|
+
- Real-time interaction requires reasonable response time
|
|
58
|
+
- 2-4 second response ideal, <8 seconds acceptable
|
|
59
|
+
|
|
60
|
+
6. Pricing Considerations: TERTIARY
|
|
61
|
+
- Single call per ceremony (low volume)
|
|
62
|
+
- User-facing quality important
|
|
63
|
+
- Pricing impact minimal with only 1 call
|
|
64
|
+
→ Quality worth any pricing tier for this stage
|
|
65
|
+
|
|
66
|
+
RECOMMENDATION:
|
|
67
|
+
|
|
68
|
+
Based on evaluation criteria above, select the model that best meets:
|
|
69
|
+
1. PRIMARY: Output Quality - Excellent domain inference from minimal context (project name)
|
|
70
|
+
2. SECONDARY: World Knowledge - Strong understanding of business domains and technical stacks
|
|
71
|
+
3. TERTIARY: Task Complexity - Moderate (7/10) - requires intelligent suggestion generation
|
|
72
|
+
4. Speed Requirements: MODERATE - User waiting in real-time (2-4s ideal, <8s acceptable)
|
|
73
|
+
5. Pricing: TERTIARY - Single call per ceremony, minimal impact
|
|
74
|
+
|
|
75
|
+
Selection Guidance:
|
|
76
|
+
- Prioritize models with strong domain/business knowledge for accurate suggestions
|
|
77
|
+
- Require excellent inference capabilities from very limited context
|
|
78
|
+
- Must support real-time interaction speed (<8 seconds)
|
|
79
|
+
- Pricing is not a constraint (quality worth any tier for user-facing interaction)
|
|
80
|
+
|
|
81
|
+
Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
|
|
82
|
+
metadata: {
|
|
83
|
+
callFrequency: 1,
|
|
84
|
+
impact: 'HIGH',
|
|
85
|
+
taskComplexity: 6,
|
|
86
|
+
currentDefault: 'Claude Sonnet 4.5'
|
|
87
|
+
}
|
|
88
|
+
},
|
|
89
|
+
|
|
90
|
+
{
|
|
91
|
+
id: 'sponsor-call-documentation',
|
|
92
|
+
ceremony: 'sponsor-call',
|
|
93
|
+
stage: 'documentation',
|
|
94
|
+
stageName: 'Documentation Generation',
|
|
95
|
+
prompt: `TASK: Select optimal LLM model for documentation generation in sponsor-call ceremony
|
|
96
|
+
PRIORITY: Best possible output quality
|
|
97
|
+
|
|
98
|
+
CONTEXT:
|
|
99
|
+
- Stage: documentation (Project Documentation Creation)
|
|
100
|
+
- Ceremony: sponsor-call
|
|
101
|
+
- Purpose: AI generates comprehensive PROJECT.md from questionnaire answers
|
|
102
|
+
- Input: 6 questionnaire field answers (1,000-5,000 tokens)
|
|
103
|
+
- Output: Structured PROJECT.md with Executive Summary, Problem Statement, Solution, User Personas, Core Features, Technical Architecture, Security, Success Metrics (2,000-8,000 tokens)
|
|
104
|
+
- Call frequency: 1 per ceremony
|
|
105
|
+
- User interaction: Background process after questionnaire completion
|
|
106
|
+
- Impact: CRITICAL - This is the foundational document that defines the entire project; all future AI agents, epics, stories, and tasks derive from this document
|
|
107
|
+
|
|
108
|
+
EVALUATION CRITERIA (BEST OUTPUT FIRST):
|
|
109
|
+
|
|
110
|
+
1. Output Quality Requirements: CRITICAL
|
|
111
|
+
- Must produce well-structured, comprehensive technical documentation
|
|
112
|
+
- Content must be coherent, professional, and actionable
|
|
113
|
+
- Must maintain consistency across all sections
|
|
114
|
+
- Quality affects all downstream project artifacts
|
|
115
|
+
→ Requires appropriate model capabilities
|
|
116
|
+
|
|
117
|
+
2. Task Complexity: 8/10 (Advanced technical writing and synthesis)
|
|
118
|
+
- Synthesize multiple questionnaire inputs into coherent narrative
|
|
119
|
+
- Organize information into logical section structure
|
|
120
|
+
- Generate appropriate technical architecture descriptions
|
|
121
|
+
- Create realistic user personas and success metrics
|
|
122
|
+
- Maintain professional tone and technical accuracy
|
|
123
|
+
|
|
124
|
+
3. Context Understanding:
|
|
125
|
+
- Must understand relationships between questionnaire answers
|
|
126
|
+
- Infer appropriate technical depth and detail level
|
|
127
|
+
- Expand brief answers into comprehensive sections
|
|
128
|
+
- Maintain consistency across document sections
|
|
129
|
+
→ Requires appropriate model capabilities
|
|
130
|
+
|
|
131
|
+
4. Consistency & Reliability:
|
|
132
|
+
- Critical that all sections align and don't contradict
|
|
133
|
+
- Technical architecture must match scope and requirements
|
|
134
|
+
- Success metrics must align with stated goals
|
|
135
|
+
|
|
136
|
+
5. Speed Requirements: LOW (Secondary)
|
|
137
|
+
- Background process, user not actively waiting
|
|
138
|
+
- Quality far more important than speed
|
|
139
|
+
- Can take 10-30 seconds without issue
|
|
140
|
+
→ Speed not a constraint
|
|
141
|
+
|
|
142
|
+
6. Pricing Considerations: TERTIARY
|
|
143
|
+
- Single call per ceremony
|
|
144
|
+
- Foundational document for entire project
|
|
145
|
+
- Quality impact is massive
|
|
146
|
+
→ Worth any pricing tier given criticality
|
|
147
|
+
|
|
148
|
+
RECOMMENDATION:
|
|
149
|
+
|
|
150
|
+
Based on evaluation criteria above, select the model that best meets:
|
|
151
|
+
1. PRIMARY: Output Quality - CRITICAL - Foundational project document
|
|
152
|
+
2. SECONDARY: Technical Writing - Excellent multi-section document structure
|
|
153
|
+
3. TERTIARY: Task Complexity - High (8/10) - requires synthesis of diverse inputs
|
|
154
|
+
4. Speed Requirements: LOW - Background process, quality >>> speed
|
|
155
|
+
5. Pricing: TERTIARY - Single call, massive quality impact justifies any tier
|
|
156
|
+
|
|
157
|
+
Selection Guidance:
|
|
158
|
+
- Prioritize models with exceptional technical writing and document synthesis
|
|
159
|
+
- Require strong markdown formatting and organizational structure
|
|
160
|
+
- Must maintain consistency across complex 9-section document
|
|
161
|
+
- Must synthesize questionnaire inputs, suggestions, and context into coherent narrative
|
|
162
|
+
- Speed is not a constraint (can take 10-30 seconds)
|
|
163
|
+
- Pricing not a limitation (worth premium for project foundation)
|
|
164
|
+
|
|
165
|
+
Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
|
|
166
|
+
metadata: {
|
|
167
|
+
callFrequency: 1,
|
|
168
|
+
impact: 'CRITICAL',
|
|
169
|
+
taskComplexity: 8,
|
|
170
|
+
currentDefault: 'Claude Sonnet 4.5'
|
|
171
|
+
}
|
|
172
|
+
},
|
|
173
|
+
|
|
174
|
+
{
|
|
175
|
+
id: 'sponsor-call-validation',
|
|
176
|
+
ceremony: 'sponsor-call',
|
|
177
|
+
stage: 'validation',
|
|
178
|
+
stageName: 'Documentation Validation',
|
|
179
|
+
prompt: `TASK: Select optimal LLM model for documentation validation in sponsor-call ceremony
|
|
180
|
+
PRIORITY: Best possible output quality
|
|
181
|
+
|
|
182
|
+
CONTEXT:
|
|
183
|
+
- Stage: validation (Documentation & Context Validation)
|
|
184
|
+
- Ceremony: sponsor-call
|
|
185
|
+
- Purpose: AI validators check PROJECT.md and context.md against quality rules
|
|
186
|
+
- Input: Full PROJECT.md or context.md + validation rules (5,000-12,000 tokens)
|
|
187
|
+
- Output: Validation report identifying issues and suggestions
|
|
188
|
+
- Call frequency: 2 validators per ceremony
|
|
189
|
+
- Impact: HIGH - Catches quality issues before they propagate
|
|
190
|
+
|
|
191
|
+
EVALUATION CRITERIA (BEST OUTPUT FIRST):
|
|
192
|
+
|
|
193
|
+
1. Output Quality Requirements: HIGH
|
|
194
|
+
- Must accurately identify quality issues and inconsistencies
|
|
195
|
+
- Must provide actionable feedback for improvement
|
|
196
|
+
- Must understand project documentation best practices
|
|
197
|
+
→ Requires appropriate model capabilities
|
|
198
|
+
|
|
199
|
+
2. Task Complexity: 6/10 (Analytical validation + rule application)
|
|
200
|
+
- Apply validation rules to documentation
|
|
201
|
+
- Identify inconsistencies and gaps
|
|
202
|
+
- Assess completeness and coherence
|
|
203
|
+
|
|
204
|
+
3. Context Understanding:
|
|
205
|
+
- Must handle large documents (full PROJECT.md)
|
|
206
|
+
- Understand relationships across sections
|
|
207
|
+
- Identify subtle inconsistencies
|
|
208
|
+
→ Requires appropriate model capabilities
|
|
209
|
+
|
|
210
|
+
4. Speed Requirements: LOW (Secondary)
|
|
211
|
+
- Background validation stage
|
|
212
|
+
- Quality more important than speed
|
|
213
|
+
|
|
214
|
+
6. Pricing Considerations: TERTIARY
|
|
215
|
+
- Only 2 calls per ceremony
|
|
216
|
+
- Moderate quality requirements
|
|
217
|
+
|
|
218
|
+
RECOMMENDATION:
|
|
219
|
+
|
|
220
|
+
Based on evaluation criteria above, select the model that best meets:
|
|
221
|
+
1. PRIMARY: Output Quality - HIGH - Analytical validation with actionable feedback
|
|
222
|
+
2. SECONDARY: Task Complexity - Moderate (6/10) - rule application + gap identification
|
|
223
|
+
3. TERTIARY: Context Understanding - Must handle large documents with cross-section relationships
|
|
224
|
+
4. Speed Requirements: LOW - Background validation, quality >>> speed
|
|
225
|
+
5. Pricing: TERTIARY - 2 calls per ceremony, moderate impact
|
|
226
|
+
|
|
227
|
+
Selection Guidance:
|
|
228
|
+
- Prioritize models with strong analytical and critical thinking capabilities
|
|
229
|
+
- Require ability to identify subtle inconsistencies and gaps
|
|
230
|
+
- Must provide actionable, specific feedback for improvement
|
|
231
|
+
- Must understand project documentation best practices
|
|
232
|
+
- Speed not a constraint (background processing)
|
|
233
|
+
- Consider pricing tier relative to quality improvement
|
|
234
|
+
|
|
235
|
+
Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
|
|
236
|
+
metadata: {
|
|
237
|
+
callFrequency: 2,
|
|
238
|
+
impact: 'HIGH',
|
|
239
|
+
taskComplexity: 6,
|
|
240
|
+
currentDefault: 'Claude Sonnet 4.5'
|
|
241
|
+
}
|
|
242
|
+
},
|
|
243
|
+
|
|
244
|
+
{
|
|
245
|
+
id: 'sprint-planning-decomposition',
|
|
246
|
+
ceremony: 'sprint-planning',
|
|
247
|
+
stage: 'decomposition',
|
|
248
|
+
stageName: 'Epic & Story Decomposition',
|
|
249
|
+
prompt: `TASK: Select optimal LLM model for epic and story decomposition in sprint-planning ceremony
|
|
250
|
+
PRIORITY: Best possible output quality
|
|
251
|
+
|
|
252
|
+
CONTEXT:
|
|
253
|
+
- Stage: decomposition (Epic & Story Decomposition)
|
|
254
|
+
- Ceremony: sprint-planning
|
|
255
|
+
- Purpose: AI analyzes PROJECT.md and decomposes project scope into hierarchical epics and stories
|
|
256
|
+
- Input: PROJECT.md, project context.md, existing epics/stories (5,000-20,000 tokens)
|
|
257
|
+
- Output: Structured JSON with epics, stories, acceptance criteria
|
|
258
|
+
- Call frequency: 1 per ceremony
|
|
259
|
+
- Impact: CRITICAL - Defines entire project work breakdown structure
|
|
260
|
+
|
|
261
|
+
EVALUATION CRITERIA (BEST OUTPUT FIRST):
|
|
262
|
+
|
|
263
|
+
1. Output Quality Requirements: CRITICAL
|
|
264
|
+
- Must create logical, implementable work breakdown
|
|
265
|
+
- Acceptance criteria must be specific and testable
|
|
266
|
+
- Hierarchy must reflect dependencies
|
|
267
|
+
→ Requires appropriate model capabilities
|
|
268
|
+
|
|
269
|
+
2. Task Complexity: 9/10 (Complex hierarchical decomposition)
|
|
270
|
+
- Analyze full project scope
|
|
271
|
+
- Break features into implementable story-level units
|
|
272
|
+
- Generate specific, testable acceptance criteria
|
|
273
|
+
- Identify dependencies between stories
|
|
274
|
+
|
|
275
|
+
3. Context Understanding:
|
|
276
|
+
- Must synthesize entire project vision
|
|
277
|
+
- Understand technical architecture and constraints
|
|
278
|
+
- Recognize implicit dependencies
|
|
279
|
+
→ Requires appropriate model capabilities
|
|
280
|
+
|
|
281
|
+
4. Consistency & Reliability:
|
|
282
|
+
- Critical that decomposition is complete (no gaps)
|
|
283
|
+
- Stories must not overlap or contradict
|
|
284
|
+
|
|
285
|
+
5. Speed Requirements: LOW (Secondary)
|
|
286
|
+
- Background process, one-time operation
|
|
287
|
+
- Quality far more important than speed
|
|
288
|
+
|
|
289
|
+
6. Pricing Considerations: TERTIARY
|
|
290
|
+
- Single call per ceremony
|
|
291
|
+
- Most critical stage in sprint-planning
|
|
292
|
+
→ Worth any pricing tier
|
|
293
|
+
|
|
294
|
+
RECOMMENDATION:
|
|
295
|
+
|
|
296
|
+
Based on evaluation criteria above, select the model that best meets:
|
|
297
|
+
1. PRIMARY: Output Quality - CRITICAL - Defines entire project work structure
|
|
298
|
+
2. SECONDARY: Task Complexity - Very High (9/10) - complex hierarchical decomposition
|
|
299
|
+
3. TERTIARY: Hierarchical Reasoning - Exceptional domain breakdown with dependencies
|
|
300
|
+
4. Speed Requirements: LOW - Background process, quality >>> speed
|
|
301
|
+
5. Pricing: TERTIARY - Single call, most critical stage, worth any tier
|
|
302
|
+
|
|
303
|
+
Selection Guidance:
|
|
304
|
+
- Prioritize models with exceptional hierarchical reasoning and decomposition
|
|
305
|
+
- Require deep project synthesis and implicit dependency recognition
|
|
306
|
+
- Must generate complete, non-overlapping, consistent epic/story structure
|
|
307
|
+
- Must produce specific, testable acceptance criteria at story level
|
|
308
|
+
- Consistency and completeness are paramount (gaps cause downstream failures)
|
|
309
|
+
- Speed not a constraint (one-time background operation)
|
|
310
|
+
- Pricing justified by critical impact on entire project
|
|
311
|
+
|
|
312
|
+
Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
|
|
313
|
+
metadata: {
|
|
314
|
+
callFrequency: 1,
|
|
315
|
+
impact: 'CRITICAL',
|
|
316
|
+
taskComplexity: 9,
|
|
317
|
+
currentDefault: 'Claude Opus 4.6'
|
|
318
|
+
}
|
|
319
|
+
},
|
|
320
|
+
|
|
321
|
+
{
|
|
322
|
+
id: 'sprint-planning-validation-universal',
|
|
323
|
+
ceremony: 'sprint-planning',
|
|
324
|
+
stage: 'validation-universal',
|
|
325
|
+
stageName: 'Universal Validators',
|
|
326
|
+
prompt: `TASK: Select optimal LLM model for universal validators in sprint-planning ceremony
|
|
327
|
+
PRIORITY: Best possible output quality
|
|
328
|
+
|
|
329
|
+
CONTEXT:
|
|
330
|
+
- Stage: validation-universal (Universal Validators)
|
|
331
|
+
- Ceremony: sprint-planning
|
|
332
|
+
- Purpose: Critical validators always applied: architecture, security, quality
|
|
333
|
+
- Input: Epic or Story with full context (2,000-8,000 tokens per item)
|
|
334
|
+
- Output: Detailed validation report with architectural and security analysis
|
|
335
|
+
- Call frequency: ~30 calls per ceremony
|
|
336
|
+
- Impact: CRITICAL - Catches fundamental issues before implementation
|
|
337
|
+
|
|
338
|
+
EVALUATION CRITERIA (BEST OUTPUT FIRST):
|
|
339
|
+
|
|
340
|
+
1. Output Quality Requirements: CRITICAL
|
|
341
|
+
- Must perform deep architectural analysis
|
|
342
|
+
- Must identify security vulnerabilities
|
|
343
|
+
- Must provide specific, actionable recommendations
|
|
344
|
+
→ Requires appropriate model capabilities
|
|
345
|
+
|
|
346
|
+
2. Task Complexity: 9/10 (Deep architectural and security analysis)
|
|
347
|
+
- Analyze system architecture and design patterns
|
|
348
|
+
- Identify security vulnerabilities
|
|
349
|
+
- Assess technical feasibility
|
|
350
|
+
- Evaluate testing strategies
|
|
351
|
+
|
|
352
|
+
3. Context Understanding:
|
|
353
|
+
- Must understand full project architecture
|
|
354
|
+
- Cross-reference with other epics/stories
|
|
355
|
+
- Identify system-wide architectural issues
|
|
356
|
+
→ Requires appropriate model capabilities
|
|
357
|
+
|
|
358
|
+
4. Consistency & Reliability:
|
|
359
|
+
- Cannot miss critical architectural flaws
|
|
360
|
+
- Must consistently identify security issues
|
|
361
|
+
→ Critical appropriate model capabilities
|
|
362
|
+
|
|
363
|
+
5. Speed Requirements: MODERATE (Secondary)
|
|
364
|
+
- 30 calls in parallel validation stage
|
|
365
|
+
- Quality far more important than speed
|
|
366
|
+
|
|
367
|
+
6. Pricing Considerations: MODERATE
|
|
368
|
+
- 30 calls = significant volume
|
|
369
|
+
- Quality cannot be compromised
|
|
370
|
+
|
|
371
|
+
RECOMMENDATION:
|
|
372
|
+
|
|
373
|
+
Based on evaluation criteria above, select the model that best meets:
|
|
374
|
+
1. PRIMARY: Output Quality - CRITICAL - Deep architectural and security analysis
|
|
375
|
+
2. SECONDARY: Task Complexity - Very High (9/10) - system-wide analysis
|
|
376
|
+
3. TERTIARY: Reliability - Cannot miss critical architectural flaws or security issues
|
|
377
|
+
4. Speed Requirements: MODERATE - 30 calls in parallel, quality still paramount
|
|
378
|
+
5. Pricing: MODERATE - 30 calls = significant volume, but quality cannot be compromised
|
|
379
|
+
|
|
380
|
+
Selection Guidance:
|
|
381
|
+
- Prioritize models with exceptional architectural reasoning and security analysis
|
|
382
|
+
- Require consistent, reliable identification of vulnerabilities and design flaws
|
|
383
|
+
- Must provide specific, actionable recommendations for improvements
|
|
384
|
+
- Must understand system-wide architecture and cross-reference between components
|
|
385
|
+
- Reliability is critical (false negatives in security/architecture are high-risk)
|
|
386
|
+
- Do not compromise on quality for these critical validators
|
|
387
|
+
|
|
388
|
+
Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
|
|
389
|
+
metadata: {
|
|
390
|
+
callFrequency: 30,
|
|
391
|
+
impact: 'CRITICAL',
|
|
392
|
+
taskComplexity: 9,
|
|
393
|
+
currentDefault: 'Claude Sonnet 4.5'
|
|
394
|
+
}
|
|
395
|
+
},
|
|
396
|
+
|
|
397
|
+
{
|
|
398
|
+
id: 'sprint-planning-validation-domain',
|
|
399
|
+
ceremony: 'sprint-planning',
|
|
400
|
+
stage: 'validation-domain',
|
|
401
|
+
stageName: 'Domain Validators',
|
|
402
|
+
prompt: `TASK: Select optimal LLM model for domain validators in sprint-planning ceremony
|
|
403
|
+
PRIORITY: Best possible output quality
|
|
404
|
+
|
|
405
|
+
CONTEXT:
|
|
406
|
+
- Stage: validation-domain (Domain Validators)
|
|
407
|
+
- Ceremony: sprint-planning
|
|
408
|
+
- Purpose: Domain-specific validators based on tech stack
|
|
409
|
+
- Input: Epic or Story with domain-relevant context (2,000-5,000 tokens)
|
|
410
|
+
- Output: Domain-specific validation report with best practices
|
|
411
|
+
- Call frequency: ~90 calls per ceremony (largest volume stage)
|
|
412
|
+
- Impact: HIGH - Catches domain-specific issues, ensures best practices
|
|
413
|
+
|
|
414
|
+
EVALUATION CRITERIA (BEST OUTPUT FIRST):
|
|
415
|
+
|
|
416
|
+
1. Output Quality Requirements: HIGH
|
|
417
|
+
- Must apply domain-specific best practices
|
|
418
|
+
- Must identify domain anti-patterns
|
|
419
|
+
- Must provide actionable recommendations
|
|
420
|
+
→ Requires appropriate model capabilities
|
|
421
|
+
|
|
422
|
+
2. Task Complexity: 7/10 (Domain expertise application)
|
|
423
|
+
- Apply domain-specific patterns
|
|
424
|
+
- Identify violations of domain conventions
|
|
425
|
+
- Not just pattern matching - requires context understanding
|
|
426
|
+
|
|
427
|
+
3. Context Understanding:
|
|
428
|
+
- Must understand project architecture in domain context
|
|
429
|
+
- Cross-reference with other domains
|
|
430
|
+
|
|
431
|
+
4. Speed Requirements: MODERATE (Secondary)
|
|
432
|
+
- 90 calls = highest volume in sprint-planning
|
|
433
|
+
- Parallel execution
|
|
434
|
+
|
|
435
|
+
6. Pricing Considerations: SIGNIFICANT
|
|
436
|
+
- 90 calls = largest pricing driver
|
|
437
|
+
- Medium vs Low pricing makes material difference
|
|
438
|
+
|
|
439
|
+
RECOMMENDATION:
|
|
440
|
+
|
|
441
|
+
Based on evaluation criteria above, select the model that best meets:
|
|
442
|
+
1. PRIMARY: Output Quality - HIGH - Domain-specific best practices and anti-patterns
|
|
443
|
+
2. SECONDARY: Task Complexity - High (7/10) - requires domain expertise, not just pattern matching
|
|
444
|
+
3. TERTIARY: Volume Impact - HIGHEST (90 calls) - largest pricing driver in ceremony
|
|
445
|
+
4. Speed Requirements: MODERATE - Parallel execution, but quality important
|
|
446
|
+
5. Pricing: SIGNIFICANT - 90 calls make pricing tier materially impactful
|
|
447
|
+
|
|
448
|
+
Selection Guidance:
|
|
449
|
+
- Prioritize models with strong domain knowledge (DevOps, Database, API, Frontend, etc.)
|
|
450
|
+
- Require ability to apply domain-specific best practices and identify anti-patterns
|
|
451
|
+
- Must provide actionable, context-appropriate recommendations
|
|
452
|
+
- Consider pricing tier carefully (90 calls = 3-4x cost difference between tiers)
|
|
453
|
+
- Balance domain expertise depth vs pricing efficiency
|
|
454
|
+
- Acceptable to use lower tier if basic domain pattern checking sufficient
|
|
455
|
+
|
|
456
|
+
Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
|
|
457
|
+
metadata: {
|
|
458
|
+
callFrequency: 90,
|
|
459
|
+
impact: 'HIGH',
|
|
460
|
+
taskComplexity: 7,
|
|
461
|
+
currentDefault: 'Claude Sonnet 4.5'
|
|
462
|
+
}
|
|
463
|
+
},
|
|
464
|
+
|
|
465
|
+
{
|
|
466
|
+
id: 'sprint-planning-validation-feature',
|
|
467
|
+
ceremony: 'sprint-planning',
|
|
468
|
+
stage: 'validation-feature',
|
|
469
|
+
stageName: 'Feature Validators',
|
|
470
|
+
prompt: `TASK: Select optimal LLM model for feature validators in sprint-planning ceremony
|
|
471
|
+
PRIORITY: Best possible output quality
|
|
472
|
+
|
|
473
|
+
CONTEXT:
|
|
474
|
+
- Stage: validation-feature (Feature Validators)
|
|
475
|
+
- Ceremony: sprint-planning
|
|
476
|
+
- Purpose: Feature-specific validators based on keywords in acceptance criteria
|
|
477
|
+
- Input: Epic or Story with feature-specific context (1,500-4,000 tokens)
|
|
478
|
+
- Output: Feature-specific validation checklist and completeness assessment
|
|
479
|
+
- Call frequency: ~25 calls per ceremony
|
|
480
|
+
- Impact: MEDIUM - Ensures feature completeness, identifies missing requirements
|
|
481
|
+
|
|
482
|
+
EVALUATION CRITERIA (BEST OUTPUT FIRST):
|
|
483
|
+
|
|
484
|
+
1. Output Quality Requirements: MEDIUM-HIGH
|
|
485
|
+
- Must verify feature completeness
|
|
486
|
+
- Must identify missing scenarios or edge cases
|
|
487
|
+
- Should provide feature-specific implementation guidance
|
|
488
|
+
→ Requires appropriate model capabilities
|
|
489
|
+
|
|
490
|
+
2. Task Complexity: 5/10 (Feature checklist validation)
|
|
491
|
+
- Apply feature-specific checklists
|
|
492
|
+
- Identify missing scenarios
|
|
493
|
+
- Verify acceptance criteria completeness
|
|
494
|
+
|
|
495
|
+
3. Context Understanding:
|
|
496
|
+
- Understand feature requirements from acceptance criteria
|
|
497
|
+
- Identify implicit requirements not explicitly stated
|
|
498
|
+
|
|
499
|
+
4. Speed Requirements: MODERATE (Secondary)
|
|
500
|
+
- 25 calls, parallel execution
|
|
501
|
+
- Background processing
|
|
502
|
+
|
|
503
|
+
6. Pricing Considerations: MODERATE
|
|
504
|
+
- 25 calls = moderate volume
|
|
505
|
+
- Completeness checking vs deep analysis
|
|
506
|
+
|
|
507
|
+
RECOMMENDATION:
|
|
508
|
+
|
|
509
|
+
Based on evaluation criteria above, select the model that best meets:
|
|
510
|
+
1. PRIMARY: Output Quality - MEDIUM-HIGH - Feature completeness and edge case identification
|
|
511
|
+
2. SECONDARY: Task Complexity - Moderate (5/10) - checklist application with implicit requirement detection
|
|
512
|
+
3. TERTIARY: Volume Impact - MODERATE (25 calls) - pricing tier makes moderate impact
|
|
513
|
+
4. Speed Requirements: MODERATE - Parallel execution in background
|
|
514
|
+
5. Pricing: MODERATE - 25 calls = moderate volume, balance quality vs cost
|
|
515
|
+
|
|
516
|
+
Selection Guidance:
|
|
517
|
+
- Prioritize models capable of identifying implicit requirements (not just explicit ones)
|
|
518
|
+
- Require feature completeness checking (missing scenarios, edge cases)
|
|
519
|
+
- Should provide test scenario suggestions
|
|
520
|
+
- Consider pricing tier (25 calls = 2-3x cost difference between tiers)
|
|
521
|
+
- Balance edge case detection quality vs pricing efficiency
|
|
522
|
+
- Acceptable to use lower tier if basic explicit requirement checking sufficient
|
|
523
|
+
|
|
524
|
+
Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
|
|
525
|
+
metadata: {
|
|
526
|
+
callFrequency: 25,
|
|
527
|
+
impact: 'MEDIUM',
|
|
528
|
+
taskComplexity: 5,
|
|
529
|
+
currentDefault: 'Claude Sonnet 4.5'
|
|
530
|
+
}
|
|
531
|
+
},
|
|
532
|
+
|
|
533
|
+
{
|
|
534
|
+
id: 'sprint-planning-doc-distribution',
|
|
535
|
+
ceremony: 'sprint-planning',
|
|
536
|
+
stage: 'doc-distribution',
|
|
537
|
+
stageName: 'Documentation Distribution',
|
|
538
|
+
prompt: `TASK: Select optimal LLM model for documentation distribution in sprint-planning ceremony
|
|
539
|
+
PRIORITY: Best possible output quality
|
|
540
|
+
|
|
541
|
+
CONTEXT:
|
|
542
|
+
- Stage: doc-distribution (Documentation Distribution)
|
|
543
|
+
- Ceremony: sprint-planning
|
|
544
|
+
- Purpose: AI moves relevant content from parent doc.md to each epic/story doc.md and elaborates with domain-specific detail
|
|
545
|
+
- Input: Parent doc.md + epic/story item description (2,000-5,000 tokens)
|
|
546
|
+
- Output: Child doc.md (extracted + elaborated) + updated parent doc.md (lighter) as JSON
|
|
547
|
+
- Call frequency: ~25 calls per ceremony
|
|
548
|
+
- Impact: VERY HIGH - Establishes the documentation tree that guides all future AI agents
|
|
549
|
+
|
|
550
|
+
EVALUATION CRITERIA (BEST OUTPUT FIRST):
|
|
551
|
+
|
|
552
|
+
1. Output Quality Requirements: VERY HIGH
|
|
553
|
+
- Must accurately identify which content belongs to the child scope
|
|
554
|
+
- Must elaborate with domain-specific implementation detail
|
|
555
|
+
- Must produce valid JSON with embedded markdown
|
|
556
|
+
→ Requires appropriate model capabilities
|
|
557
|
+
|
|
558
|
+
2. Task Complexity: 7/10 (Content extraction + elaboration + JSON output)
|
|
559
|
+
- Identify domain-relevant sections in parent doc
|
|
560
|
+
- Remove extracted content cleanly from parent
|
|
561
|
+
- Extend child doc with actionable implementation notes
|
|
562
|
+
|
|
563
|
+
3. Consistency & Reliability:
|
|
564
|
+
- Child doc must cover only the child's scope
|
|
565
|
+
- Parent doc must remain coherent after extraction
|
|
566
|
+
- JSON output must be well-formed
|
|
567
|
+
|
|
568
|
+
4. Speed Requirements: MODERATE (Secondary)
|
|
569
|
+
- 25 calls, sequential per epic/story chain
|
|
570
|
+
- Quality more important than speed
|
|
571
|
+
|
|
572
|
+
5. Pricing Considerations: MODERATE
|
|
573
|
+
- 25 calls = moderate volume
|
|
574
|
+
- High impact on downstream work quality
|
|
575
|
+
|
|
576
|
+
RECOMMENDATION:
|
|
577
|
+
|
|
578
|
+
Based on evaluation criteria above, select the model that best meets:
|
|
579
|
+
1. PRIMARY: Output Quality - VERY HIGH - Defines the documentation hierarchy for the project
|
|
580
|
+
2. SECONDARY: Technical Writing - Focused extraction with domain elaboration
|
|
581
|
+
3. TERTIARY: JSON reliability - Must produce parseable JSON with embedded markdown
|
|
582
|
+
|
|
583
|
+
Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
|
|
584
|
+
metadata: {
|
|
585
|
+
callFrequency: 25,
|
|
586
|
+
impact: 'VERY HIGH',
|
|
587
|
+
taskComplexity: 7,
|
|
588
|
+
currentDefault: 'Claude Sonnet 4.6'
|
|
589
|
+
}
|
|
590
|
+
},
|
|
591
|
+
|
|
592
|
+
{
|
|
593
|
+
id: 'seed-decomposition',
|
|
594
|
+
ceremony: 'seed',
|
|
595
|
+
stage: 'decomposition',
|
|
596
|
+
stageName: 'Task Decomposition',
|
|
597
|
+
prompt: `TASK: Select optimal LLM model for task decomposition in seed ceremony
|
|
598
|
+
PRIORITY: Best possible output quality
|
|
599
|
+
|
|
600
|
+
CONTEXT:
|
|
601
|
+
- Stage: decomposition (Task & Subtask Decomposition)
|
|
602
|
+
- Ceremony: seed
|
|
603
|
+
- Purpose: AI breaks down story into tasks and subtasks
|
|
604
|
+
- Input: Story with acceptance criteria, contexts (2,000-6,000 tokens)
|
|
605
|
+
- Output: Hierarchical task structure with dependencies
|
|
606
|
+
- Call frequency: 1 per ceremony execution
|
|
607
|
+
- Impact: CRITICAL - Defines actual implementation plan
|
|
608
|
+
|
|
609
|
+
EVALUATION CRITERIA (BEST OUTPUT FIRST):
|
|
610
|
+
|
|
611
|
+
1. Output Quality Requirements: CRITICAL
|
|
612
|
+
- Must break story into logical task groupings
|
|
613
|
+
- Subtasks must be atomic and implementable
|
|
614
|
+
- Must identify dependencies
|
|
615
|
+
→ Requires appropriate model capabilities
|
|
616
|
+
|
|
617
|
+
2. Task Complexity: 8/10 (Granular work breakdown)
|
|
618
|
+
- Analyze story and break into tasks
|
|
619
|
+
- Determine appropriate task granularity
|
|
620
|
+
- Break tasks into atomic subtasks
|
|
621
|
+
- Identify task dependencies
|
|
622
|
+
|
|
623
|
+
3. Context Understanding:
|
|
624
|
+
- Understand full story requirements
|
|
625
|
+
- Consider epic and project context
|
|
626
|
+
- Identify implicit implementation needs
|
|
627
|
+
→ Requires appropriate model capabilities
|
|
628
|
+
|
|
629
|
+
4. Consistency & Reliability:
|
|
630
|
+
- Task breakdown must be complete
|
|
631
|
+
- Dependencies must be accurate
|
|
632
|
+
|
|
633
|
+
5. Speed Requirements: LOW (Secondary)
|
|
634
|
+
- Background processing
|
|
635
|
+
- Quality far more important
|
|
636
|
+
|
|
637
|
+
6. Pricing Considerations: TERTIARY
|
|
638
|
+
- Single call per ceremony
|
|
639
|
+
- Critical for implementation planning
|
|
640
|
+
|
|
641
|
+
RECOMMENDATION:
|
|
642
|
+
|
|
643
|
+
Based on evaluation criteria above, select the model that best meets:
|
|
644
|
+
1. PRIMARY: Output Quality - CRITICAL - Defines actual implementation plan
|
|
645
|
+
2. SECONDARY: Task Complexity - Very High (8/10) - granular work breakdown with dependencies
|
|
646
|
+
3. TERTIARY: Breakdown Capability - Exceptional atomic subtask identification
|
|
647
|
+
4. Speed Requirements: LOW - Background processing, quality >>> speed
|
|
648
|
+
5. Pricing: TERTIARY - Single call, critical impact justifies any tier
|
|
649
|
+
|
|
650
|
+
Selection Guidance:
|
|
651
|
+
- Prioritize models with exceptional hierarchical decomposition and breakdown capability
|
|
652
|
+
- Require accurate identification of atomic, implementable subtasks
|
|
653
|
+
- Must properly identify task dependencies
|
|
654
|
+
- Must understand full story requirements in epic/project context
|
|
655
|
+
- Completeness is critical (missing tasks cause implementation delays)
|
|
656
|
+
- Reliability paramount (inconsistent granularity causes confusion)
|
|
657
|
+
- Single call makes pricing tier less significant than quality
|
|
658
|
+
|
|
659
|
+
Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
|
|
660
|
+
metadata: {
|
|
661
|
+
callFrequency: 1,
|
|
662
|
+
impact: 'CRITICAL',
|
|
663
|
+
taskComplexity: 8,
|
|
664
|
+
currentDefault: 'Claude Opus 4.6'
|
|
665
|
+
}
|
|
666
|
+
},
|
|
667
|
+
|
|
668
|
+
{
|
|
669
|
+
id: 'seed-validation',
|
|
670
|
+
ceremony: 'seed',
|
|
671
|
+
stage: 'validation',
|
|
672
|
+
stageName: 'Task Validation',
|
|
673
|
+
prompt: `TASK: Select optimal LLM model for task validation in seed ceremony
|
|
674
|
+
PRIORITY: Best possible output quality
|
|
675
|
+
|
|
676
|
+
CONTEXT:
|
|
677
|
+
- Stage: validation (Task Validation)
|
|
678
|
+
- Ceremony: seed
|
|
679
|
+
- Purpose: AI validates task hierarchy completeness and feasibility
|
|
680
|
+
- Input: Complete task/subtask hierarchy (3,000-10,000 tokens)
|
|
681
|
+
- Output: Validation report identifying gaps and issues
|
|
682
|
+
- Call frequency: ~20 calls per ceremony
|
|
683
|
+
- Impact: VERY HIGH - Catches planning issues before development
|
|
684
|
+
|
|
685
|
+
EVALUATION CRITERIA (BEST OUTPUT FIRST):
|
|
686
|
+
|
|
687
|
+
1. Output Quality Requirements: VERY HIGH
|
|
688
|
+
- Must identify gaps in task coverage
|
|
689
|
+
- Must assess task granularity
|
|
690
|
+
- Must validate dependency correctness
|
|
691
|
+
→ Requires appropriate model capabilities
|
|
692
|
+
|
|
693
|
+
2. Task Complexity: 7/10 (Completeness validation + reasoning)
|
|
694
|
+
- Analyze task hierarchy for gaps
|
|
695
|
+
- Assess granularity appropriateness
|
|
696
|
+
- Validate dependency relationships
|
|
697
|
+
- Reason about implementation feasibility
|
|
698
|
+
|
|
699
|
+
3. Context Understanding:
|
|
700
|
+
- Must understand full task hierarchy
|
|
701
|
+
- Cross-reference with story requirements
|
|
702
|
+
- Reason about dependencies
|
|
703
|
+
→ Requires appropriate model capabilities
|
|
704
|
+
|
|
705
|
+
4. Consistency & Reliability:
|
|
706
|
+
- Must consistently identify gaps
|
|
707
|
+
- Cannot miss critical completeness problems
|
|
708
|
+
|
|
709
|
+
5. Speed Requirements: MODERATE (Secondary)
|
|
710
|
+
- 20 calls, parallel validation
|
|
711
|
+
- Quality far more important
|
|
712
|
+
|
|
713
|
+
6. Pricing Considerations: MODERATE
|
|
714
|
+
- 20 calls = moderate volume
|
|
715
|
+
- Critical impact (prevents implementation issues)
|
|
716
|
+
|
|
717
|
+
RECOMMENDATION:
|
|
718
|
+
|
|
719
|
+
Based on evaluation criteria above, select the model that best meets:
|
|
720
|
+
1. PRIMARY: Output Quality - VERY HIGH - Catches planning issues before development
|
|
721
|
+
2. SECONDARY: Validation Reasoning - Sophisticated gap identification and granularity assessment
|
|
722
|
+
3. TERTIARY: Task Complexity - High (7/10) - completeness validation with dependency reasoning
|
|
723
|
+
4. Speed Requirements: MODERATE - 20 calls in parallel, quality >>> speed
|
|
724
|
+
5. Pricing: MODERATE - 20 calls, critical impact justifies investment
|
|
725
|
+
|
|
726
|
+
Selection Guidance:
|
|
727
|
+
- Prioritize models with excellent completeness validation and gap identification
|
|
728
|
+
- Require sophisticated reasoning about task granularity appropriateness
|
|
729
|
+
- Must validate dependency correctness and implementation feasibility
|
|
730
|
+
- Must cross-reference task hierarchy with story requirements
|
|
731
|
+
- Cannot miss critical completeness problems (false negatives are costly)
|
|
732
|
+
- Do not compromise on quality (prevents downstream implementation issues)
|
|
733
|
+
|
|
734
|
+
Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
|
|
735
|
+
metadata: {
|
|
736
|
+
callFrequency: 20,
|
|
737
|
+
impact: 'VERY HIGH',
|
|
738
|
+
taskComplexity: 7,
|
|
739
|
+
currentDefault: 'Claude Sonnet 4.5'
|
|
740
|
+
}
|
|
741
|
+
},
|
|
742
|
+
|
|
743
|
+
{
|
|
744
|
+
id: 'seed-context-generation',
|
|
745
|
+
ceremony: 'seed',
|
|
746
|
+
stage: 'context-generation',
|
|
747
|
+
stageName: 'Task Context Generation',
|
|
748
|
+
prompt: `TASK: Select optimal LLM model for task context generation in seed ceremony
|
|
749
|
+
PRIORITY: Best possible output quality
|
|
750
|
+
|
|
751
|
+
CONTEXT:
|
|
752
|
+
- Stage: context-generation (Task Context Generation)
|
|
753
|
+
- Ceremony: seed
|
|
754
|
+
- Purpose: AI generates context.md for each task
|
|
755
|
+
- Input: Task with subtasks, story/epic/project contexts (1,500-4,000 tokens)
|
|
756
|
+
- Output: Concise context.md with implementation approach (300-1,000 tokens)
|
|
757
|
+
- Call frequency: ~10 calls per ceremony
|
|
758
|
+
- Impact: VERY HIGH - Developers read immediately before implementing
|
|
759
|
+
|
|
760
|
+
EVALUATION CRITERIA (BEST OUTPUT FIRST):
|
|
761
|
+
|
|
762
|
+
1. Output Quality Requirements: VERY HIGH
|
|
763
|
+
- Must provide specific, actionable implementation guidance
|
|
764
|
+
- Technical details must be accurate
|
|
765
|
+
- Must highlight gotchas and edge cases
|
|
766
|
+
→ Requires appropriate model capabilities
|
|
767
|
+
|
|
768
|
+
2. Task Complexity: 7/10 (Implementation-focused technical context)
|
|
769
|
+
- Synthesize task requirements into guidance
|
|
770
|
+
- Provide appropriate technical detail
|
|
771
|
+
- Identify implementation approaches
|
|
772
|
+
- Balance brevity with actionability
|
|
773
|
+
|
|
774
|
+
3. Context Understanding:
|
|
775
|
+
- Understand task within story/epic context
|
|
776
|
+
- Recognize relevant patterns
|
|
777
|
+
- Identify task dependencies
|
|
778
|
+
→ Requires appropriate model capabilities
|
|
779
|
+
|
|
780
|
+
4. Consistency & Reliability:
|
|
781
|
+
- Context must align with task requirements
|
|
782
|
+
- Technical guidance must be accurate
|
|
783
|
+
|
|
784
|
+
5. Speed Requirements: MODERATE (Secondary)
|
|
785
|
+
- 10 calls, parallel generation
|
|
786
|
+
- Quality more important
|
|
787
|
+
|
|
788
|
+
6. Pricing Considerations: MODERATE
|
|
789
|
+
- 10 calls = low-moderate volume
|
|
790
|
+
- Developer-facing, high impact
|
|
791
|
+
|
|
792
|
+
RECOMMENDATION:
|
|
793
|
+
|
|
794
|
+
Based on evaluation criteria above, select the model that best meets:
|
|
795
|
+
1. PRIMARY: Output Quality - VERY HIGH - Developers read immediately before implementing
|
|
796
|
+
2. SECONDARY: Technical Writing - Implementation-focused guidance with accuracy
|
|
797
|
+
3. TERTIARY: Task Complexity - High (7/10) - synthesis with appropriate detail level
|
|
798
|
+
4. Speed Requirements: MODERATE - 10 calls in parallel, quality >>> speed
|
|
799
|
+
5. Pricing: MODERATE - 10 calls, developer-facing, high impact
|
|
800
|
+
|
|
801
|
+
Selection Guidance:
|
|
802
|
+
- Prioritize models with excellent technical writing for implementation guidance
|
|
803
|
+
- Require ability to highlight gotchas, edge cases, and implementation approaches
|
|
804
|
+
- Must provide specific, actionable guidance (not generic advice)
|
|
805
|
+
- Must balance brevity with actionability (300-1,000 token output)
|
|
806
|
+
- Technical details must be accurate (errors cause developer confusion)
|
|
807
|
+
- Do not compromise on quality (directly impacts implementation efficiency)
|
|
808
|
+
|
|
809
|
+
Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
|
|
810
|
+
metadata: {
|
|
811
|
+
callFrequency: 10,
|
|
812
|
+
impact: 'VERY HIGH',
|
|
813
|
+
taskComplexity: 7,
|
|
814
|
+
currentDefault: 'Claude Sonnet 4.5'
|
|
815
|
+
}
|
|
816
|
+
},
|
|
817
|
+
|
|
818
|
+
{
|
|
819
|
+
id: 'context-retrospective-documentation-update',
|
|
820
|
+
ceremony: 'context-retrospective',
|
|
821
|
+
stage: 'documentation-update',
|
|
822
|
+
stageName: 'Documentation Enhancement',
|
|
823
|
+
prompt: `TASK: Select optimal LLM model for documentation enhancement in context-retrospective ceremony
|
|
824
|
+
PRIORITY: Best possible output quality
|
|
825
|
+
|
|
826
|
+
CONTEXT:
|
|
827
|
+
- Stage: documentation-update (Documentation Enhancement)
|
|
828
|
+
- Ceremony: context-retrospective
|
|
829
|
+
- Purpose: AI refines PROJECT.md based on implementation learnings
|
|
830
|
+
- Input: PROJECT.md, git history, completed work (10,000-30,000 tokens)
|
|
831
|
+
- Output: Updated PROJECT.md with refined descriptions and learnings
|
|
832
|
+
- Call frequency: ~10 calls per ceremony
|
|
833
|
+
- Impact: HIGH - Maintains PROJECT.md as source of truth
|
|
834
|
+
|
|
835
|
+
EVALUATION CRITERIA (BEST OUTPUT FIRST):
|
|
836
|
+
|
|
837
|
+
1. Output Quality Requirements: HIGH
|
|
838
|
+
- Must synthesize implementation learnings
|
|
839
|
+
- Technical updates must be accurate
|
|
840
|
+
- Must maintain documentation consistency
|
|
841
|
+
→ Requires appropriate model capabilities
|
|
842
|
+
|
|
843
|
+
2. Task Complexity: 7/10 (Technical synthesis and writing)
|
|
844
|
+
- Analyze git history
|
|
845
|
+
- Identify patterns and insights
|
|
846
|
+
- Synthesize into documentation updates
|
|
847
|
+
|
|
848
|
+
3. Context Understanding:
|
|
849
|
+
- Must handle large context (PROJECT.md + git history)
|
|
850
|
+
- Understand implementation changes
|
|
851
|
+
- Identify obsolete information
|
|
852
|
+
→ Large context window needed appropriate model capabilities
|
|
853
|
+
|
|
854
|
+
4. Consistency & Reliability:
|
|
855
|
+
- Updates must not contradict existing content
|
|
856
|
+
- Technical accuracy critical
|
|
857
|
+
|
|
858
|
+
5. Speed Requirements: LOW (Secondary)
|
|
859
|
+
- Background processing
|
|
860
|
+
- Quality more important
|
|
861
|
+
|
|
862
|
+
6. Pricing Considerations: MODERATE
|
|
863
|
+
- 10 calls = low-moderate volume
|
|
864
|
+
- Important but not critical path
|
|
865
|
+
|
|
866
|
+
RECOMMENDATION:
|
|
867
|
+
|
|
868
|
+
Based on evaluation criteria above, select the model that best meets:
|
|
869
|
+
1. PRIMARY: Output Quality - HIGH - Maintains PROJECT.md as source of truth
|
|
870
|
+
2. SECONDARY: Technical Writing - Synthesis of implementation learnings
|
|
871
|
+
3. TERTIARY: Task Complexity - High (7/10) - git history analysis + documentation
|
|
872
|
+
4. Speed Requirements: LOW - Background processing, quality >>> speed
|
|
873
|
+
5. Pricing: MODERATE - 10 calls, important but not critical path
|
|
874
|
+
|
|
875
|
+
Selection Guidance:
|
|
876
|
+
- Prioritize models with excellent technical writing and documentation synthesis
|
|
877
|
+
- Require ability to analyze git history and identify implementation patterns
|
|
878
|
+
- Must maintain consistency with existing documentation (no contradictions)
|
|
879
|
+
- Must handle large context (PROJECT.md + git history = 10K-30K tokens)
|
|
880
|
+
- Technical accuracy critical (documentation is project source of truth)
|
|
881
|
+
- Consider balance of quality vs pricing for moderate-volume task
|
|
882
|
+
|
|
883
|
+
Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
|
|
884
|
+
metadata: {
|
|
885
|
+
callFrequency: 10,
|
|
886
|
+
impact: 'HIGH',
|
|
887
|
+
taskComplexity: 7,
|
|
888
|
+
currentDefault: 'Claude Sonnet 4.5'
|
|
889
|
+
}
|
|
890
|
+
},
|
|
891
|
+
|
|
892
|
+
{
|
|
893
|
+
id: 'context-retrospective-context-refinement',
|
|
894
|
+
ceremony: 'context-retrospective',
|
|
895
|
+
stage: 'context-refinement',
|
|
896
|
+
stageName: 'Context Enhancement',
|
|
897
|
+
prompt: `TASK: Select optimal LLM model for context refinement in context-retrospective ceremony
|
|
898
|
+
PRIORITY: Best possible output quality
|
|
899
|
+
|
|
900
|
+
CONTEXT:
|
|
901
|
+
- Stage: context-refinement (Context Enhancement)
|
|
902
|
+
- Ceremony: context-retrospective
|
|
903
|
+
- Purpose: AI enhances context.md with implementation insights
|
|
904
|
+
- Input: context.md, git diffs, PRs, issue comments (5,000-15,000 tokens)
|
|
905
|
+
- Output: Enhanced context.md with insights, patterns, code examples
|
|
906
|
+
- Call frequency: ~15 calls per ceremony
|
|
907
|
+
- Impact: VERY HIGH - Enhanced context improves all future AI agent performance
|
|
908
|
+
|
|
909
|
+
EVALUATION CRITERIA (BEST OUTPUT FIRST):
|
|
910
|
+
|
|
911
|
+
1. Output Quality Requirements: VERY HIGH
|
|
912
|
+
- Must extract valuable insights from code changes
|
|
913
|
+
- Must identify implementation patterns
|
|
914
|
+
- Should include practical code examples
|
|
915
|
+
→ Requires appropriate model capabilities
|
|
916
|
+
|
|
917
|
+
2. Task Complexity: 8/10 (Code analysis + insight extraction)
|
|
918
|
+
- Analyze code diffs
|
|
919
|
+
- Identify patterns and best practices
|
|
920
|
+
- Extract insights from issue discussions
|
|
921
|
+
- Synthesize into actionable context
|
|
922
|
+
|
|
923
|
+
3. Context Understanding:
|
|
924
|
+
- Understand code changes in project context
|
|
925
|
+
- Analyze git diffs
|
|
926
|
+
- Synthesize from multiple sources
|
|
927
|
+
→ Requires appropriate model capabilities
|
|
928
|
+
|
|
929
|
+
4. Consistency & Reliability:
|
|
930
|
+
- Enhanced context must align with original
|
|
931
|
+
- Code examples must be accurate
|
|
932
|
+
→ Both appropriate model capabilities
|
|
933
|
+
|
|
934
|
+
5. Speed Requirements: LOW (Secondary)
|
|
935
|
+
- Background processing
|
|
936
|
+
- Quality far more important
|
|
937
|
+
|
|
938
|
+
6. Pricing Considerations: MODERATE
|
|
939
|
+
- 15 calls = moderate volume
|
|
940
|
+
- Very high leverage (improves all future AI work)
|
|
941
|
+
|
|
942
|
+
RECOMMENDATION:
|
|
943
|
+
|
|
944
|
+
Based on evaluation criteria above, select the model that best meets:
|
|
945
|
+
1. PRIMARY: Output Quality - VERY HIGH - Enhances context for all future AI agents
|
|
946
|
+
2. SECONDARY: Code Analysis - Exceptional insight extraction from git diffs
|
|
947
|
+
3. TERTIARY: Task Complexity - Very High (8/10) - pattern identification + synthesis
|
|
948
|
+
4. Speed Requirements: LOW - Background processing, quality >>> speed
|
|
949
|
+
5. Pricing: MODERATE - 15 calls, very high leverage (improves all future ceremonies)
|
|
950
|
+
|
|
951
|
+
Selection Guidance:
|
|
952
|
+
- Prioritize models with exceptional code analysis and pattern recognition
|
|
953
|
+
- Require ability to extract valuable insights from code diffs and PRs
|
|
954
|
+
- Must identify implementation patterns, gotchas, and best practices
|
|
955
|
+
- Must synthesize from multiple sources (git diffs, issues, comments)
|
|
956
|
+
- Code examples must be accurate (errors propagate to future AI work)
|
|
957
|
+
- Very high leverage justifies premium tier (enhanced context used in all future work)
|
|
958
|
+
- Quality directly impacts all future AI agent effectiveness
|
|
959
|
+
|
|
960
|
+
Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
|
|
961
|
+
metadata: {
|
|
962
|
+
callFrequency: 15,
|
|
963
|
+
impact: 'VERY HIGH',
|
|
964
|
+
taskComplexity: 8,
|
|
965
|
+
currentDefault: 'Claude Opus 4.6'
|
|
966
|
+
}
|
|
967
|
+
}
|
|
968
|
+
];
|
|
969
|
+
|
|
970
|
+
/**
 * Look up a single evaluation prompt by its unique identifier.
 * @param {string} id - Prompt ID (e.g., 'sponsor-call-suggestions')
 * @returns {Object|null} The matching evaluation prompt object, or null when
 *   no prompt has that ID
 */
export function getPromptById(id) {
  for (const prompt of EVALUATION_PROMPTS) {
    if (prompt.id === id) {
      return prompt;
    }
  }
  return null;
}
|
|
978
|
+
|
|
979
|
+
/**
 * Collect every evaluation prompt belonging to a given ceremony.
 * @param {string} ceremony - Ceremony name (e.g., 'sponsor-call')
 * @returns {Array} Prompts whose `ceremony` field matches; empty array when
 *   none match
 */
export function getPromptsByCeremony(ceremony) {
  const matches = [];
  for (const prompt of EVALUATION_PROMPTS) {
    if (prompt.ceremony === ceremony) {
      matches.push(prompt);
    }
  }
  return matches;
}
|
|
987
|
+
|
|
988
|
+
/**
 * Summarize the evaluation prompt catalogue.
 *
 * Gathers the distinct ceremonies, the estimated total call volume, and the
 * impact-level distribution in a single pass over EVALUATION_PROMPTS
 * (previously this iterated the array six times: a map+Set, a reduce, and
 * four separate filter calls).
 *
 * @returns {Object} Statistics: totalPrompts, ceremonies (distinct count),
 *   ceremonyList (in first-appearance order), estimatedTotalCalls, and
 *   impactDistribution keyed by the four known impact levels.
 */
export function getPromptStats() {
  // Set preserves insertion order, so ceremonyList keeps the same
  // first-appearance ordering the original [...new Set(...)] produced.
  const ceremonySet = new Set();
  // Keys are fixed to the four known impact levels so callers always see all
  // four, each defaulting to 0 when no prompt carries that level.
  const impactDistribution = { CRITICAL: 0, 'VERY HIGH': 0, HIGH: 0, MEDIUM: 0 };
  let totalCalls = 0;

  for (const prompt of EVALUATION_PROMPTS) {
    ceremonySet.add(prompt.ceremony);
    totalCalls += prompt.metadata.callFrequency;
    const { impact } = prompt.metadata;
    if (Object.hasOwn(impactDistribution, impact)) {
      impactDistribution[impact] += 1;
    }
    // NOTE(review): impact values outside the four known levels are silently
    // excluded from the distribution — same behavior as the original filters;
    // confirm whether new levels (e.g. 'LOW') should be surfaced here.
  }

  const ceremonies = [...ceremonySet];
  return {
    totalPrompts: EVALUATION_PROMPTS.length,
    ceremonies: ceremonies.length,
    ceremonyList: ceremonies,
    estimatedTotalCalls: totalCalls,
    impactDistribution,
  };
}
|