@agile-vibe-coding/avc 0.1.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289)
  1. package/cli/agent-loader.js +21 -0
  2. package/cli/agents/agent-selector.md +129 -0
  3. package/cli/agents/architecture-recommender.md +418 -0
  4. package/cli/agents/database-deep-dive.md +470 -0
  5. package/cli/agents/database-recommender.md +634 -0
  6. package/cli/agents/doc-distributor.md +176 -0
  7. package/cli/agents/documentation-updater.md +203 -0
  8. package/cli/agents/epic-story-decomposer.md +280 -0
  9. package/cli/agents/feature-context-generator.md +91 -0
  10. package/cli/agents/gap-checker-epic.md +52 -0
  11. package/cli/agents/impact-checker-story.md +51 -0
  12. package/cli/agents/migration-guide-generator.md +305 -0
  13. package/cli/agents/mission-scope-generator.md +79 -0
  14. package/cli/agents/mission-scope-validator.md +112 -0
  15. package/cli/agents/project-context-extractor.md +107 -0
  16. package/cli/agents/project-documentation-creator.json +226 -0
  17. package/cli/agents/project-documentation-creator.md +595 -0
  18. package/cli/agents/question-prefiller.md +269 -0
  19. package/cli/agents/refiner-epic.md +39 -0
  20. package/cli/agents/refiner-story.md +42 -0
  21. package/cli/agents/solver-epic-api.json +15 -0
  22. package/cli/agents/solver-epic-api.md +39 -0
  23. package/cli/agents/solver-epic-backend.json +15 -0
  24. package/cli/agents/solver-epic-backend.md +39 -0
  25. package/cli/agents/solver-epic-cloud.json +15 -0
  26. package/cli/agents/solver-epic-cloud.md +39 -0
  27. package/cli/agents/solver-epic-data.json +15 -0
  28. package/cli/agents/solver-epic-data.md +39 -0
  29. package/cli/agents/solver-epic-database.json +15 -0
  30. package/cli/agents/solver-epic-database.md +39 -0
  31. package/cli/agents/solver-epic-developer.json +15 -0
  32. package/cli/agents/solver-epic-developer.md +39 -0
  33. package/cli/agents/solver-epic-devops.json +15 -0
  34. package/cli/agents/solver-epic-devops.md +39 -0
  35. package/cli/agents/solver-epic-frontend.json +15 -0
  36. package/cli/agents/solver-epic-frontend.md +39 -0
  37. package/cli/agents/solver-epic-mobile.json +15 -0
  38. package/cli/agents/solver-epic-mobile.md +39 -0
  39. package/cli/agents/solver-epic-qa.json +15 -0
  40. package/cli/agents/solver-epic-qa.md +39 -0
  41. package/cli/agents/solver-epic-security.json +15 -0
  42. package/cli/agents/solver-epic-security.md +39 -0
  43. package/cli/agents/solver-epic-solution-architect.json +15 -0
  44. package/cli/agents/solver-epic-solution-architect.md +39 -0
  45. package/cli/agents/solver-epic-test-architect.json +15 -0
  46. package/cli/agents/solver-epic-test-architect.md +39 -0
  47. package/cli/agents/solver-epic-ui.json +15 -0
  48. package/cli/agents/solver-epic-ui.md +39 -0
  49. package/cli/agents/solver-epic-ux.json +15 -0
  50. package/cli/agents/solver-epic-ux.md +39 -0
  51. package/cli/agents/solver-story-api.json +15 -0
  52. package/cli/agents/solver-story-api.md +39 -0
  53. package/cli/agents/solver-story-backend.json +15 -0
  54. package/cli/agents/solver-story-backend.md +39 -0
  55. package/cli/agents/solver-story-cloud.json +15 -0
  56. package/cli/agents/solver-story-cloud.md +39 -0
  57. package/cli/agents/solver-story-data.json +15 -0
  58. package/cli/agents/solver-story-data.md +39 -0
  59. package/cli/agents/solver-story-database.json +15 -0
  60. package/cli/agents/solver-story-database.md +39 -0
  61. package/cli/agents/solver-story-developer.json +15 -0
  62. package/cli/agents/solver-story-developer.md +39 -0
  63. package/cli/agents/solver-story-devops.json +15 -0
  64. package/cli/agents/solver-story-devops.md +39 -0
  65. package/cli/agents/solver-story-frontend.json +15 -0
  66. package/cli/agents/solver-story-frontend.md +39 -0
  67. package/cli/agents/solver-story-mobile.json +15 -0
  68. package/cli/agents/solver-story-mobile.md +39 -0
  69. package/cli/agents/solver-story-qa.json +15 -0
  70. package/cli/agents/solver-story-qa.md +39 -0
  71. package/cli/agents/solver-story-security.json +15 -0
  72. package/cli/agents/solver-story-security.md +39 -0
  73. package/cli/agents/solver-story-solution-architect.json +15 -0
  74. package/cli/agents/solver-story-solution-architect.md +39 -0
  75. package/cli/agents/solver-story-test-architect.json +15 -0
  76. package/cli/agents/solver-story-test-architect.md +39 -0
  77. package/cli/agents/solver-story-ui.json +15 -0
  78. package/cli/agents/solver-story-ui.md +39 -0
  79. package/cli/agents/solver-story-ux.json +15 -0
  80. package/cli/agents/solver-story-ux.md +39 -0
  81. package/cli/agents/story-doc-enricher.md +133 -0
  82. package/cli/agents/suggestion-business-analyst.md +88 -0
  83. package/cli/agents/suggestion-deployment-architect.md +263 -0
  84. package/cli/agents/suggestion-product-manager.md +129 -0
  85. package/cli/agents/suggestion-security-specialist.md +156 -0
  86. package/cli/agents/suggestion-technical-architect.md +269 -0
  87. package/cli/agents/suggestion-ux-researcher.md +93 -0
  88. package/cli/agents/task-subtask-decomposer.md +188 -0
  89. package/cli/agents/validator-documentation.json +152 -0
  90. package/cli/agents/validator-documentation.md +453 -0
  91. package/cli/agents/validator-epic-api.json +93 -0
  92. package/cli/agents/validator-epic-api.md +137 -0
  93. package/cli/agents/validator-epic-backend.json +93 -0
  94. package/cli/agents/validator-epic-backend.md +130 -0
  95. package/cli/agents/validator-epic-cloud.json +93 -0
  96. package/cli/agents/validator-epic-cloud.md +137 -0
  97. package/cli/agents/validator-epic-data.json +93 -0
  98. package/cli/agents/validator-epic-data.md +130 -0
  99. package/cli/agents/validator-epic-database.json +93 -0
  100. package/cli/agents/validator-epic-database.md +137 -0
  101. package/cli/agents/validator-epic-developer.json +74 -0
  102. package/cli/agents/validator-epic-developer.md +153 -0
  103. package/cli/agents/validator-epic-devops.json +74 -0
  104. package/cli/agents/validator-epic-devops.md +153 -0
  105. package/cli/agents/validator-epic-frontend.json +74 -0
  106. package/cli/agents/validator-epic-frontend.md +153 -0
  107. package/cli/agents/validator-epic-mobile.json +93 -0
  108. package/cli/agents/validator-epic-mobile.md +130 -0
  109. package/cli/agents/validator-epic-qa.json +93 -0
  110. package/cli/agents/validator-epic-qa.md +130 -0
  111. package/cli/agents/validator-epic-security.json +74 -0
  112. package/cli/agents/validator-epic-security.md +154 -0
  113. package/cli/agents/validator-epic-solution-architect.json +74 -0
  114. package/cli/agents/validator-epic-solution-architect.md +156 -0
  115. package/cli/agents/validator-epic-test-architect.json +93 -0
  116. package/cli/agents/validator-epic-test-architect.md +130 -0
  117. package/cli/agents/validator-epic-ui.json +93 -0
  118. package/cli/agents/validator-epic-ui.md +130 -0
  119. package/cli/agents/validator-epic-ux.json +93 -0
  120. package/cli/agents/validator-epic-ux.md +130 -0
  121. package/cli/agents/validator-selector.md +211 -0
  122. package/cli/agents/validator-story-api.json +104 -0
  123. package/cli/agents/validator-story-api.md +152 -0
  124. package/cli/agents/validator-story-backend.json +104 -0
  125. package/cli/agents/validator-story-backend.md +152 -0
  126. package/cli/agents/validator-story-cloud.json +104 -0
  127. package/cli/agents/validator-story-cloud.md +152 -0
  128. package/cli/agents/validator-story-data.json +104 -0
  129. package/cli/agents/validator-story-data.md +152 -0
  130. package/cli/agents/validator-story-database.json +104 -0
  131. package/cli/agents/validator-story-database.md +152 -0
  132. package/cli/agents/validator-story-developer.json +104 -0
  133. package/cli/agents/validator-story-developer.md +152 -0
  134. package/cli/agents/validator-story-devops.json +104 -0
  135. package/cli/agents/validator-story-devops.md +152 -0
  136. package/cli/agents/validator-story-frontend.json +104 -0
  137. package/cli/agents/validator-story-frontend.md +152 -0
  138. package/cli/agents/validator-story-mobile.json +104 -0
  139. package/cli/agents/validator-story-mobile.md +152 -0
  140. package/cli/agents/validator-story-qa.json +104 -0
  141. package/cli/agents/validator-story-qa.md +152 -0
  142. package/cli/agents/validator-story-security.json +104 -0
  143. package/cli/agents/validator-story-security.md +152 -0
  144. package/cli/agents/validator-story-solution-architect.json +104 -0
  145. package/cli/agents/validator-story-solution-architect.md +152 -0
  146. package/cli/agents/validator-story-test-architect.json +104 -0
  147. package/cli/agents/validator-story-test-architect.md +152 -0
  148. package/cli/agents/validator-story-ui.json +104 -0
  149. package/cli/agents/validator-story-ui.md +152 -0
  150. package/cli/agents/validator-story-ux.json +104 -0
  151. package/cli/agents/validator-story-ux.md +152 -0
  152. package/cli/ansi-colors.js +21 -0
  153. package/cli/build-docs.js +29 -8
  154. package/cli/ceremony-history.js +369 -0
  155. package/cli/command-logger.js +49 -12
  156. package/cli/components/static-output.js +63 -0
  157. package/cli/console-output-manager.js +94 -0
  158. package/cli/docs-sync.js +306 -0
  159. package/cli/epic-story-validator.js +1174 -0
  160. package/cli/evaluation-prompts.js +1008 -0
  161. package/cli/execution-context.js +195 -0
  162. package/cli/generate-summary-table.js +340 -0
  163. package/cli/index.js +0 -0
  164. package/cli/init-model-config.js +697 -0
  165. package/cli/init.js +1311 -274
  166. package/cli/kanban-server-manager.js +228 -0
  167. package/cli/llm-claude.js +83 -1
  168. package/cli/llm-gemini.js +85 -0
  169. package/cli/llm-mock.js +233 -0
  170. package/cli/llm-openai.js +233 -0
  171. package/cli/llm-provider.js +240 -3
  172. package/cli/llm-token-limits.js +102 -0
  173. package/cli/llm-verifier.js +454 -0
  174. package/cli/message-constants.js +58 -0
  175. package/cli/message-manager.js +334 -0
  176. package/cli/message-types.js +96 -0
  177. package/cli/messaging-api.js +297 -0
  178. package/cli/model-pricing.js +169 -0
  179. package/cli/model-query-engine.js +468 -0
  180. package/cli/model-recommendation-analyzer.js +495 -0
  181. package/cli/model-selector.js +269 -0
  182. package/cli/output-buffer.js +107 -0
  183. package/cli/process-manager.js +73 -2
  184. package/cli/repl-ink.js +4988 -1217
  185. package/cli/repl-old.js +4 -4
  186. package/cli/seed-processor.js +792 -0
  187. package/cli/sprint-planning-processor.js +1813 -0
  188. package/cli/template-processor.js +2102 -105
  189. package/cli/templates/project.md +25 -8
  190. package/cli/templates/vitepress-config.mts.template +5 -4
  191. package/cli/token-tracker.js +520 -0
  192. package/cli/tools/generate-story-validators.js +317 -0
  193. package/cli/tools/generate-validators.js +669 -0
  194. package/cli/update-checker.js +19 -17
  195. package/cli/update-notifier.js +4 -4
  196. package/cli/validation-router.js +605 -0
  197. package/cli/verification-tracker.js +563 -0
  198. package/kanban/README.md +386 -0
  199. package/kanban/client/README.md +205 -0
  200. package/kanban/client/components.json +20 -0
  201. package/kanban/client/dist/assets/index-CiD8PS2e.js +306 -0
  202. package/kanban/client/dist/assets/index-nLh0m82Q.css +1 -0
  203. package/kanban/client/dist/index.html +16 -0
  204. package/kanban/client/dist/vite.svg +1 -0
  205. package/kanban/client/index.html +15 -0
  206. package/kanban/client/package-lock.json +9442 -0
  207. package/kanban/client/package.json +44 -0
  208. package/kanban/client/postcss.config.js +6 -0
  209. package/kanban/client/public/vite.svg +1 -0
  210. package/kanban/client/src/App.jsx +622 -0
  211. package/kanban/client/src/components/ProjectFileEditorPopup.jsx +117 -0
  212. package/kanban/client/src/components/ceremony/AskArchPopup.jsx +416 -0
  213. package/kanban/client/src/components/ceremony/AskModelPopup.jsx +616 -0
  214. package/kanban/client/src/components/ceremony/CeremonyWorkflowModal.jsx +946 -0
  215. package/kanban/client/src/components/ceremony/EpicStorySelectionModal.jsx +254 -0
  216. package/kanban/client/src/components/ceremony/SponsorCallModal.jsx +619 -0
  217. package/kanban/client/src/components/ceremony/SprintPlanningModal.jsx +704 -0
  218. package/kanban/client/src/components/ceremony/steps/ArchitectureStep.jsx +150 -0
  219. package/kanban/client/src/components/ceremony/steps/CompleteStep.jsx +154 -0
  220. package/kanban/client/src/components/ceremony/steps/DatabaseStep.jsx +202 -0
  221. package/kanban/client/src/components/ceremony/steps/DeploymentStep.jsx +123 -0
  222. package/kanban/client/src/components/ceremony/steps/MissionStep.jsx +106 -0
  223. package/kanban/client/src/components/ceremony/steps/ReviewAnswersStep.jsx +125 -0
  224. package/kanban/client/src/components/ceremony/steps/RunningStep.jsx +228 -0
  225. package/kanban/client/src/components/kanban/CardDetailModal.jsx +559 -0
  226. package/kanban/client/src/components/kanban/EpicSection.jsx +146 -0
  227. package/kanban/client/src/components/kanban/FilterToolbar.jsx +222 -0
  228. package/kanban/client/src/components/kanban/GroupingSelector.jsx +57 -0
  229. package/kanban/client/src/components/kanban/KanbanBoard.jsx +211 -0
  230. package/kanban/client/src/components/kanban/KanbanCard.jsx +138 -0
  231. package/kanban/client/src/components/kanban/KanbanColumn.jsx +90 -0
  232. package/kanban/client/src/components/kanban/RefineWorkItemPopup.jsx +789 -0
  233. package/kanban/client/src/components/layout/LoadingScreen.jsx +82 -0
  234. package/kanban/client/src/components/process/ProcessMonitorBar.jsx +80 -0
  235. package/kanban/client/src/components/settings/AgentEditorPopup.jsx +171 -0
  236. package/kanban/client/src/components/settings/AgentsTab.jsx +353 -0
  237. package/kanban/client/src/components/settings/ApiKeysTab.jsx +113 -0
  238. package/kanban/client/src/components/settings/CeremonyModelsTab.jsx +98 -0
  239. package/kanban/client/src/components/settings/CostThresholdsTab.jsx +94 -0
  240. package/kanban/client/src/components/settings/ModelPricingTab.jsx +204 -0
  241. package/kanban/client/src/components/settings/ServersTab.jsx +121 -0
  242. package/kanban/client/src/components/settings/SettingsModal.jsx +84 -0
  243. package/kanban/client/src/components/stats/CostModal.jsx +353 -0
  244. package/kanban/client/src/components/ui/badge.jsx +27 -0
  245. package/kanban/client/src/components/ui/dialog.jsx +121 -0
  246. package/kanban/client/src/components/ui/tabs.jsx +85 -0
  247. package/kanban/client/src/hooks/__tests__/useGrouping.test.js +232 -0
  248. package/kanban/client/src/hooks/useGrouping.js +118 -0
  249. package/kanban/client/src/hooks/useWebSocket.js +120 -0
  250. package/kanban/client/src/lib/__tests__/api.test.js +196 -0
  251. package/kanban/client/src/lib/__tests__/status-grouping.test.js +94 -0
  252. package/kanban/client/src/lib/api.js +401 -0
  253. package/kanban/client/src/lib/status-grouping.js +144 -0
  254. package/kanban/client/src/lib/utils.js +11 -0
  255. package/kanban/client/src/main.jsx +10 -0
  256. package/kanban/client/src/store/__tests__/kanbanStore.test.js +164 -0
  257. package/kanban/client/src/store/ceremonyStore.js +172 -0
  258. package/kanban/client/src/store/filterStore.js +201 -0
  259. package/kanban/client/src/store/kanbanStore.js +115 -0
  260. package/kanban/client/src/store/processStore.js +65 -0
  261. package/kanban/client/src/store/sprintPlanningStore.js +33 -0
  262. package/kanban/client/src/styles/globals.css +59 -0
  263. package/kanban/client/tailwind.config.js +77 -0
  264. package/kanban/client/vite.config.js +28 -0
  265. package/kanban/client/vitest.config.js +28 -0
  266. package/kanban/dev-start.sh +47 -0
  267. package/kanban/package.json +12 -0
  268. package/kanban/server/index.js +516 -0
  269. package/kanban/server/routes/ceremony.js +305 -0
  270. package/kanban/server/routes/costs.js +157 -0
  271. package/kanban/server/routes/processes.js +50 -0
  272. package/kanban/server/routes/settings.js +303 -0
  273. package/kanban/server/routes/websocket.js +276 -0
  274. package/kanban/server/routes/work-items.js +347 -0
  275. package/kanban/server/services/CeremonyService.js +1190 -0
  276. package/kanban/server/services/FileSystemScanner.js +95 -0
  277. package/kanban/server/services/FileWatcher.js +144 -0
  278. package/kanban/server/services/HierarchyBuilder.js +196 -0
  279. package/kanban/server/services/ProcessRegistry.js +122 -0
  280. package/kanban/server/services/WorkItemReader.js +123 -0
  281. package/kanban/server/services/WorkItemRefineService.js +510 -0
  282. package/kanban/server/start.js +49 -0
  283. package/kanban/server/utils/kanban-logger.js +132 -0
  284. package/kanban/server/utils/markdown.js +91 -0
  285. package/kanban/server/utils/status-grouping.js +107 -0
  286. package/kanban/server/workers/sponsor-call-worker.js +84 -0
  287. package/kanban/server/workers/sprint-planning-worker.js +130 -0
  288. package/package.json +18 -5
  289. package/cli/agents/documentation.md +0 -302
@@ -0,0 +1,1008 @@
1
+ /**
2
+ * Evaluation prompts for LLM model selection across all AVC ceremonies and stages
3
+ * Extracted from AVC_DEFAULT_LLMS.md
4
+ *
5
+ * Each prompt is used to query LLM providers about which of their models
6
+ * is best suited for the specific stage's requirements.
7
+ */
8
+
9
+ export const EVALUATION_PROMPTS = [
10
+ {
11
+ id: 'sponsor-call-suggestions',
12
+ ceremony: 'sponsor-call',
13
+ stage: 'suggestions',
14
+ stageName: 'Questionnaire Suggestions',
15
+ prompt: `TASK: Select optimal LLM model for questionnaire suggestions in sponsor-call ceremony
16
+ PRIORITY: Best possible output quality
17
+
18
+ CONTEXT:
19
+ - Stage: suggestions (Questionnaire Suggestions)
20
+ - Ceremony: sponsor-call
21
+ - Purpose: AI analyzes project name and suggests intelligent, contextually appropriate answers for 6 questionnaire fields
22
+ - Input: Project name only (10-50 tokens)
23
+ - Output: Suggested answers for Mission Statement, Target Users, Initial Scope, Deployment Target, Technical Considerations, Security Requirements (500-1500 tokens total)
24
+ - Call frequency: 1 per ceremony
25
+ - User interaction: Real-time (user waiting for suggestions while viewing questionnaire)
26
+ - Impact: Sets initial project direction and quality tone for all downstream artifacts
27
+
28
+ EVALUATION CRITERIA (BEST OUTPUT FIRST):
29
+
30
+ 1. Output Quality Requirements: HIGH
31
+ - Must infer project domain from minimal context (just project name)
32
+ - Suggestions must be relevant, specific, and actionable
33
+ - Must demonstrate domain knowledge and industry best practices
34
+ - Quality directly impacts user's project definition experience
35
+ → Requires appropriate model capabilities
36
+
37
+ 2. Task Complexity: 6/10 (Moderate reasoning with domain inference)
38
+ - Analyze project name for domain clues
39
+ - Infer appropriate technology stack and architecture
40
+ - Generate contextually relevant suggestions across 6 different categories
41
+ - Balance between generic and specific recommendations
42
+
43
+ 3. Context Understanding:
44
+ - Extremely limited input context (just project name)
45
+ - Must leverage broad domain knowledge to compensate
46
+ - Must infer user intent and project type
47
+ → Requires appropriate model capabilities
48
+
49
+ 4. Consistency & Reliability:
50
+ - Suggestions must be coherent across all 6 fields
51
+ - Must avoid contradictory recommendations
52
+ - Critical for user's first impression of AVC
53
+ → Requires appropriate model capabilities
54
+
55
+ 5. Speed Requirements: IMPORTANT (Secondary)
56
+ - User is actively waiting for suggestions
57
+ - Real-time interaction requires reasonable response time
58
+ - 2-4 second response ideal, <8 seconds acceptable
59
+
60
+ 6. Pricing Considerations: TERTIARY
61
+ - Single call per ceremony (low volume)
62
+ - User-facing quality important
63
+ - Pricing impact minimal with only 1 call
64
+ → Quality worth any pricing tier for this stage
65
+
66
+ RECOMMENDATION:
67
+
68
+ Based on evaluation criteria above, select the model that best meets:
69
+ 1. PRIMARY: Output Quality - Excellent domain inference from minimal context (project name)
70
+ 2. SECONDARY: World Knowledge - Strong understanding of business domains and technical stacks
71
+ 3. TERTIARY: Task Complexity - Moderate (7/10) - requires intelligent suggestion generation
72
+ 4. Speed Requirements: MODERATE - User waiting in real-time (2-4s ideal, <8s acceptable)
73
+ 5. Pricing: TERTIARY - Single call per ceremony, minimal impact
74
+
75
+ Selection Guidance:
76
+ - Prioritize models with strong domain/business knowledge for accurate suggestions
77
+ - Require excellent inference capabilities from very limited context
78
+ - Must support real-time interaction speed (<8 seconds)
79
+ - Pricing is not a constraint (quality worth any tier for user-facing interaction)
80
+
81
+ Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
82
+ metadata: {
83
+ callFrequency: 1,
84
+ impact: 'HIGH',
85
+ taskComplexity: 6,
86
+ currentDefault: 'Claude Sonnet 4.5'
87
+ }
88
+ },
89
+
90
+ {
91
+ id: 'sponsor-call-documentation',
92
+ ceremony: 'sponsor-call',
93
+ stage: 'documentation',
94
+ stageName: 'Documentation Generation',
95
+ prompt: `TASK: Select optimal LLM model for documentation generation in sponsor-call ceremony
96
+ PRIORITY: Best possible output quality
97
+
98
+ CONTEXT:
99
+ - Stage: documentation (Project Documentation Creation)
100
+ - Ceremony: sponsor-call
101
+ - Purpose: AI generates comprehensive PROJECT.md from questionnaire answers
102
+ - Input: 6 questionnaire field answers (1,000-5,000 tokens)
103
+ - Output: Structured PROJECT.md with Executive Summary, Problem Statement, Solution, User Personas, Core Features, Technical Architecture, Security, Success Metrics (2,000-8,000 tokens)
104
+ - Call frequency: 1 per ceremony
105
+ - User interaction: Background process after questionnaire completion
106
+ - Impact: CRITICAL - This is the foundational document that defines the entire project; all future AI agents, epics, stories, and tasks derive from this document
107
+
108
+ EVALUATION CRITERIA (BEST OUTPUT FIRST):
109
+
110
+ 1. Output Quality Requirements: CRITICAL
111
+ - Must produce well-structured, comprehensive technical documentation
112
+ - Content must be coherent, professional, and actionable
113
+ - Must maintain consistency across all sections
114
+ - Quality affects all downstream project artifacts
115
+ → Requires appropriate model capabilities
116
+
117
+ 2. Task Complexity: 8/10 (Advanced technical writing and synthesis)
118
+ - Synthesize multiple questionnaire inputs into coherent narrative
119
+ - Organize information into logical section structure
120
+ - Generate appropriate technical architecture descriptions
121
+ - Create realistic user personas and success metrics
122
+ - Maintain professional tone and technical accuracy
123
+
124
+ 3. Context Understanding:
125
+ - Must understand relationships between questionnaire answers
126
+ - Infer appropriate technical depth and detail level
127
+ - Expand brief answers into comprehensive sections
128
+ - Maintain consistency across document sections
129
+ → Requires appropriate model capabilities
130
+
131
+ 4. Consistency & Reliability:
132
+ - Critical that all sections align and don't contradict
133
+ - Technical architecture must match scope and requirements
134
+ - Success metrics must align with stated goals
135
+
136
+ 5. Speed Requirements: LOW (Secondary)
137
+ - Background process, user not actively waiting
138
+ - Quality far more important than speed
139
+ - Can take 10-30 seconds without issue
140
+ → Speed not a constraint
141
+
142
+ 6. Pricing Considerations: TERTIARY
143
+ - Single call per ceremony
144
+ - Foundational document for entire project
145
+ - Quality impact is massive
146
+ → Worth any pricing tier given criticality
147
+
148
+ RECOMMENDATION:
149
+
150
+ Based on evaluation criteria above, select the model that best meets:
151
+ 1. PRIMARY: Output Quality - CRITICAL - Foundational project document
152
+ 2. SECONDARY: Technical Writing - Excellent multi-section document structure
153
+ 3. TERTIARY: Task Complexity - High (8/10) - requires synthesis of diverse inputs
154
+ 4. Speed Requirements: LOW - Background process, quality >>> speed
155
+ 5. Pricing: TERTIARY - Single call, massive quality impact justifies any tier
156
+
157
+ Selection Guidance:
158
+ - Prioritize models with exceptional technical writing and document synthesis
159
+ - Require strong markdown formatting and organizational structure
160
+ - Must maintain consistency across complex 9-section document
161
+ - Must synthesize questionnaire inputs, suggestions, and context into coherent narrative
162
+ - Speed is not a constraint (can take 10-30 seconds)
163
+ - Pricing not a limitation (worth premium for project foundation)
164
+
165
+ Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
166
+ metadata: {
167
+ callFrequency: 1,
168
+ impact: 'CRITICAL',
169
+ taskComplexity: 8,
170
+ currentDefault: 'Claude Sonnet 4.5'
171
+ }
172
+ },
173
+
174
+ {
175
+ id: 'sponsor-call-validation',
176
+ ceremony: 'sponsor-call',
177
+ stage: 'validation',
178
+ stageName: 'Documentation Validation',
179
+ prompt: `TASK: Select optimal LLM model for documentation validation in sponsor-call ceremony
180
+ PRIORITY: Best possible output quality
181
+
182
+ CONTEXT:
183
+ - Stage: validation (Documentation & Context Validation)
184
+ - Ceremony: sponsor-call
185
+ - Purpose: AI validators check PROJECT.md and context.md against quality rules
186
+ - Input: Full PROJECT.md or context.md + validation rules (5,000-12,000 tokens)
187
+ - Output: Validation report identifying issues and suggestions
188
+ - Call frequency: 2 validators per ceremony
189
+ - Impact: HIGH - Catches quality issues before they propagate
190
+
191
+ EVALUATION CRITERIA (BEST OUTPUT FIRST):
192
+
193
+ 1. Output Quality Requirements: HIGH
194
+ - Must accurately identify quality issues and inconsistencies
195
+ - Must provide actionable feedback for improvement
196
+ - Must understand project documentation best practices
197
+ → Requires appropriate model capabilities
198
+
199
+ 2. Task Complexity: 6/10 (Analytical validation + rule application)
200
+ - Apply validation rules to documentation
201
+ - Identify inconsistencies and gaps
202
+ - Assess completeness and coherence
203
+
204
+ 3. Context Understanding:
205
+ - Must handle large documents (full PROJECT.md)
206
+ - Understand relationships across sections
207
+ - Identify subtle inconsistencies
208
+ → Requires appropriate model capabilities
209
+
210
+ 4. Speed Requirements: LOW (Secondary)
211
+ - Background validation stage
212
+ - Quality more important than speed
213
+
214
+ 6. Pricing Considerations: TERTIARY
215
+ - Only 2 calls per ceremony
216
+ - Moderate quality requirements
217
+
218
+ RECOMMENDATION:
219
+
220
+ Based on evaluation criteria above, select the model that best meets:
221
+ 1. PRIMARY: Output Quality - HIGH - Analytical validation with actionable feedback
222
+ 2. SECONDARY: Task Complexity - Moderate (6/10) - rule application + gap identification
223
+ 3. TERTIARY: Context Understanding - Must handle large documents with cross-section relationships
224
+ 4. Speed Requirements: LOW - Background validation, quality >>> speed
225
+ 5. Pricing: TERTIARY - 2 calls per ceremony, moderate impact
226
+
227
+ Selection Guidance:
228
+ - Prioritize models with strong analytical and critical thinking capabilities
229
+ - Require ability to identify subtle inconsistencies and gaps
230
+ - Must provide actionable, specific feedback for improvement
231
+ - Must understand project documentation best practices
232
+ - Speed not a constraint (background processing)
233
+ - Consider pricing tier relative to quality improvement
234
+
235
+ Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
236
+ metadata: {
237
+ callFrequency: 2,
238
+ impact: 'HIGH',
239
+ taskComplexity: 6,
240
+ currentDefault: 'Claude Sonnet 4.5'
241
+ }
242
+ },
243
+
244
+ {
245
+ id: 'sprint-planning-decomposition',
246
+ ceremony: 'sprint-planning',
247
+ stage: 'decomposition',
248
+ stageName: 'Epic & Story Decomposition',
249
+ prompt: `TASK: Select optimal LLM model for epic and story decomposition in sprint-planning ceremony
250
+ PRIORITY: Best possible output quality
251
+
252
+ CONTEXT:
253
+ - Stage: decomposition (Epic & Story Decomposition)
254
+ - Ceremony: sprint-planning
255
+ - Purpose: AI analyzes PROJECT.md and decomposes project scope into hierarchical epics and stories
256
+ - Input: PROJECT.md, project context.md, existing epics/stories (5,000-20,000 tokens)
257
+ - Output: Structured JSON with epics, stories, acceptance criteria
258
+ - Call frequency: 1 per ceremony
259
+ - Impact: CRITICAL - Defines entire project work breakdown structure
260
+
261
+ EVALUATION CRITERIA (BEST OUTPUT FIRST):
262
+
263
+ 1. Output Quality Requirements: CRITICAL
264
+ - Must create logical, implementable work breakdown
265
+ - Acceptance criteria must be specific and testable
266
+ - Hierarchy must reflect dependencies
267
+ → Requires appropriate model capabilities
268
+
269
+ 2. Task Complexity: 9/10 (Complex hierarchical decomposition)
270
+ - Analyze full project scope
271
+ - Break features into implementable story-level units
272
+ - Generate specific, testable acceptance criteria
273
+ - Identify dependencies between stories
274
+
275
+ 3. Context Understanding:
276
+ - Must synthesize entire project vision
277
+ - Understand technical architecture and constraints
278
+ - Recognize implicit dependencies
279
+ → Requires appropriate model capabilities
280
+
281
+ 4. Consistency & Reliability:
282
+ - Critical that decomposition is complete (no gaps)
283
+ - Stories must not overlap or contradict
284
+
285
+ 5. Speed Requirements: LOW (Secondary)
286
+ - Background process, one-time operation
287
+ - Quality far more important than speed
288
+
289
+ 6. Pricing Considerations: TERTIARY
290
+ - Single call per ceremony
291
+ - Most critical stage in sprint-planning
292
+ → Worth any pricing tier
293
+
294
+ RECOMMENDATION:
295
+
296
+ Based on evaluation criteria above, select the model that best meets:
297
+ 1. PRIMARY: Output Quality - CRITICAL - Defines entire project work structure
298
+ 2. SECONDARY: Task Complexity - Very High (9/10) - complex hierarchical decomposition
299
+ 3. TERTIARY: Hierarchical Reasoning - Exceptional domain breakdown with dependencies
300
+ 4. Speed Requirements: LOW - Background process, quality >>> speed
301
+ 5. Pricing: TERTIARY - Single call, most critical stage, worth any tier
302
+
303
+ Selection Guidance:
304
+ - Prioritize models with exceptional hierarchical reasoning and decomposition
305
+ - Require deep project synthesis and implicit dependency recognition
306
+ - Must generate complete, non-overlapping, consistent epic/story structure
307
+ - Must produce specific, testable acceptance criteria at story level
308
+ - Consistency and completeness are paramount (gaps cause downstream failures)
309
+ - Speed not a constraint (one-time background operation)
310
+ - Pricing justified by critical impact on entire project
311
+
312
+ Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
313
+ metadata: {
314
+ callFrequency: 1,
315
+ impact: 'CRITICAL',
316
+ taskComplexity: 9,
317
+ currentDefault: 'Claude Opus 4.6'
318
+ }
319
+ },
320
+
321
+ {
322
+ id: 'sprint-planning-validation-universal',
323
+ ceremony: 'sprint-planning',
324
+ stage: 'validation-universal',
325
+ stageName: 'Universal Validators',
326
+ prompt: `TASK: Select optimal LLM model for universal validators in sprint-planning ceremony
327
+ PRIORITY: Best possible output quality
328
+
329
+ CONTEXT:
330
+ - Stage: validation-universal (Universal Validators)
331
+ - Ceremony: sprint-planning
332
+ - Purpose: Critical validators always applied: architecture, security, quality
333
+ - Input: Epic or Story with full context (2,000-8,000 tokens per item)
334
+ - Output: Detailed validation report with architectural and security analysis
335
+ - Call frequency: ~30 calls per ceremony
336
+ - Impact: CRITICAL - Catches fundamental issues before implementation
337
+
338
+ EVALUATION CRITERIA (BEST OUTPUT FIRST):
339
+
340
+ 1. Output Quality Requirements: CRITICAL
341
+ - Must perform deep architectural analysis
342
+ - Must identify security vulnerabilities
343
+ - Must provide specific, actionable recommendations
344
+ → Requires appropriate model capabilities
345
+
346
+ 2. Task Complexity: 9/10 (Deep architectural and security analysis)
347
+ - Analyze system architecture and design patterns
348
+ - Identify security vulnerabilities
349
+ - Assess technical feasibility
350
+ - Evaluate testing strategies
351
+
352
+ 3. Context Understanding:
353
+ - Must understand full project architecture
354
+ - Cross-reference with other epics/stories
355
+ - Identify system-wide architectural issues
356
+ → Requires appropriate model capabilities
357
+
358
+ 4. Consistency & Reliability:
359
+ - Cannot miss critical architectural flaws
360
+ - Must consistently identify security issues
361
+ → Critically requires appropriate model capabilities
362
+
363
+ 5. Speed Requirements: MODERATE (Secondary)
364
+ - 30 calls in parallel validation stage
365
+ - Quality far more important than speed
366
+
367
+ 6. Pricing Considerations: MODERATE
368
+ - 30 calls = significant volume
369
+ - Quality cannot be compromised
370
+
371
+ RECOMMENDATION:
372
+
373
+ Based on evaluation criteria above, select the model that best meets:
374
+ 1. PRIMARY: Output Quality - CRITICAL - Deep architectural and security analysis
375
+ 2. SECONDARY: Task Complexity - Very High (9/10) - system-wide analysis
376
+ 3. TERTIARY: Reliability - Cannot miss critical architectural flaws or security issues
377
+ 4. Speed Requirements: MODERATE - 30 calls in parallel, quality still paramount
378
+ 5. Pricing: MODERATE - 30 calls = significant volume, but quality cannot be compromised
379
+
380
+ Selection Guidance:
381
+ - Prioritize models with exceptional architectural reasoning and security analysis
382
+ - Require consistent, reliable identification of vulnerabilities and design flaws
383
+ - Must provide specific, actionable recommendations for improvements
384
+ - Must understand system-wide architecture and cross-reference between components
385
+ - Reliability is critical (false negatives in security/architecture are high-risk)
386
+ - Do not compromise on quality for these critical validators
387
+
388
+ Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
389
+ metadata: {
390
+ callFrequency: 30,
391
+ impact: 'CRITICAL',
392
+ taskComplexity: 9,
393
+ currentDefault: 'Claude Sonnet 4.5'
394
+ }
395
+ },
396
+
397
+ {
398
+ id: 'sprint-planning-validation-domain',
399
+ ceremony: 'sprint-planning',
400
+ stage: 'validation-domain',
401
+ stageName: 'Domain Validators',
402
+ prompt: `TASK: Select optimal LLM model for domain validators in sprint-planning ceremony
403
+ PRIORITY: Best possible output quality
404
+
405
+ CONTEXT:
406
+ - Stage: validation-domain (Domain Validators)
407
+ - Ceremony: sprint-planning
408
+ - Purpose: Domain-specific validators based on tech stack
409
+ - Input: Epic or Story with domain-relevant context (2,000-5,000 tokens)
410
+ - Output: Domain-specific validation report with best practices
411
+ - Call frequency: ~90 calls per ceremony (largest volume stage)
412
+ - Impact: HIGH - Catches domain-specific issues, ensures best practices
413
+
414
+ EVALUATION CRITERIA (BEST OUTPUT FIRST):
415
+
416
+ 1. Output Quality Requirements: HIGH
417
+ - Must apply domain-specific best practices
418
+ - Must identify domain anti-patterns
419
+ - Must provide actionable recommendations
420
+ → Requires appropriate model capabilities
421
+
422
+ 2. Task Complexity: 7/10 (Domain expertise application)
423
+ - Apply domain-specific patterns
424
+ - Identify violations of domain conventions
425
+ - Not just pattern matching - requires context understanding
426
+
427
+ 3. Context Understanding:
428
+ - Must understand project architecture in domain context
429
+ - Cross-reference with other domains
430
+
431
+ 4. Speed Requirements: MODERATE (Secondary)
432
+ - 90 calls = highest volume in sprint-planning
433
+ - Parallel execution
434
+
435
+ 5. Pricing Considerations: SIGNIFICANT
436
+ - 90 calls = largest pricing driver
437
+ - Medium vs Low pricing makes material difference
438
+
439
+ RECOMMENDATION:
440
+
441
+ Based on evaluation criteria above, select the model that best meets:
442
+ 1. PRIMARY: Output Quality - HIGH - Domain-specific best practices and anti-patterns
443
+ 2. SECONDARY: Task Complexity - High (7/10) - requires domain expertise, not just pattern matching
444
+ 3. TERTIARY: Volume Impact - HIGHEST (90 calls) - largest pricing driver in ceremony
445
+ 4. Speed Requirements: MODERATE - Parallel execution, but quality important
446
+ 5. Pricing: SIGNIFICANT - 90 calls make pricing tier materially impactful
447
+
448
+ Selection Guidance:
449
+ - Prioritize models with strong domain knowledge (DevOps, Database, API, Frontend, etc.)
450
+ - Require ability to apply domain-specific best practices and identify anti-patterns
451
+ - Must provide actionable, context-appropriate recommendations
452
+ - Consider pricing tier carefully (90 calls = 3-4x cost difference between tiers)
453
+ - Balance domain expertise depth vs pricing efficiency
454
+ - Acceptable to use lower tier if basic domain pattern checking sufficient
455
+
456
+ Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
457
+ metadata: {
458
+ callFrequency: 90,
459
+ impact: 'HIGH',
460
+ taskComplexity: 7,
461
+ currentDefault: 'Claude Sonnet 4.5'
462
+ }
463
+ },
464
+
465
+ {
466
+ id: 'sprint-planning-validation-feature',
467
+ ceremony: 'sprint-planning',
468
+ stage: 'validation-feature',
469
+ stageName: 'Feature Validators',
470
+ prompt: `TASK: Select optimal LLM model for feature validators in sprint-planning ceremony
471
+ PRIORITY: Best possible output quality
472
+
473
+ CONTEXT:
474
+ - Stage: validation-feature (Feature Validators)
475
+ - Ceremony: sprint-planning
476
+ - Purpose: Feature-specific validators based on keywords in acceptance criteria
477
+ - Input: Epic or Story with feature-specific context (1,500-4,000 tokens)
478
+ - Output: Feature-specific validation checklist and completeness assessment
479
+ - Call frequency: ~25 calls per ceremony
480
+ - Impact: MEDIUM - Ensures feature completeness, identifies missing requirements
481
+
482
+ EVALUATION CRITERIA (BEST OUTPUT FIRST):
483
+
484
+ 1. Output Quality Requirements: MEDIUM-HIGH
485
+ - Must verify feature completeness
486
+ - Must identify missing scenarios or edge cases
487
+ - Should provide feature-specific implementation guidance
488
+ → Requires appropriate model capabilities
489
+
490
+ 2. Task Complexity: 5/10 (Feature checklist validation)
491
+ - Apply feature-specific checklists
492
+ - Identify missing scenarios
493
+ - Verify acceptance criteria completeness
494
+
495
+ 3. Context Understanding:
496
+ - Understand feature requirements from acceptance criteria
497
+ - Identify implicit requirements not explicitly stated
498
+
499
+ 4. Speed Requirements: MODERATE (Secondary)
500
+ - 25 calls, parallel execution
501
+ - Background processing
502
+
503
+ 5. Pricing Considerations: MODERATE
504
+ - 25 calls = moderate volume
505
+ - Completeness checking vs deep analysis
506
+
507
+ RECOMMENDATION:
508
+
509
+ Based on evaluation criteria above, select the model that best meets:
510
+ 1. PRIMARY: Output Quality - MEDIUM-HIGH - Feature completeness and edge case identification
511
+ 2. SECONDARY: Task Complexity - Moderate (5/10) - checklist application with implicit requirement detection
512
+ 3. TERTIARY: Volume Impact - MODERATE (25 calls) - pricing tier makes moderate impact
513
+ 4. Speed Requirements: MODERATE - Parallel execution in background
514
+ 5. Pricing: MODERATE - 25 calls = moderate volume, balance quality vs cost
515
+
516
+ Selection Guidance:
517
+ - Prioritize models capable of identifying implicit requirements (not just explicit ones)
518
+ - Require feature completeness checking (missing scenarios, edge cases)
519
+ - Should provide test scenario suggestions
520
+ - Consider pricing tier (25 calls = 2-3x cost difference between tiers)
521
+ - Balance edge case detection quality vs pricing efficiency
522
+ - Acceptable to use lower tier if basic explicit requirement checking sufficient
523
+
524
+ Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
525
+ metadata: {
526
+ callFrequency: 25,
527
+ impact: 'MEDIUM',
528
+ taskComplexity: 5,
529
+ currentDefault: 'Claude Sonnet 4.5'
530
+ }
531
+ },
532
+
533
+ {
534
+ id: 'sprint-planning-doc-distribution',
535
+ ceremony: 'sprint-planning',
536
+ stage: 'doc-distribution',
537
+ stageName: 'Documentation Distribution',
538
+ prompt: `TASK: Select optimal LLM model for documentation distribution in sprint-planning ceremony
539
+ PRIORITY: Best possible output quality
540
+
541
+ CONTEXT:
542
+ - Stage: doc-distribution (Documentation Distribution)
543
+ - Ceremony: sprint-planning
544
+ - Purpose: AI moves relevant content from parent doc.md to each epic/story doc.md and elaborates with domain-specific detail
545
+ - Input: Parent doc.md + epic/story item description (2,000-5,000 tokens)
546
+ - Output: Child doc.md (extracted + elaborated) + updated parent doc.md (lighter) as JSON
547
+ - Call frequency: ~25 calls per ceremony
548
+ - Impact: VERY HIGH - Establishes the documentation tree that guides all future AI agents
549
+
550
+ EVALUATION CRITERIA (BEST OUTPUT FIRST):
551
+
552
+ 1. Output Quality Requirements: VERY HIGH
553
+ - Must accurately identify which content belongs to the child scope
554
+ - Must elaborate with domain-specific implementation detail
555
+ - Must produce valid JSON with embedded markdown
556
+ → Requires appropriate model capabilities
557
+
558
+ 2. Task Complexity: 7/10 (Content extraction + elaboration + JSON output)
559
+ - Identify domain-relevant sections in parent doc
560
+ - Remove extracted content cleanly from parent
561
+ - Extend child doc with actionable implementation notes
562
+
563
+ 3. Consistency & Reliability:
564
+ - Child doc must cover only the child's scope
565
+ - Parent doc must remain coherent after extraction
566
+ - JSON output must be well-formed
567
+
568
+ 4. Speed Requirements: MODERATE (Secondary)
569
+ - 25 calls, sequential per epic/story chain
570
+ - Quality more important than speed
571
+
572
+ 5. Pricing Considerations: MODERATE
573
+ - 25 calls = moderate volume
574
+ - High impact on downstream work quality
575
+
576
+ RECOMMENDATION:
577
+
578
+ Based on evaluation criteria above, select the model that best meets:
579
+ 1. PRIMARY: Output Quality - VERY HIGH - Defines the documentation hierarchy for the project
580
+ 2. SECONDARY: Technical Writing - Focused extraction with domain elaboration
581
+ 3. TERTIARY: JSON reliability - Must produce parseable JSON with embedded markdown
582
+
583
+ Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
584
+ metadata: {
585
+ callFrequency: 25,
586
+ impact: 'VERY HIGH',
587
+ taskComplexity: 7,
588
+ currentDefault: 'Claude Sonnet 4.6'
589
+ }
590
+ },
591
+
592
+ {
593
+ id: 'seed-decomposition',
594
+ ceremony: 'seed',
595
+ stage: 'decomposition',
596
+ stageName: 'Task Decomposition',
597
+ prompt: `TASK: Select optimal LLM model for task decomposition in seed ceremony
598
+ PRIORITY: Best possible output quality
599
+
600
+ CONTEXT:
601
+ - Stage: decomposition (Task & Subtask Decomposition)
602
+ - Ceremony: seed
603
+ - Purpose: AI breaks down story into tasks and subtasks
604
+ - Input: Story with acceptance criteria, contexts (2,000-6,000 tokens)
605
+ - Output: Hierarchical task structure with dependencies
606
+ - Call frequency: 1 per ceremony execution
607
+ - Impact: CRITICAL - Defines actual implementation plan
608
+
609
+ EVALUATION CRITERIA (BEST OUTPUT FIRST):
610
+
611
+ 1. Output Quality Requirements: CRITICAL
612
+ - Must break story into logical task groupings
613
+ - Subtasks must be atomic and implementable
614
+ - Must identify dependencies
615
+ → Requires appropriate model capabilities
616
+
617
+ 2. Task Complexity: 8/10 (Granular work breakdown)
618
+ - Analyze story and break into tasks
619
+ - Determine appropriate task granularity
620
+ - Break tasks into atomic subtasks
621
+ - Identify task dependencies
622
+
623
+ 3. Context Understanding:
624
+ - Understand full story requirements
625
+ - Consider epic and project context
626
+ - Identify implicit implementation needs
627
+ → Requires appropriate model capabilities
628
+
629
+ 4. Consistency & Reliability:
630
+ - Task breakdown must be complete
631
+ - Dependencies must be accurate
632
+
633
+ 5. Speed Requirements: LOW (Secondary)
634
+ - Background processing
635
+ - Quality far more important
636
+
637
+ 6. Pricing Considerations: TERTIARY
638
+ - Single call per ceremony
639
+ - Critical for implementation planning
640
+
641
+ RECOMMENDATION:
642
+
643
+ Based on evaluation criteria above, select the model that best meets:
644
+ 1. PRIMARY: Output Quality - CRITICAL - Defines actual implementation plan
645
+ 2. SECONDARY: Task Complexity - Very High (8/10) - granular work breakdown with dependencies
646
+ 3. TERTIARY: Breakdown Capability - Exceptional atomic subtask identification
647
+ 4. Speed Requirements: LOW - Background processing, quality >>> speed
648
+ 5. Pricing: TERTIARY - Single call, critical impact justifies any tier
649
+
650
+ Selection Guidance:
651
+ - Prioritize models with exceptional hierarchical decomposition and breakdown capability
652
+ - Require accurate identification of atomic, implementable subtasks
653
+ - Must properly identify task dependencies
654
+ - Must understand full story requirements in epic/project context
655
+ - Completeness is critical (missing tasks cause implementation delays)
656
+ - Reliability paramount (inconsistent granularity causes confusion)
657
+ - Single call makes pricing tier less significant than quality
658
+
659
+ Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
660
+ metadata: {
661
+ callFrequency: 1,
662
+ impact: 'CRITICAL',
663
+ taskComplexity: 8,
664
+ currentDefault: 'Claude Opus 4.6'
665
+ }
666
+ },
667
+
668
+ {
669
+ id: 'seed-validation',
670
+ ceremony: 'seed',
671
+ stage: 'validation',
672
+ stageName: 'Task Validation',
673
+ prompt: `TASK: Select optimal LLM model for task validation in seed ceremony
674
+ PRIORITY: Best possible output quality
675
+
676
+ CONTEXT:
677
+ - Stage: validation (Task Validation)
678
+ - Ceremony: seed
679
+ - Purpose: AI validates task hierarchy completeness and feasibility
680
+ - Input: Complete task/subtask hierarchy (3,000-10,000 tokens)
681
+ - Output: Validation report identifying gaps and issues
682
+ - Call frequency: ~20 calls per ceremony
683
+ - Impact: VERY HIGH - Catches planning issues before development
684
+
685
+ EVALUATION CRITERIA (BEST OUTPUT FIRST):
686
+
687
+ 1. Output Quality Requirements: VERY HIGH
688
+ - Must identify gaps in task coverage
689
+ - Must assess task granularity
690
+ - Must validate dependency correctness
691
+ → Requires appropriate model capabilities
692
+
693
+ 2. Task Complexity: 7/10 (Completeness validation + reasoning)
694
+ - Analyze task hierarchy for gaps
695
+ - Assess granularity appropriateness
696
+ - Validate dependency relationships
697
+ - Reason about implementation feasibility
698
+
699
+ 3. Context Understanding:
700
+ - Must understand full task hierarchy
701
+ - Cross-reference with story requirements
702
+ - Reason about dependencies
703
+ → Requires appropriate model capabilities
704
+
705
+ 4. Consistency & Reliability:
706
+ - Must consistently identify gaps
707
+ - Cannot miss critical completeness problems
708
+
709
+ 5. Speed Requirements: MODERATE (Secondary)
710
+ - 20 calls, parallel validation
711
+ - Quality far more important
712
+
713
+ 6. Pricing Considerations: MODERATE
714
+ - 20 calls = moderate volume
715
+ - Critical impact (prevents implementation issues)
716
+
717
+ RECOMMENDATION:
718
+
719
+ Based on evaluation criteria above, select the model that best meets:
720
+ 1. PRIMARY: Output Quality - VERY HIGH - Catches planning issues before development
721
+ 2. SECONDARY: Validation Reasoning - Sophisticated gap identification and granularity assessment
722
+ 3. TERTIARY: Task Complexity - High (7/10) - completeness validation with dependency reasoning
723
+ 4. Speed Requirements: MODERATE - 20 calls in parallel, quality >>> speed
724
+ 5. Pricing: MODERATE - 20 calls, critical impact justifies investment
725
+
726
+ Selection Guidance:
727
+ - Prioritize models with excellent completeness validation and gap identification
728
+ - Require sophisticated reasoning about task granularity appropriateness
729
+ - Must validate dependency correctness and implementation feasibility
730
+ - Must cross-reference task hierarchy with story requirements
731
+ - Cannot miss critical completeness problems (false negatives are costly)
732
+ - Do not compromise on quality (prevents downstream implementation issues)
733
+
734
+ Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
735
+ metadata: {
736
+ callFrequency: 20,
737
+ impact: 'VERY HIGH',
738
+ taskComplexity: 7,
739
+ currentDefault: 'Claude Sonnet 4.5'
740
+ }
741
+ },
742
+
743
+ {
744
+ id: 'seed-context-generation',
745
+ ceremony: 'seed',
746
+ stage: 'context-generation',
747
+ stageName: 'Task Context Generation',
748
+ prompt: `TASK: Select optimal LLM model for task context generation in seed ceremony
749
+ PRIORITY: Best possible output quality
750
+
751
+ CONTEXT:
752
+ - Stage: context-generation (Task Context Generation)
753
+ - Ceremony: seed
754
+ - Purpose: AI generates context.md for each task
755
+ - Input: Task with subtasks, story/epic/project contexts (1,500-4,000 tokens)
756
+ - Output: Concise context.md with implementation approach (300-1,000 tokens)
757
+ - Call frequency: ~10 calls per ceremony
758
+ - Impact: VERY HIGH - Developers read immediately before implementing
759
+
760
+ EVALUATION CRITERIA (BEST OUTPUT FIRST):
761
+
762
+ 1. Output Quality Requirements: VERY HIGH
763
+ - Must provide specific, actionable implementation guidance
764
+ - Technical details must be accurate
765
+ - Must highlight gotchas and edge cases
766
+ → Requires appropriate model capabilities
767
+
768
+ 2. Task Complexity: 7/10 (Implementation-focused technical context)
769
+ - Synthesize task requirements into guidance
770
+ - Provide appropriate technical detail
771
+ - Identify implementation approaches
772
+ - Balance brevity with actionability
773
+
774
+ 3. Context Understanding:
775
+ - Understand task within story/epic context
776
+ - Recognize relevant patterns
777
+ - Identify task dependencies
778
+ → Requires appropriate model capabilities
779
+
780
+ 4. Consistency & Reliability:
781
+ - Context must align with task requirements
782
+ - Technical guidance must be accurate
783
+
784
+ 5. Speed Requirements: MODERATE (Secondary)
785
+ - 10 calls, parallel generation
786
+ - Quality more important
787
+
788
+ 6. Pricing Considerations: MODERATE
789
+ - 10 calls = low-moderate volume
790
+ - Developer-facing, high impact
791
+
792
+ RECOMMENDATION:
793
+
794
+ Based on evaluation criteria above, select the model that best meets:
795
+ 1. PRIMARY: Output Quality - VERY HIGH - Developers read immediately before implementing
796
+ 2. SECONDARY: Technical Writing - Implementation-focused guidance with accuracy
797
+ 3. TERTIARY: Task Complexity - High (7/10) - synthesis with appropriate detail level
798
+ 4. Speed Requirements: MODERATE - 10 calls in parallel, quality >>> speed
799
+ 5. Pricing: MODERATE - 10 calls, developer-facing, high impact
800
+
801
+ Selection Guidance:
802
+ - Prioritize models with excellent technical writing for implementation guidance
803
+ - Require ability to highlight gotchas, edge cases, and implementation approaches
804
+ - Must provide specific, actionable guidance (not generic advice)
805
+ - Must balance brevity with actionability (300-1,000 token output)
806
+ - Technical details must be accurate (errors cause developer confusion)
807
+ - Do not compromise on quality (directly impacts implementation efficiency)
808
+
809
+ Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
810
+ metadata: {
811
+ callFrequency: 10,
812
+ impact: 'VERY HIGH',
813
+ taskComplexity: 7,
814
+ currentDefault: 'Claude Sonnet 4.5'
815
+ }
816
+ },
817
+
818
+ {
819
+ id: 'context-retrospective-documentation-update',
820
+ ceremony: 'context-retrospective',
821
+ stage: 'documentation-update',
822
+ stageName: 'Documentation Enhancement',
823
+ prompt: `TASK: Select optimal LLM model for documentation enhancement in context-retrospective ceremony
824
+ PRIORITY: Best possible output quality
825
+
826
+ CONTEXT:
827
+ - Stage: documentation-update (Documentation Enhancement)
828
+ - Ceremony: context-retrospective
829
+ - Purpose: AI refines PROJECT.md based on implementation learnings
830
+ - Input: PROJECT.md, git history, completed work (10,000-30,000 tokens)
831
+ - Output: Updated PROJECT.md with refined descriptions and learnings
832
+ - Call frequency: ~10 calls per ceremony
833
+ - Impact: HIGH - Maintains PROJECT.md as source of truth
834
+
835
+ EVALUATION CRITERIA (BEST OUTPUT FIRST):
836
+
837
+ 1. Output Quality Requirements: HIGH
838
+ - Must synthesize implementation learnings
839
+ - Technical updates must be accurate
840
+ - Must maintain documentation consistency
841
+ → Requires appropriate model capabilities
842
+
843
+ 2. Task Complexity: 7/10 (Technical synthesis and writing)
844
+ - Analyze git history
845
+ - Identify patterns and insights
846
+ - Synthesize into documentation updates
847
+
848
+ 3. Context Understanding:
849
+ - Must handle large context (PROJECT.md + git history)
850
+ - Understand implementation changes
851
+ - Identify obsolete information
852
+ → Requires a large context window and appropriate model capabilities
853
+
854
+ 4. Consistency & Reliability:
855
+ - Updates must not contradict existing content
856
+ - Technical accuracy critical
857
+
858
+ 5. Speed Requirements: LOW (Secondary)
859
+ - Background processing
860
+ - Quality more important
861
+
862
+ 6. Pricing Considerations: MODERATE
863
+ - 10 calls = low-moderate volume
864
+ - Important but not critical path
865
+
866
+ RECOMMENDATION:
867
+
868
+ Based on evaluation criteria above, select the model that best meets:
869
+ 1. PRIMARY: Output Quality - HIGH - Maintains PROJECT.md as source of truth
870
+ 2. SECONDARY: Technical Writing - Synthesis of implementation learnings
871
+ 3. TERTIARY: Task Complexity - High (7/10) - git history analysis + documentation
872
+ 4. Speed Requirements: LOW - Background processing, quality >>> speed
873
+ 5. Pricing: MODERATE - 10 calls, important but not critical path
874
+
875
+ Selection Guidance:
876
+ - Prioritize models with excellent technical writing and documentation synthesis
877
+ - Require ability to analyze git history and identify implementation patterns
878
+ - Must maintain consistency with existing documentation (no contradictions)
879
+ - Must handle large context (PROJECT.md + git history = 10K-30K tokens)
880
+ - Technical accuracy critical (documentation is project source of truth)
881
+ - Consider balance of quality vs pricing for moderate-volume task
882
+
883
+ Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
884
+ metadata: {
885
+ callFrequency: 10,
886
+ impact: 'HIGH',
887
+ taskComplexity: 7,
888
+ currentDefault: 'Claude Sonnet 4.5'
889
+ }
890
+ },
891
+
892
+ {
893
+ id: 'context-retrospective-context-refinement',
894
+ ceremony: 'context-retrospective',
895
+ stage: 'context-refinement',
896
+ stageName: 'Context Enhancement',
897
+ prompt: `TASK: Select optimal LLM model for context refinement in context-retrospective ceremony
898
+ PRIORITY: Best possible output quality
899
+
900
+ CONTEXT:
901
+ - Stage: context-refinement (Context Enhancement)
902
+ - Ceremony: context-retrospective
903
+ - Purpose: AI enhances context.md with implementation insights
904
+ - Input: context.md, git diffs, PRs, issue comments (5,000-15,000 tokens)
905
+ - Output: Enhanced context.md with insights, patterns, code examples
906
+ - Call frequency: ~15 calls per ceremony
907
+ - Impact: VERY HIGH - Enhanced context improves all future AI agent performance
908
+
909
+ EVALUATION CRITERIA (BEST OUTPUT FIRST):
910
+
911
+ 1. Output Quality Requirements: VERY HIGH
912
+ - Must extract valuable insights from code changes
913
+ - Must identify implementation patterns
914
+ - Should include practical code examples
915
+ → Requires appropriate model capabilities
916
+
917
+ 2. Task Complexity: 8/10 (Code analysis + insight extraction)
918
+ - Analyze code diffs
919
+ - Identify patterns and best practices
920
+ - Extract insights from issue discussions
921
+ - Synthesize into actionable context
922
+
923
+ 3. Context Understanding:
924
+ - Understand code changes in project context
925
+ - Analyze git diffs
926
+ - Synthesize from multiple sources
927
+ → Requires appropriate model capabilities
928
+
929
+ 4. Consistency & Reliability:
930
+ - Enhanced context must align with original
931
+ - Code examples must be accurate
932
+ → Both require appropriate model capabilities
933
+
934
+ 5. Speed Requirements: LOW (Secondary)
935
+ - Background processing
936
+ - Quality far more important
937
+
938
+ 6. Pricing Considerations: MODERATE
939
+ - 15 calls = moderate volume
940
+ - Very high leverage (improves all future AI work)
941
+
942
+ RECOMMENDATION:
943
+
944
+ Based on evaluation criteria above, select the model that best meets:
945
+ 1. PRIMARY: Output Quality - VERY HIGH - Enhances context for all future AI agents
946
+ 2. SECONDARY: Code Analysis - Exceptional insight extraction from git diffs
947
+ 3. TERTIARY: Task Complexity - Very High (8/10) - pattern identification + synthesis
948
+ 4. Speed Requirements: LOW - Background processing, quality >>> speed
949
+ 5. Pricing: MODERATE - 15 calls, very high leverage (improves all future ceremonies)
950
+
951
+ Selection Guidance:
952
+ - Prioritize models with exceptional code analysis and pattern recognition
953
+ - Require ability to extract valuable insights from code diffs and PRs
954
+ - Must identify implementation patterns, gotchas, and best practices
955
+ - Must synthesize from multiple sources (git diffs, issues, comments)
956
+ - Code examples must be accurate (errors propagate to future AI work)
957
+ - Very high leverage justifies premium tier (enhanced context used in all future work)
958
+ - Quality directly impacts all future AI agent effectiveness
959
+
960
+ Consider all available models from any provider (Claude, Gemini, OpenAI, etc.)`,
961
+ metadata: {
962
+ callFrequency: 15,
963
+ impact: 'VERY HIGH',
964
+ taskComplexity: 8,
965
+ currentDefault: 'Claude Opus 4.6'
966
+ }
967
+ }
968
+ ];
969
+
970
/**
 * Look up a single evaluation prompt by its unique identifier.
 * @param {string} id - Prompt ID (e.g., 'sponsor-call-suggestions')
 * @returns {Object|null} The matching evaluation prompt, or null when no prompt has that ID
 */
export function getPromptById(id) {
  const match = EVALUATION_PROMPTS.find((prompt) => prompt.id === id);
  return match ?? null;
}
978
+
979
/**
 * Collect every evaluation prompt belonging to one ceremony.
 * @param {string} ceremony - Ceremony name (e.g., 'sponsor-call')
 * @returns {Array} All evaluation prompts whose `ceremony` matches (empty array when none do)
 */
export function getPromptsByCeremony(ceremony) {
  const matches = [];
  for (const prompt of EVALUATION_PROMPTS) {
    if (prompt.ceremony === ceremony) {
      matches.push(prompt);
    }
  }
  return matches;
}
987
+
988
/**
 * Compute aggregate statistics over all evaluation prompts.
 * @returns {Object} Statistics: total prompt count, number and list of distinct
 *   ceremonies, estimated total LLM calls (sum of per-prompt call frequencies),
 *   and a count of prompts per impact level.
 */
export function getPromptStats() {
  // Distinct ceremony names, preserving first-seen order.
  const ceremonies = [...new Set(EVALUATION_PROMPTS.map((p) => p.ceremony))];
  const totalCalls = EVALUATION_PROMPTS.reduce(
    (sum, p) => sum + p.metadata.callFrequency,
    0
  );

  // Single-pass tally instead of one filter() per level. The four known
  // levels are pre-seeded so they always appear (0 when unused); any new
  // impact level found in the data is added dynamically rather than being
  // silently dropped from the distribution.
  const impactDistribution = {
    CRITICAL: 0,
    'VERY HIGH': 0,
    HIGH: 0,
    MEDIUM: 0
  };
  for (const { metadata } of EVALUATION_PROMPTS) {
    impactDistribution[metadata.impact] =
      (impactDistribution[metadata.impact] ?? 0) + 1;
  }

  return {
    totalPrompts: EVALUATION_PROMPTS.length,
    ceremonies: ceremonies.length,
    ceremonyList: ceremonies,
    estimatedTotalCalls: totalCalls,
    impactDistribution
  };
}