popeye-cli 1.6.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. package/README.md +240 -32
  2. package/cheatsheet.md +407 -0
  3. package/dist/cli/commands/db.d.ts +10 -0
  4. package/dist/cli/commands/db.d.ts.map +1 -0
  5. package/dist/cli/commands/db.js +240 -0
  6. package/dist/cli/commands/db.js.map +1 -0
  7. package/dist/cli/commands/doctor.d.ts +18 -0
  8. package/dist/cli/commands/doctor.d.ts.map +1 -0
  9. package/dist/cli/commands/doctor.js +255 -0
  10. package/dist/cli/commands/doctor.js.map +1 -0
  11. package/dist/cli/commands/index.d.ts +2 -0
  12. package/dist/cli/commands/index.d.ts.map +1 -1
  13. package/dist/cli/commands/index.js +2 -0
  14. package/dist/cli/commands/index.js.map +1 -1
  15. package/dist/cli/index.d.ts.map +1 -1
  16. package/dist/cli/index.js +3 -1
  17. package/dist/cli/index.js.map +1 -1
  18. package/dist/cli/interactive.d.ts.map +1 -1
  19. package/dist/cli/interactive.js +96 -0
  20. package/dist/cli/interactive.js.map +1 -1
  21. package/dist/generators/admin-wizard.d.ts +25 -0
  22. package/dist/generators/admin-wizard.d.ts.map +1 -0
  23. package/dist/generators/admin-wizard.js +123 -0
  24. package/dist/generators/admin-wizard.js.map +1 -0
  25. package/dist/generators/all.d.ts.map +1 -1
  26. package/dist/generators/all.js +10 -3
  27. package/dist/generators/all.js.map +1 -1
  28. package/dist/generators/database.d.ts +58 -0
  29. package/dist/generators/database.d.ts.map +1 -0
  30. package/dist/generators/database.js +229 -0
  31. package/dist/generators/database.js.map +1 -0
  32. package/dist/generators/fullstack.d.ts.map +1 -1
  33. package/dist/generators/fullstack.js +23 -7
  34. package/dist/generators/fullstack.js.map +1 -1
  35. package/dist/generators/index.d.ts +2 -0
  36. package/dist/generators/index.d.ts.map +1 -1
  37. package/dist/generators/index.js +2 -0
  38. package/dist/generators/index.js.map +1 -1
  39. package/dist/generators/templates/admin-wizard-python.d.ts +32 -0
  40. package/dist/generators/templates/admin-wizard-python.d.ts.map +1 -0
  41. package/dist/generators/templates/admin-wizard-python.js +425 -0
  42. package/dist/generators/templates/admin-wizard-python.js.map +1 -0
  43. package/dist/generators/templates/admin-wizard-react.d.ts +48 -0
  44. package/dist/generators/templates/admin-wizard-react.d.ts.map +1 -0
  45. package/dist/generators/templates/admin-wizard-react.js +554 -0
  46. package/dist/generators/templates/admin-wizard-react.js.map +1 -0
  47. package/dist/generators/templates/database-docker.d.ts +23 -0
  48. package/dist/generators/templates/database-docker.d.ts.map +1 -0
  49. package/dist/generators/templates/database-docker.js +221 -0
  50. package/dist/generators/templates/database-docker.js.map +1 -0
  51. package/dist/generators/templates/database-python.d.ts +54 -0
  52. package/dist/generators/templates/database-python.d.ts.map +1 -0
  53. package/dist/generators/templates/database-python.js +723 -0
  54. package/dist/generators/templates/database-python.js.map +1 -0
  55. package/dist/generators/templates/database-typescript.d.ts +34 -0
  56. package/dist/generators/templates/database-typescript.d.ts.map +1 -0
  57. package/dist/generators/templates/database-typescript.js +232 -0
  58. package/dist/generators/templates/database-typescript.js.map +1 -0
  59. package/dist/generators/templates/fullstack.d.ts.map +1 -1
  60. package/dist/generators/templates/fullstack.js +29 -0
  61. package/dist/generators/templates/fullstack.js.map +1 -1
  62. package/dist/generators/templates/index.d.ts +5 -0
  63. package/dist/generators/templates/index.d.ts.map +1 -1
  64. package/dist/generators/templates/index.js +5 -0
  65. package/dist/generators/templates/index.js.map +1 -1
  66. package/dist/state/index.d.ts +10 -0
  67. package/dist/state/index.d.ts.map +1 -1
  68. package/dist/state/index.js +22 -0
  69. package/dist/state/index.js.map +1 -1
  70. package/dist/types/consensus.d.ts +3 -0
  71. package/dist/types/consensus.d.ts.map +1 -1
  72. package/dist/types/consensus.js +1 -0
  73. package/dist/types/consensus.js.map +1 -1
  74. package/dist/types/database-runtime.d.ts +86 -0
  75. package/dist/types/database-runtime.d.ts.map +1 -0
  76. package/dist/types/database-runtime.js +61 -0
  77. package/dist/types/database-runtime.js.map +1 -0
  78. package/dist/types/database.d.ts +85 -0
  79. package/dist/types/database.d.ts.map +1 -0
  80. package/dist/types/database.js +71 -0
  81. package/dist/types/database.js.map +1 -0
  82. package/dist/types/index.d.ts +3 -0
  83. package/dist/types/index.d.ts.map +1 -1
  84. package/dist/types/index.js +6 -0
  85. package/dist/types/index.js.map +1 -1
  86. package/dist/types/tester.d.ts +138 -0
  87. package/dist/types/tester.d.ts.map +1 -0
  88. package/dist/types/tester.js +110 -0
  89. package/dist/types/tester.js.map +1 -0
  90. package/dist/types/workflow.d.ts +166 -0
  91. package/dist/types/workflow.d.ts.map +1 -1
  92. package/dist/types/workflow.js +14 -0
  93. package/dist/types/workflow.js.map +1 -1
  94. package/dist/workflow/db-setup-runner.d.ts +63 -0
  95. package/dist/workflow/db-setup-runner.d.ts.map +1 -0
  96. package/dist/workflow/db-setup-runner.js +336 -0
  97. package/dist/workflow/db-setup-runner.js.map +1 -0
  98. package/dist/workflow/db-state-machine.d.ts +30 -0
  99. package/dist/workflow/db-state-machine.d.ts.map +1 -0
  100. package/dist/workflow/db-state-machine.js +51 -0
  101. package/dist/workflow/db-state-machine.js.map +1 -0
  102. package/dist/workflow/execution-mode.js +2 -2
  103. package/dist/workflow/execution-mode.js.map +1 -1
  104. package/dist/workflow/index.d.ts +3 -0
  105. package/dist/workflow/index.d.ts.map +1 -1
  106. package/dist/workflow/index.js +3 -0
  107. package/dist/workflow/index.js.map +1 -1
  108. package/dist/workflow/task-workflow.d.ts +5 -0
  109. package/dist/workflow/task-workflow.d.ts.map +1 -1
  110. package/dist/workflow/task-workflow.js +172 -6
  111. package/dist/workflow/task-workflow.js.map +1 -1
  112. package/dist/workflow/tester.d.ts +120 -0
  113. package/dist/workflow/tester.d.ts.map +1 -0
  114. package/dist/workflow/tester.js +589 -0
  115. package/dist/workflow/tester.js.map +1 -0
  116. package/dist/workflow/workflow-logger.d.ts +1 -1
  117. package/dist/workflow/workflow-logger.d.ts.map +1 -1
  118. package/dist/workflow/workflow-logger.js.map +1 -1
  119. package/package.json +1 -1
  120. package/src/cli/commands/db.ts +281 -0
  121. package/src/cli/commands/doctor.ts +273 -0
  122. package/src/cli/commands/index.ts +2 -0
  123. package/src/cli/index.ts +4 -0
  124. package/src/cli/interactive.ts +102 -0
  125. package/src/generators/admin-wizard.ts +146 -0
  126. package/src/generators/all.ts +10 -3
  127. package/src/generators/database.ts +286 -0
  128. package/src/generators/fullstack.ts +26 -9
  129. package/src/generators/index.ts +12 -0
  130. package/src/generators/templates/admin-wizard-python.ts +431 -0
  131. package/src/generators/templates/admin-wizard-react.ts +560 -0
  132. package/src/generators/templates/database-docker.ts +227 -0
  133. package/src/generators/templates/database-python.ts +734 -0
  134. package/src/generators/templates/database-typescript.ts +238 -0
  135. package/src/generators/templates/fullstack.ts +29 -0
  136. package/src/generators/templates/index.ts +5 -0
  137. package/src/state/index.ts +29 -0
  138. package/src/types/consensus.ts +3 -0
  139. package/src/types/database-runtime.ts +69 -0
  140. package/src/types/database.ts +84 -0
  141. package/src/types/index.ts +50 -0
  142. package/src/types/tester.ts +136 -0
  143. package/src/types/workflow.ts +31 -0
  144. package/src/workflow/db-setup-runner.ts +391 -0
  145. package/src/workflow/db-state-machine.ts +58 -0
  146. package/src/workflow/execution-mode.ts +2 -2
  147. package/src/workflow/index.ts +3 -0
  148. package/src/workflow/task-workflow.ts +227 -5
  149. package/src/workflow/tester.ts +723 -0
  150. package/src/workflow/workflow-logger.ts +2 -0
  151. package/tests/generators/admin-wizard-orchestrator.test.ts +64 -0
  152. package/tests/generators/admin-wizard-templates.test.ts +366 -0
  153. package/tests/generators/cross-phase-integration.test.ts +383 -0
  154. package/tests/generators/database.test.ts +456 -0
  155. package/tests/generators/fe-be-db-integration.test.ts +613 -0
  156. package/tests/types/database-runtime.test.ts +158 -0
  157. package/tests/types/database.test.ts +187 -0
  158. package/tests/types/tester.test.ts +174 -0
  159. package/tests/workflow/db-setup-runner.test.ts +211 -0
  160. package/tests/workflow/db-state-machine.test.ts +117 -0
  161. package/tests/workflow/tester.test.ts +401 -0
@@ -0,0 +1,723 @@
1
+ /**
2
+ * Tester (QA) skill module
3
+ * Provides test planning, review, and fix plan capabilities.
4
+ * Provider-agnostic -- uses whichever AI provider is configured.
5
+ */
6
+
7
+ import { promises as fs } from 'node:fs';
8
+ import path from 'node:path';
9
+ import type { ProjectState, Task, Milestone } from '../types/workflow.js';
10
+ import type { ConsensusConfig } from '../types/consensus.js';
11
+ import type {
12
+ TestPlanOutput,
13
+ TestRunReview,
14
+ DiscoveredTestCommands,
15
+ } from '../types/tester.js';
16
+ import type { OutputLanguage } from '../types/project.js';
17
+ import { isWorkspace } from '../types/project.js';
18
+ import { createPlan as claudeCreatePlan } from '../adapters/claude.js';
19
+ import { runOptimizedConsensusProcess, iterateUntilConsensus, type ConsensusProcessResult } from './consensus.js';
20
+ import type { TestResult } from './test-runner.js';
21
+
22
+ // ============================================================================
23
+ // Command Discovery
24
+ // ============================================================================
25
+
26
/**
 * Inspect the project directory to discover available test/lint/build commands.
 *
 * Sources are consulted in priority order -- `package.json` scripts, then
 * `pyproject.toml`, then `Makefile` targets -- and a later source only fills a
 * slot that an earlier one left empty. Any slot still empty afterwards gets a
 * language-based default (only `python` and `typescript` have defaults).
 *
 * Missing or unreadable files are treated as "not present"; this function does
 * not reject because a config file is absent or malformed.
 *
 * @param projectDir - Root of the project
 * @param language - Project language, used only for the fallback defaults
 * @returns Discovered command references (`null` for anything not found)
 */
export async function discoverTestCommands(
  projectDir: string,
  language: OutputLanguage,
): Promise<DiscoveredTestCommands> {
  const result: DiscoveredTestCommands = {
    testCmd: null,
    lintCmd: null,
    buildCmd: null,
    typecheckCmd: null,
  };

  // Read a project-root file, resolving to null when it cannot be read.
  const readOptional = async (fileName: string): Promise<string | null> => {
    try {
      return await fs.readFile(path.join(projectDir, fileName), 'utf-8');
    } catch {
      return null;
    }
  };

  // The three reads are independent, so issue them together; precedence is
  // applied below when the contents are interpreted.
  const [pkgRaw, pyprojectRaw, makefileRaw] = await Promise.all([
    readOptional('package.json'),
    readOptional('pyproject.toml'),
    readOptional('Makefile'),
  ]);

  // package.json scripts (JS/TS projects)
  if (pkgRaw !== null) {
    let pkg: unknown = null;
    try {
      pkg = JSON.parse(pkgRaw);
    } catch {
      // Invalid JSON -- treat as a package.json without scripts
    }
    const scriptsField =
      typeof pkg === 'object' && pkg !== null ? (pkg as { scripts?: unknown }).scripts : undefined;
    const scripts: Record<string, unknown> =
      typeof scriptsField === 'object' && scriptsField !== null
        ? (scriptsField as Record<string, unknown>)
        : {};

    // npm scripts are shell strings; anything else is not a runnable script.
    const scriptBody = (name: string): string | null => {
      const value = scripts[name];
      return typeof value === 'string' && value.trim().length > 0 ? value : null;
    };

    // `npm init` writes `echo "Error: no test specified" && exit 1` as the test
    // script. It always exits non-zero, so it must not count as a real command.
    const testScript = scriptBody('test');
    if (testScript !== null && !testScript.includes('no test specified')) {
      result.testCmd = 'npm test';
    }
    if (scriptBody('lint') !== null) result.lintCmd = 'npm run lint';
    if (scriptBody('build') !== null) result.buildCmd = 'npm run build';
    if (scriptBody('typecheck') !== null) {
      result.typecheckCmd = 'npm run typecheck';
    } else if (scriptBody('type-check') !== null) {
      result.typecheckCmd = 'npm run type-check';
    }
  }

  // pyproject.toml (Python projects) -- plain substring checks, no TOML parsing
  if (pyprojectRaw !== null) {
    if (!result.testCmd && pyprojectRaw.includes('[tool.pytest')) {
      result.testCmd = 'pytest';
    }
    if (!result.lintCmd) {
      // Pick the linter the project actually mentions; ruff wins when both appear.
      if (pyprojectRaw.includes('ruff')) {
        result.lintCmd = 'ruff check .';
      } else if (pyprojectRaw.includes('flake8')) {
        result.lintCmd = 'flake8 .';
      }
    }
    if (!result.typecheckCmd && pyprojectRaw.includes('mypy')) {
      result.typecheckCmd = 'mypy .';
    }
  }

  // Makefile targets (any project) -- a target is a line starting with `name:`
  if (makefileRaw !== null) {
    if (!result.testCmd && /^test:/m.test(makefileRaw)) result.testCmd = 'make test';
    if (!result.lintCmd && /^lint:/m.test(makefileRaw)) result.lintCmd = 'make lint';
    if (!result.buildCmd && /^build:/m.test(makefileRaw)) result.buildCmd = 'make build';
  }

  // Fallback defaults by language
  if (!result.testCmd) {
    if (language === 'python') result.testCmd = 'pytest';
    if (language === 'typescript') result.testCmd = 'npx vitest run';
  }
  if (!result.lintCmd) {
    if (language === 'python') result.lintCmd = 'ruff check .';
    if (language === 'typescript') result.lintCmd = 'npx eslint .';
  }
  if (!result.buildCmd) {
    if (language === 'typescript') result.buildCmd = 'npm run build';
  }

  return result;
}
98
+
99
+ // ============================================================================
100
+ // Component Playbooks
101
+ // ============================================================================
102
+
103
/**
 * Produce the testing guidance text that is embedded in the Tester prompt.
 *
 * - `python` and `typescript` each get their own playbook.
 * - `website` gets the TypeScript playbook followed by the website playbook.
 * - Workspace languages (per `isWorkspace`) get all three playbooks plus an
 *   API contract testing section.
 * - Anything else falls back to the Python playbook.
 *
 * @param language - Project output language
 * @returns Playbook text, sections separated by a blank line
 */
export function getComponentPlaybook(language: OutputLanguage): string {
  const pythonGuide = [
    '### Python Testing Playbook',
    '- Use pytest with fixtures and conftest.py for shared setup',
    '- Use FastAPI TestClient for API endpoint testing',
    '- Use unittest.mock / pytest-mock for mocking external dependencies',
    '- Structure: tests/ mirroring src/ with test_ prefix per file',
    '- Coverage: pytest --cov for coverage reports',
    '- Async tests: use pytest-asyncio for async function testing',
  ].join('\n');

  const typescriptGuide = [
    '### TypeScript Testing Playbook',
    '- Use Vitest or Jest as the test runner',
    '- Use React Testing Library for component testing',
    '- Use MSW (Mock Service Worker) for API mocking',
    '- Structure: tests/ or __tests__/ directories alongside source',
    '- Type checking: tsc --noEmit for compile-time validation',
    '- Coverage: vitest run --coverage or jest --coverage',
  ].join('\n');

  const websiteGuide = [
    '### Website Testing Playbook',
    '- Use Next.js test utilities for page/component testing',
    '- Use axe-core or @axe-core/react for accessibility testing',
    '- Verify SEO meta tags with custom assertions (title, description, OG tags)',
    '- Test responsive layouts with viewport size assertions',
    '- Lighthouse CI for performance regression testing',
  ].join('\n');

  const contractGuide = [
    '### API Contract Testing',
    '- Validate frontend API calls match backend endpoint schemas',
    '- Use shared type definitions or OpenAPI specs for contract alignment',
  ].join('\n');

  switch (language) {
    case 'python':
      return pythonGuide;
    case 'typescript':
      return typescriptGuide;
    case 'website':
      return [typescriptGuide, websiteGuide].join('\n\n');
    default:
      break;
  }

  // Unrecognised, non-workspace languages fall back to the Python playbook.
  if (!isWorkspace(language)) {
    return pythonGuide;
  }
  return [pythonGuide, typescriptGuide, websiteGuide, contractGuide].join('\n\n');
}
148
+
149
+ // ============================================================================
150
+ // Prompt Builders
151
+ // ============================================================================
152
+
153
/**
 * Assemble the prompt that asks the Tester to produce a structured TestPlan.
 * The wording is provider-agnostic: it addresses "the Tester", never a
 * specific AI.
 *
 * @param task - Task under test (name and description are embedded)
 * @param milestone - Parent milestone; its completed tasks are listed
 * @param state - Project state (name and language are embedded)
 * @param approvedCodePlan - Approved code plan, included as read-only context
 * @param discoveredCommands - Commands found by command discovery; empty slots are omitted
 * @returns The trimmed prompt text
 */
export function buildTestPlanPrompt(
  task: Task,
  milestone: Milestone,
  state: ProjectState,
  approvedCodePlan: string,
  discoveredCommands: DiscoveredTestCommands,
): string {
  const playbookText = getComponentPlaybook(state.language);

  // Bullet list of finished tasks, or a placeholder when there are none.
  const finishedBullets: string[] = [];
  for (const candidate of milestone.tasks) {
    if (candidate.status === 'complete') {
      finishedBullets.push(`- ${candidate.name}`);
    }
  }
  const finishedList = finishedBullets.join('\n') || 'None yet';

  // One "Label: command" line per discovered command, in a fixed order.
  const labelledCommands = [
    ['Test', discoveredCommands.testCmd],
    ['Lint', discoveredCommands.lintCmd],
    ['Build', discoveredCommands.buildCmd],
    ['Typecheck', discoveredCommands.typecheckCmd],
  ] as const;
  const infrastructureLines: string[] = [];
  for (const [label, command] of labelledCommands) {
    if (command) {
      infrastructureLines.push(`${label}: ${command}`);
    }
  }
  const infrastructure =
    infrastructureLines.join('\n') ||
    'No test commands discovered -- the Tester should specify commands explicitly.';

  const prompt = `
You are the Tester -- a dedicated QA engineer responsible for designing a comprehensive test plan.
Your job is to ensure code quality, catch regressions, and verify that the implementation meets its requirements.

## Project Context
Project: ${state.name}
Language: ${state.language}

## Milestone: ${milestone.name}
${milestone.description}

## Completed Tasks
${finishedList}

## Task Under Test
**${task.name}**
${task.description}

## Approved Code Plan (read-only context -- DO NOT modify this)
${approvedCodePlan}

## Discovered Test Infrastructure
${infrastructure}

${playbookText}

## Instructions
Based on the approved code plan above, design a structured test plan. Output valid JSON matching this schema:

\`\`\`json
{
"summary": "What risks this plan targets",
"scope": ["frontend" | "backend" | "db" | "infra"],
"testMatrix": [
{
"id": "TC-1",
"category": "unit | integration | e2e | smoke | lint | build",
"description": "What is being tested",
"acceptanceCriteria": "What must be true to pass",
"evidenceRequired": "What output/log proves it passed",
"priority": "critical | high | medium | low"
}
],
"commands": [
{
"command": "exact shell command",
"cwd": "optional relative path",
"purpose": "why this command is needed",
"required": true
}
],
"riskFocus": ["top risks being tested"],
"evidenceRequired": ["logs/reports to capture"],
"minimumVerification": ["build check", "lint check", "smoke test"]
}
\`\`\`

Rules:
- Always include minimumVerification (build, lint, basic smoke test)
- Commands must be concrete and executable (no placeholders)
- Each test case needs clear acceptance criteria
- Focus on risks introduced by the code plan, not general testing
- If no custom tests are needed beyond minimum verification, include "noTestsRationale" explaining why
`;
  return prompt.trim();
}
242
+
243
/**
 * Assemble the prompt that asks the Tester to judge a test run against the
 * approved test plan. Provider-agnostic.
 *
 * Only the first 5000 characters of the test output are embedded.
 *
 * @param task - Task whose tests were run
 * @param approvedTestPlan - Approved test plan text
 * @param testResult - Execution results (counts, failed test names, output)
 * @param state - Project state (name and language are embedded)
 * @returns The trimmed prompt text
 */
export function buildTestRunReviewPrompt(
  task: Task,
  approvedTestPlan: string,
  testResult: TestResult,
  state: ProjectState,
): string {
  const outputExcerpt = testResult.output.slice(0, 5000);

  // Bullet list of failing test names, or 'None' when absent or empty.
  const failingNames = testResult.failedTests;
  const failingBullets = failingNames ? failingNames.map((name) => `- ${name}`).join('\n') : '';
  const failedSection = failingBullets || 'None';

  const prompt = `
You are the Tester -- a dedicated QA engineer reviewing the test execution results.
ONLY the Tester decides whether tests pass or fail. The coder cannot override this verdict.

## Project Context
Project: ${state.name}
Language: ${state.language}

## Task: ${task.name}
${task.description}

## Approved Test Plan
${approvedTestPlan}

## Test Execution Results
- Success: ${testResult.success}
- Total: ${testResult.total}
- Passed: ${testResult.passed}
- Failed: ${testResult.failed}

### Failed Tests
${failedSection}

### Output (truncated to 5000 chars)
\`\`\`
${outputExcerpt}
\`\`\`

## Instructions
Review the test results against the approved test plan's acceptance criteria.
Output valid JSON matching this schema:

\`\`\`json
{
"verdict": "PASS | PASS_WITH_NOTES | FAIL",
"summary": "Brief summary of the review",
"evidenceReviewed": ["list of evidence checked"],
"failures": ["specific failures found, empty if PASS"],
"gaps": ["missing evidence or coverage gaps"],
"recommendations": ["suggestions for improvement"],
"requiresConsensus": false
}
\`\`\`

Rules:
- PASS: All critical and high-priority acceptance criteria met, evidence present
- PASS_WITH_NOTES: Criteria met but with caveats or minor gaps (recommendations logged)
- FAIL: Any critical acceptance criteria not met, or required commands failed
- Set requiresConsensus to true ONLY when verdict is FAIL
- Be specific about which acceptance criteria passed/failed
`;
  return prompt.trim();
}
308
+
309
/**
 * Assemble the prompt that asks the Tester for a fix plan after a failed run.
 * Provider-agnostic.
 *
 * Only the first 4000 characters of the test output are embedded. When the run
 * has zero passes and more than 20 failures, a warning is added telling the
 * Tester to treat it as a runner crash rather than individual failures.
 *
 * @param task - Task whose tests failed
 * @param approvedTestPlan - Approved test plan text
 * @param testResult - Execution results
 * @param review - The Tester's review (verdict, summary, failures)
 * @param state - Project state (name and language are embedded)
 * @returns The trimmed prompt text
 */
export function buildTestFixPlanPrompt(
  task: Task,
  approvedTestPlan: string,
  testResult: TestResult,
  review: TestRunReview,
  state: ProjectState,
): string {
  const outputExcerpt = testResult.output.slice(0, 4000);
  const failureSummary = review.failures.join('; ');

  // Nothing passed and a large number failed: flag a likely runner crash.
  let crashNotice = '';
  if (testResult.passed === 0 && testResult.failed > 20) {
    crashNotice =
      '**WARNING: This appears to be a test runner crash (0 passed), not individual test failures. Focus on the root import/syntax/config error.**\n';
  }

  const prompt = `
You are the Tester -- a dedicated QA engineer creating a fix plan for test failures.
Your root cause analysis guides the coder's fix implementation.

## Project Context
Project: ${state.name}
Language: ${state.language}

## Task: ${task.name}
${task.description}

## Approved Test Plan
${approvedTestPlan}

## Tester's Review
Verdict: ${review.verdict}
Summary: ${review.summary}
Failures: ${failureSummary}

## Test Output
\`\`\`
${outputExcerpt}
\`\`\`

${crashNotice}

## Instructions
Create a fix plan. Output valid JSON matching this schema:

\`\`\`json
{
"failedCriteria": ["which acceptance criteria failed"],
"rootCauseAnalysis": "detailed root cause analysis",
"fixSteps": [
{ "file": "path/to/file", "change": "what to change", "reason": "why" }
],
"regressionRisks": ["risks of introducing new bugs"],
"retestStrategy": "how to verify the fix"
}
\`\`\`

Rules:
- Identify the root cause, not just symptoms
- Fix steps should be minimal and focused
- Consider regression risks for each change
- Retest strategy must reference the original acceptance criteria
`;
  return prompt.trim();
}
371
+
372
+ // ============================================================================
373
+ // Orchestration Functions
374
+ // ============================================================================
375
+
376
/**
 * Outcome of the test planning phase.
 */
export interface TestPlanningResult {
  /** Raw test plan text returned by the plan generator; empty when generation failed. */
  testPlanText: string;
  /** Structured plan parsed from the text, or null when no JSON could be parsed. */
  testPlanParsed: TestPlanOutput | null;
  /** Result of the consensus review run on the test plan. */
  consensusResult: ConsensusProcessResult;
  /** Set when the test plan could not be generated. */
  error?: string;
}
385
+
386
/**
 * Run the test planning phase: discover commands, generate a test plan, then
 * put the plan through consensus review.
 *
 * If plan generation fails, the function returns early with an empty plan, a
 * non-approved placeholder consensus result, and `error` set; no consensus
 * round is run. JSON extraction from the plan text is best-effort: on a parse
 * failure the raw text is still sent to consensus and `testPlanParsed` stays
 * null.
 *
 * @param task - The task to plan tests for
 * @param milestone - Parent milestone
 * @param state - Current project state
 * @param approvedCodePlan - The consensus-approved code plan
 * @param options - Workflow options (projectDir, consensusConfig, onProgress)
 * @returns Test plan result with consensus outcome
 */
export async function runTestPlanningPhase(
  task: Task,
  milestone: Milestone,
  state: ProjectState,
  approvedCodePlan: string,
  options: {
    projectDir: string;
    consensusConfig?: Partial<ConsensusConfig>;
    onProgress?: (phase: string, message: string) => void;
  },
): Promise<TestPlanningResult> {
  const { projectDir, consensusConfig, onProgress } = options;
  const announce = (message: string): void => {
    onProgress?.('test-planning', message);
  };

  // 1. Find out which test/lint/build commands the project offers.
  announce('Discovering test infrastructure...');
  const commands = await discoverTestCommands(projectDir, state.language);

  // 2. Ask the plan generator for a test plan.
  const prompt = buildTestPlanPrompt(task, milestone, state, approvedCodePlan, commands);
  announce('Tester is designing the test plan...');
  const generated = await claudeCreatePlan(
    prompt,
    `Project: ${state.name}\nLanguage: ${state.language}`,
    state.language,
    (msg) => onProgress?.('test-planning', msg),
  );

  if (!generated.success) {
    // No plan to review: report a zeroed, non-approved consensus result.
    const notReviewed: ConsensusProcessResult = {
      approved: false,
      finalPlan: '',
      finalScore: 0,
      bestPlan: '',
      bestScore: 0,
      bestIteration: 0,
      totalIterations: 0,
      iterations: [],
      finalConcerns: [],
      finalRecommendations: [],
      arbitrated: false,
    };
    return {
      testPlanText: '',
      testPlanParsed: null,
      consensusResult: notReviewed,
      error: `Tester failed to create test plan: ${generated.error}`,
    };
  }

  const testPlanText = generated.response;

  // 3. Best-effort extraction of the structured plan: a ```json fence first,
  //    otherwise the widest {...} span that mentions "summary".
  let testPlanParsed: TestPlanOutput | null = null;
  try {
    const fenced = testPlanText.match(/```json\s*([\s\S]*?)```/);
    const located = fenced || testPlanText.match(/\{[\s\S]*"summary"[\s\S]*\}/);
    if (located) {
      const jsonText = located[1] || located[0];
      testPlanParsed = JSON.parse(jsonText) as TestPlanOutput;
    }
  } catch {
    // The text plan is still usable for consensus without the structured form.
    announce('Could not parse structured test plan; using text-based plan for consensus.');
  }

  // 4. Consensus review, with the code plan supplied as reviewer context.
  announce('Submitting test plan for consensus review...');

  const reviewDocument = `## Approved Code Plan (read-only, for reviewer context)\n${approvedCodePlan}\n\n## Proposed Test Plan (subject to consensus review)\n${testPlanText}`;
  const reviewContext = `Project: ${state.name}\nLanguage: ${state.language}\nMilestone: ${milestone.name}\nTask: ${task.name}\nPhase: Test Plan Review`;

  // Test plans use their own threshold, defaulting to 90 when not configured.
  const reviewConfig = {
    ...consensusConfig,
    threshold: consensusConfig?.testPlanThreshold ?? 90,
  };

  // The optimized process is used unless it is explicitly switched off.
  const optimizedDisabled = consensusConfig?.useOptimizedConsensus === false;

  const consensusResult: ConsensusProcessResult = optimizedDisabled
    ? ((await iterateUntilConsensus(reviewDocument, reviewContext, {
        projectDir,
        config: reviewConfig,
        isFullstack: isWorkspace(state.language),
        language: state.language,
        onIteration: (iteration, result) => {
          onProgress?.('test-planning', `Test plan consensus iteration ${iteration}: ${result.score}%`);
        },
        onProgress,
      })) as ConsensusProcessResult)
    : ((await runOptimizedConsensusProcess(reviewDocument, reviewContext, {
        projectDir,
        config: reviewConfig,
        milestoneId: milestone.id,
        milestoneName: milestone.name,
        taskId: task.id,
        taskName: `${task.name} - Test Plan`,
        parallelReviews: true,
        isFullstack: isWorkspace(state.language),
        onIteration: (iteration, result) => {
          onProgress?.('test-planning', `Test plan consensus iteration ${iteration}: ${result.score}%`);
        },
        onProgress,
      })) as ConsensusProcessResult);

  return { testPlanText, testPlanParsed, consensusResult };
}
510
+
511
/**
 * Build the review used when the Tester's own review is unavailable or
 * unusable; the verdict is derived from the raw test results.
 */
function buildFallbackTestRunReview(
  testResult: TestResult,
  summary: string,
  evidenceReviewed: string[],
  gap: string,
): TestRunReview {
  return {
    verdict: testResult.success ? 'PASS_WITH_NOTES' : 'FAIL',
    summary,
    evidenceReviewed,
    failures: testResult.success ? [] : [`${testResult.failed} test(s) failed`],
    gaps: [gap],
    recommendations: [],
    requiresConsensus: !testResult.success,
  };
}

/**
 * Turn parsed JSON from the Tester into a well-formed TestRunReview.
 * Returns null when the value is not an object or carries no recognisable
 * verdict, so the caller can fall back to the raw test results.
 */
function normalizeTestRunReview(parsed: unknown): TestRunReview | null {
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return null;
  }
  const record = parsed as Record<string, unknown>;

  const verdictText = typeof record.verdict === 'string' ? record.verdict.trim().toUpperCase() : '';
  if (!['PASS', 'PASS_WITH_NOTES', 'FAIL'].includes(verdictText)) {
    return null;
  }
  const verdict = verdictText as TestRunReview['verdict'];

  // Missing or malformed list fields become empty lists so later code can
  // call array methods (e.g. `failures.join`) without throwing.
  const stringList = (value: unknown): string[] =>
    Array.isArray(value) ? value.filter((item): item is string => typeof item === 'string') : [];

  return {
    // Keep any additional fields the Tester supplied, as the unvalidated cast did.
    ...record,
    verdict,
    summary: typeof record.summary === 'string' ? record.summary : '',
    evidenceReviewed: stringList(record.evidenceReviewed),
    failures: stringList(record.failures),
    gaps: stringList(record.gaps),
    recommendations: stringList(record.recommendations),
    // The prompt asks for consensus only on FAIL; use that when the flag is absent.
    requiresConsensus:
      typeof record.requiresConsensus === 'boolean' ? record.requiresConsensus : verdict === 'FAIL',
  };
}

/**
 * Run the test review phase: AI reviews test results and issues a verdict.
 * ONLY the Tester decides PASS/FAIL -- the coder cannot bypass this.
 *
 * The Tester's JSON reply is validated before it is returned: the verdict must
 * be PASS, PASS_WITH_NOTES or FAIL, and list fields are coerced to string
 * arrays. When the AI call fails, or the reply holds no usable structured
 * review, a review derived from the raw test results is returned instead
 * (PASS_WITH_NOTES when the run succeeded, FAIL otherwise).
 *
 * @param task - The task whose tests were run
 * @param approvedTestPlan - The approved test plan text
 * @param testResult - Test execution results
 * @param state - Current project state
 * @param onProgress - Progress callback
 * @returns Structured TestRunReview
 */
export async function runTestReviewPhase(
  task: Task,
  approvedTestPlan: string,
  testResult: TestResult,
  state: ProjectState,
  onProgress?: (phase: string, message: string) => void,
): Promise<TestRunReview> {
  onProgress?.('test-review', 'Tester is reviewing test results...');

  const reviewPrompt = buildTestRunReviewPrompt(task, approvedTestPlan, testResult, state);

  const result = await claudeCreatePlan(
    reviewPrompt,
    `Project: ${state.name}\nLanguage: ${state.language}`,
    state.language,
    (msg) => onProgress?.('test-review', msg),
  );

  if (!result.success) {
    // If AI fails, default to a conservative review based on raw results
    return buildFallbackTestRunReview(
      testResult,
      result.error || 'Tester review unavailable; falling back to raw test results.',
      ['raw test output'],
      'Full tester review could not be generated',
    );
  }

  // Parse the structured review: a ```json fence first, otherwise the widest
  // {...} span that mentions "verdict".
  try {
    const jsonMatch =
      result.response.match(/```json\s*([\s\S]*?)```/) ||
      result.response.match(/\{[\s\S]*"verdict"[\s\S]*\}/);
    if (jsonMatch) {
      const parsed: unknown = JSON.parse(jsonMatch[1] || jsonMatch[0]);
      const review = normalizeTestRunReview(parsed);
      if (review) {
        return review;
      }
    }
  } catch {
    // Parse failed -- construct from text below
  }

  onProgress?.('test-review', 'Could not parse structured review; falling back to raw test results.');

  // Fallback: construct review from raw AI response
  return buildFallbackTestRunReview(
    testResult,
    result.response.slice(0, 500),
    ['test output', 'AI review response'],
    'Structured review could not be parsed',
  );
}
576
+
577
/**
 * Ask the Tester for a fix plan after a failed test run.
 *
 * When the AI call does not succeed, a short fallback plan is returned that
 * restates the review summary and failures.
 *
 * @param task - The task whose tests failed
 * @param approvedTestPlan - The approved test plan text
 * @param testResult - Test execution results
 * @param review - The Tester's review with verdict
 * @param state - Current project state
 * @param onProgress - Progress callback
 * @returns Fix plan text for consensus review
 */
export async function createTesterFixPlan(
  task: Task,
  approvedTestPlan: string,
  testResult: TestResult,
  review: TestRunReview,
  state: ProjectState,
  onProgress?: (phase: string, message: string) => void,
): Promise<string> {
  onProgress?.('test-review', 'Tester is creating a fix plan...');

  const prompt = buildTestFixPlanPrompt(task, approvedTestPlan, testResult, review, state);
  const projectContext = `Project: ${state.name}\nLanguage: ${state.language}`;

  const outcome = await claudeCreatePlan(
    prompt,
    projectContext,
    state.language,
    (msg) => onProgress?.('test-review', msg),
  );

  if (outcome.success) {
    return outcome.response;
  }

  // No AI-generated plan: hand back the review details as a minimal plan.
  return `## Tester Fix Plan (auto-generated fallback)\n\nThe Tester could not generate a structured fix plan.\n\nReview summary: ${review.summary}\nFailures: ${review.failures.join('; ')}\n\nPlease address the test failures listed above.`;
}
613
+
614
+ // ============================================================================
615
+ // Documentation
616
+ // ============================================================================
617
+
618
/**
 * Write an approved test plan as markdown under docs/qa/test-plans/.
 *
 * The target directory is created if missing. The file name is derived from
 * the milestone id (with the 'milestone-' prefix removed) and the part of the
 * task id after '-task-', falling back to '1' when that part is absent or
 * empty. An existing file with the same name is overwritten.
 *
 * @param projectDir - Project root
 * @param milestone - Parent milestone
 * @param task - The task
 * @param testPlan - Test plan text
 * @param consensusResult - Consensus outcome
 * @returns Relative path to the doc
 */
export async function documentTestPlan(
  projectDir: string,
  milestone: Milestone,
  task: Task,
  testPlan: string,
  consensusResult: ConsensusProcessResult,
): Promise<string> {
  const targetDir = path.join(projectDir, 'docs', 'qa', 'test-plans');
  await fs.mkdir(targetDir, { recursive: true });

  const milestoneSuffix = milestone.id.replace('milestone-', '');
  const [, taskSuffix] = task.id.split('-task-');
  const filename = `milestone_${milestoneSuffix}_task_${taskSuffix || '1'}.md`;

  // The review-notes section is emitted only when concerns were recorded.
  const concerns = consensusResult.finalConcerns;
  let reviewNotes = '';
  if (concerns.length > 0) {
    const bulletList = concerns.map((concern) => `- ${concern}`).join('\n');
    reviewNotes = `## Review Notes\n${bulletList}\n`;
  }

  const status = consensusResult.approved ? 'APPROVED' : 'NOT APPROVED';

  // Interpolated values are wrapped in template literals so that missing
  // values render exactly as they would inside a single template string.
  const markdown = [
    `# QA Test Plan: ${task.name}`,
    '',
    '## Metadata',
    `- **Milestone**: ${milestone.name}`,
    `- **Task ID**: ${task.id}`,
    `- **Consensus Score**: ${consensusResult.finalScore}%`,
    `- **Iterations**: ${consensusResult.totalIterations}`,
    `- **Status**: ${status}`,
    `- **Generated**: ${new Date().toISOString()}`,
    '',
    '## Task Description',
    `${task.description}`,
    '',
    '## Test Plan',
    `${testPlan}`,
    '',
    reviewNotes,
    '',
  ].join('\n');

  await fs.writeFile(path.join(targetDir, filename), markdown, 'utf-8');
  return `docs/qa/test-plans/${filename}`;
}
665
+
666
/**
 * Write a test run review as markdown under docs/qa/test-runs/.
 *
 * The target directory is created if missing. The file name is derived from
 * the milestone id (with the 'milestone-' prefix removed) and the part of the
 * task id after '-task-', falling back to '1' when that part is absent or
 * empty. An existing file with the same name is overwritten.
 *
 * List fields of the review (`evidenceReviewed`, `failures`, `gaps`,
 * `recommendations`) are treated as empty when they are not arrays, so a
 * partially populated review still produces a document instead of throwing.
 *
 * @param projectDir - Project root
 * @param milestone - Parent milestone
 * @param task - The task
 * @param review - Tester's review
 * @returns Relative path to the doc
 */
export async function documentTestReview(
  projectDir: string,
  milestone: Milestone,
  task: Task,
  review: TestRunReview,
): Promise<string> {
  const docsDir = path.join(projectDir, 'docs', 'qa', 'test-runs');
  await fs.mkdir(docsDir, { recursive: true });

  const milestoneNum = milestone.id.replace('milestone-', '');
  const taskNum = task.id.split('-task-')[1] || '1';
  const filename = `milestone_${milestoneNum}_task_${taskNum}.md`;
  const docPath = path.join(docsDir, filename);

  // A review may have been built from parsed model output that was cast
  // without validation, so its list fields are not guaranteed to be arrays
  // at runtime.
  const toList = (value: unknown): string[] => (Array.isArray(value) ? value : []);
  const toBullets = (items: string[]): string => items.map((item) => `- ${item}`).join('\n');
  // Optional sections are omitted entirely (empty string) when they have no entries.
  const toSection = (title: string, items: string[]): string =>
    items.length > 0 ? `## ${title}\n${toBullets(items)}\n` : '';

  const content = `# QA Test Review: ${task.name}

## Verdict: ${review.verdict}

## Summary
${review.summary}

## Evidence Reviewed
${toBullets(toList(review.evidenceReviewed))}

${toSection('Failures', toList(review.failures))}
${toSection('Gaps', toList(review.gaps))}
${toSection('Recommendations', toList(review.recommendations))}

- **Generated**: ${new Date().toISOString()}
`;

  await fs.writeFile(docPath, content, 'utf-8');
  return `docs/qa/test-runs/${filename}`;
}
709
+
710
+ // ============================================================================
711
+ // Helpers
712
+ // ============================================================================
713
+
714
/**
 * Report whether the QA Tester skill is switched on for a project.
 *
 * Only a `qaEnabled` value that is strictly `true` counts as enabled; a
 * missing flag (e.g. a project state saved before the flag existed) or any
 * other value is treated as disabled.
 *
 * @param state - Current project state
 * @returns `true` only when `state.qaEnabled` is exactly `true`
 */
export function isQaEnabled(state: ProjectState): boolean {
  const { qaEnabled } = state;
  return qaEnabled === true;
}