supipowers 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/package.json +21 -6
  2. package/skills/debugging/SKILL.md +54 -15
  3. package/skills/planning/SKILL.md +70 -10
  4. package/skills/receiving-code-review/SKILL.md +87 -0
  5. package/skills/tdd/SKILL.md +83 -0
  6. package/skills/verification/SKILL.md +54 -0
  7. package/src/commands/plan.ts +96 -31
  8. package/src/commands/qa.ts +150 -29
  9. package/src/commands/release.ts +1 -1
  10. package/src/commands/review.ts +2 -2
  11. package/src/commands/run.ts +52 -2
  12. package/src/commands/update.ts +2 -2
  13. package/src/discipline/debugging.ts +57 -0
  14. package/src/discipline/receiving-review.ts +65 -0
  15. package/src/discipline/tdd.ts +77 -0
  16. package/src/discipline/verification.ts +68 -0
  17. package/src/git/branch-finish.ts +101 -0
  18. package/src/git/worktree.ts +119 -0
  19. package/src/index.ts +11 -2
  20. package/src/lsp/detector.ts +2 -2
  21. package/src/orchestrator/agent-prompts.ts +282 -0
  22. package/src/orchestrator/dispatcher.ts +150 -1
  23. package/src/orchestrator/prompts.ts +17 -31
  24. package/src/planning/plan-reviewer.ts +49 -0
  25. package/src/planning/plan-writer-prompt.ts +173 -0
  26. package/src/planning/prompt-builder.ts +178 -0
  27. package/src/planning/spec-reviewer.ts +43 -0
  28. package/src/qa/phases/discovery.ts +34 -0
  29. package/src/qa/phases/execution.ts +65 -0
  30. package/src/qa/phases/matrix.ts +41 -0
  31. package/src/qa/phases/reporting.ts +71 -0
  32. package/src/qa/session.ts +104 -0
  33. package/src/storage/qa-sessions.ts +83 -0
  34. package/src/storage/specs.ts +36 -0
  35. package/src/types.ts +70 -0
  36. package/src/visual/companion.ts +115 -0
  37. package/src/visual/prompt-instructions.ts +102 -0
  38. package/src/visual/scripts/frame-template.html +201 -0
  39. package/src/visual/scripts/helper.js +88 -0
  40. package/src/visual/scripts/index.js +148 -0
  41. package/src/visual/scripts/package.json +10 -0
  42. package/src/visual/scripts/start-server.sh +98 -0
  43. package/src/visual/scripts/stop-server.sh +21 -0
  44. package/src/visual/types.ts +16 -0
@@ -1,11 +1,30 @@
1
1
  import type { ExtensionAPI } from "@oh-my-pi/pi-coding-agent";
2
2
  import { detectAndCache } from "../qa/detector.js";
3
- import { buildQaRunPrompt } from "../qa/runner.js";
4
3
  import { notifyInfo, notifyError } from "../notifications/renderer.js";
4
+ import { findActiveSession, findSessionWithFailures } from "../storage/qa-sessions.js";
5
+ import {
6
+ createNewSession,
7
+ advancePhase,
8
+ getFailedTests,
9
+ getNextPhase,
10
+ getPhaseStatusLine,
11
+ } from "../qa/session.js";
12
+ import { buildDiscoveryPrompt } from "../qa/phases/discovery.js";
13
+ import { buildMatrixPrompt } from "../qa/phases/matrix.js";
14
+ import { buildExecutionPrompt } from "../qa/phases/execution.js";
15
+ import { buildReportingPrompt } from "../qa/phases/reporting.js";
16
+ import type { QaPhase, QaSessionLedger } from "../types.js";
17
+
18
+ const PHASE_LABELS: Record<QaPhase, string> = {
19
+ discovery: "Discovery — Scan for test cases",
20
+ matrix: "Matrix — Build traceability matrix",
21
+ execution: "Execution — Run tests",
22
+ reporting: "Reporting — Generate summary",
23
+ };
5
24
 
6
25
  export function registerQaCommand(pi: ExtensionAPI): void {
7
26
  pi.registerCommand("supi:qa", {
8
- description: "Run QA pipeline (test suite, E2E)",
27
+ description: "Run QA pipeline with session management (discovery → matrix → execution → reporting)",
9
28
  async handler(args, ctx) {
10
29
  const framework = detectAndCache(ctx.cwd);
11
30
 
@@ -13,48 +32,150 @@ export function registerQaCommand(pi: ExtensionAPI): void {
13
32
  notifyError(
14
33
  ctx,
15
34
  "No test framework detected",
16
- "Configure manually: /supi:config set qa.framework vitest && /supi:config set qa.command 'npx vitest run'"
35
+ "Configure manually via /supi:config"
17
36
  );
18
37
  return;
19
38
  }
20
39
 
21
- let scope: "all" | "changed" | "e2e" = "all";
22
- let changedFiles: string[] | undefined;
23
-
24
- if (args?.includes("--changed")) {
25
- scope = "changed";
26
- } else if (args?.includes("--e2e")) {
27
- scope = "e2e";
28
- } else if (ctx.hasUI && !args?.trim()) {
29
- // No flag provided — let the user pick
30
- const choice = await ctx.ui.select(
31
- "QA scope",
32
- ["all — Run all tests", "changed — Only changed files", "e2e — E2E / Playwright only"],
33
- { helpText: "Select test scope · Esc to cancel" },
34
- );
35
- if (!choice) return;
36
- scope = choice.split(" — ")[0] as "all" | "changed" | "e2e";
40
+ // ── Step 1: Session selection ──────────────────────────────────
41
+ let ledger: QaSessionLedger | null = null;
42
+
43
+ const activeSession = findActiveSession(ctx.cwd);
44
+ const failedSession = findSessionWithFailures(ctx.cwd);
45
+
46
+ if (ctx.hasUI && !args?.trim()) {
47
+ const sessionOptions: string[] = [];
48
+
49
+ if (failedSession) {
50
+ const failCount = failedSession.results.filter((r) => r.status === "fail").length;
51
+ sessionOptions.push(`Resume ${failedSession.id} (${failCount} failed test${failCount !== 1 ? "s" : ""})`);
52
+ } else if (activeSession) {
53
+ const next = getNextPhase(activeSession);
54
+ sessionOptions.push(`Resume ${activeSession.id} (${next ?? "all phases done"} pending)`);
55
+ }
56
+
57
+ sessionOptions.push("Start new session");
58
+
59
+ if (sessionOptions.length > 1) {
60
+ const choice = await ctx.ui.select(
61
+ "QA Session",
62
+ sessionOptions,
63
+ { helpText: "Select session · Esc to cancel" },
64
+ );
65
+ if (!choice) return;
66
+
67
+ if (choice.startsWith("Resume")) {
68
+ ledger = failedSession ?? activeSession;
69
+ }
70
+ }
37
71
  }
38
72
 
39
- if (scope === "changed") {
40
- try {
41
- const result = await pi.exec("git", ["diff", "--name-only", "HEAD"], { cwd: ctx.cwd });
42
- if (result.exitCode === 0) {
43
- changedFiles = result.stdout.split("\n").filter((f) => f.trim().length > 0);
73
+ // Create new session if none selected
74
+ if (!ledger) {
75
+ ledger = createNewSession(ctx.cwd, framework.name);
76
+ notifyInfo(ctx, "QA session created", ledger.id);
77
+ }
78
+
79
+ // ── Step 2: Phase selection ────────────────────────────────────
80
+ type PhaseAction =
81
+ | { type: "run-phase"; phase: QaPhase }
82
+ | { type: "rerun-failed" };
83
+
84
+ let action: PhaseAction | null = null;
85
+ const nextPhase = getNextPhase(ledger);
86
+ const failedTests = getFailedTests(ledger);
87
+
88
+ if (ctx.hasUI && !args?.trim()) {
89
+ const phaseOptions: string[] = [];
90
+
91
+ // Offer re-run failed if there are failures
92
+ if (failedTests.length > 0) {
93
+ phaseOptions.push(`Re-run ${failedTests.length} failed test${failedTests.length !== 1 ? "s" : ""} only`);
94
+ }
95
+
96
+ // Offer starting from next pending phase
97
+ if (nextPhase) {
98
+ phaseOptions.push(PHASE_LABELS[nextPhase]);
99
+ }
100
+
101
+ if (phaseOptions.length > 1) {
102
+ const statusLine = getPhaseStatusLine(ledger);
103
+ const choice = await ctx.ui.select(
104
+ `QA Phase · ${statusLine}`,
105
+ phaseOptions,
106
+ { helpText: "Select action · Esc to cancel" },
107
+ );
108
+ if (!choice) return;
109
+
110
+ if (choice.startsWith("Re-run")) {
111
+ action = { type: "rerun-failed" };
112
+ } else {
113
+ // Extract phase from label
114
+ const selectedPhase = (Object.entries(PHASE_LABELS) as [QaPhase, string][])
115
+ .find(([, label]) => label === choice)?.[0];
116
+ if (selectedPhase) {
117
+ action = { type: "run-phase", phase: selectedPhase };
118
+ }
44
119
  }
45
- } catch {
46
- scope = "all";
120
+ } else if (nextPhase) {
121
+ // Only one option — just run the next phase
122
+ action = { type: "run-phase", phase: nextPhase };
47
123
  }
124
+ } else if (nextPhase) {
125
+ action = { type: "run-phase", phase: nextPhase };
126
+ }
127
+
128
+ if (!action) {
129
+ notifyInfo(ctx, "QA pipeline complete", getPhaseStatusLine(ledger));
130
+ return;
48
131
  }
49
132
 
50
- notifyInfo(ctx, "QA started", `${framework.name} | scope: ${scope}`);
133
+ // ── Step 3: Execute ────────────────────────────────────────────
134
+ let prompt: string;
135
+
136
+ if (action.type === "rerun-failed") {
137
+ ledger = advancePhase(ctx.cwd, ledger, "execution", "running");
138
+ prompt = buildExecutionPrompt(ledger, { failedOnly: true, failedTests });
139
+ notifyInfo(ctx, "QA re-running failed tests", `${failedTests.length} test(s)`);
140
+ } else {
141
+ const phase = action.phase;
142
+ ledger = advancePhase(ctx.cwd, ledger, phase, "running");
143
+
144
+ switch (phase) {
145
+ case "discovery":
146
+ prompt = buildDiscoveryPrompt(framework, ctx.cwd);
147
+ break;
148
+ case "matrix":
149
+ prompt = buildMatrixPrompt(ledger);
150
+ break;
151
+ case "execution":
152
+ prompt = buildExecutionPrompt(ledger);
153
+ break;
154
+ case "reporting":
155
+ prompt = buildReportingPrompt(ledger);
156
+ break;
157
+ }
158
+
159
+ notifyInfo(ctx, `QA phase: ${phase}`, `session: ${ledger.id}`);
160
+ }
51
161
 
52
- const prompt = buildQaRunPrompt(framework.command, scope, changedFiles);
162
+ // Include session context for the sub-agent
163
+ const sessionContext = [
164
+ `\n\n## QA Session Context`,
165
+ ``,
166
+ `Session ID: ${ledger.id}`,
167
+ `Session ledger path: .omp/supipowers/qa-sessions/${ledger.id}/ledger.json`,
168
+ ``,
169
+ `Current ledger state:`,
170
+ "```json",
171
+ JSON.stringify(ledger, null, 2),
172
+ "```",
173
+ ].join("\n");
53
174
 
54
175
  pi.sendMessage(
55
176
  {
56
177
  customType: "supi-qa",
57
- content: [{ type: "text", text: prompt }],
178
+ content: [{ type: "text", text: prompt + sessionContext }],
58
179
  display: "none",
59
180
  },
60
181
  { deliverAs: "steer" }
@@ -12,7 +12,7 @@ export function registerReleaseCommand(pi: ExtensionAPI): void {
12
12
  let lastTag: string | null = null;
13
13
  try {
14
14
  const result = await pi.exec("git", ["describe", "--tags", "--abbrev=0"], { cwd: ctx.cwd });
15
- if (result.exitCode === 0) lastTag = result.stdout.trim();
15
+ if (result.code === 0) lastTag = result.stdout.trim();
16
16
  } catch {
17
17
  // no tags yet
18
18
  }
@@ -49,7 +49,7 @@ export function registerReviewCommand(pi: ExtensionAPI): void {
49
49
  let changedFiles: string[] = [];
50
50
  try {
51
51
  const result = await pi.exec("git", ["diff", "--name-only", "HEAD"], { cwd: ctx.cwd });
52
- if (result.exitCode === 0) {
52
+ if (result.code === 0) {
53
53
  changedFiles = result.stdout
54
54
  .split("\n")
55
55
  .map((f) => f.trim())
@@ -62,7 +62,7 @@ export function registerReviewCommand(pi: ExtensionAPI): void {
62
62
  if (changedFiles.length === 0) {
63
63
  try {
64
64
  const result = await pi.exec("git", ["diff", "--name-only", "--cached"], { cwd: ctx.cwd });
65
- if (result.exitCode === 0) {
65
+ if (result.code === 0) {
66
66
  changedFiles = result.stdout
67
67
  .split("\n")
68
68
  .map((f) => f.trim())
@@ -11,7 +11,7 @@ import {
11
11
  loadAllAgentResults,
12
12
  } from "../storage/runs.js";
13
13
  import { scheduleBatches } from "../orchestrator/batch-scheduler.js";
14
- import { dispatchAgent, dispatchFixAgent } from "../orchestrator/dispatcher.js";
14
+ import { dispatchAgent, dispatchAgentWithReview, dispatchFixAgent } from "../orchestrator/dispatcher.js";
15
15
  import { summarizeBatch, buildRunSummary } from "../orchestrator/result-collector.js";
16
16
  import { analyzeConflicts } from "../orchestrator/conflict-resolver.js";
17
17
  import { isLspAvailable } from "../lsp/detector.js";
@@ -22,6 +22,8 @@ import {
22
22
  notifyError,
23
23
  notifySummary,
24
24
  } from "../notifications/renderer.js";
25
+ import { buildWorktreePrompt } from "../git/worktree.js";
26
+ import { buildBranchFinishPrompt } from "../git/branch-finish.js";
25
27
  import type { RunManifest, AgentResult } from "../types.js";
26
28
 
27
29
  export function registerRunCommand(pi: ExtensionAPI): void {
@@ -32,6 +34,7 @@ export function registerRunCommand(pi: ExtensionAPI): void {
32
34
  const profile = resolveProfile(ctx.cwd, config, args?.replace("--profile ", "") || undefined);
33
35
 
34
36
  let manifest = findActiveRun(ctx.cwd);
37
+ let branchName: string | null = null;
35
38
 
36
39
  if (!manifest) {
37
40
  const plans = listPlans(ctx.cwd);
@@ -60,6 +63,36 @@ export function registerRunCommand(pi: ExtensionAPI): void {
60
63
  };
61
64
  createRun(ctx.cwd, manifest);
62
65
  notifyInfo(ctx, `Run started: ${manifest.id}`, `${plan.tasks.length} tasks in ${batches.length} batches`);
66
+
67
+ // Offer worktree setup for isolated execution
68
+ if (ctx.hasUI) {
69
+ const useWorktree = await ctx.ui.select(
70
+ "Execution isolation",
71
+ [
72
+ "Run in current workspace",
73
+ "Create isolated worktree (recommended)",
74
+ ],
75
+ { helpText: "Worktrees prevent work-in-progress from polluting your workspace" },
76
+ );
77
+ if (!useWorktree) return;
78
+
79
+ if (useWorktree.startsWith("Create isolated")) {
80
+ branchName = `supi/${plan.name || manifest.id}`;
81
+ const worktreeInstructions = buildWorktreePrompt({
82
+ branchName,
83
+ cwd: ctx.cwd,
84
+ });
85
+ pi.sendMessage(
86
+ {
87
+ customType: "supi-worktree-setup",
88
+ content: [{ type: "text", text: worktreeInstructions }],
89
+ display: "none",
90
+ },
91
+ { deliverAs: "steer" },
92
+ );
93
+ notifyInfo(ctx, "Setting up worktree", `Branch: ${branchName}`);
94
+ }
95
+ }
63
96
  } else {
64
97
  notifyInfo(ctx, `Resuming run: ${manifest.id}`);
65
98
  }
@@ -89,7 +122,7 @@ export function registerRunCommand(pi: ExtensionAPI): void {
89
122
  const task = plan.tasks.find((t) => t.id === taskId);
90
123
  if (!task) return Promise.resolve(null);
91
124
 
92
- return dispatchAgent({
125
+ return dispatchAgentWithReview({
93
126
  pi,
94
127
  ctx,
95
128
  task,
@@ -170,6 +203,23 @@ export function registerRunCommand(pi: ExtensionAPI): void {
170
203
  `(${runSummary.done} clean, ${runSummary.doneWithConcerns} with concerns, ` +
171
204
  `${runSummary.blocked} blocked) | ${runSummary.totalFilesChanged} files | ${durationSec}s`
172
205
  );
206
+
207
+ // Offer branch finish options if we created a worktree branch
208
+ if (branchName && manifest.status === "completed") {
209
+ const finishInstructions = buildBranchFinishPrompt({
210
+ branchName,
211
+ baseBranch: "main",
212
+ });
213
+ pi.sendMessage(
214
+ {
215
+ customType: "supi-branch-finish",
216
+ content: [{ type: "text", text: finishInstructions }],
217
+ display: "none",
218
+ },
219
+ { deliverAs: "steer" },
220
+ );
221
+ notifyInfo(ctx, "Run succeeded", "Follow branch finish instructions to integrate your work");
222
+ }
173
223
  },
174
224
  });
175
225
  }
@@ -24,7 +24,7 @@ export function handleUpdate(pi: ExtensionAPI, ctx: ExtensionContext): void {
24
24
 
25
25
  // Check latest version on npm
26
26
  const checkResult = await pi.exec("npm", ["view", "supipowers", "version"], { cwd: tmpdir() });
27
- if (checkResult.exitCode !== 0) {
27
+ if (checkResult.code !== 0) {
28
28
  ctx.ui.notify("Failed to check for updates — npm view failed", "error");
29
29
  return;
30
30
  }
@@ -46,7 +46,7 @@ export function handleUpdate(pi: ExtensionAPI, ctx: ExtensionContext): void {
46
46
  "npm", ["install", "--prefix", tempDir, `supipowers@${latestVersion}`],
47
47
  { cwd: tempDir },
48
48
  );
49
- if (installResult.exitCode !== 0) {
49
+ if (installResult.code !== 0) {
50
50
  ctx.ui.notify("Failed to download latest version", "error");
51
51
  return;
52
52
  }
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Systematic debugging instructions for sub-agent prompts.
3
+ * Matches superpowers' systematic-debugging skill depth.
4
+ */
5
+ export function buildDebuggingInstructions(): string {
6
+ return [
7
+ "## Systematic Debugging",
8
+ "",
9
+ "### Iron Law",
10
+ "",
11
+ "NO FIXES WITHOUT ROOT CAUSE INVESTIGATION FIRST.",
12
+ "Symptom fixes are failure. Find root cause before attempting any fix.",
13
+ "",
14
+ "### Phase 1: Root Cause Investigation",
15
+ "",
16
+ "Complete this phase before proposing fixes:",
17
+ "",
18
+ "1. **Read the error message carefully.** Don't skip it; it often contains the solution.",
19
+ "2. **Reproduce consistently.** Exact steps, every time.",
20
+ "3. **Check recent changes.** Run `git diff` to see what changed. Check new dependencies, config changes.",
21
+ "4. **Gather evidence** in multi-component systems: add diagnostic instrumentation at each boundary.",
22
+ "5. **Trace the data flow** backward through the call stack to find the original trigger.",
23
+ "",
24
+ "### Phase 2: Pattern Analysis",
25
+ "",
26
+ "1. Find a working example in the codebase that does something similar.",
27
+ "2. Compare against the reference completely (don't skim).",
28
+ "3. Identify differences between working and broken.",
29
+ "4. Understand dependencies and assumptions.",
30
+ "",
31
+ "### Phase 3: Hypothesis and Testing",
32
+ "",
33
+ "1. Form a single, specific hypothesis (not vague).",
34
+ "2. Test minimally: smallest possible change, one variable at a time.",
35
+ "3. Verify before continuing. If wrong → form a NEW hypothesis, not more fixes.",
36
+ "4. Admit uncertainty. Don't pretend to know.",
37
+ "",
38
+ "### Phase 4: Implementation",
39
+ "",
40
+ "1. Create a failing test case first (automated or manual).",
41
+ "2. Implement a single fix that addresses the root cause only.",
42
+ "3. Verify the fix: test passes, no other tests broken.",
43
+ "4. **If the fix doesn't work:**",
44
+ " - Fewer than 3 attempts → return to Phase 1 with new information",
45
+ " - 3 or more attempts → STOP and question the architecture. Discuss with human partner.",
46
+ "",
47
+ "### Red Flags — STOP and Follow the Process",
48
+ "",
49
+ "- \"Quick fix for now, investigate later\"",
50
+ "- \"Just try changing X and see if it works\"",
51
+ "- \"Skip the test, I'll manually verify\"",
52
+ "- \"It's probably X, let me fix that\"",
53
+ "- \"I don't fully understand but this might work\"",
54
+ "- \"One more fix attempt\" (when already tried 2+)",
55
+ "- Each fix reveals a new problem in a different place",
56
+ ].join("\n");
57
+ }
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Receiving code review instructions for sub-agent prompts.
3
+ * Matches superpowers' receiving-code-review skill:
4
+ * technical rigor and verification, not performative agreement.
5
+ */
6
+ export function buildReceivingReviewInstructions(): string {
7
+ return [
8
+ "## Receiving Code Review Feedback",
9
+ "",
10
+ "Code review requires technical evaluation, not emotional performance.",
11
+ "Verify before implementing. Ask before assuming. Technical correctness over social comfort.",
12
+ "",
13
+ "### The Response Pattern",
14
+ "",
15
+ "1. **READ:** Complete feedback without reacting.",
16
+ "2. **UNDERSTAND:** Restate the requirement in your own words, or ask for clarification.",
17
+ "3. **VERIFY:** Check against codebase reality.",
18
+ "4. **EVALUATE:** Is this technically sound for THIS codebase?",
19
+ "5. **RESPOND:** Technical acknowledgment or reasoned pushback.",
20
+ "6. **IMPLEMENT:** One at a time, test each change.",
21
+ "",
22
+ "### Forbidden Responses",
23
+ "",
24
+ "Never use performative agreement:",
25
+ "- \"You're absolutely right!\"",
26
+ "- \"Great point!\"",
27
+ "- \"Excellent catch!\"",
28
+ "",
29
+ "Instead: restate requirements, ask clarifying questions, take action.",
30
+ "",
31
+ "### Handling Unclear Feedback",
32
+ "",
33
+ "If any item is unclear, stop and ask for clarification before implementing anything.",
34
+ "Items may be related — clarify all unclear items before starting work.",
35
+ "",
36
+ "### Source-Specific Handling",
37
+ "",
38
+ "**From your human partner:** Trusted. Implement after understanding.",
39
+ "**From external reviewers:** Verify technically. Check for breaking changes.",
40
+ "Question whether the reviewer understands the full context.",
41
+ "",
42
+ "### YAGNI Check",
43
+ "",
44
+ "For suggested \"professional features\" — grep the codebase for actual usage.",
45
+ "If unused, suggest removal instead of implementing.",
46
+ "",
47
+ "### Implementation Order",
48
+ "",
49
+ "1. Clarify all unclear items first.",
50
+ "2. Then implement in order: blocking issues → simple fixes → complex fixes.",
51
+ "3. Test each change before moving to the next.",
52
+ "",
53
+ "### When to Push Back",
54
+ "",
55
+ "Push back when feedback would introduce bugs, break existing behavior,",
56
+ "add unnecessary complexity, or contradicts the codebase's established patterns.",
57
+ "Use technical reasoning, not defensiveness.",
58
+ "",
59
+ "### The Bottom Line",
60
+ "",
61
+ "External feedback = suggestions to evaluate, not orders to follow.",
62
+ "Verify before implementing. Question. Then implement.",
63
+ "No performative agreement. Technical rigor always.",
64
+ ].join("\n");
65
+ }
@@ -0,0 +1,77 @@
1
+ /**
2
+ * TDD enforcement instructions for sub-agent prompts.
3
+ * Matches superpowers' test-driven-development skill depth.
4
+ */
5
+ export function buildTddInstructions(): string {
6
+ return [
7
+ "## Test-Driven Development",
8
+ "",
9
+ "### Iron Law",
10
+ "",
11
+ "NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST.",
12
+ "",
13
+ "Write code before the test? Delete it. Start over.",
14
+ "",
15
+ "### Red-Green-Refactor Cycle",
16
+ "",
17
+ "**RED — Write failing test:**",
18
+ "- One behavior per test, clear name, real code (no mocks unless unavoidable)",
19
+ "- Watch it fail. MANDATORY. Never skip.",
20
+ "- Confirm: test fails (not errors), failure message is expected, fails because feature is missing",
21
+ "- Test passes? You're testing existing behavior. Fix the test.",
22
+ "",
23
+ "**GREEN — Write the simplest code to pass:**",
24
+ "- Minimal, simplest code that makes the test pass",
25
+ "- Don't add features, refactor other code, or improve beyond the test",
26
+ "- Watch it pass. MANDATORY.",
27
+ "- Confirm: test passes, other tests still pass, output pristine",
28
+ "- Test fails? Fix code, not test. Other tests fail? Fix now.",
29
+ "",
30
+ "**REFACTOR — Clean up (after green only):**",
31
+ "- Remove duplication, improve names, extract helpers",
32
+ "- Keep tests green. Don't add behavior.",
33
+ "",
34
+ "### Verification Checklist",
35
+ "",
36
+ "Before marking work complete:",
37
+ "- Every new function/method has a test",
38
+ "- Watched each test fail before implementing",
39
+ "- Each test failed for expected reason (feature missing, not typo)",
40
+ "- Wrote minimal code to pass each test",
41
+ "- All tests pass with pristine output",
42
+ "- Tests use real code (mocks only if unavoidable)",
43
+ "- Edge cases and errors covered",
44
+ "",
45
+ "### Red Flags — STOP and Start Over",
46
+ "",
47
+ "- Code before test",
48
+ "- Test after implementation",
49
+ "- Test passes immediately",
50
+ "- Can't explain why test failed",
51
+ "- Tests added later",
52
+ "- Rationalizing \"just this once\"",
53
+ "",
54
+ "### Testing Anti-Patterns",
55
+ "",
56
+ "- Don't test mock behavior instead of real behavior",
57
+ "- Don't add test-only methods to production classes",
58
+ "- Don't mock without understanding dependencies",
59
+ "- Don't use incomplete mocks (partial data structures)",
60
+ "- Mocks are tools to isolate, not things to test",
61
+ "",
62
+ "### When Stuck",
63
+ "",
64
+ "| Problem | Solution |",
65
+ "|---------|----------|",
66
+ "| Don't know how to test | Write wished-for API. Write assertion first. Ask. |",
67
+ "| Test too complicated | Design too complicated. Simplify interface. |",
68
+ "| Must mock everything | Code too coupled. Use dependency injection. |",
69
+ "| Test setup huge | Extract helpers. Still complex? Simplify design. |",
70
+ "",
71
+ "### Bug Fix Flow",
72
+ "",
73
+ "Bug found? Write a failing test reproducing it first.",
74
+ "Follow the TDD cycle. The test proves the fix and prevents regression.",
75
+ "Never fix bugs without a failing test.",
76
+ ].join("\n");
77
+ }
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Verification-before-completion instructions for sub-agent prompts.
3
+ * Matches superpowers' verification-before-completion skill depth.
4
+ */
5
+ export function buildVerificationInstructions(): string {
6
+ return [
7
+ "## Verification Before Completion",
8
+ "",
9
+ "### Iron Law",
10
+ "",
11
+ "NO COMPLETION CLAIMS WITHOUT FRESH VERIFICATION EVIDENCE.",
12
+ "Evidence before assertions, always.",
13
+ "",
14
+ "### The Gate Function (Mandatory Before Any Status Claim)",
15
+ "",
16
+ "1. **IDENTIFY:** What command proves this claim?",
17
+ "2. **RUN:** Execute the FULL command (fresh, complete).",
18
+ "3. **READ:** Full output. Check exit code. Count failures.",
19
+ "4. **VERIFY:** Does output confirm the claim?",
20
+ " - If NO: State actual status with evidence.",
21
+ " - If YES: State claim WITH evidence.",
22
+ "5. **ONLY THEN:** Make the claim.",
23
+ "",
24
+ "Skip any step = lying, not verifying.",
25
+ "",
26
+ "### Common Failure Patterns",
27
+ "",
28
+ "| Claim | Requires | Not Sufficient |",
29
+ "|-------|----------|----------------|",
30
+ "| Tests pass | Test command output: 0 failures | Previous run, \"should pass\" |",
31
+ "| Build succeeds | Build command: exit 0 | Linter passing, logs look good |",
32
+ "| Bug fixed | Test original symptom: passes | Code changed, assumed fixed |",
33
+ "| Regression test works | Red-green cycle verified | Test passes once |",
34
+ "| Agent completed | VCS diff shows changes | Agent reports \"success\" |",
35
+ "| Requirements met | Line-by-line checklist | Tests passing |",
36
+ "",
37
+ "### Red Flags — STOP Before Claiming",
38
+ "",
39
+ "- Using \"should\", \"probably\", \"seems to\"",
40
+ "- Expressing satisfaction before verification (\"Great!\", \"Perfect!\", \"Done!\")",
41
+ "- About to commit/push/PR without verification",
42
+ "- Trusting agent success reports without checking",
43
+ "- Relying on partial verification",
44
+ "- Thinking \"just this once\"",
45
+ "",
46
+ "### Verification Patterns",
47
+ "",
48
+ "**Tests:**",
49
+ "- Run test command → see actual pass count → then claim",
50
+ "- Never say \"should pass now\" or \"looks correct\"",
51
+ "",
52
+ "**Regression tests (TDD red-green):**",
53
+ "- Write → Run (pass) → Revert fix → Run (MUST FAIL) → Restore → Run (pass)",
54
+ "- Never say \"I've written a regression test\" without red-green verification",
55
+ "",
56
+ "**Build:**",
57
+ "- Run build → see exit 0 → then claim",
58
+ "- Linter passing does not prove compilation",
59
+ "",
60
+ "**Requirements:**",
61
+ "- Re-read plan → create checklist → verify each → report gaps or completion",
62
+ "- Never say \"tests pass, phase complete\" without checking requirements",
63
+ "",
64
+ "**Agent delegation:**",
65
+ "- Agent reports success → check VCS diff → verify changes → report actual state",
66
+ "- Never trust agent report without independent verification",
67
+ ].join("\n");
68
+ }