npm - supipowers - Versions diffs - 0.3.0 → 0.5.0 - Mend

supipowers 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/package.json +1 -1
package/skills/fix-pr/SKILL.md +99 -0
package/skills/qa-strategy/SKILL.md +103 -21
package/src/commands/fix-pr.ts +324 -0
package/src/commands/qa.ts +232 -148
package/src/commands/supi.ts +2 -1
package/src/config/defaults.ts +1 -0
package/src/config/schema.ts +1 -0
package/src/fix-pr/config.ts +36 -0
package/src/fix-pr/prompt-builder.ts +201 -0
package/src/fix-pr/scripts/diff-comments.sh +33 -0
package/src/fix-pr/scripts/fetch-pr-comments.sh +25 -0
package/src/fix-pr/scripts/trigger-review.sh +36 -0
package/src/fix-pr/scripts/wait-and-check.sh +37 -0
package/src/fix-pr/types.ts +71 -0
package/src/index.ts +2 -0
package/src/qa/config.ts +43 -0
package/src/qa/matrix.ts +84 -0
package/src/qa/prompt-builder.ts +212 -0
package/src/qa/scripts/detect-app-type.sh +68 -0
package/src/qa/scripts/discover-routes.sh +143 -0
package/src/qa/scripts/ensure-playwright.sh +38 -0
package/src/qa/scripts/run-e2e-tests.sh +99 -0
package/src/qa/scripts/start-dev-server.sh +46 -0
package/src/qa/scripts/stop-dev-server.sh +36 -0
package/src/qa/session.ts +39 -55
package/src/qa/types.ts +97 -0
package/src/storage/fix-pr-sessions.ts +59 -0
package/src/storage/qa-sessions.ts +9 -9
package/src/types.ts +1 -70
package/src/qa/detector.ts +0 -61
package/src/qa/phases/discovery.ts +0 -34
package/src/qa/phases/execution.ts +0 -65
package/src/qa/phases/matrix.ts +0 -41
package/src/qa/phases/reporting.ts +0 -71
package/src/qa/report.ts +0 -22
package/src/qa/runner.ts +0 -46

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "supipowers",
-  "version": "0.3.0",
+  "version": "0.5.0",
   "description": "OMP-native workflow extension inspired by Superpowers.",
   "type": "module",
   "scripts": {

package/skills/fix-pr/SKILL.md ADDED Viewed

@@ -0,0 +1,99 @@
+---
+name: fix-pr
+description: Critically assess PR review comments — verify, investigate ripple effects, then fix or reject with evidence
+---
+# PR Review Comment Assessment
+## Core Principle
+Review comments are suggestions to evaluate, not orders to follow.
+Assess each one critically before acting. The reviewer may lack context you have.
+## Assessment Framework
+### For Each Comment, Answer:
+1. **Is this valid?** Read the actual code being commented on. Does the concern apply?
+2. **Is this important?** Bug fix vs style preference vs premature optimization.
+3. **What breaks if we change this?** Trace callers, check tests, find ripple effects.
+4. **Does the reviewer have full context?** They often review diffs, not the full picture.
+5. **Is this YAGNI?** "You should also handle X" — but does X actually occur?
+### Verdict Categories
+- **ACCEPT**: Valid concern, should fix. Evidence: the code has the problem described.
+- **REJECT**: Invalid, unnecessary, or would cause harm. Evidence: why this doesn't apply.
+- **INVESTIGATE**: Need to check more before deciding. List what to check.
+### Investigation Protocol
+When INVESTIGATE:
+1. Read the file(s) mentioned in full (not just the diff)
+2. Search for usages of the symbol/pattern being discussed
+3. Check test coverage for the area
+4. Look at git blame — why is the code written this way?
+5. Then decide ACCEPT or REJECT with evidence
+## Ripple Effect Analysis
+Before accepting any change:
+1. **Who calls this?** Search for usages of the function/method/class
+2. **Who depends on this behavior?** Check tests that assert current behavior
+3. **What imports this?** Follow the dependency graph
+4. **Is this a public API?** Changes to public interfaces affect consumers
+If ripple effects are significant, note them in the plan so the fixer handles them.
+## Grouping Strategy
+Group comments that:
+- Touch the same file
+- Touch tightly coupled files (caller/callee, type/implementation)
+- Relate to the same logical concern (e.g., "error handling in module X")
+Keep separate:
+- Comments on unrelated files/areas
+- Cosmetic vs functional changes
+- Independent features or concerns
+## Comment Reply Guidelines
+### For ACCEPT:
+- "Fixed. [description of change]."
+- "Fixed in [file]. Also updated [related file] to maintain consistency."
+### For REJECT:
+- "Investigated — [reason this doesn't apply]. The current implementation [explanation]."
+- "This is intentional: [reason]. Changing it would [consequence]."
+### For grouped fixes:
+- "Addressed these comments together in [commit]. Changes: [bullet list]."
+**Never use performative agreement.** No "Great catch!", "You're absolutely right!", etc.
+Technical acknowledgment only.
+## Common Reviewer Mistakes to Watch For
+| Pattern | Reality |
+|---------|---------|
+| Suggesting abstraction for code used once | YAGNI — one usage doesn't need a helper |
+| Requesting error handling for impossible states | Trust internal code; only validate at boundaries |
+| Style preferences disguised as correctness | If it works and is readable, style is preference |
+| Suggesting patterns from a different language | Follow THIS codebase's patterns |
+| Not seeing the full file (diff-only context) | They may miss why code is structured this way |
+| "This could be a security issue" without specifics | Ask for the specific attack vector |
+| "Add tests for X" when X is already tested | Check before accepting |
+## Decision Record
+For each comment, record:
+```
+Comment #ID by @user on file:line
+Verdict: ACCEPT | REJECT | INVESTIGATE
+Reasoning: [1-2 sentences]
+Ripple effects: [list or "none"]
+Group: [group-id]
+```
+This record serves as the basis for reply content and fix planning.

package/skills/qa-strategy/SKILL.md CHANGED Viewed

@@ -1,32 +1,114 @@
 ---
 name: qa-strategy
-description: QA test planning for comprehensive coverage
+description: E2E product testing strategy using Playwright — flow-based, autonomous, close to human interaction
 ---
-# QA Strategy Skill
+# E2E Product Testing Strategy
-## Test Pyramid
+## Core Principle
-1. **Unit tests**: Fast, isolated, cover individual functions
-2. **Integration tests**: Test component interactions
-3. **E2E tests**: Test user-facing flows end-to-end
+Test the product the way a user uses it. Every test simulates a real user flow — navigating, clicking, filling forms, waiting for responses. If a human wouldn't do it, don't test it here.
-## When to Write What
+**This is NOT for unit or integration tests.** This pipeline tests complete user journeys through the running application.
-- New function → unit test
-- New API endpoint → integration test
-- New user flow → E2E test
-- Bug fix → regression test at the appropriate level
+## Flow Discovery
-## Coverage Priorities
+Before writing tests, understand what the product does:
-Focus testing effort on:
-1. Business logic (highest value)
-2. Error handling paths
-3. Edge cases in input validation
-4. Integration points (API boundaries, DB queries)
+1. **Scan routes and pages** — every URL a user can visit is a potential flow entry point
+2. **Identify forms** — login, signup, search, create, edit — these are high-value interaction points
+3. **Map navigation** — how does a user get from page A to page B? What's the happy path?
+4. **Find auth boundaries** — what's public vs protected? Test both sides
+5. **Check CRUD operations** — can you create, read, update, delete the core entities?
-Don't test:
-- Framework boilerplate
-- Simple getters/setters
-- Third-party library behavior
+## Flow Prioritization
+| Priority | Description | Examples |
+|----------|-------------|---------|
+| **Critical** | Revenue or access blocking | Login, checkout, payment |
+| **High** | Core product value | Create/edit main entities, dashboard |
+| **Medium** | Secondary features | Settings, profile, search |
+| **Low** | Nice-to-have | Theme toggle, tooltips |
+Test critical and high flows first. Skip low flows if hitting the token budget.
+## Playwright Best Practices
+### Locators (prefer resilient selectors)
+```typescript
+// GOOD — role-based, resilient to styling changes
+page.getByRole('button', { name: 'Submit' })
+page.getByLabel('Email')
+page.getByText('Welcome back')
+page.getByTestId('user-avatar')
+// BAD — fragile, breaks on refactoring
+page.locator('.btn-primary')
+page.locator('#submit-btn')
+page.locator('div > form > button:nth-child(2)')
+```
+### Assertions
+```typescript
+// Wait for navigation
+await expect(page).toHaveURL('/dashboard');
+// Wait for element visibility
+await expect(page.getByText('Success')).toBeVisible();
+// Wait for element to disappear (loading states)
+await expect(page.getByText('Loading...')).not.toBeVisible();
+```
+### Waiting
+```typescript
+// GOOD — wait for specific condition
+await page.waitForResponse(resp => resp.url().includes('/api/users'));
+await page.waitForLoadState('networkidle');
+// BAD — arbitrary delays
+await page.waitForTimeout(3000);
+```
+### Test Structure
+One flow per file. Each test in the flow tests a step or variant:
+```typescript
+test.describe('Checkout flow', () => {
+  test('adds item to cart', async ({ page }) => { ... });
+  test('fills shipping info', async ({ page }) => { ... });
+  test('completes payment', async ({ page }) => { ... });
+  test('shows confirmation', async ({ page }) => { ... });
+});
+```
+## What Makes a Good E2E Test
+| Quality | Good | Bad |
+|---------|------|-----|
+| **User-centric** | Tests what a user would do | Tests implementation details |
+| **Independent** | Each test can run alone | Tests depend on previous test state |
+| **Resilient** | Uses role/label selectors | Uses CSS classes or DOM structure |
+| **Fast-failing** | Fails clearly on the broken step | Fails on a timeout with no context |
+| **Readable** | Test name describes the user action | Test name is a technical description |
+## Common Pitfalls
+1. **Testing internal state** — don't check Redux store, localStorage, or cookies directly. Test what the user sees.
+2. **Flaky waits** — use `waitForResponse` or `waitForSelector`, never `waitForTimeout`.
+3. **Shared state** — each test should set up its own state. Don't rely on test execution order.
+4. **Over-testing** — one flow per critical path. Don't test every permutation of a form.
+5. **Ignoring error states** — test what happens when the API returns an error, the network is slow, or the user enters invalid data.
+## Regression Analysis
+When a previously-passing test fails:
+1. **Read the error** — what element wasn't found? What URL didn't match?
+2. **Check if the app changed** — did a route move? Did a button get renamed?
+3. **Distinguish bug from change** — if the app intentionally changed, the test needs updating. If not, it's a regression.
+4. **Record the finding** — update the flow matrix with the new status and reasoning.

package/src/commands/fix-pr.ts ADDED Viewed

@@ -0,0 +1,324 @@
+import type { ExtensionAPI } from "@oh-my-pi/pi-coding-agent";
+import * as fs from "node:fs";
+import * as path from "node:path";
+import { fileURLToPath } from "node:url";
+import { loadFixPrConfig, saveFixPrConfig, DEFAULT_FIX_PR_CONFIG } from "../fix-pr/config.js";
+import { buildFixPrOrchestratorPrompt } from "../fix-pr/prompt-builder.js";
+import type { FixPrConfig, ReviewerType, CommentReplyPolicy } from "../fix-pr/types.js";
+import {
+  generateFixPrSessionId,
+  createFixPrSession,
+  findActiveFixPrSession,
+  getSessionDir,
+} from "../storage/fix-pr-sessions.js";
+import { notifyInfo, notifyError, notifyWarning } from "../notifications/renderer.js";
+/**
+ * Returns the directory that holds the fix-pr helper shell scripts
+ * (`../fix-pr/scripts` relative to this module's own directory).
+ *
+ * The module location is converted with `fileURLToPath` rather than read from
+ * `URL.pathname`: `pathname` keeps percent-encoding (a space becomes `%20`)
+ * and yields `/C:/...` on Windows, neither of which is a usable file path.
+ */
+function getScriptsDir(): string {
+  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
+  return path.join(moduleDir, "..", "fix-pr", "scripts");
+}
+/**
+ * Locates the `SKILL.md` file for a named skill.
+ *
+ * Candidates are checked in order: `<cwd>/skills/<skillName>/SKILL.md`, then
+ * the `skills` directory two levels above this module.
+ *
+ * @param skillName Directory name of the skill under `skills/`.
+ * @returns The first candidate path that exists, or `null` when none does.
+ */
+function findSkillPath(skillName: string): string | null {
+  // fileURLToPath decodes percent-escapes and handles Windows drive letters,
+  // which `new URL(import.meta.url).pathname` does not.
+  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
+  const candidates = [
+    path.join(process.cwd(), "skills", skillName, "SKILL.md"),
+    path.join(moduleDir, "..", "..", "skills", skillName, "SKILL.md"),
+  ];
+  return candidates.find((candidate) => fs.existsSync(candidate)) ?? null;
+}
+/**
+ * Registers the `supi:fix-pr` command.
+ *
+ * The handler resolves the repository and PR number through the `gh` CLI,
+ * loads (or interactively creates) the fix-pr config, resumes or creates a
+ * session, snapshots the PR's comments with `fetch-pr-comments.sh`, and sends
+ * the orchestrator prompt to the agent as a steering message.
+ *
+ * @param pi Extension API used to register the command, run processes and send messages.
+ */
+export function registerFixPrCommand(pi: ExtensionAPI): void {
+  pi.registerCommand("supi:fix-pr", {
+    description: "Fix PR review comments with token-optimized agent orchestration",
+    async handler(args, ctx) {
+      // ── Step 1: Detect PR ──────────────────────────────────────────
+      let prNumber: number | null = null;
+      let repo: string | null = null;
+      // Accept the PR number as "123" or "#123"; anything else falls back to
+      // detection from the current branch below.
+      const argTrimmed = args?.trim().replace("#", "") || "";
+      if (/^\d+$/.test(argTrimmed)) {
+        prNumber = parseInt(argTrimmed, 10);
+      }
+      // Detect repo
+      try {
+        const repoResult = await pi.exec("gh", ["repo", "view", "--json", "nameWithOwner", "-q", ".nameWithOwner"], { cwd: ctx.cwd });
+        if (repoResult.code === 0) repo = repoResult.stdout.trim();
+      } catch { /* ignore — reported by the !repo check below */ }
+      if (!repo) {
+        notifyError(ctx, "Could not detect repository", "Run from a git repo with gh CLI configured");
+        return;
+      }
+      // Detect PR number from current branch if not provided
+      if (!prNumber) {
+        try {
+          const prResult = await pi.exec("gh", ["pr", "view", "--json", "number", "-q", ".number"], { cwd: ctx.cwd });
+          if (prResult.code === 0) prNumber = parseInt(prResult.stdout.trim(), 10);
+        } catch { /* ignore — reported by the !prNumber check below */ }
+      }
+      // Also rejects NaN, which parseInt yields for empty or non-numeric output.
+      if (!prNumber) {
+        notifyError(ctx, "No PR found", "Provide PR number as argument or run from a PR branch");
+        return;
+      }
+      // ── Step 2: Load or create config ──────────────────────────────
+      let config = loadFixPrConfig(ctx.cwd);
+      if (!config && ctx.hasUI) {
+        config = await runSetupWizard(ctx);
+        if (!config) return; // user cancelled
+        saveFixPrConfig(ctx.cwd, config);
+        ctx.ui.notify("Fix-PR config saved to .omp/supipowers/fix-pr.json", "info");
+      }
+      if (!config) {
+        notifyError(ctx, "No fix-pr config", "Run interactively first to set up configuration");
+        return;
+      }
+      // ── Step 3: Session handling ───────────────────────────────────
+      let activeSession = findActiveFixPrSession(ctx.cwd);
+      // Only a session recorded for this PR may be resumed. Everything below
+      // uses the PR number detected above, so resuming another PR's session
+      // would write this PR's comments into the wrong session directory.
+      if (activeSession && activeSession.prNumber !== prNumber) {
+        activeSession = null;
+      }
+      if (activeSession && ctx.hasUI) {
+        const choice = await ctx.ui.select(
+          "Fix-PR Session",
+          [
+            `Resume ${activeSession.id} (iteration ${activeSession.iteration}, PR #${activeSession.prNumber})`,
+            "Start new session",
+          ],
+          { helpText: "Select session · Esc to cancel" },
+        );
+        if (!choice) return;
+        if (choice.startsWith("Start new")) activeSession = null;
+      }
+      const ledger = activeSession ?? {
+        id: generateFixPrSessionId(),
+        createdAt: new Date().toISOString(),
+        updatedAt: new Date().toISOString(),
+        prNumber,
+        repo,
+        status: "running" as const,
+        iteration: 0,
+        config,
+        commentsProcessed: [],
+      };
+      if (!activeSession) {
+        createFixPrSession(ctx.cwd, ledger);
+      }
+      // ── Step 4: Fetch initial comments ─────────────────────────────
+      const sessionDir = getSessionDir(ctx.cwd, ledger.id);
+      const scriptsDir = getScriptsDir();
+      const snapshotPath = path.join(sessionDir, "snapshots", `comments-${ledger.iteration}.jsonl`);
+      // Guarded like the gh calls above so a failure to run the script is
+      // reported to the user instead of escaping the handler.
+      try {
+        const fetchResult = await pi.exec("bash", [
+          path.join(scriptsDir, "fetch-pr-comments.sh"),
+          repo,
+          String(prNumber),
+          snapshotPath,
+        ], { cwd: ctx.cwd });
+        if (fetchResult.code !== 0) {
+          notifyError(ctx, "Failed to fetch PR comments", fetchResult.stderr);
+          return;
+        }
+      } catch (err: unknown) {
+        notifyError(ctx, "Failed to fetch PR comments", err instanceof Error ? err.message : String(err));
+        return;
+      }
+      // Read the snapshot
+      let comments = "";
+      try {
+        comments = fs.readFileSync(snapshotPath, "utf-8").trim();
+      } catch {
+        notifyWarning(ctx, "No comments found", "PR has no review comments to process");
+        return;
+      }
+      if (!comments) {
+        notifyInfo(ctx, "No comments to process", "PR has no review comments");
+        return;
+      }
+      // One comment per non-blank line of the snapshot.
+      const commentCount = comments.split("\n").filter((line) => line.trim() !== "").length;
+      // ── Step 5: Load skill ─────────────────────────────────────────
+      let skillContent = "";
+      const skillPath = findSkillPath("fix-pr");
+      if (skillPath) {
+        try {
+          skillContent = fs.readFileSync(skillPath, "utf-8");
+        } catch { /* proceed without the skill text */ }
+      }
+      // ── Step 6: Build and send prompt ──────────────────────────────
+      const prompt = buildFixPrOrchestratorPrompt({
+        prNumber,
+        repo,
+        comments,
+        sessionDir,
+        scriptsDir,
+        config,
+        iteration: ledger.iteration,
+        skillContent,
+      });
+      pi.sendMessage(
+        {
+          customType: "supi-fix-pr",
+          content: [{ type: "text", text: prompt }],
+          display: "none",
+        },
+        { deliverAs: "steer" },
+      );
+      notifyInfo(ctx, `Fix-PR started: PR #${prNumber}`, `${commentCount} comments to assess | session ${ledger.id}`);
+    },
+  });
+}
+// ── Setup Wizard ───────────────────────────────────────────────────────
+// Choices shown by the setup wizard. runSetupWizard matches on these labels
+// (exact reviewer names, label prefixes, leading integers), so label text and
+// parsing must be changed together.
+
+/** Automated reviewers the wizard can configure; "None" disables the reviewer. */
+const REVIEWER_OPTIONS = ["CodeRabbit", "GitHub Copilot", "Gemini Code Review", "None"];
+
+/** Default re-review trigger text, keyed by reviewer label. */
+const REVIEWER_DEFAULTS: Record<string, string> = {
+  "CodeRabbit": "/review",
+  "GitHub Copilot": "@copilot review",
+  "Gemini Code Review": "/gemini review",
+};
+
+/** Comment reply policies offered by the wizard. */
+const POLICY_OPTIONS = [
+  "Answer all comments",
+  "Only answer wrong/unnecessary ones (recommended)",
+  "Don't answer, just fix",
+];
+
+/** Delay choices; the leading integer is the number of seconds. */
+const DELAY_OPTIONS = ["60 seconds", "120 seconds", "180 seconds (recommended)", "300 seconds"];
+
+/** Iteration-limit choices; the leading integer is the limit. */
+const ITERATION_OPTIONS = ["1", "2", "3 (recommended)", "5"];
+
+/** Model tier choices; the first word ("high" / "low") is the tier. */
+const MODEL_TIER_OPTIONS = [
+  "high — thorough reasoning, more tokens",
+  "low — fast execution, fewer tokens",
+];
+/**
+ * Interactive first-run wizard that builds a fix-pr configuration.
+ *
+ * Asks, in order, for the automated reviewer (and its re-review trigger), the
+ * comment reply policy, the delay and iteration limit of the review loop, and
+ * the model tier for the orchestrator, planner and fixer.
+ *
+ * @param ctx Command context; only `ctx.ui.select` and `ctx.ui.input` are used.
+ * @returns The assembled config, or `null` as soon as any prompt is cancelled.
+ */
+async function runSetupWizard(ctx: any): Promise<FixPrConfig | null> {
+  // Tier labels start with the tier name ("high — …" / "low — …").
+  const toTier = (label: string): "high" | "low" => (label.startsWith("high") ? "high" : "low");
+
+  // 1. Automated reviewer
+  const reviewerChoice = await ctx.ui.select(
+    "Automated PR reviewer",
+    REVIEWER_OPTIONS,
+    { helpText: "Select your automated reviewer, if any" },
+  );
+  if (!reviewerChoice) return null;
+  let reviewerType: ReviewerType = "none";
+  let triggerMethod: string | null = null;
+  if (reviewerChoice !== "None") {
+    // Explicit label → type mapping; no string munging or type assertion, so
+    // an unmapped label can never produce an invalid ReviewerType.
+    if (reviewerChoice === "CodeRabbit") reviewerType = "coderabbit";
+    else if (reviewerChoice === "GitHub Copilot") reviewerType = "copilot";
+    else if (reviewerChoice === "Gemini Code Review") reviewerType = "gemini";
+    const defaultTrigger = REVIEWER_DEFAULTS[reviewerChoice] ?? "";
+    triggerMethod = await ctx.ui.input(
+      "How to trigger re-review?",
+      defaultTrigger,
+      { helpText: `Default for ${reviewerChoice}: ${defaultTrigger}` },
+    );
+    // `undefined` is treated as a cancelled prompt; any other empty answer
+    // falls back to the reviewer's default trigger.
+    if (triggerMethod === undefined) return null;
+    if (!triggerMethod) triggerMethod = defaultTrigger;
+  }
+  // 2. Comment reply policy
+  const policyChoice = await ctx.ui.select(
+    "Comment reply policy",
+    POLICY_OPTIONS,
+    { helpText: "How should we handle replying to comments?" },
+  );
+  if (!policyChoice) return null;
+  let commentPolicy: CommentReplyPolicy = "answer-selective";
+  if (policyChoice.startsWith("Answer all")) commentPolicy = "answer-all";
+  else if (policyChoice.startsWith("Don't")) commentPolicy = "no-answer";
+  // 3. Loop timing
+  const delayChoice = await ctx.ui.select(
+    "Delay between review checks",
+    DELAY_OPTIONS,
+    { helpText: "How long to wait for reviewer after pushing changes" },
+  );
+  if (!delayChoice) return null;
+  // parseInt reads the leading integer of the label, e.g. "180 seconds (recommended)" → 180.
+  const delaySeconds = parseInt(delayChoice, 10);
+  const iterChoice = await ctx.ui.select(
+    "Max review iterations",
+    ITERATION_OPTIONS,
+    { helpText: "Maximum fix-check-fix cycles" },
+  );
+  if (!iterChoice) return null;
+  const maxIterations = parseInt(iterChoice, 10);
+  // 4. Model preferences
+  const orchestratorTier = await ctx.ui.select(
+    "Orchestrator model tier (assessment & grouping)",
+    MODEL_TIER_OPTIONS,
+    { helpText: "Higher tier = more thorough analysis" },
+  );
+  if (!orchestratorTier) return null;
+  const plannerTier = await ctx.ui.select(
+    "Planner model tier (fix planning)",
+    MODEL_TIER_OPTIONS,
+    { helpText: "Higher tier = more detailed plans" },
+  );
+  if (!plannerTier) return null;
+  const fixerTier = await ctx.ui.select(
+    "Fixer model tier (code changes)",
+    MODEL_TIER_OPTIONS,
+    { helpText: "Lower tier usually sufficient for execution" },
+  );
+  if (!fixerTier) return null;
+  // Start from the default model settings and override only the tier.
+  const config: FixPrConfig = {
+    reviewer: { type: reviewerType, triggerMethod },
+    commentPolicy,
+    loop: { delaySeconds, maxIterations },
+    models: {
+      orchestrator: { ...DEFAULT_FIX_PR_CONFIG.models.orchestrator, tier: toTier(orchestratorTier) },
+      planner: { ...DEFAULT_FIX_PR_CONFIG.models.planner, tier: toTier(plannerTier) },
+      fixer: { ...DEFAULT_FIX_PR_CONFIG.models.fixer, tier: toTier(fixerTier) },
+    },
+  };
+  return config;
+}