npm - @pharaoh-so/mcp - Versions diffs - 0.3.1 → 0.3.3 - Mend

@pharaoh-so/mcp 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/README.md CHANGED Viewed

@@ -26,7 +26,7 @@ This displays a device code and a URL. Open the URL on **any device** (phone, la
 ### Step 2 — Add to Claude Code
 ```bash
-npx @pharaoh-so/mcp --setup
+npx @pharaoh-so/mcp
 ```
 Verify the connection:
@@ -43,7 +43,7 @@ If you previously added Pharaoh as an SSE server, remove it first:
 ```bash
 claude mcp remove pharaoh
-npx @pharaoh-so/mcp --setup
+npx @pharaoh-so/mcp
 ```
 ## How It Works
@@ -203,7 +203,7 @@ npx @pharaoh-so/mcp
 Make sure you added it with the correct command:
 ```bash
-npx @pharaoh-so/mcp --setup
+npx @pharaoh-so/mcp
 ```
 If you register the server manually via `claude mcp add`, note the `--` separator required between `pharaoh` and `npx`.

package/dist/helpers.d.ts CHANGED Viewed

@@ -4,13 +4,14 @@
  */
 import type { TokenResponse } from "./auth.js";
 import type { Credentials } from "./credentials.js";
+/** The npx command users run to set up Pharaoh. Single source of truth for all output. */
+export declare const NPX_COMMAND = "npx @pharaoh-so/mcp";
 /** Write one or more lines to stderr. */
 export declare function printLines(...lines: string[]): void;
 /** Parse CLI arguments. */
 export declare function parseArgs(argv?: string[]): {
     server: string;
     logout: boolean;
-    setup: boolean;
 };
 export declare function printUsage(): void;
 /**
@@ -23,11 +24,6 @@ export declare function resolveSseUrl(tokenSseUrl: string | undefined, server: s
 export declare function tokenToCredentials(token: TokenResponse, sseUrl: string): Credentials;
 /** Format remaining TTL as human-readable string (e.g. "5d 12h"). */
 export declare function formatTtl(expiresAt: string): string;
-/**
- * Print setup instructions for Claude Code. Called in interactive mode
- * after auth completes (or when credentials already exist).
- */
-export declare function printSetupInstructions(): void;
 /** Format a credential identity string (e.g. "alice (my-org)"). */
 export declare function formatIdentity(creds: Credentials): string;
 /**

package/dist/helpers.js CHANGED Viewed

@@ -1,5 +1,7 @@
 import { isExpired } from "./credentials.js";
 const DEFAULT_SERVER = "https://mcp.pharaoh.so";
+/** The npx command users run to set up Pharaoh. Single source of truth for all output. */
+export const NPX_COMMAND = "npx @pharaoh-so/mcp";
 /** Write one or more lines to stderr. */
 export function printLines(...lines) {
     process.stderr.write(lines.join("\n") + "\n");
@@ -8,7 +10,6 @@ export function printLines(...lines) {
 export function parseArgs(argv = process.argv.slice(2)) {
     let server = DEFAULT_SERVER;
     let logout = false;
-    let setup = false;
     for (let i = 0; i < argv.length; i++) {
         if (argv[i] === "--server" && argv[i + 1]) {
             server = argv[i + 1];
@@ -17,9 +18,6 @@ export function parseArgs(argv = process.argv.slice(2)) {
         else if (argv[i] === "--logout") {
             logout = true;
         }
-        else if (argv[i] === "--setup") {
-            setup = true;
-        }
     }
     // Strip trailing slash
     server = server.replace(/\/+$/, "");
@@ -41,10 +39,10 @@ export function parseArgs(argv = process.argv.slice(2)) {
         printLines(`Pharaoh: --server is not a valid URL: ${server}`);
         process.exit(1);
     }
-    return { server, logout, setup };
+    return { server, logout };
 }
 export function printUsage() {
-    printLines("Usage: pharaoh-mcp [options]", "", "Options:", "  --setup           Full install: auth, register MCP, install skills (start here)", "  --server <url>    Pharaoh server URL (default: https://mcp.pharaoh.so)", "  --logout          Clear stored credentials and exit", "  --install-skills  Force reinstall Pharaoh skills (Claude Code + OpenClaw)", "  --help, -h        Show this help", "", "Get started:", "  npx @pharaoh-so/mcp --setup", "");
+    printLines("Usage: pharaoh-mcp [options]", "", "Options:", "  --server <url>    Pharaoh server URL (default: https://mcp.pharaoh.so)", "  --logout          Clear stored credentials and exit", "  --install-skills  Force reinstall Pharaoh skills (Claude Code + OpenClaw)", "  --help, -h        Show this help", "", "Get started:", `  ${NPX_COMMAND}`, "");
 }
 /**
  * Validate that a server-supplied SSE URL shares the same origin as the configured server.
@@ -95,13 +93,6 @@ export function formatTtl(expiresAt) {
         return `${hours}h`;
     return `${Math.floor(remainingMs / 60_000)}m`;
 }
-/**
- * Print setup instructions for Claude Code. Called in interactive mode
- * after auth completes (or when credentials already exist).
- */
-export function printSetupInstructions() {
-    printLines("", "┌───────────────────────────────────────────────────────┐", "│  To register Pharaoh in Claude Code, run:             │", "│    npx @pharaoh-so/mcp --setup                       │", "│                                                       │", "│  This removes stale entries, registers the MCP        │", "│  server globally, and installs all skills.            │", "└───────────────────────────────────────────────────────┘", "");
-}
 /** Format a credential identity string (e.g. "alice (my-org)"). */
 export function formatIdentity(creds) {
     return [

package/dist/index.js CHANGED Viewed

@@ -12,7 +12,7 @@
  */
 import { pollForToken, printActivationPrompt, printAuthSuccess, requestDeviceCode, } from "./auth.js";
 import { deleteCredentials, isExpired, readCredentials, writeCredentials } from "./credentials.js";
-import { formatIdentity, formatTtl, parseArgs, printLines, printSetupInstructions, printUsage, resolveSseUrl, tokenToCredentials, } from "./helpers.js";
+import { NPX_COMMAND, formatTtl, parseArgs, printLines, printUsage, resolveSseUrl, tokenToCredentials, } from "./helpers.js";
 import { runInstallSkills } from "./install-skills.js";
 import { startProxy, TenantSuspendedError, TokenExpiredError } from "./proxy.js";
 async function main() {
@@ -33,7 +33,7 @@ async function main() {
         runInstallSkills();
         return;
     }
-    const { server, logout, setup } = parseArgs(args);
+    const { server, logout } = parseArgs(args);
     if (logout) {
         deleteCredentials();
         printLines("Pharaoh: credentials cleared");
@@ -41,49 +41,13 @@ async function main() {
     }
     const creds = readCredentials();
     const isInteractive = Boolean(process.stdin.isTTY);
-    // ── Setup mode (--setup): full automated install ──
-    // Auth → remove stale → register MCP → install skills → done.
-    if (setup) {
-        // Authenticate if needed
-        let activeCreds = creds && !isExpired(creds) ? creds : null;
-        if (activeCreds) {
-            printLines(`Pharaoh: authenticated as ${formatIdentity(activeCreds)} — token valid for ${formatTtl(activeCreds.expires_at)}`);
-        }
-        else {
-            printLines("Pharaoh: starting device authorization...");
-            const deviceCode = await requestDeviceCode(server);
-            printActivationPrompt(deviceCode.user_code, deviceCode.verification_uri);
-            const token = await pollForToken(server, deviceCode.device_code, deviceCode.interval);
-            if (token.provisional) {
-                printLines(`Pharaoh: provisional access — install the GitHub App to map your repos: ${token.install_url ?? ""}`);
-            }
-            const sseUrl = resolveSseUrl(token.sse_url, server);
-            const newCreds = tokenToCredentials(token, sseUrl);
-            writeCredentials(newCreds);
-            activeCreds = newCreds;
-            printAuthSuccess(token.github_login ?? null, token.tenant_name ?? null, token.repos?.length ?? 0);
-        }
-        // Register MCP server in Claude Code
-        const { runSetup } = await import("./setup.js");
-        runSetup();
-        // Install skills
-        runInstallSkills();
-        printLines("", "Pharaoh is ready. Start a new Claude Code conversation and ask:", '  "What modules does this codebase have?"', "");
-        process.exit(0);
-    }
     // ── Interactive mode (user running in a terminal) ──
-    // Authenticate if needed, print setup instructions, and exit.
-    // The proxy is useless without Claude Code on the other end of stdin.
+    // Full setup every time: fresh auth → register MCP → install skills → done.
+    // Running `npx @pharaoh-so/mcp` is the only command a user needs.
     if (isInteractive) {
-        if (creds && !isExpired(creds)) {
-            printLines(`Pharaoh: authenticated as ${formatIdentity(creds)} — token valid for ${formatTtl(creds.expires_at)}, ${creds.repos.length} repo${creds.repos.length === 1 ? "" : "s"} connected`);
-            // Ensure skills are installed/up-to-date on every interactive run
-            runInstallSkills();
-            printSetupInstructions();
-            process.exit(0);
-        }
-        // No valid credentials — run device flow
-        printLines("Pharaoh: no valid credentials — starting device authorization");
+        // Always re-authenticate for a fresh session
+        printLines("Pharaoh: starting device authorization...");
+        deleteCredentials();
         const deviceCode = await requestDeviceCode(server);
         printActivationPrompt(deviceCode.user_code, deviceCode.verification_uri);
         const token = await pollForToken(server, deviceCode.device_code, deviceCode.interval);
@@ -94,15 +58,18 @@ async function main() {
         const newCreds = tokenToCredentials(token, sseUrl);
         writeCredentials(newCreds);
         printAuthSuccess(token.github_login ?? null, token.tenant_name ?? null, token.repos?.length ?? 0);
-        // Auto-install skills to detected platforms (Claude Code, OpenClaw)
+        // Register MCP server in Claude Code (remove stale + add fresh)
+        const { runSetup } = await import("./setup.js");
+        runSetup();
+        // Install skills to all detected platforms
         runInstallSkills();
-        printSetupInstructions();
+        printLines("", "Pharaoh is ready. Start a new Claude Code conversation and ask:", '  "What modules does this codebase have?"', "");
         process.exit(0);
     }
     // ── Proxy mode (Claude Code spawned us as a stdio MCP server) ──
     // If no credentials, we can't run the device flow (no TTY for user interaction).
     if (!creds || isExpired(creds)) {
-        printLines("Pharaoh: no valid credentials — cannot start proxy.", "Run this command first to authenticate:", "  npx @pharaoh-so/mcp", "");
+        printLines("Pharaoh: no valid credentials — cannot start proxy.", "Run this command first to authenticate:", `  ${NPX_COMMAND}`, "");
         process.exit(1);
     }
     // Valid credentials — ensure skills are installed before starting proxy
@@ -119,7 +86,7 @@ async function main() {
     }
     catch (err) {
         if (err instanceof TokenExpiredError) {
-            printLines("Pharaoh: token expired or revoked.", "Run this command to re-authenticate:", "  npx @pharaoh-so/mcp", "");
+            printLines("Pharaoh: token expired or revoked.", "Run this command to re-authenticate:", `  ${NPX_COMMAND}`, "");
             deleteCredentials();
             process.exit(1);
         }

package/dist/install-skills.js CHANGED Viewed

@@ -258,7 +258,7 @@ export function runInstallSkills(home = homedir()) {
             "  • Claude Code — install from https://claude.ai/download",
             "  • OpenClaw   — install from https://openclaw.dev/install",
             "",
-            "Once installed, re-run: npx @pharaoh-so/mcp --install-skills",
+            "Once installed, re-run: npx @pharaoh-so/mcp",
             "",
         ].join("\n"));
     }

package/dist/setup.js CHANGED Viewed

@@ -3,10 +3,10 @@
  *
  * Full automated install: authenticates via device flow, removes stale MCP
  * entries, registers Pharaoh as a global stdio MCP server, and installs skills.
- * One command does everything: `npx @pharaoh-so/mcp --setup`
+ * One command does everything: `npx @pharaoh-so/mcp`
  */
 import { execFileSync } from "node:child_process";
-import { printLines } from "./helpers.js";
+import { NPX_COMMAND, printLines } from "./helpers.js";
 /** Check if `claude` CLI is available in PATH. */
 function hasClaude() {
     try {
@@ -48,7 +48,7 @@ function runClaude(args) {
  */
 export function runSetup() {
     if (!hasClaude()) {
-        printLines("Pharaoh: Claude Code CLI not found.", "", "Install Claude Code first: https://claude.ai/download", "Then re-run: npx @pharaoh-so/mcp --setup", "");
+        printLines("Pharaoh: Claude Code CLI not found.", "", "Install Claude Code first: https://claude.ai/download", `Then re-run: ${NPX_COMMAND}`, "");
         return false;
     }
     printLines("Pharaoh: setting up...");
@@ -66,7 +66,7 @@ export function runSetup() {
         "@pharaoh-so/mcp",
     ]);
     if (!added) {
-        printLines("Pharaoh: failed to register MCP server.", "Try manually: npx @pharaoh-so/mcp --setup", "");
+        printLines("Pharaoh: failed to register MCP server.", `Try manually: ${NPX_COMMAND}`, "");
         return false;
     }
     printLines("Pharaoh: registered as global MCP server (scope: user)");

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@pharaoh-so/mcp",
   "mcpName": "so.pharaoh/pharaoh",
-  "version": "0.3.1",
+  "version": "0.3.3",
   "description": "MCP proxy for Pharaoh — maps codebases into queryable knowledge graphs for AI agents. Enables Claude Code in headless environments (VPS, SSH, CI) via device flow auth.",
   "type": "module",
   "main": "dist/index.js",

package/skills/pharaoh/SKILL.md CHANGED Viewed

@@ -73,7 +73,7 @@ Full docs at [pharaoh.so/docs](https://pharaoh.so/docs).
 **CLI** (Claude Code, OpenClaw — works everywhere):
 ```
-npx @pharaoh-so/mcp --setup
+npx @pharaoh-so/mcp
 ```
 **URL** (Claude.ai, Cursor, ChatGPT — paste in settings):

package/skills/review/SKILL.md CHANGED Viewed

@@ -1,62 +1,287 @@
 ---
 name: review
 prompt-name: review-with-pharaoh
-description: "Architecture-aware pre-PR code review using Pharaoh codebase knowledge graph. Four-phase workflow: context gathering with module structure and blast radius, risk assessment with regression scoring and wiring checks, spec alignment against vision docs, and a final verdict of SHIP / SHIP WITH CHANGES / BLOCK. Auto-block rules for unreachable exports, circular dependencies, high regression risk, and spec violations."
-version: 0.2.0
+description: "The definitive code review. Five-phase workflow: git context detection (worktree, branch, base, changed files), Pharaoh recon (architecture map, module context, blast radius in one call), parallel specialized agent dispatch (code quality, error handling, test coverage, type design), cross-model adversarial review for security-sensitive changes, and a final synthesized verdict of SHIP / SHIP WITH CHANGES / BLOCK. Always architecture-aware, always branch-aware."
+version: 0.3.0
 homepage: https://pharaoh.so
 user-invocable: true
-metadata: {"emoji": "☥", "tags": ["code-review", "pull-request", "architecture", "pharaoh", "regression-risk", "spec-alignment"]}
+metadata: {"emoji": "☥", "tags": ["code-review", "pull-request", "architecture", "pharaoh", "multi-agent", "adversarial-review", "regression-risk", "spec-alignment"]}
 ---
 # Review with Pharaoh
-Architecture-aware pre-PR review. Uses `review-with-pharaoh` — a 4-phase workflow that assesses blast radius, regression risk, wiring integrity, duplication, and spec alignment. Produces a final verdict: SHIP, SHIP WITH CHANGES, or BLOCK.
+The definitive code review. Architecture-aware, branch-aware, multi-agent, adversarial. Five phases that combine Pharaoh's knowledge graph with parallel specialized reviewers and an independent cross-model second opinion.
+Final verdict: **SHIP** / **SHIP WITH CHANGES** / **BLOCK**
 ## When to Use
-Invoke before merging any pull request. Use it when reviewing changes that touch shared modules, export new functions, modify core data flows, or claim to implement a spec.
+Before merging any branch. Before opening any PR. When reviewing changes that touch shared modules, export new functions, modify core data flows, or claim to implement a spec.
-## Workflow
+---
-### Phase 1 — Context
+## Phase 0 — Git Context
-1. For each touched module, call `get_module_context` to understand its structure.
-2. For each touched module, call `get_blast_radius` to identify downstream impact.
-3. Call `query_dependencies` between the touched modules to map coupling.
+**Goal:** Know exactly what changed and where you are. This phase is mandatory and runs before anything else.
-### Phase 2 — Risk Assessment
+1. Detect the current environment:
+   ```bash
+   git rev-parse --show-toplevel          # repo root (may be a worktree)
+   git worktree list                      # detect if running in a worktree
+   git branch --show-current              # current branch name
+   git log --oneline -1                   # latest commit
+   ```
-4. Call `get_regression_risk` for the target repository to assess overall change risk.
-5. Call `check_reachability` for new exports in the touched modules — are they wired?
-6. Call `get_consolidation_opportunities` for the repository to check for duplicated logic.
+2. Determine the base branch (what this branch diverged from):
+   ```bash
+   git merge-base HEAD main               # or master, or whatever the default is
+   ```
-### Phase 3 — Spec Alignment
+3. Collect the full changeset from the base:
+   ```bash
+   git diff --name-only $(git merge-base HEAD main)...HEAD   # all changed files
+   git diff --stat $(git merge-base HEAD main)...HEAD         # summary stats
+   git log --oneline $(git merge-base HEAD main)..HEAD        # all commits on this branch
+   ```
-7. Call `get_vision_gaps` for the repository to verify changes align with specs.
+4. Also check for uncommitted work:
+   ```bash
+   git diff --name-only                   # unstaged changes
+   git diff --cached --name-only          # staged but uncommitted
+   ```
-### Phase 4 — Verdict
+5. Extract the **touched modules** from the changed file paths. Group files by their top-level directory or module boundary. These module names feed Phase 1.
-Produce a review with:
-- **Architecture impact:** modules affected, dependency changes, blast radius
-- **Risk assessment:** regression risk level, volatile modules touched
-- **Wiring check:** are all new exports reachable from entry points?
-- **Duplication check:** does new code duplicate existing logic?
-- **Spec alignment:** do changes match or drift from vision specs?
+**Output of Phase 0:** Branch name, base branch, commit count, list of changed files, list of touched modules, and whether there's uncommitted work.
-Final verdict: **SHIP** / **SHIP WITH CHANGES** / **BLOCK**
+---
+## Phase 1 — Pharaoh Recon
+**Goal:** Get the full architectural picture in one call. Do NOT skip this phase — it is what makes this review architecture-aware instead of just a code diff review.
+Call `pharaoh_recon` with:
+- **repo:** The repository name
+- **include_map:** `true`
+- **modules:** The touched modules from Phase 0 (up to 5)
+- **blast_radius:** The most critical changed files/functions as blast radius targets (up to 3). Pick the files with the most downstream risk — entry points, shared utilities, exported APIs.
+- **dependencies:** Pairs of touched modules to trace coupling between (up to 3)
+Then call these additional tools for data that `pharaoh_recon` doesn't cover:
+- `get_regression_risk` — overall change risk score for the repo
+- `get_consolidation_opportunities` — duplicate logic the PR may introduce
+- `check_reachability` — are new exports wired to entry points?
+- `get_vision_gaps` — do changes align with or drift from specs?
+**Output of Phase 1:** Architecture map, module profiles for every touched module, blast radius for high-risk changes, dependency paths between coupled modules, regression risk level, duplication findings, reachability status, and spec alignment.
+---
+## Phase 2 — Parallel Specialized Review
+**Goal:** Deep-dive the actual code changes from multiple expert angles simultaneously. Launch these as **parallel subagents** — they are independent and should run concurrently.
+### Determine which reviewers to dispatch
+| Agent | When to dispatch | Focus |
+|-------|-----------------|-------|
+| **Code Reviewer** | Always | Bugs, logic errors, CLAUDE.md compliance, code quality. Confidence-filtered (only issues >= 80/100). |
+| **Security Reviewer** | When changes touch auth, encryption, tokens, tenant isolation, data access, billing, webhooks, Cypher queries, or any security-sensitive surface. Also dispatch when Phase 1 regression risk is HIGH. | OWASP Top 10, injection vectors, access control bypasses, tenant isolation violations, cryptographic misuse, secret exposure, plus project-specific security rules. See checklist below. |
+| **Silent Failure Hunter** | When changes touch error handling, catch blocks, fallback logic, API calls, or any code that could suppress errors | Silent failures, broad catches, swallowed errors, missing user feedback, unjustified fallbacks. |
+| **Test Analyzer** | When test files are changed, or when new functionality lacks corresponding tests | Behavioral coverage gaps, brittle tests, missing edge cases, tests that prove nothing. |
+| **Type Design Analyzer** | When new types/interfaces are introduced or existing types are modified | Encapsulation, invariant expression, invariant enforcement. Rates each type 1-10 on each axis. |
+### Security Reviewer — Checklist
+The Security Reviewer agent runs a systematic audit against two layers: universal web security (OWASP) and project-specific invariants.
+**Layer 1 — OWASP Top 10 + Common Vulnerabilities:**
+- **Injection:** SQL/Cypher injection, command injection, XSS (reflected/stored/DOM), template injection
+- **Broken auth:** Hardcoded credentials, weak token generation, missing expiry, session fixation
+- **Broken access control:** Missing authorization checks, IDOR, privilege escalation, path traversal
+- **Cryptographic failures:** Weak algorithms, plaintext secrets, missing encryption at rest/transit, key exposure
+- **Security misconfiguration:** Permissive CORS, verbose error messages leaking internals, debug endpoints in production
+- **Vulnerable dependencies:** Known CVEs in direct dependencies (check against changed package.json/lockfile)
+- **SSRF:** Unvalidated URLs in fetch/request calls, redirect chains
+- **Logging & monitoring:** Sensitive data in logs, missing audit trails for privileged operations
+**Layer 2 — Project-Specific Security Rules (from CLAUDE.md):**
+- Every Cypher query takes `repo` as first parameter — no unanchored MATCH clauses
+- `validateRepoOwnership()` runs before every tool handler
+- No default/fallback repo values — repo always from tenant's Postgres `tenant_repos`
+- Tokens stored as SHA-256 hashes, never plaintext
+- GitHub tokens encrypted at rest (AES-256-GCM with per-tenant HKDF-derived keys)
+- Webhook signatures verified on every request (`PHARAOH_GITHUB_WEBHOOK_SECRET`)
+- Org membership re-checked on every token refresh
+- Tenant Neo4j users get `reader` role only — graph writes use admin connection
+- Rate limiting enforced per tenant, not per user
+- Neo4j admin credentials never leave server-side env vars
+**Detection triggers (auto-dispatch when changed files match):**
+- `src/auth/**`, `src/crypto/**` — authentication, encryption
+- `src/mcp/server.ts`, `src/mcp/tenant-resolver.ts` — session management, tenant isolation
+- `src/mcp/neo4j-queries.ts` — Cypher query construction
+- `src/stripe/**`, `src/web/routes/billing.ts` — payment flows
+- `src/github/webhooks.ts`, `src/web/routes/webhooks.ts` — webhook verification
+- `src/db/**` — database access, schema changes
+- `src/upload/**` — file upload validation
+- Any file containing `validateRepoOwnership`, `runQuery`, `encryptProperty`, `verifyWebhookSignature`
+**Output format:** Each finding must include:
+1. Vulnerability class (e.g., "Cypher Injection", "Missing Ownership Check")
+2. Severity: CRITICAL / HIGH / MEDIUM
+3. Affected file:line
+4. Attack scenario: how an attacker would exploit this
+5. Remediation: specific code change required
+### How to dispatch each agent
+For each agent, launch a subagent (via the Agent tool) with:
+1. The **git diff** of the relevant changed files (not the full session history)
+2. The **Pharaoh context** from Phase 1 (architecture map, blast radius, module profiles) — this is what makes these agents architecture-aware
+3. The **CLAUDE.md rules** relevant to the review (testing requirements, security non-negotiables, code style)
+4. A clear instruction to focus ONLY on changed code, not pre-existing issues
+5. For the **Security Reviewer** specifically: include the full Layer 2 checklist above and the list of security-sensitive file paths so it knows the project's threat model
+Each agent returns a structured report with findings categorized by severity:
+- **CRITICAL** (90-100): Must fix before merge
+- **IMPORTANT** (80-89): Should fix before merge
+- **SUGGESTION** (70-79): Consider for a follow-up
+### What NOT to dispatch
+- **Comment Analyzer** and **Code Simplifier** are polish agents. Do not include them in the review — they distract from correctness. Run them separately if desired.
+- Do not dispatch agents for trivial changes (typo fixes, dependency bumps, config changes). If Phase 0 shows < 20 lines changed across non-test files, skip Phase 2 entirely and go straight to Phase 4.
+---
+## Phase 3 — Adversarial Review
+**Goal:** Independent second opinion on security-sensitive changes. A different agent evaluates the code fresh, without knowledge of your reasoning.
+### When to trigger
+Trigger Phase 3 when ANY of these are true:
+- Changes touch **auth, encryption, access control, token handling, or session management**
+- Changes touch **tenant isolation, query construction, or data access patterns**
+- Changes touch **billing, subscription management, or payment flows**
+- Changes modify **webhook verification or signature checking**
+- Regression risk from Phase 1 is **HIGH**
+- You are not confident about a specific change's correctness
+If none of these triggers are met, **skip Phase 3** and proceed to Phase 4.
+### How to run
-Auto-block triggers (any of these = BLOCK):
-- Unreachable exports (new code with zero callers)
+1. **Prepare a review package** — do NOT send your session history:
+   - The changed files (full diff or complete file contents)
+   - What the code does and why it was changed (1-2 sentences)
+   - Security constraints from CLAUDE.md (tenant isolation rules, encryption requirements, etc.)
+   - Specific concerns you want the reviewer to focus on
+2. **Dispatch to an independent subagent** with instructions to evaluate the code fresh and assign verdicts:
+   | Verdict | Meaning |
+   |---------|---------|
+   | **AGREE** | Implementation is correct for the stated concern |
+   | **DISAGREE** | Concrete issue identified with evidence and suggested fix |
+   | **CONTEXT** | Cannot determine correctness — needs more information |
+3. **Evaluate findings:**
+   - AGREE items: no action
+   - DISAGREE items: verify against actual code. If confirmed, it becomes a CRITICAL finding. If the reviewer lacked context, document why the current approach is correct.
+   - CONTEXT items: provide the missing information and note it in the review output
+---
+## Phase 4 — Synthesis & Verdict
+**Goal:** Merge all findings into a single, actionable review. No raw dumps — synthesize.
+### Structure the output as:
+```markdown
+# Review: [branch-name] → [base-branch]
+**[X] commits | [Y] files changed | [Z] modules touched**
+**Worktree:** [path] (or "main repo")
+---
+## Architecture Impact
+- Modules affected: [list with blast radius numbers]
+- Dependency changes: [new coupling, removed coupling]
+- Highest blast radius: [module/function] → [N downstream callers across M modules]
+## Risk Assessment
+- Regression risk: [LOW / MEDIUM / HIGH] — [one-line reason]
+- Volatile modules touched: [list, if any]
+- Wiring status: [all new exports reachable? / N unreachable exports found]
+## Code Quality ([N] findings)
+### Critical ([count])
+- [finding with file:line, source agent, and fix]
+### Important ([count])
+- [finding with file:line, source agent, and fix]
+### Suggestions ([count])
+- [finding with file:line]
+## Security ([N] findings, or "No security-sensitive changes")
+- [Security reviewer findings with vulnerability class, severity, file:line, attack scenario, and remediation]
+- [Or: "Security reviewer not dispatched — no security-sensitive files in changeset"]
+## Test Coverage
+- [Test analyzer summary — gaps, quality issues, positive observations]
+## Spec Alignment
+- [Vision gaps introduced or resolved]
+## Adversarial Review
+- [Phase 3 results, or "Skipped — no security-sensitive changes detected"]
+---
+## Verdict: [SHIP / SHIP WITH CHANGES / BLOCK]
+[If not SHIP: numbered list of specific required changes before merge]
+```
+### Auto-block triggers (any of these = BLOCK)
+- Any CRITICAL security finding (injection, broken access control, tenant isolation violation, secret exposure)
+- Unreachable exports (new public code with zero callers)
 - New circular dependencies between modules
 - HIGH regression risk without corresponding test coverage
 - Vision spec violations (building against spec intent)
+- Any CRITICAL finding from Phase 2 or Phase 3 that is confirmed and unfixed
+- DISAGREE verdict from adversarial review on security-sensitive code, confirmed after verification
+- Unanchored Cypher query (MATCH without traversing through `Repo {name: $repo}`)
+- Missing `validateRepoOwnership()` on a new tool handler
+### SHIP WITH CHANGES triggers
+- IMPORTANT findings that are confirmed but non-blocking
+- Test coverage gaps for new functionality
+- Duplication that should be consolidated in a follow-up
+- Spec drift that is intentional but should be documented
+### SHIP triggers
+- No CRITICAL or confirmed IMPORTANT findings
+- All new exports are reachable
+- Regression risk is LOW or MEDIUM with adequate test coverage
+- Spec alignment is clean or intentionally divergent with documentation
+---
+## Quick Mode
-## Output
+For small changes (< 50 lines, single module, no security surface):
+- Run Phase 0 + Phase 1 + Phase 4 only
+- Skip Phase 2 (parallel agents) and Phase 3 (adversarial)
+- Still architecture-aware, just faster
-A structured review containing:
-- Architecture impact summary with specific modules and blast radius numbers
-- Risk level (LOW / MEDIUM / HIGH) with data backing
-- Wiring status for all new exports
-- Duplication findings with affected modules
-- Spec alignment verdict
-- Final verdict (SHIP / SHIP WITH CHANGES / BLOCK) with specific required changes if not SHIP
+Explicitly opt in with: `/review quick`