npm - @pharaoh-so/mcp - Versions diffs - 0.3.7 → 0.3.9 - Mend

@pharaoh-so/mcp 0.3.7 → 0.3.9

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/install-skills.js +22 -2
package/package.json +1 -1
package/skills/plan/SKILL.md +45 -219
package/skills/sessions/SKILL.md +35 -19

package/dist/install-skills.js CHANGED Viewed

@@ -68,12 +68,32 @@ function installClaudeCodePlugin(home = homedir()) {
         process.stderr.write("Pharaoh: .claude-plugin/ manifest not found in package — cannot install.\n");
         return -1;
     }
-    // Copy skills/
+    // Copy skills/ and generate pharaoh-* prefixed aliases
     let skillCount = 0;
     if (existsSync(BUNDLED_SKILLS_DIR)) {
         cpSync(BUNDLED_SKILLS_DIR, join(pluginDir, "skills"), { recursive: true, force: true });
         const entries = readdirSync(BUNDLED_SKILLS_DIR, { withFileTypes: true });
-        skillCount = entries.filter((e) => e.isDirectory()).length;
+        const skillDirs = entries.filter((e) => e.isDirectory());
+        skillCount = skillDirs.length;
+        // Auto-generate pharaoh-* prefixed copies so both `/plan` and `pharaoh:plan`
+        // resolve to the same content. Without this, prefixed copies drift and users
+        // get a stripped skeleton when invoking via the pharaoh: prefix.
+        for (const dir of skillDirs) {
+            if (dir.name === "pharaoh" || dir.name.startsWith("pharaoh-"))
+                continue;
+            const prefixedName = `pharaoh-${dir.name}`;
+            const prefixedDir = join(pluginDir, "skills", prefixedName);
+            const srcSkill = join(pluginDir, "skills", dir.name, "SKILL.md");
+            if (!existsSync(srcSkill))
+                continue;
+            mkdirSync(prefixedDir, { recursive: true });
+            const content = readFileSync(srcSkill, "utf-8");
+            // Rewrite the name field in YAML frontmatter only (between --- delimiters).
+            // Using a whole-file /m regex would match `name:` in body content too.
+            const rewritten = content.replace(/^(---\n[\s\S]*?)(name:\s*).+(\n[\s\S]*?---)/, `$1$2${prefixedName}$3`);
+            writeFileSync(join(prefixedDir, "SKILL.md"), rewritten);
+            skillCount++;
+        }
     }
     // Copy .mcp.json
     const mcpSrc = join(PKG_ROOT, ".mcp.json");

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@pharaoh-so/mcp",
   "mcpName": "so.pharaoh/pharaoh",
-  "version": "0.3.7",
+  "version": "0.3.9",
   "description": "MCP proxy for Pharaoh — maps codebases into queryable knowledge graphs for AI agents. Enables Claude Code in headless environments (VPS, SSH, CI) via device flow auth.",
   "type": "module",
   "main": "dist/index.js",

package/skills/plan/SKILL.md CHANGED Viewed

@@ -1,22 +1,20 @@
 ---
 name: plan
 prompt-name: plan-with-pharaoh
-description: "Full-cycle architecture-aware planning: Pharaoh reconnaissance, structured plan writing with bite-sized TDD steps and zero placeholders, then deep adversarial review with wiring verification and interactive issue resolution. Replaces both writing-plans and plan-review."
-version: 0.3.0
+description: Deep plan review with Pharaoh reconnaissance, wiring verification, and structured issue tracking. Use before implementing any feature, refactor, or significant code change. Enters plan mode (no code changes) and provides structured review with decision points.
+version: 0.5.0
 homepage: https://pharaoh.so
 user-invocable: true
-metadata: {"emoji": "☥", "tags": ["planning", "architecture", "blast-radius", "pharaoh", "implementation-plan", "wiring", "review", "tdd"]}
+metadata: {"emoji": "☥", "tags": ["planning", "architecture", "blast-radius", "pharaoh", "review", "interactive"]}
 ---
-# Plan with Pharaoh
+# Plan Review
-Full-cycle planning: reconnaissance → plan writing → adversarial review. Combines architecture-aware graph analysis with rigorous plan craft and interactive issue resolution.
+**You are now in plan mode. Do NOT make any code changes. Think, evaluate, and present decisions.**
-**You are in plan mode. Do NOT make any code changes. Think, evaluate, plan, review.**
+## Document Review
-## When to Use
-Before implementing any non-trivial change: new features, refactors, adding modules, or anything that touches shared code. Use it whenever you need to answer "what's the right way to build this?" before writing code.
+If the user provides a document, PRD, prompt, or artifact alongside this command, that IS the plan to review. Apply all review sections to that document. Do not treat it as background context — it is the subject of evaluation.
 ## Project Overrides
@@ -32,188 +30,58 @@ If a `.claude/plan-review.md` file exists in this project, read it now and apply
 - Subtraction > addition; target zero or negative net LOC
 - Every export must have a caller; unwired code doesn't exist
-## Document Review Mode
-If the user provides a document, PRD, prompt, or artifact alongside this command, that IS the plan to review. Still run Phase 1 (Reconnaissance) — always verify against the actual codebase. Then proceed to Phase 3 (Approach) and Phase 5 (Review), applying all review sections to that document. Do not treat it as background context — it is the subject of evaluation.
----
-## Phase 1 — Reconnaissance (required — do this BEFORE anything else)
+## Step 1: Pharaoh Reconnaissance (Required — do this BEFORE reviewing)
-Do NOT plan from memory or assumptions. Query the actual codebase first:
+Do NOT review from memory or assumptions. Query the actual codebase first:
 1. `get_codebase_map` — current modules, hot files, dependency graph
 2. `search_functions` for keywords related to the plan — find existing code to reuse/extend
-3. `get_module_context` on each module likely affected by the change
+3. `get_module_context` on affected modules — entry points, patterns, conventions
 4. `query_dependencies` between affected modules — coupling, circular deps
-5. `get_blast_radius` on the primary target of the change
-6. `check_reachability` on the primary target to verify it's reachable from entry points
 Ground every recommendation in what actually exists. If you propose adding something, confirm it doesn't already exist. If you propose changing something, know its blast radius.
-## Phase 2 — Analysis
-Using the reconnaissance data:
-- Evaluate the blast radius — how many callers and modules are affected?
-- Check `search_functions` results — does related code already exist? Can you reuse/extend?
-- Assess module coupling — are the affected modules tightly or loosely coupled?
-- Rate the risk level (LOW / MEDIUM / HIGH) based on blast radius and coupling
-- Does this need new code at all, or can an existing pattern solve it?
-## Phase 3 — Approach
-### Scope Check
-If the spec covers multiple independent subsystems, it should be broken into separate plans — one per subsystem. Each plan should produce working, testable software on its own. Suggest splitting if needed.
-### Mode Selection (MANDATORY — do NOT skip)
-**STOP and ask the user before proceeding.** This is a hard gate — do not infer, assume, or skip this question even if the user says "yes", "go ahead", "yes to all", or similar. Present both options and wait for an explicit choice:
-> **This looks like it could be a BIG or SMALL change. Which mode?**
->
-> - **BIG CHANGE**: Full plan with all sections, approach trade-offs, interactive review
-> - **SMALL CHANGE**: Abbreviated plan, sections 2-4 of review only
-If the user's response is ambiguous (e.g. "just do it", "yes to all"), ask again: "I need to know — BIG or SMALL change?" Do not proceed to Phase 4 without an answer.
-### Approach Trade-offs
-Propose 2-3 implementation approaches:
-- For each: what files change, estimated blast radius, pros, cons
-- Recommend one with justification
-- Flag any approach that would increase module coupling
-- Flag any approach that requires new code where existing code could be extended
-## Phase 4 — Plan Writing
-### File Structure
-Before defining tasks, map out which files will be created or modified and what each one is responsible for. This is where decomposition decisions get locked in.
-- Design units with clear boundaries and well-defined interfaces. Each file should have one clear responsibility.
-- Files that change together should live together. Split by responsibility, not by technical layer.
-- In existing codebases, follow established patterns. If the codebase uses large files, don't unilaterally restructure — but if a file you're modifying has grown unwieldy, including a split is reasonable.
-This structure informs the task decomposition. Each task should produce self-contained changes that make sense independently.
-### Plan Document Header
-Every plan MUST start with:
-```markdown
-# [Feature Name] Implementation Plan
-> **For agentic workers:** Use `pharaoh:orchestrate` (recommended) or `pharaoh:execute` to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
-**Goal:** [One sentence describing what this builds]
-**Architecture:** [2-3 sentences about approach]
-**Tech Stack:** [Key technologies/libraries]
-**Risk:** [LOW / MEDIUM / HIGH] — [one line justification from Phase 2 data]
----
-```
-### Bite-Sized Task Granularity
+## Step 1b: Reconnaissance Dashboard
-Each step is one action (2-5 minutes):
-- "Write the failing test" — step
-- "Run it to make sure it fails" — step
-- "Implement the minimal code to make the test pass" — step
-- "Run the tests and make sure they pass" — step
-- "Commit" — step
+After running recon, present a visual summary before proceeding. This shows the user what Pharaoh found.
-### Task Structure
+**Surface all ★ Pharaoh insight blocks verbatim** — they contain pre-formatted bar charts, risk meters, and flow diagrams. Do not summarize or paraphrase them.
-````markdown
-### Task N: [Component Name]
+Then compose a **Recon Summary** table:
-**Files:**
-- Create: `exact/path/to/file.ts`
-- Modify: `exact/path/to/existing.ts:123-145`
-- Test: `tests/exact/path/to/test.ts`
+| Signal | Value | Source |
+|--------|-------|--------|
+| Modules affected | N | get_codebase_map |
+| Blast radius | LOW/MED/HIGH + caller count | get_blast_radius |
+| Existing functions found | N matches | search_functions |
+| Cross-module coupling | deps + circular? | query_dependencies |
-**Blast radius:** [from Phase 1 data — callers affected, modules touched]
+If any signal is surprising (high blast radius, circular deps, existing code that overlaps the plan), call it out before moving to Mode Selection.
-**Wiring:** [where new exports get called from — declared caller for every export]
+## Step 2: Mode Selection
-- [ ] **Step 1: Write the failing test**
+Ask the user which mode before starting the review:
-```typescript
-test('specific behavior', () => {
-    const result = function(input);
-    expect(result).toBe(expected);
-});
-```
+**BIG CHANGE**: Full interactive review, all relevant sections, up to 4 top issues per section.
+**SMALL CHANGE**: One question per section, only sections 2-4.
-- [ ] **Step 2: Run test to verify it fails**
+## Step 3: Review Sections
-Run: `pnpm test -- tests/path/test.ts`
-Expected: FAIL with "function not defined"
-- [ ] **Step 3: Write minimal implementation**
-```typescript
-export function myFunction(input: string): string {
-    return expected;
-}
-```
-- [ ] **Step 4: Run test to verify it passes**
-Run: `pnpm test -- tests/path/test.ts`
-Expected: PASS
-- [ ] **Step 5: Commit**
-```bash
-git add tests/path/test.ts src/path/file.ts
-git commit -m "feat: add specific feature"
-```
-````
-### No Placeholders
-Every step must contain the actual content an engineer needs. These are **plan failures** — never write them:
-- "TBD", "TODO", "implement later", "fill in details"
-- "Add appropriate error handling" / "add validation" / "handle edge cases"
-- "Write tests for the above" (without actual test code)
-- "Similar to Task N" (repeat the code — the engineer may be reading tasks out of order)
-- Steps that describe what to do without showing how (code blocks required for code steps)
-- References to types, functions, or methods not defined in any task
-### Remember
-- Exact file paths always
-- Complete code in every step — if a step changes code, show the code
-- Exact commands with expected output
-- DRY, YAGNI, TDD, frequent commits
-- Every new export must have a declared caller — if a function has no caller, it's not part of the plan
----
-## Phase 5 — Adversarial Review
-Review the plan before presenting it. Apply all relevant sections, adapting depth to change size. Skip sections that don't apply.
+Adapt depth to change size. Skip sections that don't apply.
 ### Section 1 — Architecture (skip for small/single-file changes)
 - Component boundaries and coupling concerns
 - Dependency graph: does this change shrink or expand surface area?
 - Data flow bottlenecks and single points of failure
-- Does this need new code at all, or can an existing pattern solve it?
+- Does this need new code at all, or can a human process / existing pattern solve it?
 ### Section 2 — Code Quality (always)
 - Organization, module structure, DRY violations (be aggressive)
 - Error handling gaps and missing edge cases (call out explicitly)
 - Technical debt: shortcuts, hardcoded values, magic strings
-- Over-engineered or under-engineered relative to engineering preferences
+- Over-engineered or under-engineered relative to my preferences
 - Reuse: does code for this already exist somewhere?
 ### Section 3 — Wiring & Integration (always)
@@ -221,7 +89,7 @@ Review the plan before presenting it. Apply all relevant sections, adapting dept
 - Are all new exports called from a production entry point?
 - Run `get_blast_radius` on any new/changed functions — zero callers = not done
 - `check_reachability` on new exports — verify reachable from API handlers, crons, or event handlers
-- Does every task declare WHERE new code gets called from? If not, flag it
+- Does the plan declare WHERE new code gets called from? If not, flag it
 - Integration points: how does this connect to what already exists?
 ### Section 4 — Tests (always)
@@ -239,77 +107,35 @@ Review the plan before presenting it. Apply all relevant sections, adapting dept
 ### Section 6 — Security & Attack Surface (always for new endpoints/routes/APIs; skip for pure refactors)
-- **Authentication model** — what authenticates requests? Where validated? What happens on failure?
-- **Sensitive data in URLs** — tokens, session IDs, or tenant identifiers in URL paths/params leak via Referer, history, logs
-- **Authorization boundaries** — what prevents User A from accessing User B's data?
-- **Input trust boundary** — user input flowing into shell commands, queries, HTML rendering, or file paths
-- **Error and response surface** — do error responses expose internals to unauthenticated callers?
-- **New attack surface** — new public URLs, webhooks, API routes each need rate limiting, auth, and input validation
-### Self-Review Checklist (run after all sections)
+- **Authentication model** — what authenticates requests in this plan? Where is it validated? What happens on auth failure (redirect, 401, silent pass-through)? Use `search_functions` to find existing auth middleware and confirm reuse.
+- **Sensitive data in URLs** — does the design put tokens, session IDs, or tenant identifiers in URL paths or query params? These leak via Referer headers, browser history, logs, and link sharing.
+- **Authorization boundaries** — what prevents User A from accessing User B's data? Is there an ownership check, or just an "is logged in" check? Use `get_blast_radius` on existing ownership-check functions to see where they're already enforced.
+- **Input trust boundary** — does the plan accept user input that flows into shell commands, database queries, HTML rendering, or file paths? Each is an injection vector.
+- **Error and response surface** — will error responses or API payloads expose internals (stack traces, DB schemas, internal IDs) to unauthenticated callers?
+- **New attack surface** — does the plan introduce new public URLs, webhooks, API routes, or WebSocket endpoints? Each needs: rate limiting, authentication, and input validation. Use `get_module_context` on the receiving module to check what protections exist.
-1. **Spec coverage:** Skim each section/requirement in the spec. Can you point to a task that implements it? List any gaps.
-2. **Placeholder scan:** Search the plan for red flags from the "No Placeholders" section. Fix them.
-3. **Type consistency:** Do types, method signatures, and property names used in later tasks match earlier tasks? A function called `clearLayers()` in Task 3 but `clearFullLayers()` in Task 7 is a bug.
-4. **Wiring sweep:** `get_blast_radius` on ALL new exports — zero callers on non-entry-points = plan is incomplete.
-### For Each Issue Found
+## For Each Issue Found
 For every specific issue (bug, smell, design concern, risk, missing wiring):
 1. **Describe concretely** — file, line/function reference, what's wrong
 2. **Present 2-3 options** including "do nothing" where reasonable
 3. **For each option** — implementation effort, risk, blast radius, maintenance burden
-4. **Recommend one** mapped to engineering preferences above, and say why
-5. **Ask** whether the user agrees or wants a different direction
-Number each issue (1, 2, 3...) and letter each option (A, B, C...). Recommended option is always listed first.
----
-## Phase 6 — Output & Handoff
-### Present the Plan
-A complete implementation plan containing:
-- Risk rating (LOW / MEDIUM / HIGH) with data backing
-- Recommended approach with trade-off rationale
-- File structure map
-- Numbered tasks with bite-sized steps, exact files, and complete code
-- Blast radius per task
-- Wiring declarations for every new export
-- Required tests per step
-- Adversarial review findings (issues caught and resolved)
+4. **Recommend one** mapped to my preferences above, and say why
+5. **Ask** whether I agree or want a different direction
-Save plans to: `docs/sessions/YYYY-MM-DD-<feature-name>.md`
-(User preferences for plan location override this default)
-### Execution Handoff
-After saving the plan, offer execution choice:
-**"Plan complete and saved. Two execution options:**
-**1. Orchestrated (recommended)** — I dispatch a fresh subagent per task with two-stage review (spec compliance then code quality). Use `pharaoh:orchestrate`.
-**2. Inline Execution** — Execute tasks in this session with checkpoints. Use `pharaoh:execute`.
-**Which approach?"**
----
+Number each issue (1, 2, 3...) and letter each option (A, B, C...). Recommended option is always listed first. Use AskUserQuestion with clear labels like "Issue 1 Option A", "Issue 1 Option B".
 ## Pharaoh Checkpoints (use throughout, not just at the end)
-- **Before planning**: recon (Phase 1)
-- **During plan writing**: `get_blast_radius` when evaluating impact; `search_functions` before proposing new code
-- **During review**: `get_blast_radius` on all new/changed functions; `check_reachability` on new exports
-- **After decisions**: `get_unused_code` to catch disconnections
+- **Before reviewing**: recon (Step 1 above)
+- **During review**: `get_blast_radius` when evaluating impact of changes; `search_functions` before suggesting new code
+- **After decisions**: `check_reachability` on all new exports; `get_unused_code` to catch disconnections
 - **Final sweep**: `get_blast_radius` on ALL new exports — zero callers on non-entry-points = plan is incomplete
 ## Workflow Rules
-- After each review section, pause and ask for feedback before moving on (BIG CHANGE mode)
+- After each section, pause and ask for feedback before moving on
 - Do not assume priorities on timeline or scale
 - If you see a better approach to the entire plan, say so BEFORE section-by-section review
-- Challenge the approach if you see a better one — your job is to find problems the user will regret later
+- Challenge the approach if you see a better one — your job is to find problems I'll regret later

package/skills/sessions/SKILL.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
 name: sessions
 prompt-name: session-decomposition
-description: "Decompose work into parallel, isolated sessions using git worktrees. Each session gets fresh context, a narrow scope, and produces atomic commits. Prevents context window pollution from large tasks. Coordinate across sessions without shared state."
-version: 0.2.0
+description: "Decompose work into parallel, isolated sessions using git worktrees. Each session gets fresh context, a narrow scope, and produces atomic commits. Presents session prompts for user review before execution."
+version: 0.3.0
 homepage: https://pharaoh.so
 user-invocable: true
 metadata: {"emoji": "☥", "tags": ["sessions", "worktrees", "parallel-work", "context-management", "decomposition"]}
@@ -10,7 +10,7 @@ metadata: {"emoji": "☥", "tags": ["sessions", "worktrees", "parallel-work", "c
 # Session Decomposition
-Break large tasks into parallel, isolated work sessions. Each session runs in its own git worktree with fresh context, focused scope, and atomic commits. Prevents context window bloat and keeps each unit of work clean.
+Break large tasks into parallel, isolated work sessions. Each session runs in its own git worktree with fresh context, focused scope, and atomic commits.
 ## When to Use
@@ -25,9 +25,11 @@ Break large tasks into parallel, isolated work sessions. Each session runs in it
 - Work is sequential (each step depends on the previous)
 - Task fits comfortably in one session
-## Process
+## Step 1: Reconnaissance
-### 1. Decompose
+If Pharaoh MCP tools are available, call `get_codebase_map` and `get_module_context` on affected modules to understand the current landscape before decomposing.
+## Step 2: Decompose
 Break the task into sessions. Each session must:
@@ -36,19 +38,9 @@ Break the task into sessions. Each session must:
 - Be independently verifiable (tests pass, build succeeds)
 - Produce atomic commits that make sense on their own
-### 2. Create Worktrees
-For each session, create an isolated worktree:
+## Step 3: Write Session Prompts
-```bash
-git worktree add .worktrees/<session-name> -b <branch-name>
-```
-Install dependencies in each worktree. Verify clean baseline (tests pass).
-### 3. Write Session Prompts
-Each session gets a prompt containing:
+For each session, write a complete prompt containing:
 - **Goal:** what this session produces (1-2 sentences)
 - **Scope:** which files/modules to touch (explicit list)
@@ -56,11 +48,34 @@ Each session gets a prompt containing:
 - **Verification:** how to confirm the work is correct
 - **Context:** any architectural decisions or patterns to follow
-### 4. Execute Sessions
+## Step 4: Present for Review (MANDATORY — do NOT skip)
+**STOP. Paste every session prompt into the chat as a numbered list.**
+For each session, show:
+1. The session name
+2. The full prompt text
+3. Which sessions need `/plan` review (flag anything non-trivial)
+**Wait for the user to approve, modify, add, remove, or reorder sessions before proceeding.** Do not create worktrees or execute any work until the user explicitly approves the decomposition.
+If the user says "looks good" or similar, proceed. If they request changes, update the prompts and present again.
+## Step 5: Create Worktrees
+Only after user approval. For each session, create an isolated worktree:
+```bash
+git worktree add .worktrees/<session-name> -b <branch-name>
+```
+Install dependencies in each worktree. Verify clean baseline (tests pass).
+## Step 6: Execute Sessions
 Run each session independently. Sessions should not reference each other's work-in-progress — they operate on the same base commit.
-### 5. Integrate
+## Step 7: Integrate
 After all sessions complete:
@@ -84,3 +99,4 @@ After all sessions complete:
 - **Atomic commits** — each session's output should be a coherent, reviewable unit
 - **Verify before integrating** — never merge a session that doesn't pass its own checks
 - **Decomposition is the hard part** — spend time getting boundaries right before starting work
+- **The user reviews before execution** — always present prompts, never skip to building