npm - pan-wizard - Versions diffs - 2.9.0 → 3.4.1 - Mend

pan-wizard 2.9.0 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

package/README.md +8 -8
package/agents/pan-conductor.md +189 -0
package/agents/pan-counterfactual.md +112 -0
package/agents/pan-debugger.md +15 -1
package/agents/pan-document_code.md +21 -0
package/agents/pan-executor.md +16 -0
package/agents/pan-hardener.md +113 -0
package/agents/pan-integration-checker.md +2 -0
package/agents/pan-knowledge.md +81 -0
package/agents/pan-meta-reviewer.md +91 -0
package/agents/pan-plan-checker.md +2 -0
package/agents/pan-previewer.md +98 -0
package/agents/pan-project-researcher.md +4 -4
package/agents/pan-reviewer.md +2 -0
package/agents/pan-verifier.md +2 -0
package/bin/install-lib.cjs +197 -0
package/bin/install.js +1999 -1959
package/commands/pan/assumptions.md +38 -3
package/commands/pan/audit-deployment.md +6 -0
package/commands/pan/cost.md +132 -0
package/commands/pan/debug.md +71 -2
package/commands/pan/exec-phase.md +105 -0
package/commands/pan/focus-auto.md +199 -18
package/commands/pan/focus-design.md +67 -2
package/commands/pan/focus-exec.md +178 -47
package/commands/pan/focus-scan.md +17 -5
package/commands/pan/knowledge.md +129 -0
package/commands/pan/map-codebase.md +47 -6
package/commands/pan/mcp-bridge.md +145 -0
package/commands/pan/milestone-audit.md +23 -0
package/commands/pan/new-project.md +64 -0
package/commands/pan/pause.md +42 -1
package/commands/pan/plan-phase.md +95 -0
package/commands/pan/preview.md +114 -0
package/commands/pan/profile.md +37 -0
package/commands/pan/quick.md +15 -0
package/commands/pan/resume.md +62 -2
package/commands/pan/review-deep.md +128 -0
package/commands/pan/verify-phase.md +53 -0
package/commands/pan/what-if.md +146 -0
package/hooks/dist/pan-cost-logger.js +102 -0
package/hooks/dist/pan-statusline.js +154 -108
package/package.json +1 -1
package/pan-wizard-core/bin/lib/bridge.cjs +269 -0
package/pan-wizard-core/bin/lib/bus.cjs +251 -0
package/pan-wizard-core/bin/lib/codebase.cjs +118 -0
package/pan-wizard-core/bin/lib/constants.cjs +42 -1
package/pan-wizard-core/bin/lib/context-budget.cjs +27 -0
package/pan-wizard-core/bin/lib/core.cjs +91 -6
package/pan-wizard-core/bin/lib/cost.cjs +359 -0
package/pan-wizard-core/bin/lib/focus.cjs +105 -2
package/pan-wizard-core/bin/lib/init.cjs +5 -5
package/pan-wizard-core/bin/lib/knowledge.cjs +331 -0
package/pan-wizard-core/bin/lib/memory.cjs +252 -0
package/pan-wizard-core/bin/lib/phase.cjs +40 -13
package/pan-wizard-core/bin/lib/preview.cjs +480 -0
package/pan-wizard-core/bin/lib/review-deep.cjs +280 -0
package/pan-wizard-core/bin/lib/roadmap.cjs +4 -4
package/pan-wizard-core/bin/lib/state.cjs +2 -2
package/pan-wizard-core/bin/lib/verify.cjs +34 -1
package/pan-wizard-core/bin/lib/whatif.cjs +289 -0
package/pan-wizard-core/bin/pan-tools.cjs +239 -4
package/pan-wizard-core/templates/playbook.md +53 -0
package/pan-wizard-core/templates/preview-report.md +93 -0
package/pan-wizard-core/templates/roadmap.md +24 -24
package/pan-wizard-core/templates/state.md +12 -9
package/pan-wizard-core/workflows/plan-phase.md +1 -1
package/scripts/build-hooks.js +2 -1
package/scripts/generate-skills-docs.py +560 -0

package/commands/pan/review-deep.md ADDED Viewed

@@ -0,0 +1,128 @@
+---
+name: pan:review-deep
+group: Review
+description: Security audit + cross-reviewer check. OWASP/STRIDE pass by pan-hardener, then pan-meta-reviewer catches what the first pass missed. Writes consolidated deep-review.md.
+argument-hint: "<phase-number>"
+allowed-tools:
+  - Read
+  - Write
+  - Bash
+  - Glob
+  - Grep
+  - Task
+---
+<objective>
+Run a deeper review pass on a phase than `pan-reviewer` alone provides. Two new agents:
+1. **pan-hardener** — OWASP Top 10 (2025) + STRIDE threat model on files changed in the phase.
+2. **pan-meta-reviewer** — reads both the reviewer's and hardener's output, flags things both missed, disputes overstated severities.
+Outputs are merged by `review-deep.cjs` into a single `.planning/reviews/<phase>/deep-review.md` with verdict, coverage stats, and conflict table. An audit entry is published to the `review-handoff` bus channel for traceability.
+Consolidates Spec B v1's X-4 (self-review) + X-12 (harden) into a single command.
+</objective>
+<execution_context>
+@~/.claude/pan-wizard-core/bin/lib/review-deep.cjs
+@~/.claude/pan-wizard-core/bin/lib/bus.cjs
+@~/.claude/agents/pan-hardener.md
+@~/.claude/agents/pan-meta-reviewer.md
+</execution_context>
+<invocation_modes>
+### Standalone
+```
+/pan:review-deep 07
+```
+Run after `/pan:exec-phase 07` completes. Requires `pan-reviewer` to have already written its review to `.planning/phases/07/review.md` (exec-phase does this automatically).
+### Integrated with exec-phase
+```
+/pan:exec-phase 07 --deep-review
+```
+Runs the normal exec → reviewer pipeline, then auto-invokes this command. Recommended for phases touching auth, payment, PII, migrations, or public APIs.
+### Integrated with focus-exec
+```
+/pan:focus-exec --deep-review
+```
+Per-item deep review during focus campaigns. Useful for high-stakes batches.
+</invocation_modes>
+<process>
+1. **Load reviewer output** — read `.planning/phases/<N>/review.md` written by the earlier `pan-reviewer` step. If missing, warn and offer to run `pan-reviewer` first.
+2. **Spawn pan-hardener** (run it to completion before step 3 — pan-meta-reviewer reads `hardener.md`, so the two spawns are sequential, which also keeps the audit trail clear):
+   - Prompt includes: `<files_to_read>` with phase plan + diff + reviewer output; `<output_path>` = `.planning/reviews/<N>/hardener.md`; `<framework_scope>` block reminding of OWASP/STRIDE coverage.
+   - Agent writes its findings to the output path, returns confirmation.
+3. **Spawn pan-meta-reviewer**:
+   - Prompt includes: `<files_to_read>` with both the reviewer's `review.md` AND `hardener.md` (and representative diff snippets); `<output_path>` = `.planning/reviews/<N>/meta.md`.
+   - Agent reads both first-pass reports, identifies missed patterns, disputes overstated severities, writes to output path.
+4. **Merge** — call:
+   ```
+   pan-tools review-deep merge <N> \
+     --reviewer-file .planning/phases/<N>/review.md \
+     --hardener-file .planning/reviews/<N>/hardener.md \
+     --meta-file .planning/reviews/<N>/meta.md
+   ```
+   The merger parses all three, sorts by severity, computes verdict (`ok` | `ok_with_minor` | `fix_before_merge` | `review_required` | `block`), writes `.planning/reviews/<N>/deep-review.md`, and publishes an audit record to the `review-handoff` bus channel.
+5. **Report back** — echo verdict + finding count + conflict count. If verdict is `block` or `review_required`, recommend the user review `deep-review.md` before proceeding.
+</process>
+<verdict_semantics>
+| Verdict | Meaning | Action |
+|---------|---------|--------|
+| `ok` | No findings at any severity | Merge freely |
+| `ok_with_minor` | Only low/info findings | Merge with noted follow-ups |
+| `fix_before_merge` | Medium findings present | Fix or document before merge |
+| `review_required` | High findings present | Human sign-off required |
+| `block` | At least one critical | Do not merge |
+Verdict is driven by the highest-severity finding across all three sources. Meta-reviewer disputes can downgrade severity on specific findings but don't change the headline verdict — the merger trusts the consensus of the explicit severity labels.
+</verdict_semantics>
+<output_files>
+- `.planning/phases/<N>/review.md` — pan-reviewer output (written earlier by exec-phase)
+- `.planning/reviews/<N>/hardener.md` — pan-hardener output (new)
+- `.planning/reviews/<N>/meta.md` — pan-meta-reviewer output (new)
+- `.planning/reviews/<N>/deep-review.md` — merged consolidated report (final deliverable)
+- `.planning/bus/review-handoff.jsonl` — audit trail entry (append-only)
+</output_files>
+<runtime_compatibility>
+| Runtime | hardener | meta-reviewer | merge |
+|---------|----------|---------------|-------|
+| Claude Code | Full, thinking enabled (6000/4000 budget) | Full | Full |
+| OpenCode | Prose "think step-by-step" preamble substitutes for thinking | Same | Full (runtime-agnostic CLI) |
+| Gemini | Same | Same | Full |
+| Codex | Same | Same | Full |
+| Copilot | Same | Same | Full |
+The merger CLI (`pan-tools review-deep merge`) is pure Node.js and works identically across runtimes. Only the *quality* of the hardener and meta-reviewer outputs varies with model capability — Opus 4.7 with extended thinking produces the richest findings.
+</runtime_compatibility>
+<calibration_note>
+Deep review is opt-in for a reason: it costs roughly 3× a normal review (hardener + meta + merge adds two agent spawns per phase). Use it for high-stakes phases, not every phase. `--deep-review` gating by phase tags is a v3.4 candidate enhancement.
+</calibration_note>

package/commands/pan/verify-phase.md CHANGED Viewed

@@ -33,6 +33,59 @@ Phase: $ARGUMENTS (optional)
 Context files are resolved inside the workflow (`init verify-work`) and delegated via `<files_to_read>` blocks.
 </context>
+<investigate_before_judging>
+Never claim a feature works or doesn't work without reading the implementation first.
+Before each verification judgment:
+1. Read the source file(s) that implement the feature
+2. Read the test file(s) that cover it
+3. Run the tests
+4. Only then state your assessment with file:line evidence
+Do not speculate about code you have not opened.
+</investigate_before_judging>
+<citation_requirement>
+Every verdict (PASS, PARTIAL, FAIL) MUST include at least one file:line citation as evidence.
+**Before writing any judgment, scan your draft for unsourced claims.** If you find an assertion without evidence, stop and gather it before continuing.
+Format: `verdict: PASS — feature works as specified (src/billing.ts:42, tests/billing.test.ts:18-35)`
+**Grounding rules:**
+- PASS requires: file:line where the feature is implemented + test file:line where it's verified
+- PARTIAL requires: file:line showing what works + description of what's missing with expected location
+- FAIL requires: file:line showing the defect OR absence (grep result showing the expected function/export doesn't exist)
+- "I checked and it's not there" is NOT evidence — show the grep command and its empty result
+**Anti-pattern:**
+```
+BAD:  "FAIL — the retry logic doesn't handle timeouts"
+      → No evidence. Maybe it does handle timeouts and you didn't read far enough.
+GOOD: "FAIL — retry logic at api/client.ts:67-89 catches ConnectionError but not TimeoutError.
+       Grep for 'TimeoutError' in api/: 0 matches. tests/client.test.ts has no timeout test cases."
+```
+</citation_requirement>
+<reflexion_loop>
+After initial verification of each requirement:
+1. Score each requirement: PASS / PARTIAL / FAIL
+2. For PARTIAL or FAIL: state specifically what is missing or broken
+3. Re-read the requirement text and the implementation — did you miss anything?
+4. Revise the score if the re-read reveals evidence you overlooked
+5. Report only final scores after this review cycle
+This prevents premature FAIL verdicts from incomplete investigation.
+</reflexion_loop>
+<cache_priming>
+**Before the verifier agent runs**, prime the prompt cache once. The verifier reads project.md / requirements.md / roadmap.md every run; caching avoids ~15-50K input tokens per invocation.
+Run once:
+```
+pan-tools cache prime --summary
+```
+See [plan-phase.md](plan-phase.md) or [exec-phase.md](exec-phase.md) for the full explanation. No-op on non-Claude runtimes.
+</cache_priming>
 <process>
 Execute the verify-work workflow from @~/.claude/pan-wizard-core/workflows/verify-phase.md end-to-end.
 Preserve all workflow gates (session management, test presentation, diagnosis, fix planning, routing).

package/commands/pan/what-if.md ADDED Viewed

@@ -0,0 +1,146 @@
+---
+name: pan:what-if
+group: Foresight
+description: Explore a phase's alternative approach in an isolated git worktree. Replays the scenario, compares to the original plan, writes a report.
+argument-hint: "<phase-number> <scenario-text>"
+allowed-tools:
+  - Read
+  - Write
+  - Edit
+  - Bash
+  - Grep
+  - Glob
+  - Task
+---
+<objective>
+Safely explore "what if we had done X instead?" for a phase. Creates an isolated git worktree, spawns `pan-counterfactual` inside it, lets the agent experiment without touching the main tree, collects a structured comparison payload, writes `.planning/counterfactuals/<phase>-<slug>.md` in the main tree, and cleans up the worktree.
+Unchanged from Spec B v1's X-9. Already narrow enough to stand alone.
+</objective>
+<execution_context>
+@~/.claude/pan-wizard-core/bin/lib/whatif.cjs
+@~/.claude/agents/pan-counterfactual.md
+</execution_context>
+<invocation>
+```
+/pan:what-if 7 "Use Redis instead of Memcached"
+/pan:what-if 4 "Skip the migration step entirely"
+/pan:what-if 12 "What if we'd picked NoSQL at the start?"
+```
+**Requirements:**
+- Main project must be a git repository (worktrees require git).
+- The working tree can be dirty — the worktree is based on the current HEAD, so your uncommitted changes stay in the main tree and are not carried into the worktree.
+</invocation>
+<process>
+### Stage 1 — Prepare
+```
+pan-tools whatif prepare <phase> "<scenario text>"
+```
+The CLI:
+1. Validates the phase exists.
+2. Slugifies the scenario (lowercase, alphanumerics + hyphens, ≤50 chars).
+3. Creates a git worktree at `<parent-of-cwd>/pan-whatif-<phase>-<slug>-<ts>` on a fresh branch `pan-whatif/<phase>-<slug>-<ts>`.
+4. Returns `{phase, phase_name, scenario, slug, plans, summaries, has_executed, worktree: {worktree_path, branch, base}}`.
+If worktree creation fails (not a git repo, dirty tree blocking, etc.), abort with a clear error.
+### Stage 2 — Spawn pan-counterfactual
+Spawn the agent with its working directory set to `worktree_path`. Prompt includes:
+- `<files_to_read>` — the phase plan, any existing summary, the main project's `CLAUDE.md` so the agent understands conventions.
+- `<scenario>` — the user's scenario text verbatim.
+- `<worktree_path>` — so the agent knows the safe boundary.
+- `<time_budget>` — advisory (10-20 min of reasoning/file-ops).
+The agent explores, then returns a JSON payload with `{summary, differences, recommendations, risks, verdict}`.
+### Stage 3 — Write report in MAIN tree
+Run (from main tree, NOT worktree):
+```
+pan-tools whatif report <phase> "<scenario>" --comparison '<agent-json>'
+```
+This writes `.planning/counterfactuals/<phase>-<slug>.md`. The file belongs to the main tree and survives worktree cleanup.
+### Stage 4 — Cleanup
+```
+pan-tools whatif cleanup --worktree <path> --branch <name> --force
+```
+Removes the worktree directory and deletes the counterfactual branch. Best-effort: warnings are surfaced but don't block.
+### Stage 5 — Confirm
+Echo the report path and verdict to the user. Done.
+</process>
+<safety>
+**Worktree isolation is the safety mechanism.** The agent can edit files freely inside the worktree without affecting the main tree. Git treats worktrees as independent checkouts sharing the same object store.
+**The agent is instructed NOT to commit inside the worktree.** Commits would be wasted effort since the worktree is deleted after report generation. The agent contract calls this out explicitly.
+**The agent is instructed NOT to push or merge.** No remote-affecting git operations.
+**Cleanup is forced.** `--force` on worktree removal ensures even a worktree with uncommitted changes gets cleaned up. The report is the permanent artifact; the worktree is disposable.
+**If cleanup fails**, the worktree and branch remain. Re-run `pan-tools whatif cleanup` with the same args, or clean up manually:
+```
+git worktree remove --force <worktree_path>
+git branch -D <branch_name>
+```
+</safety>
+<output_paths>
+- `.planning/counterfactuals/<phase>-<slug>.md` — the comparison report (permanent)
+- `<parent>/pan-whatif-<phase>-<slug>-<ts>/` — the worktree (temporary, deleted after report)
+- branch `pan-whatif/<phase>-<slug>-<ts>` — the worktree's branch (deleted after report)
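+The worktree directory and branch names include a timestamp (`<ts>`), so running what-if multiple times on the same phase + scenario creates distinct worktrees and branches. The report path shown above (`<phase>-<slug>.md`) carries no timestamp, so a repeat run with the same phase and scenario resolves to the same report file.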
+</output_paths>
+<runtime_compatibility>
+| Runtime | Support |
+|---------|---------|
+| Claude Code | Full — worktree + agent + report |
+| OpenCode | Partial — worktree + report work; agent spawn depends on runtime's task support |
+| Gemini CLI | Partial — same caveat |
+| Codex CLI | Partial — same caveat |
+| Copilot CLI | Partial — same caveat |
+The worktree and report layers are pure Node.js + git and work everywhere git is available. The agent orchestration varies by runtime's task-spawning capabilities. On any runtime that can't spawn an agent, the user can manually explore in the worktree and run `pan-tools whatif report` with a handwritten comparison JSON.
+</runtime_compatibility>
+<when_to_use>
+**Use `/pan:what-if` when:**
+- You're debating a decision mid-milestone and want to sample the alternative without rebuilding
+- A phase is complete and you want to retrospectively compare approaches
+- A reviewer asks "why not X?" and you want a structured answer
+**Skip `/pan:what-if` when:**
+- The alternative is trivially decidable from reading the plan (don't spawn an agent)
+- You're already committed and the exploration is sunk-cost sympathy
+- The alternative depends on large uncommitted changes in the main tree — the worktree is based on HEAD and will not contain them, so commit (or stash and re-apply) first
+</when_to_use>

package/hooks/dist/pan-cost-logger.js ADDED Viewed

@@ -0,0 +1,102 @@
+#!/usr/bin/env node
+// PAN cost logger — SubagentStop hook (v3.4+).
+//
+// Claude Code fires SubagentStop when a Task-spawned sub-agent finishes.
+// The hook receives JSON on stdin describing the session, transcript path,
+// and (when available) usage metadata.
+//
+// We append a minimal record to .planning/metrics/tokens.jsonl so
+// `/pan:cost` reports reflect real agent spawns, not just manually-appended
+// entries. Token counts are best-effort: if the hook input doesn't carry
+// them, we log a record with zeros + a `source: "hook"` flag so the
+// aggregator distinguishes these from fully-instrumented records.
+//
+// This hook NEVER blocks the main agent loop — all errors are swallowed.
+const fs = require('fs');
+const path = require('path');
+const METRICS_DIR = 'metrics'; // subdirectory of <cwd>/.planning/ that appendRecord writes into
+const TOKENS_FILE = 'tokens.jsonl'; // log file inside METRICS_DIR; appendRecord adds one JSON record per line
+/**
+ * Extract what we can from the SubagentStop event payload.
+ * Pure function — safe to test without stdin.
+ *
+ * @param {Object} data - Parsed SubagentStop event JSON
+ * @param {string} cwd - Project cwd (for path resolution)
+ * @returns {Object|null} Cost record, or null if the event should be ignored
+ */
+function buildCostRecord(data, cwd) {
+  if (!data || typeof data !== 'object') return null;
+  // Only log actual subagent stops; ignore other Stop variants.
+  if (data.hook_event_name && data.hook_event_name !== 'SubagentStop') return null;
+  const record = {
+    ts: new Date().toISOString(),
+    agent: data.agent_type || data.subagent_type || null,
+    command: null,
+    model: data.model || null,
+    tier: null,
+    input_tokens: extractNumber(data.usage, 'input_tokens') || 0,
+    output_tokens: extractNumber(data.usage, 'output_tokens') || 0,
+    cache_read_tokens: extractNumber(data.usage, 'cache_read_input_tokens') || 0,
+    cache_write_tokens: extractNumber(data.usage, 'cache_creation_input_tokens') || 0,
+    cost_usd: null,
+    phase: data.phase || null,
+    session: data.session_id || null,
+    source: 'hook',
+  };
+  return record;
+}
+function extractNumber(obj, key) {
+  if (!obj || typeof obj !== 'object') return 0;
+  const v = obj[key];
+  return typeof v === 'number' ? v : 0;
+}
+/**
+ * Append record to .planning/metrics/tokens.jsonl. Silently succeeds
+ * even if the file or directory can't be written — hook must not block.
+ *
+ * @param {string} cwd - Working directory (project root)
+ * @param {Object} record - Cost record from buildCostRecord
+ * @returns {boolean} true if written, false otherwise
+ */
+function appendRecord(cwd, record) {
+  if (!record) return false;
+  try {
+    const dir = path.join(cwd, '.planning', METRICS_DIR);
+    fs.mkdirSync(dir, { recursive: true });
+    fs.appendFileSync(path.join(dir, TOKENS_FILE), JSON.stringify(record) + '\n', 'utf-8');
+    return true;
+  } catch {
+    return false;
+  }
+}
+// ─── Stdin driver ───────────────────────────────────────────────────────────
+if (require.main === module) {
+  let input = '';
+  process.stdin.setEncoding('utf8');
+  process.stdin.on('data', chunk => input += chunk);
+  process.stdin.on('end', () => {
+    try {
+      const data = JSON.parse(input);
+      // Prefer cwd from the event (Claude Code sends it in most hook payloads);
+      // fall back to process.cwd() which is the project root when Claude Code
+      // invokes the hook.
+      const cwd = data.cwd || data.workspace?.current_dir || process.cwd();
+      const record = buildCostRecord(data, cwd);
+      appendRecord(cwd, record);
+    } catch {
+      // Silent fail — don't block agent loop on hook errors.
+    }
+  });
+}
+module.exports = { buildCostRecord, appendRecord, METRICS_DIR, TOKENS_FILE };