pan-wizard 2.9.1 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/README.md +31 -9
  2. package/agents/pan-conductor.md +189 -0
  3. package/agents/pan-counterfactual.md +112 -0
  4. package/agents/pan-debugger.md +15 -1
  5. package/agents/pan-distiller.md +82 -0
  6. package/agents/pan-document_code.md +21 -0
  7. package/agents/pan-executor.md +16 -0
  8. package/agents/pan-hardener.md +113 -0
  9. package/agents/pan-integration-checker.md +2 -0
  10. package/agents/pan-knowledge.md +81 -0
  11. package/agents/pan-meta-reviewer.md +91 -0
  12. package/agents/pan-optimizer.md +242 -0
  13. package/agents/pan-plan-checker.md +2 -0
  14. package/agents/pan-previewer.md +98 -0
  15. package/agents/pan-project-researcher.md +4 -4
  16. package/agents/pan-reviewer.md +2 -0
  17. package/agents/pan-verifier.md +2 -0
  18. package/bin/install-lib.cjs +197 -0
  19. package/bin/install.js +2048 -1959
  20. package/commands/pan/cost.md +132 -0
  21. package/commands/pan/exec-phase.md +15 -0
  22. package/commands/pan/focus-auto.md +168 -3
  23. package/commands/pan/focus-exec.md +21 -1
  24. package/commands/pan/focus-scan.md +6 -0
  25. package/commands/pan/git.md +223 -0
  26. package/commands/pan/knowledge.md +129 -0
  27. package/commands/pan/learn.md +61 -0
  28. package/commands/pan/map-codebase.md +15 -0
  29. package/commands/pan/mcp-bridge.md +145 -0
  30. package/commands/pan/milestone-done.md +9 -0
  31. package/commands/pan/optimize.md +86 -0
  32. package/commands/pan/plan-phase.md +11 -0
  33. package/commands/pan/preview.md +114 -0
  34. package/commands/pan/profile.md +37 -0
  35. package/commands/pan/review-deep.md +128 -0
  36. package/commands/pan/verify-phase.md +11 -0
  37. package/commands/pan/what-if.md +146 -0
  38. package/hooks/dist/pan-cost-logger.js +102 -0
  39. package/hooks/dist/pan-statusline.js +154 -108
  40. package/hooks/dist/pan-trace-logger.js +197 -0
  41. package/package.json +1 -1
  42. package/pan-wizard-core/bin/lib/bridge.cjs +269 -0
  43. package/pan-wizard-core/bin/lib/bus.cjs +251 -0
  44. package/pan-wizard-core/bin/lib/codebase.cjs +118 -0
  45. package/pan-wizard-core/bin/lib/commands.cjs +1 -0
  46. package/pan-wizard-core/bin/lib/constants.cjs +44 -1
  47. package/pan-wizard-core/bin/lib/context-budget.cjs +27 -0
  48. package/pan-wizard-core/bin/lib/core.cjs +91 -6
  49. package/pan-wizard-core/bin/lib/cost.cjs +359 -0
  50. package/pan-wizard-core/bin/lib/distill.cjs +510 -0
  51. package/pan-wizard-core/bin/lib/focus.cjs +108 -3
  52. package/pan-wizard-core/bin/lib/git.cjs +407 -0
  53. package/pan-wizard-core/bin/lib/init.cjs +5 -5
  54. package/pan-wizard-core/bin/lib/knowledge.cjs +331 -0
  55. package/pan-wizard-core/bin/lib/memory.cjs +252 -0
  56. package/pan-wizard-core/bin/lib/optimize.cjs +653 -0
  57. package/pan-wizard-core/bin/lib/phase.cjs +40 -13
  58. package/pan-wizard-core/bin/lib/preview.cjs +480 -0
  59. package/pan-wizard-core/bin/lib/review-deep.cjs +280 -0
  60. package/pan-wizard-core/bin/lib/roadmap.cjs +4 -4
  61. package/pan-wizard-core/bin/lib/state.cjs +2 -2
  62. package/pan-wizard-core/bin/lib/verify.cjs +34 -1
  63. package/pan-wizard-core/bin/lib/whatif.cjs +289 -0
  64. package/pan-wizard-core/bin/pan-tools.cjs +317 -4
  65. package/pan-wizard-core/templates/playbook.md +53 -0
  66. package/pan-wizard-core/templates/preview-report.md +93 -0
  67. package/pan-wizard-core/templates/roadmap.md +24 -24
  68. package/pan-wizard-core/templates/state.md +12 -9
  69. package/pan-wizard-core/workflows/exec-phase.md +97 -0
  70. package/pan-wizard-core/workflows/learn.md +91 -0
  71. package/pan-wizard-core/workflows/optimize.md +139 -0
  72. package/pan-wizard-core/workflows/plan-phase.md +28 -1
  73. package/pan-wizard-core/workflows/quick.md +7 -0
  74. package/pan-wizard-core/workflows/verify-phase.md +16 -0
  75. package/scripts/build-hooks.js +3 -1
@@ -124,6 +124,17 @@ ELSE:
124
124
  ```
125
125
  </routing_decision_tree>
126
126
 
127
+ <cache_priming>
128
+ **Before spawning research + planner agents, prime the prompt cache.** All sub-agents spawned within the next 5 minutes hit cached context instead of re-reading project.md / requirements.md / roadmap.md / state.md / standards.md.
129
+
130
+ Run once per invocation:
131
+ ```
132
+ pan-tools cache prime --summary
133
+ ```
134
+
135
+ Returns `{blocks: [{path, bytes, cache}], total_bytes, sha}`. On Claude Code with Opus 4.7, the host runtime translates these block references into `cache_control: ephemeral`. On non-Claude runtimes or older models this is a no-op — nothing breaks.
136
+ </cache_priming>
137
+
127
138
  <process>
128
139
  Execute the plan-phase workflow from @~/.claude/pan-wizard-core/workflows/plan-phase.md end-to-end.
129
140
  Preserve all workflow gates (validation, research, planning, verification loop, routing).
@@ -0,0 +1,114 @@
1
+ ---
2
+ name: pan:preview
3
+ group: Foresight
4
+ description: Preview what will happen — phase blast radius, phase dependency graph, or milestone ETA
5
+ argument-hint: "phase <N> | phases | milestone"
6
+ allowed-tools:
7
+ - Read
8
+ - Bash
9
+ - Glob
10
+ - Grep
11
+ - Write
12
+ - Task
13
+ ---
14
+
15
+ <objective>
16
+ Read-only foresight. Given a phase, a set of phases, or a milestone, produce a structured forecast: what files get touched, which tests might break, which phases can parallelize, when the milestone will actually finish.
17
+
18
+ Consolidates Spec B v1's architect + simulate + predict-milestone into one entry point with three modes. The data layer (`pan-tools preview …`) extracts structured inputs from `.planning/`; the `pan-previewer` agent analyzes and writes the report. No source code is modified.
19
+ </objective>
20
+
21
+ <execution_context>
22
+ @~/.claude/pan-wizard-core/bin/lib/preview.cjs
23
+ @~/.claude/pan-wizard-core/templates/preview-report.md
24
+ </execution_context>
25
+
26
+ <modes>
27
+
28
+ ### `phase <N>` — Blast radius of one phase
29
+
30
+ ```
31
+ /pan:preview phase 7
32
+ ```
33
+
34
+ **What it does:**
35
+ 1. `pan-tools preview phase <N>` returns `{files_mentioned, test_files_mentioned, risk_signals, risk_score, plans[], status}`.
36
+ 2. Spawn `pan-previewer` with the payload as `<preview_input>`.
37
+ 3. Agent writes `.planning/phases/<N>/preview.md` with files touched / tests at risk / migration steps / risk assessment / bottom line.
38
+
39
+ **Output:** `.planning/phases/<N>/preview.md`
40
+
41
+ ### `phases` — Cross-phase dependency graph
42
+
43
+ ```
44
+ /pan:preview phases
45
+ ```
46
+
47
+ **What it does:**
48
+ 1. `pan-tools preview phases` returns `{phases[], parallel_batches, mermaid, hidden_coupling_count}`.
49
+ 2. Spawn `pan-previewer` with `mode: phases` in the payload.
50
+ 3. Agent writes `.planning/architecture/dependency-graph.md` with mermaid DAG + parallel batches + hidden-coupling flags.
51
+
52
+ **Output:** `.planning/architecture/dependency-graph.md`
53
+
54
+ **Opus 4.7 1M-context bonus:** when the full repo fits in a single agent window, the agent cross-references plan text with actual source imports to catch coupling the frontmatter missed. On smaller-context models, the agent relies on data-layer output alone.
55
+
56
+ ### `milestone` — Completion ETA
57
+
58
+ ```
59
+ /pan:preview milestone
60
+ ```
61
+
62
+ **What it does:**
63
+ 1. `pan-tools preview milestone` returns `{phases_total, completed, remaining, avg_phase_duration_days, eta_date, confidence_pct, bottleneck, sample_size}`.
64
+ 2. Spawn `pan-previewer` with `mode: milestone`.
65
+ 3. Agent writes `.planning/milestones/preview-<today>.md` with ETA + confidence + bottleneck + caveats + bottom line.
66
+
67
+ **Output:** `.planning/milestones/preview-YYYY-MM-DD.md`
68
+
69
+ </modes>
70
+
71
+ <workflow>
72
+
73
+ **Before committing to a phase:** run `/pan:preview phase <N>` to see blast radius. A `risk_score ≥ 7` or a migration signal on auth files should prompt a review before `/pan:exec-phase`.
74
+
75
+ **Before committing to a milestone date externally:** run `/pan:preview milestone`. Look at `confidence_pct` and `sample_size`. If sample is <3, don't promise a date.
76
+
77
+ **Before running phases in parallel:** run `/pan:preview phases`. Parallel batches from the data layer are based on declared `depends_on` only; `hidden_coupling_count > 0` means there are cross-phase references the author should promote to explicit deps before parallelizing.
78
+
79
+ </workflow>
80
+
81
+ <process>
82
+
83
+ For all modes:
84
+
85
+ 1. Run the corresponding `pan-tools preview <mode>` subcommand.
86
+ 2. Parse its JSON output.
87
+ 3. Spawn `pan-previewer` with a prompt that includes:
88
+ - `<preview_input>` block carrying the full JSON payload (mode field set explicitly)
89
+ - `<output_path>` block with the target file path
90
+ - `<files_to_read>` block with any phase context files the agent should load
91
+ 4. Agent writes the report file and returns a short confirmation.
92
+ 5. Echo the output path to the user.
93
+
94
+ The agent does not need workflow context beyond what the data layer provides. Keep spawned-agent prompts lean — the agent's context budget is for reasoning about the structured input, not for loading the whole project.
95
+
96
+ </process>
97
+
98
+ <output_contract>
99
+ The command returns the path to the generated preview document. Never paste the report back into conversation output — the file is the deliverable; reference it by path.
100
+ </output_contract>
101
+
102
+ <runtime_compatibility>
103
+
104
+ | Runtime | phase | phases | milestone |
105
+ |---------|-------|--------|-----------|
106
+ | Claude Code | Full, thinking enabled | Full, 1M-ctx bonus on Opus 4.7 | Full |
107
+ | OpenCode | Full | Data-layer + simple report | Full |
108
+ | Gemini CLI | Full | Data-layer + simple report | Full |
109
+ | Codex CLI | Full | Data-layer + simple report | Full |
110
+ | Copilot CLI | Full | Data-layer + simple report | Full |
111
+
112
+ The data layer (`pan-tools preview …`) works identically on all runtimes. What varies is the quality of the agent's synthesis — Opus 4.7 with thinking catches subtler risks than smaller models.
113
+
114
+ </runtime_compatibility>
@@ -35,3 +35,40 @@ The workflow handles all logic including:
35
35
  5. Cost estimation display (relative cost multiplier per profile)
36
36
  6. Confirmation display
37
37
  </process>
38
+
39
+ <tier_decision_tree>
40
+ **Opus 4.7 capability-aware routing** (since v2.10.0 — E-7). Even within a single profile, PAN picks a tier per-call based on three hints: context estimate, whether the task needs extended thinking, and whether prompt cache is warm.
41
+
42
+ The decision order `resolveModel` applies after the baseline profile pick:
43
+
44
+ ```
45
+ Baseline tier (from MODEL_PROFILES[agent][profile])
46
+
47
+
48
+ ┌─────────────────────────────────────────────┐
49
+ │ context_estimate > 700K tokens? │── yes ──▶ force reasoning (only 1M-ctx tier)
50
+ └─────────────────────────────────────────────┘
51
+ │ no
52
+
53
+ ┌─────────────────────────────────────────────┐
54
+ │ needs_thinking AND tier == fast? │── yes ──▶ upgrade fast → mid
55
+ └─────────────────────────────────────────────┘
56
+ │ no
57
+
58
+ ┌─────────────────────────────────────────────┐
59
+ │ cache_warm AND !needs_thinking │── yes ──▶ downgrade mid → fast
60
+ │ AND context_estimate < 50K AND tier == mid │
61
+ └─────────────────────────────────────────────┘
62
+ │ no
63
+
64
+ Final tier → provider-native model name
65
+ ```
66
+
67
+ **Quick guide:**
68
+ - Heavy verification (plan-checker, verifier, integration-checker, reviewer, debugger): `needs_thinking: true` — baseline upgrades fast→mid.
69
+ - Map-codebase single-shot mode on Opus 4.7: `context_estimate > 700K` — forced to reasoning.
70
+ - Routine exec tasks with project.md cached: `cache_warm + small ctx` — mid gets downgraded to fast for a cost win.
71
+ - All rules are additive to the `quality` / `balanced` / `budget` profile you pick here — the profile sets the baseline tier, and capability hints then move individual calls up or down from that baseline.
72
+
73
+ **Inspecting routing:** use `pan-tools resolve-model <agent> --metadata '{"context_estimate":900000,"needs_thinking":true}'` to see what tier a given hint set resolves to.
74
+ </tier_decision_tree>
@@ -0,0 +1,128 @@
1
+ ---
2
+ name: pan:review-deep
3
+ group: Review
4
+ description: Security audit + cross-reviewer check. OWASP/STRIDE pass by pan-hardener, then pan-meta-reviewer catches what the first pass missed. Writes consolidated deep-review.md.
5
+ argument-hint: "<phase-number>"
6
+ allowed-tools:
7
+ - Read
8
+ - Write
9
+ - Bash
10
+ - Glob
11
+ - Grep
12
+ - Task
13
+ ---
14
+
15
+ <objective>
16
+ Run a deeper review pass on a phase than `pan-reviewer` alone provides. Two new agents:
17
+
18
+ 1. **pan-hardener** — OWASP Top 10 (2025) + STRIDE threat model on files changed in the phase.
19
+ 2. **pan-meta-reviewer** — reads both the reviewer's and hardener's output, flags things both missed, disputes overstated severities.
20
+
21
+ Outputs are merged by `review-deep.cjs` into a single `.planning/reviews/<phase>/deep-review.md` with verdict, coverage stats, and conflict table. An audit entry is published to the `review-handoff` bus channel for traceability.
22
+
23
+ Consolidates Spec B v1's X-4 (self-review) + X-12 (harden) into a single command.
24
+ </objective>
25
+
26
+ <execution_context>
27
+ @~/.claude/pan-wizard-core/bin/lib/review-deep.cjs
28
+ @~/.claude/pan-wizard-core/bin/lib/bus.cjs
29
+ @~/.claude/agents/pan-hardener.md
30
+ @~/.claude/agents/pan-meta-reviewer.md
31
+ </execution_context>
32
+
33
+ <invocation_modes>
34
+
35
+ ### Standalone
36
+
37
+ ```
38
+ /pan:review-deep 07
39
+ ```
40
+
41
+ Run after `/pan:exec-phase 07` completes. Requires `pan-reviewer` to have already written its review to `.planning/phases/07/review.md` (exec-phase does this automatically).
42
+
43
+ ### Integrated with exec-phase
44
+
45
+ ```
46
+ /pan:exec-phase 07 --deep-review
47
+ ```
48
+
49
+ Runs the normal exec → reviewer pipeline, then auto-invokes this command. Recommended for phases touching auth, payment, PII, migrations, or public APIs.
50
+
51
+ ### Integrated with focus-exec
52
+
53
+ ```
54
+ /pan:focus-exec --deep-review
55
+ ```
56
+
57
+ Per-item deep review during focus campaigns. Useful for high-stakes batches.
58
+
59
+ </invocation_modes>
60
+
61
+ <process>
62
+
63
+ 1. **Load reviewer output** — read `.planning/phases/<N>/review.md` written by the earlier `pan-reviewer` step. If missing, warn and offer to run `pan-reviewer` first.
64
+
65
+ 2. **Spawn pan-hardener** (parallel-safe with step 3 isolation below, but recommended sequential for audit clarity):
66
+ - Prompt includes: `<files_to_read>` with phase plan + diff + reviewer output; `<output_path>` = `.planning/reviews/<N>/hardener.md`; `<framework_scope>` block reminding of OWASP/STRIDE coverage.
67
+ - Agent writes its findings to the output path, returns confirmation.
68
+
69
+ 3. **Spawn pan-meta-reviewer**:
70
+ - Prompt includes: `<files_to_read>` with both the reviewer output (`.planning/phases/<N>/review.md`) AND the hardener output (`.planning/reviews/<N>/hardener.md`), plus representative diff snippets; `<output_path>` = `.planning/reviews/<N>/meta.md`.
71
+ - Agent reads both first-pass reports, identifies missed patterns, disputes overstated severities, writes to output path.
72
+
73
+ 4. **Merge** — call:
74
+ ```
75
+ pan-tools review-deep merge <N> \
76
+ --reviewer-file .planning/phases/<N>/review.md \
77
+ --hardener-file .planning/reviews/<N>/hardener.md \
78
+ --meta-file .planning/reviews/<N>/meta.md
79
+ ```
80
+ The merger parses all three, sorts by severity, computes verdict (`ok` | `ok_with_minor` | `fix_before_merge` | `review_required` | `block`), writes `.planning/reviews/<N>/deep-review.md`, and publishes an audit record to the `review-handoff` bus channel.
81
+
82
+ 5. **Report back** — echo verdict + finding count + conflict count. If verdict is `block` or `review_required`, recommend the user review `deep-review.md` before proceeding.
83
+
84
+ </process>
85
+
86
+ <verdict_semantics>
87
+
88
+ | Verdict | Meaning | Action |
89
+ |---------|---------|--------|
90
+ | `ok` | No findings at any severity | Merge freely |
91
+ | `ok_with_minor` | Only low/info findings | Merge with noted follow-ups |
92
+ | `fix_before_merge` | Medium findings present | Fix or document before merge |
93
+ | `review_required` | High findings present | Human sign-off required |
94
+ | `block` | At least one critical | Do not merge |
95
+
96
+ Verdict is driven by the highest-severity finding across all three sources. Meta-reviewer disputes can downgrade severity on specific findings but don't change the headline verdict — the merger trusts the consensus of the explicit severity labels.
97
+
98
+ </verdict_semantics>
99
+
100
+ <output_files>
101
+
102
+ - `.planning/phases/<N>/review.md` — pan-reviewer output (written earlier by exec-phase)
103
+ - `.planning/reviews/<N>/hardener.md` — pan-hardener output (new)
104
+ - `.planning/reviews/<N>/meta.md` — pan-meta-reviewer output (new)
105
+ - `.planning/reviews/<N>/deep-review.md` — merged consolidated report (final deliverable)
106
+ - `.planning/bus/review-handoff.jsonl` — audit trail entry (append-only)
107
+
108
+ </output_files>
109
+
110
+ <runtime_compatibility>
111
+
112
+ | Runtime | hardener | meta-reviewer | merge |
113
+ |---------|----------|---------------|-------|
114
+ | Claude Code | Full, thinking enabled (6000/4000 budget) | Full | Full |
115
+ | OpenCode | Prose "think step-by-step" preamble substitutes for thinking | Same | Full (runtime-agnostic CLI) |
116
+ | Gemini | Same | Same | Full |
117
+ | Codex | Same | Same | Full |
118
+ | Copilot | Same | Same | Full |
119
+
120
+ The merger CLI (`pan-tools review-deep merge`) is pure Node.js and works identically across runtimes. Only the *quality* of the hardener and meta-reviewer outputs varies with model capability — Opus 4.7 with extended thinking produces the richest findings.
121
+
122
+ </runtime_compatibility>
123
+
124
+ <calibration_note>
125
+
126
+ Deep review is opt-in for a reason: it costs roughly 3× a normal review (hardener + meta + merge adds two agent spawns per phase). Use it for high-stakes phases, not every phase. `--deep-review` gating by phase tags is a v3.4 candidate enhancement.
127
+
128
+ </calibration_note>
@@ -75,6 +75,17 @@ After initial verification of each requirement:
75
75
  This prevents premature FAIL verdicts from incomplete investigation.
76
76
  </reflexion_loop>
77
77
 
78
+ <cache_priming>
79
+ **Before the verifier agent runs**, prime the prompt cache once. The verifier reads project.md / requirements.md / roadmap.md every run; caching avoids ~15-50K input tokens per invocation.
80
+
81
+ Run once:
82
+ ```
83
+ pan-tools cache prime --summary
84
+ ```
85
+
86
+ See [plan-phase.md](plan-phase.md) or [exec-phase.md](exec-phase.md) for the full explanation. No-op on non-Claude runtimes.
87
+ </cache_priming>
88
+
78
89
  <process>
79
90
  Execute the verify-work workflow from @~/.claude/pan-wizard-core/workflows/verify-phase.md end-to-end.
80
91
  Preserve all workflow gates (session management, test presentation, diagnosis, fix planning, routing).
@@ -0,0 +1,146 @@
1
+ ---
2
+ name: pan:what-if
3
+ group: Foresight
4
+ description: Explore a phase's alternative approach in an isolated git worktree. Replays the scenario, compares to the original plan, writes a report.
5
+ argument-hint: "<phase-number> <scenario-text>"
6
+ allowed-tools:
7
+ - Read
8
+ - Write
9
+ - Edit
10
+ - Bash
11
+ - Grep
12
+ - Glob
13
+ - Task
14
+ ---
15
+
16
+ <objective>
17
+ Safely explore "what if we had done X instead?" for a phase. Creates an isolated git worktree, spawns `pan-counterfactual` inside it, lets the agent experiment without touching the main tree, collects a structured comparison payload, writes `.planning/counterfactuals/<phase>-<slug>.md` in the main tree, and cleans up the worktree.
18
+
19
+ Unchanged from Spec B v1's X-9. Already narrow enough to stand alone.
20
+ </objective>
21
+
22
+ <execution_context>
23
+ @~/.claude/pan-wizard-core/bin/lib/whatif.cjs
24
+ @~/.claude/agents/pan-counterfactual.md
25
+ </execution_context>
26
+
27
+ <invocation>
28
+
29
+ ```
30
+ /pan:what-if 7 "Use Redis instead of Memcached"
31
+ /pan:what-if 4 "Skip the migration step entirely"
32
+ /pan:what-if 12 "What if we'd picked NoSQL at the start?"
33
+ ```
34
+
35
+ **Requirements:**
36
+ - Main project must be a git repository (worktrees require git).
37
+ - Working tree can be dirty — worktree is based on current HEAD, your uncommitted changes stay in main.
38
+
39
+ </invocation>
40
+
41
+ <process>
42
+
43
+ ### Stage 1 — Prepare
44
+
45
+ ```
46
+ pan-tools whatif prepare <phase> "<scenario text>"
47
+ ```
48
+
49
+ The CLI:
50
+ 1. Validates the phase exists.
51
+ 2. Slugifies the scenario (lowercase, alphanumerics + hyphens, ≤50 chars).
52
+ 3. Creates a git worktree at `<parent-of-cwd>/pan-whatif-<phase>-<slug>-<ts>` on a fresh branch `pan-whatif/<phase>-<slug>-<ts>`.
53
+ 4. Returns `{phase, phase_name, scenario, slug, plans, summaries, has_executed, worktree: {worktree_path, branch, base}}`.
54
+
55
+ If worktree creation fails (not a git repo, dirty tree blocking, etc.), abort with a clear error.
56
+
57
+ ### Stage 2 — Spawn pan-counterfactual
58
+
59
+ Spawn the agent with its working directory set to `worktree_path`. Prompt includes:
60
+ - `<files_to_read>` — the phase plan, any existing summary, the main project's `CLAUDE.md` so the agent understands conventions.
61
+ - `<scenario>` — the user's scenario text verbatim.
62
+ - `<worktree_path>` — so the agent knows the safe boundary.
63
+ - `<time_budget>` — advisory (10-20 min of reasoning/file-ops).
64
+
65
+ The agent explores, then returns a JSON payload with `{summary, differences, recommendations, risks, verdict}`.
66
+
67
+ ### Stage 3 — Write report in MAIN tree
68
+
69
+ Run (from main tree, NOT worktree):
70
+
71
+ ```
72
+ pan-tools whatif report <phase> "<scenario>" --comparison '<agent-json>'
73
+ ```
74
+
75
+ This writes `.planning/counterfactuals/<phase>-<slug>.md`. The file belongs to the main tree and survives worktree cleanup.
76
+
77
+ ### Stage 4 — Cleanup
78
+
79
+ ```
80
+ pan-tools whatif cleanup --worktree <path> --branch <name> --force
81
+ ```
82
+
83
+ Removes the worktree directory and deletes the counterfactual branch. Best-effort: warnings are surfaced but don't block.
84
+
85
+ ### Stage 5 — Confirm
86
+
87
+ Echo the report path and verdict to the user. Done.
88
+
89
+ </process>
90
+
91
+ <safety>
92
+
93
+ **Worktree isolation is the safety mechanism.** The agent can edit files freely inside the worktree without affecting the main tree. Git treats worktrees as independent checkouts sharing the same object store.
94
+
95
+ **The agent is instructed NOT to commit inside the worktree.** Commits would be wasted effort since the worktree is deleted after report generation. The agent contract calls this out explicitly.
96
+
97
+ **The agent is instructed NOT to push or merge.** No remote-affecting git operations.
98
+
99
+ **Cleanup is forced.** `--force` on worktree removal ensures even a worktree with uncommitted changes gets cleaned up. The report is the permanent artifact; the worktree is disposable.
100
+
101
+ **If cleanup fails**, the worktree and branch remain. Re-run `pan-tools whatif cleanup` with the same args, or clean up manually:
102
+
103
+ ```
104
+ git worktree remove --force <worktree_path>
105
+ git branch -D <branch_name>
106
+ ```
107
+
108
+ </safety>
109
+
110
+ <output_paths>
111
+
112
+ - `.planning/counterfactuals/<phase>-<slug>.md` — the comparison report (permanent)
113
+ - `<parent>/pan-whatif-<phase>-<slug>-<ts>/` — the worktree (temporary, deleted after report)
114
+ - branch `pan-whatif/<phase>-<slug>-<ts>` — the worktree's branch (deleted after report)
115
+
116
+ Filename + branch include a timestamp so running what-if multiple times on the same phase+scenario produces distinct reports without overwriting.
117
+
118
+ </output_paths>
119
+
120
+ <runtime_compatibility>
121
+
122
+ | Runtime | Support |
123
+ |---------|---------|
124
+ | Claude Code | Full — worktree + agent + report |
125
+ | OpenCode | Partial — worktree + report work; agent spawn depends on runtime's task support |
126
+ | Gemini CLI | Partial — same caveat |
127
+ | Codex CLI | Partial — same caveat |
128
+ | Copilot CLI | Partial — same caveat |
129
+
130
+ The worktree and report layers are pure Node.js + git and work everywhere git is available. The agent orchestration varies by runtime's task-spawning capabilities. On any runtime that can't spawn an agent, the user can manually explore in the worktree and run `pan-tools whatif report` with a handwritten comparison JSON.
131
+
132
+ </runtime_compatibility>
133
+
134
+ <when_to_use>
135
+
136
+ **Use `/pan:what-if` when:**
137
+ - You're debating a decision mid-milestone and want to sample the alternative without rebuilding
138
+ - A phase is complete and you want to retrospectively compare approaches
139
+ - A reviewer asks "why not X?" and you want a structured answer
140
+
141
+ **Skip `/pan:what-if` when:**
142
+ - The alternative is trivially decidable from reading the plan (don't spawn an agent)
143
+ - You're already committed and the exploration is sunk-cost sympathy
144
+ - The main tree has massive uncommitted changes that the scenario depends on — the worktree is based on HEAD, so those changes will not be present in it
145
+
146
+ </when_to_use>
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env node
2
+ // PAN cost logger — SubagentStop hook (v3.4+).
3
+ //
4
+ // Claude Code fires SubagentStop when a Task-spawned sub-agent finishes.
5
+ // The hook receives JSON on stdin describing the session, transcript path,
6
+ // and (when available) usage metadata.
7
+ //
8
+ // We append a minimal record to .planning/metrics/tokens.jsonl so
9
+ // `/pan:cost` reports reflect real agent spawns, not just manually-appended
10
+ // entries. Token counts are best-effort: if the hook input doesn't carry
11
+ // them, we log a record with zeros + a `source: "hook"` flag so the
12
+ // aggregator distinguishes these from fully-instrumented records.
13
+ //
14
+ // This hook NEVER blocks the main agent loop — all errors are swallowed.
15
+
16
+ const fs = require('fs');
17
+ const path = require('path');
18
+
19
+ const METRICS_DIR = 'metrics';
20
+ const TOKENS_FILE = 'tokens.jsonl';
21
+
22
/**
 * Build a cost record from a SubagentStop hook payload.
 * Pure function (apart from reading the current time for `ts`) — safe to
 * test without stdin.
 *
 * Token counts are best-effort: any count that is missing or is not a
 * finite, non-negative number is recorded as 0. Every record carries
 * `source: 'hook'`.
 *
 * @param {Object} data - Parsed SubagentStop event JSON
 * @param {string} cwd - Project cwd. Currently unused by this function; kept
 *   so the signature stays stable for existing callers.
 * @returns {Object|null} Cost record, or null if the event should be ignored
 *   (non-object or array payload, or a hook event other than SubagentStop)
 */
function buildCostRecord(data, cwd) {
  // Arrays are `typeof 'object'` too, but an event payload is a plain object.
  if (!data || typeof data !== 'object' || Array.isArray(data)) return null;

  // Only log actual subagent stops; ignore other Stop variants. A payload
  // that carries no hook_event_name at all is still accepted.
  if (data.hook_event_name && data.hook_event_name !== 'SubagentStop') return null;

  const { usage } = data;

  return {
    ts: new Date().toISOString(),
    agent: data.agent_type || data.subagent_type || null,
    command: null,
    model: data.model || null,
    tier: null,
    input_tokens: extractNumber(usage, 'input_tokens'),
    output_tokens: extractNumber(usage, 'output_tokens'),
    cache_read_tokens: extractNumber(usage, 'cache_read_input_tokens'),
    cache_write_tokens: extractNumber(usage, 'cache_creation_input_tokens'),
    cost_usd: null,
    phase: data.phase || null,
    session: data.session_id || null,
    source: 'hook',
  };
}

/**
 * Read a token count from a usage object.
 *
 * @param {Object} obj - Usage object (may be missing or malformed)
 * @param {string} key - Property name to read
 * @returns {number} The value when it is a finite, non-negative number; 0 otherwise
 */
function extractNumber(obj, key) {
  if (!obj || typeof obj !== 'object') return 0;
  const v = obj[key];
  // Number.isFinite rejects non-numbers, NaN and ±Infinity. Infinity matters
  // because JSON.stringify would write it to the log as `null`.
  return Number.isFinite(v) && v >= 0 ? v : 0;
}
60
+
61
/**
 * Append one JSON line to `<cwd>/.planning/metrics/tokens.jsonl`, creating the
 * metrics directory if needed. Never throws: any failure while creating the
 * directory, serializing, or writing is swallowed so the hook cannot block.
 *
 * @param {string} cwd - Working directory (project root)
 * @param {Object} record - Cost record from buildCostRecord
 * @returns {boolean} true if the line was written, false if `record` is
 *   falsy or the write failed
 */
function appendRecord(cwd, record) {
  if (!record) {
    return false;
  }

  let written = false;
  try {
    const metricsDir = path.join(cwd, '.planning', METRICS_DIR);
    fs.mkdirSync(metricsDir, { recursive: true });

    const target = path.join(metricsDir, TOKENS_FILE);
    const line = JSON.stringify(record) + '\n';
    fs.appendFileSync(target, line, 'utf-8');
    written = true;
  } catch {
    // Best-effort logging: report failure through the return value only.
    written = false;
  }
  return written;
}
80
+
81
+ // ─── Stdin driver ───────────────────────────────────────────────────────────
82
+
83
// Run only when this file is executed directly, not when it is loaded
// through require().
if (require.main === module) {
  const chunks = [];
  process.stdin.setEncoding('utf8');

  process.stdin.on('data', (chunk) => {
    chunks.push(chunk);
  });

  process.stdin.on('end', () => {
    try {
      const event = JSON.parse(chunks.join(''));
      // Prefer the cwd carried by the event (Claude Code sends it in most
      // hook payloads); otherwise fall back to process.cwd(), which is the
      // project root when Claude Code invokes the hook.
      const projectDir = event.cwd || event.workspace?.current_dir || process.cwd();
      const costRecord = buildCostRecord(event, projectDir);
      appendRecord(projectDir, costRecord);
    } catch {
      // Silent fail — don't block agent loop on hook errors.
    }
  });
}
101
+
102
+ module.exports = { buildCostRecord, appendRecord, METRICS_DIR, TOKENS_FILE };