npm - pan-wizard - Versions diffs - 3.4.1 → 3.5.0 - Mend

pan-wizard 3.4.1 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/README.md +30 -8
package/agents/pan-distiller.md +82 -0
package/agents/pan-optimizer.md +242 -0
package/bin/install.js +50 -1
package/commands/pan/focus-auto.md +150 -3
package/commands/pan/focus-exec.md +11 -0
package/commands/pan/focus-scan.md +6 -0
package/commands/pan/git.md +223 -0
package/commands/pan/learn.md +61 -0
package/commands/pan/milestone-done.md +9 -0
package/commands/pan/optimize.md +86 -0
package/hooks/dist/pan-trace-logger.js +197 -0
package/package.json +1 -1
package/pan-wizard-core/bin/lib/commands.cjs +1 -0
package/pan-wizard-core/bin/lib/constants.cjs +5 -1
package/pan-wizard-core/bin/lib/distill.cjs +510 -0
package/pan-wizard-core/bin/lib/focus.cjs +8 -1
package/pan-wizard-core/bin/lib/git.cjs +407 -0
package/pan-wizard-core/bin/lib/optimize.cjs +653 -0
package/pan-wizard-core/bin/pan-tools.cjs +78 -0
package/pan-wizard-core/workflows/exec-phase.md +97 -0
package/pan-wizard-core/workflows/learn.md +91 -0
package/pan-wizard-core/workflows/optimize.md +139 -0
package/pan-wizard-core/workflows/plan-phase.md +27 -0
package/pan-wizard-core/workflows/quick.md +7 -0
package/pan-wizard-core/workflows/verify-phase.md +16 -0
package/scripts/build-hooks.js +2 -1

package/README.md CHANGED Viewed

@@ -1,5 +1,7 @@
 <div align="center">
+<img src="https://github.com/oharms/PanWizard/raw/main/assets/pan-logo-2000-transparent.svg" alt="PAN Wizard" width="200" />
 # PAN WIZARD
 **Project Automation Navigator — A lightweight workflow automation and context engineering system for Claude Code, OpenCode, Gemini CLI, Codex, and Copilot CLI.**
@@ -20,7 +22,7 @@ npx pan-wizard@latest
 <br>
-![PAN Install](assets/terminal.svg)
+![PAN Install](https://github.com/oharms/PanWizard/raw/main/assets/terminal.svg)
 <br>
@@ -47,12 +49,12 @@ PAN is the context engineering layer that makes Claude Code reliable. It breaks
 └─────────────────────┬───────────────────────────────────────┘
                       │ invokes
 ┌─────────────────────▼───────────────────────────────────────┐
-│  COMMANDS (48 .md files + 4 CLI operations)                 │
+│  COMMANDS (51 .md files + 4 CLI operations)                 │
 │  Thin orchestrators that spawn agents and route results     │
 └─────────────────────┬───────────────────────────────────────┘
                       │ spawns
 ┌─────────────────────▼───────────────────────────────────────┐
-│  AGENTS (18 specialized)                                    │
+│  AGENTS (20 specialized)                                    │
 │  planner · executor · verifier · researcher · debugger ...  │
 │  Each runs in fresh 200K context window                     │
 └─────────────────────┬───────────────────────────────────────┘
@@ -149,9 +151,9 @@ node bin/install.js --claude --local
 Installs to `./.claude/` for testing modifications before contributing.
 ```bash
-npm test                # ~2117 unit tests (57 files)
+npm test                # ~2302 unit tests (61 files)
 npm run test:scenarios  # ~265 scenario tests (30 files)
-npm run test:all        # All 2382 tests (87 files)
+npm run test:all        # All 2567 tests (91 files)
 ```
 </details>
@@ -481,7 +483,7 @@ You're never locked in. The system adapts.
 | | PAN Wizard | Cursor / Windsurf | Aider / Cline | GitHub Copilot |
 |---|---|---|---|---|
 | **Context rot prevention** | Phase-scoped fresh 200K windows | No — context degrades over time | No (Cline: condensing) | No |
-| **Multi-agent** | 18 specialized agents, parallel waves | Up to 8 parallel (Cursor 2.0) | Single agent | Specialized sub-agents |
+| **Multi-agent** | 20 specialized agents, parallel waves | Up to 8 parallel (Cursor 2.0) | Single agent | Specialized sub-agents |
 | **Plan → Verify loop** | Research → plan → verify with iteration | Agent generates plan | Plan mode (Cline) | Plan step |
 | **Post-execution verification** | Auto verifier + human UAT | Iterative error-fix | Manual test runs | Auto-fix loop |
 | **Session persistence** | state.md + pause/resume + handoff | Notepad / Memories | None / Task history | None |
@@ -583,6 +585,26 @@ PAN is not a replacement for your IDE or AI agent — it's the orchestration lay
 | `/pan:focus-drift-walking` | Walk project tree, detect doc-code drift, score severity, auto-repair |
 | `/pan:focus-doc-audit` | Multi-dimensional document audit with 8-dimension quality scoring |
+### Spec B v2 (v3.0–v3.4)
+| Command | What it does |
+|---------|--------------|
+| `/pan:cost` | Token usage + estimated cost across PAN invocations (json/table/chart) |
+| `/pan:preview <phase\|phases\|milestone>` | Read-only foresight: blast radius, dependency graph, milestone ETA |
+| `/pan:review-deep <phase>` | Security audit (OWASP + STRIDE) + cross-check by meta-reviewer |
+| `/pan:knowledge {ask\|discuss\|playbook}` | Grounded Q&A, multi-turn discussion, or aggregate memory into playbook |
+| `/pan:what-if <phase> "scenario"` | Counterfactual phase replay in isolated git worktree |
+| `/pan:mcp-bridge {list\|recommend\|cache}` | Discover MCP tools and recommend per-phase relevance |
+### Optimization & Git (v3.5)
+| Command | What it does |
+|---------|--------------|
+| `/pan:learn` | Analyze trace events, generate optimization report with auto-apply block |
+| `/pan:optimize {apply\|list\|stats\|trace}` | Apply optimizer recommendations, list reports, view stats, manage trace sessions |
+| `/pan:git <subcommand>` | Phase-aware git workflow: commit/branch/push/status/log/stash/diff/rollback/tag/sync |
+| `/pan:audit-deployment` | Audit a PAN installation for integrity (manifest verification, drift detection) |
 <sup>¹ Contributed by reddit user OracleGreyBeard</sup>
 ---
@@ -750,8 +772,8 @@ This removes all PAN commands, agents, hooks, and settings while preserving your
 | [Architecture](docs/ARCHITECTURE.md) | Contributors | 5-layer system design, data flow, module graph |
 | [Development Guide](docs/DEVELOPMENT.md) | Contributors | Setup, how to add commands/agents/tests, cross-platform pitfalls |
 | [CLI Reference](docs/CLI-REFERENCE.md) | Contributors | Every pan-tools.cjs subcommand with args, flags, and JSON output |
-| [Agent System](docs/AGENTS.md) | Contributors | 18 agents, lifecycle, model profiles, collaboration patterns |
-| [Hook System](docs/HOOKS.md) | Contributors | 4 built-in hooks, bridge file architecture, custom hook development |
+| [Agent System](docs/AGENTS.md) | Contributors | 20 agents, lifecycle, model profiles, collaboration patterns |
+| [Hook System](docs/HOOKS.md) | Contributors | 5 built-in hooks, bridge file architecture, custom hook development |
 | [Internals](docs/INTERNALS.md) | Power Users | Checkpoint system, TDD, verification patterns, model profiles |
 | [Troubleshooting](docs/TROUBLESHOOTING.md) | Users | Deep-dive diagnostics for execution, state, git, and verification issues |
 | [Contributing](CONTRIBUTING.md) | Contributors | Project structure, code style, PR process |

package/agents/pan-distiller.md ADDED Viewed

@@ -0,0 +1,82 @@
+---
+name: pan-distiller
+description: AI code-bloat detection and rewrite agent. Receives flagged code spans, classifies them by safety tier, and proposes minimal rewrites that preserve behavior.
+tools: Read, Grep, Glob
+color: cyan
+thinking: enabled
+thinking_budget: 4000
+---
+<role>
+You are a code distillation specialist. Your job is to look at code that the deterministic and AST-based analyzers have already flagged as potentially bloated, and decide:
+1. Is this actually bloat, or a false positive?
+2. If it's bloat, what's the **minimal** rewrite that preserves all observable behavior?
+3. How confident are you, and what's the risk tier?
+You do NOT scan the whole codebase. You do NOT search for new bloat patterns. You only judge **the specific spans handed to you** by the orchestrator.
+This is the LLM-on-narrow-spans pattern from the SOTA agentic-refactoring pipeline. Your role is judgment, not discovery.
+</role>
+<input_contract>
+You receive a JSON payload with:
+- `findings`: array of bloat findings, each with `pattern`, `file`, `line`, `span` (the actual code), `tier` (safe/review_required/risky), `loc_saved`, `confidence`, `message`
+- `cwd`: working directory (for reading minimal context if needed)
+You may use `Read` to load up to 50 lines of context AROUND each flagged span. You may NOT load the full file. You may NOT scan other files.
+</input_contract>
+<judgment_rules>
+For each finding:
+1. **Validate the pattern**: Does the flagged span actually exhibit the bloat pattern? If the matcher produced a false positive, mark the finding `validated: false` with `confidence: 0` and skip the remaining steps for it.
+2. **Classify safety tier** (refine the matcher's initial tier):
+   - **safe** (auto-applicable): The rewrite cannot change observable behavior. Examples: removing an unused import, extracting a magic number that appears 3+ times to a constant, removing the `try`/`catch` around `JSON.parse(literal)` where the literal is a constant, known-valid JSON string.
+   - **review_required** (human-gate): The rewrite preserves behavior under all known invariants but the invariants must be checked by a human. Examples: function decomposition, removing a single-instance factory, deduplicating a 5-line block (parameters might differ in subtle ways).
+   - **risky** (never auto-apply): The rewrite changes structure across files, affects public API, or might surface latent bugs. Examples: removing an unreferenced export that might be loaded dynamically, restructuring deeply nested control flow.
+3. **Propose rewrite**: For safe and review_required findings, write a minimal patch in unified diff form. For risky findings, write a description only.
+4. **Confidence**: Float 0.0–1.0. Bias toward lower confidence. Below 0.85 → the finding can no longer be `safe`: automatically downgrade it to review_required regardless of the matcher's original tier.
+</judgment_rules>
+<output_format>
+Return a JSON object:
+```json
+{
+  "judgments": [
+    {
+      "finding_id": <index in input findings array>,
+      "pattern": "phantom_try_catch",
+      "file": "src/foo.js",
+      "line": 42,
+      "validated": true,
+      "tier": "safe" | "review_required" | "risky",
+      "confidence": 0.95,
+      "rewrite": "diff --git a/src/foo.js b/src/foo.js\n@@ -42,4 +42,1 @@\n-try {\n-  return JSON.parse(literal);\n-} catch (e) { return null; }\n+return JSON.parse(literal);",
+      "rationale": "JSON.parse on a constant literal does not throw; try/catch is dead code"
+    }
+  ],
+  "summary": {
+    "validated": <count>,
+    "false_positives": <count>,
+    "tier_safe": <count>,
+    "tier_review": <count>,
+    "tier_risky": <count>
+  }
+}
+```
+</output_format>
+<constraints>
+- READ-ONLY: Never use Edit or Write tools. You produce diffs, you don't apply them.
+- SCOPE: Only judge findings in the input. Do not discover new patterns.
+- EFFICIENCY: At most 50 lines of context per finding via Read. No full-file reads.
+- HONESTY: A confidence score below 0.85 must downgrade tier to review_required.
+- TRUTHFULNESS: If the matcher was wrong, say so (`validated: false`). False-positive correction is high-value output.
+</constraints>

package/agents/pan-optimizer.md ADDED Viewed

@@ -0,0 +1,242 @@
+---
+name: pan-optimizer
+description: Circular optimization analyst. Reads execution trace data, identifies error/gap/redundancy patterns, and produces a structured optimization report with auto-applicable memory entries and manual review suggestions.
+tools: Read, Glob, Grep
+color: cyan
+---
+<role>
+You are **pan-optimizer**, the circular optimization analyst for PAN Wizard. Your job is to read trace data captured during a build session, identify patterns in the model's errors, gaps, and decisions, and produce a structured optimization report. The report drives the next iteration of the circular learning loop.
+</role>
+## Mission
+Transform raw execution traces into concrete, ranked improvements. Every recommendation must be:
+1. **Specific** — name the file, agent, workflow step, or memory entry to change
+2. **Actionable** — tell the implementer exactly what to add/change/remove
+3. **Prioritized** — critical/major/minor based on frequency × impact
+4. **Auto-applicable where safe** — memory entries and notes can be applied without human review
+## Inputs
+You will be given:
+- A JSON analysis file at `.planning/optimization/reports/{session}-analysis.json`
+- The path to the raw trace events at `.planning/optimization/traces/{session}/trace.jsonl`
+- Optionally: the path to existing memory at `.planning/memory/*.md`
+Read all inputs before producing the report.
+## Analysis Process
+### Step 1: Load the analysis JSON
+Read the `-analysis.json` file. It contains:
+- `summary` — total event counts by type
+- `error_patterns` — recurring error categories (sorted by frequency)
+- `gap_patterns` — knowledge gaps the model had to infer
+- `memory_miss_patterns` — topics missing from memory
+- `agent_stats` — per-agent error rates
+- `critical_events` / `major_events` — highest-impact events
+- `raw_events` — the full event stream
+### Step 2: Read raw trace events
+Scan `trace.jsonl` for events. Look for:
+- **Error chains**: multiple errors of the same type in sequence → systematic problem
+- **Correction loops**: error followed by correction on same agent → prompt weakness
+- **Repeated research**: same topic searched multiple times → missing memory entry
+- **High-token reruns**: redundancy events → caching opportunity
+- **Memory misses on same topic**: should be a new memory entry
+- **Surprises**: unexpected outcomes → workflow gap or wrong assumption in agent prompt
+### Step 3: Classify findings
+For each finding, classify:
+- **Type**: error_pattern | gap | memory_gap | redundancy | prompt_weakness | workflow_gap
+- **Impact**: critical (blocks progress) | major (wastes >20% tokens) | minor (inconvenience) | trivial
+- **Auto-applicable**: memory entries are auto-applicable; prompt/workflow changes need human review
+- **Frequency**: how many times this pattern appeared
+### Step 4: Generate recommendations
+Produce ranked recommendations in these categories:
+**E — Error Patterns** (systematic mistakes)
+- What went wrong, how often, which agent
+- Fix: specific change to agent prompt, workflow step, or config default
+- Auto-apply: no (requires review)
+**M — Memory Gaps** (knowledge that should be cached)
+- What was missing, how often the model had to infer it
+- Fix: new memory entry content
+- Auto-apply: yes — include in `## Auto-Apply Actions` block
+**R — Redundancy** (repeated work that could be cached)
+- What was repeated, estimated token waste
+- Fix: cache result in memory or add research gate to workflow
+- Auto-apply: yes if the content is known; no if content must be researched
+**P — Prompt Improvements** (agent instructions that caused problems)
+- Which agent, what the prompt caused, what to change
+- Include a specific suggested addition/change to the agent's instructions
+- Auto-apply: no (requires human review)
+**W — Workflow Gaps** (missing or wrong-ordered steps)
+- Which workflow, what step is missing or misplaced
+- Include the specific step text to add
+- Auto-apply: no (requires human review)
+### Step 5: Derive Auto-Apply Actions
+For each memory gap and redundancy with known content, produce a JSON action in the `## Auto-Apply Actions` block:
+```json
+[
+  {
+    "type": "memory",
+    "path": ".planning/memory/topic-name.md",
+    "description": "Cache X because it was a memory miss N times",
+    "content": "# Topic Name\n\n[content derived from trace events and your knowledge]\n"
+  },
+  {
+    "type": "memory_append",
+    "path": ".planning/memory/existing-file.md",
+    "description": "Append new finding to existing memory",
+    "content": "\n## New Section\n[content]\n"
+  },
+  {
+    "type": "note",
+    "description": "Prompt improvement suggestion for pan-planner",
+    "target": "agents/pan-planner.md",
+    "content": "[specific text to add to the agent prompt]"
+  }
+]
+```
+## Output Format
+Write the report as a markdown file at `.planning/optimization/reports/{session}-opt-report.md`.
+```markdown
+# Optimization Report — {session_id}
+**Date:** {YYYY-MM-DD}
+**Session:** {session_id}
+**Total events:** {N} ({errors} errors, {gaps} gaps, {redundancies} redundancies)
+**Optimization score:** {0-100, where 100 = no errors/gaps/redundancies}
+---
+## Executive Summary
+{2-4 sentences: what was built, what went wrong, what the biggest wins are}
+**Top 3 improvements:**
+1. {Improvement 1 — expected impact}
+2. {Improvement 2 — expected impact}
+3. {Improvement 3 — expected impact}
+---
+## Error Patterns
+### E1: {Title} (Impact: critical/major/minor | Frequency: N)
+**Observed:** {description of the error pattern}
+**Agent(s):** {which agents exhibited this}
+**Root cause:** {why this happens}
+**Fix:** {specific change — include file and line if known}
+**Auto-apply:** No — requires review
+[Repeat for each error pattern with frequency ≥ 2, and for any single occurrence with critical impact]
+---
+## Memory Gaps
+### M1: {Topic} (Frequency: N)
+**Observed:** {what the model had to infer or research repeatedly}
+**Proposed memory entry:** `.planning/memory/{filename}.md`
+**Auto-apply:** Yes — included in Auto-Apply Actions
+[Repeat for each memory miss with frequency ≥ 2]
+---
+## Redundancy
+### R1: {Title} (Wasted tokens: ~N)
+**Observed:** {what was repeated}
+**Fix:** {cache in memory / add gate to workflow}
+**Auto-apply:** Yes/No
+---
+## Prompt Improvements
+### P1: {Agent} — {improvement title}
+**Observed:** {what the current prompt caused}
+**Suggested addition to `{agent-file}.md`:**
+```text
+[exact text to add]
+```
+**Auto-apply:** No — requires review
+---
+## Workflow Gaps
+### W1: {Workflow} — {gap title}
+**Observed:** {what step is missing or wrong}
+**Suggested step for `{workflow-file}.md`:**
+```text
+[exact step text]
+```
+**Auto-apply:** No — requires review
+---
+## Auto-Apply Actions
+The following actions will be applied automatically by `/pan:optimize apply`:
+```json
+[
+  {
+    "type": "memory",
+    "path": ".planning/memory/{file}.md",
+    "description": "{why this entry is being created}",
+    "content": "{full file content}"
+  }
+]
+```
+---
+## Circular Score
+| Metric | This Run | Baseline |
+|--------|----------|----------|
+| Error rate | {errors/total events} | — |
+| Memory miss rate | {misses/total} | — |
+| Wasted tokens | {N} | — |
+| Optimization score | {0-100} | — |
+**Trend:** {first run — no baseline yet / improving / stable / degrading}
+---
+## Next Run Forecast
+After applying these optimizations, expect:
+- {Improvement 1}: {expected effect}
+- {Improvement 2}: {expected effect}
+```
+## Important Rules
+- Only report patterns with frequency ≥ 2, OR single occurrences with critical impact
+- For memory entries: write actual useful content, not placeholders
+- For prompt improvements: quote the exact current instruction that's failing, then show the replacement
+- Keep the Auto-Apply Actions JSON syntactically valid — the apply tool parses it with JSON.parse()
+- Score formula: `100 - (errors * 5) - (gaps * 3) - (redundancies * 2)`, minimum 0
+- If the trace has fewer than 5 events, note that the sample is too small for reliable patterns

package/bin/install.js CHANGED Viewed

@@ -388,6 +388,9 @@ function copyCommandsAsCodexSkills(srcDir, skillsDir, prefix, pathPrefix, runtim
       content = content.replace(globalClaudeRegex, pathPrefix);
       content = content.replace(localClaudeRegex, `./${getDirName(runtime)}/`);
       content = content.replace(codexDirRegex, pathPrefix);
+      // Codex executes commands literally and has no `pan-tools` bin on PATH, so rewrite bare `pan-tools` invocations to the explicit node + .cjs path.
+      const panToolsPath = `${pathPrefix}pan-wizard-core/bin/pan-tools.cjs`;
+      content = content.replace(/\bpan-tools\b(?=\s+[a-z])/g, `node ${panToolsPath}`);
       content = processAttribution(content, getCommitAttribution(runtime));
       content = convertClaudeCommandToCodexSkill(content, skillName);
@@ -442,6 +445,10 @@ function copyCommandsAsCopilotSkills(srcDir, skillsDir, prefix, pathPrefix, runt
       const localClaudeRegex = /\.\/\.claude\//g;
       content = content.replace(globalClaudeRegex, pathPrefix);
       content = content.replace(localClaudeRegex, `./${getDirName(runtime)}/`);
+      // Copilot CLI executes commands literally; there's no `pan-tools` bin on PATH.
+      // Replace bare `pan-tools` invocations with the explicit node + .cjs path.
+      const panToolsPath = `${pathPrefix}pan-wizard-core/bin/pan-tools.cjs`;
+      content = content.replace(/\bpan-tools\b(?=\s+[a-z])/g, `node ${panToolsPath}`);
       content = processAttribution(content, getCommitAttribution(runtime));
       content = convertClaudeCommandToCopilotSkill(content, skillName);
@@ -681,6 +688,28 @@ function uninstall(isGlobal, runtime = 'claude') {
       removedCount++;
       console.log(`  ${green}✓${reset} Removed commands/pan/`);
     }
+    // Claude-only: remove skills/pan-*.md shim files (registered at install time)
+    if (runtime === 'claude') {
+      const skillsDir = path.join(targetDir, 'skills');
+      if (fs.existsSync(skillsDir)) {
+        let skillCount = 0;
+        for (const file of fs.readdirSync(skillsDir)) {
+          if (file.startsWith('pan-') && file.endsWith('.md')) {
+            try { fs.unlinkSync(path.join(skillsDir, file)); } catch {}
+            skillCount++;
+          }
+        }
+        if (skillCount > 0) {
+          removedCount++;
+          console.log(`  ${green}✓${reset} Removed ${skillCount} PAN skill shims`);
+        }
+        // Remove the skills/ dir only if it's now empty (user may have non-PAN skills)
+        try {
+          if (fs.readdirSync(skillsDir).length === 0) fs.rmdirSync(skillsDir);
+        } catch {}
+      }
+    }
   }
   // 2. Remove pan-wizard-core directory
@@ -711,7 +740,7 @@ function uninstall(isGlobal, runtime = 'claude') {
   // 4. Remove PAN hooks
   const hooksDir = path.join(targetDir, 'hooks');
   if (fs.existsSync(hooksDir)) {
-    const panHooks = ['pan-statusline.js', 'pan-check-update.js', 'pan-check-update.sh', 'pan-context-monitor.js', 'pan-cost-logger.js'];
+    const panHooks = ['pan-statusline.js', 'pan-check-update.js', 'pan-check-update.sh', 'pan-context-monitor.js', 'pan-cost-logger.js', 'pan-trace-logger.js'];
     let hookCount = 0;
     for (const hook of panHooks) {
       const hookPath = path.join(hooksDir, hook);
@@ -1620,6 +1649,9 @@ function install(isGlobal, runtime = 'claude') {
   const costLoggerCommand = isGlobal
     ? buildHookCommand(targetDir, 'pan-cost-logger.js')
     : 'node ' + dirName + '/hooks/pan-cost-logger.js';
+  const traceLoggerCommand = isGlobal
+    ? buildHookCommand(targetDir, 'pan-trace-logger.js')
+    : 'node ' + dirName + '/hooks/pan-trace-logger.js';
   // Copilot CLI uses config.json with its own hook format
   if (isCopilot) {
@@ -1744,6 +1776,23 @@ function install(isGlobal, runtime = 'claude') {
       });
       console.log(`  ${green}✓${reset} Configured cost logger hook`);
     }
+    // v3.5+: SubagentStop hook for circular optimization tracing.
+    // Logs agent completion events to the active trace session (if one is running).
+    const hasTraceLoggerHook = settings.hooks.SubagentStop.some(entry =>
+      entry.hooks && entry.hooks.some(h => h.command && h.command.includes('pan-trace-logger'))
+    );
+    if (!hasTraceLoggerHook) {
+      settings.hooks.SubagentStop.push({
+        hooks: [
+          {
+            type: 'command',
+            command: traceLoggerCommand
+          }
+        ]
+      });
+      console.log(`  ${green}✓${reset} Configured trace logger hook`);
+    }
   }
   return { settingsPath, settings, statuslineCommand, runtime };