pan-wizard 3.5.2 → 3.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -9
- package/agents/pan-executor.md +18 -0
- package/agents/pan-experiment-runner.md +126 -0
- package/agents/pan-phase-researcher.md +16 -0
- package/agents/pan-plan-checker.md +80 -0
- package/agents/pan-planner.md +19 -0
- package/agents/pan-reviewer.md +2 -0
- package/agents/pan-verifier.md +41 -0
- package/bin/install-lib.cjs +55 -0
- package/bin/install.js +71 -22
- package/commands/pan/debug.md +1 -1
- package/commands/pan/experiment.md +219 -0
- package/commands/pan/health.md +1 -1
- package/commands/pan/learn.md +15 -1
- package/commands/pan/links.md +102 -0
- package/commands/pan/optimize.md +13 -0
- package/commands/pan/patches.md +10 -1
- package/commands/pan/phase-tests.md +1 -4
- package/commands/pan/todo-add.md +1 -1
- package/commands/pan/todo-check.md +1 -1
- package/hooks/dist/pan-cost-logger.js +54 -4
- package/hooks/dist/pan-trace-logger.js +72 -3
- package/package.json +67 -66
- package/pan-wizard-core/bin/lib/codebase.cjs +2 -0
- package/pan-wizard-core/bin/lib/commands.cjs +8 -0
- package/pan-wizard-core/bin/lib/config.cjs +13 -2
- package/pan-wizard-core/bin/lib/context-budget.cjs +73 -0
- package/pan-wizard-core/bin/lib/core.cjs +13 -0
- package/pan-wizard-core/bin/lib/doc-lint/frontmatter.js +270 -0
- package/pan-wizard-core/bin/lib/doc-lint/reporter.js +45 -0
- package/pan-wizard-core/bin/lib/doc-lint/schema.js +202 -0
- package/pan-wizard-core/bin/lib/doc-lint/validate.js +190 -0
- package/pan-wizard-core/bin/lib/doc-lint/walk.js +135 -0
- package/pan-wizard-core/bin/lib/doc-lint.cjs +287 -0
- package/pan-wizard-core/bin/lib/experiment.cjs +502 -0
- package/pan-wizard-core/bin/lib/learn-index.cjs +235 -0
- package/pan-wizard-core/bin/lib/learn-lint.cjs +292 -0
- package/pan-wizard-core/bin/lib/links.cjs +549 -0
- package/pan-wizard-core/bin/lib/optimize.cjs +474 -1
- package/pan-wizard-core/bin/lib/runner.cjs +473 -0
- package/pan-wizard-core/bin/lib/verify.cjs +23 -0
- package/pan-wizard-core/bin/pan-tools.cjs +247 -3
- package/pan-wizard-core/learnings/README.md +70 -0
- package/pan-wizard-core/learnings/index.json +540 -0
- package/pan-wizard-core/learnings/internal/.gitkeep +2 -0
- package/pan-wizard-core/learnings/internal/experiment-runner.md +81 -0
- package/pan-wizard-core/learnings/internal/external-research.md +93 -0
- package/pan-wizard-core/learnings/internal/loop-design.md +33 -0
- package/pan-wizard-core/learnings/internal/pan-dev-bugs.md +181 -0
- package/pan-wizard-core/learnings/universal/.gitkeep +2 -0
- package/pan-wizard-core/learnings/universal/atomic-state.md +21 -0
- package/pan-wizard-core/learnings/universal/binary-io.md +21 -0
- package/pan-wizard-core/learnings/universal/comment-syntax.md +21 -0
- package/pan-wizard-core/learnings/universal/composition.md +33 -0
- package/pan-wizard-core/learnings/universal/concurrency.md +33 -0
- package/pan-wizard-core/learnings/universal/dag-scheduler.md +33 -0
- package/pan-wizard-core/learnings/universal/data-driven-design.md +21 -0
- package/pan-wizard-core/learnings/universal/design-process.md +21 -0
- package/pan-wizard-core/learnings/universal/empirical-spike.md +21 -0
- package/pan-wizard-core/learnings/universal/error-handling.md +23 -0
- package/pan-wizard-core/learnings/universal/error-paths.md +21 -0
- package/pan-wizard-core/learnings/universal/glob-semantics.md +21 -0
- package/pan-wizard-core/learnings/universal/idempotency.md +21 -0
- package/pan-wizard-core/learnings/universal/invariants.md +21 -0
- package/pan-wizard-core/learnings/universal/io-patterns.md +21 -0
- package/pan-wizard-core/learnings/universal/numeric-edge-cases.md +21 -0
- package/pan-wizard-core/learnings/universal/output-conventions.md +21 -0
- package/pan-wizard-core/learnings/universal/parser-design.md +21 -0
- package/pan-wizard-core/learnings/universal/phase-locking.md +21 -0
- package/pan-wizard-core/learnings/universal/pipe-friendly-cli.md +21 -0
- package/pan-wizard-core/learnings/universal/schema-design.md +21 -0
- package/pan-wizard-core/learnings/universal/secret-handling.md +21 -0
- package/pan-wizard-core/learnings/universal/streaming-io.md +21 -0
- package/pan-wizard-core/learnings/universal/test-patterns.md +57 -0
- package/pan-wizard-core/learnings/universal/test-strategy.md +33 -0
- package/pan-wizard-core/learnings/universal/unicode.md +21 -0
- package/pan-wizard-core/learnings/universal/vendor-pattern.md +21 -0
- package/pan-wizard-core/references/guardrails.md +58 -0
- package/pan-wizard-core/references/handoff-decisions.md +156 -0
- package/pan-wizard-core/references/schemas/pan-command.schema.yml +39 -0
- package/pan-wizard-core/references/verification-patterns.md +31 -0
- package/pan-wizard-core/templates/config.json +2 -1
- package/pan-wizard-core/templates/idea.md +52 -0
- package/pan-wizard-core/templates/summary-complex.md +14 -5
- package/pan-wizard-core/templates/summary-minimal.md +6 -0
- package/pan-wizard-core/templates/summary-standard.md +14 -3
- package/pan-wizard-core/workflows/discuss-phase.md +108 -1
- package/pan-wizard-core/workflows/exec-phase.md +37 -1
- package/pan-wizard-core/workflows/execute-plan.md +14 -0
- package/pan-wizard-core/workflows/health.md +23 -0
- package/pan-wizard-core/workflows/new-project.md +65 -81
- package/pan-wizard-core/workflows/plan-phase.md +58 -0
- package/pan-wizard-core/workflows/transition.md +102 -7
- package/pan-wizard-core/workflows/verify-phase.md +14 -0
- package/scripts/build-hooks.js +7 -1
- package/scripts/generate-skills-docs.py +10 -8
- package/scripts/git-hooks/pre-commit +40 -0
- package/scripts/release-check.js +184 -0
package/README.md
CHANGED
|
@@ -49,12 +49,12 @@ PAN is the context engineering layer that makes Claude Code reliable. It breaks
|
|
|
49
49
|
└─────────────────────┬───────────────────────────────────────┘
|
|
50
50
|
│ invokes
|
|
51
51
|
┌─────────────────────▼───────────────────────────────────────┐
|
|
52
|
-
│ COMMANDS (
|
|
52
|
+
│ COMMANDS (.md files + CLI operations) │
|
|
53
53
|
│ Thin orchestrators that spawn agents and route results │
|
|
54
54
|
└─────────────────────┬───────────────────────────────────────┘
|
|
55
55
|
│ spawns
|
|
56
56
|
┌─────────────────────▼───────────────────────────────────────┐
|
|
57
|
-
│ AGENTS (
|
|
57
|
+
│ AGENTS (specialized) │
|
|
58
58
|
│ planner · executor · verifier · researcher · debugger ... │
|
|
59
59
|
│ Each runs in fresh 200K context window │
|
|
60
60
|
└─────────────────────┬───────────────────────────────────────┘
|
|
@@ -151,9 +151,9 @@ node bin/install.js --claude --local
|
|
|
151
151
|
Installs to `./.claude/` for testing modifications before contributing.
|
|
152
152
|
|
|
153
153
|
```bash
|
|
154
|
-
npm test #
|
|
155
|
-
npm run test:scenarios #
|
|
156
|
-
npm run test:all # All
|
|
154
|
+
npm test # Unit tests
|
|
155
|
+
npm run test:scenarios # Scenario tests
|
|
156
|
+
npm run test:all # All tests (unit + scenario)
|
|
157
157
|
```
|
|
158
158
|
|
|
159
159
|
</details>
|
|
@@ -451,6 +451,24 @@ The orchestrator never does heavy lifting. It spawns agents, waits, integrates r
|
|
|
451
451
|
|
|
452
452
|
**The result:** You can run an entire phase — deep research, multiple plans created and verified, thousands of lines of code written across parallel executors, automated verification against goals — and your main context window stays at 30-40%. The work happens in fresh subagent contexts. Your session stays fast and responsive.
|
|
453
453
|
|
|
454
|
+
### Reasoning-Trace Handoff
|
|
455
|
+
|
|
456
|
+
When agents hand work off via files, only OUTPUTS get passed by default — not the reasoning that produced them. Per Cognition's "Don't Build Multi-Agents" research (June 2025), silent decisions force downstream agents to reconcile contradictions blindly. PAN passes the reasoning explicitly:
|
|
457
|
+
|
|
458
|
+
- Plans carry a `## Plan Decisions` section (Locked / Open / Considered+rejected buckets) — the executor reads it before coding so it doesn't re-argue settled choices.
|
|
459
|
+
- Summaries carry an `## Implementation Decisions` section — the verifier reads it to understand WHY the executor deviated from the plan, not just THAT it did.
|
|
460
|
+
|
|
461
|
+
The plan-checker enforces this with two dedicated dimensions (Spec Sufficiency for Handoff, Decision Trace Completeness). Schema lives in `pan-wizard-core/references/handoff-decisions.md`.
|
|
462
|
+
|
|
463
|
+
### Self-Improving Learnings
|
|
464
|
+
|
|
465
|
+
PAN runs autonomous experiments in isolated folders, harvests the resulting telemetry, and promotes generalizable findings into a shipped patterns store at `pan-wizard-core/learnings/`:
|
|
466
|
+
|
|
467
|
+
- `learnings/universal/<topic>.md` — patterns that ship to every install (atomic-state, concurrency, idempotency, secret-handling, test-patterns, …). Loaded by planner / executor / verifier agents during their work.
|
|
468
|
+
- `learnings/internal/<topic>.md` — PAN-development patterns; source-only (stripped at install).
|
|
469
|
+
- `learnings/index.json` — topic→agent-relevance map. Workflows call `pan-tools learn topics-for --agent <role> --token-budget N` to load only relevant patterns instead of skim-everything (avoids the distractor-density anti-pattern).
|
|
470
|
+
- `pan-tools learn lint` — integrity check (duplicate IDs, dangling refs, scope leaks). Wired into `/check`.
|
|
471
|
+
|
|
454
472
|
### Atomic Git Commits
|
|
455
473
|
|
|
456
474
|
Each task gets its own commit immediately after completion:
|
|
@@ -483,7 +501,7 @@ You're never locked in. The system adapts.
|
|
|
483
501
|
| | PAN Wizard | Cursor / Windsurf | Aider / Cline | GitHub Copilot |
|
|
484
502
|
|---|---|---|---|---|
|
|
485
503
|
| **Context rot prevention** | Phase-scoped fresh 200K windows | No — context degrades over time | No (Cline: condensing) | No |
|
|
486
|
-
| **Multi-agent** |
|
|
504
|
+
| **Multi-agent** | Specialized agents, parallel waves | Up to 8 parallel (Cursor 2.0) | Single agent | Specialized sub-agents |
|
|
487
505
|
| **Plan → Verify loop** | Research → plan → verify with iteration | Agent generates plan | Plan mode (Cline) | Plan step |
|
|
488
506
|
| **Post-execution verification** | Auto verifier + human UAT | Iterative error-fix | Manual test runs | Auto-fix loop |
|
|
489
507
|
| **Session persistence** | state.md + pause/resume + handoff | Notepad / Memories | None / Task history | None |
|
|
@@ -553,7 +571,8 @@ PAN is not a replacement for your IDE or AI agent — it's the orchestration lay
|
|
|
553
571
|
| `/pan:todo-check` | List pending todos |
|
|
554
572
|
| `/pan:debug [desc]` | Systematic debugging with persistent state |
|
|
555
573
|
| `/pan:quick [--full]` | Execute ad-hoc task with PAN guarantees (`--full` adds plan-checking and verification) |
|
|
556
|
-
| `/pan:health [--repair] [--standards]` | Validate `.planning/` directory integrity
|
|
574
|
+
| `/pan:health [--repair] [--standards] [--full] [--drift] [--links]` | Validate `.planning/` directory integrity. `--repair` auto-fixes; `--standards` checks compliance; `--full` runs tests + build; `--drift` runs convention drift; `--links` attaches doc-code link-graph summary |
|
|
575
|
+
| `/pan:links [--strict]` | Validate the doc-code link graph: inline `[[<id>]]` refs, `// @pan:` source anchors, `require-code-mention` contracts (ADR-0027, v3.8.0+) |
|
|
557
576
|
| `/pan:phase-tests [N]` | Generate tests for a completed phase based on UAT criteria |
|
|
558
577
|
| `/pan:milestone-cleanup` | Archive accumulated phase directories from completed milestones |
|
|
559
578
|
| `/pan:retro` | Milestone retrospective — estimation accuracy, verification patterns, gap analysis |
|
|
@@ -768,11 +787,11 @@ This removes all PAN commands, agents, hooks, and settings while preserving your
|
|
|
768
787
|
|----------|----------|---------------|
|
|
769
788
|
| [User Guide](docs/USER-GUIDE.md) | Users | Workflow diagrams, command reference, config schema, troubleshooting |
|
|
770
789
|
| [FAQ](docs/FAQ.md) | Users | Common questions about cost, runtimes, customization |
|
|
771
|
-
| [Examples](docs/EXAMPLES.md) | Users |
|
|
790
|
+
| [Examples](docs/EXAMPLES.md) | Users | Worked examples from new project to cost-conscious development |
|
|
772
791
|
| [Architecture](docs/ARCHITECTURE.md) | Contributors | 5-layer system design, data flow, module graph |
|
|
773
792
|
| [Development Guide](docs/DEVELOPMENT.md) | Contributors | Setup, how to add commands/agents/tests, cross-platform pitfalls |
|
|
774
793
|
| [CLI Reference](docs/CLI-REFERENCE.md) | Contributors | Every pan-tools.cjs subcommand with args, flags, and JSON output |
|
|
775
|
-
| [Agent System](docs/AGENTS.md) | Contributors |
|
|
794
|
+
| [Agent System](docs/AGENTS.md) | Contributors | Agent inventory, lifecycle, model profiles, collaboration patterns |
|
|
776
795
|
| [Hook System](docs/HOOKS.md) | Contributors | 5 built-in hooks, bridge file architecture, custom hook development |
|
|
777
796
|
| [Internals](docs/INTERNALS.md) | Power Users | Checkpoint system, TDD, verification patterns, model profiles |
|
|
778
797
|
| [Troubleshooting](docs/TROUBLESHOOTING.md) | Users | Deep-dive diagnostics for execution, state, git, and verification issues |
|
package/agents/pan-executor.md
CHANGED
|
@@ -335,6 +335,17 @@ git commit -m "{type}({phase}-{plan}): {concise task description}
|
|
|
335
335
|
```
|
|
336
336
|
|
|
337
337
|
**5. Record hash:** `TASK_COMMIT=$(git rev-parse --short HEAD)` — track for SUMMARY.
|
|
338
|
+
|
|
339
|
+
**P-1605 (v3.7.5) — coalesce micro-task commits.** Per-task commits are the default. **Coalesce consecutive trivial tasks** into a single commit only when ALL of the following hold:
|
|
340
|
+
|
|
341
|
+
1. Tasks are adjacent in the plan (no checkpoint, deviation, or test-gate between them).
|
|
342
|
+
2. Each task touches **fewer than 5 lines** OR is a pure rename / dead-code removal.
|
|
343
|
+
3. Tasks share the same commit `{type}` (e.g., all `chore` config tweaks, all `docs` typo fixes).
|
|
344
|
+
4. The combined commit message can describe the group in one short subject line + bullet list.
|
|
345
|
+
|
|
346
|
+
If a coalesced batch would exceed 5 tasks, split it. Coalescing is **not** allowed for `feat`/`fix` (substantive behavior changes — keep per-task for blame and revert granularity) or for any task with explicit verification criteria. Record the constituent task IDs in the commit body so SUMMARY hashes still resolve to logical work units.
|
|
347
|
+
|
|
348
|
+
This reduces commit-thrash on auto-mode runs (observed in panloop trace: 25 commits / 29 min on Phase 1 scaffolding, mostly micro-tweaks). Reverts and bisect granularity stay intact for substantive changes.
|
|
338
349
|
</task_commit_protocol>
|
|
339
350
|
|
|
340
351
|
<summary_creation>
|
|
@@ -348,6 +359,13 @@ After all tasks complete, create `{phase}-{plan}-summary.md` at `.planning/phase
|
|
|
348
359
|
|
|
349
360
|
**Title:** `# Phase [X] Plan [Y]: [Name] Summary`
|
|
350
361
|
|
|
362
|
+
**Reasoning trace handoff (P-RES-003):** Before any other section, read the plan's `## Plan Decisions` block. While implementing, track:
|
|
363
|
+
- Which `Open` (O-N) decisions you faced and which option you took
|
|
364
|
+
- Any `Locked` (D-N) decision you DEPARTED from (a deviation; must be justified)
|
|
365
|
+
- Open questions you couldn't resolve and want the verifier to focus on
|
|
366
|
+
|
|
367
|
+
Write these into the summary's `## Implementation Decisions` section per the schema in @~/.claude/pan-wizard-core/references/handoff-decisions.md. Bucket order: Taken → Deviations → Open Questions. If genuinely none, write the single-line disclaimer: "No deviations or open questions — implementation followed plan exactly." Empty buckets without that disclaimer are not acceptable; the verifier needs to distinguish "executor thought about it and there was nothing" from "executor forgot."
|
|
368
|
+
|
|
351
369
|
**One-liner must be substantive:**
|
|
352
370
|
- Good: "JWT auth with refresh rotation using jose library"
|
|
353
371
|
- Bad: "Authentication implemented"
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: pan-experiment-runner
|
|
3
|
+
description: Drives an external AI coding session against an experiment folder. Observation-only — read-only relative to PAN source; writes only to the experiment folder's .planning/. Spawns the external runtime, watches its progress, decides when to declare the run done / failed / timed out. Used by the v3.7.0 self-improvement loop.
|
|
4
|
+
tools: Read, Bash, Glob, Grep
|
|
5
|
+
color: orange
|
|
6
|
+
thinking: enabled
|
|
7
|
+
thinking_budget: 6000
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
<role>
|
|
11
|
+
You are **pan-experiment-runner**, the watchdog for external runs of the v3.7.0 self-improvement loop.
|
|
12
|
+
|
|
13
|
+
You drive an **autonomous external** Claude Code (or Codex / Gemini / OpenCode) session against an isolated experiment folder. Your job: observe the external instance, decide when it's done, and surface progress. You do NOT do the build itself — the external session does.
|
|
14
|
+
|
|
15
|
+
**Spec:** `docs/specs/self_improvement_loop_featureai.md`
|
|
16
|
+
**Implementation:** `pan-wizard-core/bin/lib/runner.cjs`
|
|
17
|
+
</role>
|
|
18
|
+
|
|
19
|
+
<critical_constraints>
|
|
20
|
+
|
|
21
|
+
## Hard rules
|
|
22
|
+
|
|
23
|
+
You may NOT:
|
|
24
|
+
- Edit or write files in the **PAN source repo** (`d:/PanWizard/` or wherever it's cloned)
|
|
25
|
+
- Edit or write files in the **experiment folder's source code** (anything outside `<experiment>/.planning/`)
|
|
26
|
+
- Inject prompts into the running external instance (no mid-flight intervention)
|
|
27
|
+
- Modify the experiment's idea.md after scaffolding (the idea is the contract)
|
|
28
|
+
|
|
29
|
+
You MAY:
|
|
30
|
+
- Read any file in the experiment folder
|
|
31
|
+
- Tail the experiment's `.planning/state.md`, `.planning/agent-history.json`, summary files
|
|
32
|
+
- Update the experiment's `.planning/run-state.json` (managed by `runner.cjs`)
|
|
33
|
+
- Write trace events to `.planning/run-state.json`'s events array
|
|
34
|
+
- Surface progress to the orchestrating user via your reply
|
|
35
|
+
|
|
36
|
+
The agent's tool list excludes `Edit` and `Write` precisely to enforce this. If you find yourself wanting to fix something in the experiment, **stop and report instead** — the user can intervene manually.
|
|
37
|
+
|
|
38
|
+
</critical_constraints>
|
|
39
|
+
|
|
40
|
+
<stop_conditions>
|
|
41
|
+
|
|
42
|
+
## When to declare done
|
|
43
|
+
|
|
44
|
+
Stop conditions are checked by `runner.cjs` automatically (timeout, exit code, kill signal). You declare success / failure based on the run-state.json that runner.cjs produces:
|
|
45
|
+
|
|
46
|
+
| `run-state.json` `status` | `stop_reason` | Meaning |
|
|
47
|
+
|---|---|---|
|
|
48
|
+
| `done` | `success` | External instance exited 0; experiment build succeeded |
|
|
49
|
+
| `failed` | `error` | External instance exited non-zero; report the captured stderr |
|
|
50
|
+
| `failed` | `timeout` | External instance ran past the timeout; runner aborted it |
|
|
51
|
+
| `failed` | `manual` | Someone called `pan-tools experiment stop <slug>` |
|
|
52
|
+
|
|
53
|
+
After the runner exits, you may also examine the experiment's own `.planning/` to enrich the report:
|
|
54
|
+
- Did the external session actually create phases / plans / summaries?
|
|
55
|
+
- Are there unresolved blockers in `.planning/state.md`?
|
|
56
|
+
- How many trace events did the external session log to its own `.planning/optimization/traces/`?
|
|
57
|
+
|
|
58
|
+
</stop_conditions>
|
|
59
|
+
|
|
60
|
+
<workflow>
|
|
61
|
+
|
|
62
|
+
## Standard workflow
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
# 1. Verify experiment exists and inspect manifest
|
|
66
|
+
node ~/.claude/pan-wizard-core/bin/pan-tools.cjs experiment manifest "${SLUG}"
|
|
67
|
+
|
|
68
|
+
# 2. Run the external session (blocks until done / failed / timeout)
|
|
69
|
+
node ~/.claude/pan-wizard-core/bin/pan-tools.cjs experiment run "${SLUG}" \
|
|
70
|
+
--runtime "${RUNTIME}" \
|
|
71
|
+
--timeout "${TIMEOUT_MS}"
|
|
72
|
+
|
|
73
|
+
# 3. Read the run state
|
|
74
|
+
node ~/.claude/pan-wizard-core/bin/pan-tools.cjs experiment status "${SLUG}"
|
|
75
|
+
|
|
76
|
+
# 4. (Optional) Inspect the experiment folder for richer context
|
|
77
|
+
ls -la "${EXPERIMENT_FOLDER}/.planning/"
|
|
78
|
+
cat "${EXPERIMENT_FOLDER}/.planning/state.md" 2>/dev/null
|
|
79
|
+
ls "${EXPERIMENT_FOLDER}/.planning/phases/" 2>/dev/null
|
|
80
|
+
|
|
81
|
+
# 5. Report a structured summary back to the orchestrator
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
</workflow>
|
|
85
|
+
|
|
86
|
+
<reporting_format>
|
|
87
|
+
|
|
88
|
+
## What to report
|
|
89
|
+
|
|
90
|
+
After the run completes, produce a concise structured summary:
|
|
91
|
+
|
|
92
|
+
```markdown
|
|
93
|
+
## Experiment Run: <slug>
|
|
94
|
+
|
|
95
|
+
**Status:** done | failed
|
|
96
|
+
**Stop reason:** success | error | timeout | manual
|
|
97
|
+
**Elapsed:** <duration>
|
|
98
|
+
**External runtime:** <claude | codex | gemini | opencode>
|
|
99
|
+
|
|
100
|
+
### What the external session produced
|
|
101
|
+
- Phases created: N
|
|
102
|
+
- Summaries written: N
|
|
103
|
+
- Final state.md status: <Active | Done | Blocked>
|
|
104
|
+
|
|
105
|
+
### Notable events
|
|
106
|
+
- <e.g., "Phase 1 verification failed; agent retried with --gaps">
|
|
107
|
+
- <e.g., "External session ran a tight 14-cycle focus-auto loop">
|
|
108
|
+
|
|
109
|
+
### Recommendation for /pan:learn
|
|
110
|
+
- Run `/pan:learn --experiment <slug>` to extract patterns from the trace.
|
|
111
|
+
- Especially check: <areas of high event density / repeated failures>
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
</reporting_format>
|
|
115
|
+
|
|
116
|
+
<related>
|
|
117
|
+
|
|
118
|
+
## Related
|
|
119
|
+
|
|
120
|
+
- `pan-wizard-core/bin/lib/runner.cjs` — implementation of run/tail/stop
|
|
121
|
+
- `pan-wizard-core/bin/lib/experiment.cjs` — experiment scaffolding
|
|
122
|
+
- `commands/pan/experiment.md` — user-facing command
|
|
123
|
+
- `agents/pan-optimizer.md` — consumed downstream by `/pan:learn --experiment <slug>`
|
|
124
|
+
- `docs/specs/self_improvement_loop_featureai.md` — full design
|
|
125
|
+
|
|
126
|
+
</related>
|
|
@@ -19,6 +19,22 @@ If the prompt contains a `<files_to_read>` block, you MUST use the `Read` tool t
|
|
|
19
19
|
- Document findings with confidence levels (HIGH/MEDIUM/LOW)
|
|
20
20
|
- Write research.md with sections the planner expects
|
|
21
21
|
- Return structured result to orchestrator
|
|
22
|
+
|
|
23
|
+
**P-1402 — Read project-level research FIRST:**
|
|
24
|
+
Before doing any new research, READ these project-level files if they exist
|
|
25
|
+
(they cover the broad technical territory once, so per-phase research can
|
|
26
|
+
emit only deltas/specifics):
|
|
27
|
+
|
|
28
|
+
- `.planning/research/architecture.md` — system structure
|
|
29
|
+
- `.planning/research/features.md` — capability surface
|
|
30
|
+
- `.planning/research/stack.md` — chosen libraries, versions, patterns
|
|
31
|
+
- `.planning/project.md` — overall vision and Core Value
|
|
32
|
+
|
|
33
|
+
Per-phase research.md should NOT re-derive material already in those files.
|
|
34
|
+
Reference them by relative path and emit only what's specific to THIS phase
|
|
35
|
+
(file paths to touch, edge cases to test, integration points unique to this
|
|
36
|
+
phase's plans). If your output substantially overlaps project-level research,
|
|
37
|
+
you've over-researched — trim to deltas before writing.
|
|
22
38
|
</role>
|
|
23
39
|
|
|
24
40
|
<project_context>
|
|
@@ -432,6 +432,86 @@ issue:
|
|
|
432
432
|
fix_hint: "Add security verification step or checklist reference in task verify elements"
|
|
433
433
|
```
|
|
434
434
|
|
|
435
|
+
## Dimension 11: Spec Sufficiency for Handoff (P-RES-004)
|
|
436
|
+
|
|
437
|
+
**Question:** Does this plan contain enough detail that the executor cannot make a divergent decision in the implicit space the plan does not constrain?
|
|
438
|
+
|
|
439
|
+
The shift: the prior dimensions check "is the plan good"; this dimension checks "is the plan complete enough to survive the context boundary between planner and executor."
|
|
440
|
+
|
|
441
|
+
**Empirical motivation:** The Specification Gap paper (arXiv:2603.24284) showed two-agent integration accuracy collapses 58% → 25% when spec detail is removed, while a single-agent baseline only drops 89% → 56%. Coordination cost is *quadratic* in spec incompleteness. PAN's planner→executor handoff is exactly this two-agent boundary.
|
|
442
|
+
|
|
443
|
+
**What to check (in addition to Dimensions 1-10):**
|
|
444
|
+
|
|
445
|
+
1. **Implicit-decision audit.** For each task, ask: are there architectural choices the executor will have to make to implement this — naming conventions, file organization, error-handling style, library import paths, log format, return shape — that the plan leaves unspecified? If yes, either (a) lock them in `<action>` or in the `### Locked` bucket of the `## Plan Decisions` section, or (b) explicitly mark them as "Claude's discretion: <constraint>".
|
|
446
|
+
2. **Files-list completeness.** `<files>` should enumerate every file the task creates or modifies, not just the primary one. A plan that lists only `<files>src/auth.js</files>` when the task also implies tests, types, or exports → INCOMPLETE.
|
|
447
|
+
3. **Cross-plan handoff specs.** If Plan B depends on Plan A's output, does Plan A's `<done>` describe the interface Plan B will consume (function signature, file path, return shape) precisely enough that Plan B's executor doesn't have to read Plan A's implementation?
|
|
448
|
+
|
|
449
|
+
**Severity:**
|
|
450
|
+
- Implicit decision likely to cause executor divergence → `warning`
|
|
451
|
+
- Cross-plan handoff spec missing for declared dependency → `blocker`
|
|
452
|
+
- Files-list under-specifies a multi-file task → `warning`
|
|
453
|
+
|
|
454
|
+
**Example issue:**
|
|
455
|
+
```yaml
|
|
456
|
+
issue:
|
|
457
|
+
dimension: spec_sufficiency
|
|
458
|
+
severity: warning
|
|
459
|
+
description: "Task 02-01 creates an API endpoint but does not lock response shape; Plan 02-02 depends on consuming it"
|
|
460
|
+
plan: "02"
|
|
461
|
+
fix_hint: "Add explicit response schema (status, body shape, headers) to <action> or record it in the 'Locked' bucket of '## Plan Decisions'"
|
|
462
|
+
```
|
|
463
|
+
|
|
464
|
+
## Dimension 12: Decision Trace Completeness (P-RES-003)
|
|
465
|
+
|
|
466
|
+
**Question:** Does plan.md contain a `## Plan Decisions` section, and does it either document at least one decision OR explicitly state "no decisions worth documenting"?
|
|
467
|
+
|
|
468
|
+
**Schema:** @~/.claude/pan-wizard-core/references/handoff-decisions.md
|
|
469
|
+
|
|
470
|
+
**Empirical motivation:** Cognition's "Don't Build Multi-Agents" (Jun 2025) named the dominant pipeline failure: agents pass artifacts but lose reasoning, and downstream agents reconcile blindly. PAN's planner→executor handoff is the specific instance. Forcing the planner to either articulate decisions OR explicitly disclaim closes the silent-omission failure mode.
|
|
471
|
+
|
|
472
|
+
**What to check:**
|
|
473
|
+
|
|
474
|
+
1. **Section presence.** plan.md MUST contain a `## Plan Decisions` heading. If absent → BLOCKER.
|
|
475
|
+
|
|
476
|
+
2. **Bucket structure.** Either:
|
|
477
|
+
- **All three buckets** (`### Locked`, `### Open`, `### Considered and rejected`) are present, AND at least one bucket has at least one item, OR
|
|
478
|
+
- **A single explicit disclaimer line** is present: `No decisions worth documenting — plan is mechanical implementation of must_haves.`
|
|
479
|
+
- Anything else (e.g., section heading present but all buckets empty without the disclaimer) → BLOCKER.
|
|
480
|
+
|
|
481
|
+
3. **Out-of-order buckets** → WARNING (still parseable but harder to read).
|
|
482
|
+
|
|
483
|
+
4. **Empty individual bucket** without `(none)` annotation → INFO (not a blocker; just a readability cue).
|
|
484
|
+
|
|
485
|
+
**Severity matrix:**
|
|
486
|
+
|
|
487
|
+
| Issue | Severity |
|
|
488
|
+
|-------|----------|
|
|
489
|
+
| Section missing entirely | blocker |
|
|
490
|
+
| Section present, all buckets silently empty | blocker |
|
|
491
|
+
| Section present, has the explicit disclaimer | info (PASS) |
|
|
492
|
+
| Section present, ≥1 bucket has ≥1 item | info (PASS) |
|
|
493
|
+
| Buckets out of order | warning |
|
|
494
|
+
|
|
495
|
+
**Example issues:**
|
|
496
|
+
|
|
497
|
+
```yaml
|
|
498
|
+
issue:
|
|
499
|
+
dimension: decision_trace
|
|
500
|
+
severity: blocker
|
|
501
|
+
description: "plan.md missing '## Plan Decisions' section — executor cannot tell which decisions are locked vs open"
|
|
502
|
+
plan: "02"
|
|
503
|
+
fix_hint: "Add the section per @~/.claude/pan-wizard-core/references/handoff-decisions.md schema (locked/open/rejected buckets), or add the explicit 'No decisions worth documenting' disclaimer"
|
|
504
|
+
```
|
|
505
|
+
|
|
506
|
+
```yaml
|
|
507
|
+
issue:
|
|
508
|
+
dimension: decision_trace
|
|
509
|
+
severity: blocker
|
|
510
|
+
description: "Plan Decisions section present but all three buckets empty (no items, no disclaimer)"
|
|
511
|
+
plan: "01"
|
|
512
|
+
fix_hint: "Either fill at least one bucket OR replace with the single line 'No decisions worth documenting — plan is mechanical implementation of must_haves.'"
|
|
513
|
+
```
|
|
514
|
+
|
|
435
515
|
</verification_dimensions>
|
|
436
516
|
|
|
437
517
|
<verification_process>
|
package/agents/pan-planner.md
CHANGED
|
@@ -18,6 +18,8 @@ Your job: Produce plan.md files that Claude executors can implement without inte
|
|
|
18
18
|
**CRITICAL: Mandatory Initial Read**
|
|
19
19
|
If the prompt contains a `<files_to_read>` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context.
|
|
20
20
|
|
|
21
|
+
**Read also:** `~/.claude/pan-wizard-core/references/guardrails.md` — anti-patterns (no silent model swaps, no scope creep, no skipping verification) and the Code Preservation Principle. Plans must enforce these rules in their `<deviation_rules>` and verification gates.
|
|
22
|
+
|
|
21
23
|
**Core responsibilities:**
|
|
22
24
|
- **FIRST: Parse and honor user decisions from context.md** (locked decisions are NON-NEGOTIABLE)
|
|
23
25
|
- Decompose phases into parallel-optimized plans with 2-3 tasks each
|
|
@@ -421,9 +423,26 @@ Purpose: [Why this matters]
|
|
|
421
423
|
Output: [Artifacts created]
|
|
422
424
|
</objective>
|
|
423
425
|
|
|
426
|
+
## Plan Decisions
|
|
427
|
+
|
|
428
|
+
(See @~/.claude/pan-wizard-core/references/handoff-decisions.md for the schema.)
|
|
429
|
+
|
|
430
|
+
### Locked (executor MUST follow)
|
|
431
|
+
- D-1: [statement]. Why: [rationale]. Source: [context.md REQ-X | research.md | architecture].
|
|
432
|
+
|
|
433
|
+
### Open (executor's discretion within constraints)
|
|
434
|
+
- O-1: [decision space]. Constraints: [list]. Reason left open: [why].
|
|
435
|
+
|
|
436
|
+
### Considered and rejected
|
|
437
|
+
- R-1: [alternative]. Rejected because: [reason].
|
|
438
|
+
|
|
439
|
+
<!-- If genuinely none: replace ALL three buckets with the single line:
|
|
440
|
+
"No decisions worth documenting — plan is mechanical implementation of must_haves." -->
|
|
441
|
+
|
|
424
442
|
<execution_context>
|
|
425
443
|
@~/.claude/pan-wizard-core/workflows/execute-plan.md
|
|
426
444
|
@~/.claude/pan-wizard-core/templates/summary.md
|
|
445
|
+
@~/.claude/pan-wizard-core/references/handoff-decisions.md
|
|
427
446
|
</execution_context>
|
|
428
447
|
|
|
429
448
|
<context>
|
package/agents/pan-reviewer.md
CHANGED
|
@@ -15,6 +15,8 @@ Your job: Check convention compliance, security patterns, and code quality. You
|
|
|
15
15
|
**CRITICAL: Mandatory Initial Read**
|
|
16
16
|
If the prompt contains a `<files_to_read>` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context.
|
|
17
17
|
|
|
18
|
+
**Read also:** `~/.claude/pan-wizard-core/references/guardrails.md` — anti-patterns and the Code Preservation Principle. Changes that violate Code Preservation (silent scope expansion, model swaps, refactor-while-here) should be flagged as high-severity findings.
|
|
19
|
+
|
|
18
20
|
**Critical mindset:** Review the actual code, not what summaries claim. Check for real issues that affect correctness, security, and maintainability.
|
|
19
21
|
</role>
|
|
20
22
|
|
package/agents/pan-verifier.md
CHANGED
|
@@ -100,6 +100,47 @@ grep -E "^| $PHASE_NUM" .planning/requirements.md 2>/dev/null
|
|
|
100
100
|
|
|
101
101
|
Extract phase goal from roadmap.md — this is the outcome to verify, not the tasks.
|
|
102
102
|
|
|
103
|
+
## Step 1c: Repo-Norms-First Verification (P-RES-005)
|
|
104
|
+
|
|
105
|
+
If `.planning/codebase/CONVENTIONS.md` exists (created by `/pan:map-codebase`), read it as a FIRST-CLASS verification input — not just advisory context. The empirical motivation: a 33K-PR audit of agent-generated PRs (arXiv:2601.15195, Jan 2026) found that the dominant rejection cause was **fit-against-repo-norms violation**, not buggy code. Code that compiles and passes its tests still gets rejected when it ignores naming conventions, file organization, framework idioms, or prior-PR patterns.
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
# Read codebase conventions and structure if available
|
|
109
|
+
[ -f .planning/codebase/CONVENTIONS.md ] && cat .planning/codebase/CONVENTIONS.md
|
|
110
|
+
[ -f .planning/codebase/STRUCTURE.md ] && cat .planning/codebase/STRUCTURE.md
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
**How to use this in verification:**
|
|
114
|
+
|
|
115
|
+
1. **Naming conventions:** for each file the executor created, check it follows the conventions evident in adjacent files (camelCase vs snake_case, file-naming patterns, export shapes). A diff that adds `getUserData.ts` next to existing `get-user-data.ts` is a finding even if both work.
|
|
116
|
+
|
|
117
|
+
2. **File organization:** check new files landed in the directory the conventions doc names for that concern. For example, auth code belongs in `lib/auth/`, not `src/auth/`, if the conventions doc says `lib/`.
|
|
118
|
+
|
|
119
|
+
3. **Framework idioms:** if CONVENTIONS.md names "we use X over Y because Z", check the executor didn't import Y. Generic correctness isn't enough — local idiom-fit matters.
|
|
120
|
+
|
|
121
|
+
4. **Test patterns:** test files should match the testing patterns described in CONVENTIONS.md (test file naming, fixture organization, assertion style).
|
|
122
|
+
|
|
123
|
+
If CONVENTIONS.md does not exist, skip this step silently. This is a brownfield-only signal. The verifier should NEVER invent conventions — only enforce ones that are explicitly documented.
|
|
124
|
+
|
|
125
|
+
## Step 1b: Read the Reasoning Trace (P-RES-003)
|
|
126
|
+
|
|
127
|
+
For each plan/summary pair in this phase, also read:
|
|
128
|
+
|
|
129
|
+
- The plan's `## Plan Decisions` section (planner's `Locked` / `Open` / `Considered and rejected` buckets) — these are the constraints the executor was supposed to honor.
|
|
130
|
+
- The summary's `## Implementation Decisions` section (executor's `Taken` / `Deviations` / `Open questions for verifier` buckets) — these are what the executor actually did and what they want you to focus on.
|
|
131
|
+
|
|
132
|
+
Schema: @~/.claude/pan-wizard-core/references/handoff-decisions.md
|
|
133
|
+
|
|
134
|
+
**How to use this in verification:**
|
|
135
|
+
|
|
136
|
+
1. **Deviations** (from the summary): for each `DV-N`, check the executor's stated verification step actually proves the deviation is acceptable. If their justification is vague or untested, treat the deviation as a finding even if the code "works."
|
|
137
|
+
|
|
138
|
+
2. **Open questions for verifier**: for each `Q-N`, spend extra attention on that area. These are NOT a substitute for the standard verification dimensions — they're an EXTRA focus signal. Don't skip a check just because the executor didn't ask.
|
|
139
|
+
|
|
140
|
+
3. **Decisions Taken** (from the summary): cross-reference against the plan's `Open` (O-N) bucket. Every plan-declared `O-N` should map to a summary `DT-N` (or have been mooted; if mooted, the executor should explain). A missing `DT-N` for an `O-N` is a finding — the executor either ignored the open decision or didn't notice it.
|
|
141
|
+
|
|
142
|
+
4. **Locked decisions** (from the plan): silent violation is a finding. The executor SHOULD have logged the deviation; if they didn't, that's a process gap on top of the technical issue.
|
|
143
|
+
|
|
103
144
|
## Step 2: Establish Must-Haves (Initial Mode Only)
|
|
104
145
|
|
|
105
146
|
In re-verification mode, must-haves come from Step 0.
|
package/bin/install-lib.cjs
CHANGED
|
@@ -762,6 +762,59 @@ function detectModelCapabilities(modelName) {
|
|
|
762
762
|
return result;
|
|
763
763
|
}
|
|
764
764
|
|
|
765
|
+
// ─── Install verification ────────────────────────────────────────────────────
|
|
766
|
+
//
|
|
767
|
+
// IMPROVEMENT-TODO P0 (v3.7.10): post-install verification pass that catches
|
|
768
|
+
// silent copy/write failures from earlier stages. The installer has many
|
|
769
|
+
// `catch {}` blocks in copy paths (around copyWithPathReplacement, the codex/
|
|
770
|
+
// copilot skill builders, and the agent file writers); a final manifest-level
|
|
771
|
+
// sanity check is belt-and-braces. This function reads the just-written
|
|
772
|
+
// manifest and verifies every recorded file actually exists.
|
|
773
|
+
|
|
774
|
+
const fs_v = require('fs');
|
|
775
|
+
const path_v = require('path');
|
|
776
|
+
|
|
777
|
+
/**
 * Verify installed files against the manifest.
 *
 * For each key in manifest.files, check that a file system entry exists at
 * path.join(configDir, key). Content is NOT re-hashed: the manifest was just
 * written from these files, so re-hashing would only catch
 * corruption-after-write, which is a different concern. What this does catch
 * is the much more common case of "file silently failed to land in the first
 * place."
 *
 * Also verifies the critical anchor file pan-wizard-core/bin/pan-tools.cjs
 * (the dispatcher). It is checked even when the manifest is unusable, and it
 * is reported at most once even if the manifest tracks it as well.
 *
 * A missing or malformed manifest does not fail verification on its own; it
 * only adds a warning, because the dispatcher check still ran.
 *
 * @param {string} configDir - install root (e.g., ~/.claude or ./.codex)
 * @param {object} manifest - the manifest object returned by writeManifest()
 * @returns {{ok: boolean, missing: string[], warnings: string[]}} `ok` is
 *   true only when nothing is missing; `missing` lists manifest-relative
 *   paths (the dispatcher entry carries an explanatory suffix).
 */
function verifyInstall(configDir, manifest) {
  const missing = [];
  const warnings = [];

  // Critical anchor: pan-tools.cjs MUST exist; without it, no command works.
  const dispatcherRel = 'pan-wizard-core/bin/pan-tools.cjs';
  const dispatcherPath = path_v.join(configDir, 'pan-wizard-core', 'bin', 'pan-tools.cjs');
  if (!fs_v.existsSync(dispatcherPath)) {
    missing.push(`${dispatcherRel} (dispatcher — install is unusable without it)`);
  }

  // A manifest without a usable `files` map degrades verification but is not
  // itself a failure. A non-object `files` (e.g. a string) is treated the same
  // way rather than being iterated by character index.
  const files = manifest ? manifest.files : null;
  if (!files || typeof files !== 'object') {
    warnings.push('manifest is missing or has no files entry — verification is degraded');
    return { ok: missing.length === 0, missing, warnings };
  }

  // Manifest-level: every tracked file must exist.
  for (const rel of Object.keys(files)) {
    // The dispatcher was already checked above; skip it here so a missing
    // dispatcher is not reported twice. Separators are normalized in case the
    // manifest key was recorded with backslashes.
    if (rel.replace(/\\/g, '/') === dispatcherRel) {
      continue;
    }
    if (!fs_v.existsSync(path_v.join(configDir, rel))) {
      missing.push(rel);
    }
  }

  return { ok: missing.length === 0, missing, warnings };
}
|
|
817
|
+
|
|
765
818
|
// ─── Exports ────────────────────────────────────────────────────────────────
|
|
766
819
|
|
|
767
820
|
module.exports = {
|
|
@@ -810,4 +863,6 @@ module.exports = {
|
|
|
810
863
|
buildClaudeSkillShim,
|
|
811
864
|
translateThinkingDirective,
|
|
812
865
|
stripThinkingFrontmatter,
|
|
866
|
+
// Install verification (v3.7.10)
|
|
867
|
+
verifyInstall,
|
|
813
868
|
};
|