@chllming/wave-orchestration 0.6.3 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/CHANGELOG.md +82 -1
  2. package/README.md +40 -7
  3. package/docs/agents/wave-orchestrator-role.md +50 -0
  4. package/docs/agents/wave-planner-role.md +39 -0
  5. package/docs/context7/bundles.json +9 -0
  6. package/docs/context7/planner-agent/README.md +25 -0
  7. package/docs/context7/planner-agent/manifest.json +83 -0
  8. package/docs/context7/planner-agent/papers/cooperbench-why-coding-agents-cannot-be-your-teammates-yet.md +3283 -0
  9. package/docs/context7/planner-agent/papers/dova-deliberation-first-multi-agent-orchestration-for-autonomous-research-automation.md +1699 -0
  10. package/docs/context7/planner-agent/papers/dpbench-large-language-models-struggle-with-simultaneous-coordination.md +2251 -0
  11. package/docs/context7/planner-agent/papers/incremental-planning-to-control-a-blackboard-based-problem-solver.md +1729 -0
  12. package/docs/context7/planner-agent/papers/silo-bench-a-scalable-environment-for-evaluating-distributed-coordination-in-multi-agent-llm-systems.md +3747 -0
  13. package/docs/context7/planner-agent/papers/todoevolve-learning-to-architect-agent-planning-systems.md +1675 -0
  14. package/docs/context7/planner-agent/papers/verified-multi-agent-orchestration-a-plan-execute-verify-replan-framework-for-complex-query-resolution.md +1173 -0
  15. package/docs/context7/planner-agent/papers/why-do-multi-agent-llm-systems-fail.md +5211 -0
  16. package/docs/context7/planner-agent/topics/planning-and-orchestration.md +24 -0
  17. package/docs/evals/README.md +96 -1
  18. package/docs/evals/arm-templates/README.md +13 -0
  19. package/docs/evals/arm-templates/full-wave.json +15 -0
  20. package/docs/evals/arm-templates/single-agent.json +15 -0
  21. package/docs/evals/benchmark-catalog.json +7 -0
  22. package/docs/evals/cases/README.md +47 -0
  23. package/docs/evals/cases/wave-blackboard-inbox-targeting.json +73 -0
  24. package/docs/evals/cases/wave-contradiction-conflict.json +104 -0
  25. package/docs/evals/cases/wave-expert-routing-preservation.json +69 -0
  26. package/docs/evals/cases/wave-hidden-profile-private-evidence.json +81 -0
  27. package/docs/evals/cases/wave-premature-closure-guard.json +71 -0
  28. package/docs/evals/cases/wave-silo-cross-agent-state.json +77 -0
  29. package/docs/evals/cases/wave-simultaneous-lockstep.json +92 -0
  30. package/docs/evals/cooperbench/real-world-mitigation.md +341 -0
  31. package/docs/evals/external-benchmarks.json +85 -0
  32. package/docs/evals/external-command-config.sample.json +9 -0
  33. package/docs/evals/external-command-config.swe-bench-pro.json +8 -0
  34. package/docs/evals/pilots/README.md +47 -0
  35. package/docs/evals/pilots/swe-bench-pro-public-full-wave-review-10.json +64 -0
  36. package/docs/evals/pilots/swe-bench-pro-public-pilot.json +111 -0
  37. package/docs/evals/wave-benchmark-program.md +302 -0
  38. package/docs/guides/planner.md +67 -11
  39. package/docs/guides/terminal-surfaces.md +12 -0
  40. package/docs/plans/context7-wave-orchestrator.md +20 -0
  41. package/docs/plans/current-state.md +8 -1
  42. package/docs/plans/examples/wave-benchmark-improvement.md +108 -0
  43. package/docs/plans/examples/wave-example-live-proof.md +1 -1
  44. package/docs/plans/examples/wave-example-rollout-fidelity.md +340 -0
  45. package/docs/plans/migration.md +26 -0
  46. package/docs/plans/wave-orchestrator.md +60 -12
  47. package/docs/plans/waves/reviews/wave-1-benchmark-operator.md +118 -0
  48. package/docs/reference/cli-reference.md +547 -0
  49. package/docs/reference/coordination-and-closure.md +436 -0
  50. package/docs/reference/live-proof-waves.md +25 -3
  51. package/docs/reference/npmjs-trusted-publishing.md +3 -3
  52. package/docs/reference/proof-metrics.md +90 -0
  53. package/docs/reference/runtime-config/README.md +63 -2
  54. package/docs/reference/runtime-config/codex.md +2 -1
  55. package/docs/reference/sample-waves.md +29 -18
  56. package/docs/reference/wave-control.md +164 -0
  57. package/docs/reference/wave-planning-lessons.md +131 -0
  58. package/package.json +5 -4
  59. package/releases/manifest.json +40 -0
  60. package/scripts/research/agent-context-archive.mjs +18 -0
  61. package/scripts/research/manifests/agent-context-expanded-2026-03-22.mjs +17 -0
  62. package/scripts/research/sync-planner-context7-bundle.mjs +133 -0
  63. package/scripts/wave-orchestrator/agent-state.mjs +11 -2
  64. package/scripts/wave-orchestrator/artifact-schemas.mjs +232 -0
  65. package/scripts/wave-orchestrator/autonomous.mjs +7 -0
  66. package/scripts/wave-orchestrator/benchmark-cases.mjs +374 -0
  67. package/scripts/wave-orchestrator/benchmark-external.mjs +1384 -0
  68. package/scripts/wave-orchestrator/benchmark.mjs +972 -0
  69. package/scripts/wave-orchestrator/clarification-triage.mjs +78 -12
  70. package/scripts/wave-orchestrator/config.mjs +175 -0
  71. package/scripts/wave-orchestrator/control-cli.mjs +1216 -0
  72. package/scripts/wave-orchestrator/control-plane.mjs +697 -0
  73. package/scripts/wave-orchestrator/coord-cli.mjs +360 -2
  74. package/scripts/wave-orchestrator/coordination-store.mjs +211 -9
  75. package/scripts/wave-orchestrator/coordination.mjs +84 -0
  76. package/scripts/wave-orchestrator/dashboard-renderer.mjs +120 -5
  77. package/scripts/wave-orchestrator/dashboard-state.mjs +22 -0
  78. package/scripts/wave-orchestrator/evals.mjs +23 -0
  79. package/scripts/wave-orchestrator/executors.mjs +3 -2
  80. package/scripts/wave-orchestrator/feedback.mjs +55 -0
  81. package/scripts/wave-orchestrator/install.mjs +151 -2
  82. package/scripts/wave-orchestrator/launcher-closure.mjs +4 -1
  83. package/scripts/wave-orchestrator/launcher-runtime.mjs +33 -30
  84. package/scripts/wave-orchestrator/launcher.mjs +884 -36
  85. package/scripts/wave-orchestrator/planner-context.mjs +75 -0
  86. package/scripts/wave-orchestrator/planner.mjs +2270 -136
  87. package/scripts/wave-orchestrator/proof-cli.mjs +195 -0
  88. package/scripts/wave-orchestrator/proof-registry.mjs +317 -0
  89. package/scripts/wave-orchestrator/replay.mjs +10 -4
  90. package/scripts/wave-orchestrator/retry-cli.mjs +184 -0
  91. package/scripts/wave-orchestrator/retry-control.mjs +225 -0
  92. package/scripts/wave-orchestrator/shared.mjs +26 -0
  93. package/scripts/wave-orchestrator/swe-bench-pro-task.mjs +1004 -0
  94. package/scripts/wave-orchestrator/terminals.mjs +1 -1
  95. package/scripts/wave-orchestrator/traces.mjs +157 -2
  96. package/scripts/wave-orchestrator/wave-control-client.mjs +532 -0
  97. package/scripts/wave-orchestrator/wave-control-schema.mjs +309 -0
  98. package/scripts/wave-orchestrator/wave-files.mjs +144 -23
  99. package/scripts/wave.mjs +27 -0
  100. package/skills/repo-coding-rules/SKILL.md +1 -0
  101. package/skills/role-cont-eval/SKILL.md +1 -0
  102. package/skills/role-cont-qa/SKILL.md +13 -6
  103. package/skills/role-deploy/SKILL.md +1 -0
  104. package/skills/role-documentation/SKILL.md +4 -0
  105. package/skills/role-implementation/SKILL.md +4 -0
  106. package/skills/role-infra/SKILL.md +2 -1
  107. package/skills/role-integration/SKILL.md +15 -8
  108. package/skills/role-planner/SKILL.md +39 -0
  109. package/skills/role-planner/skill.json +21 -0
  110. package/skills/role-research/SKILL.md +1 -0
  111. package/skills/role-security/SKILL.md +2 -2
  112. package/skills/runtime-claude/SKILL.md +2 -1
  113. package/skills/runtime-codex/SKILL.md +1 -0
  114. package/skills/runtime-local/SKILL.md +2 -0
  115. package/skills/runtime-opencode/SKILL.md +1 -0
  116. package/skills/wave-core/SKILL.md +25 -6
  117. package/skills/wave-core/references/marker-syntax.md +16 -8
  118. package/wave.config.json +45 -0
package/CHANGELOG.md CHANGED
@@ -2,6 +2,87 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ ## 0.7.1 - 2026-03-23
6
+
7
+ ### Changed
8
+
9
+ - Updated the shipped package metadata, release manifest, README, migration guide, sample-wave docs, and npm publishing runbook to advertise `0.7.1` as the current release surface.
10
+ - Clarified the adopted-repo `0.7.x` upgrade path with explicit planner-corpus remediation, stable dashboard reattach guidance, and current-release examples that match the package tag.
11
+
12
+ ### Fixed And Hardened
13
+
14
+ - Fresh live launches now clear stale auto-generated relaunch plans by default, with `--resume-control-state` as the explicit opt-in when an operator intentionally wants to preserve prior relaunch intent.
15
+ - Fixed `wave control status` so an already-running attempt remains the authoritative live fan-out instead of letting stale relaunch metadata or unrelated closure blockers dominate the wave-level view.
16
+ - Fixed `reconcile-status` so waves with prior authoritative closure stay complete as `completed_with_drift` when the only mismatch is historical prompt-hash drift.
17
+ - Fixed live executor overlays so `launch-preview.json` is written for real runs as well as dry-runs, and Codex previews record observed turn ceilings when the runtime logs one.
18
+ - Updated dashboard, CLI reference, and terminal-surface docs to consistently point operators at the shipped `wave dashboard --attach current|global` surface.
19
+
20
+ ### Testing And Validation
21
+
22
+ - Updated release-surface regression coverage so package metadata, README, changelog, release manifest, migration guidance, and CLI docs all stay aligned on the current release version.
23
+
24
+ ## 0.7.0 - 2026-03-23
25
+
26
+ ### Added
27
+
28
+ - Added a unified `wave control` operator CLI that replaces `wave coord`, `wave retry`, and `wave proof` as the preferred command surface:
29
+ - `wave control status` materializes a single control-plane view with blocking edges, logical agent state, tasks, dependencies, rerun intent, active proof bundles, and next-timer projections.
30
+ - `wave control task create|get|list|act` is the operator task surface for blocking requests, blockers, clarification chains, human-input tickets, and escalations, as well as informational handoffs, evidence, claims, and decisions.
31
+ - `wave control rerun request|get|clear` manages targeted rerun intent with selected agents, explicit reuse selectors, invalidated components, clear-or-preserve reuse lists, and resume cursors.
32
+ - `wave control proof register|get|supersede|revoke` manages authoritative proof bundles with full lifecycle state (active, superseded, revoked).
33
+ - `wave control telemetry status|flush` inspects and delivers the local Wave Control event queue.
34
+ - Added a canonical control-plane event log under `.tmp/<lane>-wave-launcher/control-plane/` as append-only JSONL with event-sourced materialization. Proof registries and retry overrides under `proof/` and `control/` are now projections from this log rather than independent state files.
35
+ - Added Wave Control telemetry, a local-first event system that queues typed events under `control-plane/telemetry/` and delivers them in best-effort batches to a Railway-hosted analysis endpoint:
36
+ - Configurable report modes: `disabled`, `metadata-only`, `metadata-plus-selected`, `full-artifact-upload`.
37
+ - Selective artifact upload by kind via `uploadArtifactKinds`.
38
+ - New `waveControl` config section in `wave.config.json` with global and per-lane overrides for endpoint, workspace, auth, report mode, batch size, and per-category capture toggles.
39
+ - New `--no-telemetry` launcher flag to disable event publication for a single run.
40
+ - Telemetry capture across coordination records, control-plane events, trace bundles, feedback requests, and benchmark runs.
41
+ - Added native benchmark telemetry publishing so `wave benchmark run` emits `benchmark_run`, `benchmark_item`, `verification`, and `review` events with deterministic run IDs and config attestation hashes.
42
+ - Added external benchmark telemetry with failure-review validity classification (`comparison-valid`, `review-only`, `benchmark-invalid`, `harness-setup-failure`, `proof-blocked`, `trustworthy-model-failure`) and artifact descriptors for patches, summaries, and verification output.
43
+ - Added `docs/reference/wave-control.md` documenting the Wave Control telemetry contract, entity types, artifact upload policies, and local-first delivery model.
44
+ - Added `docs/reference/proof-metrics.md` mapping README failure cases to concrete telemetry signals and success criteria.
45
+ - Added `docs/evals/wave-benchmark-program.md` enhancements for native benchmarking mode with deterministic coordination-substrate tests.
46
+ - Added a showcase-first `repo-landed` rollout-fidelity sample wave plus refreshed sample-wave docs so `0.7.0` includes both a dense proof-first example and a narrower closure-ready authoring reference.
47
+ - Added resident orchestrator support via `--resident-orchestrator`, with a standing role prompt at `docs/agents/wave-orchestrator-role.md` and explicit non-owning session boundaries.
48
+ - Added live-wave orchestration refresh that keeps shared summaries, inboxes, clarification triage, and dashboard coordination metrics current while agents are still running, including overdue acknowledgement tracking and stale clarification rerouting.
49
+ - Added `docs/reference/runtime-config/README.md` section for `waveControl` configuration with defaults and artifact-kind filtering.
50
+
51
+ ### Changed
52
+
53
+ - `wave coord`, `wave retry`, and `wave proof` remain available as compatibility surfaces, but new operator docs and runbooks now prefer `wave control`.
54
+ - Proof registries and retry overrides are now projections from the canonical control-plane event log rather than independently managed state files. Legacy file paths are maintained for compatibility.
55
+ - Trace bundles now copy `control-plane.raw.jsonl`, `capability-assignments.json`, and `dependency-snapshot.json` alongside the existing coordination, ledger, and proof artifacts.
56
+ - `wave control task` supports informational coordination kinds (handoff, evidence, claim, decision) without falsely treating them as blocking edges in status views.
57
+ - Proof bundles now carry lifecycle state so revoked or superseded operator evidence cannot keep satisfying closure gates.
58
+ - Rerun requests now support explicit reuse selectors, component invalidation, resume cursors, and clear-or-preserve reuse lists alongside the existing agent selection.
59
+ - Coordination store, feedback, clarification triage, traces, and benchmark modules now publish telemetry events when Wave Control capture is enabled.
60
+ - Wave Control storage and queries now support durable Postgres-backed filtering by `workspaceId`, `projectId`, `orchestratorId`, and `runtimeVersion`.
61
+ - Skill resolution description and documentation now accurately reflect the merge-then-resolve code path (base → role → runtime → deploy-kind → explicit).
62
+ - Updated all documentation to reflect `0.7.0` release surface, including the operational runbook, coordination reference, sample waves, and live-proof examples.
63
+ - Dashboard docs and CLI reference now document the stable `wave dashboard --attach current|global` surface instead of older speculative flags.
64
+ - Upgrade and planner docs now call out the repo-owned planner corpus required by adopted `0.7.x` repos and explain that `wave upgrade` stays non-destructive.
65
+ - Fresh live launches now clear stale auto-generated relaunch plans by default, with an explicit `--resume-control-state` escape hatch when an operator intentionally wants to preserve the previous relaunch selection.
66
+
67
+ ### Fixed And Hardened
68
+
69
+ - Fixed executor-profile inheritance so a Claude profile that only overrides `claude.effort` or other scalar runtime fields now keeps the inherited global Claude command and runtime settings instead of nulling them out.
70
+ - Fixed shared promoted-component retries so landed owners stay reusable, stale relaunch plans are invalidated against current sibling ownership, and continuation can advance to the remaining owners without burning another retry on the already-clean agent.
71
+ - Fixed clarification triage so routed follow-up work supersedes stale human escalations, keeps the routed chain blocking through the linked request, and only opens human escalation after orchestrator-side routing is actually exhausted.
72
+ - Fixed `reconcile-status` so waves with prior authoritative closure stay complete as `completed_with_drift` when the only mismatch is historical prompt-hash drift.
73
+ - Fixed live executor overlays so `launch-preview.json` is written for real runs as well as dry-runs, and Codex previews record an observed turn ceiling when the runtime logs one.
74
+ - Fixed `wave control status` so an already-running attempt is treated as the authoritative live fan-out instead of letting stale relaunch metadata or unrelated closure blockers dominate the wave-level view.
75
+ - Hardened proof registry projections from the control-plane so revoked and superseded bundles are excluded from closure evaluation.
76
+ - Hardened the "What The Launcher Writes" path reference to correctly place `run-state.json` at the state root (not under `status/`), and added `control-plane/`, `proof/`, and `control/` directories.
77
+ - Closed 11 documentation-to-code gaps identified by end-to-end audit, including trace contract completeness, skill pack enumeration, benchmark CLI surface, and steward coordination kinds.
78
+
79
+ ### Testing And Validation
80
+
81
+ - Added new test suites for `wave-control-schema`, `wave-control-client`, and `control-cli` covering event envelope normalization, telemetry queueing, delivery state tracking, and unified control-plane operations.
82
+ - Expanded config tests for `waveControl` normalization and lane-level overrides.
83
+ - Added regression coverage for Claude scalar inheritance, sibling-owner shared-component continuation, stale relaunch-plan invalidation, and launcher-generated routed-clarification trace replay.
84
+ - Added regression coverage for proof-cli, proof-registry, retry-cli, and retry-control modules.
85
+
5
86
  ## 0.6.3 - 2026-03-22
6
87
 
7
88
  - Added a best-effort npmjs update notice on `wave launch`, `wave autonomous`, and `wave adhoc run`, with cached lookup state under `.wave/package-update-check.json` and opt-out via `WAVE_SKIP_UPDATE_CHECK=1`.
@@ -15,7 +96,7 @@
15
96
  - Clarified operator runtime visibility with additive `launch-preview.json` `limits` metadata, including explicit known turn ceilings for Claude/OpenCode and explicit Codex opacity when Wave does not emit a turn-limit flag.
16
97
  - Clarified dashboard and terminal UX: global wave counts now distinguish done, active, pending, and failed agents; the current-wave dashboard keeps a stable terminal name; and TTY dashboards use simple color cues for faster scanning.
17
98
  - Pruned stale dry-run executor preview directories when wave agent sets shrink, so manual inspection of `.tmp/.../dry-run/executors/` matches the current manifest.
18
- - Preserved already-landed implementation slices for shared promoted components by retrying only the sibling owners that still owe closure proof instead of blindly replaying the landed owner.
99
+ - Improved shared promoted-component retry selection so common sibling-owned closure cases avoid immediately replaying the already-landed owner.
19
100
  - Added release-surface alignment regression coverage and updated the shipped docs so README, runtime-config references, changelog, and release metadata match the `0.6.2` package surface.
20
101
 
21
102
  ## 0.6.1 - 2026-03-22
package/README.md CHANGED
@@ -20,6 +20,8 @@ The framework does three things:
20
20
  Wave builds runtime context from repo state, project memory, skills, Context7, and generated overlays.
21
21
  - `The system is inspectable and replayable.`
22
22
  Dry-run previews, logs, dashboards, ledgers, traces, and replay make the system debuggable instead of mysterious.
23
+ - `Telemetry is local-first and proof-oriented.`
24
+ Wave Control records typed run, proof, and benchmark events without making remote delivery part of the scheduler's critical path.
23
25
 
24
26
  ## How The Architecture Works
25
27
 
@@ -43,6 +45,8 @@ The framework does three things:
43
45
  Exit contracts, proof artifacts, eval markers, and closure stewards stop waves from closing on narrative-only PASS.
44
46
  - `Replay and audit`
45
47
  Traces capture the attempt so failures can be inspected and replayed instead of guessed from screenshots.
48
+ - `Telemetry and control plane`
49
+ Local-first event spools plus the Railway-hosted Wave Control service keep proof, benchmark validity, and selected artifacts queryable across runs.
46
50
 
47
51
  ## Example Output
48
52
 
@@ -69,22 +73,24 @@ Recent multi-agent research keeps returning to the same failure modes:
69
73
  - `Premature closure`
70
74
  Agents say they are done before proof, evals, or integrated state actually support PASS.
71
75
 
72
- Wave is built to mitigate those failures with canonical shared state, generated blackboard projections, explicit ownership, goal-driven, proof-bounded closure, and replayable traces. For the research framing and the current gaps, see [docs/research/coordination-failure-review.md](./docs/research/coordination-failure-review.md).
76
+ Wave is built to mitigate those failures with canonical shared state, generated blackboard projections, explicit ownership, goal-driven and proof-bounded closure, replayable traces, and local-first telemetry. For the research framing and the current gaps, see [docs/research/coordination-failure-review.md](./docs/research/coordination-failure-review.md). For the concrete signal map, see [docs/reference/proof-metrics.md](./docs/reference/proof-metrics.md).
73
77
 
74
78
  ## Quick Start
75
79
 
76
80
  Current release:
77
81
 
78
- - `@chllming/wave-orchestration@0.6.3`
79
- - Release tag: [`v0.6.3`](https://github.com/chllming/wave-orchestration/releases/tag/v0.6.3)
82
+ - `@chllming/wave-orchestration@0.7.1`
83
+ - Release tag: [`v0.7.1`](https://github.com/chllming/agent-wave-orchestrator/releases/tag/v0.7.1)
80
84
  - Public install path: npmjs
81
85
  - Authenticated fallback: GitHub Packages
82
86
 
83
- Highlights in `0.6.3`:
87
+ Highlights in `0.7.1`:
84
88
 
85
- - Runtime launch entrypoints now check npmjs for a newer published package in the background, cache the result under `.wave/package-update-check.json`, and warn on stderr when the workspace is behind.
86
- - `wave self-update` now gives downstream repos a one-command update path that detects the workspace package manager, updates the dependency, shows the changelog delta, and records the workspace upgrade report.
87
- - Autonomous and ad-hoc flows suppress nested notices so operators see at most one update banner per top-level run, and structured stdout remains clean for JSON consumers.
89
+ - Fresh live launches now clear stale auto-generated relaunch plans by default, so explicit wave restarts seed a clean implementation fan-out unless `--resume-control-state` is passed.
90
+ - `wave control status` now treats the active attempt as the authoritative live fan-out instead of replaying stale rerun intent or unrelated closure blockers.
91
+ - `reconcile-status` now preserves previously authoritative completed waves as `completed_with_drift` when the only mismatch is historical prompt-hash drift.
92
+ - Live `launch-preview.json` artifacts now exist for real runs as well as dry-runs, and Codex summaries record observed turn ceilings when the runtime reveals them.
93
+ - Upgrade and operator docs now cover stable dashboard attach, adopted-repo planner corpus migration, and the full `0.7.1` package surface end to end.
88
94
 
89
95
  Requirements:
90
96
 
@@ -93,6 +99,7 @@ Requirements:
93
99
  - `tmux` on `PATH` for dashboarded runs
94
100
  - at least one executor on `PATH`: `codex`, `claude`, or `opencode`
95
101
  - optional: `CONTEXT7_API_KEY` for launcher-side prefetch
102
+ - optional: `WAVE_CONTROL_AUTH_TOKEN` for remote Wave Control reporting
96
103
 
97
104
  Install into another repo:
98
105
 
@@ -124,6 +131,9 @@ pnpm exec wave draft --wave 1 --template implementation
124
131
  # Run one wave with a real executor
125
132
  pnpm exec wave launch --lane main --start-wave 0 --end-wave 0 --executor codex --codex-sandbox danger-full-access
126
133
 
134
+ # Disable Wave Control reporting for a single launcher run
135
+ pnpm exec wave launch --lane main --no-telemetry
136
+
127
137
  # Inspect operator surfaces
128
138
  pnpm exec wave feedback list --lane main --pending
129
139
  pnpm exec wave dep show --lane main --wave 0 --json
@@ -143,6 +153,24 @@ pnpm test
143
153
  node scripts/wave.mjs launch --lane main --dry-run --no-dashboard
144
154
  ```
145
155
 
156
+ ## Railway MCP
157
+
158
+ This repo includes a repo-local Railway MCP launcher so Codex, Claude, and Cursor can all talk to the same Railway project from the same checkout.
159
+
160
+ - launcher: `.codex-tools/railway-mcp/start.sh`
161
+ - project MCP config: `.mcp.json`
162
+ - Cursor MCP config: `.cursor/.mcp.json`
163
+ - Claude project settings: `.claude/settings.json`
164
+ - Railway project id: `b2427e79-3de9-49c3-aa5a-c86db83123c0`
165
+
166
+ One-time local checks:
167
+
168
+ ```bash
169
+ railway whoami
170
+ railway link --project b2427e79-3de9-49c3-aa5a-c86db83123c0
171
+ codex mcp list
172
+ ```
173
+
146
174
  ## Learn More
147
175
 
148
176
  - [docs/README.md](./docs/README.md): docs map and suggested structure
@@ -151,9 +179,14 @@ node scripts/wave.mjs launch --lane main --dry-run --no-dashboard
151
179
  - [docs/concepts/context7-vs-skills.md](./docs/concepts/context7-vs-skills.md): compiled context, external truth, and repo-owned operating knowledge
152
180
  - [docs/guides/planner.md](./docs/guides/planner.md): `wave project` and `wave draft` workflow
153
181
  - [docs/guides/terminal-surfaces.md](./docs/guides/terminal-surfaces.md): tmux, VS Code terminal registry, and dry-run surfaces
182
+ - [docs/reference/sample-waves.md](./docs/reference/sample-waves.md): showcase-first authored waves, including a high-fidelity repo-landed rollout example
183
+ - [docs/plans/examples/wave-example-rollout-fidelity.md](./docs/plans/examples/wave-example-rollout-fidelity.md): concrete example of what good wave fidelity looks like for a narrow, closure-ready outcome
184
+ - [docs/reference/cli-reference.md](./docs/reference/cli-reference.md): complete CLI syntax for all commands and flags
154
185
  - [docs/plans/wave-orchestrator.md](./docs/plans/wave-orchestrator.md): operator runbook
155
186
  - [docs/plans/context7-wave-orchestrator.md](./docs/plans/context7-wave-orchestrator.md): Context7 setup and bundle authoring
156
187
  - [docs/reference/runtime-config/README.md](./docs/reference/runtime-config/README.md): executor, runtime, and skill-projection configuration
188
+ - [docs/reference/wave-control.md](./docs/reference/wave-control.md): local-first telemetry contract and Railway control-plane model
189
+ - [docs/reference/proof-metrics.md](./docs/reference/proof-metrics.md): README failure cases mapped to concrete telemetry and benchmark evidence
157
190
  - [docs/reference/skills.md](./docs/reference/skills.md): skill bundle format, resolution order, and runtime projection
158
191
  - [docs/research/coordination-failure-review.md](./docs/research/coordination-failure-review.md): MAS failure modes from the research and how Wave responds
159
192
  - [CHANGELOG.md](./CHANGELOG.md): release history
@@ -0,0 +1,50 @@
1
+ ---
2
+ title: "Wave Orchestrator Role"
3
+ summary: "Standing prompt for a resident orchestrator session that monitors a live wave and intervenes through coordination state."
4
+ ---
5
+
6
+ # Wave Orchestrator Role
7
+
8
+ Use this prompt for an optional resident orchestrator session that stays alive during a live wave.
9
+
10
+ ## Standing prompt
11
+
12
+ ```text
13
+ You are the resident Wave orchestrator.
14
+
15
+ Your job is to monitor the live wave for its full duration and intervene through the control plane instead of through product-code ownership.
16
+
17
+ You do not own implementation files, proof markers, or closure verdicts.
18
+ The launcher remains the scheduler truth and final authority for retries, barriers, and completion.
19
+
20
+ Operate through durable state:
21
+ - coordination log
22
+ - shared summary
23
+ - per-wave dashboard
24
+ - clarification triage artifacts
25
+ - human feedback queue
26
+
27
+ Primary duties:
28
+ 1. Inspect open clarifications, routed follow-up requests, and human-feedback state.
29
+ 2. Watch for overdue acknowledgements and stale clarification chains.
30
+ 3. Resolve from repo state, prior decisions, ownership, or targeted rerouting before escalating to a human.
31
+ 4. Post durable coordination records that explain the intervention and the exact unblock condition.
32
+ 5. Stay available. If nothing needs action, keep monitoring instead of exiting early.
33
+
34
+ Hard limits:
35
+ - do not edit product code, tests, or implementation-owned docs
36
+ - do not satisfy another agent's deliverables or proof obligations
37
+ - do not emit implementation, integration, documentation, or cont-QA closure markers
38
+ - do not override launcher gate results with narrative claims
39
+
40
+ Good interventions:
41
+ - route or reroute a clarification to the current owner
42
+ - resolve a clarification from existing repo policy or published artifacts
43
+ - open or summarize a human escalation only after orchestrator-first routing is exhausted
44
+ - post concise board or coordination notes when timing or routing policy changed
45
+
46
+ Bad interventions:
47
+ - taking over code ownership because an owner is slow
48
+ - calling the wave complete based on chat alone
49
+ - escalating to human while a routed follow-up is still within policy
50
+ ```
@@ -0,0 +1,39 @@
1
+ ---
2
+ title: "Wave Planner Role"
3
+ summary: "Standing prompt for the read-only planner that turns a simple request into a high-fidelity, reviewable wave roadmap."
4
+ ---
5
+
6
+ # Wave Planner Role
7
+
8
+ Use this prompt when an agent should act as the planner for a future wave or set of waves.
9
+
10
+ ## Standing prompt
11
+
12
+ ```text
13
+ You are the wave planner for the current repository.
14
+
15
+ Your job is to turn a simple task request into a narrow, executable, reviewable wave plan that matches the repository's real architecture and closure model. You are read-only during planning. Do not propose work that depends on improvised runtime behavior or undocumented proof.
16
+
17
+ Operating rules:
18
+ - Read repository truth first: AGENTS.md, wave.config.json, planner docs, current-state, master-plan, component matrix, sample waves, and the planning-lessons document.
19
+ - Treat repo-local lessons and docs as higher priority than generic external research when they conflict.
20
+ - Prefer narrow, layered waves. Split broad or fuzzy work instead of overloading one wave.
21
+ - Match the maturity claim, owned slices, runtime setup, deliverables, proof artifacts, and closure docs to the same truth level.
22
+ - Treat live-proof waves as a different class of wave, not as repo-landed waves with extra prose.
23
+
24
+ What you must do:
25
+ - choose an honest target maturity level for each promoted component
26
+ - keep each component promotion to one honest maturity jump per wave unless the request explicitly says otherwise
27
+ - map each promoted component to one or more complementary implementation owners
28
+ - require exact Deliverables for implementation owners
29
+ - require exact Proof artifacts for proof-centric owners
30
+ - require an explicit live-proof owner, `.tmp/` proof bundle, rollback or restart evidence, and an operations runbook under `docs/plans/operations/` for `pilot-live` and above
31
+ - keep A8, A9, and A0 as real closure gates
32
+ - pin runtime choices, budgets, and Context7 deliberately enough to avoid preventable execution failures
33
+ - surface open questions explicitly when repo truth is missing instead of inventing policy
34
+
35
+ Output contract:
36
+ - Return structured JSON only.
37
+ - The JSON must be decision-ready for verifier checks and markdown rendering.
38
+ - Do not return a vague narrative summary in place of the structured plan.
39
+ ```
@@ -22,6 +22,15 @@
22
22
  }
23
23
  ]
24
24
  },
25
+ "planner-agentic": {
26
+ "description": "Repo-curated planning research published as a custom Context7 library for the agentic planner.",
27
+ "libraries": [
28
+ {
29
+ "libraryName": "wave-planner-agentic",
30
+ "queryHint": "wave planning best practices, maturity alignment, closure gates, proof surfaces, rollout evidence, and coordination failure prevention"
31
+ }
32
+ ]
33
+ },
25
34
  "react-web": {
26
35
  "description": "React and Next.js docs for frontend work.",
27
36
  "libraries": [
@@ -0,0 +1,25 @@
1
+ # Planner Agent Context7 Corpus
2
+
3
+ This folder contains the tracked planner corpus that can be published as a
4
+ custom Context7 library for the agentic planner.
5
+
6
+ Why it exists:
7
+
8
+ - the original planning research cache lives under `docs/research/agent-context-cache/`
9
+ - that cache is intentionally ignored in repository workspaces
10
+ - the planner feature needs a shippable, reviewable, repo-local copy of the
11
+ exact subset we want to publish and consume
12
+
13
+ Publish target:
14
+
15
+ - bundle id: `planner-agentic`
16
+ - library name: `wave-planner-agentic`
17
+
18
+ Refresh the copied corpus after updating the agent-context cache:
19
+
20
+ ```bash
21
+ pnpm research:sync-planner-context7
22
+ ```
23
+
24
+ The generated `manifest.json` records each copied file's source and target paths,
25
+ title, byte size, and SHA-256 hash so drift is reviewable in git.
@@ -0,0 +1,83 @@
1
+ {
2
+ "version": 1,
3
+ "generatedAt": "2026-03-22T21:14:01.636Z",
4
+ "bundleId": "planner-agentic",
5
+ "libraryName": "wave-planner-agentic",
6
+ "defaultQuery": "Wave planning best practices, maturity alignment, closure gates, proof surfaces, rollout evidence, and coordination failure prevention",
7
+ "sourceRoot": "docs/research/agent-context-cache",
8
+ "targetRoot": "docs/context7/planner-agent",
9
+ "files": [
10
+ {
11
+ "kind": "paper",
12
+ "sourcePath": "docs/research/agent-context-cache/papers/verified-multi-agent-orchestration-a-plan-execute-verify-replan-framework-for-complex-query-resolution.md",
13
+ "targetPath": "docs/context7/planner-agent/papers/verified-multi-agent-orchestration-a-plan-execute-verify-replan-framework-for-complex-query-resolution.md",
14
+ "title": "Verified Multi-Agent Orchestration: A Plan-Execute-Verify-Replan Framework for Complex Query Resolution",
15
+ "bytes": 35443,
16
+ "sha256": "7fecced650f5268e25ef10c10c46bc9b53ea334a39e7b04315c7d72cf663870b"
17
+ },
18
+ {
19
+ "kind": "paper",
20
+ "sourcePath": "docs/research/agent-context-cache/papers/todoevolve-learning-to-architect-agent-planning-systems.md",
21
+ "targetPath": "docs/context7/planner-agent/papers/todoevolve-learning-to-architect-agent-planning-systems.md",
22
+ "title": "TodoEvolve: Learning to Architect Agent Planning Systems",
23
+ "bytes": 67584,
24
+ "sha256": "e2e18bd732105115d3cb3eb226e8033f421634c790b2c3bbc576cefaeff92165"
25
+ },
26
+ {
27
+ "kind": "paper",
28
+ "sourcePath": "docs/research/agent-context-cache/papers/dova-deliberation-first-multi-agent-orchestration-for-autonomous-research-automation.md",
29
+ "targetPath": "docs/context7/planner-agent/papers/dova-deliberation-first-multi-agent-orchestration-for-autonomous-research-automation.md",
30
+ "title": "DOVA: Deliberation-First Multi-Agent Orchestration for Autonomous Research Automation",
31
+ "bytes": 36202,
32
+ "sha256": "d54ad0d331942cd62bd6e5cf2ed44edc577eceee6c5dadbfb88e072a96df06c0"
33
+ },
34
+ {
35
+ "kind": "paper",
36
+ "sourcePath": "docs/research/agent-context-cache/papers/why-do-multi-agent-llm-systems-fail.md",
37
+ "targetPath": "docs/context7/planner-agent/papers/why-do-multi-agent-llm-systems-fail.md",
38
+ "title": "Why Do Multi-Agent LLM Systems Fail?",
39
+ "bytes": 141179,
40
+ "sha256": "faaf6dd51ab9eb612b58bd00c9b0d13595daabd4b34078a24ed135e28b12868d"
41
+ },
42
+ {
43
+ "kind": "paper",
44
+ "sourcePath": "docs/research/agent-context-cache/papers/silo-bench-a-scalable-environment-for-evaluating-distributed-coordination-in-multi-agent-llm-systems.md",
45
+ "targetPath": "docs/context7/planner-agent/papers/silo-bench-a-scalable-environment-for-evaluating-distributed-coordination-in-multi-agent-llm-systems.md",
46
+ "title": "Silo-Bench: A Scalable Environment for Evaluating Distributed Coordination in Multi-Agent LLM Systems",
47
+ "bytes": 76435,
48
+ "sha256": "e28334cd0891773f7044c7e29bbcc6c3ae79b701838a6b8526fffdf2c5e4c41d"
49
+ },
50
+ {
51
+ "kind": "paper",
52
+ "sourcePath": "docs/research/agent-context-cache/papers/dpbench-large-language-models-struggle-with-simultaneous-coordination.md",
53
+ "targetPath": "docs/context7/planner-agent/papers/dpbench-large-language-models-struggle-with-simultaneous-coordination.md",
54
+ "title": "DPBench: Large Language Models Struggle with Simultaneous Coordination",
55
+ "bytes": 54309,
56
+ "sha256": "22a37fd0fbcf5e21b89d9dba9e7d51298c93ad8baefd17dc3373373105f01f07"
57
+ },
58
+ {
59
+ "kind": "paper",
60
+ "sourcePath": "docs/research/agent-context-cache/papers/cooperbench-why-coding-agents-cannot-be-your-teammates-yet.md",
61
+ "targetPath": "docs/context7/planner-agent/papers/cooperbench-why-coding-agents-cannot-be-your-teammates-yet.md",
62
+ "title": "CooperBench: Why Coding Agents Cannot be Your Teammates Yet",
63
+ "bytes": 104926,
64
+ "sha256": "463a2f7129ad2869a80f2a697682fa57407a76d3d5cadf7a94234bfd55ef1992"
65
+ },
66
+ {
67
+ "kind": "paper",
68
+ "sourcePath": "docs/research/agent-context-cache/papers/incremental-planning-to-control-a-blackboard-based-problem-solver.md",
69
+ "targetPath": "docs/context7/planner-agent/papers/incremental-planning-to-control-a-blackboard-based-problem-solver.md",
70
+ "title": "Incremental Planning to Control a Blackboard-Based Problem Solver",
71
+ "bytes": 44369,
72
+ "sha256": "568fe7236b214fe0a24e0530513776ee5579e0366b9f277e8c0ebd49daea7b67"
73
+ },
74
+ {
75
+ "kind": "topic",
76
+ "sourcePath": "docs/research/agent-context-cache/topics/planning-and-orchestration.md",
77
+ "targetPath": "docs/context7/planner-agent/topics/planning-and-orchestration.md",
78
+ "title": "Planner Agentic Context7 Corpus",
79
+ "bytes": 1858,
80
+ "sha256": "5c8c00bc2c4b72330267ca8cce2889b2eaa3bbf02db49e90a1fb7b0eb9d0ef44"
81
+ }
82
+ ]
83
+ }