@chllming/wave-orchestration 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/CHANGELOG.md +64 -1
  2. package/README.md +44 -8
  3. package/docs/agents/wave-orchestrator-role.md +50 -0
  4. package/docs/agents/wave-planner-role.md +39 -0
  5. package/docs/context7/bundles.json +9 -0
  6. package/docs/context7/planner-agent/README.md +25 -0
  7. package/docs/context7/planner-agent/manifest.json +83 -0
  8. package/docs/context7/planner-agent/papers/cooperbench-why-coding-agents-cannot-be-your-teammates-yet.md +3283 -0
  9. package/docs/context7/planner-agent/papers/dova-deliberation-first-multi-agent-orchestration-for-autonomous-research-automation.md +1699 -0
  10. package/docs/context7/planner-agent/papers/dpbench-large-language-models-struggle-with-simultaneous-coordination.md +2251 -0
  11. package/docs/context7/planner-agent/papers/incremental-planning-to-control-a-blackboard-based-problem-solver.md +1729 -0
  12. package/docs/context7/planner-agent/papers/silo-bench-a-scalable-environment-for-evaluating-distributed-coordination-in-multi-agent-llm-systems.md +3747 -0
  13. package/docs/context7/planner-agent/papers/todoevolve-learning-to-architect-agent-planning-systems.md +1675 -0
  14. package/docs/context7/planner-agent/papers/verified-multi-agent-orchestration-a-plan-execute-verify-replan-framework-for-complex-query-resolution.md +1173 -0
  15. package/docs/context7/planner-agent/papers/why-do-multi-agent-llm-systems-fail.md +5211 -0
  16. package/docs/context7/planner-agent/topics/planning-and-orchestration.md +24 -0
  17. package/docs/evals/README.md +96 -1
  18. package/docs/evals/arm-templates/README.md +13 -0
  19. package/docs/evals/arm-templates/full-wave.json +15 -0
  20. package/docs/evals/arm-templates/single-agent.json +15 -0
  21. package/docs/evals/benchmark-catalog.json +7 -0
  22. package/docs/evals/cases/README.md +47 -0
  23. package/docs/evals/cases/wave-blackboard-inbox-targeting.json +73 -0
  24. package/docs/evals/cases/wave-contradiction-conflict.json +104 -0
  25. package/docs/evals/cases/wave-expert-routing-preservation.json +69 -0
  26. package/docs/evals/cases/wave-hidden-profile-private-evidence.json +81 -0
  27. package/docs/evals/cases/wave-premature-closure-guard.json +71 -0
  28. package/docs/evals/cases/wave-silo-cross-agent-state.json +77 -0
  29. package/docs/evals/cases/wave-simultaneous-lockstep.json +92 -0
  30. package/docs/evals/cooperbench/real-world-mitigation.md +341 -0
  31. package/docs/evals/external-benchmarks.json +85 -0
  32. package/docs/evals/external-command-config.sample.json +9 -0
  33. package/docs/evals/external-command-config.swe-bench-pro.json +8 -0
  34. package/docs/evals/pilots/README.md +47 -0
  35. package/docs/evals/pilots/swe-bench-pro-public-full-wave-review-10.json +64 -0
  36. package/docs/evals/pilots/swe-bench-pro-public-pilot.json +111 -0
  37. package/docs/evals/wave-benchmark-program.md +302 -0
  38. package/docs/guides/planner.md +48 -11
  39. package/docs/plans/context7-wave-orchestrator.md +20 -0
  40. package/docs/plans/current-state.md +9 -1
  41. package/docs/plans/examples/wave-benchmark-improvement.md +108 -0
  42. package/docs/plans/examples/wave-example-live-proof.md +1 -1
  43. package/docs/plans/examples/wave-example-rollout-fidelity.md +340 -0
  44. package/docs/plans/wave-orchestrator.md +73 -11
  45. package/docs/plans/waves/reviews/wave-1-benchmark-operator.md +118 -0
  46. package/docs/reference/coordination-and-closure.md +436 -0
  47. package/docs/reference/live-proof-waves.md +25 -3
  48. package/docs/reference/npmjs-trusted-publishing.md +3 -3
  49. package/docs/reference/proof-metrics.md +90 -0
  50. package/docs/reference/runtime-config/README.md +61 -0
  51. package/docs/reference/sample-waves.md +29 -18
  52. package/docs/reference/wave-control.md +164 -0
  53. package/docs/reference/wave-planning-lessons.md +131 -0
  54. package/package.json +5 -4
  55. package/releases/manifest.json +33 -0
  56. package/scripts/research/agent-context-archive.mjs +18 -0
  57. package/scripts/research/manifests/agent-context-expanded-2026-03-22.mjs +17 -0
  58. package/scripts/research/sync-planner-context7-bundle.mjs +133 -0
  59. package/scripts/wave-autonomous.mjs +2 -4
  60. package/scripts/wave-orchestrator/adhoc.mjs +32 -11
  61. package/scripts/wave-orchestrator/artifact-schemas.mjs +232 -0
  62. package/scripts/wave-orchestrator/autonomous.mjs +27 -6
  63. package/scripts/wave-orchestrator/benchmark-cases.mjs +374 -0
  64. package/scripts/wave-orchestrator/benchmark-external.mjs +1384 -0
  65. package/scripts/wave-orchestrator/benchmark.mjs +972 -0
  66. package/scripts/wave-orchestrator/clarification-triage.mjs +78 -12
  67. package/scripts/wave-orchestrator/config.mjs +175 -0
  68. package/scripts/wave-orchestrator/control-cli.mjs +1123 -0
  69. package/scripts/wave-orchestrator/control-plane.mjs +697 -0
  70. package/scripts/wave-orchestrator/coord-cli.mjs +360 -2
  71. package/scripts/wave-orchestrator/coordination-store.mjs +211 -9
  72. package/scripts/wave-orchestrator/coordination.mjs +84 -0
  73. package/scripts/wave-orchestrator/dashboard-renderer.mjs +38 -3
  74. package/scripts/wave-orchestrator/dashboard-state.mjs +22 -0
  75. package/scripts/wave-orchestrator/evals.mjs +23 -0
  76. package/scripts/wave-orchestrator/executors.mjs +3 -2
  77. package/scripts/wave-orchestrator/feedback.mjs +55 -0
  78. package/scripts/wave-orchestrator/install.mjs +253 -26
  79. package/scripts/wave-orchestrator/launcher-closure.mjs +4 -1
  80. package/scripts/wave-orchestrator/launcher-runtime.mjs +24 -21
  81. package/scripts/wave-orchestrator/launcher.mjs +800 -35
  82. package/scripts/wave-orchestrator/package-update-notice.mjs +230 -0
  83. package/scripts/wave-orchestrator/package-version.mjs +32 -0
  84. package/scripts/wave-orchestrator/planner-context.mjs +75 -0
  85. package/scripts/wave-orchestrator/planner.mjs +2270 -136
  86. package/scripts/wave-orchestrator/proof-cli.mjs +195 -0
  87. package/scripts/wave-orchestrator/proof-registry.mjs +317 -0
  88. package/scripts/wave-orchestrator/replay.mjs +10 -4
  89. package/scripts/wave-orchestrator/retry-cli.mjs +184 -0
  90. package/scripts/wave-orchestrator/retry-control.mjs +225 -0
  91. package/scripts/wave-orchestrator/shared.mjs +26 -0
  92. package/scripts/wave-orchestrator/swe-bench-pro-task.mjs +1004 -0
  93. package/scripts/wave-orchestrator/traces.mjs +157 -2
  94. package/scripts/wave-orchestrator/wave-control-client.mjs +532 -0
  95. package/scripts/wave-orchestrator/wave-control-schema.mjs +309 -0
  96. package/scripts/wave-orchestrator/wave-files.mjs +17 -5
  97. package/scripts/wave.mjs +39 -2
  98. package/skills/repo-coding-rules/SKILL.md +1 -0
  99. package/skills/role-cont-eval/SKILL.md +1 -0
  100. package/skills/role-cont-qa/SKILL.md +13 -6
  101. package/skills/role-deploy/SKILL.md +1 -0
  102. package/skills/role-documentation/SKILL.md +4 -0
  103. package/skills/role-implementation/SKILL.md +4 -0
  104. package/skills/role-infra/SKILL.md +2 -1
  105. package/skills/role-integration/SKILL.md +15 -8
  106. package/skills/role-planner/SKILL.md +39 -0
  107. package/skills/role-planner/skill.json +21 -0
  108. package/skills/role-research/SKILL.md +1 -0
  109. package/skills/role-security/SKILL.md +2 -2
  110. package/skills/runtime-claude/SKILL.md +2 -1
  111. package/skills/runtime-codex/SKILL.md +1 -0
  112. package/skills/runtime-local/SKILL.md +2 -0
  113. package/skills/runtime-opencode/SKILL.md +1 -0
  114. package/skills/wave-core/SKILL.md +25 -6
  115. package/skills/wave-core/references/marker-syntax.md +16 -8
  116. package/wave.config.json +45 -0
@@ -1,23 +1,29 @@
1
1
  ---
2
2
  title: "Sample Waves"
3
- summary: "A showcase-first sample wave that demonstrates the current 0.6.1 Wave surface."
3
+ summary: "Showcase-first sample waves that demonstrate the current 0.7.0 Wave surface."
4
4
  ---
5
5
 
6
6
  # Sample Waves
7
7
 
8
- This guide points to one showcase-first sample wave that demonstrates the current `0.6.1` authored Wave surface.
8
+ This guide points to showcase-first sample waves that demonstrate the current `0.7.0` authored Wave surface.
9
9
 
10
- The example is intentionally denser than a typical production wave. Its job is to teach the current authoring and runtime surface quickly, not to be the smallest possible launch-ready file.
10
+ The examples are intentionally denser than typical production waves. Their job is to teach the current authoring and runtime surface quickly, not to be the smallest possible launch-ready files.
11
11
 
12
- ## Canonical Example
12
+ ## Canonical Examples
13
+
14
+ - [High-fidelity repo-landed rollout wave](../plans/examples/wave-example-rollout-fidelity.md)
15
+ Shows what a good `repo-landed` outcome looks like when one promoted component only closes honestly if desired-state records, reconcile-loop substrate, and cluster-view surfaces land together. It emphasizes maturity discipline, explicit deliverables, and shared-plan closure without drifting into `pilot-live` claims.
13
16
 
14
17
  - [Full modern sample wave](../plans/examples/wave-example-live-proof.md)
15
- Shows the combined `0.6.1` authored surface in one file: closure roles, `E0`, optional security review, delegated and pinned benchmark targets, richer executor config, `### Skills`, `### Capabilities`, `### Deliverables`, `### Exit contract`, `### Proof artifacts`, sticky retry, deploy environments, and proof-first live-wave structure.
18
+ Shows the combined `0.7.0` authored surface in one file: closure roles, `E0`, optional security review, delegated and pinned benchmark targets, richer executor config, `### Skills`, `### Capabilities`, `### Deliverables`, `### Exit contract`, `### Proof artifacts`, sticky retry, deploy environments, and proof-first live-wave structure.
16
19
 
17
- ## What This Example Teaches
20
+ ## What These Examples Teach
18
21
 
19
- - the standard closure-role structure with `A0`, `E0`, `A8`, and `A9`
20
- - wave-level `## Eval targets`
22
+ - the standard closure-role structure with `A0`, `A8`, and `A9`
23
+ - `E0` and wave-level `## Eval targets` in the full modern sample
24
+ - honest `repo-landed` maturity framing without `pilot-live` drift
25
+ - multi-slice component promotion where all sibling owners must land together
26
+ - shared-plan and component-matrix closure as part of the architecture truth
21
27
  - delegated versus pinned benchmark selection
22
28
  - coordination benchmark families from `docs/evals/benchmark-catalog.json`
23
29
  - richer executor blocks, runtime budgets, and retry policy
@@ -32,8 +38,11 @@ The example is intentionally denser than a typical production wave. Its job is t
32
38
 
33
39
  ## Feature Coverage Map
34
40
 
35
- This sample covers the main surfaces added or hardened for `0.6.1`:
41
+ Together these samples cover the main surfaces added or hardened for `0.7.0`:
36
42
 
43
+ - repo-landed maturity discipline and anti-overclaim framing
44
+ - explicit shared-plan closure for future-wave safety
45
+ - coordinated component slices with per-agent deliverables
37
46
  - planner-era authored wave structure
38
47
  - cross-runtime `### Skills`
39
48
  - richer `### Executor` blocks and runtime budgets
@@ -53,6 +62,7 @@ This sample covers the main surfaces added or hardened for `0.6.1`:
53
62
  Copy more literally when:
54
63
 
55
64
  - you need the section layout
65
+ - you want a concrete example of what good repo-landed wave fidelity looks like
56
66
  - you want concrete wording for delegated versus pinned benchmark targets
57
67
  - you want a proof-first owner example with local artifact bundles and sticky retry
58
68
 
@@ -65,23 +75,24 @@ Adapt more aggressively when:
65
75
 
66
76
  ## How These Examples Map To Other Docs
67
77
 
68
- - Use [docs/guides/planner.md](../guides/planner.md) for the planner-generated baseline, then use this sample to see how a human would enrich the generated draft.
69
- - Use [docs/evals/README.md](../evals/README.md) with this sample when you need to see delegated and pinned benchmark targets in a real wave.
70
- - Use [docs/reference/live-proof-waves.md](./live-proof-waves.md) with this sample when you need proof-first authoring for `pilot-live` and above.
78
+ - Use [docs/guides/planner.md](../guides/planner.md) for the planner-generated baseline, then use these samples to see how a human would enrich the generated draft for either repo-landed or proof-first work.
79
+ - Use [docs/evals/README.md](../evals/README.md) with the full modern sample when you need to see delegated and pinned benchmark targets in a real wave.
80
+ - Use [docs/reference/live-proof-waves.md](./live-proof-waves.md) with the full modern sample when you need proof-first authoring for `pilot-live` and above.
71
81
  - Use [docs/plans/wave-orchestrator.md](../plans/wave-orchestrator.md) for the operational runbook that explains how the launcher interprets these sections.
72
82
 
73
83
  ## Suggested Reading Order
74
84
 
75
- 1. Start with [Full modern sample wave](../plans/examples/wave-example-live-proof.md).
76
- 2. Read [docs/evals/README.md](../evals/README.md) if you want more background on benchmark target selection.
77
- 3. Read [docs/reference/live-proof-waves.md](./live-proof-waves.md) if you want more detail on proof-first `pilot-live` authoring.
85
+ 1. Start with [High-fidelity repo-landed rollout wave](../plans/examples/wave-example-rollout-fidelity.md) if you want the clearest example of good closure-ready wave fidelity for a repo-only outcome.
86
+ 2. Read [Full modern sample wave](../plans/examples/wave-example-live-proof.md) if you want the denser proof-first and eval-heavy surface.
87
+ 3. Read [docs/evals/README.md](../evals/README.md) if you want more background on benchmark target selection.
88
+ 4. Read [docs/reference/live-proof-waves.md](./live-proof-waves.md) if you want more detail on proof-first `pilot-live` authoring.
78
89
 
79
- ## Why This Example Lives In `docs/plans/examples/`
90
+ ## Why These Examples Live In `docs/plans/examples/`
80
91
 
81
- The example lives outside `docs/plans/waves/` on purpose.
92
+ The examples live outside `docs/plans/waves/` on purpose.
82
93
 
83
94
  That keeps them:
84
95
 
85
96
  - easy to browse as teaching material
86
97
  - clearly separate from the repo's real launcher-facing wave sequence
87
- - safe to evolve as reference material without implying that it is part of the current lane's actual plan history
98
+ - safe to evolve as reference material without implying that they are part of the current lane's actual plan history
@@ -0,0 +1,164 @@
1
+ ---
2
+ title: "Wave Control"
3
+ summary: "Canonical telemetry, artifact upload policy, and the local-first reporting contract for the Railway-hosted Wave control plane."
4
+ ---
5
+
6
+ # Wave Control
7
+
8
+ Wave Control is the telemetry and analysis plane for Wave runs.
9
+
10
+ The design rule is:
11
+
12
+ - local files stay authoritative
13
+ - remote reporting is best-effort
14
+ - dashboards and markdown remain projections over typed local state
15
+
16
+ ## What Gets Reported
17
+
18
+ Wave Control normalizes these entity types:
19
+
20
+ - `wave_run`
21
+ - `agent_run`
22
+ - `coordination_record`
23
+ - `task`
24
+ - `attempt`
25
+ - `gate`
26
+ - `proof_bundle`
27
+ - `rerun_request`
28
+ - `human_input`
29
+ - `artifact`
30
+ - `benchmark_run`
31
+ - `benchmark_item`
32
+ - `verification`
33
+ - `review`
34
+
35
+ This lets the control plane answer:
36
+
37
+ - what happened in a run
38
+ - which proof and benchmark artifacts back a claim
39
+ - whether a benchmark result is comparison-valid or only diagnostic
40
+ - which coordination failures blocked closure
41
+
42
+ ## Run Identity
43
+
44
+ Every Wave Control event carries a normalized run identity.
45
+
46
+ The key fields are:
47
+
48
+ - `workspaceId`
49
+ - `projectId`
50
+ - `runKind`
51
+ - `runId`
52
+ - `lane`
53
+ - `wave`
54
+ - `attempt`
55
+ - `agentId`
56
+ - `orchestratorId`
57
+ - `runtimeVersion`
58
+ - `benchmarkRunId`
59
+ - `benchmarkItemId`
60
+
61
+ Why these fields matter:
62
+
63
+ - `workspaceId` separates whole adopted workspaces
64
+ - `projectId` separates product or repo identities inside one control plane
65
+ - `orchestratorId` separates resident orchestrators or control-plane owners
66
+ - `runtimeVersion` lets operators compare behavior across Wave releases without guessing from deploy timestamps
67
+
68
+ These are first-class query dimensions in the service, not only free-form event payload fields.
69
+
70
+ ## Proof Signals
71
+
72
+ Wave Control is intended to make the main README claims measurable.
73
+
74
+ For the explicit README-failure-case-to-signal map, see [proof-metrics.md](./proof-metrics.md).
75
+
76
+ Signals to preserve:
77
+
78
+ - canonical-state fidelity:
79
+ `coordination_record`, `wave_run`, `attempt`, and `artifact` telemetry prove the scheduler truth came from JSON state, not only markdown boards
80
+ - evidence pooling:
81
+ integration and closure telemetry should cite the proof artifacts and evidence refs they relied on
82
+ - contradiction repair:
83
+ gate and review telemetry should show unresolved conflicts, repair creation, and repair resolution
84
+ - expert routing:
85
+ targeted assignments, reroutes, and final recommendation ownership should remain visible
86
+ - premature closure prevention:
87
+ gate snapshots, proof completeness, block reasons, reruns, and cont-QA reversal should be durable
88
+ - benchmark trust:
89
+ every benchmark item should distinguish capability from validity
90
+
91
+ ## Artifact Contract
92
+
93
+ Selected artifacts are described with typed descriptors:
94
+
95
+ ```json
96
+ {
97
+ "path": ".tmp/main-wave-launcher/traces/wave-1/attempt-1/quality.json",
98
+ "kind": "trace-quality",
99
+ "required": true,
100
+ "present": true,
101
+ "sha256": "abc123...",
102
+ "bytes": 2048,
103
+ "contentType": "application/json",
104
+ "uploadPolicy": "selected"
105
+ }
106
+ ```
107
+
108
+ Upload policy meanings:
109
+
110
+ - `local-only`: keep only the descriptor remotely
111
+ - `metadata-only`: report path, hash, size, and presence only
112
+ - `selected`: upload metadata plus the artifact body when the runtime is in `metadata-plus-selected` or `full-artifact-upload`
113
+ **and** the artifact kind is allowed by `waveControl.uploadArtifactKinds`
114
+ - `full`: upload the artifact body in `full-artifact-upload` flows; if `uploadArtifactKinds` is set, keep the kind allowlist aligned with that policy
115
+
116
+ ## Runtime Config
117
+
118
+ `wave.config.json` can declare:
119
+
120
+ ```json
121
+ {
122
+ "waveControl": {
123
+ "endpoint": "https://wave-control.up.railway.app/api/v1",
124
+ "workspaceId": "my-workspace",
125
+ "projectId": "wave-orchestration",
126
+ "authTokenEnvVar": "WAVE_CONTROL_AUTH_TOKEN",
127
+ "reportMode": "metadata-plus-selected",
128
+ "uploadArtifactKinds": [
129
+ "trace-run-metadata",
130
+ "trace-quality",
131
+ "benchmark-results"
132
+ ]
133
+ }
134
+ }
135
+ ```
136
+
137
+ Lane overrides may refine the same surface under `lanes.<lane>.waveControl`.
138
+
139
+ For a single run, operators can disable Wave Control reporting entirely with:
140
+
141
+ ```bash
142
+ pnpm exec wave launch --lane main --no-telemetry
143
+ ```
144
+
145
+ That suppresses the local telemetry spool and remote delivery for that invocation, while leaving the canonical runtime artifacts and local control-plane state intact.
146
+
147
+ ## Delivery Model
148
+
149
+ Wave Control reporting should:
150
+
151
+ - append local telemetry first
152
+ - queue pending uploads under `.tmp/<lane>-wave-launcher/control-plane/telemetry/`
153
+ - respect `waveControl.uploadArtifactKinds` before uploading any selected artifact body
154
+ - cap pending remote uploads with `waveControl.maxPendingEvents` by dropping the oldest queued remote-delivery files, while keeping the local `events.jsonl` stream intact
155
+ - retry delivery with idempotency keys
156
+ - never fail a live run, proof registration, or benchmark because the network is unavailable
157
+
158
+ The Railway-hosted `services/wave-control` service is an analysis surface, not the scheduler of record.
159
+
160
+ The service package lives under `services/wave-control/`.
161
+
162
+ For durable telemetry retention, attach Railway Postgres to `wave-control` so the
163
+ service receives `DATABASE_URL`. Without that variable, the service falls back to the
164
+ in-memory store and only keeps data until the process restarts.
@@ -0,0 +1,131 @@
1
+ ---
2
+ summary: "Lessons from Waves 4-9 on what makes future waves succeed or fail."
3
+ read_when:
4
+ - Drafting a new wave
5
+ - Splitting or renumbering future waves
6
+ - Deciding whether a wave should target repo-landed, pilot-live, or above
7
+ title: "Wave Planning Lessons"
8
+ ---
9
+
10
+ # Wave Planning Lessons
11
+
12
+ This document captures the practical lessons from Waves 4-9. The main theme is
13
+ simple: waves succeed when the declared maturity target, the owned slices, the
14
+ runtime setup, and the closure artifacts all describe the same truth.
15
+
16
+ ## 1. One honest maturity jump per wave
17
+
18
+ - Treat `repo-landed`, `pilot-live`, `qa-proved`, `fleet-ready`,
19
+ `cutover-ready`, and `deprecation-ready` as materially different bars.
20
+ - A wave should promote a component by one honest maturity step, not silently
21
+ combine multiple levels of proof in one broad plan.
22
+ - If a wave only lands code and tests, the target is usually `repo-landed`, not
23
+ `pilot-live`.
24
+ - If a wave claims `pilot-live` or above, the wave must own real deploy/live
25
+ proof and rollback evidence.
26
+
27
+ ## 2. Live-proof waves are a different class of wave
28
+
29
+ - `pilot-live` and above need an explicit live-proof owner, not just
30
+ implementation agents plus A8/A9/A0.
31
+ - Live-proof waves need a canonical proof bundle under `.tmp/` and one owned
32
+ operations runbook under `docs/plans/operations/`.
33
+ - The proof bundle must contain restart or rollback evidence, not only one-shot
34
+ success.
35
+ - External operator commands and captured evidence must be part of the authored
36
+ wave, not improvised during execution.
37
+
38
+ ## 3. Component promotions must map to owned slices
39
+
40
+ - Every promoted component needs one or more implementation owners and one
41
+ shared proof story.
42
+ - If multiple agents contribute to one promoted component, their slices must be
43
+ obviously complementary, not overlapping guesses.
44
+ - Shared components should not cause one agent to be retried just because a
45
+ sibling owner is still finishing; each agent must be able to complete its own
46
+ slice honestly.
47
+
48
+ ## 4. Deliverables must be explicit and machine-checkable
49
+
50
+ - Every implementation agent should declare `### Deliverables`.
51
+ - For live-proof waves, use `### Proof artifacts` in addition to deliverables.
52
+ - Deliverables should be exact files or artifact manifests, not vague “test
53
+ coverage” or “docs updated” expectations.
54
+ - Missing deliverables should fail the wave even if the code mostly landed.
55
+
56
+ ## 5. Closure must update the shared planning truth
57
+
58
+ - A9 should always update `current-state`, `master-plan`, `migration`, and the
59
+ component cutover matrix when a wave changes what later waves may safely
60
+ assume.
61
+ - The evaluator should reject a wave if the repo’s planning truth still implies
62
+ an older maturity level after the code has landed.
63
+ - Shared-plan closure is not paperwork; it is part of architecture truth.
64
+
65
+ ## 6. Use A8 to reconcile reality before docs and evaluation
66
+
67
+ - A8 is the place to detect contradictions between slices, missing ownership,
68
+ and proof gaps before A9 and A0 run.
69
+ - A8 should judge `ready-for-doc-closure` versus `needs-more-work` based on the
70
+ landed artifact set, not on agent intent.
71
+ - Waves were materially more reliable once A8 became a true closure gate rather
72
+ than optional synthesis.
73
+
74
+ ## 7. Runtime setup matters as much as wave prose
75
+
76
+ - Do not use small fixed turn caps for synthesis-heavy or closure-heavy agents.
77
+ Bound them with `budget.minutes`, not `budget.turns`.
78
+ - Pin exact model and reasoning settings for each runtime. Ambiguous profiles
79
+ create unclear failure modes.
80
+ - Avoid cross-runtime fallback on live-proof or deploy-sensitive slices unless
81
+ there is a very good reason.
82
+ - Context7 should be explicit and real; unresolved bundles create noise instead
83
+ of help.
84
+
85
+ ## 8. Repo-local proof and live proof are different
86
+
87
+ - Repo-local tests and docs can justify `repo-landed`.
88
+ - Live host validation, admitted runtime behavior, rollback drills, and operator
89
+ surfaces are what justify `pilot-live` and above.
90
+ - Do not let “the code exists” be treated as “the deployment works.”
91
+
92
+ ## 9. Architecture-facing status surfaces must be future-safe
93
+
94
+ - Status and projection code should be keyed to the real future topology, not
95
+ the smallest test case that passes today.
96
+ - If a status model will later carry multiple runtime classes, providers, or
97
+ lanes, the substrate must preserve that identity now.
98
+ - Closed enums and typed contracts should be validated as closed enums and typed
99
+ contracts, not accepted as arbitrary strings.
100
+
101
+ ## 10. The best waves are narrow, layered, and boring
102
+
103
+ - Narrow waves close more reliably than broad waves.
104
+ - A good wave answers:
105
+ - what exact maturity level is being claimed
106
+ - what exact artifacts prove it
107
+ - who owns repo implementation
108
+ - who owns live proof, if any
109
+ - what A9 must update
110
+ - what A0 must refuse to overclaim
111
+ - If a wave still sounds ambitious and fuzzy after writing the deliverables,
112
+ split it again.
113
+
114
+ ## 11. Future-wave checklist
115
+
116
+ - Does the component promotion match the real maturity level being claimed?
117
+ - Does every promoted component have an implementation owner?
118
+ - If the target is `pilot-live` or above, is there an explicit live-proof owner?
119
+ - Are deliverables and proof artifacts exact and machine-checkable?
120
+ - Are current-state and matrix updates part of A9 closure?
121
+ - Are A8 and A0 told what would make the wave fail honestly?
122
+ - Are runtime pins, Context7 bundles, and budgets specific enough to avoid
123
+ preventable execution failures?
124
+ - Would a reviewer understand the difference between “code landed” and
125
+ “component promoted” just by reading the wave file?
126
+
127
+ ## Bottom line
128
+
129
+ The successful waves were not the ones with the most code. They were the ones
130
+ where the wave file, the runtime setup, the artifacts, and the planning docs all
131
+ made the same claim at the same level of maturity.
package/package.json CHANGED
@@ -1,15 +1,15 @@
1
1
  {
2
2
  "name": "@chllming/wave-orchestration",
3
- "version": "0.6.2",
3
+ "version": "0.7.0",
4
4
  "license": "MIT",
5
5
  "description": "Generic wave-based multi-agent orchestration for repository work.",
6
6
  "repository": {
7
7
  "type": "git",
8
- "url": "git+https://github.com/chllming/wave-orchestration.git"
8
+ "url": "git+https://github.com/chllming/agent-wave-orchestrator.git"
9
9
  },
10
- "homepage": "https://github.com/chllming/wave-orchestration#readme",
10
+ "homepage": "https://github.com/chllming/agent-wave-orchestrator#readme",
11
11
  "bugs": {
12
- "url": "https://github.com/chllming/wave-orchestration/issues"
12
+ "url": "https://github.com/chllming/agent-wave-orchestrator/issues"
13
13
  },
14
14
  "publishConfig": {
15
15
  "access": "public"
@@ -41,6 +41,7 @@
41
41
  "context7:api-check": "bash scripts/context7-export-env.sh run bash scripts/context7-api-check.sh",
42
42
  "research:import-agent-context": "node scripts/research/import-agent-context-archive.mjs scripts/research/manifests/agent-context-expanded-2026-03-22.mjs",
43
43
  "research:index-agent-context": "node scripts/research/generate-agent-context-indexes.mjs",
44
+ "research:sync-planner-context7": "node scripts/research/sync-planner-context7-bundle.mjs",
44
45
  "research:refresh-agent-context": "pnpm research:import-agent-context && pnpm research:index-agent-context",
45
46
  "test": "vitest run --config vitest.config.ts",
46
47
  "wave": "node scripts/wave.mjs",
@@ -2,6 +2,39 @@
2
2
  "schemaVersion": 1,
3
3
  "packageName": "@chllming/wave-orchestration",
4
4
  "releases": [
5
+ {
6
+ "version": "0.7.0",
7
+ "date": "2026-03-23",
8
+ "summary": "Unified wave control operator CLI, canonical control-plane event log, Wave Control telemetry, live-wave orchestration refresh, and resident orchestrator support.",
9
+ "features": [
10
+ "Unified `wave control` CLI with `status`, `task`, `rerun`, `proof`, and `telemetry` sub-surfaces replacing `wave coord`/`wave retry`/`wave proof` as the preferred operator interface.",
11
+ "Canonical control-plane event log under `.tmp/<lane>-wave-launcher/control-plane/` with event-sourced materialization for proof bundles, rerun requests, operator tasks, and attempt lifecycle.",
12
+ "Wave Control telemetry: local-first event queueing with best-effort batch delivery, configurable report modes, selective artifact upload, and per-category capture toggles.",
13
+ "Live-wave orchestration refresh that keeps coordination surfaces, clarification triage, and dashboard metrics current during active execution.",
14
+ "Resident orchestrator support via `--resident-orchestrator` for long-running non-owning monitoring sessions.",
15
+ "Native and external benchmark telemetry with failure-review validity classification and config attestation hashing."
16
+ ],
17
+ "manualSteps": [
18
+ "Existing `wave coord`, `wave retry`, and `wave proof` commands remain available as compatibility surfaces. No migration required, but new operator docs prefer `wave control`.",
19
+ "To enable Wave Control telemetry, add a `waveControl` section to `wave.config.json` with at minimum an `endpoint` and `workspaceId`. Pass `--no-telemetry` to disable for a single run."
20
+ ],
21
+ "breaking": false
22
+ },
23
+ {
24
+ "version": "0.6.3",
25
+ "date": "2026-03-22",
26
+ "summary": "Runtime npmjs update notices plus a one-command self-update flow for downstream repos.",
27
+ "features": [
28
+ "Top-level runtime entrypoints now perform a best-effort npmjs version check, cache the result under `.wave/package-update-check.json`, and emit a non-blocking stderr notice when a newer `@chllming/wave-orchestration` release is available.",
29
+ "New `wave self-update` detects the workspace package manager, updates the package dependency to the latest published release, prints the changelog delta since the recorded install, and then runs `wave upgrade`.",
30
+ "Autonomous and ad-hoc flows now suppress nested update notices so operators see at most one banner per top-level run, while structured stdout such as `wave adhoc run --json` remains parseable."
31
+ ],
32
+ "manualSteps": [
33
+ "No migration is required. If you prefer not to check npmjs at runtime on a workstation, set `WAVE_SKIP_UPDATE_CHECK=1` in that shell environment.",
34
+ "After upgrading, try `pnpm exec wave self-update` once in an adopted repo to confirm the workspace package manager and install-state workflow behave the way you expect."
35
+ ],
36
+ "breaking": false
37
+ },
5
38
  {
6
39
  "version": "0.6.2",
7
40
  "date": "2026-03-22",
@@ -14,6 +14,12 @@ export const TOPIC_DEFINITIONS = [
14
14
  description:
15
15
  "Planning topology, verifier and replanner loops, protocol-driven coordination, and blackboard-aware orchestration patterns for multi-agent systems.",
16
16
  },
17
+ {
18
+ id: "agent-cooperation-and-coordination",
19
+ title: "Agent Cooperation and Coordination",
20
+ description:
21
+ "Benchmarks and failure analyses for inter-agent cooperation, commitment tracking, communication quality, negotiation, and teammate-style coordination.",
22
+ },
17
23
  {
18
24
  id: "long-running-agents-and-compaction",
19
25
  title: "Long-Running Agents and Compaction",
@@ -103,6 +109,15 @@ const SKILLS_TOPIC_OVERRIDE_SLUGS = new Set([
103
109
  "meta-context-engineering-via-agentic-skill-evolution",
104
110
  ]);
105
111
 
112
/**
 * Entry slugs that are always tagged with the
 * "agent-cooperation-and-coordination" topic.
 */
const COOPERATION_TOPIC_OVERRIDE_SLUGS = new Set([
  "cooperbench-why-coding-agents-cannot-be-your-teammates-yet",
  "why-do-multi-agent-llm-systems-fail",
  "systematic-failures-in-collective-reasoning-under-distributed-information-in-multi-agent-llms",
  "silo-bench-a-scalable-environment-for-evaluating-distributed-coordination-in-multi-agent-llm-systems",
  "dpbench-large-language-models-struggle-with-simultaneous-coordination",
  "multi-agent-teams-hold-experts-back",
]);
120
+
106
121
  function escapeInlinePipes(value) {
107
122
  return String(value ?? "").replaceAll("|", "\\|");
108
123
  }
@@ -252,6 +267,9 @@ export function inferTopics(entry, section = null) {
252
267
  if (SKILLS_TOPIC_OVERRIDE_SLUGS.has(entry.slug)) {
253
268
  topics.push("skills-and-procedural-memory");
254
269
  }
270
+ if (COOPERATION_TOPIC_OVERRIDE_SLUGS.has(entry.slug)) {
271
+ topics.push("agent-cooperation-and-coordination");
272
+ }
255
273
 
256
274
  if (hasDeclaredTopics) {
257
275
  return unique(topics);
@@ -3,6 +3,7 @@ import baseManifest from "./harness-and-blackboard-2026-03-21.mjs";
3
3
  const TOPICS = {
4
4
  HARNESS: "harnesses-and-practice",
5
5
  PLANNING: "planning-and-orchestration",
6
+ COOPERATION: "agent-cooperation-and-coordination",
6
7
  LONG_RUNNING: "long-running-agents-and-compaction",
7
8
  SKILLS: "skills-and-procedural-memory",
8
9
  BLACKBOARD: "blackboard-and-shared-workspaces",
@@ -521,6 +522,22 @@ const planningManifest = [
521
522
  fit: "Useful benchmark for testing whether coordination-heavy planning systems scale beyond serial reasoning.",
522
523
  topics: [TOPICS.PLANNING, TOPICS.REPO],
523
524
  }),
525
+ arxivPaper("2601.13295", {
526
+ title: "CooperBench: Why Coding Agents Cannot be Your Teammates Yet",
527
+ slug: "cooperbench-why-coding-agents-cannot-be-your-teammates-yet",
528
+ authors:
529
+ "Arpandeep Khatua, Hao Zhu, Peter Tran, Arya Prabhudesai, Frederic Sadrieh, Johann K. Lieberwirth, Xinkai Yu, Yicheng Fu, Michael J. Ryan, Jiaxin Pei, Diyi Yang",
530
+ year: 2026,
531
+ researchBucket: "P0 direct hits",
532
+ mapsTo:
533
+ "Collaborative coding benchmark for inter-agent cooperation, communication quality, commitment tracking, and coordination failures.",
534
+ fit: "Direct benchmark for whether coding agents behave like usable teammates instead of isolated solo solvers.",
535
+ additionalSource: "https://cooperbench.com",
536
+ additionalPdf: "https://cooperbench.com/static/pdfs/main.pdf",
537
+ notes:
538
+ "Project site hosts the same paper PDF plus leaderboard, dataset, and trajectory viewer for the benchmark.",
539
+ topics: [TOPICS.PLANNING, TOPICS.COOPERATION, TOPICS.REPO],
540
+ }),
524
541
  arxivPaper("2602.01011", {
525
542
  title: "Multi-Agent Teams Hold Experts Back",
526
543
  slug: "multi-agent-teams-hold-experts-back",
@@ -0,0 +1,133 @@
1
+ import crypto from "node:crypto";
2
+ import fs from "node:fs";
3
+ import path from "node:path";
4
+ import { PACKAGE_ROOT } from "../wave-orchestrator/roots.mjs";
5
+ import {
6
+ PLANNER_CONTEXT7_BUNDLE_ID,
7
+ PLANNER_CONTEXT7_DEFAULT_QUERY,
8
+ PLANNER_CONTEXT7_LIBRARY_NAME,
9
+ PLANNER_CONTEXT7_SOURCE_DIR,
10
+ PLANNER_CONTEXT7_SOURCE_FILES,
11
+ } from "../wave-orchestrator/planner-context.mjs";
12
+
13
/**
 * Create a directory, including any missing parent directories.
 *
 * @param {string} dirPath - Directory path to create.
 * @returns {void}
 */
function ensureDirectory(dirPath) {
  const mkdirOptions = { recursive: true };
  fs.mkdirSync(dirPath, mkdirOptions);
}
16
+
17
/**
 * Compute the SHA-256 digest of a string.
 *
 * @param {string} text - Text to hash; it is fed to the hash as UTF-8.
 * @returns {string} Hex-encoded digest.
 */
function sha256Text(text) {
  const hasher = crypto.createHash("sha256");
  hasher.update(text, "utf8");
  return hasher.digest("hex");
}
20
+
21
/**
 * Read the value of a `key: value` line from frontmatter-style text.
 *
 * The first line that starts with `key:` wins. One optional leading and one
 * optional trailing quote character (`'` or `"`) are dropped, and the
 * captured value is trimmed.
 *
 * @param {string} text - Text to search; falsy input is treated as empty.
 * @param {string} key - Key name, matched literally at the start of a line.
 * @returns {string} The extracted value, or `""` when no such line exists.
 */
function extractFrontmatterValue(text, key) {
  // Escape regex metacharacters so keys such as "a.b" or "a(b" are matched
  // literally instead of acting as (possibly invalid) pattern syntax.
  const literalKey = String(key).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const pattern = new RegExp(`^${literalKey}:\\s*['"]?(.+?)['"]?$`, "m");
  const match = String(text || "").match(pattern);
  return match ? match[1].trim() : "";
}
25
+
26
/**
 * Find the first level-one Markdown heading (`# ...`) in a text.
 *
 * @param {string} text - Markdown text; falsy input is treated as empty.
 * @returns {string} The trimmed heading text, or `""` when there is none.
 */
function extractMarkdownHeading(text) {
  const source = String(text || "");
  const found = /^#\s+(.+)$/m.exec(source);
  if (found === null) {
    return "";
  }
  return found[1].trim();
}
30
+
31
/**
 * Render the Markdown topic index for the planner Context7 corpus.
 *
 * The output is a fixed frontmatter block and introduction followed by one
 * bullet link per entry whose `kind` is "paper". Each link is labelled with
 * the entry's title, falling back to the last "/"-separated segment of its
 * target path, and points at `../papers/<basename of targetPath>`.
 *
 * @param {Array<{kind: string, title?: string, targetPath: string}>} copiedFiles
 *   Records of the files copied so far.
 * @returns {string} Markdown text ending with a newline.
 */
function renderPlannerTopicIndex(copiedFiles) {
  const paperLinks = [];
  for (const copied of copiedFiles) {
    if (copied.kind !== "paper") {
      continue;
    }
    const label = copied.title || copied.targetPath.split("/").pop();
    const fileName = path.basename(copied.targetPath);
    paperLinks.push(`- [${label}](../papers/${fileName})`);
  }

  const documentLines = [
    "---",
    "summary: 'Curated planning and orchestration corpus exported for the agentic planner Context7 bundle.'",
    "read_when:",
    " - You are publishing or refreshing the planner-agentic Context7 library",
    " - You need the exact planner research subset that Wave ships for agentic planning",
    "title: 'Planner Agentic Context7 Corpus'",
    "---",
    "",
    "# Planner Agentic Context7 Corpus",
    "",
    "This file is the tracked topic index for the planner-specific Context7 corpus.",
    "It intentionally references only the copied files that ship under",
    "`docs/context7/planner-agent/`.",
    "",
    "## Included papers",
    "",
    ...paperLinks,
    // Trailing empty element so the joined text ends with a newline.
    "",
  ];
  return documentLines.join("\n");
}
58
+
59
/**
 * Write text to a file, creating its parent directory first, and report the
 * size and digest of what was written.
 *
 * @param {string} targetPath - Destination file path.
 * @param {string} text - Content to write as UTF-8.
 * @returns {{bytes: number, sha256: string}} UTF-8 byte length and SHA-256
 *   hex digest of `text`.
 */
function writePlannerContextFile(targetPath, text) {
  const parentDir = path.dirname(targetPath);
  ensureDirectory(parentDir);
  fs.writeFileSync(targetPath, text, "utf8");

  const bytes = Buffer.byteLength(text, "utf8");
  const sha256 = sha256Text(text);
  return { bytes, sha256 };
}
67
+
68
/**
 * Materialise one planner Context7 entry under the package root and describe
 * the result.
 *
 * Entries of kind "topic" are written as a generated index built from
 * `copiedFiles`; every other kind is written with the source file's text.
 * The source file must exist in both cases.
 *
 * @param {{kind: string, sourcePath: string, targetPath: string}} entry
 *   Entry with paths relative to `PACKAGE_ROOT`.
 * @param {Array<object>} copiedFiles - Records of the files copied so far,
 *   used to build the topic index.
 * @returns {{kind: string, sourcePath: string, targetPath: string, title: string, bytes: number, sha256: string}}
 *   Record for the written file.
 * @throws {Error} When the source file does not exist.
 */
function copyPlannerContextFile(entry, copiedFiles) {
  const absoluteSource = path.join(PACKAGE_ROOT, entry.sourcePath);
  const absoluteTarget = path.join(PACKAGE_ROOT, entry.targetPath);
  if (!fs.existsSync(absoluteSource)) {
    throw new Error(`Planner Context7 source file is missing: ${entry.sourcePath}`);
  }
  ensureDirectory(path.dirname(absoluteTarget));
  const sourceText = fs.readFileSync(absoluteSource, "utf8");

  let written;
  if (entry.kind === "topic") {
    written = writePlannerContextFile(absoluteTarget, renderPlannerTopicIndex(copiedFiles));
  } else {
    written = writePlannerContextFile(absoluteTarget, sourceText);
  }

  let title;
  if (entry.kind === "topic") {
    title = "Planner Agentic Context7 Corpus";
  } else {
    // Prefer the frontmatter title, then the first heading, then the file name.
    title =
      extractFrontmatterValue(sourceText, "title") ||
      extractMarkdownHeading(sourceText) ||
      path.basename(entry.targetPath);
  }

  return {
    kind: entry.kind,
    sourcePath: entry.sourcePath,
    targetPath: entry.targetPath,
    title,
    ...written,
  };
}
94
+
95
/**
 * Write `manifest.json` for the planner Context7 bundle into
 * `PLANNER_CONTEXT7_SOURCE_DIR` under the package root.
 *
 * The manifest records the bundle identity constants, the current time as
 * `generatedAt`, and the supplied file records. It is serialised with
 * two-space indentation and a trailing newline.
 *
 * @param {Array<object>} files - File records to list in the manifest.
 * @returns {void}
 */
function writeManifest(files) {
  const manifestPath = path.join(PACKAGE_ROOT, PLANNER_CONTEXT7_SOURCE_DIR, "manifest.json");
  ensureDirectory(path.dirname(manifestPath));

  const manifest = {
    version: 1,
    generatedAt: new Date().toISOString(),
    bundleId: PLANNER_CONTEXT7_BUNDLE_ID,
    libraryName: PLANNER_CONTEXT7_LIBRARY_NAME,
    defaultQuery: PLANNER_CONTEXT7_DEFAULT_QUERY,
    sourceRoot: "docs/research/agent-context-cache",
    targetRoot: PLANNER_CONTEXT7_SOURCE_DIR,
    files,
  };
  const serialized = JSON.stringify(manifest, null, 2);
  fs.writeFileSync(manifestPath, `${serialized}\n`, "utf8");
}
117
+
118
/**
 * Script entry point: copy every planner Context7 source file, write the
 * manifest, and log a one-line summary.
 *
 * Topic entries are processed after all other entries so that the generated
 * topic index can list the files already copied.
 *
 * @returns {void}
 */
function main() {
  const topicEntries = [];
  const otherEntries = [];
  for (const entry of PLANNER_CONTEXT7_SOURCE_FILES) {
    if (entry.kind === "topic") {
      topicEntries.push(entry);
    } else {
      otherEntries.push(entry);
    }
  }

  const files = [];
  for (const entry of [...otherEntries, ...topicEntries]) {
    const copied = copyPlannerContextFile(entry, files);
    files.push(copied);
  }

  writeManifest(files);
  console.log(
    `[planner-context7] synced ${files.length} files into ${PLANNER_CONTEXT7_SOURCE_DIR}`,
  );
}

main();