@chllming/wave-orchestration 0.6.3 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +57 -1
- package/README.md +39 -7
- package/docs/agents/wave-orchestrator-role.md +50 -0
- package/docs/agents/wave-planner-role.md +39 -0
- package/docs/context7/bundles.json +9 -0
- package/docs/context7/planner-agent/README.md +25 -0
- package/docs/context7/planner-agent/manifest.json +83 -0
- package/docs/context7/planner-agent/papers/cooperbench-why-coding-agents-cannot-be-your-teammates-yet.md +3283 -0
- package/docs/context7/planner-agent/papers/dova-deliberation-first-multi-agent-orchestration-for-autonomous-research-automation.md +1699 -0
- package/docs/context7/planner-agent/papers/dpbench-large-language-models-struggle-with-simultaneous-coordination.md +2251 -0
- package/docs/context7/planner-agent/papers/incremental-planning-to-control-a-blackboard-based-problem-solver.md +1729 -0
- package/docs/context7/planner-agent/papers/silo-bench-a-scalable-environment-for-evaluating-distributed-coordination-in-multi-agent-llm-systems.md +3747 -0
- package/docs/context7/planner-agent/papers/todoevolve-learning-to-architect-agent-planning-systems.md +1675 -0
- package/docs/context7/planner-agent/papers/verified-multi-agent-orchestration-a-plan-execute-verify-replan-framework-for-complex-query-resolution.md +1173 -0
- package/docs/context7/planner-agent/papers/why-do-multi-agent-llm-systems-fail.md +5211 -0
- package/docs/context7/planner-agent/topics/planning-and-orchestration.md +24 -0
- package/docs/evals/README.md +96 -1
- package/docs/evals/arm-templates/README.md +13 -0
- package/docs/evals/arm-templates/full-wave.json +15 -0
- package/docs/evals/arm-templates/single-agent.json +15 -0
- package/docs/evals/benchmark-catalog.json +7 -0
- package/docs/evals/cases/README.md +47 -0
- package/docs/evals/cases/wave-blackboard-inbox-targeting.json +73 -0
- package/docs/evals/cases/wave-contradiction-conflict.json +104 -0
- package/docs/evals/cases/wave-expert-routing-preservation.json +69 -0
- package/docs/evals/cases/wave-hidden-profile-private-evidence.json +81 -0
- package/docs/evals/cases/wave-premature-closure-guard.json +71 -0
- package/docs/evals/cases/wave-silo-cross-agent-state.json +77 -0
- package/docs/evals/cases/wave-simultaneous-lockstep.json +92 -0
- package/docs/evals/cooperbench/real-world-mitigation.md +341 -0
- package/docs/evals/external-benchmarks.json +85 -0
- package/docs/evals/external-command-config.sample.json +9 -0
- package/docs/evals/external-command-config.swe-bench-pro.json +8 -0
- package/docs/evals/pilots/README.md +47 -0
- package/docs/evals/pilots/swe-bench-pro-public-full-wave-review-10.json +64 -0
- package/docs/evals/pilots/swe-bench-pro-public-pilot.json +111 -0
- package/docs/evals/wave-benchmark-program.md +302 -0
- package/docs/guides/planner.md +48 -11
- package/docs/plans/context7-wave-orchestrator.md +20 -0
- package/docs/plans/current-state.md +8 -1
- package/docs/plans/examples/wave-benchmark-improvement.md +108 -0
- package/docs/plans/examples/wave-example-live-proof.md +1 -1
- package/docs/plans/examples/wave-example-rollout-fidelity.md +340 -0
- package/docs/plans/wave-orchestrator.md +62 -11
- package/docs/plans/waves/reviews/wave-1-benchmark-operator.md +118 -0
- package/docs/reference/coordination-and-closure.md +436 -0
- package/docs/reference/live-proof-waves.md +25 -3
- package/docs/reference/npmjs-trusted-publishing.md +3 -3
- package/docs/reference/proof-metrics.md +90 -0
- package/docs/reference/runtime-config/README.md +61 -0
- package/docs/reference/sample-waves.md +29 -18
- package/docs/reference/wave-control.md +164 -0
- package/docs/reference/wave-planning-lessons.md +131 -0
- package/package.json +5 -4
- package/releases/manifest.json +18 -0
- package/scripts/research/agent-context-archive.mjs +18 -0
- package/scripts/research/manifests/agent-context-expanded-2026-03-22.mjs +17 -0
- package/scripts/research/sync-planner-context7-bundle.mjs +133 -0
- package/scripts/wave-orchestrator/artifact-schemas.mjs +232 -0
- package/scripts/wave-orchestrator/autonomous.mjs +7 -0
- package/scripts/wave-orchestrator/benchmark-cases.mjs +374 -0
- package/scripts/wave-orchestrator/benchmark-external.mjs +1384 -0
- package/scripts/wave-orchestrator/benchmark.mjs +972 -0
- package/scripts/wave-orchestrator/clarification-triage.mjs +78 -12
- package/scripts/wave-orchestrator/config.mjs +175 -0
- package/scripts/wave-orchestrator/control-cli.mjs +1123 -0
- package/scripts/wave-orchestrator/control-plane.mjs +697 -0
- package/scripts/wave-orchestrator/coord-cli.mjs +360 -2
- package/scripts/wave-orchestrator/coordination-store.mjs +211 -9
- package/scripts/wave-orchestrator/coordination.mjs +84 -0
- package/scripts/wave-orchestrator/dashboard-renderer.mjs +38 -3
- package/scripts/wave-orchestrator/dashboard-state.mjs +22 -0
- package/scripts/wave-orchestrator/evals.mjs +23 -0
- package/scripts/wave-orchestrator/executors.mjs +3 -2
- package/scripts/wave-orchestrator/feedback.mjs +55 -0
- package/scripts/wave-orchestrator/install.mjs +55 -1
- package/scripts/wave-orchestrator/launcher-closure.mjs +4 -1
- package/scripts/wave-orchestrator/launcher-runtime.mjs +24 -21
- package/scripts/wave-orchestrator/launcher.mjs +796 -35
- package/scripts/wave-orchestrator/planner-context.mjs +75 -0
- package/scripts/wave-orchestrator/planner.mjs +2270 -136
- package/scripts/wave-orchestrator/proof-cli.mjs +195 -0
- package/scripts/wave-orchestrator/proof-registry.mjs +317 -0
- package/scripts/wave-orchestrator/replay.mjs +10 -4
- package/scripts/wave-orchestrator/retry-cli.mjs +184 -0
- package/scripts/wave-orchestrator/retry-control.mjs +225 -0
- package/scripts/wave-orchestrator/shared.mjs +26 -0
- package/scripts/wave-orchestrator/swe-bench-pro-task.mjs +1004 -0
- package/scripts/wave-orchestrator/traces.mjs +157 -2
- package/scripts/wave-orchestrator/wave-control-client.mjs +532 -0
- package/scripts/wave-orchestrator/wave-control-schema.mjs +309 -0
- package/scripts/wave-orchestrator/wave-files.mjs +17 -5
- package/scripts/wave.mjs +27 -0
- package/skills/repo-coding-rules/SKILL.md +1 -0
- package/skills/role-cont-eval/SKILL.md +1 -0
- package/skills/role-cont-qa/SKILL.md +13 -6
- package/skills/role-deploy/SKILL.md +1 -0
- package/skills/role-documentation/SKILL.md +4 -0
- package/skills/role-implementation/SKILL.md +4 -0
- package/skills/role-infra/SKILL.md +2 -1
- package/skills/role-integration/SKILL.md +15 -8
- package/skills/role-planner/SKILL.md +39 -0
- package/skills/role-planner/skill.json +21 -0
- package/skills/role-research/SKILL.md +1 -0
- package/skills/role-security/SKILL.md +2 -2
- package/skills/runtime-claude/SKILL.md +2 -1
- package/skills/runtime-codex/SKILL.md +1 -0
- package/skills/runtime-local/SKILL.md +2 -0
- package/skills/runtime-opencode/SKILL.md +1 -0
- package/skills/wave-core/SKILL.md +25 -6
- package/skills/wave-core/references/marker-syntax.md +16 -8
- package/wave.config.json +45 -0
|
@@ -1,23 +1,29 @@
|
|
|
1
1
|
---
|
|
2
2
|
title: "Sample Waves"
|
|
3
|
-
summary: "
|
|
3
|
+
summary: "Showcase-first sample waves that demonstrate the current 0.7.0 Wave surface."
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# Sample Waves
|
|
7
7
|
|
|
8
|
-
This guide points to
|
|
8
|
+
This guide points to showcase-first sample waves that demonstrate the current `0.7.0` authored Wave surface.
|
|
9
9
|
|
|
10
|
-
The
|
|
10
|
+
The examples are intentionally denser than typical production waves. Their job is to teach the current authoring and runtime surface quickly, not to be the smallest possible launch-ready files.
|
|
11
11
|
|
|
12
|
-
## Canonical
|
|
12
|
+
## Canonical Examples
|
|
13
|
+
|
|
14
|
+
- [High-fidelity repo-landed rollout wave](../plans/examples/wave-example-rollout-fidelity.md)
|
|
15
|
+
Shows what a good `repo-landed` outcome looks like when one promoted component only closes honestly if desired-state records, reconcile-loop substrate, and cluster-view surfaces land together. It emphasizes maturity discipline, explicit deliverables, and shared-plan closure without drifting into `pilot-live` claims.
|
|
13
16
|
|
|
14
17
|
- [Full modern sample wave](../plans/examples/wave-example-live-proof.md)
|
|
15
|
-
Shows the combined `0.
|
|
18
|
+
Shows the combined `0.7.0` authored surface in one file: closure roles, `E0`, optional security review, delegated and pinned benchmark targets, richer executor config, `### Skills`, `### Capabilities`, `### Deliverables`, `### Exit contract`, `### Proof artifacts`, sticky retry, deploy environments, and proof-first live-wave structure.
|
|
16
19
|
|
|
17
|
-
## What
|
|
20
|
+
## What These Examples Teach
|
|
18
21
|
|
|
19
|
-
- the standard closure-role structure with `A0`, `
|
|
20
|
-
- wave-level `## Eval targets`
|
|
22
|
+
- the standard closure-role structure with `A0`, `A8`, and `A9`
|
|
23
|
+
- `E0` and wave-level `## Eval targets` in the full modern sample
|
|
24
|
+
- honest `repo-landed` maturity framing without `pilot-live` drift
|
|
25
|
+
- multi-slice component promotion where all sibling owners must land together
|
|
26
|
+
- shared-plan and component-matrix closure as part of the architecture truth
|
|
21
27
|
- delegated versus pinned benchmark selection
|
|
22
28
|
- coordination benchmark families from `docs/evals/benchmark-catalog.json`
|
|
23
29
|
- richer executor blocks, runtime budgets, and retry policy
|
|
@@ -32,8 +38,11 @@ The example is intentionally denser than a typical production wave. Its job is t
|
|
|
32
38
|
|
|
33
39
|
## Feature Coverage Map
|
|
34
40
|
|
|
35
|
-
|
|
41
|
+
Together these samples cover the main surfaces added or hardened for `0.7.0`:
|
|
36
42
|
|
|
43
|
+
- repo-landed maturity discipline and anti-overclaim framing
|
|
44
|
+
- explicit shared-plan closure for future-wave safety
|
|
45
|
+
- coordinated component slices with per-agent deliverables
|
|
37
46
|
- planner-era authored wave structure
|
|
38
47
|
- cross-runtime `### Skills`
|
|
39
48
|
- richer `### Executor` blocks and runtime budgets
|
|
@@ -53,6 +62,7 @@ This sample covers the main surfaces added or hardened for `0.6.1`:
|
|
|
53
62
|
Copy more literally when:
|
|
54
63
|
|
|
55
64
|
- you need the section layout
|
|
65
|
+
- you want a concrete example of what good repo-landed wave fidelity looks like
|
|
56
66
|
- you want concrete wording for delegated versus pinned benchmark targets
|
|
57
67
|
- you want a proof-first owner example with local artifact bundles and sticky retry
|
|
58
68
|
|
|
@@ -65,23 +75,24 @@ Adapt more aggressively when:
|
|
|
65
75
|
|
|
66
76
|
## How These Examples Map To Other Docs
|
|
67
77
|
|
|
68
|
-
- Use [docs/guides/planner.md](../guides/planner.md) for the planner-generated baseline, then use
|
|
69
|
-
- Use [docs/evals/README.md](../evals/README.md) with
|
|
70
|
-
- Use [docs/reference/live-proof-waves.md](./live-proof-waves.md) with
|
|
78
|
+
- Use [docs/guides/planner.md](../guides/planner.md) for the planner-generated baseline, then use these samples to see how a human would enrich the generated draft for either repo-landed or proof-first work.
|
|
79
|
+
- Use [docs/evals/README.md](../evals/README.md) with the full modern sample when you need to see delegated and pinned benchmark targets in a real wave.
|
|
80
|
+
- Use [docs/reference/live-proof-waves.md](./live-proof-waves.md) with the full modern sample when you need proof-first authoring for `pilot-live` and above.
|
|
71
81
|
- Use [docs/plans/wave-orchestrator.md](../plans/wave-orchestrator.md) for the operational runbook that explains how the launcher interprets these sections.
|
|
72
82
|
|
|
73
83
|
## Suggested Reading Order
|
|
74
84
|
|
|
75
|
-
1. Start with [
|
|
76
|
-
2. Read [
|
|
77
|
-
3. Read [docs/
|
|
85
|
+
1. Start with [High-fidelity repo-landed rollout wave](../plans/examples/wave-example-rollout-fidelity.md) if you want the clearest example of good closure-ready wave fidelity for a repo-only outcome.
|
|
86
|
+
2. Read [Full modern sample wave](../plans/examples/wave-example-live-proof.md) if you want the denser proof-first and eval-heavy surface.
|
|
87
|
+
3. Read [docs/evals/README.md](../evals/README.md) if you want more background on benchmark target selection.
|
|
88
|
+
4. Read [docs/reference/live-proof-waves.md](./live-proof-waves.md) if you want more detail on proof-first `pilot-live` authoring.
|
|
78
89
|
|
|
79
|
-
## Why
|
|
90
|
+
## Why These Examples Live In `docs/plans/examples/`
|
|
80
91
|
|
|
81
|
-
The
|
|
92
|
+
The examples live outside `docs/plans/waves/` on purpose.
|
|
82
93
|
|
|
83
94
|
That keeps them:
|
|
84
95
|
|
|
85
96
|
- easy to browse as teaching material
|
|
86
97
|
- clearly separate from the repo's real launcher-facing wave sequence
|
|
87
|
-
- safe to evolve as reference material without implying that
|
|
98
|
+
- safe to evolve as reference material without implying that they are part of the current lane's actual plan history
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Wave Control"
|
|
3
|
+
summary: "Canonical telemetry, artifact upload policy, and the local-first reporting contract for the Railway-hosted Wave control plane."
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Wave Control
|
|
7
|
+
|
|
8
|
+
Wave Control is the telemetry and analysis plane for Wave runs.
|
|
9
|
+
|
|
10
|
+
The design rule is:
|
|
11
|
+
|
|
12
|
+
- local files stay authoritative
|
|
13
|
+
- remote reporting is best-effort
|
|
14
|
+
- dashboards and markdown remain projections over typed local state
|
|
15
|
+
|
|
16
|
+
## What Gets Reported
|
|
17
|
+
|
|
18
|
+
Wave Control normalizes these entity types:
|
|
19
|
+
|
|
20
|
+
- `wave_run`
|
|
21
|
+
- `agent_run`
|
|
22
|
+
- `coordination_record`
|
|
23
|
+
- `task`
|
|
24
|
+
- `attempt`
|
|
25
|
+
- `gate`
|
|
26
|
+
- `proof_bundle`
|
|
27
|
+
- `rerun_request`
|
|
28
|
+
- `human_input`
|
|
29
|
+
- `artifact`
|
|
30
|
+
- `benchmark_run`
|
|
31
|
+
- `benchmark_item`
|
|
32
|
+
- `verification`
|
|
33
|
+
- `review`
|
|
34
|
+
|
|
35
|
+
This lets the control plane answer:
|
|
36
|
+
|
|
37
|
+
- what happened in a run
|
|
38
|
+
- which proof and benchmark artifacts back a claim
|
|
39
|
+
- whether a benchmark result is comparison-valid or only diagnostic
|
|
40
|
+
- which coordination failures blocked closure
|
|
41
|
+
|
|
42
|
+
## Run Identity
|
|
43
|
+
|
|
44
|
+
Every Wave Control event carries a normalized run identity.
|
|
45
|
+
|
|
46
|
+
The key fields are:
|
|
47
|
+
|
|
48
|
+
- `workspaceId`
|
|
49
|
+
- `projectId`
|
|
50
|
+
- `runKind`
|
|
51
|
+
- `runId`
|
|
52
|
+
- `lane`
|
|
53
|
+
- `wave`
|
|
54
|
+
- `attempt`
|
|
55
|
+
- `agentId`
|
|
56
|
+
- `orchestratorId`
|
|
57
|
+
- `runtimeVersion`
|
|
58
|
+
- `benchmarkRunId`
|
|
59
|
+
- `benchmarkItemId`
|
|
60
|
+
|
|
61
|
+
Why these fields matter:
|
|
62
|
+
|
|
63
|
+
- `workspaceId` separates whole adopted workspaces
|
|
64
|
+
- `projectId` separates product or repo identities inside one control plane
|
|
65
|
+
- `orchestratorId` separates resident orchestrators or control-plane owners
|
|
66
|
+
- `runtimeVersion` lets operators compare behavior across Wave releases without guessing from deploy timestamps
|
|
67
|
+
|
|
68
|
+
These are first-class query dimensions in the service, not only free-form event payload fields.
|
|
69
|
+
|
|
70
|
+
## Proof Signals
|
|
71
|
+
|
|
72
|
+
Wave Control is intended to make the main README claims measurable.
|
|
73
|
+
|
|
74
|
+
For the explicit README-failure-case-to-signal map, see [proof-metrics.md](./proof-metrics.md).
|
|
75
|
+
|
|
76
|
+
Signals to preserve:
|
|
77
|
+
|
|
78
|
+
- canonical-state fidelity:
|
|
79
|
+
`coordination_record`, `wave_run`, `attempt`, and `artifact` telemetry prove the scheduler truth came from JSON state, not only markdown boards
|
|
80
|
+
- evidence pooling:
|
|
81
|
+
integration and closure telemetry should cite the proof artifacts and evidence refs they relied on
|
|
82
|
+
- contradiction repair:
|
|
83
|
+
gate and review telemetry should show unresolved conflicts, repair creation, and repair resolution
|
|
84
|
+
- expert routing:
|
|
85
|
+
targeted assignments, reroutes, and final recommendation ownership should remain visible
|
|
86
|
+
- premature closure prevention:
|
|
87
|
+
gate snapshots, proof completeness, block reasons, reruns, and cont-QA reversal should be durable
|
|
88
|
+
- benchmark trust:
|
|
89
|
+
every benchmark item should distinguish capability from validity
|
|
90
|
+
|
|
91
|
+
## Artifact Contract
|
|
92
|
+
|
|
93
|
+
Selected artifacts are described with typed descriptors:
|
|
94
|
+
|
|
95
|
+
```json
|
|
96
|
+
{
|
|
97
|
+
"path": ".tmp/main-wave-launcher/traces/wave-1/attempt-1/quality.json",
|
|
98
|
+
"kind": "trace-quality",
|
|
99
|
+
"required": true,
|
|
100
|
+
"present": true,
|
|
101
|
+
"sha256": "abc123...",
|
|
102
|
+
"bytes": 2048,
|
|
103
|
+
"contentType": "application/json",
|
|
104
|
+
"uploadPolicy": "selected"
|
|
105
|
+
}
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Upload policy meanings:
|
|
109
|
+
|
|
110
|
+
- `local-only`: keep only the descriptor remotely
|
|
111
|
+
- `metadata-only`: report path, hash, size, and presence only
|
|
112
|
+
- `selected`: upload metadata plus the artifact body when the runtime is in `metadata-plus-selected` or `full-artifact-upload` **and** the artifact kind is allowed by `waveControl.uploadArtifactKinds`
|
|
114
|
+
- `full`: upload the artifact body in `full-artifact-upload` flows; if `uploadArtifactKinds` is set, keep the kind allowlist aligned with that policy
|
|
115
|
+
|
|
116
|
+
## Runtime Config
|
|
117
|
+
|
|
118
|
+
`wave.config.json` can declare:
|
|
119
|
+
|
|
120
|
+
```json
|
|
121
|
+
{
|
|
122
|
+
"waveControl": {
|
|
123
|
+
"endpoint": "https://wave-control.up.railway.app/api/v1",
|
|
124
|
+
"workspaceId": "my-workspace",
|
|
125
|
+
"projectId": "wave-orchestration",
|
|
126
|
+
"authTokenEnvVar": "WAVE_CONTROL_AUTH_TOKEN",
|
|
127
|
+
"reportMode": "metadata-plus-selected",
|
|
128
|
+
"uploadArtifactKinds": [
|
|
129
|
+
"trace-run-metadata",
|
|
130
|
+
"trace-quality",
|
|
131
|
+
"benchmark-results"
|
|
132
|
+
]
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Lane overrides may refine the same surface under `lanes.<lane>.waveControl`.
|
|
138
|
+
|
|
139
|
+
For a single run, operators can disable Wave Control reporting entirely with:
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
pnpm exec wave launch --lane main --no-telemetry
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
That suppresses the local telemetry spool and remote delivery for that invocation, while leaving the canonical runtime artifacts and local control-plane state intact.
|
|
146
|
+
|
|
147
|
+
## Delivery Model
|
|
148
|
+
|
|
149
|
+
Wave Control reporting should:
|
|
150
|
+
|
|
151
|
+
- append local telemetry first
|
|
152
|
+
- queue pending uploads under `.tmp/<lane>-wave-launcher/control-plane/telemetry/`
|
|
153
|
+
- respect `waveControl.uploadArtifactKinds` before uploading any selected artifact body
|
|
154
|
+
- cap pending remote uploads with `waveControl.maxPendingEvents` by dropping the oldest queued remote-delivery files, while keeping the local `events.jsonl` stream intact
|
|
155
|
+
- retry delivery with idempotency keys
|
|
156
|
+
- never fail a live run, proof registration, or benchmark because the network is unavailable
|
|
157
|
+
|
|
158
|
+
The Railway-hosted `services/wave-control` service is an analysis surface, not the scheduler of record.
|
|
159
|
+
|
|
160
|
+
The service package lives under `services/wave-control/`.
|
|
161
|
+
|
|
162
|
+
For durable telemetry retention, attach Railway Postgres to `wave-control` so the
|
|
163
|
+
service receives `DATABASE_URL`. Without that variable, the service falls back to the
|
|
164
|
+
in-memory store and only keeps data until the process restarts.
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
---
|
|
2
|
+
summary: "Lessons from Waves 4-9 on what makes future waves succeed or fail."
|
|
3
|
+
read_when:
|
|
4
|
+
- Drafting a new wave
|
|
5
|
+
- Splitting or renumbering future waves
|
|
6
|
+
- Deciding whether a wave should target repo-landed, pilot-live, or above
|
|
7
|
+
title: "Wave Planning Lessons"
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Wave Planning Lessons
|
|
11
|
+
|
|
12
|
+
This document captures the practical lessons from Waves 4-9. The main theme is
|
|
13
|
+
simple: waves succeed when the declared maturity target, the owned slices, the
|
|
14
|
+
runtime setup, and the closure artifacts all describe the same truth.
|
|
15
|
+
|
|
16
|
+
## 1. One honest maturity jump per wave
|
|
17
|
+
|
|
18
|
+
- Treat `repo-landed`, `pilot-live`, `qa-proved`, `fleet-ready`,
|
|
19
|
+
`cutover-ready`, and `deprecation-ready` as materially different bars.
|
|
20
|
+
- A wave should promote a component by one honest maturity step, not silently
|
|
21
|
+
combine multiple levels of proof in one broad plan.
|
|
22
|
+
- If a wave only lands code and tests, the target is usually `repo-landed`, not
|
|
23
|
+
`pilot-live`.
|
|
24
|
+
- If a wave claims `pilot-live` or above, the wave must own real deploy/live
|
|
25
|
+
proof and rollback evidence.
|
|
26
|
+
|
|
27
|
+
## 2. Live-proof waves are a different class of wave
|
|
28
|
+
|
|
29
|
+
- `pilot-live` and above need an explicit live-proof owner, not just
|
|
30
|
+
implementation agents plus A8/A9/A0.
|
|
31
|
+
- Live-proof waves need a canonical proof bundle under `.tmp/` and one owned
|
|
32
|
+
operations runbook under `docs/plans/operations/`.
|
|
33
|
+
- The proof bundle must contain restart or rollback evidence, not only one-shot
|
|
34
|
+
success.
|
|
35
|
+
- External operator commands and captured evidence must be part of the authored
|
|
36
|
+
wave, not improvised during execution.
|
|
37
|
+
|
|
38
|
+
## 3. Component promotions must map to owned slices
|
|
39
|
+
|
|
40
|
+
- Every promoted component needs one or more implementation owners and one
|
|
41
|
+
shared proof story.
|
|
42
|
+
- If multiple agents contribute to one promoted component, their slices must be
|
|
43
|
+
obviously complementary, not overlapping guesses.
|
|
44
|
+
- Shared components should not cause one agent to be retried just because a
|
|
45
|
+
sibling owner is still finishing; each agent must be able to complete its own
|
|
46
|
+
slice honestly.
|
|
47
|
+
|
|
48
|
+
## 4. Deliverables must be explicit and machine-checkable
|
|
49
|
+
|
|
50
|
+
- Every implementation agent should declare `### Deliverables`.
|
|
51
|
+
- For live-proof waves, use `### Proof artifacts` in addition to deliverables.
|
|
52
|
+
- Deliverables should be exact files or artifact manifests, not vague “test
|
|
53
|
+
coverage” or “docs updated” expectations.
|
|
54
|
+
- Missing deliverables should fail the wave even if the code mostly landed.
|
|
55
|
+
|
|
56
|
+
## 5. Closure must update the shared planning truth
|
|
57
|
+
|
|
58
|
+
- A9 should always update `current-state`, `master-plan`, `migration`, and the
|
|
59
|
+
component cutover matrix when a wave changes what later waves may safely
|
|
60
|
+
assume.
|
|
61
|
+
- The evaluator should reject a wave if the repo’s planning truth still implies
|
|
62
|
+
an older maturity level after the code has landed.
|
|
63
|
+
- Shared-plan closure is not paperwork; it is part of architecture truth.
|
|
64
|
+
|
|
65
|
+
## 6. Use A8 to reconcile reality before docs and evaluation
|
|
66
|
+
|
|
67
|
+
- A8 is the place to detect contradictions between slices, missing ownership,
|
|
68
|
+
and proof gaps before A9 and A0 run.
|
|
69
|
+
- A8 should judge `ready-for-doc-closure` versus `needs-more-work` based on the
|
|
70
|
+
landed artifact set, not on agent intent.
|
|
71
|
+
- Waves were materially more reliable once A8 became a true closure gate rather
|
|
72
|
+
than optional synthesis.
|
|
73
|
+
|
|
74
|
+
## 7. Runtime setup matters as much as wave prose
|
|
75
|
+
|
|
76
|
+
- Do not use small fixed turn caps for synthesis-heavy or closure-heavy agents.
|
|
77
|
+
Bound them with `budget.minutes`, not `budget.turns`.
|
|
78
|
+
- Pin exact model and reasoning settings for each runtime. Ambiguous profiles
|
|
79
|
+
create unclear failure modes.
|
|
80
|
+
- Avoid cross-runtime fallback on live-proof or deploy-sensitive slices unless
|
|
81
|
+
there is a very good reason.
|
|
82
|
+
- Context7 should be explicit and real; unresolved bundles create noise instead
|
|
83
|
+
of help.
|
|
84
|
+
|
|
85
|
+
## 8. Repo-local proof and live proof are different
|
|
86
|
+
|
|
87
|
+
- Repo-local tests and docs can justify `repo-landed`.
|
|
88
|
+
- Live host validation, admitted runtime behavior, rollback drills, and operator
|
|
89
|
+
surfaces are what justify `pilot-live` and above.
|
|
90
|
+
- Do not let “the code exists” be treated as “the deployment works.”
|
|
91
|
+
|
|
92
|
+
## 9. Architecture-facing status surfaces must be future-safe
|
|
93
|
+
|
|
94
|
+
- Status and projection code should be keyed to the real future topology, not
|
|
95
|
+
the smallest test case that passes today.
|
|
96
|
+
- If a status model will later carry multiple runtime classes, providers, or
|
|
97
|
+
lanes, the substrate must preserve that identity now.
|
|
98
|
+
- Closed enums and typed contracts should be validated as closed enums and typed
|
|
99
|
+
contracts, not accepted as arbitrary strings.
|
|
100
|
+
|
|
101
|
+
## 10. The best waves are narrow, layered, and boring
|
|
102
|
+
|
|
103
|
+
- Narrow waves close more reliably than broad waves.
|
|
104
|
+
- A good wave answers:
|
|
105
|
+
- what exact maturity level is being claimed
|
|
106
|
+
- what exact artifacts prove it
|
|
107
|
+
- who owns repo implementation
|
|
108
|
+
- who owns live proof, if any
|
|
109
|
+
- what A9 must update
|
|
110
|
+
- what A0 must refuse to overclaim
|
|
111
|
+
- If a wave still sounds ambitious and fuzzy after writing the deliverables,
|
|
112
|
+
split it again.
|
|
113
|
+
|
|
114
|
+
## 11. Future-wave checklist
|
|
115
|
+
|
|
116
|
+
- Does the component promotion match the real maturity level being claimed?
|
|
117
|
+
- Does every promoted component have an implementation owner?
|
|
118
|
+
- If the target is `pilot-live` or above, is there an explicit live-proof owner?
|
|
119
|
+
- Are deliverables and proof artifacts exact and machine-checkable?
|
|
120
|
+
- Are current-state and matrix updates part of A9 closure?
|
|
121
|
+
- Are A8 and A0 told what would make the wave fail honestly?
|
|
122
|
+
- Are runtime pins, Context7 bundles, and budgets specific enough to avoid
|
|
123
|
+
preventable execution failures?
|
|
124
|
+
- Would a reviewer understand the difference between “code landed” and
|
|
125
|
+
“component promoted” just by reading the wave file?
|
|
126
|
+
|
|
127
|
+
## Bottom line
|
|
128
|
+
|
|
129
|
+
The successful waves were not the ones with the most code. They were the ones
|
|
130
|
+
where the wave file, the runtime setup, the artifacts, and the planning docs all
|
|
131
|
+
made the same claim at the same level of maturity.
|
package/package.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chllming/wave-orchestration",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"description": "Generic wave-based multi-agent orchestration for repository work.",
|
|
6
6
|
"repository": {
|
|
7
7
|
"type": "git",
|
|
8
|
-
"url": "git+https://github.com/chllming/wave-
|
|
8
|
+
"url": "git+https://github.com/chllming/agent-wave-orchestrator.git"
|
|
9
9
|
},
|
|
10
|
-
"homepage": "https://github.com/chllming/wave-
|
|
10
|
+
"homepage": "https://github.com/chllming/agent-wave-orchestrator#readme",
|
|
11
11
|
"bugs": {
|
|
12
|
-
"url": "https://github.com/chllming/wave-
|
|
12
|
+
"url": "https://github.com/chllming/agent-wave-orchestrator/issues"
|
|
13
13
|
},
|
|
14
14
|
"publishConfig": {
|
|
15
15
|
"access": "public"
|
|
@@ -41,6 +41,7 @@
|
|
|
41
41
|
"context7:api-check": "bash scripts/context7-export-env.sh run bash scripts/context7-api-check.sh",
|
|
42
42
|
"research:import-agent-context": "node scripts/research/import-agent-context-archive.mjs scripts/research/manifests/agent-context-expanded-2026-03-22.mjs",
|
|
43
43
|
"research:index-agent-context": "node scripts/research/generate-agent-context-indexes.mjs",
|
|
44
|
+
"research:sync-planner-context7": "node scripts/research/sync-planner-context7-bundle.mjs",
|
|
44
45
|
"research:refresh-agent-context": "pnpm research:import-agent-context && pnpm research:index-agent-context",
|
|
45
46
|
"test": "vitest run --config vitest.config.ts",
|
|
46
47
|
"wave": "node scripts/wave.mjs",
|
package/releases/manifest.json
CHANGED
|
@@ -2,6 +2,24 @@
|
|
|
2
2
|
"schemaVersion": 1,
|
|
3
3
|
"packageName": "@chllming/wave-orchestration",
|
|
4
4
|
"releases": [
|
|
5
|
+
{
|
|
6
|
+
"version": "0.7.0",
|
|
7
|
+
"date": "2026-03-23",
|
|
8
|
+
"summary": "Unified wave control operator CLI, canonical control-plane event log, Wave Control telemetry, live-wave orchestration refresh, and resident orchestrator support.",
|
|
9
|
+
"features": [
|
|
10
|
+
"Unified `wave control` CLI with `status`, `task`, `rerun`, `proof`, and `telemetry` sub-surfaces replacing `wave coord`/`wave retry`/`wave proof` as the preferred operator interface.",
|
|
11
|
+
"Canonical control-plane event log under `.tmp/<lane>-wave-launcher/control-plane/` with event-sourced materialization for proof bundles, rerun requests, operator tasks, and attempt lifecycle.",
|
|
12
|
+
"Wave Control telemetry: local-first event queueing with best-effort batch delivery, configurable report modes, selective artifact upload, and per-category capture toggles.",
|
|
13
|
+
"Live-wave orchestration refresh that keeps coordination surfaces, clarification triage, and dashboard metrics current during active execution.",
|
|
14
|
+
"Resident orchestrator support via `--resident-orchestrator` for long-running non-owning monitoring sessions.",
|
|
15
|
+
"Native and external benchmark telemetry with failure-review validity classification and config attestation hashing."
|
|
16
|
+
],
|
|
17
|
+
"manualSteps": [
|
|
18
|
+
"Existing `wave coord`, `wave retry`, and `wave proof` commands remain available as compatibility surfaces. No migration required, but new operator docs prefer `wave control`.",
|
|
19
|
+
"To enable Wave Control telemetry, add a `waveControl` section to `wave.config.json` with at minimum an `endpoint` and `workspaceId`. Pass `--no-telemetry` to disable for a single run."
|
|
20
|
+
],
|
|
21
|
+
"breaking": false
|
|
22
|
+
},
|
|
5
23
|
{
|
|
6
24
|
"version": "0.6.3",
|
|
7
25
|
"date": "2026-03-22",
|
|
@@ -14,6 +14,12 @@ export const TOPIC_DEFINITIONS = [
|
|
|
14
14
|
description:
|
|
15
15
|
"Planning topology, verifier and replanner loops, protocol-driven coordination, and blackboard-aware orchestration patterns for multi-agent systems.",
|
|
16
16
|
},
|
|
17
|
+
{
|
|
18
|
+
id: "agent-cooperation-and-coordination",
|
|
19
|
+
title: "Agent Cooperation and Coordination",
|
|
20
|
+
description:
|
|
21
|
+
"Benchmarks and failure analyses for inter-agent cooperation, commitment tracking, communication quality, negotiation, and teammate-style coordination.",
|
|
22
|
+
},
|
|
17
23
|
{
|
|
18
24
|
id: "long-running-agents-and-compaction",
|
|
19
25
|
title: "Long-Running Agents and Compaction",
|
|
@@ -103,6 +109,15 @@ const SKILLS_TOPIC_OVERRIDE_SLUGS = new Set([
|
|
|
103
109
|
"meta-context-engineering-via-agentic-skill-evolution",
|
|
104
110
|
]);
|
|
105
111
|
|
|
112
|
+
const COOPERATION_TOPIC_OVERRIDE_SLUGS = new Set([
|
|
113
|
+
"cooperbench-why-coding-agents-cannot-be-your-teammates-yet",
|
|
114
|
+
"why-do-multi-agent-llm-systems-fail",
|
|
115
|
+
"systematic-failures-in-collective-reasoning-under-distributed-information-in-multi-agent-llms",
|
|
116
|
+
"silo-bench-a-scalable-environment-for-evaluating-distributed-coordination-in-multi-agent-llm-systems",
|
|
117
|
+
"dpbench-large-language-models-struggle-with-simultaneous-coordination",
|
|
118
|
+
"multi-agent-teams-hold-experts-back",
|
|
119
|
+
]);
|
|
120
|
+
|
|
106
121
|
/**
 * Backslash-escape every `|` in a value so it can sit inside a Markdown table cell.
 *
 * @param {unknown} value - Value to render; `null`/`undefined` become the empty string.
 * @returns {string} The stringified value with each `|` replaced by `\|`.
 */
function escapeInlinePipes(value) {
  const rendered = String(value ?? "");
  // Splitting on the literal pipe and re-joining escapes every occurrence.
  return rendered.split("|").join("\\|");
}
|
|
@@ -252,6 +267,9 @@ export function inferTopics(entry, section = null) {
|
|
|
252
267
|
if (SKILLS_TOPIC_OVERRIDE_SLUGS.has(entry.slug)) {
|
|
253
268
|
topics.push("skills-and-procedural-memory");
|
|
254
269
|
}
|
|
270
|
+
if (COOPERATION_TOPIC_OVERRIDE_SLUGS.has(entry.slug)) {
|
|
271
|
+
topics.push("agent-cooperation-and-coordination");
|
|
272
|
+
}
|
|
255
273
|
|
|
256
274
|
if (hasDeclaredTopics) {
|
|
257
275
|
return unique(topics);
|
|
@@ -3,6 +3,7 @@ import baseManifest from "./harness-and-blackboard-2026-03-21.mjs";
|
|
|
3
3
|
const TOPICS = {
|
|
4
4
|
HARNESS: "harnesses-and-practice",
|
|
5
5
|
PLANNING: "planning-and-orchestration",
|
|
6
|
+
COOPERATION: "agent-cooperation-and-coordination",
|
|
6
7
|
LONG_RUNNING: "long-running-agents-and-compaction",
|
|
7
8
|
SKILLS: "skills-and-procedural-memory",
|
|
8
9
|
BLACKBOARD: "blackboard-and-shared-workspaces",
|
|
@@ -521,6 +522,22 @@ const planningManifest = [
|
|
|
521
522
|
fit: "Useful benchmark for testing whether coordination-heavy planning systems scale beyond serial reasoning.",
|
|
522
523
|
topics: [TOPICS.PLANNING, TOPICS.REPO],
|
|
523
524
|
}),
|
|
525
|
+
arxivPaper("2601.13295", {
|
|
526
|
+
title: "CooperBench: Why Coding Agents Cannot be Your Teammates Yet",
|
|
527
|
+
slug: "cooperbench-why-coding-agents-cannot-be-your-teammates-yet",
|
|
528
|
+
authors:
|
|
529
|
+
"Arpandeep Khatua, Hao Zhu, Peter Tran, Arya Prabhudesai, Frederic Sadrieh, Johann K. Lieberwirth, Xinkai Yu, Yicheng Fu, Michael J. Ryan, Jiaxin Pei, Diyi Yang",
|
|
530
|
+
year: 2026,
|
|
531
|
+
researchBucket: "P0 direct hits",
|
|
532
|
+
mapsTo:
|
|
533
|
+
"Collaborative coding benchmark for inter-agent cooperation, communication quality, commitment tracking, and coordination failures.",
|
|
534
|
+
fit: "Direct benchmark for whether coding agents behave like usable teammates instead of isolated solo solvers.",
|
|
535
|
+
additionalSource: "https://cooperbench.com",
|
|
536
|
+
additionalPdf: "https://cooperbench.com/static/pdfs/main.pdf",
|
|
537
|
+
notes:
|
|
538
|
+
"Project site hosts the same paper PDF plus leaderboard, dataset, and trajectory viewer for the benchmark.",
|
|
539
|
+
topics: [TOPICS.PLANNING, TOPICS.COOPERATION, TOPICS.REPO],
|
|
540
|
+
}),
|
|
524
541
|
arxivPaper("2602.01011", {
|
|
525
542
|
title: "Multi-Agent Teams Hold Experts Back",
|
|
526
543
|
slug: "multi-agent-teams-hold-experts-back",
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import crypto from "node:crypto";
|
|
2
|
+
import fs from "node:fs";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import { PACKAGE_ROOT } from "../wave-orchestrator/roots.mjs";
|
|
5
|
+
import {
|
|
6
|
+
PLANNER_CONTEXT7_BUNDLE_ID,
|
|
7
|
+
PLANNER_CONTEXT7_DEFAULT_QUERY,
|
|
8
|
+
PLANNER_CONTEXT7_LIBRARY_NAME,
|
|
9
|
+
PLANNER_CONTEXT7_SOURCE_DIR,
|
|
10
|
+
PLANNER_CONTEXT7_SOURCE_FILES,
|
|
11
|
+
} from "../wave-orchestrator/planner-context.mjs";
|
|
12
|
+
|
|
13
|
+
/**
 * Create `dirPath`, including any missing parent directories.
 * Calling it on a directory that already exists is a no-op.
 *
 * @param {string} dirPath - Directory to create.
 */
function ensureDirectory(dirPath) {
  const mkdirOptions = { recursive: true };
  fs.mkdirSync(dirPath, mkdirOptions);
}
|
|
16
|
+
|
|
17
|
+
/**
 * Compute the SHA-256 digest of a string, hashing its UTF-8 encoding.
 *
 * @param {string} text - Text to hash.
 * @returns {string} Lowercase hexadecimal digest.
 */
function sha256Text(text) {
  const hasher = crypto.createHash("sha256");
  hasher.update(text, "utf8");
  return hasher.digest("hex");
}
|
|
20
|
+
|
|
21
|
+
/**
 * Read a single-line `key: value` entry from a document's text.
 *
 * The first line that starts with `key:` wins. One optional leading and one
 * optional trailing quote character (`'` or `"`) are stripped from the value.
 *
 * @param {string} text - Document text (falsy values are treated as empty).
 * @param {string} key - Key to look up; matched literally, not as a pattern.
 * @returns {string} The trimmed value, or `""` when the key is absent or has
 *   no value on its own line.
 */
function extractFrontmatterValue(text, key) {
  // Escape regex metacharacters so a key such as "a.b" only matches itself.
  const escapedKey = String(key).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Only horizontal whitespace may follow the colon: `\s` would cross the line
  // break and capture the NEXT line when the key has an empty value.
  // Trailing blanks are consumed outside the capture so a closing quote that is
  // followed by whitespace is still stripped.
  const pattern = new RegExp(`^${escapedKey}:[ \\t]*['"]?(.+?)['"]?[ \\t]*$`, "m");
  const match = String(text || "").match(pattern);
  return match ? match[1].trim() : "";
}
|
|
25
|
+
|
|
26
|
+
/**
 * Return the text of the first level-one Markdown heading (`# Heading`).
 *
 * @param {string} text - Markdown text (falsy values are treated as empty).
 * @returns {string} The trimmed heading text, or `""` when there is none.
 */
function extractMarkdownHeading(text) {
  // Require horizontal whitespace after `#`: `\s+` would span line breaks, so a
  // bare `#` line would turn the following line into the "heading".
  const match = String(text || "").match(/^#[ \t]+(.+)$/m);
  return match ? match[1].trim() : "";
}
|
|
30
|
+
|
|
31
|
+
/**
 * Render the Markdown topic index for the planner Context7 corpus.
 *
 * The output is a fixed frontmatter block and introduction followed by one
 * bullet link per entry whose `kind` is `"paper"`; other entries are ignored.
 *
 * @param {Array<{kind: string, title?: string, targetPath: string}>} copiedFiles
 *   Records of files already copied into the bundle.
 * @returns {string} The Markdown document, ending with a newline.
 */
function renderPlannerTopicIndex(copiedFiles) {
  const paperLinks = [];
  for (const file of copiedFiles) {
    if (file.kind !== "paper") {
      continue;
    }
    // Fall back to the file name when the paper has no usable title.
    const label = file.title || file.targetPath.split("/").pop();
    const fileName = path.basename(file.targetPath);
    paperLinks.push(`- [${label}](../papers/${fileName})`);
  }

  const frontmatter = [
    "---",
    "summary: 'Curated planning and orchestration corpus exported for the agentic planner Context7 bundle.'",
    "read_when:",
    "  - You are publishing or refreshing the planner-agentic Context7 library",
    "  - You need the exact planner research subset that Wave ships for agentic planning",
    "title: 'Planner Agentic Context7 Corpus'",
    "---",
  ];
  const introduction = [
    "",
    "# Planner Agentic Context7 Corpus",
    "",
    "This file is the tracked topic index for the planner-specific Context7 corpus.",
    "It intentionally references only the copied files that ship under",
    "`docs/context7/planner-agent/`.",
    "",
    "## Included papers",
    "",
  ];

  // The trailing empty element yields the final newline after joining.
  return [...frontmatter, ...introduction, ...paperLinks, ""].join("\n");
}
|
|
58
|
+
|
|
59
|
+
/**
 * Write `text` to `targetPath` as UTF-8, creating the parent directory first.
 *
 * @param {string} targetPath - Destination file path.
 * @param {string} text - File contents.
 * @returns {{bytes: number, sha256: string}} UTF-8 byte length and SHA-256 hex
 *   digest of the written text.
 */
function writePlannerContextFile(targetPath, text) {
  const parentDir = path.dirname(targetPath);
  ensureDirectory(parentDir);
  fs.writeFileSync(targetPath, text, "utf8");

  const bytes = Buffer.byteLength(text, "utf8");
  const sha256 = sha256Text(text);
  return { bytes, sha256 };
}
|
|
67
|
+
|
|
68
|
+
/**
 * Materialize one planner Context7 entry under the package root.
 *
 * Non-topic entries are copied verbatim from `entry.sourcePath` to
 * `entry.targetPath`. Entries of kind `"topic"` are written with a freshly
 * rendered index built from `copiedFiles`; their source file must still exist.
 *
 * @param {{kind: string, sourcePath: string, targetPath: string}} entry
 *   Entry describing package-relative source and target paths.
 * @param {Array<object>} copiedFiles - Records of the files processed so far.
 * @returns {{kind: string, sourcePath: string, targetPath: string, title: string, bytes: number, sha256: string}}
 *   Manifest record for the written file.
 * @throws {Error} When the source file does not exist.
 */
function copyPlannerContextFile(entry, copiedFiles) {
  const isTopic = entry.kind === "topic";
  const absoluteSource = path.join(PACKAGE_ROOT, entry.sourcePath);
  const absoluteTarget = path.join(PACKAGE_ROOT, entry.targetPath);

  if (!fs.existsSync(absoluteSource)) {
    throw new Error(`Planner Context7 source file is missing: ${entry.sourcePath}`);
  }

  ensureDirectory(path.dirname(absoluteTarget));
  const sourceText = fs.readFileSync(absoluteSource, "utf8");

  // Topic files are regenerated from what has been copied; everything else is
  // written through unchanged.
  const outputText = isTopic ? renderPlannerTopicIndex(copiedFiles) : sourceText;
  const written = writePlannerContextFile(absoluteTarget, outputText);

  let title;
  if (isTopic) {
    title = "Planner Agentic Context7 Corpus";
  } else {
    // Prefer the frontmatter title, then the first heading, then the file name.
    title =
      extractFrontmatterValue(sourceText, "title") ||
      extractMarkdownHeading(sourceText) ||
      path.basename(entry.targetPath);
  }

  return {
    kind: entry.kind,
    sourcePath: entry.sourcePath,
    targetPath: entry.targetPath,
    title,
    ...written,
  };
}
|
|
94
|
+
|
|
95
|
+
/**
 * Write `manifest.json` for the planner Context7 bundle into the bundle's
 * source directory under the package root.
 *
 * @param {Array<object>} files - Per-file records to store under `files`.
 */
function writeManifest(files) {
  const manifestPath = path.join(PACKAGE_ROOT, PLANNER_CONTEXT7_SOURCE_DIR, "manifest.json");
  ensureDirectory(path.dirname(manifestPath));

  const manifest = {
    version: 1,
    generatedAt: new Date().toISOString(),
    bundleId: PLANNER_CONTEXT7_BUNDLE_ID,
    libraryName: PLANNER_CONTEXT7_LIBRARY_NAME,
    defaultQuery: PLANNER_CONTEXT7_DEFAULT_QUERY,
    sourceRoot: "docs/research/agent-context-cache",
    targetRoot: PLANNER_CONTEXT7_SOURCE_DIR,
    files,
  };

  // Pretty-print with two-space indentation and end the file with a newline.
  const serialized = `${JSON.stringify(manifest, null, 2)}\n`;
  fs.writeFileSync(manifestPath, serialized, "utf8");
}
|
|
117
|
+
|
|
118
|
+
/**
 * Sync every planner Context7 source file into the bundle directory, then
 * write the manifest and log a one-line summary.
 *
 * Topic entries are processed last so the generated index can list the files
 * copied before them.
 */
function main() {
  const regularEntries = [];
  const topicEntries = [];
  for (const entry of PLANNER_CONTEXT7_SOURCE_FILES) {
    if (entry.kind === "topic") {
      topicEntries.push(entry);
    } else {
      regularEntries.push(entry);
    }
  }

  const files = [];
  for (const entry of [...regularEntries, ...topicEntries]) {
    // `files` is passed while still being filled: each call sees the records
    // accumulated so far.
    const record = copyPlannerContextFile(entry, files);
    files.push(record);
  }

  writeManifest(files);
  console.log(
    `[planner-context7] synced ${files.length} files into ${PLANNER_CONTEXT7_SOURCE_DIR}`,
  );
}
|
|
132
|
+
|
|
133
|
+
main();
|