npm - @chllming/wave-orchestration - Versions diffs - 0.6.3 → 0.7.1 - Mend

@chllming/wave-orchestration 0.6.3 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (118) hide show

package/CHANGELOG.md +82 -1
package/README.md +40 -7
package/docs/agents/wave-orchestrator-role.md +50 -0
package/docs/agents/wave-planner-role.md +39 -0
package/docs/context7/bundles.json +9 -0
package/docs/context7/planner-agent/README.md +25 -0
package/docs/context7/planner-agent/manifest.json +83 -0
package/docs/context7/planner-agent/papers/cooperbench-why-coding-agents-cannot-be-your-teammates-yet.md +3283 -0
package/docs/context7/planner-agent/papers/dova-deliberation-first-multi-agent-orchestration-for-autonomous-research-automation.md +1699 -0
package/docs/context7/planner-agent/papers/dpbench-large-language-models-struggle-with-simultaneous-coordination.md +2251 -0
package/docs/context7/planner-agent/papers/incremental-planning-to-control-a-blackboard-based-problem-solver.md +1729 -0
package/docs/context7/planner-agent/papers/silo-bench-a-scalable-environment-for-evaluating-distributed-coordination-in-multi-agent-llm-systems.md +3747 -0
package/docs/context7/planner-agent/papers/todoevolve-learning-to-architect-agent-planning-systems.md +1675 -0
package/docs/context7/planner-agent/papers/verified-multi-agent-orchestration-a-plan-execute-verify-replan-framework-for-complex-query-resolution.md +1173 -0
package/docs/context7/planner-agent/papers/why-do-multi-agent-llm-systems-fail.md +5211 -0
package/docs/context7/planner-agent/topics/planning-and-orchestration.md +24 -0
package/docs/evals/README.md +96 -1
package/docs/evals/arm-templates/README.md +13 -0
package/docs/evals/arm-templates/full-wave.json +15 -0
package/docs/evals/arm-templates/single-agent.json +15 -0
package/docs/evals/benchmark-catalog.json +7 -0
package/docs/evals/cases/README.md +47 -0
package/docs/evals/cases/wave-blackboard-inbox-targeting.json +73 -0
package/docs/evals/cases/wave-contradiction-conflict.json +104 -0
package/docs/evals/cases/wave-expert-routing-preservation.json +69 -0
package/docs/evals/cases/wave-hidden-profile-private-evidence.json +81 -0
package/docs/evals/cases/wave-premature-closure-guard.json +71 -0
package/docs/evals/cases/wave-silo-cross-agent-state.json +77 -0
package/docs/evals/cases/wave-simultaneous-lockstep.json +92 -0
package/docs/evals/cooperbench/real-world-mitigation.md +341 -0
package/docs/evals/external-benchmarks.json +85 -0
package/docs/evals/external-command-config.sample.json +9 -0
package/docs/evals/external-command-config.swe-bench-pro.json +8 -0
package/docs/evals/pilots/README.md +47 -0
package/docs/evals/pilots/swe-bench-pro-public-full-wave-review-10.json +64 -0
package/docs/evals/pilots/swe-bench-pro-public-pilot.json +111 -0
package/docs/evals/wave-benchmark-program.md +302 -0
package/docs/guides/planner.md +67 -11
package/docs/guides/terminal-surfaces.md +12 -0
package/docs/plans/context7-wave-orchestrator.md +20 -0
package/docs/plans/current-state.md +8 -1
package/docs/plans/examples/wave-benchmark-improvement.md +108 -0
package/docs/plans/examples/wave-example-live-proof.md +1 -1
package/docs/plans/examples/wave-example-rollout-fidelity.md +340 -0
package/docs/plans/migration.md +26 -0
package/docs/plans/wave-orchestrator.md +60 -12
package/docs/plans/waves/reviews/wave-1-benchmark-operator.md +118 -0
package/docs/reference/cli-reference.md +547 -0
package/docs/reference/coordination-and-closure.md +436 -0
package/docs/reference/live-proof-waves.md +25 -3
package/docs/reference/npmjs-trusted-publishing.md +3 -3
package/docs/reference/proof-metrics.md +90 -0
package/docs/reference/runtime-config/README.md +63 -2
package/docs/reference/runtime-config/codex.md +2 -1
package/docs/reference/sample-waves.md +29 -18
package/docs/reference/wave-control.md +164 -0
package/docs/reference/wave-planning-lessons.md +131 -0
package/package.json +5 -4
package/releases/manifest.json +40 -0
package/scripts/research/agent-context-archive.mjs +18 -0
package/scripts/research/manifests/agent-context-expanded-2026-03-22.mjs +17 -0
package/scripts/research/sync-planner-context7-bundle.mjs +133 -0
package/scripts/wave-orchestrator/agent-state.mjs +11 -2
package/scripts/wave-orchestrator/artifact-schemas.mjs +232 -0
package/scripts/wave-orchestrator/autonomous.mjs +7 -0
package/scripts/wave-orchestrator/benchmark-cases.mjs +374 -0
package/scripts/wave-orchestrator/benchmark-external.mjs +1384 -0
package/scripts/wave-orchestrator/benchmark.mjs +972 -0
package/scripts/wave-orchestrator/clarification-triage.mjs +78 -12
package/scripts/wave-orchestrator/config.mjs +175 -0
package/scripts/wave-orchestrator/control-cli.mjs +1216 -0
package/scripts/wave-orchestrator/control-plane.mjs +697 -0
package/scripts/wave-orchestrator/coord-cli.mjs +360 -2
package/scripts/wave-orchestrator/coordination-store.mjs +211 -9
package/scripts/wave-orchestrator/coordination.mjs +84 -0
package/scripts/wave-orchestrator/dashboard-renderer.mjs +120 -5
package/scripts/wave-orchestrator/dashboard-state.mjs +22 -0
package/scripts/wave-orchestrator/evals.mjs +23 -0
package/scripts/wave-orchestrator/executors.mjs +3 -2
package/scripts/wave-orchestrator/feedback.mjs +55 -0
package/scripts/wave-orchestrator/install.mjs +151 -2
package/scripts/wave-orchestrator/launcher-closure.mjs +4 -1
package/scripts/wave-orchestrator/launcher-runtime.mjs +33 -30
package/scripts/wave-orchestrator/launcher.mjs +884 -36
package/scripts/wave-orchestrator/planner-context.mjs +75 -0
package/scripts/wave-orchestrator/planner.mjs +2270 -136
package/scripts/wave-orchestrator/proof-cli.mjs +195 -0
package/scripts/wave-orchestrator/proof-registry.mjs +317 -0
package/scripts/wave-orchestrator/replay.mjs +10 -4
package/scripts/wave-orchestrator/retry-cli.mjs +184 -0
package/scripts/wave-orchestrator/retry-control.mjs +225 -0
package/scripts/wave-orchestrator/shared.mjs +26 -0
package/scripts/wave-orchestrator/swe-bench-pro-task.mjs +1004 -0
package/scripts/wave-orchestrator/terminals.mjs +1 -1
package/scripts/wave-orchestrator/traces.mjs +157 -2
package/scripts/wave-orchestrator/wave-control-client.mjs +532 -0
package/scripts/wave-orchestrator/wave-control-schema.mjs +309 -0
package/scripts/wave-orchestrator/wave-files.mjs +144 -23
package/scripts/wave.mjs +27 -0
package/skills/repo-coding-rules/SKILL.md +1 -0
package/skills/role-cont-eval/SKILL.md +1 -0
package/skills/role-cont-qa/SKILL.md +13 -6
package/skills/role-deploy/SKILL.md +1 -0
package/skills/role-documentation/SKILL.md +4 -0
package/skills/role-implementation/SKILL.md +4 -0
package/skills/role-infra/SKILL.md +2 -1
package/skills/role-integration/SKILL.md +15 -8
package/skills/role-planner/SKILL.md +39 -0
package/skills/role-planner/skill.json +21 -0
package/skills/role-research/SKILL.md +1 -0
package/skills/role-security/SKILL.md +2 -2
package/skills/runtime-claude/SKILL.md +2 -1
package/skills/runtime-codex/SKILL.md +1 -0
package/skills/runtime-local/SKILL.md +2 -0
package/skills/runtime-opencode/SKILL.md +1 -0
package/skills/wave-core/SKILL.md +25 -6
package/skills/wave-core/references/marker-syntax.md +16 -8
package/wave.config.json +45 -0

package/docs/reference/runtime-config/README.md CHANGED Viewed

@@ -6,6 +6,7 @@ Use it when you need the full supported surface for:
 - `wave.config.json`
 - `lanes.<lane>.executors`
+- `waveControl`
 - `executors.profiles.<profile>`
 - per-agent `### Executor` blocks inside a wave file
@@ -74,6 +75,63 @@ These fields are shared across runtimes:
 - [claude.md](./claude.md)
 - [opencode.md](./opencode.md)
+## Wave Control
+`wave.config.json` may also declare a `waveControl` block for local-first telemetry delivery.
+Supported top-level fields:
+| Key | Type | Default | Purpose |
+| --- | --- | --- | --- |
+| `enabled` | boolean | `true` | Master switch for local queueing and remote delivery |
+| `endpoint` | string | unset | Base URL for the Railway-hosted `services/wave-control` API |
+| `workspaceId` | string | derived from repo path | Stable workspace identity used across runs |
+| `projectId` | string | derived from `projectName` | Stable project/repo identity used for cross-workspace reporting and filtering |
+| `authTokenEnvVar` | string | `WAVE_CONTROL_AUTH_TOKEN` | Environment variable name holding the bearer token |
+| `reportMode` | string | `metadata-plus-selected` | `disabled`, `metadata-only`, `metadata-plus-selected`, or `full-artifact-upload` |
+| `uploadArtifactKinds` | string[] | selected proof/trace/benchmark kinds | Artifact classes eligible for body upload when an artifact's upload policy requests a body |
+| `requestTimeoutMs` | integer | `5000` | Per-batch network timeout |
+| `flushBatchSize` | integer | `25` | Max queued telemetry events flushed per batch |
+| `maxPendingEvents` | integer | `1000` | Cap for pending remote-delivery queue files; oldest pending uploads are dropped from the remote queue while the local `events.jsonl` stream remains authoritative |
+| `captureCoordinationRecords` | boolean | `true` | Emit `coordination_record` telemetry |
+| `captureControlPlaneEvents` | boolean | `true` | Emit `wave_run`, `attempt`, `proof_bundle`, and related control-plane events |
+| `captureTraceBundles` | boolean | `true` | Emit finalized trace-bundle artifacts and gate snapshots |
+| `captureBenchmarkRuns` | boolean | `true` | Emit `benchmark_run`, `benchmark_item`, `verification`, and `review` events |
+Lane overrides may refine the same keys under `lanes.<lane>.waveControl`.
+One-run override:
+- `wave launch --no-telemetry` disables Wave Control queueing and remote delivery for that launcher invocation without changing the repo config.
+Example:
+```json
+{
+  "waveControl": {
+    "endpoint": "https://wave-control.up.railway.app/api/v1",
+    "workspaceId": "wave-main",
+    "projectId": "wave-orchestration",
+    "reportMode": "metadata-plus-selected",
+    "uploadArtifactKinds": [
+      "trace-run-metadata",
+      "trace-quality",
+      "benchmark-results"
+    ]
+  }
+}
+```
+Runtime-emitted Wave Control events also attach:
+- `orchestratorId` from the active launcher or resident orchestrator
+- `runtimeVersion` from the installed Wave package metadata
+Those fields are queryable in the `wave-control` service alongside `workspaceId`,
+`projectId`, `runKind`, `runId`, `lane`, and benchmark ids.
+See [../wave-control.md](../wave-control.md) for the event contract and upload-policy model.
 ## Generated Artifacts
 Wave writes runtime artifacts here:
@@ -83,7 +141,7 @@ Wave writes runtime artifacts here:
 Common files:
-- `launch-preview.json`: resolved invocation lines, env vars, retry mode, and structured attempt/turn-limit metadata
+- `launch-preview.json`: resolved invocation lines, env vars, retry mode, and structured attempt/turn-limit metadata for both dry-run and live launches
 - `skills.resolved.md`: compact metadata-first skill catalog for the selected agent and runtime
 - `skills.expanded.md`: full canonical/debug skill payload with `SKILL.md` bodies and adapters
 - `skills.metadata.json`: resolved skill ids, activation metadata, permissions, hashes, and generated artifact paths
@@ -92,6 +150,9 @@ Common files:
 - `claude-settings.json`: generated Claude settings overlay when inline settings data is present
 - `opencode-agent-prompt.txt`: generated OpenCode harness prompt overlay
 - `opencode.json`: generated OpenCode runtime config overlay
+- `.tmp/<lane>-wave-launcher/control-plane/telemetry/events.jsonl`: local-first Wave Control event stream
+- `.tmp/<lane>-wave-launcher/control-plane/telemetry/pending/`: queued event batches awaiting remote delivery
+- `.tmp/<lane>-wave-launcher/control-plane/telemetry/delivery-state.json`: remote-delivery counters and last-error state
 Runtime-specific delivery:
@@ -100,7 +161,7 @@ Runtime-specific delivery:
 - OpenCode injects the compact catalog into `opencode.json` and attaches `skill.json`, `SKILL.md`, the selected adapter, and recursive `references/**` files through `--file`.
 - Local keeps skills prompt-only.
-`launch-preview.json` also records the resolved skill metadata plus a `limits` section. For Claude and OpenCode, that section reports the known turn ceiling and whether it came from the runtime-specific setting or generic `budget.turns`. For Codex, it explicitly records that Wave emitted no turn-limit flag and that any effective ceiling may come from the selected Codex profile or upstream runtime.
+`launch-preview.json` also records the resolved skill metadata plus a `limits` section. For Claude and OpenCode, that section reports the known turn ceiling and whether it came from the runtime-specific setting or generic `budget.turns`. For Codex, it explicitly records that Wave emitted no turn-limit flag and that any effective ceiling may come from the selected Codex profile or upstream runtime. If a live Codex run later terminates with a visible `Reached max turns (N)` log line, Wave appends that observed ceiling back into the live `launch-preview.json` as runtime evidence rather than pretending Wave set it.
 ## Recommended Validation Path

package/docs/reference/runtime-config/codex.md CHANGED Viewed

@@ -21,6 +21,7 @@ Wave launches Codex with `codex exec` and pipes the generated task prompt throug
 - There is no `executors.codex.model` key today. Use profile `model` or per-agent `model`.
 - Generic `budget.turns` does not set a Codex turn limit. If Codex stops on a turn ceiling, that limit came from the selected Codex profile or upstream Codex runtime, not from a Wave-emitted CLI flag.
+- Live runs still write `launch-preview.json`. If Codex later logs `Reached max turns (N)`, Wave records that observed ceiling under `limits.observedTurnLimit` with source `runtime-log`.
 - `codex.images`, `codex.add_dirs`, and `codex.config` accept either a string array in `wave.config.json` or a comma-separated list in a wave file.
 - Relative paths are passed to Codex relative to the repository root because Wave launches the executor from the repo workspace.
@@ -78,4 +79,4 @@ For a dry run, inspect:
 - `launch-preview.json` for the final `codex exec` command
 - any referenced prompt file under `.tmp/<lane>-wave-launcher/dry-run/prompts/`
-The preview records the exact `--profile`, repeated `-c`, `--image`, and `--add-dir` flags that Wave would use in a live launch. It also includes a `limits` block that makes Wave's Codex visibility explicit: `turnLimitSource: "not-set-by-wave"` means Wave emitted no Codex turn-limit flag, so any effective ceiling is external to the Wave CLI invocation.
+The preview records the exact `--profile`, repeated `-c`, `--image`, and `--add-dir` flags that Wave would use in a live launch. It also includes a `limits` block that makes Wave's Codex visibility explicit: `turnLimitSource: "not-set-by-wave"` means Wave emitted no Codex turn-limit flag, so any effective ceiling is external to the Wave CLI invocation. On a live run, that same preview file may later gain `observedTurnLimit` if the Codex runtime reports the ceiling in its log output.

package/docs/reference/sample-waves.md CHANGED Viewed

@@ -1,23 +1,29 @@
 ---
 title: "Sample Waves"
-summary: "A showcase-first sample wave that demonstrates the current 0.6.1 Wave surface."
+summary: "Showcase-first sample waves that demonstrate the current 0.7.1 Wave surface."
 ---
 # Sample Waves
-This guide points to one showcase-first sample wave that demonstrates the current `0.6.1` authored Wave surface.
+This guide points to showcase-first sample waves that demonstrate the current `0.7.1` authored Wave surface.
-The example is intentionally denser than a typical production wave. Its job is to teach the current authoring and runtime surface quickly, not to be the smallest possible launch-ready file.
+The examples are intentionally denser than typical production waves. Their job is to teach the current authoring and runtime surface quickly, not to be the smallest possible launch-ready files.
-## Canonical Example
+## Canonical Examples
+- [High-fidelity repo-landed rollout wave](../plans/examples/wave-example-rollout-fidelity.md)
+  Shows what a good `repo-landed` outcome looks like when one promoted component only closes honestly if desired-state records, reconcile-loop substrate, and cluster-view surfaces land together. It emphasizes maturity discipline, explicit deliverables, and shared-plan closure without drifting into `pilot-live` claims.
 - [Full modern sample wave](../plans/examples/wave-example-live-proof.md)
-  Shows the combined `0.6.1` authored surface in one file: closure roles, `E0`, optional security review, delegated and pinned benchmark targets, richer executor config, `### Skills`, `### Capabilities`, `### Deliverables`, `### Exit contract`, `### Proof artifacts`, sticky retry, deploy environments, and proof-first live-wave structure.
+  Shows the combined `0.7.1` authored surface in one file: closure roles, `E0`, optional security review, delegated and pinned benchmark targets, richer executor config, `### Skills`, `### Capabilities`, `### Deliverables`, `### Exit contract`, `### Proof artifacts`, sticky retry, deploy environments, and proof-first live-wave structure.
-## What This Example Teaches
+## What These Examples Teach
-- the standard closure-role structure with `A0`, `E0`, `A8`, and `A9`
-- wave-level `## Eval targets`
+- the standard closure-role structure with `A0`, `A8`, and `A9`
+- `E0` and wave-level `## Eval targets` in the full modern sample
+- honest `repo-landed` maturity framing without `pilot-live` drift
+- multi-slice component promotion where all sibling owners must land together
+- shared-plan and component-matrix closure as part of the architecture truth
 - delegated versus pinned benchmark selection
 - coordination benchmark families from `docs/evals/benchmark-catalog.json`
 - richer executor blocks, runtime budgets, and retry policy
@@ -32,8 +38,11 @@ The example is intentionally denser than a typical production wave. Its job is t
 ## Feature Coverage Map
-This sample covers the main surfaces added or hardened for `0.6.1`:
+Together these samples cover the main surfaces added or hardened for `0.7.1`:
+- repo-landed maturity discipline and anti-overclaim framing
+- explicit shared-plan closure for future-wave safety
+- coordinated component slices with per-agent deliverables
 - planner-era authored wave structure
 - cross-runtime `### Skills`
 - richer `### Executor` blocks and runtime budgets
@@ -53,6 +62,7 @@ This sample covers the main surfaces added or hardened for `0.6.1`:
 Copy more literally when:
 - you need the section layout
+- you want a concrete example of what good repo-landed wave fidelity looks like
 - you want concrete wording for delegated versus pinned benchmark targets
 - you want a proof-first owner example with local artifact bundles and sticky retry
@@ -65,23 +75,24 @@ Adapt more aggressively when:
 ## How These Examples Map To Other Docs
-- Use [docs/guides/planner.md](../guides/planner.md) for the planner-generated baseline, then use this sample to see how a human would enrich the generated draft.
-- Use [docs/evals/README.md](../evals/README.md) with this sample when you need to see delegated and pinned benchmark targets in a real wave.
-- Use [docs/reference/live-proof-waves.md](./live-proof-waves.md) with this sample when you need proof-first authoring for `pilot-live` and above.
+- Use [docs/guides/planner.md](../guides/planner.md) for the planner-generated baseline, then use these samples to see how a human would enrich the generated draft for either repo-landed or proof-first work.
+- Use [docs/evals/README.md](../evals/README.md) with the full modern sample when you need to see delegated and pinned benchmark targets in a real wave.
+- Use [docs/reference/live-proof-waves.md](./live-proof-waves.md) with the full modern sample when you need proof-first authoring for `pilot-live` and above.
 - Use [docs/plans/wave-orchestrator.md](../plans/wave-orchestrator.md) for the operational runbook that explains how the launcher interprets these sections.
 ## Suggested Reading Order
-1. Start with [Full modern sample wave](../plans/examples/wave-example-live-proof.md).
-2. Read [docs/evals/README.md](../evals/README.md) if you want more background on benchmark target selection.
-3. Read [docs/reference/live-proof-waves.md](./live-proof-waves.md) if you want more detail on proof-first `pilot-live` authoring.
+1. Start with [High-fidelity repo-landed rollout wave](../plans/examples/wave-example-rollout-fidelity.md) if you want the clearest example of good closure-ready wave fidelity for a repo-only outcome.
+2. Read [Full modern sample wave](../plans/examples/wave-example-live-proof.md) if you want the denser proof-first and eval-heavy surface.
+3. Read [docs/evals/README.md](../evals/README.md) if you want more background on benchmark target selection.
+4. Read [docs/reference/live-proof-waves.md](./live-proof-waves.md) if you want more detail on proof-first `pilot-live` authoring.
-## Why This Example Lives In `docs/plans/examples/`
+## Why These Examples Live In `docs/plans/examples/`
-The example lives outside `docs/plans/waves/` on purpose.
+The examples live outside `docs/plans/waves/` on purpose.
 That keeps them:
 - easy to browse as teaching material
 - clearly separate from the repo's real launcher-facing wave sequence
-- safe to evolve as reference material without implying that it is part of the current lane's actual plan history
+- safe to evolve as reference material without implying that they are part of the current lane's actual plan history

package/docs/reference/wave-control.md ADDED Viewed

@@ -0,0 +1,164 @@
+---
+title: "Wave Control"
+summary: "Canonical telemetry, artifact upload policy, and the local-first reporting contract for the Railway-hosted Wave control plane."
+---
+# Wave Control
+Wave Control is the telemetry and analysis plane for Wave runs.
+The design rule is:
+- local files stay authoritative
+- remote reporting is best-effort
+- dashboards and markdown remain projections over typed local state
+## What Gets Reported
+Wave Control normalizes these entity types:
+- `wave_run`
+- `agent_run`
+- `coordination_record`
+- `task`
+- `attempt`
+- `gate`
+- `proof_bundle`
+- `rerun_request`
+- `human_input`
+- `artifact`
+- `benchmark_run`
+- `benchmark_item`
+- `verification`
+- `review`
+This lets the control plane answer:
+- what happened in a run
+- which proof and benchmark artifacts back a claim
+- whether a benchmark result is comparison-valid or only diagnostic
+- which coordination failures blocked closure
+## Run Identity
+Every Wave Control event carries a normalized run identity.
+The key fields are:
+- `workspaceId`
+- `projectId`
+- `runKind`
+- `runId`
+- `lane`
+- `wave`
+- `attempt`
+- `agentId`
+- `orchestratorId`
+- `runtimeVersion`
+- `benchmarkRunId`
+- `benchmarkItemId`
+Why these fields matter:
+- `workspaceId` separates whole adopted workspaces
+- `projectId` separates product or repo identities inside one control plane
+- `orchestratorId` separates resident orchestrators or control-plane owners
+- `runtimeVersion` lets operators compare behavior across Wave releases without guessing from deploy timestamps
+These are first-class query dimensions in the service, not only free-form event payload fields.
+## Proof Signals
+Wave Control is intended to make the main README claims measurable.
+For the explicit README-failure-case-to-signal map, see [proof-metrics.md](./proof-metrics.md).
+Signals to preserve:
+- canonical-state fidelity:
+  `coordination_record`, `wave_run`, `attempt`, and `artifact` telemetry prove the scheduler truth came from JSON state, not only markdown boards
+- evidence pooling:
+  integration and closure telemetry should cite the proof artifacts and evidence refs they relied on
+- contradiction repair:
+  gate and review telemetry should show unresolved conflicts, repair creation, and repair resolution
+- expert routing:
+  targeted assignments, reroutes, and final recommendation ownership should remain visible
+- premature closure prevention:
+  gate snapshots, proof completeness, block reasons, reruns, and cont-QA reversal should be durable
+- benchmark trust:
+  every benchmark item should distinguish capability from validity
+## Artifact Contract
+Selected artifacts are described with typed descriptors:
+```json
+{
+  "path": ".tmp/main-wave-launcher/traces/wave-1/attempt-1/quality.json",
+  "kind": "trace-quality",
+  "required": true,
+  "present": true,
+  "sha256": "abc123...",
+  "bytes": 2048,
+  "contentType": "application/json",
+  "uploadPolicy": "selected"
+}
+```
+Upload policy meanings:
+- `local-only`: keep only the descriptor remotely
+- `metadata-only`: report path, hash, size, and presence only
+- `selected`: upload metadata plus the artifact body when the runtime is in `metadata-plus-selected` or `full-artifact-upload` **and** the artifact kind is allowed by `waveControl.uploadArtifactKinds`
+- `full`: upload the artifact body in `full-artifact-upload` flows; if `uploadArtifactKinds` is set, keep the kind allowlist aligned with that policy
+## Runtime Config
+`wave.config.json` can declare:
+```json
+{
+  "waveControl": {
+    "endpoint": "https://wave-control.up.railway.app/api/v1",
+    "workspaceId": "my-workspace",
+    "projectId": "wave-orchestration",
+    "authTokenEnvVar": "WAVE_CONTROL_AUTH_TOKEN",
+    "reportMode": "metadata-plus-selected",
+    "uploadArtifactKinds": [
+      "trace-run-metadata",
+      "trace-quality",
+      "benchmark-results"
+    ]
+  }
+}
+```
+Lane overrides may refine the same surface under `lanes.<lane>.waveControl`.
+For a single run, operators can disable Wave Control reporting entirely with:
+```bash
+pnpm exec wave launch --lane main --no-telemetry
+```
+That suppresses the local telemetry spool and remote delivery for that invocation, while leaving the canonical runtime artifacts and local control-plane state intact.
+## Delivery Model
+Wave Control reporting should:
+- append local telemetry first
+- queue pending uploads under `.tmp/<lane>-wave-launcher/control-plane/telemetry/`
+- respect `waveControl.uploadArtifactKinds` before uploading any selected artifact body
+- cap pending remote uploads with `waveControl.maxPendingEvents` by dropping the oldest queued remote-delivery files, while keeping the local `events.jsonl` stream intact
+- retry delivery with idempotency keys
+- never fail a live run, proof registration, or benchmark because the network is unavailable
+The Railway-hosted `services/wave-control` service is an analysis surface, not the scheduler of record.
+The service package lives under `services/wave-control/`.
+For durable telemetry retention, attach Railway Postgres to `wave-control` so the
+service receives `DATABASE_URL`. Without that variable, the service falls back to the
+in-memory store and only keeps data until the process restarts.

package/docs/reference/wave-planning-lessons.md ADDED Viewed

@@ -0,0 +1,131 @@
+---
+summary: "Lessons from Waves 4-9 on what makes future waves succeed or fail."
+read_when:
+  - Drafting a new wave
+  - Splitting or renumbering future waves
+  - Deciding whether a wave should target repo-landed, pilot-live, or above
+title: "Wave Planning Lessons"
+---
+# Wave Planning Lessons
+This document captures the practical lessons from Waves 4-9. The main theme is
+simple: waves succeed when the declared maturity target, the owned slices, the
+runtime setup, and the closure artifacts all describe the same truth.
+## 1. One honest maturity jump per wave
+- Treat `repo-landed`, `pilot-live`, `qa-proved`, `fleet-ready`,
+  `cutover-ready`, and `deprecation-ready` as materially different bars.
+- A wave should promote a component by one honest maturity step, not silently
+  combine multiple levels of proof in one broad plan.
+- If a wave only lands code and tests, the target is usually `repo-landed`, not
+  `pilot-live`.
+- If a wave claims `pilot-live` or above, the wave must own real deploy/live
+  proof and rollback evidence.
+## 2. Live-proof waves are a different class of wave
+- `pilot-live` and above need an explicit live-proof owner, not just
+  implementation agents plus A8/A9/A0.
+- Live-proof waves need a canonical proof bundle under `.tmp/` and one owned
+  operations runbook under `docs/plans/operations/`.
+- The proof bundle must contain restart or rollback evidence, not only one-shot
+  success.
+- External operator commands and captured evidence must be part of the authored
+  wave, not improvised during execution.
+## 3. Component promotions must map to owned slices
+- Every promoted component needs one or more implementation owners and one
+  shared proof story.
+- If multiple agents contribute to one promoted component, their slices must be
+  obviously complementary, not overlapping guesses.
+- Shared components should not cause one agent to be retried just because a
+  sibling owner is still finishing; each agent must be able to complete its own
+  slice honestly.
+## 4. Deliverables must be explicit and machine-checkable
+- Every implementation agent should declare `### Deliverables`.
+- For live-proof waves, use `### Proof artifacts` in addition to deliverables.
+- Deliverables should be exact files or artifact manifests, not vague “test
+  coverage” or “docs updated” expectations.
+- Missing deliverables should fail the wave even if the code mostly landed.
+## 5. Closure must update the shared planning truth
+- A9 should always update `current-state`, `master-plan`, `migration`, and the
+  component cutover matrix when a wave changes what later waves may safely
+  assume.
+- The evaluator should reject a wave if the repo’s planning truth still implies
+  an older maturity level after the code has landed.
+- Shared-plan closure is not paperwork; it is part of architecture truth.
+## 6. Use A8 to reconcile reality before docs and evaluation
+- A8 is the place to detect contradictions between slices, missing ownership,
+  and proof gaps before A9 and A0 run.
+- A8 should judge `ready-for-doc-closure` versus `needs-more-work` based on the
+  landed artifact set, not on agent intent.
+- Waves were materially more reliable once A8 became a true closure gate rather
+  than optional synthesis.
+## 7. Runtime setup matters as much as wave prose
+- Do not use small fixed turn caps for synthesis-heavy or closure-heavy agents.
+  Bound them with `budget.minutes`, not `budget.turns`.
+- Pin exact model and reasoning settings for each runtime. Ambiguous profiles
+  create unclear failure modes.
+- Avoid cross-runtime fallback on live-proof or deploy-sensitive slices unless
+  there is a very good reason.
+- Context7 should be explicit and real; unresolved bundles create noise instead
+  of help.
+## 8. Repo-local proof and live proof are different
+- Repo-local tests and docs can justify `repo-landed`.
+- Live host validation, admitted runtime behavior, rollback drills, and operator
+  surfaces are what justify `pilot-live` and above.
+- Do not let “the code exists” be treated as “the deployment works.”
+## 9. Architecture-facing status surfaces must be future-safe
+- Status and projection code should be keyed to the real future topology, not
+  the smallest test case that passes today.
+- If a status model will later carry multiple runtime classes, providers, or
+  lanes, the substrate must preserve that identity now.
+- Closed enums and typed contracts should be validated as closed enums and typed
+  contracts, not accepted as arbitrary strings.
+## 10. The best waves are narrow, layered, and boring
+- Narrow waves close more reliably than broad waves.
+- A good wave answers:
+  - what exact maturity level is being claimed
+  - what exact artifacts prove it
+  - who owns repo implementation
+  - who owns live proof, if any
+  - what A9 must update
+  - what A0 must refuse to overclaim
+- If a wave still sounds ambitious and fuzzy after writing the deliverables,
+  split it again.
+## 11. Future-wave checklist
+- Does the component promotion match the real maturity level being claimed?
+- Does every promoted component have an implementation owner?
+- If the target is `pilot-live` or above, is there an explicit live-proof owner?
+- Are deliverables and proof artifacts exact and machine-checkable?
+- Are current-state and matrix updates part of A9 closure?
+- Are A8 and A0 told what would make the wave fail honestly?
+- Are runtime pins, Context7 bundles, and budgets specific enough to avoid
+  preventable execution failures?
+- Would a reviewer understand the difference between “code landed” and
+  “component promoted” just by reading the wave file?
+## Bottom line
+The successful waves were not the ones with the most code. They were the ones
+where the wave file, the runtime setup, the artifacts, and the planning docs all
+made the same claim at the same level of maturity.

package/package.json CHANGED Viewed

@@ -1,15 +1,15 @@
 {
   "name": "@chllming/wave-orchestration",
-  "version": "0.6.3",
+  "version": "0.7.1",
   "license": "MIT",
   "description": "Generic wave-based multi-agent orchestration for repository work.",
   "repository": {
     "type": "git",
-    "url": "git+https://github.com/chllming/wave-orchestration.git"
+    "url": "git+https://github.com/chllming/agent-wave-orchestrator.git"
   },
-  "homepage": "https://github.com/chllming/wave-orchestration#readme",
+  "homepage": "https://github.com/chllming/agent-wave-orchestrator#readme",
   "bugs": {
-    "url": "https://github.com/chllming/wave-orchestration/issues"
+    "url": "https://github.com/chllming/agent-wave-orchestrator/issues"
   },
   "publishConfig": {
     "access": "public"
@@ -41,6 +41,7 @@
     "context7:api-check": "bash scripts/context7-export-env.sh run bash scripts/context7-api-check.sh",
     "research:import-agent-context": "node scripts/research/import-agent-context-archive.mjs scripts/research/manifests/agent-context-expanded-2026-03-22.mjs",
     "research:index-agent-context": "node scripts/research/generate-agent-context-indexes.mjs",
+    "research:sync-planner-context7": "node scripts/research/sync-planner-context7-bundle.mjs",
     "research:refresh-agent-context": "pnpm research:import-agent-context && pnpm research:index-agent-context",
     "test": "vitest run --config vitest.config.ts",
     "wave": "node scripts/wave.mjs",

package/releases/manifest.json CHANGED Viewed

@@ -2,6 +2,46 @@
   "schemaVersion": 1,
   "packageName": "@chllming/wave-orchestration",
   "releases": [
+    {
+      "version": "0.7.1",
+      "date": "2026-03-23",
+      "summary": "Run-control hardening, completed-with-drift reconcile preservation, live Codex ceiling visibility, and 0.7.1 release-surface alignment.",
+      "features": [
+        "Fresh live launches now clear stale auto-generated relaunch plans by default, so explicit wave starts recompute the implementation fan-out unless `--resume-control-state` is passed.",
+        "`wave control status` now treats the active attempt as the authoritative live fan-out instead of replaying stale rerun intent or unrelated closure blockers.",
+        "Historical `reconcile-status` now preserves previously authoritative completed waves as `completed_with_drift` when the only mismatch is prompt-hash drift.",
+        "Live executor overlays now always write `launch-preview.json`, and Codex summaries record an observed turn ceiling when the runtime reports one.",
+        "Shipped package docs, migration guidance, sample-wave references, and npm publishing instructions now point at the `0.7.1` release surface."
+      ],
+      "manualSteps": [
+        "If you intentionally want to reuse a prior auto-generated relaunch selection on a fresh live start, pass `--resume-control-state` explicitly.",
+        "Use `pnpm exec wave dashboard --lane <lane> --attach current` or `--attach global` to reattach to live tmux-backed dashboards without resolving sockets or session names by hand.",
+        "If an adopted `0.6.x` repo fails `wave doctor` after the `0.7.x` upgrade, sync the repo-owned planner starter surface (`docs/agents/wave-planner-role.md`, `skills/role-planner/`, `docs/context7/planner-agent/`, `docs/reference/wave-planning-lessons.md`, and the `planner-agentic` bundle entry) before relying on planner-aware validation."
+      ],
+      "breaking": false
+    },
+    {
+      "version": "0.7.0",
+      "date": "2026-03-23",
+      "summary": "Unified wave control operator CLI, canonical control-plane event log, Wave Control telemetry, live-wave orchestration refresh, and resident orchestrator support.",
+      "features": [
+        "Unified `wave control` CLI with `status`, `task`, `rerun`, `proof`, and `telemetry` sub-surfaces replacing `wave coord`/`wave retry`/`wave proof` as the preferred operator interface.",
+        "Canonical control-plane event log under `.tmp/<lane>-wave-launcher/control-plane/` with event-sourced materialization for proof bundles, rerun requests, operator tasks, and attempt lifecycle.",
+        "Wave Control telemetry: local-first event queueing with best-effort batch delivery, configurable report modes, selective artifact upload, and per-category capture toggles.",
+        "Live-wave orchestration refresh that keeps coordination surfaces, clarification triage, and dashboard metrics current during active execution.",
+        "Resident orchestrator support via `--resident-orchestrator` for long-running non-owning monitoring sessions.",
+        "Native and external benchmark telemetry with failure-review validity classification and config attestation hashing.",
+        "Stable dashboard reattach via `wave dashboard --attach current|global`, plus live `launch-preview.json` artifacts that preserve observed Codex turn ceilings without pretending Wave set them.",
+        "Historical `reconcile-status` now preserves previously authoritative completed waves as `completed_with_drift` when the only mismatch is prompt-hash drift.",
+        "Fresh live launches now clear stale auto-generated relaunch plans by default, while `wave control status` treats the active attempt as the authoritative fan-out instead of replaying stale relaunch state."
+      ],
+      "manualSteps": [
+        "Existing `wave coord`, `wave retry`, and `wave proof` commands remain available as compatibility surfaces. No migration required, but new operator docs prefer `wave control`.",
+        "To enable Wave Control telemetry, add a `waveControl` section to `wave.config.json` with at minimum an `endpoint` and `workspaceId`. Pass `--no-telemetry` to disable for a single run.",
+        "If an adopted `0.6.x` repo fails `wave doctor` after the `0.7.x` upgrade, sync the repo-owned planner starter surface (`docs/agents/wave-planner-role.md`, `skills/role-planner/`, `docs/context7/planner-agent/`, `docs/reference/wave-planning-lessons.md`, and the `planner-agentic` bundle entry) before relying on planner-aware validation."
+      ],
+      "breaking": false
+    },
     {
       "version": "0.6.3",
       "date": "2026-03-22",

package/scripts/research/agent-context-archive.mjs CHANGED Viewed

@@ -14,6 +14,12 @@ export const TOPIC_DEFINITIONS = [
     description:
       "Planning topology, verifier and replanner loops, protocol-driven coordination, and blackboard-aware orchestration patterns for multi-agent systems.",
   },
+  {
+    id: "agent-cooperation-and-coordination",
+    title: "Agent Cooperation and Coordination",
+    description:
+      "Benchmarks and failure analyses for inter-agent cooperation, commitment tracking, communication quality, negotiation, and teammate-style coordination.",
+  },
   {
     id: "long-running-agents-and-compaction",
     title: "Long-Running Agents and Compaction",
@@ -103,6 +109,15 @@ const SKILLS_TOPIC_OVERRIDE_SLUGS = new Set([
   "meta-context-engineering-via-agentic-skill-evolution",
 ]);
+const COOPERATION_TOPIC_OVERRIDE_SLUGS = new Set([
+  "cooperbench-why-coding-agents-cannot-be-your-teammates-yet",
+  "why-do-multi-agent-llm-systems-fail",
+  "systematic-failures-in-collective-reasoning-under-distributed-information-in-multi-agent-llms",
+  "silo-bench-a-scalable-environment-for-evaluating-distributed-coordination-in-multi-agent-llm-systems",
+  "dpbench-large-language-models-struggle-with-simultaneous-coordination",
+  "multi-agent-teams-hold-experts-back",
+]);
 /**
  * Coerce `value` to a string and backslash-escape every `|` in it.
  *
  * `null` and `undefined` are treated as the empty string; any other value
  * goes through `String(...)` first.
  * NOTE(review): presumably used for Markdown table cells — confirm against callers.
  *
  * @param {unknown} value - Value to stringify and escape.
  * @returns {string} The text with each `|` replaced by `\|`.
  */
 function escapeInlinePipes(value) {
   const text = String(value ?? "");
   // Splitting on the literal pipe and re-joining escapes every occurrence.
   const segments = text.split("|");
   return segments.join("\\|");
 }
@@ -252,6 +267,9 @@ export function inferTopics(entry, section = null) {
   if (SKILLS_TOPIC_OVERRIDE_SLUGS.has(entry.slug)) {
     topics.push("skills-and-procedural-memory");
   }
+  if (COOPERATION_TOPIC_OVERRIDE_SLUGS.has(entry.slug)) {
+    topics.push("agent-cooperation-and-coordination");
+  }
   if (hasDeclaredTopics) {
     return unique(topics);

package/scripts/research/manifests/agent-context-expanded-2026-03-22.mjs CHANGED Viewed

@@ -3,6 +3,7 @@ import baseManifest from "./harness-and-blackboard-2026-03-21.mjs";
 const TOPICS = {
   HARNESS: "harnesses-and-practice",
   PLANNING: "planning-and-orchestration",
+  COOPERATION: "agent-cooperation-and-coordination",
   LONG_RUNNING: "long-running-agents-and-compaction",
   SKILLS: "skills-and-procedural-memory",
   BLACKBOARD: "blackboard-and-shared-workspaces",
@@ -521,6 +522,22 @@ const planningManifest = [
     fit: "Useful benchmark for testing whether coordination-heavy planning systems scale beyond serial reasoning.",
     topics: [TOPICS.PLANNING, TOPICS.REPO],
   }),
+  arxivPaper("2601.13295", {
+    title: "CooperBench: Why Coding Agents Cannot be Your Teammates Yet",
+    slug: "cooperbench-why-coding-agents-cannot-be-your-teammates-yet",
+    authors:
+      "Arpandeep Khatua, Hao Zhu, Peter Tran, Arya Prabhudesai, Frederic Sadrieh, Johann K. Lieberwirth, Xinkai Yu, Yicheng Fu, Michael J. Ryan, Jiaxin Pei, Diyi Yang",
+    year: 2026,
+    researchBucket: "P0 direct hits",
+    mapsTo:
+      "Collaborative coding benchmark for inter-agent cooperation, communication quality, commitment tracking, and coordination failures.",
+    fit: "Direct benchmark for whether coding agents behave like usable teammates instead of isolated solo solvers.",
+    additionalSource: "https://cooperbench.com",
+    additionalPdf: "https://cooperbench.com/static/pdfs/main.pdf",
+    notes:
+      "Project site hosts the same paper PDF plus leaderboard, dataset, and trajectory viewer for the benchmark.",
+    topics: [TOPICS.PLANNING, TOPICS.COOPERATION, TOPICS.REPO],
+  }),
   arxivPaper("2602.01011", {
     title: "Multi-Agent Teams Hold Experts Back",
     slug: "multi-agent-teams-hold-experts-back",