@chllming/wave-orchestration 0.6.3 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/CHANGELOG.md +82 -1
  2. package/README.md +40 -7
  3. package/docs/agents/wave-orchestrator-role.md +50 -0
  4. package/docs/agents/wave-planner-role.md +39 -0
  5. package/docs/context7/bundles.json +9 -0
  6. package/docs/context7/planner-agent/README.md +25 -0
  7. package/docs/context7/planner-agent/manifest.json +83 -0
  8. package/docs/context7/planner-agent/papers/cooperbench-why-coding-agents-cannot-be-your-teammates-yet.md +3283 -0
  9. package/docs/context7/planner-agent/papers/dova-deliberation-first-multi-agent-orchestration-for-autonomous-research-automation.md +1699 -0
  10. package/docs/context7/planner-agent/papers/dpbench-large-language-models-struggle-with-simultaneous-coordination.md +2251 -0
  11. package/docs/context7/planner-agent/papers/incremental-planning-to-control-a-blackboard-based-problem-solver.md +1729 -0
  12. package/docs/context7/planner-agent/papers/silo-bench-a-scalable-environment-for-evaluating-distributed-coordination-in-multi-agent-llm-systems.md +3747 -0
  13. package/docs/context7/planner-agent/papers/todoevolve-learning-to-architect-agent-planning-systems.md +1675 -0
  14. package/docs/context7/planner-agent/papers/verified-multi-agent-orchestration-a-plan-execute-verify-replan-framework-for-complex-query-resolution.md +1173 -0
  15. package/docs/context7/planner-agent/papers/why-do-multi-agent-llm-systems-fail.md +5211 -0
  16. package/docs/context7/planner-agent/topics/planning-and-orchestration.md +24 -0
  17. package/docs/evals/README.md +96 -1
  18. package/docs/evals/arm-templates/README.md +13 -0
  19. package/docs/evals/arm-templates/full-wave.json +15 -0
  20. package/docs/evals/arm-templates/single-agent.json +15 -0
  21. package/docs/evals/benchmark-catalog.json +7 -0
  22. package/docs/evals/cases/README.md +47 -0
  23. package/docs/evals/cases/wave-blackboard-inbox-targeting.json +73 -0
  24. package/docs/evals/cases/wave-contradiction-conflict.json +104 -0
  25. package/docs/evals/cases/wave-expert-routing-preservation.json +69 -0
  26. package/docs/evals/cases/wave-hidden-profile-private-evidence.json +81 -0
  27. package/docs/evals/cases/wave-premature-closure-guard.json +71 -0
  28. package/docs/evals/cases/wave-silo-cross-agent-state.json +77 -0
  29. package/docs/evals/cases/wave-simultaneous-lockstep.json +92 -0
  30. package/docs/evals/cooperbench/real-world-mitigation.md +341 -0
  31. package/docs/evals/external-benchmarks.json +85 -0
  32. package/docs/evals/external-command-config.sample.json +9 -0
  33. package/docs/evals/external-command-config.swe-bench-pro.json +8 -0
  34. package/docs/evals/pilots/README.md +47 -0
  35. package/docs/evals/pilots/swe-bench-pro-public-full-wave-review-10.json +64 -0
  36. package/docs/evals/pilots/swe-bench-pro-public-pilot.json +111 -0
  37. package/docs/evals/wave-benchmark-program.md +302 -0
  38. package/docs/guides/planner.md +67 -11
  39. package/docs/guides/terminal-surfaces.md +12 -0
  40. package/docs/plans/context7-wave-orchestrator.md +20 -0
  41. package/docs/plans/current-state.md +8 -1
  42. package/docs/plans/examples/wave-benchmark-improvement.md +108 -0
  43. package/docs/plans/examples/wave-example-live-proof.md +1 -1
  44. package/docs/plans/examples/wave-example-rollout-fidelity.md +340 -0
  45. package/docs/plans/migration.md +26 -0
  46. package/docs/plans/wave-orchestrator.md +60 -12
  47. package/docs/plans/waves/reviews/wave-1-benchmark-operator.md +118 -0
  48. package/docs/reference/cli-reference.md +547 -0
  49. package/docs/reference/coordination-and-closure.md +436 -0
  50. package/docs/reference/live-proof-waves.md +25 -3
  51. package/docs/reference/npmjs-trusted-publishing.md +3 -3
  52. package/docs/reference/proof-metrics.md +90 -0
  53. package/docs/reference/runtime-config/README.md +63 -2
  54. package/docs/reference/runtime-config/codex.md +2 -1
  55. package/docs/reference/sample-waves.md +29 -18
  56. package/docs/reference/wave-control.md +164 -0
  57. package/docs/reference/wave-planning-lessons.md +131 -0
  58. package/package.json +5 -4
  59. package/releases/manifest.json +40 -0
  60. package/scripts/research/agent-context-archive.mjs +18 -0
  61. package/scripts/research/manifests/agent-context-expanded-2026-03-22.mjs +17 -0
  62. package/scripts/research/sync-planner-context7-bundle.mjs +133 -0
  63. package/scripts/wave-orchestrator/agent-state.mjs +11 -2
  64. package/scripts/wave-orchestrator/artifact-schemas.mjs +232 -0
  65. package/scripts/wave-orchestrator/autonomous.mjs +7 -0
  66. package/scripts/wave-orchestrator/benchmark-cases.mjs +374 -0
  67. package/scripts/wave-orchestrator/benchmark-external.mjs +1384 -0
  68. package/scripts/wave-orchestrator/benchmark.mjs +972 -0
  69. package/scripts/wave-orchestrator/clarification-triage.mjs +78 -12
  70. package/scripts/wave-orchestrator/config.mjs +175 -0
  71. package/scripts/wave-orchestrator/control-cli.mjs +1216 -0
  72. package/scripts/wave-orchestrator/control-plane.mjs +697 -0
  73. package/scripts/wave-orchestrator/coord-cli.mjs +360 -2
  74. package/scripts/wave-orchestrator/coordination-store.mjs +211 -9
  75. package/scripts/wave-orchestrator/coordination.mjs +84 -0
  76. package/scripts/wave-orchestrator/dashboard-renderer.mjs +120 -5
  77. package/scripts/wave-orchestrator/dashboard-state.mjs +22 -0
  78. package/scripts/wave-orchestrator/evals.mjs +23 -0
  79. package/scripts/wave-orchestrator/executors.mjs +3 -2
  80. package/scripts/wave-orchestrator/feedback.mjs +55 -0
  81. package/scripts/wave-orchestrator/install.mjs +151 -2
  82. package/scripts/wave-orchestrator/launcher-closure.mjs +4 -1
  83. package/scripts/wave-orchestrator/launcher-runtime.mjs +33 -30
  84. package/scripts/wave-orchestrator/launcher.mjs +884 -36
  85. package/scripts/wave-orchestrator/planner-context.mjs +75 -0
  86. package/scripts/wave-orchestrator/planner.mjs +2270 -136
  87. package/scripts/wave-orchestrator/proof-cli.mjs +195 -0
  88. package/scripts/wave-orchestrator/proof-registry.mjs +317 -0
  89. package/scripts/wave-orchestrator/replay.mjs +10 -4
  90. package/scripts/wave-orchestrator/retry-cli.mjs +184 -0
  91. package/scripts/wave-orchestrator/retry-control.mjs +225 -0
  92. package/scripts/wave-orchestrator/shared.mjs +26 -0
  93. package/scripts/wave-orchestrator/swe-bench-pro-task.mjs +1004 -0
  94. package/scripts/wave-orchestrator/terminals.mjs +1 -1
  95. package/scripts/wave-orchestrator/traces.mjs +157 -2
  96. package/scripts/wave-orchestrator/wave-control-client.mjs +532 -0
  97. package/scripts/wave-orchestrator/wave-control-schema.mjs +309 -0
  98. package/scripts/wave-orchestrator/wave-files.mjs +144 -23
  99. package/scripts/wave.mjs +27 -0
  100. package/skills/repo-coding-rules/SKILL.md +1 -0
  101. package/skills/role-cont-eval/SKILL.md +1 -0
  102. package/skills/role-cont-qa/SKILL.md +13 -6
  103. package/skills/role-deploy/SKILL.md +1 -0
  104. package/skills/role-documentation/SKILL.md +4 -0
  105. package/skills/role-implementation/SKILL.md +4 -0
  106. package/skills/role-infra/SKILL.md +2 -1
  107. package/skills/role-integration/SKILL.md +15 -8
  108. package/skills/role-planner/SKILL.md +39 -0
  109. package/skills/role-planner/skill.json +21 -0
  110. package/skills/role-research/SKILL.md +1 -0
  111. package/skills/role-security/SKILL.md +2 -2
  112. package/skills/runtime-claude/SKILL.md +2 -1
  113. package/skills/runtime-codex/SKILL.md +1 -0
  114. package/skills/runtime-local/SKILL.md +2 -0
  115. package/skills/runtime-opencode/SKILL.md +1 -0
  116. package/skills/wave-core/SKILL.md +25 -6
  117. package/skills/wave-core/references/marker-syntax.md +16 -8
  118. package/wave.config.json +45 -0
@@ -6,6 +6,7 @@ Use it when you need the full supported surface for:
6
6
 
7
7
  - `wave.config.json`
8
8
  - `lanes.<lane>.executors`
9
+ - `waveControl`
9
10
  - `executors.profiles.<profile>`
10
11
  - per-agent `### Executor` blocks inside a wave file
11
12
 
@@ -74,6 +75,63 @@ These fields are shared across runtimes:
74
75
  - [claude.md](./claude.md)
75
76
  - [opencode.md](./opencode.md)
76
77
 
78
+ ## Wave Control
79
+
80
+ `wave.config.json` may also declare a `waveControl` block for local-first telemetry delivery.
81
+
82
+ Supported top-level fields:
83
+
84
+ | Key | Type | Default | Purpose |
85
+ | --- | --- | --- | --- |
86
+ | `enabled` | boolean | `true` | Master switch for local queueing and remote delivery |
87
+ | `endpoint` | string | unset | Base URL for the Railway-hosted `services/wave-control` API |
88
+ | `workspaceId` | string | derived from repo path | Stable workspace identity used across runs |
89
+ | `projectId` | string | derived from `projectName` | Stable project/repo identity used for cross-workspace reporting and filtering |
90
+ | `authTokenEnvVar` | string | `WAVE_CONTROL_AUTH_TOKEN` | Environment variable name holding the bearer token |
91
+ | `reportMode` | string | `metadata-plus-selected` | `disabled`, `metadata-only`, `metadata-plus-selected`, or `full-artifact-upload` |
92
+ | `uploadArtifactKinds` | string[] | selected proof/trace/benchmark kinds | Artifact classes eligible for body upload when an artifact's upload policy requests a body |
93
+ | `requestTimeoutMs` | integer | `5000` | Per-batch network timeout |
94
+ | `flushBatchSize` | integer | `25` | Max queued telemetry events flushed per batch |
95
+ | `maxPendingEvents` | integer | `1000` | Cap for pending remote-delivery queue files; oldest pending uploads are dropped from the remote queue while the local `events.jsonl` stream remains authoritative |
96
+ | `captureCoordinationRecords` | boolean | `true` | Emit `coordination_record` telemetry |
97
+ | `captureControlPlaneEvents` | boolean | `true` | Emit `wave_run`, `attempt`, `proof_bundle`, and related control-plane events |
98
+ | `captureTraceBundles` | boolean | `true` | Emit finalized trace-bundle artifacts and gate snapshots |
99
+ | `captureBenchmarkRuns` | boolean | `true` | Emit `benchmark_run`, `benchmark_item`, `verification`, and `review` events |
100
+
101
+ Lane overrides may refine the same keys under `lanes.<lane>.waveControl`.
102
+
103
+ One-run override:
104
+
105
+ - `wave launch --no-telemetry` disables Wave Control queueing and remote delivery for that launcher invocation without changing the repo config.
106
+
107
+ Example:
108
+
109
+ ```json
110
+ {
111
+ "waveControl": {
112
+ "endpoint": "https://wave-control.up.railway.app/api/v1",
113
+ "workspaceId": "wave-main",
114
+ "projectId": "wave-orchestration",
115
+ "reportMode": "metadata-plus-selected",
116
+ "uploadArtifactKinds": [
117
+ "trace-run-metadata",
118
+ "trace-quality",
119
+ "benchmark-results"
120
+ ]
121
+ }
122
+ }
123
+ ```
124
+
125
+ Runtime-emitted Wave Control events also attach:
126
+
127
+ - `orchestratorId` from the active launcher or resident orchestrator
128
+ - `runtimeVersion` from the installed Wave package metadata
129
+
130
+ Those fields are queryable in the `wave-control` service alongside `workspaceId`,
131
+ `projectId`, `runKind`, `runId`, `lane`, and benchmark ids.
132
+
133
+ See [../wave-control.md](../wave-control.md) for the event contract and upload-policy model.
134
+
77
135
  ## Generated Artifacts
78
136
 
79
137
  Wave writes runtime artifacts here:
@@ -83,7 +141,7 @@ Wave writes runtime artifacts here:
83
141
 
84
142
  Common files:
85
143
 
86
- - `launch-preview.json`: resolved invocation lines, env vars, retry mode, and structured attempt/turn-limit metadata
144
+ - `launch-preview.json`: resolved invocation lines, env vars, retry mode, and structured attempt/turn-limit metadata for both dry-run and live launches
87
145
  - `skills.resolved.md`: compact metadata-first skill catalog for the selected agent and runtime
88
146
  - `skills.expanded.md`: full canonical/debug skill payload with `SKILL.md` bodies and adapters
89
147
  - `skills.metadata.json`: resolved skill ids, activation metadata, permissions, hashes, and generated artifact paths
@@ -92,6 +150,9 @@ Common files:
92
150
  - `claude-settings.json`: generated Claude settings overlay when inline settings data is present
93
151
  - `opencode-agent-prompt.txt`: generated OpenCode harness prompt overlay
94
152
  - `opencode.json`: generated OpenCode runtime config overlay
153
+ - `.tmp/<lane>-wave-launcher/control-plane/telemetry/events.jsonl`: local-first Wave Control event stream
154
+ - `.tmp/<lane>-wave-launcher/control-plane/telemetry/pending/`: queued event batches awaiting remote delivery
155
+ - `.tmp/<lane>-wave-launcher/control-plane/telemetry/delivery-state.json`: remote-delivery counters and last-error state
95
156
 
96
157
  Runtime-specific delivery:
97
158
 
@@ -100,7 +161,7 @@ Runtime-specific delivery:
100
161
  - OpenCode injects the compact catalog into `opencode.json` and attaches `skill.json`, `SKILL.md`, the selected adapter, and recursive `references/**` files through `--file`.
101
162
  - Local keeps skills prompt-only.
102
163
 
103
- `launch-preview.json` also records the resolved skill metadata plus a `limits` section. For Claude and OpenCode, that section reports the known turn ceiling and whether it came from the runtime-specific setting or generic `budget.turns`. For Codex, it explicitly records that Wave emitted no turn-limit flag and that any effective ceiling may come from the selected Codex profile or upstream runtime.
164
+ `launch-preview.json` also records the resolved skill metadata plus a `limits` section. For Claude and OpenCode, that section reports the known turn ceiling and whether it came from the runtime-specific setting or generic `budget.turns`. For Codex, it explicitly records that Wave emitted no turn-limit flag and that any effective ceiling may come from the selected Codex profile or upstream runtime. If a live Codex run later terminates with a visible `Reached max turns (N)` log line, Wave appends that observed ceiling back into the live `launch-preview.json` as runtime evidence rather than pretending Wave set it.
104
165
 
105
166
  ## Recommended Validation Path
106
167
 
@@ -21,6 +21,7 @@ Wave launches Codex with `codex exec` and pipes the generated task prompt throug
21
21
 
22
22
  - There is no `executors.codex.model` key today. Use profile `model` or per-agent `model`.
23
23
  - Generic `budget.turns` does not set a Codex turn limit. If Codex stops on a turn ceiling, that limit came from the selected Codex profile or upstream Codex runtime, not from a Wave-emitted CLI flag.
24
+ - Live runs still write `launch-preview.json`. If Codex later logs `Reached max turns (N)`, Wave records that observed ceiling under `limits.observedTurnLimit` with source `runtime-log`.
24
25
  - `codex.images`, `codex.add_dirs`, and `codex.config` accept either a string array in `wave.config.json` or a comma-separated list in a wave file.
25
26
  - Relative paths are passed to Codex relative to the repository root because Wave launches the executor from the repo workspace.
26
27
 
@@ -78,4 +79,4 @@ For a dry run, inspect:
78
79
  - `launch-preview.json` for the final `codex exec` command
79
80
  - any referenced prompt file under `.tmp/<lane>-wave-launcher/dry-run/prompts/`
80
81
 
81
- The preview records the exact `--profile`, repeated `-c`, `--image`, and `--add-dir` flags that Wave would use in a live launch. It also includes a `limits` block that makes Wave's Codex visibility explicit: `turnLimitSource: "not-set-by-wave"` means Wave emitted no Codex turn-limit flag, so any effective ceiling is external to the Wave CLI invocation.
82
+ The preview records the exact `--profile`, repeated `-c`, `--image`, and `--add-dir` flags that Wave would use in a live launch. It also includes a `limits` block that makes Wave's Codex visibility explicit: `turnLimitSource: "not-set-by-wave"` means Wave emitted no Codex turn-limit flag, so any effective ceiling is external to the Wave CLI invocation. On a live run, that same preview file may later gain `observedTurnLimit` if the Codex runtime reports the ceiling in its log output.
@@ -1,23 +1,29 @@
1
1
  ---
2
2
  title: "Sample Waves"
3
- summary: "A showcase-first sample wave that demonstrates the current 0.6.1 Wave surface."
3
+ summary: "Showcase-first sample waves that demonstrate the current 0.7.1 Wave surface."
4
4
  ---
5
5
 
6
6
  # Sample Waves
7
7
 
8
- This guide points to one showcase-first sample wave that demonstrates the current `0.6.1` authored Wave surface.
8
+ This guide points to showcase-first sample waves that demonstrate the current `0.7.1` authored Wave surface.
9
9
 
10
- The example is intentionally denser than a typical production wave. Its job is to teach the current authoring and runtime surface quickly, not to be the smallest possible launch-ready file.
10
+ The examples are intentionally denser than typical production waves. Their job is to teach the current authoring and runtime surface quickly, not to be the smallest possible launch-ready files.
11
11
 
12
- ## Canonical Example
12
+ ## Canonical Examples
13
+
14
+ - [High-fidelity repo-landed rollout wave](../plans/examples/wave-example-rollout-fidelity.md)
15
+ Shows what a good `repo-landed` outcome looks like when one promoted component only closes honestly if desired-state records, reconcile-loop substrate, and cluster-view surfaces land together. It emphasizes maturity discipline, explicit deliverables, and shared-plan closure without drifting into `pilot-live` claims.
13
16
 
14
17
  - [Full modern sample wave](../plans/examples/wave-example-live-proof.md)
15
- Shows the combined `0.6.1` authored surface in one file: closure roles, `E0`, optional security review, delegated and pinned benchmark targets, richer executor config, `### Skills`, `### Capabilities`, `### Deliverables`, `### Exit contract`, `### Proof artifacts`, sticky retry, deploy environments, and proof-first live-wave structure.
18
+ Shows the combined `0.7.1` authored surface in one file: closure roles, `E0`, optional security review, delegated and pinned benchmark targets, richer executor config, `### Skills`, `### Capabilities`, `### Deliverables`, `### Exit contract`, `### Proof artifacts`, sticky retry, deploy environments, and proof-first live-wave structure.
16
19
 
17
- ## What This Example Teaches
20
+ ## What These Examples Teach
18
21
 
19
- - the standard closure-role structure with `A0`, `E0`, `A8`, and `A9`
20
- - wave-level `## Eval targets`
22
+ - the standard closure-role structure with `A0`, `A8`, and `A9`
23
+ - `E0` and wave-level `## Eval targets` in the full modern sample
24
+ - honest `repo-landed` maturity framing without `pilot-live` drift
25
+ - multi-slice component promotion where all sibling owners must land together
26
+ - shared-plan and component-matrix closure as part of the architecture truth
21
27
  - delegated versus pinned benchmark selection
22
28
  - coordination benchmark families from `docs/evals/benchmark-catalog.json`
23
29
  - richer executor blocks, runtime budgets, and retry policy
@@ -32,8 +38,11 @@ The example is intentionally denser than a typical production wave. Its job is t
32
38
 
33
39
  ## Feature Coverage Map
34
40
 
35
- This sample covers the main surfaces added or hardened for `0.6.1`:
41
+ Together these samples cover the main surfaces added or hardened for `0.7.1`:
36
42
 
43
+ - repo-landed maturity discipline and anti-overclaim framing
44
+ - explicit shared-plan closure for future-wave safety
45
+ - coordinated component slices with per-agent deliverables
37
46
  - planner-era authored wave structure
38
47
  - cross-runtime `### Skills`
39
48
  - richer `### Executor` blocks and runtime budgets
@@ -53,6 +62,7 @@ This sample covers the main surfaces added or hardened for `0.6.1`:
53
62
  Copy more literally when:
54
63
 
55
64
  - you need the section layout
65
+ - you want a concrete example of what good repo-landed wave fidelity looks like
56
66
  - you want concrete wording for delegated versus pinned benchmark targets
57
67
  - you want a proof-first owner example with local artifact bundles and sticky retry
58
68
 
@@ -65,23 +75,24 @@ Adapt more aggressively when:
65
75
 
66
76
  ## How These Examples Map To Other Docs
67
77
 
68
- - Use [docs/guides/planner.md](../guides/planner.md) for the planner-generated baseline, then use this sample to see how a human would enrich the generated draft.
69
- - Use [docs/evals/README.md](../evals/README.md) with this sample when you need to see delegated and pinned benchmark targets in a real wave.
70
- - Use [docs/reference/live-proof-waves.md](./live-proof-waves.md) with this sample when you need proof-first authoring for `pilot-live` and above.
78
+ - Use [docs/guides/planner.md](../guides/planner.md) for the planner-generated baseline, then use these samples to see how a human would enrich the generated draft for either repo-landed or proof-first work.
79
+ - Use [docs/evals/README.md](../evals/README.md) with the full modern sample when you need to see delegated and pinned benchmark targets in a real wave.
80
+ - Use [docs/reference/live-proof-waves.md](./live-proof-waves.md) with the full modern sample when you need proof-first authoring for `pilot-live` and above.
71
81
  - Use [docs/plans/wave-orchestrator.md](../plans/wave-orchestrator.md) for the operational runbook that explains how the launcher interprets these sections.
72
82
 
73
83
  ## Suggested Reading Order
74
84
 
75
- 1. Start with [Full modern sample wave](../plans/examples/wave-example-live-proof.md).
76
- 2. Read [docs/evals/README.md](../evals/README.md) if you want more background on benchmark target selection.
77
- 3. Read [docs/reference/live-proof-waves.md](./live-proof-waves.md) if you want more detail on proof-first `pilot-live` authoring.
85
+ 1. Start with [High-fidelity repo-landed rollout wave](../plans/examples/wave-example-rollout-fidelity.md) if you want the clearest example of good closure-ready wave fidelity for a repo-only outcome.
86
+ 2. Read [Full modern sample wave](../plans/examples/wave-example-live-proof.md) if you want the denser proof-first and eval-heavy surface.
87
+ 3. Read [docs/evals/README.md](../evals/README.md) if you want more background on benchmark target selection.
88
+ 4. Read [docs/reference/live-proof-waves.md](./live-proof-waves.md) if you want more detail on proof-first `pilot-live` authoring.
78
89
 
79
- ## Why This Example Lives In `docs/plans/examples/`
90
+ ## Why These Examples Live In `docs/plans/examples/`
80
91
 
81
- The example lives outside `docs/plans/waves/` on purpose.
92
+ The examples live outside `docs/plans/waves/` on purpose.
82
93
 
83
94
  That keeps them:
84
95
 
85
96
  - easy to browse as teaching material
86
97
  - clearly separate from the repo's real launcher-facing wave sequence
87
- - safe to evolve as reference material without implying that it is part of the current lane's actual plan history
98
+ - safe to evolve as reference material without implying that they are part of the current lane's actual plan history
@@ -0,0 +1,164 @@
1
+ ---
2
+ title: "Wave Control"
3
+ summary: "Canonical telemetry, artifact upload policy, and the local-first reporting contract for the Railway-hosted Wave control plane."
4
+ ---
5
+
6
+ # Wave Control
7
+
8
+ Wave Control is the telemetry and analysis plane for Wave runs.
9
+
10
+ The design rule is:
11
+
12
+ - local files stay authoritative
13
+ - remote reporting is best-effort
14
+ - dashboards and markdown remain projections over typed local state
15
+
16
+ ## What Gets Reported
17
+
18
+ Wave Control normalizes these entity types:
19
+
20
+ - `wave_run`
21
+ - `agent_run`
22
+ - `coordination_record`
23
+ - `task`
24
+ - `attempt`
25
+ - `gate`
26
+ - `proof_bundle`
27
+ - `rerun_request`
28
+ - `human_input`
29
+ - `artifact`
30
+ - `benchmark_run`
31
+ - `benchmark_item`
32
+ - `verification`
33
+ - `review`
34
+
35
+ This lets the control plane answer:
36
+
37
+ - what happened in a run
38
+ - which proof and benchmark artifacts back a claim
39
+ - whether a benchmark result is comparison-valid or only diagnostic
40
+ - which coordination failures blocked closure
41
+
42
+ ## Run Identity
43
+
44
+ Every Wave Control event carries a normalized run identity.
45
+
46
+ The key fields are:
47
+
48
+ - `workspaceId`
49
+ - `projectId`
50
+ - `runKind`
51
+ - `runId`
52
+ - `lane`
53
+ - `wave`
54
+ - `attempt`
55
+ - `agentId`
56
+ - `orchestratorId`
57
+ - `runtimeVersion`
58
+ - `benchmarkRunId`
59
+ - `benchmarkItemId`
60
+
61
+ Why these fields matter:
62
+
63
+ - `workspaceId` separates whole adopted workspaces
64
+ - `projectId` separates product or repo identities inside one control plane
65
+ - `orchestratorId` separates resident orchestrators or control-plane owners
66
+ - `runtimeVersion` lets operators compare behavior across Wave releases without guessing from deploy timestamps
67
+
68
+ These are first-class query dimensions in the service, not only free-form event payload fields.
69
+
70
+ ## Proof Signals
71
+
72
+ Wave Control is intended to make the main README claims measurable.
73
+
74
+ For the explicit README-failure-case-to-signal map, see [proof-metrics.md](./proof-metrics.md).
75
+
76
+ Signals to preserve:
77
+
78
+ - canonical-state fidelity:
79
+ `coordination_record`, `wave_run`, `attempt`, and `artifact` telemetry prove the scheduler truth came from JSON state, not only markdown boards
80
+ - evidence pooling:
81
+ integration and closure telemetry should cite the proof artifacts and evidence refs they relied on
82
+ - contradiction repair:
83
+ gate and review telemetry should show unresolved conflicts, repair creation, and repair resolution
84
+ - expert routing:
85
+ targeted assignments, reroutes, and final recommendation ownership should remain visible
86
+ - premature closure prevention:
87
+ gate snapshots, proof completeness, block reasons, reruns, and cont-QA reversal should be durable
88
+ - benchmark trust:
89
+ every benchmark item should distinguish capability from validity
90
+
91
+ ## Artifact Contract
92
+
93
+ Selected artifacts are described with typed descriptors:
94
+
95
+ ```json
96
+ {
97
+ "path": ".tmp/main-wave-launcher/traces/wave-1/attempt-1/quality.json",
98
+ "kind": "trace-quality",
99
+ "required": true,
100
+ "present": true,
101
+ "sha256": "abc123...",
102
+ "bytes": 2048,
103
+ "contentType": "application/json",
104
+ "uploadPolicy": "selected"
105
+ }
106
+ ```
107
+
108
+ Upload policy meanings:
109
+
110
+ - `local-only`: keep only the descriptor remotely
111
+ - `metadata-only`: report path, hash, size, and presence only
112
+ - `selected`: upload metadata plus the artifact body when the runtime is in `metadata-plus-selected` or `full-artifact-upload` **and** the artifact kind is allowed by `waveControl.uploadArtifactKinds`
114
+ - `full`: upload the artifact body in `full-artifact-upload` flows; if `uploadArtifactKinds` is set, keep the kind allowlist aligned with that policy
115
+
116
+ ## Runtime Config
117
+
118
+ `wave.config.json` can declare:
119
+
120
+ ```json
121
+ {
122
+ "waveControl": {
123
+ "endpoint": "https://wave-control.up.railway.app/api/v1",
124
+ "workspaceId": "my-workspace",
125
+ "projectId": "wave-orchestration",
126
+ "authTokenEnvVar": "WAVE_CONTROL_AUTH_TOKEN",
127
+ "reportMode": "metadata-plus-selected",
128
+ "uploadArtifactKinds": [
129
+ "trace-run-metadata",
130
+ "trace-quality",
131
+ "benchmark-results"
132
+ ]
133
+ }
134
+ }
135
+ ```
136
+
137
+ Lane overrides may refine the same surface under `lanes.<lane>.waveControl`.
138
+
139
+ For a single run, operators can disable Wave Control reporting entirely with:
140
+
141
+ ```bash
142
+ pnpm exec wave launch --lane main --no-telemetry
143
+ ```
144
+
145
+ That suppresses the local telemetry spool and remote delivery for that invocation, while leaving the canonical runtime artifacts and local control-plane state intact.
146
+
147
+ ## Delivery Model
148
+
149
+ Wave Control reporting should:
150
+
151
+ - append local telemetry first
152
+ - queue pending uploads under `.tmp/<lane>-wave-launcher/control-plane/telemetry/`
153
+ - respect `waveControl.uploadArtifactKinds` before uploading any selected artifact body
154
+ - cap pending remote uploads with `waveControl.maxPendingEvents` by dropping the oldest queued remote-delivery files, while keeping the local `events.jsonl` stream intact
155
+ - retry delivery with idempotency keys
156
+ - never fail a live run, proof registration, or benchmark because the network is unavailable
157
+
158
+ The Railway-hosted `services/wave-control` service is an analysis surface, not the scheduler of record.
159
+
160
+ The service package lives under `services/wave-control/`.
161
+
162
+ For durable telemetry retention, attach Railway Postgres to `wave-control` so the
163
+ service receives `DATABASE_URL`. Without that variable, the service falls back to the
164
+ in-memory store and only keeps data until the process restarts.
@@ -0,0 +1,131 @@
1
+ ---
2
+ summary: "Lessons from Waves 4-9 on what makes future waves succeed or fail."
3
+ read_when:
4
+ - Drafting a new wave
5
+ - Splitting or renumbering future waves
6
+ - Deciding whether a wave should target repo-landed, pilot-live, or above
7
+ title: "Wave Planning Lessons"
8
+ ---
9
+
10
+ # Wave Planning Lessons
11
+
12
+ This document captures the practical lessons from Waves 4-9. The main theme is
13
+ simple: waves succeed when the declared maturity target, the owned slices, the
14
+ runtime setup, and the closure artifacts all describe the same truth.
15
+
16
+ ## 1. One honest maturity jump per wave
17
+
18
+ - Treat `repo-landed`, `pilot-live`, `qa-proved`, `fleet-ready`,
19
+ `cutover-ready`, and `deprecation-ready` as materially different bars.
20
+ - A wave should promote a component by one honest maturity step, not silently
21
+ combine multiple levels of proof in one broad plan.
22
+ - If a wave only lands code and tests, the target is usually `repo-landed`, not
23
+ `pilot-live`.
24
+ - If a wave claims `pilot-live` or above, the wave must own real deploy/live
25
+ proof and rollback evidence.
26
+
27
+ ## 2. Live-proof waves are a different class of wave
28
+
29
+ - `pilot-live` and above need an explicit live-proof owner, not just
30
+ implementation agents plus A8/A9/A0.
31
+ - Live-proof waves need a canonical proof bundle under `.tmp/` and one owned
32
+ operations runbook under `docs/plans/operations/`.
33
+ - The proof bundle must contain restart or rollback evidence, not only one-shot
34
+ success.
35
+ - External operator commands and captured evidence must be part of the authored
36
+ wave, not improvised during execution.
37
+
38
+ ## 3. Component promotions must map to owned slices
39
+
40
+ - Every promoted component needs one or more implementation owners and one
41
+ shared proof story.
42
+ - If multiple agents contribute to one promoted component, their slices must be
43
+ obviously complementary, not overlapping guesses.
44
+ - Shared components should not cause one agent to be retried just because a
45
+ sibling owner is still finishing; each agent must be able to complete its own
46
+ slice honestly.
47
+
48
+ ## 4. Deliverables must be explicit and machine-checkable
49
+
50
+ - Every implementation agent should declare `### Deliverables`.
51
+ - For live-proof waves, use `### Proof artifacts` in addition to deliverables.
52
+ - Deliverables should be exact files or artifact manifests, not vague “test
53
+ coverage” or “docs updated” expectations.
54
+ - Missing deliverables should fail the wave even if the code mostly landed.
55
+
56
+ ## 5. Closure must update the shared planning truth
57
+
58
+ - A9 should always update `current-state`, `master-plan`, `migration`, and the
59
+ component cutover matrix when a wave changes what later waves may safely
60
+ assume.
61
+ - The evaluator should reject a wave if the repo’s planning truth still implies
62
+ an older maturity level after the code has landed.
63
+ - Shared-plan closure is not paperwork; it is part of architecture truth.
64
+
65
+ ## 6. Use A8 to reconcile reality before docs and evaluation
66
+
67
+ - A8 is the place to detect contradictions between slices, missing ownership,
68
+ and proof gaps before A9 and A0 run.
69
+ - A8 should judge `ready-for-doc-closure` versus `needs-more-work` based on the
70
+ landed artifact set, not on agent intent.
71
+ - Waves were materially more reliable once A8 became a true closure gate rather
72
+ than optional synthesis.
73
+
74
+ ## 7. Runtime setup matters as much as wave prose
75
+
76
+ - Do not use small fixed turn caps for synthesis-heavy or closure-heavy agents.
77
+ Bound them with `budget.minutes`, not `budget.turns`.
78
+ - Pin exact model and reasoning settings for each runtime. Ambiguous profiles
79
+ create unclear failure modes.
80
+ - Avoid cross-runtime fallback on live-proof or deploy-sensitive slices unless
81
+ there is a very good reason.
82
+ - Context7 bundle references should be explicit and resolvable; unresolved bundles create noise instead
83
+ of help.
84
+
85
+ ## 8. Repo-local proof and live proof are different
86
+
87
+ - Repo-local tests and docs can justify `repo-landed`.
88
+ - Live host validation, admitted runtime behavior, rollback drills, and operator
89
+ surfaces are what justify `pilot-live` and above.
90
+ - Do not treat “the code exists” as “the deployment works.”
91
+
92
+ ## 9. Architecture-facing status surfaces must be future-safe
93
+
94
+ - Status and projection code should be keyed to the real future topology, not
95
+ the smallest test case that passes today.
96
+ - If a status model will later carry multiple runtime classes, providers, or
97
+ lanes, the substrate must preserve that identity now.
98
+ - Closed enums and typed contracts should be validated as closed enums and typed
99
+ contracts, not accepted as arbitrary strings.
100
+
101
+ ## 10. The best waves are narrow, layered, and boring
102
+
103
+ - Narrow waves close more reliably than broad waves.
104
+ - A good wave answers:
105
+ - what exact maturity level is being claimed
106
+ - what exact artifacts prove it
107
+ - who owns repo implementation
108
+ - who owns live proof, if any
109
+ - what A9 must update
110
+ - what A0 must refuse to overclaim
111
+ - If a wave still sounds ambitious and fuzzy after writing the deliverables,
112
+ split it again.
113
+
114
+ ## 11. Future-wave checklist
115
+
116
+ - Does the component promotion match the real maturity level being claimed?
117
+ - Does every promoted component have an implementation owner?
118
+ - If the target is `pilot-live` or above, is there an explicit live-proof owner?
119
+ - Are deliverables and proof artifacts exact and machine-checkable?
120
+ - Are current-state and matrix updates part of A9 closure?
121
+ - Are A8 and A0 told what would make the wave fail honestly?
122
+ - Are runtime pins, Context7 bundles, and budgets specific enough to avoid
123
+ preventable execution failures?
124
+ - Would a reviewer understand the difference between “code landed” and
125
+ “component promoted” just by reading the wave file?
126
+
127
+ ## Bottom line
128
+
129
+ The successful waves were not the ones with the most code. They were the ones
130
+ where the wave file, the runtime setup, the artifacts, and the planning docs all
131
+ made the same claim at the same level of maturity.
package/package.json CHANGED
@@ -1,15 +1,15 @@
1
1
  {
2
2
  "name": "@chllming/wave-orchestration",
3
- "version": "0.6.3",
3
+ "version": "0.7.1",
4
4
  "license": "MIT",
5
5
  "description": "Generic wave-based multi-agent orchestration for repository work.",
6
6
  "repository": {
7
7
  "type": "git",
8
- "url": "git+https://github.com/chllming/wave-orchestration.git"
8
+ "url": "git+https://github.com/chllming/agent-wave-orchestrator.git"
9
9
  },
10
- "homepage": "https://github.com/chllming/wave-orchestration#readme",
10
+ "homepage": "https://github.com/chllming/agent-wave-orchestrator#readme",
11
11
  "bugs": {
12
- "url": "https://github.com/chllming/wave-orchestration/issues"
12
+ "url": "https://github.com/chllming/agent-wave-orchestrator/issues"
13
13
  },
14
14
  "publishConfig": {
15
15
  "access": "public"
@@ -41,6 +41,7 @@
41
41
  "context7:api-check": "bash scripts/context7-export-env.sh run bash scripts/context7-api-check.sh",
42
42
  "research:import-agent-context": "node scripts/research/import-agent-context-archive.mjs scripts/research/manifests/agent-context-expanded-2026-03-22.mjs",
43
43
  "research:index-agent-context": "node scripts/research/generate-agent-context-indexes.mjs",
44
+ "research:sync-planner-context7": "node scripts/research/sync-planner-context7-bundle.mjs",
44
45
  "research:refresh-agent-context": "pnpm research:import-agent-context && pnpm research:index-agent-context",
45
46
  "test": "vitest run --config vitest.config.ts",
46
47
  "wave": "node scripts/wave.mjs",
@@ -2,6 +2,46 @@
2
2
  "schemaVersion": 1,
3
3
  "packageName": "@chllming/wave-orchestration",
4
4
  "releases": [
5
+ {
6
+ "version": "0.7.1",
7
+ "date": "2026-03-23",
8
+ "summary": "Run-control hardening, completed-with-drift reconcile preservation, live Codex ceiling visibility, and 0.7.1 release-surface alignment.",
9
+ "features": [
10
+ "Fresh live launches now clear stale auto-generated relaunch plans by default, so explicit wave starts recompute the implementation fan-out unless `--resume-control-state` is passed.",
11
+ "`wave control status` now treats the active attempt as the authoritative live fan-out instead of replaying stale rerun intent or unrelated closure blockers.",
12
+ "Historical `reconcile-status` now preserves previously authoritative completed waves as `completed_with_drift` when the only mismatch is prompt-hash drift.",
13
+ "Live executor overlays now always write `launch-preview.json`, and Codex summaries record an observed turn ceiling when the runtime reports one.",
14
+ "Shipped package docs, migration guidance, sample-wave references, and npm publishing instructions now point at the `0.7.1` release surface."
15
+ ],
16
+ "manualSteps": [
17
+ "If you intentionally want to reuse a prior auto-generated relaunch selection on a fresh live start, pass `--resume-control-state` explicitly.",
18
+ "Use `pnpm exec wave dashboard --lane <lane> --attach current` or `--attach global` to reattach to live tmux-backed dashboards without resolving sockets or session names by hand.",
19
+ "If an adopted `0.6.x` repo fails `wave doctor` after the `0.7.x` upgrade, sync the repo-owned planner starter surface (`docs/agents/wave-planner-role.md`, `skills/role-planner/`, `docs/context7/planner-agent/`, `docs/reference/wave-planning-lessons.md`, and the `planner-agentic` bundle entry) before relying on planner-aware validation."
20
+ ],
21
+ "breaking": false
22
+ },
23
+ {
24
+ "version": "0.7.0",
25
+ "date": "2026-03-23",
26
+ "summary": "Unified wave control operator CLI, canonical control-plane event log, Wave Control telemetry, live-wave orchestration refresh, and resident orchestrator support.",
27
+ "features": [
28
+ "Unified `wave control` CLI with `status`, `task`, `rerun`, `proof`, and `telemetry` sub-surfaces replacing `wave coord`/`wave retry`/`wave proof` as the preferred operator interface.",
29
+ "Canonical control-plane event log under `.tmp/<lane>-wave-launcher/control-plane/` with event-sourced materialization for proof bundles, rerun requests, operator tasks, and attempt lifecycle.",
30
+ "Wave Control telemetry: local-first event queueing with best-effort batch delivery, configurable report modes, selective artifact upload, and per-category capture toggles.",
31
+ "Live-wave orchestration refresh that keeps coordination surfaces, clarification triage, and dashboard metrics current during active execution.",
32
+ "Resident orchestrator support via `--resident-orchestrator` for long-running non-owning monitoring sessions.",
33
+ "Native and external benchmark telemetry with failure-review validity classification and config attestation hashing.",
34
+ "Stable dashboard reattach via `wave dashboard --attach current|global`, plus live `launch-preview.json` artifacts that preserve observed Codex turn ceilings without pretending Wave set them.",
35
+ "Historical `reconcile-status` now preserves previously authoritative completed waves as completed-with-drift when the only mismatch is prompt-hash drift.",
36
+ "Fresh live launches now clear stale auto-generated relaunch plans by default, while `wave control status` treats the active attempt as the authoritative fan-out instead of replaying stale relaunch state."
37
+ ],
38
+ "manualSteps": [
39
+ "Existing `wave coord`, `wave retry`, and `wave proof` commands remain available as compatibility surfaces. No migration required, but new operator docs prefer `wave control`.",
40
+ "To enable Wave Control telemetry, add a `waveControl` section to `wave.config.json` with at minimum an `endpoint` and `workspaceId`. Pass `--no-telemetry` to disable for a single run.",
41
+ "If an adopted `0.6.x` repo fails `wave doctor` after the `0.7.x` upgrade, sync the repo-owned planner starter surface (`docs/agents/wave-planner-role.md`, `skills/role-planner/`, `docs/context7/planner-agent/`, `docs/reference/wave-planning-lessons.md`, and the `planner-agentic` bundle entry) before relying on planner-aware validation."
42
+ ],
43
+ "breaking": false
44
+ },
5
45
  {
6
46
  "version": "0.6.3",
7
47
  "date": "2026-03-22",
@@ -14,6 +14,12 @@ export const TOPIC_DEFINITIONS = [
14
14
  description:
15
15
  "Planning topology, verifier and replanner loops, protocol-driven coordination, and blackboard-aware orchestration patterns for multi-agent systems.",
16
16
  },
17
+ {
18
+ id: "agent-cooperation-and-coordination",
19
+ title: "Agent Cooperation and Coordination",
20
+ description:
21
+ "Benchmarks and failure analyses for inter-agent cooperation, commitment tracking, communication quality, negotiation, and teammate-style coordination.",
22
+ },
17
23
  {
18
24
  id: "long-running-agents-and-compaction",
19
25
  title: "Long-Running Agents and Compaction",
@@ -103,6 +109,15 @@ const SKILLS_TOPIC_OVERRIDE_SLUGS = new Set([
103
109
  "meta-context-engineering-via-agentic-skill-evolution",
104
110
  ]);
105
111
 
112
+ const COOPERATION_TOPIC_OVERRIDE_SLUGS = new Set([
113
+ "cooperbench-why-coding-agents-cannot-be-your-teammates-yet",
114
+ "why-do-multi-agent-llm-systems-fail",
115
+ "systematic-failures-in-collective-reasoning-under-distributed-information-in-multi-agent-llms",
116
+ "silo-bench-a-scalable-environment-for-evaluating-distributed-coordination-in-multi-agent-llm-systems",
117
+ "dpbench-large-language-models-struggle-with-simultaneous-coordination",
118
+ "multi-agent-teams-hold-experts-back",
119
+ ]);
120
+
106
121
  function escapeInlinePipes(value) {
107
122
  return String(value ?? "").replaceAll("|", "\\|");
108
123
  }
@@ -252,6 +267,9 @@ export function inferTopics(entry, section = null) {
252
267
  if (SKILLS_TOPIC_OVERRIDE_SLUGS.has(entry.slug)) {
253
268
  topics.push("skills-and-procedural-memory");
254
269
  }
270
+ if (COOPERATION_TOPIC_OVERRIDE_SLUGS.has(entry.slug)) {
271
+ topics.push("agent-cooperation-and-coordination");
272
+ }
255
273
 
256
274
  if (hasDeclaredTopics) {
257
275
  return unique(topics);
@@ -3,6 +3,7 @@ import baseManifest from "./harness-and-blackboard-2026-03-21.mjs";
3
3
  const TOPICS = {
4
4
  HARNESS: "harnesses-and-practice",
5
5
  PLANNING: "planning-and-orchestration",
6
+ COOPERATION: "agent-cooperation-and-coordination",
6
7
  LONG_RUNNING: "long-running-agents-and-compaction",
7
8
  SKILLS: "skills-and-procedural-memory",
8
9
  BLACKBOARD: "blackboard-and-shared-workspaces",
@@ -521,6 +522,22 @@ const planningManifest = [
521
522
  fit: "Useful benchmark for testing whether coordination-heavy planning systems scale beyond serial reasoning.",
522
523
  topics: [TOPICS.PLANNING, TOPICS.REPO],
523
524
  }),
525
+ arxivPaper("2601.13295", {
526
+ title: "CooperBench: Why Coding Agents Cannot be Your Teammates Yet",
527
+ slug: "cooperbench-why-coding-agents-cannot-be-your-teammates-yet",
528
+ authors:
529
+ "Arpandeep Khatua, Hao Zhu, Peter Tran, Arya Prabhudesai, Frederic Sadrieh, Johann K. Lieberwirth, Xinkai Yu, Yicheng Fu, Michael J. Ryan, Jiaxin Pei, Diyi Yang",
530
+ year: 2026,
531
+ researchBucket: "P0 direct hits",
532
+ mapsTo:
533
+ "Collaborative coding benchmark for inter-agent cooperation, communication quality, commitment tracking, and coordination failures.",
534
+ fit: "Direct benchmark for whether coding agents behave like usable teammates instead of isolated solo solvers.",
535
+ additionalSource: "https://cooperbench.com",
536
+ additionalPdf: "https://cooperbench.com/static/pdfs/main.pdf",
537
+ notes:
538
+ "Project site hosts the same paper PDF plus leaderboard, dataset, and trajectory viewer for the benchmark.",
539
+ topics: [TOPICS.PLANNING, TOPICS.COOPERATION, TOPICS.REPO],
540
+ }),
524
541
  arxivPaper("2602.01011", {
525
542
  title: "Multi-Agent Teams Hold Experts Back",
526
543
  slug: "multi-agent-teams-hold-experts-back",