@chllming/wave-orchestration 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +41 -0
- package/README.md +549 -0
- package/docs/agents/wave-deploy-verifier-role.md +34 -0
- package/docs/agents/wave-documentation-role.md +30 -0
- package/docs/agents/wave-evaluator-role.md +43 -0
- package/docs/agents/wave-infra-role.md +34 -0
- package/docs/agents/wave-integration-role.md +32 -0
- package/docs/agents/wave-launcher-role.md +37 -0
- package/docs/context7/bundles.json +91 -0
- package/docs/plans/component-cutover-matrix.json +112 -0
- package/docs/plans/component-cutover-matrix.md +49 -0
- package/docs/plans/context7-wave-orchestrator.md +130 -0
- package/docs/plans/current-state.md +44 -0
- package/docs/plans/master-plan.md +16 -0
- package/docs/plans/migration.md +23 -0
- package/docs/plans/wave-orchestrator.md +254 -0
- package/docs/plans/waves/wave-0.md +165 -0
- package/docs/reference/github-packages-setup.md +52 -0
- package/docs/reference/migration-0.2-to-0.5.md +622 -0
- package/docs/reference/npmjs-trusted-publishing.md +55 -0
- package/docs/reference/repository-guidance.md +18 -0
- package/docs/reference/runtime-config/README.md +85 -0
- package/docs/reference/runtime-config/claude.md +105 -0
- package/docs/reference/runtime-config/codex.md +81 -0
- package/docs/reference/runtime-config/opencode.md +93 -0
- package/docs/research/agent-context-sources.md +57 -0
- package/docs/roadmap.md +626 -0
- package/package.json +53 -0
- package/releases/manifest.json +101 -0
- package/scripts/context7-api-check.sh +21 -0
- package/scripts/context7-export-env.sh +52 -0
- package/scripts/research/agent-context-archive.mjs +472 -0
- package/scripts/research/generate-agent-context-indexes.mjs +85 -0
- package/scripts/research/import-agent-context-archive.mjs +793 -0
- package/scripts/research/manifests/harness-and-blackboard-2026-03-21.mjs +201 -0
- package/scripts/wave-autonomous.mjs +13 -0
- package/scripts/wave-cli-bootstrap.mjs +27 -0
- package/scripts/wave-dashboard.mjs +11 -0
- package/scripts/wave-human-feedback.mjs +11 -0
- package/scripts/wave-launcher.mjs +11 -0
- package/scripts/wave-local-executor.mjs +13 -0
- package/scripts/wave-orchestrator/agent-state.mjs +416 -0
- package/scripts/wave-orchestrator/autonomous.mjs +367 -0
- package/scripts/wave-orchestrator/clarification-triage.mjs +605 -0
- package/scripts/wave-orchestrator/config.mjs +848 -0
- package/scripts/wave-orchestrator/context7.mjs +464 -0
- package/scripts/wave-orchestrator/coord-cli.mjs +286 -0
- package/scripts/wave-orchestrator/coordination-store.mjs +987 -0
- package/scripts/wave-orchestrator/coordination.mjs +768 -0
- package/scripts/wave-orchestrator/dashboard-renderer.mjs +254 -0
- package/scripts/wave-orchestrator/dashboard-state.mjs +473 -0
- package/scripts/wave-orchestrator/dep-cli.mjs +219 -0
- package/scripts/wave-orchestrator/docs-queue.mjs +75 -0
- package/scripts/wave-orchestrator/executors.mjs +385 -0
- package/scripts/wave-orchestrator/feedback.mjs +372 -0
- package/scripts/wave-orchestrator/install.mjs +540 -0
- package/scripts/wave-orchestrator/launcher.mjs +3879 -0
- package/scripts/wave-orchestrator/ledger.mjs +332 -0
- package/scripts/wave-orchestrator/local-executor.mjs +263 -0
- package/scripts/wave-orchestrator/replay.mjs +246 -0
- package/scripts/wave-orchestrator/roots.mjs +10 -0
- package/scripts/wave-orchestrator/routing-state.mjs +542 -0
- package/scripts/wave-orchestrator/shared.mjs +405 -0
- package/scripts/wave-orchestrator/terminals.mjs +209 -0
- package/scripts/wave-orchestrator/traces.mjs +1094 -0
- package/scripts/wave-orchestrator/wave-files.mjs +1923 -0
- package/scripts/wave.mjs +103 -0
- package/wave.config.json +115 -0
package/docs/roadmap.md
ADDED
|
@@ -0,0 +1,626 @@
|
|
|
1
|
+
# Wave Orchestrator Roadmap
|
|
2
|
+
|
|
3
|
+
This roadmap records the highest-value upgrades for Wave Orchestration while preserving the current architecture:
|
|
4
|
+
|
|
5
|
+
- lane-scoped runs
|
|
6
|
+
- wave markdown as the authored plan surface
|
|
7
|
+
- multi-role agents with explicit ownership
|
|
8
|
+
- component promotions, exit contracts, documentation stewardship, and evaluator closure
|
|
9
|
+
|
|
10
|
+
The goal is not to replace waves with a different orchestration model. The goal is to make the existing wave model more durable for long-running, multi-agent, multi-lane repository work.
|
|
11
|
+
|
|
12
|
+
## Current Status
|
|
13
|
+
|
|
14
|
+
As of the current repository state:
|
|
15
|
+
|
|
16
|
+
- the package-first install and upgrade flow is shipped
|
|
17
|
+
- the canonical coordination JSONL store, rendered board projection, compiled inboxes, per-wave ledger, docs queue, integration summaries, and trace bundles are shipped
|
|
18
|
+
- `A8` integration stewardship and staged closure are shipped
|
|
19
|
+
- orchestrator-first clarification triage and human-escalation artifacts are shipped
|
|
20
|
+
- per-agent executor profiles, per-lane runtime policy, hard runtime mix targets, retry-time fallback, and generic budgets are shipped
|
|
21
|
+
- required inbound cross-lane dependency tickets now block both autonomous wave launch and lane finalization
|
|
22
|
+
- integration summaries now carry actionable evidence for claims, interface drift, proof gaps, docs gaps, and deploy or ops risk
|
|
23
|
+
- cumulative `quality.json` metrics and internal, read-only hermetic trace replay validation are shipped
|
|
24
|
+
- capability-targeted requests now become explicit helper assignments with deterministic assignee selection, ledger/traces coverage, and closure barriers
|
|
25
|
+
- typed cross-lane dependency workflows now have operator commands, per-wave dependency snapshots, and replay-visible gating
|
|
26
|
+
|
|
27
|
+
The remaining roadmap work is mostly about extending those foundations rather than inventing a new orchestration model.
|
|
28
|
+
|
|
29
|
+
## Design Position
|
|
30
|
+
|
|
31
|
+
The recent harness and blackboard sources point in the same direction:
|
|
32
|
+
|
|
33
|
+
- compaction alone is not enough for long-running work
|
|
34
|
+
- append-only communication logs are useful, but not sufficient as the canonical coordination substrate
|
|
35
|
+
- messaging quality matters less than whether the system can integrate distributed findings into a coherent decision
|
|
36
|
+
- runtime choice should be treated as authored plan data, not only as a launch-time default
|
|
37
|
+
- clarification should stay inside the harness loop until the orchestrator can prove that human input is actually required
|
|
38
|
+
- the harness needs reproducible traces, explicit loop control, and durable state across sessions
|
|
39
|
+
|
|
40
|
+
Wave Orchestration already has a strong base:
|
|
41
|
+
|
|
42
|
+
- wave parsing and role imports
|
|
43
|
+
- lane-scoped state under `.tmp/`
|
|
44
|
+
- canonical coordination JSONL plus generated message boards
|
|
45
|
+
- compiled shared summaries and per-agent inboxes
|
|
46
|
+
- per-agent executor overrides, profiles, per-lane runtime policy, and retry fallback for Codex, Claude, and OpenCode
|
|
47
|
+
- structured proof and documentation markers
|
|
48
|
+
- integration, documentation, and evaluator closure sweep
|
|
49
|
+
- a file-backed human feedback queue plus orchestrator-first clarification triage
|
|
50
|
+
- ledger, docs queue, and trace bundles
|
|
51
|
+
|
|
52
|
+
The next step is to evolve the harness from “agents write progress notes” into “agents coordinate through typed shared state, compiled inboxes, runtime-aware planning, and an explicit integration phase.”
|
|
53
|
+
|
|
54
|
+
## What To Keep
|
|
55
|
+
|
|
56
|
+
These parts of the current model should stay:
|
|
57
|
+
|
|
58
|
+
- Wave markdown remains the authored planning surface.
|
|
59
|
+
- Lanes remain the top-level isolation unit for separate workstreams.
|
|
60
|
+
- Agent IDs and role prompts remain the basic execution model.
|
|
61
|
+
- The per-agent `### Executor` section remains the planning surface for runtime choice; it just becomes richer and more enforceable.
|
|
62
|
+
- Exit contracts, component promotions, documentation stewardship, and evaluator closure remain the primary completion controls.
|
|
63
|
+
- The markdown message board remains as a human-readable audit view.
|
|
64
|
+
|
|
65
|
+
## Highest-Value Addons
|
|
66
|
+
|
|
67
|
+
### 1. Canonical Coordination Store
|
|
68
|
+
|
|
69
|
+
Add a lane- and wave-scoped structured coordination store and treat the markdown message board as a rendered view.
|
|
70
|
+
|
|
71
|
+
Why this is highest value:
|
|
72
|
+
|
|
73
|
+
- The current board format in `scripts/wave-orchestrator/coordination.mjs` is easy to read but weak as machine state.
|
|
74
|
+
- The parser depends on regexes and free-text fields.
|
|
75
|
+
- Blackboard-style coordination works best when requests, claims, evidence, blockers, and decisions are explicit typed objects.
|
|
76
|
+
|
|
77
|
+
Proposed artifact:
|
|
78
|
+
|
|
79
|
+
- `.tmp/<lane>-wave-launcher/coordination/wave-<n>.jsonl`
|
|
80
|
+
|
|
81
|
+
Proposed record kinds:
|
|
82
|
+
|
|
83
|
+
- `request`
|
|
84
|
+
- `ack`
|
|
85
|
+
- `claim`
|
|
86
|
+
- `evidence`
|
|
87
|
+
- `decision`
|
|
88
|
+
- `blocker`
|
|
89
|
+
- `handoff`
|
|
90
|
+
- `human-feedback`
|
|
91
|
+
- `integration-summary`
|
|
92
|
+
|
|
93
|
+
Required fields:
|
|
94
|
+
|
|
95
|
+
- `id`
|
|
96
|
+
- `kind`
|
|
97
|
+
- `wave`
|
|
98
|
+
- `lane`
|
|
99
|
+
- `agentId`
|
|
100
|
+
- `targets`
|
|
101
|
+
- `status`
|
|
102
|
+
- `priority`
|
|
103
|
+
- `artifactRefs`
|
|
104
|
+
- `dependsOn`
|
|
105
|
+
- `closureCondition`
|
|
106
|
+
- `createdAt`
|
|
107
|
+
- `updatedAt`
|
|
108
|
+
- `confidence`
|
|
109
|
+
- `summary`
|
|
110
|
+
- `detail`
|
|
111
|
+
|
|
112
|
+
Compatibility rule:
|
|
113
|
+
|
|
114
|
+
- keep writing the markdown board, but generate it from the coordination store and append a short human-readable projection
|
|
115
|
+
|
|
116
|
+
### 2. Agent Inbox Compiler
|
|
117
|
+
|
|
118
|
+
Stop injecting a raw board tail into every agent prompt. Compile role-specific inboxes from the canonical coordination state.
|
|
119
|
+
|
|
120
|
+
Why this is high value:
|
|
121
|
+
|
|
122
|
+
- Long-running harness guidance favors explicit handoff artifacts and short “get up to speed” paths.
|
|
123
|
+
- Raw tail snapshots are noisy and lose important old-but-still-open obligations.
|
|
124
|
+
- Multi-agent blackboard systems work when the current blackboard state determines who should act next and what they should see.
|
|
125
|
+
|
|
126
|
+
Proposed artifacts:
|
|
127
|
+
|
|
128
|
+
- `.tmp/<lane>-wave-launcher/inboxes/wave-<n>/<agent-id>.md`
|
|
129
|
+
- `.tmp/<lane>-wave-launcher/inboxes/wave-<n>/shared-summary.md`
|
|
130
|
+
|
|
131
|
+
Each inbox should contain:
|
|
132
|
+
|
|
133
|
+
- owned open requests
|
|
134
|
+
- claims that conflict with this agent’s work
|
|
135
|
+
- unresolved blockers affecting owned files or components
|
|
136
|
+
- required doc deltas
|
|
137
|
+
- human feedback relevant to that agent
|
|
138
|
+
- integration findings from prior attempts
|
|
139
|
+
- only the minimal recent audit context needed for recovery
|
|
140
|
+
|
|
141
|
+
Prompt change:
|
|
142
|
+
|
|
143
|
+
- `buildExecutionPrompt` should inject the compiled inbox plus the shared wave summary, not the last N characters of the board
|
|
144
|
+
|
|
145
|
+
### 3. Explicit Integration Phase Before Final Closure
|
|
146
|
+
|
|
147
|
+
Add a dedicated integration phase between implementation completion and documentation/evaluator closure.
|
|
148
|
+
|
|
149
|
+
Why this is essential:
|
|
150
|
+
|
|
151
|
+
- Silo-Bench shows a communication-reasoning gap: agents can exchange enough information and still fail to integrate it.
|
|
152
|
+
- DOVA’s strongest pattern is ensemble breadth, blackboard transparency, then iterative refinement.
|
|
153
|
+
- The current closure sweep checks implementation, docs, evaluator, and infra, but does not assign integration as a first-class role.
|
|
154
|
+
|
|
155
|
+
Proposed model:
|
|
156
|
+
|
|
157
|
+
- reserve a configurable integration steward, default `A8`
|
|
158
|
+
- the integration steward does not own feature implementation
|
|
159
|
+
- it owns synthesis, conflict detection, integration risk, and open dependency reconciliation
|
|
160
|
+
|
|
161
|
+
Integration outputs:
|
|
162
|
+
|
|
163
|
+
- `.tmp/<lane>-wave-launcher/integration/wave-<n>.json`
|
|
164
|
+
- `.tmp/<lane>-wave-launcher/integration/wave-<n>.md`
|
|
165
|
+
|
|
166
|
+
Required fields:
|
|
167
|
+
|
|
168
|
+
- open claims
|
|
169
|
+
- conflicting claims
|
|
170
|
+
- unresolved blockers
|
|
171
|
+
- changed interfaces
|
|
172
|
+
- cross-component impacts
|
|
173
|
+
- proof gaps
|
|
174
|
+
- doc gaps
|
|
175
|
+
- release/deploy risks
|
|
176
|
+
- final recommendation: `ready-for-doc-closure` or `needs-more-work`
|
|
177
|
+
|
|
178
|
+
Gate rule:
|
|
179
|
+
|
|
180
|
+
- the documentation steward and evaluator do not run their final pass until the integration steward emits a final integration summary
|
|
181
|
+
|
|
182
|
+
### 4. Durable Wave Task Ledger
|
|
183
|
+
|
|
184
|
+
Add a machine-readable wave ledger separate from the coordination log.
|
|
185
|
+
|
|
186
|
+
Why this matters:
|
|
187
|
+
|
|
188
|
+
- Anthropic’s initializer/progress/feature-list pattern and OpenAI’s repository-as-system-of-record point to the same need: durable task state
|
|
189
|
+
- a coordination stream is not the same thing as a canonical ledger of what is left
|
|
190
|
+
|
|
191
|
+
Proposed artifact:
|
|
192
|
+
|
|
193
|
+
- `.tmp/<lane>-wave-launcher/ledger/wave-<n>.json`
|
|
194
|
+
|
|
195
|
+
Track:
|
|
196
|
+
|
|
197
|
+
- tasks and subgoals derived from the wave
|
|
198
|
+
- owner agent
|
|
199
|
+
- current state
|
|
200
|
+
- proof status
|
|
201
|
+
- docs status
|
|
202
|
+
- infra/deploy status
|
|
203
|
+
- dependent tasks
|
|
204
|
+
- baseline verification status
|
|
205
|
+
|
|
206
|
+
Use:
|
|
207
|
+
|
|
208
|
+
- the autonomous runner should use the ledger, not only run-state, to decide whether to continue, relaunch a role, or stop
|
|
209
|
+
|
|
210
|
+
### 5. Communication-Aware Scheduling
|
|
211
|
+
|
|
212
|
+
Use coordination state to drive execution decisions.
|
|
213
|
+
|
|
214
|
+
Why this matters:
|
|
215
|
+
|
|
216
|
+
- the current dashboard renders communication health, but the launcher and autonomous runner do not meaningfully act on it
|
|
217
|
+
- blackboard systems are strongest when blackboard state affects who runs next
|
|
218
|
+
|
|
219
|
+
Additions:
|
|
220
|
+
|
|
221
|
+
- if an agent has unacknowledged targeted requests, prioritize or relaunch that agent
|
|
222
|
+
- if a high-priority blocker remains unresolved, prevent wave completion
|
|
223
|
+
- if integration detects unresolved cross-agent contradictions, force a focused follow-up round
|
|
224
|
+
- if only documentation deltas remain, relaunch only the documentation steward
|
|
225
|
+
- if only deployment or infra proof remains, relaunch only the relevant infra/deploy role
|
|
226
|
+
|
|
227
|
+
### 6. Mixed-Runtime Planning And Runtime Profiles
|
|
228
|
+
|
|
229
|
+
Treat executor choice as authored plan data at wave design time, not only as a launcher default.
|
|
230
|
+
|
|
231
|
+
Why this is useful:
|
|
232
|
+
|
|
233
|
+
- The current harness already supports per-agent executor selection, but the planning surface is too narrow for real mixed-runtime lane design.
|
|
234
|
+
- Different roles benefit from different runtimes: implementation, evaluation, documentation, integration, and infra/deploy do not need identical execution substrates.
|
|
235
|
+
- The OpenAI App Server pattern and OPENDEV's provider-conditional harness design both point toward a stable harness loop with swappable underlying runtimes.
|
|
236
|
+
|
|
237
|
+
Wave file change:
|
|
238
|
+
|
|
239
|
+
- strengthen `### Executor` from optional override into a first-class planning section for roles that need non-default runtime behavior
|
|
240
|
+
- allow runtime profiles plus inline overrides
|
|
241
|
+
|
|
242
|
+
Recommended keys:
|
|
243
|
+
|
|
244
|
+
- `id`
|
|
245
|
+
- `profile`
|
|
246
|
+
- `model`
|
|
247
|
+
- `fallbacks`
|
|
248
|
+
- `tags`
|
|
249
|
+
- `budget.turns`
|
|
250
|
+
- `budget.minutes`
|
|
251
|
+
- `codex.command`
|
|
252
|
+
- `codex.sandbox`
|
|
253
|
+
- `claude.command`
|
|
254
|
+
- `claude.agent`
|
|
255
|
+
- `claude.permission_mode`
|
|
256
|
+
- `claude.permission_prompt_tool`
|
|
257
|
+
- `claude.max_turns`
|
|
258
|
+
- `claude.mcp_config`
|
|
259
|
+
- `claude.settings`
|
|
260
|
+
- `claude.output_format`
|
|
261
|
+
- `claude.allowed_tools`
|
|
262
|
+
- `claude.disallowed_tools`
|
|
263
|
+
- `opencode.command`
|
|
264
|
+
- `opencode.agent`
|
|
265
|
+
- `opencode.attach`
|
|
266
|
+
- `opencode.format`
|
|
267
|
+
- `opencode.steps`
|
|
268
|
+
- `opencode.instructions`
|
|
269
|
+
- `opencode.permission`
|
|
270
|
+
|
|
271
|
+
Lane config additions:
|
|
272
|
+
|
|
273
|
+
- `executors.profiles.<profile-name>`
|
|
274
|
+
- `lanes.<lane>.runtimeMixTargets`
|
|
275
|
+
- `lanes.<lane>.defaultExecutorByRole`
|
|
276
|
+
- `lanes.<lane>.fallbackExecutorOrder`
|
|
277
|
+
|
|
278
|
+
Example runtime mix target:
|
|
279
|
+
|
|
280
|
+
- `codex: 3`
|
|
281
|
+
- `claude: 3`
|
|
282
|
+
- `opencode: 2`
|
|
283
|
+
|
|
284
|
+
Use:
|
|
285
|
+
|
|
286
|
+
- planners assign runtime and runtime profile inside the wave, not only at launch time
|
|
287
|
+
- launcher validation accepts only supported runtime fields and rejects silent drift
|
|
288
|
+
- the orchestrator can reassign an agent only when the fallback policy allows it
|
|
289
|
+
- dashboards, ledgers, and traces report runtime by agent, by role, and by fallback path
|
|
290
|
+
|
|
291
|
+
### 7. Capability-Based Volunteer Roles
|
|
292
|
+
|
|
293
|
+
Extend fixed roles with optional capability-based volunteering.
|
|
294
|
+
|
|
295
|
+
Why this is useful:
|
|
296
|
+
|
|
297
|
+
- the blackboard papers show that rigid controller knowledge does not scale well
|
|
298
|
+
- the current wave format already supports multiple roles; capability tags make routing smarter without removing explicit ownership
|
|
299
|
+
|
|
300
|
+
Wave file addition:
|
|
301
|
+
|
|
302
|
+
- optional `### Capabilities`
|
|
303
|
+
|
|
304
|
+
Examples:
|
|
305
|
+
|
|
306
|
+
- `integration`
|
|
307
|
+
- `docs-shared-plan`
|
|
308
|
+
- `infra-k8s`
|
|
309
|
+
- `deploy-railway`
|
|
310
|
+
- `schema-migration`
|
|
311
|
+
- `frontend-validation`
|
|
312
|
+
|
|
313
|
+
Use:
|
|
314
|
+
|
|
315
|
+
- requests can target a named agent or a capability class
|
|
316
|
+
- the launcher can assign the next step to the least-busy matching agent or a configured preferred role
|
|
317
|
+
|
|
318
|
+
### 8. Orchestrator-First Clarification And Feedback Triage
|
|
319
|
+
|
|
320
|
+
Put the orchestrator, not the human, on the first line for unresolved questions.
|
|
321
|
+
|
|
322
|
+
Why:
|
|
323
|
+
|
|
324
|
+
- the current feedback queue is useful but separate from the main shared workspace
|
|
325
|
+
- autonomous mode currently treats pending feedback as a blocking condition rather than as a triage problem the harness should try to solve
|
|
326
|
+
- many requests can be resolved from repository guidance, ownership rules, prior wave decisions, or current coordination state without asking a human
|
|
327
|
+
|
|
328
|
+
Clarification ladder:
|
|
329
|
+
|
|
330
|
+
1. the agent checks its inbox, ledger, coordination store, owned files, and repo guidance
|
|
331
|
+
2. if still blocked, it emits a typed `clarification-request`
|
|
332
|
+
3. the orchestrator triages the request and either:
|
|
333
|
+
- answers directly with `orchestrator-guidance`
|
|
334
|
+
- routes it to another agent as a targeted request
|
|
335
|
+
- resolves it from existing policy or prior decisions
|
|
336
|
+
- escalates to a human when external intent is truly missing
|
|
337
|
+
4. only unresolved product, policy, safety, or externally-owned decisions become human tickets
|
|
338
|
+
|
|
339
|
+
Proposed record kinds:
|
|
340
|
+
|
|
341
|
+
- `clarification-request`
|
|
342
|
+
- `orchestrator-guidance`
|
|
343
|
+
- `resolved-by-policy`
|
|
344
|
+
- `human-escalation`
|
|
345
|
+
- `human-feedback`
|
|
346
|
+
|
|
347
|
+
Proposed artifacts:
|
|
348
|
+
|
|
349
|
+
- `.tmp/<lane>-wave-launcher/feedback/triage/wave-<n>.jsonl`
|
|
350
|
+
- `.tmp/<lane>-wave-launcher/feedback/triage/wave-<n>/pending-human.md`
|
|
351
|
+
|
|
352
|
+
Escalation policy:
|
|
353
|
+
|
|
354
|
+
- escalate only for missing business intent, conflicting top-level instructions, security or compliance ambiguity, external-system risk, or repeated failed orchestrator resolution attempts
|
|
355
|
+
- autonomous mode should drain orchestrator-resolvable clarification items before refusing to continue
|
|
356
|
+
- answered human feedback should be written back into the coordination store and wave ledger so the same question is not asked twice
|
|
357
|
+
|
|
358
|
+
### 9. Reproducible Harness Traces
|
|
359
|
+
|
|
360
|
+
The base trace-and-replay layer is now shipped. The remaining work is operator-facing replay tooling and larger continuous-history scenario sets.
|
|
361
|
+
|
|
362
|
+
Why this is mandatory:
|
|
363
|
+
|
|
364
|
+
- VeRO and EvoClaw both argue that long-running agent systems need reproducible traces and continuous-history evaluation
|
|
365
|
+
- without this, harness changes are anecdotal
|
|
366
|
+
|
|
367
|
+
Current per-attempt trace bundle:
|
|
368
|
+
|
|
369
|
+
- wave file hash
|
|
370
|
+
- prompt fingerprints
|
|
371
|
+
- compiled inboxes
|
|
372
|
+
- coordination store snapshot
|
|
373
|
+
- structured markers from logs
|
|
374
|
+
- exit contract outcomes
|
|
375
|
+
- integration summary
|
|
376
|
+
- evaluator verdict
|
|
377
|
+
- docs closure state
|
|
378
|
+
- runtime budgets and retries
|
|
379
|
+
- cumulative quality metrics
|
|
380
|
+
- gate snapshot and artifact-presence metadata
|
|
381
|
+
- replay context and cumulative history snapshot for hermetic replay
|
|
382
|
+
|
|
383
|
+
Artifact:
|
|
384
|
+
|
|
385
|
+
- `.tmp/<lane>-wave-launcher/traces/wave-<n>/attempt-<k>/`
|
|
386
|
+
|
|
387
|
+
Current contract:
|
|
388
|
+
|
|
389
|
+
- dry-run remains pre-attempt only and should not create `attempt-<k>` snapshots
|
|
390
|
+
- `traceVersion: 2` bundles are hermetic and replayable in isolation
|
|
391
|
+
- replay is read-only and revalidates recorded artifact hashes
|
|
392
|
+
- launched agents carry copied summary artifacts, and promoted-component waves carry the copied component matrix JSON
|
|
393
|
+
- legacy `traceVersion: 1` bundles remain best-effort with explicit warnings
|
|
394
|
+
- replay validation is internal today, not a public CLI
|
|
395
|
+
|
|
396
|
+
## Upgraded Architecture
|
|
397
|
+
|
|
398
|
+
### Current Model
|
|
399
|
+
|
|
400
|
+
Current flow, simplified:
|
|
401
|
+
|
|
402
|
+
1. Parse wave file.
|
|
403
|
+
2. Launch one session per agent.
|
|
404
|
+
3. Ask all agents to coordinate on a markdown board.
|
|
405
|
+
4. Parse logs and structured markers.
|
|
406
|
+
5. Run documentation closure and evaluator closure.
|
|
407
|
+
|
|
408
|
+
This is workable, but it leaves five gaps:
|
|
409
|
+
|
|
410
|
+
- communication is mostly free-text
|
|
411
|
+
- integration is implicit
|
|
412
|
+
- runtime planning is still too lane-default and not expressive enough for deliberate mixed-runtime teams
|
|
413
|
+
- clarification escalates too early to a human queue
|
|
414
|
+
- scheduling is not strongly driven by shared state
|
|
415
|
+
|
|
416
|
+
### Proposed Model
|
|
417
|
+
|
|
418
|
+
Upgraded flow, still wave- and lane-native:
|
|
419
|
+
|
|
420
|
+
1. Parse the wave file into the manifest, runtime plan, and wave ledger.
|
|
421
|
+
2. Resolve executor profiles, fallback policy, and runtime-mix targets for the lane.
|
|
422
|
+
3. Build or update the canonical coordination store.
|
|
423
|
+
4. Compile the shared summary and per-agent inboxes.
|
|
424
|
+
5. Launch implementation, infra, deploy, docs, research, or evaluation roles based on the ledger, runtime plan, and open requests.
|
|
425
|
+
6. Let the orchestrator triage clarification requests and resolve or route them before escalating to a human.
|
|
426
|
+
7. Continuously ingest structured outputs into the coordination store and ledger.
|
|
427
|
+
8. Run a dedicated integration phase to synthesize all claims and remaining gaps.
|
|
428
|
+
9. Run documentation closure using the integration summary.
|
|
429
|
+
10. Run evaluator closure using the integration summary plus final doc state.
|
|
430
|
+
11. Persist the attempt trace bundle for replay and evaluation.
|
|
431
|
+
|
|
432
|
+
## Recommended Role Model
|
|
433
|
+
|
|
434
|
+
This role model works with the current multi-role architecture and extends it rather than replacing it:
|
|
435
|
+
|
|
436
|
+
- `A0` evaluator
|
|
437
|
+
- `A8` integration steward
|
|
438
|
+
- `A9` documentation steward
|
|
439
|
+
- implementation roles, each owning explicit files and components
|
|
440
|
+
- optional infra role for identity, admission, machine conformance, or deployment substrates
|
|
441
|
+
- optional deploy verifier role for rollout, health, and operational proof
|
|
442
|
+
|
|
443
|
+
Responsibilities:
|
|
444
|
+
|
|
445
|
+
- implementation roles produce code, proofs, and doc deltas
|
|
446
|
+
- infra/deploy roles produce structured environment proof
|
|
447
|
+
- integration steward synthesizes cross-role state
|
|
448
|
+
- documentation steward reconciles shared docs and component matrix
|
|
449
|
+
- evaluator decides whether the wave is coherent enough to pass
|
|
450
|
+
|
|
451
|
+
## Runtime Planning And Lane Mix
|
|
452
|
+
|
|
453
|
+
Wave orchestration should support a deliberate runtime mix inside one lane. A lane can run `3 codex`, `2 claude`, and `2 opencode` agents as long as the wave declares which agent prefers which runtime and what fallbacks are allowed.
|
|
454
|
+
|
|
455
|
+
Recommended starting mapping for this repo:
|
|
456
|
+
|
|
457
|
+
- implementation and test-fix roles: `codex`
|
|
458
|
+
- integration steward, evaluator, and documentation steward: `claude`
|
|
459
|
+
- exploratory helper, research, and CLI-heavy ops roles: `opencode`
|
|
460
|
+
- infra and deploy roles: choose `codex` or `opencode` based on the command workflow and tooling needs, not by habit
|
|
461
|
+
|
|
462
|
+
Planning rules:
|
|
463
|
+
|
|
464
|
+
- every agent in a deliberate mixed-runtime wave should declare `### Executor`
|
|
465
|
+
- runtime reassignment during execution must preserve ownership and leave an audit record
|
|
466
|
+
- runtime profiles should capture the common presets such as `implement-fast`, `deep-review`, `docs-pass`, and `ops-triage`
|
|
467
|
+
- integration summaries should report the final runtime used by each agent and whether any fallback fired
|
|
468
|
+
|
|
469
|
+
This keeps runtime choice visible in the authored plan instead of hiding it inside CLI defaults.
|
|
470
|
+
|
|
471
|
+
## Lanes And Cross-Lane Coordination
|
|
472
|
+
|
|
473
|
+
Lanes should remain isolated in execution state but gain typed cross-lane dependency tickets.
|
|
474
|
+
|
|
475
|
+
Current strength:
|
|
476
|
+
|
|
477
|
+
- lane-scoped paths already exist
|
|
478
|
+
- an orchestrator board already exists
|
|
479
|
+
|
|
480
|
+
Upgrade:
|
|
481
|
+
|
|
482
|
+
- add `.tmp/wave-orchestrator/dependencies/<lane>.jsonl`
|
|
483
|
+
- each cross-lane dependency is a typed ticket with owner lane, requester lane, closure condition, and related waves
|
|
484
|
+
- lane autonomous mode should refuse to finalize if it has unresolved required inbound dependencies
|
|
485
|
+
|
|
486
|
+
This keeps lane isolation while making cross-lane work explicit and schedulable.
|
|
487
|
+
|
|
488
|
+
## Documentation Upgrades
|
|
489
|
+
|
|
490
|
+
The current documentation steward role is good, but it is overloaded.
|
|
491
|
+
|
|
492
|
+
Improve it by adding:
|
|
493
|
+
|
|
494
|
+
- doc delta extraction from implementation markers into a machine-readable queue
|
|
495
|
+
- explicit shared-plan reconciliation checklist
|
|
496
|
+
- component-matrix reconciliation checklist
|
|
497
|
+
- release-notes or changelog queue when a wave changes public package behavior
|
|
498
|
+
- a per-wave runtime assignment summary so doc and eval roles can see which runtime owned which artifacts
|
|
499
|
+
|
|
500
|
+
Documentation should consume integration outputs, not rediscover them from raw logs.
|
|
501
|
+
|
|
502
|
+
## Evaluation Upgrades
|
|
503
|
+
|
|
504
|
+
The harness should move from “wave passed or failed” to “wave quality is replayable and comparable.”
|
|
505
|
+
|
|
506
|
+
Add:
|
|
507
|
+
|
|
508
|
+
- per-wave regression datasets
|
|
509
|
+
- replayable trace bundles
|
|
510
|
+
- scoring for communication health, integration quality, and proof quality
|
|
511
|
+
- continuous-history benchmark scenarios, not only single-wave success
|
|
512
|
+
- runtime-mix reporting so success can be segmented by executor and by role
|
|
513
|
+
- clarification reporting so orchestrator-resolved questions and human escalations are both measurable
|
|
514
|
+
|
|
515
|
+
Suggested metrics:
|
|
516
|
+
|
|
517
|
+
- unresolved request count at closure
|
|
518
|
+
- integration contradiction count
|
|
519
|
+
- documentation drift count
|
|
520
|
+
- proof completeness ratio
|
|
521
|
+
- relaunch count by role
|
|
522
|
+
- relaunch count by executor
|
|
523
|
+
- runtime fallback rate
|
|
524
|
+
- mean time to first acknowledgement
|
|
525
|
+
- mean time to blocker resolution
|
|
526
|
+
- orchestrator clarification resolution rate
|
|
527
|
+
- human escalation rate
|
|
528
|
+
- evaluator reversal rate between early and final verdicts
|
|
529
|
+
|
|
530
|
+
## Infra And DevOps Upgrades
|
|
531
|
+
|
|
532
|
+
The harness already has structured deploy and infra markers. The next step is to make them durable and wave-aware.
|
|
533
|
+
|
|
534
|
+
Add:
|
|
535
|
+
|
|
536
|
+
- infra proof records into the coordination store and ledger
|
|
537
|
+
- deploy readiness and deploy verification as separate states
|
|
538
|
+
- environment baseline checks at wave start
|
|
539
|
+
- executor binary, credential, and profile availability checks for every runtime referenced by the wave
|
|
540
|
+
- required rollback or recovery guidance for waves that touch live systems
|
|
541
|
+
|
|
542
|
+
For infra- or deploy-heavy lanes, the integration steward should treat infra proof as first-class, not as a side detail in implementation logs.
|
|
543
|
+
|
|
544
|
+
## Prioritized Delivery Order
|
|
545
|
+
|
|
546
|
+
### Phase 1: Coordination And Planning Foundation
|
|
547
|
+
|
|
548
|
+
- canonical coordination store
|
|
549
|
+
- markdown board as rendered view
|
|
550
|
+
- per-agent inbox compiler
|
|
551
|
+
- full per-agent `### Executor` schema with runtime profiles
|
|
552
|
+
- typed clarification and human-feedback events
|
|
553
|
+
|
|
554
|
+
Why first:
|
|
555
|
+
|
|
556
|
+
- every other improvement depends on better shared state, a durable runtime plan, and a typed clarification model
|
|
557
|
+
|
|
558
|
+
### Phase 2: Integration And Scheduling
|
|
559
|
+
|
|
560
|
+
- integration steward role
|
|
561
|
+
- integration summary artifacts
|
|
562
|
+
- communication-aware relaunch and closure rules
|
|
563
|
+
- orchestrator-first clarification resolver
|
|
564
|
+
- wave ledger
|
|
565
|
+
|
|
566
|
+
Why second:
|
|
567
|
+
|
|
568
|
+
- this closes the communication-reasoning gap and the too-early human escalation loop without changing the authored wave format
|
|
569
|
+
|
|
570
|
+
### Phase 3: Evaluation And Replay
|
|
571
|
+
|
|
572
|
+
- shipped:
|
|
573
|
+
- trace bundles
|
|
574
|
+
- cumulative wave quality metrics
|
|
575
|
+
- runtime-mix and clarification metrics
|
|
576
|
+
- internal replay validation against stored attempt bundles
|
|
577
|
+
- launcher-generated replay acceptance coverage for hermetic pass, clarification, blocking, and retry/fallback traces
|
|
578
|
+
- still open:
|
|
579
|
+
- larger continuous-history replay scenario sets across more than one wave
|
|
580
|
+
- a public replay CLI if the internal helper proves stable
|
|
581
|
+
|
|
582
|
+
Why third:
|
|
583
|
+
|
|
584
|
+
- once state and flow are structured, evaluation becomes meaningful
|
|
585
|
+
|
|
586
|
+
### Phase 4: Capability Routing And Cross-Lane Dependencies
|
|
587
|
+
|
|
588
|
+
- shipped:
|
|
589
|
+
- capability tags
|
|
590
|
+
- deterministic helper-assignment routing from open requests
|
|
591
|
+
- helper-assignment snapshots under `.tmp/<lane>-wave-launcher/assignments/`
|
|
592
|
+
- typed `wave dep post|show|resolve|render` operator workflows
|
|
593
|
+
- per-wave inbound/outbound dependency snapshots under `.tmp/<lane>-wave-launcher/dependencies/`
|
|
594
|
+
- dependency-aware gating, inboxes, dashboards, and trace/replay artifacts
|
|
595
|
+
- still open:
|
|
596
|
+
- larger multi-lane benchmark scenarios that stress dependency resolution across more than one wave
|
|
597
|
+
- richer dependency-specific operator dashboards if the current JSON and markdown projections prove insufficient
|
|
598
|
+
|
|
599
|
+
Why fourth:
|
|
600
|
+
|
|
601
|
+
- this only became high leverage once the coordination, integration, and replay layers were already trustworthy
|
|
602
|
+
|
|
603
|
+
## Immediate Recommendation
|
|
604
|
+
|
|
605
|
+
The highest-value near-term upgrade is:
|
|
606
|
+
|
|
607
|
+
1. canonical coordination store
|
|
608
|
+
2. compiled agent inboxes
|
|
609
|
+
3. explicit integration steward and integration summary
|
|
610
|
+
4. full planning-time runtime profiles in `### Executor`
|
|
611
|
+
5. orchestrator-first clarification triage
|
|
612
|
+
|
|
613
|
+
That combination gives the harness the biggest improvement in:
|
|
614
|
+
|
|
615
|
+
- long-running robustness
|
|
616
|
+
- inter-agent messaging quality
|
|
617
|
+
- mixed-runtime planning quality
|
|
618
|
+
- reduced unnecessary human interruption
|
|
619
|
+
- closure reliability
|
|
620
|
+
- lane and multi-role scalability
|
|
621
|
+
|
|
622
|
+
without forcing a rewrite of wave files, lane structure, or existing proof markers.
|
|
623
|
+
|
|
624
|
+
## Source References
|
|
625
|
+
|
|
626
|
+
The canonical source list now lives at the bottom of [README.md](../README.md). Keep the committed source manifest in [docs/research/agent-context-sources.md](./research/agent-context-sources.md) and keep hydrated caches local-only.
|
package/package.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@chllming/wave-orchestration",
|
|
3
|
+
"version": "0.5.1",
|
|
4
|
+
"license": "MIT",
|
|
5
|
+
"description": "Generic wave-based multi-agent orchestration for repository work.",
|
|
6
|
+
"packageManager": "pnpm@10.23.0",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "git+https://github.com/chllming/wave-orchestration.git"
|
|
10
|
+
},
|
|
11
|
+
"homepage": "https://github.com/chllming/wave-orchestration#readme",
|
|
12
|
+
"bugs": {
|
|
13
|
+
"url": "https://github.com/chllming/wave-orchestration/issues"
|
|
14
|
+
},
|
|
15
|
+
"publishConfig": {
|
|
16
|
+
"access": "public"
|
|
17
|
+
},
|
|
18
|
+
"files": [
|
|
19
|
+
"scripts",
|
|
20
|
+
"docs",
|
|
21
|
+
"releases",
|
|
22
|
+
"wave.config.json",
|
|
23
|
+
"README.md",
|
|
24
|
+
"CHANGELOG.md"
|
|
25
|
+
],
|
|
26
|
+
"bin": {
|
|
27
|
+
"wave": "scripts/wave.mjs",
|
|
28
|
+
"wave-launch": "scripts/wave-launcher.mjs",
|
|
29
|
+
"wave-autonomous": "scripts/wave-autonomous.mjs",
|
|
30
|
+
"wave-feedback": "scripts/wave-human-feedback.mjs",
|
|
31
|
+
"wave-dashboard": "scripts/wave-dashboard.mjs",
|
|
32
|
+
"wave-local-executor": "scripts/wave-local-executor.mjs"
|
|
33
|
+
},
|
|
34
|
+
"scripts": {
|
|
35
|
+
"context7:api-check": "bash scripts/context7-export-env.sh run bash scripts/context7-api-check.sh",
|
|
36
|
+
"research:import-agent-context": "node scripts/research/import-agent-context-archive.mjs scripts/research/manifests/harness-and-blackboard-2026-03-21.mjs",
|
|
37
|
+
"research:index-agent-context": "node scripts/research/generate-agent-context-indexes.mjs",
|
|
38
|
+
"research:refresh-agent-context": "pnpm research:import-agent-context && pnpm research:index-agent-context",
|
|
39
|
+
"test": "vitest run --config vitest.config.ts",
|
|
40
|
+
"wave": "node scripts/wave.mjs",
|
|
41
|
+
"wave:autonomous": "node scripts/wave-autonomous.mjs",
|
|
42
|
+
"wave:dashboard": "node scripts/wave-dashboard.mjs",
|
|
43
|
+
"wave:feedback": "node scripts/wave-human-feedback.mjs",
|
|
44
|
+
"wave:launch": "node scripts/wave-launcher.mjs",
|
|
45
|
+
"wave:local": "node scripts/wave-local-executor.mjs"
|
|
46
|
+
},
|
|
47
|
+
"devDependencies": {
|
|
48
|
+
"@mozilla/readability": "^0.6.0",
|
|
49
|
+
"jsdom": "^29.0.1",
|
|
50
|
+
"pdfjs-dist": "^5.5.207",
|
|
51
|
+
"vitest": "3.2.4"
|
|
52
|
+
}
|
|
53
|
+
}
|