@chllming/wave-orchestration 0.8.9 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/CHANGELOG.md +57 -0
  2. package/README.md +135 -18
  3. package/docs/README.md +9 -3
  4. package/docs/architecture/README.md +1498 -0
  5. package/docs/concepts/context7-vs-skills.md +1 -1
  6. package/docs/concepts/operating-modes.md +3 -3
  7. package/docs/concepts/what-is-a-wave.md +1 -1
  8. package/docs/guides/author-and-run-waves.md +27 -4
  9. package/docs/guides/monorepo-projects.md +226 -0
  10. package/docs/guides/planner.md +10 -3
  11. package/docs/guides/{recommendations-0.8.9.md → recommendations-0.9.1.md} +8 -7
  12. package/docs/guides/sandboxed-environments.md +158 -0
  13. package/docs/guides/terminal-surfaces.md +14 -12
  14. package/docs/plans/current-state.md +11 -7
  15. package/docs/plans/end-state-architecture.md +3 -1
  16. package/docs/plans/examples/wave-example-design-handoff.md +3 -1
  17. package/docs/plans/examples/wave-example-live-proof.md +6 -1
  18. package/docs/plans/examples/wave-example-rollout-fidelity.md +2 -0
  19. package/docs/plans/migration.md +48 -18
  20. package/docs/plans/sandbox-end-state-architecture.md +153 -0
  21. package/docs/plans/wave-orchestrator.md +4 -4
  22. package/docs/reference/cli-reference.md +125 -57
  23. package/docs/reference/coordination-and-closure.md +1 -1
  24. package/docs/reference/github-packages-setup.md +1 -1
  25. package/docs/reference/migration-0.2-to-0.5.md +9 -7
  26. package/docs/reference/npmjs-token-publishing.md +53 -0
  27. package/docs/reference/npmjs-trusted-publishing.md +4 -50
  28. package/docs/reference/package-publishing-flow.md +272 -0
  29. package/docs/reference/runtime-config/README.md +140 -12
  30. package/docs/reference/sample-waves.md +100 -5
  31. package/docs/reference/skills.md +1 -1
  32. package/docs/reference/wave-control.md +23 -5
  33. package/docs/roadmap.md +43 -201
  34. package/package.json +1 -1
  35. package/releases/manifest.json +38 -0
  36. package/scripts/wave-orchestrator/adhoc.mjs +49 -17
  37. package/scripts/wave-orchestrator/agent-process-runner.mjs +344 -0
  38. package/scripts/wave-orchestrator/agent-state.mjs +0 -1
  39. package/scripts/wave-orchestrator/artifact-schemas.mjs +7 -0
  40. package/scripts/wave-orchestrator/autonomous.mjs +96 -29
  41. package/scripts/wave-orchestrator/benchmark-external.mjs +23 -7
  42. package/scripts/wave-orchestrator/benchmark.mjs +33 -10
  43. package/scripts/wave-orchestrator/closure-engine.mjs +138 -17
  44. package/scripts/wave-orchestrator/config.mjs +239 -24
  45. package/scripts/wave-orchestrator/control-cli.mjs +71 -28
  46. package/scripts/wave-orchestrator/coord-cli.mjs +22 -14
  47. package/scripts/wave-orchestrator/coordination-store.mjs +8 -0
  48. package/scripts/wave-orchestrator/dashboard-renderer.mjs +123 -44
  49. package/scripts/wave-orchestrator/dep-cli.mjs +47 -21
  50. package/scripts/wave-orchestrator/derived-state-engine.mjs +6 -3
  51. package/scripts/wave-orchestrator/feedback.mjs +28 -11
  52. package/scripts/wave-orchestrator/gate-engine.mjs +106 -38
  53. package/scripts/wave-orchestrator/human-input-resolution.mjs +5 -1
  54. package/scripts/wave-orchestrator/install.mjs +13 -0
  55. package/scripts/wave-orchestrator/launcher-progress.mjs +91 -0
  56. package/scripts/wave-orchestrator/launcher-runtime.mjs +179 -68
  57. package/scripts/wave-orchestrator/launcher.mjs +222 -53
  58. package/scripts/wave-orchestrator/ledger.mjs +7 -2
  59. package/scripts/wave-orchestrator/planner.mjs +48 -27
  60. package/scripts/wave-orchestrator/project-profile.mjs +31 -8
  61. package/scripts/wave-orchestrator/projection-writer.mjs +13 -1
  62. package/scripts/wave-orchestrator/proof-cli.mjs +18 -12
  63. package/scripts/wave-orchestrator/reducer-snapshot.mjs +6 -0
  64. package/scripts/wave-orchestrator/retry-cli.mjs +19 -13
  65. package/scripts/wave-orchestrator/retry-control.mjs +3 -3
  66. package/scripts/wave-orchestrator/retry-engine.mjs +93 -6
  67. package/scripts/wave-orchestrator/role-helpers.mjs +30 -0
  68. package/scripts/wave-orchestrator/session-supervisor.mjs +94 -85
  69. package/scripts/wave-orchestrator/shared.mjs +77 -14
  70. package/scripts/wave-orchestrator/supervisor-cli.mjs +1306 -0
  71. package/scripts/wave-orchestrator/terminals.mjs +12 -32
  72. package/scripts/wave-orchestrator/tmux-adapter.mjs +300 -0
  73. package/scripts/wave-orchestrator/wave-control-client.mjs +84 -16
  74. package/scripts/wave-orchestrator/wave-files.mjs +43 -6
  75. package/scripts/wave.mjs +13 -0
@@ -7,13 +7,14 @@ import {
7
7
  DEFAULT_WAIT_PROGRESS_INTERVAL_MS,
8
8
  REPO_ROOT,
9
9
  ensureDirectory,
10
+ readJsonOrNull,
10
11
  shellQuote,
12
+ sleep,
11
13
  writeJsonAtomic,
12
14
  } from "./shared.mjs";
13
15
  import { readStatusCodeIfPresent } from "./dashboard-state.mjs";
14
16
  import { buildExecutorLaunchSpec } from "./executors.mjs";
15
17
  import { hashAgentPromptFingerprint, prefetchContext7ForSelection } from "./context7.mjs";
16
- import { killTmuxSessionIfExists } from "./terminals.mjs";
17
18
  import { isDesignAgent, resolveDesignReportPath, resolveWaveRoleBindings } from "./role-helpers.mjs";
18
19
  import {
19
20
  resolveAgentSkills,
@@ -25,6 +26,10 @@ import {
25
26
  agentSignalPath,
26
27
  agentUsesSignalHygiene,
27
28
  } from "./signals.mjs";
29
+ import {
30
+ spawnAgentProcessRunner,
31
+ terminateAgentProcessRuntime,
32
+ } from "./agent-process-runner.mjs";
28
33
 
29
34
  export function refreshResolvedSkillsForRun(runInfo, waveDefinition, lanePaths) {
30
35
  runInfo.agent.skillsResolved = resolveAgentSkills(
@@ -35,32 +40,67 @@ export function refreshResolvedSkillsForRun(runInfo, waveDefinition, lanePaths)
35
40
  return runInfo.agent.skillsResolved;
36
41
  }
37
42
 
38
- export function collectUnexpectedSessionFailures(
43
+ export function applyLaunchResultToRun(
44
+ runInfo,
45
+ launchResult,
46
+ {
47
+ attempt = null,
48
+ fallbackExecutorId = null,
49
+ fallbackSkills = null,
50
+ } = {},
51
+ ) {
52
+ if (!runInfo || !launchResult) {
53
+ return runInfo;
54
+ }
55
+ if (attempt !== null && attempt !== undefined) {
56
+ runInfo.lastLaunchAttempt = attempt;
57
+ }
58
+ runInfo.lastPromptHash = launchResult.promptHash || null;
59
+ runInfo.lastContext7 = launchResult.context7 || null;
60
+ runInfo.lastExecutorId = launchResult.executorId || fallbackExecutorId || null;
61
+ runInfo.lastSkillProjection = launchResult.skills || fallbackSkills || null;
62
+ runInfo.runtimePath = launchResult.runtimePath || runInfo.runtimePath || null;
63
+ runInfo.sessionBackend = launchResult.sessionBackend || runInfo.sessionBackend || "process";
64
+ runInfo.attachMode = launchResult.attachMode || runInfo.attachMode || "log-tail";
65
+ return runInfo;
66
+ }
67
+
68
+ export function collectUnexpectedSessionWarnings(
39
69
  lanePaths,
40
70
  agentRuns,
41
71
  pendingAgentIds,
42
72
  { listLaneTmuxSessionNamesFn },
43
73
  ) {
44
- const activeSessionNames = new Set(listLaneTmuxSessionNamesFn(lanePaths));
45
- const failures = [];
74
+ const warnings = [];
46
75
  for (const run of agentRuns) {
47
76
  if (!pendingAgentIds.has(run.agent.agentId) || fs.existsSync(run.statusPath)) {
48
77
  continue;
49
78
  }
50
- if (activeSessionNames.has(run.sessionName)) {
79
+ if (!run.runtimePath || !fs.existsSync(run.runtimePath)) {
51
80
  continue;
52
81
  }
53
- failures.push({
82
+ const runtimeRecord = JSON.parse(fs.readFileSync(run.runtimePath, "utf8"));
83
+ if (!runtimeRecord || typeof runtimeRecord !== "object") {
84
+ continue;
85
+ }
86
+ if (runtimeRecord.terminalDisposition !== "projection-missing") {
87
+ continue;
88
+ }
89
+ warnings.push({
54
90
  agentId: run.agent.agentId,
55
- statusCode: "session-missing",
91
+ statusCode: "terminal-session-missing",
56
92
  logPath: path.relative(REPO_ROOT, run.logPath),
57
- detail: `tmux session ${run.sessionName} disappeared before ${path.relative(REPO_ROOT, run.statusPath)} was written.`,
93
+ detail: `terminal projection for ${run.sessionName} disappeared before ${path.relative(REPO_ROOT, run.statusPath)} was written.`,
58
94
  });
59
95
  }
60
- return failures;
96
+ return warnings;
61
97
  }
62
98
 
63
- export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
99
+ export async function launchAgentSession(
100
+ lanePaths,
101
+ params,
102
+ { spawnRunnerFn = spawnAgentProcessRunner } = {},
103
+ ) {
64
104
  const {
65
105
  wave,
66
106
  waveDefinition = null,
@@ -84,11 +124,22 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
84
124
  context7Enabled,
85
125
  designExecutionMode = null,
86
126
  dryRun = false,
127
+ runtimePath = null,
87
128
  } = params;
88
129
  ensureDirectory(path.dirname(promptPath));
89
130
  ensureDirectory(path.dirname(logPath));
90
131
  ensureDirectory(path.dirname(statusPath));
132
+ if (runtimePath && fs.existsSync(runtimePath)) {
133
+ const priorRuntime = readJsonOrNull(runtimePath);
134
+ if (priorRuntime && typeof priorRuntime === "object") {
135
+ await terminateAgentProcessRuntime(priorRuntime);
136
+ }
137
+ }
91
138
  fs.rmSync(statusPath, { force: true });
139
+ if (runtimePath) {
140
+ ensureDirectory(path.dirname(runtimePath));
141
+ fs.rmSync(runtimePath, { force: true });
142
+ }
92
143
 
93
144
  const context7 = await prefetchContext7ForSelection(agent.context7Resolved, {
94
145
  cacheDir: lanePaths.context7CacheDir,
@@ -170,7 +221,6 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
170
221
  skills: summarizeResolvedSkills(agent.skillsResolved),
171
222
  };
172
223
  }
173
- killTmuxSessionIfExists(lanePaths.tmuxSocketName, sessionName);
174
224
 
175
225
  const executionLines = [];
176
226
  if (launchSpec.env) {
@@ -195,6 +245,9 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
195
245
  executionLines.push("rate_attempt=1");
196
246
  executionLines.push("status=1");
197
247
  executionLines.push('while [ "$rate_attempt" -le "$max_rate_attempts" ]; do');
248
+ executionLines.push(
249
+ ` attempt_log_offset=$(wc -c < ${shellQuote(logPath)} 2>/dev/null || echo 0)`,
250
+ );
198
251
  for (const line of launchSpec.invocationLines) {
199
252
  executionLines.push(` ${line}`);
200
253
  }
@@ -206,7 +259,7 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
206
259
  executionLines.push(" break");
207
260
  executionLines.push(" fi");
208
261
  executionLines.push(
209
- ` if tail -n 120 ${shellQuote(logPath)} | grep -Eqi '429 Too Many Requests|exceeded retry limit|last status: 429|rate limit'; then`,
262
+ ` if tail -c +$((attempt_log_offset + 1)) ${shellQuote(logPath)} | grep -Eqi '429 Too Many Requests|exceeded retry limit|last status: 429|rate limit'; then`,
210
263
  );
211
264
  executionLines.push(" sleep_seconds=$((rate_delay_base * (2 ** (rate_attempt - 1))))");
212
265
  executionLines.push(
@@ -231,23 +284,66 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
231
284
  `export WAVE_ORCHESTRATOR_ID=${shellQuote(orchestratorId || "")}`,
232
285
  `export WAVE_EXECUTOR_MODE=${shellQuote(resolvedExecutorMode)}`,
233
286
  ...executionLines,
234
- `node -e ${shellQuote(
235
- "const fs=require('node:fs'); const statusPath=process.argv[1]; const payload={code:Number(process.argv[2]),promptHash:process.argv[3]||null,orchestratorId:process.argv[4]||null,attempt:Number(process.argv[5])||1,completedAt:new Date().toISOString()}; fs.writeFileSync(statusPath, JSON.stringify(payload, null, 2)+'\\n', 'utf8');",
236
- )} ${shellQuote(statusPath)} "$status" ${shellQuote(promptHash)} ${shellQuote(orchestratorId || "")} ${shellQuote(String(attempt || 1))}`,
237
- `echo "[${lanePaths.lane}-wave-launcher] ${sessionName} finished with code $status"`,
238
- "exit \"$status\"",
239
287
  ].join("\n");
240
-
241
- runTmuxFn(
242
- lanePaths,
243
- ["new-session", "-d", "-s", sessionName, `bash -lc ${shellQuote(command)}`],
244
- `launch session ${sessionName}`,
245
- );
288
+ const payloadPath = path.join(overlayDir, "runner-payload.json");
289
+ const initialRuntimeRecord = runtimePath
290
+ ? {
291
+ runId: process.env.WAVE_SUPERVISOR_RUN_ID || null,
292
+ waveNumber: wave,
293
+ attempt: Number(attempt || 1),
294
+ agentId: agent.agentId,
295
+ sessionName,
296
+ tmuxSessionName: null,
297
+ sessionBackend: "process",
298
+ attachMode: "log-tail",
299
+ runnerPid: null,
300
+ executorPid: null,
301
+ pid: null,
302
+ pgid: null,
303
+ startedAt: new Date().toISOString(),
304
+ lastHeartbeatAt: new Date().toISOString(),
305
+ statusPath,
306
+ logPath,
307
+ exitCode: null,
308
+ exitReason: null,
309
+ terminalDisposition: "launching",
310
+ }
311
+ : null;
312
+ if (runtimePath && initialRuntimeRecord) {
313
+ writeJsonAtomic(runtimePath, initialRuntimeRecord);
314
+ }
315
+ const runner = spawnRunnerFn({
316
+ payloadPath,
317
+ runId: process.env.WAVE_SUPERVISOR_RUN_ID || null,
318
+ lane: lanePaths.lane,
319
+ waveNumber: wave,
320
+ attempt: Number(attempt || 1),
321
+ agentId: agent.agentId,
322
+ sessionName,
323
+ runtimePath,
324
+ statusPath,
325
+ logPath,
326
+ promptHash,
327
+ orchestratorId: orchestratorId || "",
328
+ executorId: resolvedExecutorMode,
329
+ env: launchSpec.env || {},
330
+ command,
331
+ });
332
+ if (runtimePath && initialRuntimeRecord) {
333
+ writeJsonAtomic(runtimePath, {
334
+ ...initialRuntimeRecord,
335
+ runnerPid: runner?.runnerPid || null,
336
+ lastHeartbeatAt: new Date().toISOString(),
337
+ });
338
+ }
246
339
  return {
247
340
  promptHash,
248
341
  context7,
249
342
  executorId: resolvedExecutorMode,
250
343
  skills: summarizeResolvedSkills(agent.skillsResolved),
344
+ runtimePath,
345
+ sessionBackend: "process",
346
+ attachMode: "log-tail",
251
347
  };
252
348
  }
253
349
 
@@ -256,7 +352,7 @@ export async function waitForWaveCompletion(
256
352
  agentRuns,
257
353
  timeoutMinutes,
258
354
  onProgress = null,
259
- { collectUnexpectedSessionFailuresFn },
355
+ { collectUnexpectedSessionWarningsFn = () => [] },
260
356
  ) {
261
357
  const defaultTimeoutMs = timeoutMinutes * 60 * 1000;
262
358
  const startedAt = Date.now();
@@ -272,8 +368,7 @@ export async function waitForWaveCompletion(
272
368
  );
273
369
  const pending = new Set(agentRuns.map((run) => run.agent.agentId));
274
370
  const timedOutAgentIds = new Set();
275
- let sessionFailures = [];
276
-
371
+ let sessionWarnings = [];
277
372
  const refreshPending = () => {
278
373
  for (const run of agentRuns) {
279
374
  if (pending.has(run.agent.agentId) && fs.existsSync(run.statusPath)) {
@@ -282,51 +377,58 @@ export async function waitForWaveCompletion(
282
377
  }
283
378
  };
284
379
 
285
- await new Promise((resolve) => {
286
- const interval = setInterval(() => {
287
- refreshPending();
288
- onProgress?.({ pendingAgentIds: new Set(pending), timedOut: false });
289
- if (pending.size === 0) {
290
- clearInterval(interval);
291
- resolve();
292
- return;
293
- }
294
- sessionFailures = collectUnexpectedSessionFailuresFn(lanePaths, agentRuns, pending);
295
- if (sessionFailures.length > 0) {
296
- onProgress?.({
297
- pendingAgentIds: new Set(pending),
298
- timedOut: false,
299
- failures: sessionFailures,
300
- });
301
- clearInterval(interval);
302
- resolve();
303
- return;
380
+ while (true) {
381
+ refreshPending();
382
+ onProgress?.({ pendingAgentIds: new Set(pending), timedOut: false });
383
+ if (pending.size === 0) {
384
+ break;
385
+ }
386
+ sessionWarnings = collectUnexpectedSessionWarningsFn(lanePaths, agentRuns, pending);
387
+ if (sessionWarnings.length > 0) {
388
+ onProgress?.({
389
+ pendingAgentIds: new Set(pending),
390
+ timedOut: false,
391
+ warnings: sessionWarnings,
392
+ });
393
+ }
394
+ const now = Date.now();
395
+ for (const run of agentRuns) {
396
+ if (!pending.has(run.agent.agentId)) {
397
+ continue;
304
398
  }
305
- const now = Date.now();
306
- for (const run of agentRuns) {
307
- if (!pending.has(run.agent.agentId)) {
308
- continue;
309
- }
310
- const deadline = timeoutAtByAgentId.get(run.agent.agentId) || startedAt + defaultTimeoutMs;
311
- if (now <= deadline) {
312
- continue;
399
+ if (run.runtimePath && fs.existsSync(run.runtimePath)) {
400
+ try {
401
+ const runtimeRecord = readJsonOrNull(run.runtimePath);
402
+ if (
403
+ runtimeRecord &&
404
+ typeof runtimeRecord === "object" &&
405
+ ["completed", "failed", "terminated"].includes(
406
+ String(runtimeRecord.terminalDisposition || ""),
407
+ )
408
+ ) {
409
+ pending.delete(run.agent.agentId);
410
+ continue;
411
+ }
412
+ } catch {
413
+ // best-effort runtime observation only
313
414
  }
314
- timedOutAgentIds.add(run.agent.agentId);
315
- pending.delete(run.agent.agentId);
316
- killTmuxSessionIfExists(lanePaths.tmuxSocketName, run.sessionName);
317
415
  }
318
- if (pending.size === 0) {
319
- clearInterval(interval);
320
- resolve();
416
+ const deadline = timeoutAtByAgentId.get(run.agent.agentId) || startedAt + defaultTimeoutMs;
417
+ if (now <= deadline) {
418
+ continue;
321
419
  }
322
- }, DEFAULT_WAIT_PROGRESS_INTERVAL_MS);
323
- refreshPending();
324
- onProgress?.({ pendingAgentIds: new Set(pending), timedOut: false });
325
- });
326
-
327
- if (sessionFailures.length > 0) {
328
- onProgress?.({ pendingAgentIds: new Set(), timedOut: false, failures: sessionFailures });
329
- return { failures: sessionFailures, timedOut: false };
420
+ timedOutAgentIds.add(run.agent.agentId);
421
+ pending.delete(run.agent.agentId);
422
+ const runtimeRecord =
423
+ run.runtimePath && fs.existsSync(run.runtimePath) ? readJsonOrNull(run.runtimePath) : null;
424
+ if (runtimeRecord) {
425
+ await terminateAgentProcessRuntime(runtimeRecord);
426
+ }
427
+ }
428
+ if (pending.size === 0) {
429
+ break;
430
+ }
431
+ await sleep(DEFAULT_WAIT_PROGRESS_INTERVAL_MS);
330
432
  }
331
433
 
332
434
  const failures = [];
@@ -336,10 +438,19 @@ export async function waitForWaveCompletion(
336
438
  continue;
337
439
  }
338
440
  if (code === null || timedOutAgentIds.has(run.agent.agentId)) {
441
+ let runtimeRecord = null;
442
+ if (run.runtimePath && fs.existsSync(run.runtimePath)) {
443
+ runtimeRecord = readJsonOrNull(run.runtimePath);
444
+ }
339
445
  failures.push({
340
446
  agentId: run.agent.agentId,
341
- statusCode: timedOutAgentIds.has(run.agent.agentId) ? "timeout-no-status" : "missing-status",
447
+ statusCode: timedOutAgentIds.has(run.agent.agentId)
448
+ ? "timeout-no-status"
449
+ : runtimeRecord?.terminalDisposition === "failed"
450
+ ? "runtime-failed-before-status"
451
+ : "missing-status",
342
452
  logPath: path.relative(REPO_ROOT, run.logPath),
453
+ detail: runtimeRecord?.exitReason || null,
343
454
  });
344
455
  continue;
345
456
  }