@chllming/wave-orchestration 0.9.0 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/CHANGELOG.md +57 -0
  2. package/LICENSE.md +21 -0
  3. package/README.md +133 -20
  4. package/docs/README.md +12 -4
  5. package/docs/agents/wave-security-role.md +1 -0
  6. package/docs/architecture/README.md +1498 -0
  7. package/docs/concepts/operating-modes.md +2 -2
  8. package/docs/guides/author-and-run-waves.md +14 -4
  9. package/docs/guides/planner.md +2 -2
  10. package/docs/guides/{recommendations-0.9.0.md → recommendations-0.9.2.md} +8 -7
  11. package/docs/guides/sandboxed-environments.md +158 -0
  12. package/docs/guides/terminal-surfaces.md +14 -12
  13. package/docs/plans/current-state.md +11 -3
  14. package/docs/plans/end-state-architecture.md +3 -1
  15. package/docs/plans/examples/wave-example-design-handoff.md +1 -1
  16. package/docs/plans/examples/wave-example-live-proof.md +1 -1
  17. package/docs/plans/migration.md +70 -19
  18. package/docs/plans/sandbox-end-state-architecture.md +153 -0
  19. package/docs/reference/cli-reference.md +71 -7
  20. package/docs/reference/coordination-and-closure.md +18 -1
  21. package/docs/reference/corridor.md +225 -0
  22. package/docs/reference/github-packages-setup.md +1 -1
  23. package/docs/reference/migration-0.2-to-0.5.md +9 -7
  24. package/docs/reference/npmjs-token-publishing.md +53 -0
  25. package/docs/reference/npmjs-trusted-publishing.md +4 -50
  26. package/docs/reference/package-publishing-flow.md +272 -0
  27. package/docs/reference/runtime-config/README.md +61 -3
  28. package/docs/reference/sample-waves.md +5 -5
  29. package/docs/reference/skills.md +1 -1
  30. package/docs/reference/wave-control.md +358 -27
  31. package/docs/roadmap.md +39 -204
  32. package/package.json +1 -1
  33. package/releases/manifest.json +38 -0
  34. package/scripts/wave-cli-bootstrap.mjs +52 -1
  35. package/scripts/wave-orchestrator/agent-process-runner.mjs +344 -0
  36. package/scripts/wave-orchestrator/agent-state.mjs +0 -1
  37. package/scripts/wave-orchestrator/artifact-schemas.mjs +7 -0
  38. package/scripts/wave-orchestrator/autonomous.mjs +47 -14
  39. package/scripts/wave-orchestrator/closure-engine.mjs +138 -17
  40. package/scripts/wave-orchestrator/config.mjs +199 -3
  41. package/scripts/wave-orchestrator/context7.mjs +231 -29
  42. package/scripts/wave-orchestrator/control-cli.mjs +42 -5
  43. package/scripts/wave-orchestrator/coordination.mjs +14 -0
  44. package/scripts/wave-orchestrator/corridor.mjs +363 -0
  45. package/scripts/wave-orchestrator/dashboard-renderer.mjs +115 -43
  46. package/scripts/wave-orchestrator/derived-state-engine.mjs +44 -4
  47. package/scripts/wave-orchestrator/gate-engine.mjs +126 -38
  48. package/scripts/wave-orchestrator/install.mjs +46 -0
  49. package/scripts/wave-orchestrator/launcher-progress.mjs +91 -0
  50. package/scripts/wave-orchestrator/launcher-runtime.mjs +290 -75
  51. package/scripts/wave-orchestrator/launcher.mjs +201 -53
  52. package/scripts/wave-orchestrator/ledger.mjs +7 -2
  53. package/scripts/wave-orchestrator/planner.mjs +1 -0
  54. package/scripts/wave-orchestrator/projection-writer.mjs +36 -1
  55. package/scripts/wave-orchestrator/provider-runtime.mjs +104 -0
  56. package/scripts/wave-orchestrator/reducer-snapshot.mjs +6 -0
  57. package/scripts/wave-orchestrator/retry-control.mjs +3 -3
  58. package/scripts/wave-orchestrator/retry-engine.mjs +93 -6
  59. package/scripts/wave-orchestrator/role-helpers.mjs +30 -0
  60. package/scripts/wave-orchestrator/session-supervisor.mjs +94 -85
  61. package/scripts/wave-orchestrator/shared.mjs +1 -0
  62. package/scripts/wave-orchestrator/supervisor-cli.mjs +1306 -0
  63. package/scripts/wave-orchestrator/terminals.mjs +12 -32
  64. package/scripts/wave-orchestrator/tmux-adapter.mjs +300 -0
  65. package/scripts/wave-orchestrator/traces.mjs +25 -0
  66. package/scripts/wave-orchestrator/wave-control-client.mjs +14 -1
  67. package/scripts/wave-orchestrator/wave-files.mjs +38 -5
  68. package/scripts/wave.mjs +13 -0
@@ -1,20 +1,31 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
3
  import { buildExecutionPrompt } from "./coordination.mjs";
4
+ import {
5
+ materializeWaveCorridorContext,
6
+ renderCorridorPromptContext,
7
+ waveCorridorContextPath,
8
+ } from "./corridor.mjs";
4
9
  import {
5
10
  DEFAULT_AGENT_RATE_LIMIT_BASE_DELAY_SECONDS,
6
11
  DEFAULT_AGENT_RATE_LIMIT_MAX_DELAY_SECONDS,
7
12
  DEFAULT_WAIT_PROGRESS_INTERVAL_MS,
8
13
  REPO_ROOT,
9
14
  ensureDirectory,
15
+ readJsonOrNull,
10
16
  shellQuote,
17
+ sleep,
11
18
  writeJsonAtomic,
12
19
  } from "./shared.mjs";
13
20
  import { readStatusCodeIfPresent } from "./dashboard-state.mjs";
14
21
  import { buildExecutorLaunchSpec } from "./executors.mjs";
15
22
  import { hashAgentPromptFingerprint, prefetchContext7ForSelection } from "./context7.mjs";
16
- import { killTmuxSessionIfExists } from "./terminals.mjs";
17
- import { isDesignAgent, resolveDesignReportPath, resolveWaveRoleBindings } from "./role-helpers.mjs";
23
+ import {
24
+ isDesignAgent,
25
+ isSecurityReviewAgent,
26
+ resolveDesignReportPath,
27
+ resolveWaveRoleBindings,
28
+ } from "./role-helpers.mjs";
18
29
  import {
19
30
  resolveAgentSkills,
20
31
  summarizeResolvedSkills,
@@ -25,6 +36,48 @@ import {
25
36
  agentSignalPath,
26
37
  agentUsesSignalHygiene,
27
38
  } from "./signals.mjs";
39
+ import {
40
+ spawnAgentProcessRunner,
41
+ terminateAgentProcessRuntime,
42
+ } from "./agent-process-runner.mjs";
43
+ import {
44
+ requestWaveControlCredentialEnv,
45
+ requestWaveControlProviderEnv,
46
+ } from "./provider-runtime.mjs";
47
+
48
/**
 * Builds a copy of an environment map that is safe to write into a preview.
 *
 * Every key listed in `redactedKeys` that exists as an own property of the
 * copy has its value replaced with the literal string "[redacted]". Keys that
 * are listed but absent are not added. The input object is never mutated.
 *
 * @param {Record<string, unknown> | null | undefined} [env] - Source
 *   environment map; `null`/`undefined` are treated as an empty map.
 * @param {Iterable<string> | null | undefined} [redactedKeys] - Keys whose
 *   values must be masked; `null`/`undefined` are treated as "mask nothing".
 * @returns {Record<string, unknown>} A new shallow copy with masked values.
 */
function redactPreviewEnv(env = {}, redactedKeys = []) {
  const output = { ...(env || {}) };
  // A default parameter only covers `undefined`; coalesce so an explicit
  // `null` is tolerated the same way `env` already is.
  for (const key of redactedKeys ?? []) {
    if (Object.hasOwn(output, key)) {
      output[key] = "[redacted]";
    }
  }
  return output;
}
57
+
58
/**
 * Produces the placeholder Context7 result used when a launch is only being
 * previewed (dry-run), so no prefetch is performed.
 *
 * The selection counts as "active" when it is truthy, its `bundleId` is not
 * "none", and `libraries` is a non-empty array. An active selection yields
 * mode "dry-run" with a warning that the prefetch was skipped; anything else
 * yields mode "none" with an empty warning. The selection is echoed back
 * unchanged, and `promptText` / `snippetHash` are always empty strings.
 *
 * @param {object | null | undefined} selection - Resolved Context7 selection.
 * @returns {{mode: string, selection: *, promptText: string, snippetHash: string, warning: string}}
 */
function buildDryRunContext7Preview(selection) {
  const hasLibraries =
    Boolean(selection) &&
    selection.bundleId !== "none" &&
    Array.isArray(selection.libraries) &&
    selection.libraries.length > 0;

  return {
    mode: hasLibraries ? "dry-run" : "none",
    selection,
    promptText: "",
    snippetHash: "",
    warning: hasLibraries ? "Context7 prefetch skipped during dry-run preview." : "",
  };
}
28
81
 
29
82
  export function refreshResolvedSkillsForRun(runInfo, waveDefinition, lanePaths) {
30
83
  runInfo.agent.skillsResolved = resolveAgentSkills(
@@ -35,32 +88,67 @@ export function refreshResolvedSkillsForRun(runInfo, waveDefinition, lanePaths)
35
88
  return runInfo.agent.skillsResolved;
36
89
  }
37
90
 
38
- export function collectUnexpectedSessionFailures(
91
/**
 * Copies the outcome of a launch onto the run record, mutating `runInfo` in
 * place and returning it.
 *
 * When either `runInfo` or `launchResult` is falsy nothing is changed and
 * `runInfo` is returned as given. Otherwise:
 *  - `lastLaunchAttempt` is set only when `attempt` is neither null nor undefined;
 *  - `lastPromptHash` / `lastContext7` take the launch values or null;
 *  - `lastExecutorId` / `lastSkillProjection` take the launch value, then the
 *    matching fallback option, then null;
 *  - `runtimePath`, `sessionBackend` and `attachMode` take the launch value,
 *    then the value already on the run, then null / "process" / "log-tail".
 *
 * @param {object | null | undefined} runInfo - Run record to update.
 * @param {object | null | undefined} launchResult - Result returned by a launch.
 * @param {{attempt?: *, fallbackExecutorId?: *, fallbackSkills?: *}} [options]
 * @returns {object | null | undefined} The same `runInfo` reference.
 */
export function applyLaunchResultToRun(
  runInfo,
  launchResult,
  { attempt = null, fallbackExecutorId = null, fallbackSkills = null } = {},
) {
  if (runInfo && launchResult) {
    if (attempt != null) {
      runInfo.lastLaunchAttempt = attempt;
    }
    Object.assign(runInfo, {
      lastPromptHash: launchResult.promptHash || null,
      lastContext7: launchResult.context7 || null,
      lastExecutorId: launchResult.executorId || fallbackExecutorId || null,
      lastSkillProjection: launchResult.skills || fallbackSkills || null,
      runtimePath: launchResult.runtimePath || runInfo.runtimePath || null,
      sessionBackend: launchResult.sessionBackend || runInfo.sessionBackend || "process",
      attachMode: launchResult.attachMode || runInfo.attachMode || "log-tail",
    });
  }
  return runInfo;
}
115
+
116
/**
 * Collects non-fatal warnings for pending agents whose terminal projection
 * went away before they wrote a status file.
 *
 * A run is inspected only when its agent id is still in `pendingAgentIds`,
 * its status file does not exist, and it has a `runtimePath`. The runtime
 * record at that path is then read; a warning is emitted only when the record
 * is an object whose `terminalDisposition` is "projection-missing".
 *
 * Reading the runtime record is best-effort: the launcher deletes and rewrites
 * that file when an agent is relaunched, so a file that vanished between polls
 * or that does not contain valid JSON is skipped for this pass instead of
 * aborting the whole warning scan. Other I/O errors still propagate.
 *
 * @param {object} lanePaths - Lane path bundle (not read by this function;
 *   kept for signature compatibility).
 * @param {Iterable<object>} agentRuns - Run records exposing `agent.agentId`,
 *   `statusPath`, `runtimePath`, `logPath` and `sessionName`.
 * @param {{has(id: string): boolean}} pendingAgentIds - Ids still awaited.
 * @param {object} [_deps] - Legacy injected dependencies
 *   (`listLaneTmuxSessionNamesFn`); no longer consulted, and optional so the
 *   function can be called with three arguments.
 * @returns {Array<{agentId: string, statusCode: string, logPath: string, detail: string}>}
 */
export function collectUnexpectedSessionWarnings(
  lanePaths,
  agentRuns,
  pendingAgentIds,
  _deps = {},
) {
  const warnings = [];
  for (const run of agentRuns) {
    if (!pendingAgentIds.has(run.agent.agentId) || fs.existsSync(run.statusPath)) {
      continue;
    }
    if (!run.runtimePath) {
      continue;
    }
    let runtimeRecord = null;
    try {
      runtimeRecord = JSON.parse(fs.readFileSync(run.runtimePath, "utf8"));
    } catch (error) {
      // Missing file (never written, or removed by a relaunch between polls)
      // and unparsable content both mean "nothing to report yet".
      if (error?.code === "ENOENT" || error instanceof SyntaxError) {
        continue;
      }
      throw error;
    }
    if (!runtimeRecord || typeof runtimeRecord !== "object") {
      continue;
    }
    if (runtimeRecord.terminalDisposition !== "projection-missing") {
      continue;
    }
    warnings.push({
      agentId: run.agent.agentId,
      statusCode: "terminal-session-missing",
      logPath: path.relative(REPO_ROOT, run.logPath),
      detail: `terminal projection for ${run.sessionName} disappeared before ${path.relative(REPO_ROOT, run.statusPath)} was written.`,
    });
  }
  return warnings;
}
62
146
 
63
- export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
147
+ export async function launchAgentSession(
148
+ lanePaths,
149
+ params,
150
+ { spawnRunnerFn = spawnAgentProcessRunner } = {},
151
+ ) {
64
152
  const {
65
153
  wave,
66
154
  waveDefinition = null,
@@ -84,19 +172,48 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
84
172
  context7Enabled,
85
173
  designExecutionMode = null,
86
174
  dryRun = false,
175
+ runtimePath = null,
87
176
  } = params;
88
177
  ensureDirectory(path.dirname(promptPath));
89
178
  ensureDirectory(path.dirname(logPath));
90
179
  ensureDirectory(path.dirname(statusPath));
180
+ if (runtimePath && fs.existsSync(runtimePath)) {
181
+ const priorRuntime = readJsonOrNull(runtimePath);
182
+ if (priorRuntime && typeof priorRuntime === "object") {
183
+ await terminateAgentProcessRuntime(priorRuntime);
184
+ }
185
+ }
91
186
  fs.rmSync(statusPath, { force: true });
187
+ if (runtimePath) {
188
+ ensureDirectory(path.dirname(runtimePath));
189
+ fs.rmSync(runtimePath, { force: true });
190
+ }
92
191
 
93
- const context7 = await prefetchContext7ForSelection(agent.context7Resolved, {
94
- cacheDir: lanePaths.context7CacheDir,
95
- disabled: !context7Enabled,
96
- });
192
+ const resolvedWaveDefinition = waveDefinition || { deployEnvironments: [] };
193
+ const context7 = dryRun
194
+ ? buildDryRunContext7Preview(agent.context7Resolved || null)
195
+ : await prefetchContext7ForSelection(agent.context7Resolved, {
196
+ lanePaths,
197
+ cacheDir: lanePaths.context7CacheDir,
198
+ disabled: !context7Enabled,
199
+ });
200
+ const integrationAgentId =
201
+ waveDefinition?.integrationAgentId || lanePaths.integrationAgentId || "A8";
202
+ const shouldLoadCorridorContext =
203
+ lanePaths.externalProviders?.corridor?.enabled === true &&
204
+ (isSecurityReviewAgent(agent) || agent.agentId === integrationAgentId);
205
+ const corridorContext = !dryRun && shouldLoadCorridorContext
206
+ ? await materializeWaveCorridorContext(lanePaths, resolvedWaveDefinition)
207
+ : null;
208
+ const corridorContextPath = !dryRun && shouldLoadCorridorContext
209
+ ? waveCorridorContextPath(lanePaths, wave)
210
+ : null;
211
+ const corridorContextText =
212
+ dryRun && shouldLoadCorridorContext
213
+ ? "Corridor context omitted in dry-run preview."
214
+ : renderCorridorPromptContext(corridorContext);
97
215
  const overlayDir = path.join(lanePaths.executorOverlaysDir, `wave-${wave}`, agent.slug);
98
216
  ensureDirectory(overlayDir);
99
- const resolvedWaveDefinition = waveDefinition || { deployEnvironments: [] };
100
217
  const skillsResolved =
101
218
  agent.skillsResolved ||
102
219
  resolveAgentSkills(agent, resolvedWaveDefinition, {
@@ -125,6 +242,8 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
125
242
  inboxPath,
126
243
  inboxText,
127
244
  context7,
245
+ corridorContextPath,
246
+ corridorContextText,
128
247
  componentPromotions: resolvedWaveDefinition.componentPromotions,
129
248
  evalTargets: resolvedWaveDefinition.evalTargets,
130
249
  benchmarkCatalogPath: lanePaths.laneProfile?.paths?.benchmarkCatalogPath,
@@ -150,11 +269,45 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
150
269
  overlayDir,
151
270
  skillProjection: agent.skillsResolved,
152
271
  });
272
+ const requestedCredentialProviders = Array.isArray(lanePaths.waveControl?.credentialProviders)
273
+ ? lanePaths.waveControl.credentialProviders
274
+ : [];
275
+ const requestedCredentials = Array.isArray(lanePaths.waveControl?.credentials)
276
+ ? lanePaths.waveControl.credentials
277
+ : [];
278
+ const leasedProviderEnv =
279
+ !dryRun && requestedCredentialProviders.length > 0
280
+ ? await requestWaveControlProviderEnv(fetch, lanePaths.waveControl, requestedCredentialProviders)
281
+ : {};
282
+ const leasedCredentialEnv =
283
+ !dryRun && requestedCredentials.length > 0
284
+ ? await requestWaveControlCredentialEnv(fetch, lanePaths.waveControl, requestedCredentials)
285
+ : {};
286
+ const overlappingLeasedEnvVars = Object.keys(leasedProviderEnv).filter((key) =>
287
+ Object.prototype.hasOwnProperty.call(leasedCredentialEnv, key),
288
+ );
289
+ if (overlappingLeasedEnvVars.length > 0) {
290
+ throw new Error(
291
+ `Wave Control leased duplicate environment variables: ${overlappingLeasedEnvVars.join(", ")}.`,
292
+ );
293
+ }
294
+ const leasedEnv = {
295
+ ...leasedProviderEnv,
296
+ ...leasedCredentialEnv,
297
+ };
298
+ if (Object.keys(leasedEnv).length > 0) {
299
+ launchSpec.env = {
300
+ ...(launchSpec.env || {}),
301
+ ...leasedEnv,
302
+ };
303
+ }
153
304
  const resolvedExecutorMode = launchSpec.executorId || agent.executorResolved?.id || "codex";
154
305
  writeJsonAtomic(path.join(overlayDir, "launch-preview.json"), {
155
306
  executorId: resolvedExecutorMode,
156
307
  command: launchSpec.command,
157
- env: launchSpec.env || {},
308
+ env: redactPreviewEnv(launchSpec.env || {}, Object.keys(leasedEnv)),
309
+ credentialProviders: requestedCredentialProviders,
310
+ credentials: requestedCredentials,
158
311
  useRateLimitRetries: launchSpec.useRateLimitRetries === true,
159
312
  invocationLines: launchSpec.invocationLines,
160
313
  limits: launchSpec.limits || null,
@@ -164,13 +317,13 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
164
317
  return {
165
318
  promptHash,
166
319
  context7,
320
+ corridorContext,
167
321
  executorId: resolvedExecutorMode,
168
322
  launchSpec,
169
323
  dryRun: true,
170
324
  skills: summarizeResolvedSkills(agent.skillsResolved),
171
325
  };
172
326
  }
173
- killTmuxSessionIfExists(lanePaths.tmuxSocketName, sessionName);
174
327
 
175
328
  const executionLines = [];
176
329
  if (launchSpec.env) {
@@ -195,6 +348,9 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
195
348
  executionLines.push("rate_attempt=1");
196
349
  executionLines.push("status=1");
197
350
  executionLines.push('while [ "$rate_attempt" -le "$max_rate_attempts" ]; do');
351
+ executionLines.push(
352
+ ` attempt_log_offset=$(wc -c < ${shellQuote(logPath)} 2>/dev/null || echo 0)`,
353
+ );
198
354
  for (const line of launchSpec.invocationLines) {
199
355
  executionLines.push(` ${line}`);
200
356
  }
@@ -206,7 +362,7 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
206
362
  executionLines.push(" break");
207
363
  executionLines.push(" fi");
208
364
  executionLines.push(
209
- ` if tail -n 120 ${shellQuote(logPath)} | grep -Eqi '429 Too Many Requests|exceeded retry limit|last status: 429|rate limit'; then`,
365
+ ` if tail -c +$((attempt_log_offset + 1)) ${shellQuote(logPath)} | grep -Eqi '429 Too Many Requests|exceeded retry limit|last status: 429|rate limit'; then`,
210
366
  );
211
367
  executionLines.push(" sleep_seconds=$((rate_delay_base * (2 ** (rate_attempt - 1))))");
212
368
  executionLines.push(
@@ -231,23 +387,67 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
231
387
  `export WAVE_ORCHESTRATOR_ID=${shellQuote(orchestratorId || "")}`,
232
388
  `export WAVE_EXECUTOR_MODE=${shellQuote(resolvedExecutorMode)}`,
233
389
  ...executionLines,
234
- `node -e ${shellQuote(
235
- "const fs=require('node:fs'); const statusPath=process.argv[1]; const payload={code:Number(process.argv[2]),promptHash:process.argv[3]||null,orchestratorId:process.argv[4]||null,attempt:Number(process.argv[5])||1,completedAt:new Date().toISOString()}; fs.writeFileSync(statusPath, JSON.stringify(payload, null, 2)+'\\n', 'utf8');",
236
- )} ${shellQuote(statusPath)} "$status" ${shellQuote(promptHash)} ${shellQuote(orchestratorId || "")} ${shellQuote(String(attempt || 1))}`,
237
- `echo "[${lanePaths.lane}-wave-launcher] ${sessionName} finished with code $status"`,
238
- "exit \"$status\"",
239
390
  ].join("\n");
240
-
241
- runTmuxFn(
242
- lanePaths,
243
- ["new-session", "-d", "-s", sessionName, `bash -lc ${shellQuote(command)}`],
244
- `launch session ${sessionName}`,
245
- );
391
+ const payloadPath = path.join(overlayDir, "runner-payload.json");
392
+ const initialRuntimeRecord = runtimePath
393
+ ? {
394
+ runId: process.env.WAVE_SUPERVISOR_RUN_ID || null,
395
+ waveNumber: wave,
396
+ attempt: Number(attempt || 1),
397
+ agentId: agent.agentId,
398
+ sessionName,
399
+ tmuxSessionName: null,
400
+ sessionBackend: "process",
401
+ attachMode: "log-tail",
402
+ runnerPid: null,
403
+ executorPid: null,
404
+ pid: null,
405
+ pgid: null,
406
+ startedAt: new Date().toISOString(),
407
+ lastHeartbeatAt: new Date().toISOString(),
408
+ statusPath,
409
+ logPath,
410
+ exitCode: null,
411
+ exitReason: null,
412
+ terminalDisposition: "launching",
413
+ }
414
+ : null;
415
+ if (runtimePath && initialRuntimeRecord) {
416
+ writeJsonAtomic(runtimePath, initialRuntimeRecord);
417
+ }
418
+ const runner = spawnRunnerFn({
419
+ payloadPath,
420
+ runId: process.env.WAVE_SUPERVISOR_RUN_ID || null,
421
+ lane: lanePaths.lane,
422
+ waveNumber: wave,
423
+ attempt: Number(attempt || 1),
424
+ agentId: agent.agentId,
425
+ sessionName,
426
+ runtimePath,
427
+ statusPath,
428
+ logPath,
429
+ promptHash,
430
+ orchestratorId: orchestratorId || "",
431
+ executorId: resolvedExecutorMode,
432
+ env: launchSpec.env || {},
433
+ command,
434
+ });
435
+ if (runtimePath && initialRuntimeRecord) {
436
+ writeJsonAtomic(runtimePath, {
437
+ ...initialRuntimeRecord,
438
+ runnerPid: runner?.runnerPid || null,
439
+ lastHeartbeatAt: new Date().toISOString(),
440
+ });
441
+ }
246
442
  return {
247
443
  promptHash,
248
444
  context7,
445
+ corridorContext,
249
446
  executorId: resolvedExecutorMode,
250
447
  skills: summarizeResolvedSkills(agent.skillsResolved),
448
+ runtimePath,
449
+ sessionBackend: "process",
450
+ attachMode: "log-tail",
251
451
  };
252
452
  }
253
453
 
@@ -256,7 +456,7 @@ export async function waitForWaveCompletion(
256
456
  agentRuns,
257
457
  timeoutMinutes,
258
458
  onProgress = null,
259
- { collectUnexpectedSessionFailuresFn },
459
+ { collectUnexpectedSessionWarningsFn = () => [] },
260
460
  ) {
261
461
  const defaultTimeoutMs = timeoutMinutes * 60 * 1000;
262
462
  const startedAt = Date.now();
@@ -272,8 +472,7 @@ export async function waitForWaveCompletion(
272
472
  );
273
473
  const pending = new Set(agentRuns.map((run) => run.agent.agentId));
274
474
  const timedOutAgentIds = new Set();
275
- let sessionFailures = [];
276
-
475
+ let sessionWarnings = [];
277
476
  const refreshPending = () => {
278
477
  for (const run of agentRuns) {
279
478
  if (pending.has(run.agent.agentId) && fs.existsSync(run.statusPath)) {
@@ -282,51 +481,58 @@ export async function waitForWaveCompletion(
282
481
  }
283
482
  };
284
483
 
285
- await new Promise((resolve) => {
286
- const interval = setInterval(() => {
287
- refreshPending();
288
- onProgress?.({ pendingAgentIds: new Set(pending), timedOut: false });
289
- if (pending.size === 0) {
290
- clearInterval(interval);
291
- resolve();
292
- return;
293
- }
294
- sessionFailures = collectUnexpectedSessionFailuresFn(lanePaths, agentRuns, pending);
295
- if (sessionFailures.length > 0) {
296
- onProgress?.({
297
- pendingAgentIds: new Set(pending),
298
- timedOut: false,
299
- failures: sessionFailures,
300
- });
301
- clearInterval(interval);
302
- resolve();
303
- return;
484
+ while (true) {
485
+ refreshPending();
486
+ onProgress?.({ pendingAgentIds: new Set(pending), timedOut: false });
487
+ if (pending.size === 0) {
488
+ break;
489
+ }
490
+ sessionWarnings = collectUnexpectedSessionWarningsFn(lanePaths, agentRuns, pending);
491
+ if (sessionWarnings.length > 0) {
492
+ onProgress?.({
493
+ pendingAgentIds: new Set(pending),
494
+ timedOut: false,
495
+ warnings: sessionWarnings,
496
+ });
497
+ }
498
+ const now = Date.now();
499
+ for (const run of agentRuns) {
500
+ if (!pending.has(run.agent.agentId)) {
501
+ continue;
304
502
  }
305
- const now = Date.now();
306
- for (const run of agentRuns) {
307
- if (!pending.has(run.agent.agentId)) {
308
- continue;
309
- }
310
- const deadline = timeoutAtByAgentId.get(run.agent.agentId) || startedAt + defaultTimeoutMs;
311
- if (now <= deadline) {
312
- continue;
503
+ if (run.runtimePath && fs.existsSync(run.runtimePath)) {
504
+ try {
505
+ const runtimeRecord = readJsonOrNull(run.runtimePath);
506
+ if (
507
+ runtimeRecord &&
508
+ typeof runtimeRecord === "object" &&
509
+ ["completed", "failed", "terminated"].includes(
510
+ String(runtimeRecord.terminalDisposition || ""),
511
+ )
512
+ ) {
513
+ pending.delete(run.agent.agentId);
514
+ continue;
515
+ }
516
+ } catch {
517
+ // best-effort runtime observation only
313
518
  }
314
- timedOutAgentIds.add(run.agent.agentId);
315
- pending.delete(run.agent.agentId);
316
- killTmuxSessionIfExists(lanePaths.tmuxSocketName, run.sessionName);
317
519
  }
318
- if (pending.size === 0) {
319
- clearInterval(interval);
320
- resolve();
520
+ const deadline = timeoutAtByAgentId.get(run.agent.agentId) || startedAt + defaultTimeoutMs;
521
+ if (now <= deadline) {
522
+ continue;
321
523
  }
322
- }, DEFAULT_WAIT_PROGRESS_INTERVAL_MS);
323
- refreshPending();
324
- onProgress?.({ pendingAgentIds: new Set(pending), timedOut: false });
325
- });
326
-
327
- if (sessionFailures.length > 0) {
328
- onProgress?.({ pendingAgentIds: new Set(), timedOut: false, failures: sessionFailures });
329
- return { failures: sessionFailures, timedOut: false };
524
+ timedOutAgentIds.add(run.agent.agentId);
525
+ pending.delete(run.agent.agentId);
526
+ const runtimeRecord =
527
+ run.runtimePath && fs.existsSync(run.runtimePath) ? readJsonOrNull(run.runtimePath) : null;
528
+ if (runtimeRecord) {
529
+ await terminateAgentProcessRuntime(runtimeRecord);
530
+ }
531
+ }
532
+ if (pending.size === 0) {
533
+ break;
534
+ }
535
+ await sleep(DEFAULT_WAIT_PROGRESS_INTERVAL_MS);
330
536
  }
331
537
 
332
538
  const failures = [];
@@ -336,10 +542,19 @@ export async function waitForWaveCompletion(
336
542
  continue;
337
543
  }
338
544
  if (code === null || timedOutAgentIds.has(run.agent.agentId)) {
545
+ let runtimeRecord = null;
546
+ if (run.runtimePath && fs.existsSync(run.runtimePath)) {
547
+ runtimeRecord = readJsonOrNull(run.runtimePath);
548
+ }
339
549
  failures.push({
340
550
  agentId: run.agent.agentId,
341
- statusCode: timedOutAgentIds.has(run.agent.agentId) ? "timeout-no-status" : "missing-status",
551
+ statusCode: timedOutAgentIds.has(run.agent.agentId)
552
+ ? "timeout-no-status"
553
+ : runtimeRecord?.terminalDisposition === "failed"
554
+ ? "runtime-failed-before-status"
555
+ : "missing-status",
342
556
  logPath: path.relative(REPO_ROOT, run.logPath),
557
+ detail: runtimeRecord?.exitReason || null,
343
558
  });
344
559
  continue;
345
560
  }