@chllming/wave-orchestration 0.9.0 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +57 -0
- package/LICENSE.md +21 -0
- package/README.md +133 -20
- package/docs/README.md +12 -4
- package/docs/agents/wave-security-role.md +1 -0
- package/docs/architecture/README.md +1498 -0
- package/docs/concepts/operating-modes.md +2 -2
- package/docs/guides/author-and-run-waves.md +14 -4
- package/docs/guides/planner.md +2 -2
- package/docs/guides/{recommendations-0.9.0.md → recommendations-0.9.2.md} +8 -7
- package/docs/guides/sandboxed-environments.md +158 -0
- package/docs/guides/terminal-surfaces.md +14 -12
- package/docs/plans/current-state.md +11 -3
- package/docs/plans/end-state-architecture.md +3 -1
- package/docs/plans/examples/wave-example-design-handoff.md +1 -1
- package/docs/plans/examples/wave-example-live-proof.md +1 -1
- package/docs/plans/migration.md +70 -19
- package/docs/plans/sandbox-end-state-architecture.md +153 -0
- package/docs/reference/cli-reference.md +71 -7
- package/docs/reference/coordination-and-closure.md +18 -1
- package/docs/reference/corridor.md +225 -0
- package/docs/reference/github-packages-setup.md +1 -1
- package/docs/reference/migration-0.2-to-0.5.md +9 -7
- package/docs/reference/npmjs-token-publishing.md +53 -0
- package/docs/reference/npmjs-trusted-publishing.md +4 -50
- package/docs/reference/package-publishing-flow.md +272 -0
- package/docs/reference/runtime-config/README.md +61 -3
- package/docs/reference/sample-waves.md +5 -5
- package/docs/reference/skills.md +1 -1
- package/docs/reference/wave-control.md +358 -27
- package/docs/roadmap.md +39 -204
- package/package.json +1 -1
- package/releases/manifest.json +38 -0
- package/scripts/wave-cli-bootstrap.mjs +52 -1
- package/scripts/wave-orchestrator/agent-process-runner.mjs +344 -0
- package/scripts/wave-orchestrator/agent-state.mjs +0 -1
- package/scripts/wave-orchestrator/artifact-schemas.mjs +7 -0
- package/scripts/wave-orchestrator/autonomous.mjs +47 -14
- package/scripts/wave-orchestrator/closure-engine.mjs +138 -17
- package/scripts/wave-orchestrator/config.mjs +199 -3
- package/scripts/wave-orchestrator/context7.mjs +231 -29
- package/scripts/wave-orchestrator/control-cli.mjs +42 -5
- package/scripts/wave-orchestrator/coordination.mjs +14 -0
- package/scripts/wave-orchestrator/corridor.mjs +363 -0
- package/scripts/wave-orchestrator/dashboard-renderer.mjs +115 -43
- package/scripts/wave-orchestrator/derived-state-engine.mjs +44 -4
- package/scripts/wave-orchestrator/gate-engine.mjs +126 -38
- package/scripts/wave-orchestrator/install.mjs +46 -0
- package/scripts/wave-orchestrator/launcher-progress.mjs +91 -0
- package/scripts/wave-orchestrator/launcher-runtime.mjs +290 -75
- package/scripts/wave-orchestrator/launcher.mjs +201 -53
- package/scripts/wave-orchestrator/ledger.mjs +7 -2
- package/scripts/wave-orchestrator/planner.mjs +1 -0
- package/scripts/wave-orchestrator/projection-writer.mjs +36 -1
- package/scripts/wave-orchestrator/provider-runtime.mjs +104 -0
- package/scripts/wave-orchestrator/reducer-snapshot.mjs +6 -0
- package/scripts/wave-orchestrator/retry-control.mjs +3 -3
- package/scripts/wave-orchestrator/retry-engine.mjs +93 -6
- package/scripts/wave-orchestrator/role-helpers.mjs +30 -0
- package/scripts/wave-orchestrator/session-supervisor.mjs +94 -85
- package/scripts/wave-orchestrator/shared.mjs +1 -0
- package/scripts/wave-orchestrator/supervisor-cli.mjs +1306 -0
- package/scripts/wave-orchestrator/terminals.mjs +12 -32
- package/scripts/wave-orchestrator/tmux-adapter.mjs +300 -0
- package/scripts/wave-orchestrator/traces.mjs +25 -0
- package/scripts/wave-orchestrator/wave-control-client.mjs +14 -1
- package/scripts/wave-orchestrator/wave-files.mjs +38 -5
- package/scripts/wave.mjs +13 -0
|
@@ -1,20 +1,31 @@
|
|
|
1
1
|
import fs from "node:fs";
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import { buildExecutionPrompt } from "./coordination.mjs";
|
|
4
|
+
import {
|
|
5
|
+
materializeWaveCorridorContext,
|
|
6
|
+
renderCorridorPromptContext,
|
|
7
|
+
waveCorridorContextPath,
|
|
8
|
+
} from "./corridor.mjs";
|
|
4
9
|
import {
|
|
5
10
|
DEFAULT_AGENT_RATE_LIMIT_BASE_DELAY_SECONDS,
|
|
6
11
|
DEFAULT_AGENT_RATE_LIMIT_MAX_DELAY_SECONDS,
|
|
7
12
|
DEFAULT_WAIT_PROGRESS_INTERVAL_MS,
|
|
8
13
|
REPO_ROOT,
|
|
9
14
|
ensureDirectory,
|
|
15
|
+
readJsonOrNull,
|
|
10
16
|
shellQuote,
|
|
17
|
+
sleep,
|
|
11
18
|
writeJsonAtomic,
|
|
12
19
|
} from "./shared.mjs";
|
|
13
20
|
import { readStatusCodeIfPresent } from "./dashboard-state.mjs";
|
|
14
21
|
import { buildExecutorLaunchSpec } from "./executors.mjs";
|
|
15
22
|
import { hashAgentPromptFingerprint, prefetchContext7ForSelection } from "./context7.mjs";
|
|
16
|
-
import {
|
|
17
|
-
|
|
23
|
+
import {
|
|
24
|
+
isDesignAgent,
|
|
25
|
+
isSecurityReviewAgent,
|
|
26
|
+
resolveDesignReportPath,
|
|
27
|
+
resolveWaveRoleBindings,
|
|
28
|
+
} from "./role-helpers.mjs";
|
|
18
29
|
import {
|
|
19
30
|
resolveAgentSkills,
|
|
20
31
|
summarizeResolvedSkills,
|
|
@@ -25,6 +36,48 @@ import {
|
|
|
25
36
|
agentSignalPath,
|
|
26
37
|
agentUsesSignalHygiene,
|
|
27
38
|
} from "./signals.mjs";
|
|
39
|
+
import {
|
|
40
|
+
spawnAgentProcessRunner,
|
|
41
|
+
terminateAgentProcessRuntime,
|
|
42
|
+
} from "./agent-process-runner.mjs";
|
|
43
|
+
import {
|
|
44
|
+
requestWaveControlCredentialEnv,
|
|
45
|
+
requestWaveControlProviderEnv,
|
|
46
|
+
} from "./provider-runtime.mjs";
|
|
47
|
+
|
|
48
|
+
/**
 * Build a copy of an environment map that is safe to persist in a preview file.
 *
 * The input object is never mutated. Only keys that are own properties of the
 * copied env are masked; names listed in `redactedKeys` that are absent from
 * `env` are NOT added to the result.
 *
 * @param {Record<string, unknown> | null} [env] - Environment map; null/undefined is treated as empty.
 * @param {Iterable<string> | null} [redactedKeys] - Names whose values are replaced with "[redacted]";
 *   null/undefined is treated as "nothing to redact".
 * @returns {Record<string, unknown>} Shallow copy of `env` with the selected values masked.
 */
function redactPreviewEnv(env = {}, redactedKeys = []) {
  const output = { ...(env || {}) };
  // A default parameter only covers `undefined`; treat an explicit null the
  // same way a null `env` is already tolerated instead of throwing in for...of.
  for (const key of redactedKeys ?? []) {
    if (Object.prototype.hasOwnProperty.call(output, key)) {
      output[key] = "[redacted]";
    }
  }
  return output;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Produce the Context7 record used for dry-run launches, without performing
 * any prefetch.
 *
 * The result has mode "none" (empty warning) when there is no selection, the
 * selection's bundleId is "none", or it carries no non-empty `libraries`
 * array. Otherwise the mode is "dry-run" and the warning states that the
 * prefetch was skipped. `promptText` and `snippetHash` are always empty.
 *
 * @param {object | null | undefined} selection - Context7 selection; echoed back unchanged.
 * @returns {{mode: string, selection: *, promptText: string, snippetHash: string, warning: string}}
 */
function buildDryRunContext7Preview(selection) {
  const libraries = selection?.libraries;
  const wouldPrefetch =
    Boolean(selection) &&
    selection.bundleId !== "none" &&
    Array.isArray(libraries) &&
    libraries.length > 0;

  const preview = {
    mode: "none",
    selection,
    promptText: "",
    snippetHash: "",
    warning: "",
  };
  if (wouldPrefetch) {
    preview.mode = "dry-run";
    preview.warning = "Context7 prefetch skipped during dry-run preview.";
  }
  return preview;
}
|
|
28
81
|
|
|
29
82
|
export function refreshResolvedSkillsForRun(runInfo, waveDefinition, lanePaths) {
|
|
30
83
|
runInfo.agent.skillsResolved = resolveAgentSkills(
|
|
@@ -35,32 +88,67 @@ export function refreshResolvedSkillsForRun(runInfo, waveDefinition, lanePaths)
|
|
|
35
88
|
return runInfo.agent.skillsResolved;
|
|
36
89
|
}
|
|
37
90
|
|
|
38
|
-
export function
|
|
91
|
+
/**
 * Copy the outcome of a launch onto the mutable run record and return it.
 *
 * Nothing is written when either `runInfo` or `launchResult` is falsy; the
 * (possibly falsy) `runInfo` is returned as-is in that case.
 *
 * @param {object} runInfo - Run record, mutated in place.
 * @param {object} launchResult - Launch result; reads promptHash, context7,
 *   executorId, skills, runtimePath, sessionBackend and attachMode.
 * @param {object} [options]
 * @param {*} [options.attempt=null] - Stored as `lastLaunchAttempt` unless null/undefined.
 * @param {*} [options.fallbackExecutorId=null] - Used when the result has no executorId.
 * @param {*} [options.fallbackSkills=null] - Used when the result has no skills.
 * @returns {object} The same `runInfo` reference that was passed in.
 */
export function applyLaunchResultToRun(
  runInfo,
  launchResult,
  { attempt = null, fallbackExecutorId = null, fallbackSkills = null } = {},
) {
  if (runInfo && launchResult) {
    // `== null` deliberately matches both null and undefined.
    if (!(attempt == null)) {
      runInfo.lastLaunchAttempt = attempt;
    }
    Object.assign(runInfo, {
      lastPromptHash: launchResult.promptHash || null,
      lastContext7: launchResult.context7 || null,
      lastExecutorId: launchResult.executorId || fallbackExecutorId || null,
      lastSkillProjection: launchResult.skills || fallbackSkills || null,
      // The remaining fields keep whatever the run already had when the
      // launch result does not supply a value.
      runtimePath: launchResult.runtimePath || runInfo.runtimePath || null,
      sessionBackend: launchResult.sessionBackend || runInfo.sessionBackend || "process",
      attachMode: launchResult.attachMode || runInfo.attachMode || "log-tail",
    });
  }
  return runInfo;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Collect warnings for pending agents whose runtime record reports a
 * "projection-missing" terminal disposition before a status file exists.
 *
 * An agent run is skipped when it is not in `pendingAgentIds`, when its status
 * file already exists, when it has no runtime record on disk, or when that
 * record cannot be read as a JSON object.
 *
 * @param {object} lanePaths - Accepted for signature compatibility; not read here.
 * @param {Array<object>} agentRuns - Runs exposing agent.agentId, statusPath,
 *   runtimePath, logPath and sessionName.
 * @param {Set<string>} pendingAgentIds - Agent ids still awaiting a status file.
 * @param {object} [options] - Accepted for backward compatibility;
 *   `listLaneTmuxSessionNamesFn` is no longer consulted.
 * @returns {Array<{agentId: string, statusCode: string, logPath: string, detail: string}>}
 */
export function collectUnexpectedSessionWarnings(
  lanePaths,
  agentRuns,
  pendingAgentIds,
  // Defaulted so three-argument calls do not throw while destructuring.
  { listLaneTmuxSessionNamesFn } = {},
) {
  const warnings = [];
  for (const run of agentRuns) {
    if (!pendingAgentIds.has(run.agent.agentId) || fs.existsSync(run.statusPath)) {
      continue;
    }
    if (!run.runtimePath || !fs.existsSync(run.runtimePath)) {
      continue;
    }
    let runtimeRecord = null;
    try {
      runtimeRecord = JSON.parse(fs.readFileSync(run.runtimePath, "utf8"));
    } catch {
      // Best-effort observation: the record may vanish or be unreadable between
      // the existence check and the read; that must not abort warning collection.
      continue;
    }
    if (!runtimeRecord || typeof runtimeRecord !== "object") {
      continue;
    }
    if (runtimeRecord.terminalDisposition !== "projection-missing") {
      continue;
    }
    warnings.push({
      agentId: run.agent.agentId,
      statusCode: "terminal-session-missing",
      logPath: path.relative(REPO_ROOT, run.logPath),
      detail: `terminal projection for ${run.sessionName} disappeared before ${path.relative(REPO_ROOT, run.statusPath)} was written.`,
    });
  }
  return warnings;
}
|
|
62
146
|
|
|
63
|
-
export async function launchAgentSession(
|
|
147
|
+
export async function launchAgentSession(
|
|
148
|
+
lanePaths,
|
|
149
|
+
params,
|
|
150
|
+
{ spawnRunnerFn = spawnAgentProcessRunner } = {},
|
|
151
|
+
) {
|
|
64
152
|
const {
|
|
65
153
|
wave,
|
|
66
154
|
waveDefinition = null,
|
|
@@ -84,19 +172,48 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
|
|
|
84
172
|
context7Enabled,
|
|
85
173
|
designExecutionMode = null,
|
|
86
174
|
dryRun = false,
|
|
175
|
+
runtimePath = null,
|
|
87
176
|
} = params;
|
|
88
177
|
ensureDirectory(path.dirname(promptPath));
|
|
89
178
|
ensureDirectory(path.dirname(logPath));
|
|
90
179
|
ensureDirectory(path.dirname(statusPath));
|
|
180
|
+
if (runtimePath && fs.existsSync(runtimePath)) {
|
|
181
|
+
const priorRuntime = readJsonOrNull(runtimePath);
|
|
182
|
+
if (priorRuntime && typeof priorRuntime === "object") {
|
|
183
|
+
await terminateAgentProcessRuntime(priorRuntime);
|
|
184
|
+
}
|
|
185
|
+
}
|
|
91
186
|
fs.rmSync(statusPath, { force: true });
|
|
187
|
+
if (runtimePath) {
|
|
188
|
+
ensureDirectory(path.dirname(runtimePath));
|
|
189
|
+
fs.rmSync(runtimePath, { force: true });
|
|
190
|
+
}
|
|
92
191
|
|
|
93
|
-
const
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
192
|
+
const resolvedWaveDefinition = waveDefinition || { deployEnvironments: [] };
|
|
193
|
+
const context7 = dryRun
|
|
194
|
+
? buildDryRunContext7Preview(agent.context7Resolved || null)
|
|
195
|
+
: await prefetchContext7ForSelection(agent.context7Resolved, {
|
|
196
|
+
lanePaths,
|
|
197
|
+
cacheDir: lanePaths.context7CacheDir,
|
|
198
|
+
disabled: !context7Enabled,
|
|
199
|
+
});
|
|
200
|
+
const integrationAgentId =
|
|
201
|
+
waveDefinition?.integrationAgentId || lanePaths.integrationAgentId || "A8";
|
|
202
|
+
const shouldLoadCorridorContext =
|
|
203
|
+
lanePaths.externalProviders?.corridor?.enabled === true &&
|
|
204
|
+
(isSecurityReviewAgent(agent) || agent.agentId === integrationAgentId);
|
|
205
|
+
const corridorContext = !dryRun && shouldLoadCorridorContext
|
|
206
|
+
? await materializeWaveCorridorContext(lanePaths, resolvedWaveDefinition)
|
|
207
|
+
: null;
|
|
208
|
+
const corridorContextPath = !dryRun && shouldLoadCorridorContext
|
|
209
|
+
? waveCorridorContextPath(lanePaths, wave)
|
|
210
|
+
: null;
|
|
211
|
+
const corridorContextText =
|
|
212
|
+
dryRun && shouldLoadCorridorContext
|
|
213
|
+
? "Corridor context omitted in dry-run preview."
|
|
214
|
+
: renderCorridorPromptContext(corridorContext);
|
|
97
215
|
const overlayDir = path.join(lanePaths.executorOverlaysDir, `wave-${wave}`, agent.slug);
|
|
98
216
|
ensureDirectory(overlayDir);
|
|
99
|
-
const resolvedWaveDefinition = waveDefinition || { deployEnvironments: [] };
|
|
100
217
|
const skillsResolved =
|
|
101
218
|
agent.skillsResolved ||
|
|
102
219
|
resolveAgentSkills(agent, resolvedWaveDefinition, {
|
|
@@ -125,6 +242,8 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
|
|
|
125
242
|
inboxPath,
|
|
126
243
|
inboxText,
|
|
127
244
|
context7,
|
|
245
|
+
corridorContextPath,
|
|
246
|
+
corridorContextText,
|
|
128
247
|
componentPromotions: resolvedWaveDefinition.componentPromotions,
|
|
129
248
|
evalTargets: resolvedWaveDefinition.evalTargets,
|
|
130
249
|
benchmarkCatalogPath: lanePaths.laneProfile?.paths?.benchmarkCatalogPath,
|
|
@@ -150,11 +269,45 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
|
|
|
150
269
|
overlayDir,
|
|
151
270
|
skillProjection: agent.skillsResolved,
|
|
152
271
|
});
|
|
272
|
+
const requestedCredentialProviders = Array.isArray(lanePaths.waveControl?.credentialProviders)
|
|
273
|
+
? lanePaths.waveControl.credentialProviders
|
|
274
|
+
: [];
|
|
275
|
+
const requestedCredentials = Array.isArray(lanePaths.waveControl?.credentials)
|
|
276
|
+
? lanePaths.waveControl.credentials
|
|
277
|
+
: [];
|
|
278
|
+
const leasedProviderEnv =
|
|
279
|
+
!dryRun && requestedCredentialProviders.length > 0
|
|
280
|
+
? await requestWaveControlProviderEnv(fetch, lanePaths.waveControl, requestedCredentialProviders)
|
|
281
|
+
: {};
|
|
282
|
+
const leasedCredentialEnv =
|
|
283
|
+
!dryRun && requestedCredentials.length > 0
|
|
284
|
+
? await requestWaveControlCredentialEnv(fetch, lanePaths.waveControl, requestedCredentials)
|
|
285
|
+
: {};
|
|
286
|
+
const overlappingLeasedEnvVars = Object.keys(leasedProviderEnv).filter((key) =>
|
|
287
|
+
Object.prototype.hasOwnProperty.call(leasedCredentialEnv, key),
|
|
288
|
+
);
|
|
289
|
+
if (overlappingLeasedEnvVars.length > 0) {
|
|
290
|
+
throw new Error(
|
|
291
|
+
`Wave Control leased duplicate environment variables: ${overlappingLeasedEnvVars.join(", ")}.`,
|
|
292
|
+
);
|
|
293
|
+
}
|
|
294
|
+
const leasedEnv = {
|
|
295
|
+
...leasedProviderEnv,
|
|
296
|
+
...leasedCredentialEnv,
|
|
297
|
+
};
|
|
298
|
+
if (Object.keys(leasedEnv).length > 0) {
|
|
299
|
+
launchSpec.env = {
|
|
300
|
+
...(launchSpec.env || {}),
|
|
301
|
+
...leasedEnv,
|
|
302
|
+
};
|
|
303
|
+
}
|
|
153
304
|
const resolvedExecutorMode = launchSpec.executorId || agent.executorResolved?.id || "codex";
|
|
154
305
|
writeJsonAtomic(path.join(overlayDir, "launch-preview.json"), {
|
|
155
306
|
executorId: resolvedExecutorMode,
|
|
156
307
|
command: launchSpec.command,
|
|
157
|
-
env: launchSpec.env || {},
|
|
308
|
+
env: redactPreviewEnv(launchSpec.env || {}, Object.keys(leasedEnv)),
|
|
309
|
+
credentialProviders: requestedCredentialProviders,
|
|
310
|
+
credentials: requestedCredentials,
|
|
158
311
|
useRateLimitRetries: launchSpec.useRateLimitRetries === true,
|
|
159
312
|
invocationLines: launchSpec.invocationLines,
|
|
160
313
|
limits: launchSpec.limits || null,
|
|
@@ -164,13 +317,13 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
|
|
|
164
317
|
return {
|
|
165
318
|
promptHash,
|
|
166
319
|
context7,
|
|
320
|
+
corridorContext,
|
|
167
321
|
executorId: resolvedExecutorMode,
|
|
168
322
|
launchSpec,
|
|
169
323
|
dryRun: true,
|
|
170
324
|
skills: summarizeResolvedSkills(agent.skillsResolved),
|
|
171
325
|
};
|
|
172
326
|
}
|
|
173
|
-
killTmuxSessionIfExists(lanePaths.tmuxSocketName, sessionName);
|
|
174
327
|
|
|
175
328
|
const executionLines = [];
|
|
176
329
|
if (launchSpec.env) {
|
|
@@ -195,6 +348,9 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
|
|
|
195
348
|
executionLines.push("rate_attempt=1");
|
|
196
349
|
executionLines.push("status=1");
|
|
197
350
|
executionLines.push('while [ "$rate_attempt" -le "$max_rate_attempts" ]; do');
|
|
351
|
+
executionLines.push(
|
|
352
|
+
` attempt_log_offset=$(wc -c < ${shellQuote(logPath)} 2>/dev/null || echo 0)`,
|
|
353
|
+
);
|
|
198
354
|
for (const line of launchSpec.invocationLines) {
|
|
199
355
|
executionLines.push(` ${line}`);
|
|
200
356
|
}
|
|
@@ -206,7 +362,7 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
|
|
|
206
362
|
executionLines.push(" break");
|
|
207
363
|
executionLines.push(" fi");
|
|
208
364
|
executionLines.push(
|
|
209
|
-
` if tail -
|
|
365
|
+
` if tail -c +$((attempt_log_offset + 1)) ${shellQuote(logPath)} | grep -Eqi '429 Too Many Requests|exceeded retry limit|last status: 429|rate limit'; then`,
|
|
210
366
|
);
|
|
211
367
|
executionLines.push(" sleep_seconds=$((rate_delay_base * (2 ** (rate_attempt - 1))))");
|
|
212
368
|
executionLines.push(
|
|
@@ -231,23 +387,67 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
|
|
|
231
387
|
`export WAVE_ORCHESTRATOR_ID=${shellQuote(orchestratorId || "")}`,
|
|
232
388
|
`export WAVE_EXECUTOR_MODE=${shellQuote(resolvedExecutorMode)}`,
|
|
233
389
|
...executionLines,
|
|
234
|
-
`node -e ${shellQuote(
|
|
235
|
-
"const fs=require('node:fs'); const statusPath=process.argv[1]; const payload={code:Number(process.argv[2]),promptHash:process.argv[3]||null,orchestratorId:process.argv[4]||null,attempt:Number(process.argv[5])||1,completedAt:new Date().toISOString()}; fs.writeFileSync(statusPath, JSON.stringify(payload, null, 2)+'\\n', 'utf8');",
|
|
236
|
-
)} ${shellQuote(statusPath)} "$status" ${shellQuote(promptHash)} ${shellQuote(orchestratorId || "")} ${shellQuote(String(attempt || 1))}`,
|
|
237
|
-
`echo "[${lanePaths.lane}-wave-launcher] ${sessionName} finished with code $status"`,
|
|
238
|
-
"exit \"$status\"",
|
|
239
390
|
].join("\n");
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
391
|
+
const payloadPath = path.join(overlayDir, "runner-payload.json");
|
|
392
|
+
const initialRuntimeRecord = runtimePath
|
|
393
|
+
? {
|
|
394
|
+
runId: process.env.WAVE_SUPERVISOR_RUN_ID || null,
|
|
395
|
+
waveNumber: wave,
|
|
396
|
+
attempt: Number(attempt || 1),
|
|
397
|
+
agentId: agent.agentId,
|
|
398
|
+
sessionName,
|
|
399
|
+
tmuxSessionName: null,
|
|
400
|
+
sessionBackend: "process",
|
|
401
|
+
attachMode: "log-tail",
|
|
402
|
+
runnerPid: null,
|
|
403
|
+
executorPid: null,
|
|
404
|
+
pid: null,
|
|
405
|
+
pgid: null,
|
|
406
|
+
startedAt: new Date().toISOString(),
|
|
407
|
+
lastHeartbeatAt: new Date().toISOString(),
|
|
408
|
+
statusPath,
|
|
409
|
+
logPath,
|
|
410
|
+
exitCode: null,
|
|
411
|
+
exitReason: null,
|
|
412
|
+
terminalDisposition: "launching",
|
|
413
|
+
}
|
|
414
|
+
: null;
|
|
415
|
+
if (runtimePath && initialRuntimeRecord) {
|
|
416
|
+
writeJsonAtomic(runtimePath, initialRuntimeRecord);
|
|
417
|
+
}
|
|
418
|
+
const runner = spawnRunnerFn({
|
|
419
|
+
payloadPath,
|
|
420
|
+
runId: process.env.WAVE_SUPERVISOR_RUN_ID || null,
|
|
421
|
+
lane: lanePaths.lane,
|
|
422
|
+
waveNumber: wave,
|
|
423
|
+
attempt: Number(attempt || 1),
|
|
424
|
+
agentId: agent.agentId,
|
|
425
|
+
sessionName,
|
|
426
|
+
runtimePath,
|
|
427
|
+
statusPath,
|
|
428
|
+
logPath,
|
|
429
|
+
promptHash,
|
|
430
|
+
orchestratorId: orchestratorId || "",
|
|
431
|
+
executorId: resolvedExecutorMode,
|
|
432
|
+
env: launchSpec.env || {},
|
|
433
|
+
command,
|
|
434
|
+
});
|
|
435
|
+
if (runtimePath && initialRuntimeRecord) {
|
|
436
|
+
writeJsonAtomic(runtimePath, {
|
|
437
|
+
...initialRuntimeRecord,
|
|
438
|
+
runnerPid: runner?.runnerPid || null,
|
|
439
|
+
lastHeartbeatAt: new Date().toISOString(),
|
|
440
|
+
});
|
|
441
|
+
}
|
|
246
442
|
return {
|
|
247
443
|
promptHash,
|
|
248
444
|
context7,
|
|
445
|
+
corridorContext,
|
|
249
446
|
executorId: resolvedExecutorMode,
|
|
250
447
|
skills: summarizeResolvedSkills(agent.skillsResolved),
|
|
448
|
+
runtimePath,
|
|
449
|
+
sessionBackend: "process",
|
|
450
|
+
attachMode: "log-tail",
|
|
251
451
|
};
|
|
252
452
|
}
|
|
253
453
|
|
|
@@ -256,7 +456,7 @@ export async function waitForWaveCompletion(
|
|
|
256
456
|
agentRuns,
|
|
257
457
|
timeoutMinutes,
|
|
258
458
|
onProgress = null,
|
|
259
|
-
{
|
|
459
|
+
{ collectUnexpectedSessionWarningsFn = () => [] },
|
|
260
460
|
) {
|
|
261
461
|
const defaultTimeoutMs = timeoutMinutes * 60 * 1000;
|
|
262
462
|
const startedAt = Date.now();
|
|
@@ -272,8 +472,7 @@ export async function waitForWaveCompletion(
|
|
|
272
472
|
);
|
|
273
473
|
const pending = new Set(agentRuns.map((run) => run.agent.agentId));
|
|
274
474
|
const timedOutAgentIds = new Set();
|
|
275
|
-
let
|
|
276
|
-
|
|
475
|
+
let sessionWarnings = [];
|
|
277
476
|
const refreshPending = () => {
|
|
278
477
|
for (const run of agentRuns) {
|
|
279
478
|
if (pending.has(run.agent.agentId) && fs.existsSync(run.statusPath)) {
|
|
@@ -282,51 +481,58 @@ export async function waitForWaveCompletion(
|
|
|
282
481
|
}
|
|
283
482
|
};
|
|
284
483
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
return;
|
|
484
|
+
while (true) {
|
|
485
|
+
refreshPending();
|
|
486
|
+
onProgress?.({ pendingAgentIds: new Set(pending), timedOut: false });
|
|
487
|
+
if (pending.size === 0) {
|
|
488
|
+
break;
|
|
489
|
+
}
|
|
490
|
+
sessionWarnings = collectUnexpectedSessionWarningsFn(lanePaths, agentRuns, pending);
|
|
491
|
+
if (sessionWarnings.length > 0) {
|
|
492
|
+
onProgress?.({
|
|
493
|
+
pendingAgentIds: new Set(pending),
|
|
494
|
+
timedOut: false,
|
|
495
|
+
warnings: sessionWarnings,
|
|
496
|
+
});
|
|
497
|
+
}
|
|
498
|
+
const now = Date.now();
|
|
499
|
+
for (const run of agentRuns) {
|
|
500
|
+
if (!pending.has(run.agent.agentId)) {
|
|
501
|
+
continue;
|
|
304
502
|
}
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
503
|
+
if (run.runtimePath && fs.existsSync(run.runtimePath)) {
|
|
504
|
+
try {
|
|
505
|
+
const runtimeRecord = readJsonOrNull(run.runtimePath);
|
|
506
|
+
if (
|
|
507
|
+
runtimeRecord &&
|
|
508
|
+
typeof runtimeRecord === "object" &&
|
|
509
|
+
["completed", "failed", "terminated"].includes(
|
|
510
|
+
String(runtimeRecord.terminalDisposition || ""),
|
|
511
|
+
)
|
|
512
|
+
) {
|
|
513
|
+
pending.delete(run.agent.agentId);
|
|
514
|
+
continue;
|
|
515
|
+
}
|
|
516
|
+
} catch {
|
|
517
|
+
// best-effort runtime observation only
|
|
313
518
|
}
|
|
314
|
-
timedOutAgentIds.add(run.agent.agentId);
|
|
315
|
-
pending.delete(run.agent.agentId);
|
|
316
|
-
killTmuxSessionIfExists(lanePaths.tmuxSocketName, run.sessionName);
|
|
317
519
|
}
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
520
|
+
const deadline = timeoutAtByAgentId.get(run.agent.agentId) || startedAt + defaultTimeoutMs;
|
|
521
|
+
if (now <= deadline) {
|
|
522
|
+
continue;
|
|
321
523
|
}
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
524
|
+
timedOutAgentIds.add(run.agent.agentId);
|
|
525
|
+
pending.delete(run.agent.agentId);
|
|
526
|
+
const runtimeRecord =
|
|
527
|
+
run.runtimePath && fs.existsSync(run.runtimePath) ? readJsonOrNull(run.runtimePath) : null;
|
|
528
|
+
if (runtimeRecord) {
|
|
529
|
+
await terminateAgentProcessRuntime(runtimeRecord);
|
|
530
|
+
}
|
|
531
|
+
}
|
|
532
|
+
if (pending.size === 0) {
|
|
533
|
+
break;
|
|
534
|
+
}
|
|
535
|
+
await sleep(DEFAULT_WAIT_PROGRESS_INTERVAL_MS);
|
|
330
536
|
}
|
|
331
537
|
|
|
332
538
|
const failures = [];
|
|
@@ -336,10 +542,19 @@ export async function waitForWaveCompletion(
|
|
|
336
542
|
continue;
|
|
337
543
|
}
|
|
338
544
|
if (code === null || timedOutAgentIds.has(run.agent.agentId)) {
|
|
545
|
+
let runtimeRecord = null;
|
|
546
|
+
if (run.runtimePath && fs.existsSync(run.runtimePath)) {
|
|
547
|
+
runtimeRecord = readJsonOrNull(run.runtimePath);
|
|
548
|
+
}
|
|
339
549
|
failures.push({
|
|
340
550
|
agentId: run.agent.agentId,
|
|
341
|
-
statusCode: timedOutAgentIds.has(run.agent.agentId)
|
|
551
|
+
statusCode: timedOutAgentIds.has(run.agent.agentId)
|
|
552
|
+
? "timeout-no-status"
|
|
553
|
+
: runtimeRecord?.terminalDisposition === "failed"
|
|
554
|
+
? "runtime-failed-before-status"
|
|
555
|
+
: "missing-status",
|
|
342
556
|
logPath: path.relative(REPO_ROOT, run.logPath),
|
|
557
|
+
detail: runtimeRecord?.exitReason || null,
|
|
343
558
|
});
|
|
344
559
|
continue;
|
|
345
560
|
}
|