@chllming/wave-orchestration 0.9.0 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +57 -0
- package/LICENSE.md +21 -0
- package/README.md +133 -20
- package/docs/README.md +12 -4
- package/docs/agents/wave-security-role.md +1 -0
- package/docs/architecture/README.md +1498 -0
- package/docs/concepts/operating-modes.md +2 -2
- package/docs/guides/author-and-run-waves.md +14 -4
- package/docs/guides/planner.md +2 -2
- package/docs/guides/{recommendations-0.9.0.md → recommendations-0.9.2.md} +8 -7
- package/docs/guides/sandboxed-environments.md +158 -0
- package/docs/guides/terminal-surfaces.md +14 -12
- package/docs/plans/current-state.md +11 -3
- package/docs/plans/end-state-architecture.md +3 -1
- package/docs/plans/examples/wave-example-design-handoff.md +1 -1
- package/docs/plans/examples/wave-example-live-proof.md +1 -1
- package/docs/plans/migration.md +70 -19
- package/docs/plans/sandbox-end-state-architecture.md +153 -0
- package/docs/reference/cli-reference.md +71 -7
- package/docs/reference/coordination-and-closure.md +18 -1
- package/docs/reference/corridor.md +225 -0
- package/docs/reference/github-packages-setup.md +1 -1
- package/docs/reference/migration-0.2-to-0.5.md +9 -7
- package/docs/reference/npmjs-token-publishing.md +53 -0
- package/docs/reference/npmjs-trusted-publishing.md +4 -50
- package/docs/reference/package-publishing-flow.md +272 -0
- package/docs/reference/runtime-config/README.md +61 -3
- package/docs/reference/sample-waves.md +5 -5
- package/docs/reference/skills.md +1 -1
- package/docs/reference/wave-control.md +358 -27
- package/docs/roadmap.md +39 -204
- package/package.json +1 -1
- package/releases/manifest.json +38 -0
- package/scripts/wave-cli-bootstrap.mjs +52 -1
- package/scripts/wave-orchestrator/agent-process-runner.mjs +344 -0
- package/scripts/wave-orchestrator/agent-state.mjs +0 -1
- package/scripts/wave-orchestrator/artifact-schemas.mjs +7 -0
- package/scripts/wave-orchestrator/autonomous.mjs +47 -14
- package/scripts/wave-orchestrator/closure-engine.mjs +138 -17
- package/scripts/wave-orchestrator/config.mjs +199 -3
- package/scripts/wave-orchestrator/context7.mjs +231 -29
- package/scripts/wave-orchestrator/control-cli.mjs +42 -5
- package/scripts/wave-orchestrator/coordination.mjs +14 -0
- package/scripts/wave-orchestrator/corridor.mjs +363 -0
- package/scripts/wave-orchestrator/dashboard-renderer.mjs +115 -43
- package/scripts/wave-orchestrator/derived-state-engine.mjs +44 -4
- package/scripts/wave-orchestrator/gate-engine.mjs +126 -38
- package/scripts/wave-orchestrator/install.mjs +46 -0
- package/scripts/wave-orchestrator/launcher-progress.mjs +91 -0
- package/scripts/wave-orchestrator/launcher-runtime.mjs +290 -75
- package/scripts/wave-orchestrator/launcher.mjs +201 -53
- package/scripts/wave-orchestrator/ledger.mjs +7 -2
- package/scripts/wave-orchestrator/planner.mjs +1 -0
- package/scripts/wave-orchestrator/projection-writer.mjs +36 -1
- package/scripts/wave-orchestrator/provider-runtime.mjs +104 -0
- package/scripts/wave-orchestrator/reducer-snapshot.mjs +6 -0
- package/scripts/wave-orchestrator/retry-control.mjs +3 -3
- package/scripts/wave-orchestrator/retry-engine.mjs +93 -6
- package/scripts/wave-orchestrator/role-helpers.mjs +30 -0
- package/scripts/wave-orchestrator/session-supervisor.mjs +94 -85
- package/scripts/wave-orchestrator/shared.mjs +1 -0
- package/scripts/wave-orchestrator/supervisor-cli.mjs +1306 -0
- package/scripts/wave-orchestrator/terminals.mjs +12 -32
- package/scripts/wave-orchestrator/tmux-adapter.mjs +300 -0
- package/scripts/wave-orchestrator/traces.mjs +25 -0
- package/scripts/wave-orchestrator/wave-control-client.mjs +14 -1
- package/scripts/wave-orchestrator/wave-files.mjs +38 -5
- package/scripts/wave.mjs +13 -0
|
@@ -0,0 +1,1306 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import crypto from "node:crypto";
|
|
3
|
+
import fs from "node:fs";
|
|
4
|
+
import path from "node:path";
|
|
5
|
+
import { buildLanePaths, ensureDirectory, PACKAGE_ROOT, REPO_ROOT, readFileTail, readJsonOrNull, readStatusRecordIfPresent, shellQuote, sleep, toIsoTimestamp, sanitizeAdhocRunId, sanitizeLaneName, parsePositiveInt, parseNonNegativeInt, writeJsonAtomic } from "./shared.mjs";
|
|
6
|
+
import { loadWaveConfig } from "./config.mjs";
|
|
7
|
+
import { attachSession as attachTmuxSession } from "./tmux-adapter.mjs";
|
|
8
|
+
import {
|
|
9
|
+
readLauncherProgress,
|
|
10
|
+
} from "./launcher-progress.mjs";
|
|
11
|
+
|
|
12
|
+
// Polling interval in milliseconds. Not referenced in the visible part of this
// file; presumably the supervisor daemon loop cadence (TODO confirm).
const DEFAULT_SUPERVISOR_POLL_MS = 2000;
// Lease length in milliseconds. Used to compute `leaseExpiresAt` for the daemon
// lock and for run state, and doubled as the default heartbeat freshness window.
const DEFAULT_SUPERVISOR_LEASE_MS = 15000;
// Upper bound compared against `state.resumeAttempts` before reconciliation
// relaunches a lost launcher.
const DEFAULT_SUPERVISOR_RESUME_LIMIT = 1;
// Run statuses accepted by normalizeRunState; anything else becomes "pending".
const SUPERVISOR_STATUS_VALUES = new Set(["pending", "running", "completed", "failed"]);
|
|
16
|
+
|
|
17
|
+
/**
 * Derives the supervisor directory layout for a lane.
 *
 * @param {{ controlDir: string }} lanePaths - Lane paths; only `controlDir` is read.
 * @returns {{ rootDir: string, runsDir: string, lockPath: string }} Supervisor
 *   root, per-run directory root, and daemon lock file path.
 */
export function buildSupervisorPaths(lanePaths) {
  const supervisorRoot = path.join(lanePaths.controlDir, "supervisor");
  const layout = { rootDir: supervisorRoot };
  layout.runsDir = path.join(supervisorRoot, "runs");
  layout.lockPath = path.join(supervisorRoot, "daemon.lock");
  return layout;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Reports whether a process with the given pid currently exists.
 *
 * Uses signal 0, which performs the existence/permission check without
 * delivering a signal.
 *
 * @param {number} pid - Candidate process id.
 * @returns {boolean} `true` when the process exists; `false` for non-integer or
 *   non-positive pids and for processes that cannot be found.
 */
function isProcessAlive(pid) {
  if (!Number.isInteger(pid) || pid <= 0) {
    return false;
  }
  try {
    process.kill(pid, 0);
    return true;
  } catch (error) {
    // EPERM means the process exists but belongs to another user; treating it
    // as dead would make a live lock holder or launcher look stale.
    return error?.code === "EPERM";
  }
}
|
|
37
|
+
|
|
38
|
+
/**
 * Creates a run identifier of the form `run-<epoch ms>-<8 hex chars>`.
 *
 * @returns {string} The generated run id.
 */
function generateRunId() {
  const createdAtMs = Date.now();
  const randomSuffix = crypto.randomBytes(4).toString("hex");
  return ["run", createdAtMs, randomSuffix].join("-");
}
|
|
41
|
+
|
|
42
|
+
/**
 * Extracts the project / lane / ad-hoc run selection from launcher CLI args.
 *
 * Recognizes `--project`, `--lane` and `--adhoc-run`, each consuming the next
 * argument as its value; every other argument is ignored. The project defaults
 * to the configured `defaultProject` and the lane to "main".
 *
 * @param {string[]} argv - Raw launcher arguments.
 * @returns {{ project: string, lane: string, adhocRunId: string|null }}
 */
function parseLauncherContext(argv) {
  const waveConfig = loadWaveConfig();
  const context = {
    project: waveConfig.defaultProject,
    lane: "main",
    adhocRunId: null,
  };
  let cursor = 0;
  while (cursor < argv.length) {
    const flag = argv[cursor];
    const value = argv[cursor + 1];
    switch (flag) {
      case "--project":
        // A blank or missing value keeps the current project.
        context.project = String(value || "").trim() || context.project;
        cursor += 2;
        break;
      case "--lane":
        context.lane = sanitizeLaneName(value);
        cursor += 2;
        break;
      case "--adhoc-run":
        context.adhocRunId = sanitizeAdhocRunId(value);
        cursor += 2;
        break;
      default:
        cursor += 1;
    }
  }
  return context;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Resolves the directory that holds all files for one supervisor run.
 *
 * @param {{ runsDir: string }} paths - Supervisor paths.
 * @param {string} runId - Run identifier.
 * @returns {string} `<runsDir>/<runId>`.
 */
function runDirForId(paths, runId) {
  const { runsDir } = paths;
  return path.join(runsDir, runId);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Exported alias of the module-private run directory resolver.
 *
 * @param {{ runsDir: string }} paths - Supervisor paths.
 * @param {string} runId - Run identifier.
 * @returns {string} The run directory path.
 */
export function supervisorRunDirForId(paths, runId) {
  const runDir = runDirForId(paths, runId);
  return runDir;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Resolves the `state.json` path inside a run directory.
 *
 * @param {{ runsDir: string }} paths - Supervisor paths.
 * @param {string} runId - Run identifier.
 * @returns {string} Path of the run's state file.
 */
function statePathForRun(paths, runId) {
  const runDir = runDirForId(paths, runId);
  return path.join(runDir, "state.json");
}
|
|
76
|
+
|
|
77
|
+
/**
 * Exported alias of the module-private state path resolver.
 *
 * @param {{ runsDir: string }} paths - Supervisor paths.
 * @param {string} runId - Run identifier.
 * @returns {string} Path of the run's state file.
 */
export function supervisorStatePathForRun(paths, runId) {
  const statePath = statePathForRun(paths, runId);
  return statePath;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Resolves the `request.json` path inside a run directory.
 *
 * @param {{ runsDir: string }} paths - Supervisor paths.
 * @param {string} runId - Run identifier.
 * @returns {string} Path of the run's request file.
 */
function requestPathForRun(paths, runId) {
  const runDir = runDirForId(paths, runId);
  return path.join(runDir, "request.json");
}
|
|
84
|
+
|
|
85
|
+
/**
 * Resolves the `launcher-status.json` path inside a run directory.
 *
 * @param {{ runsDir: string }} paths - Supervisor paths.
 * @param {string} runId - Run identifier.
 * @returns {string} Path of the launcher exit-status file.
 */
function launcherStatusPathForRun(paths, runId) {
  const runDir = runDirForId(paths, runId);
  return path.join(runDir, "launcher-status.json");
}
|
|
88
|
+
|
|
89
|
+
/**
 * Resolves the `launcher.log` path inside a run directory.
 *
 * @param {{ runsDir: string }} paths - Supervisor paths.
 * @param {string} runId - Run identifier.
 * @returns {string} Path of the launcher log file.
 */
function launcherLogPathForRun(paths, runId) {
  const runDir = runDirForId(paths, runId);
  return path.join(runDir, "launcher.log");
}
|
|
92
|
+
|
|
93
|
+
/**
 * Resolves the `launcher-progress.json` path inside a run directory.
 *
 * @param {{ runsDir: string }} paths - Supervisor paths.
 * @param {string} runId - Run identifier.
 * @returns {string} Path of the launcher progress journal.
 */
function launcherProgressPathForRun(paths, runId) {
  const runDir = runDirForId(paths, runId);
  return path.join(runDir, "launcher-progress.json");
}
|
|
96
|
+
|
|
97
|
+
/**
 * Resolves the `agents` directory inside a run directory, where per-agent
 * runtime records are stored.
 *
 * @param {{ runsDir: string }} paths - Supervisor paths.
 * @param {string} runId - Run identifier.
 * @returns {string} Path of the run's agent runtime directory.
 */
function agentRuntimeDirForRun(paths, runId) {
  const runDir = runDirForId(paths, runId);
  return path.join(runDir, "agents");
}
|
|
100
|
+
|
|
101
|
+
/**
 * Resolves the runtime record path for one agent of a run.
 *
 * @param {{ runsDir: string }} paths - Supervisor paths.
 * @param {string} runId - Run identifier.
 * @param {string} agentId - Agent identifier used as the file name stem.
 * @returns {string} `<run dir>/agents/<agentId>.runtime.json`.
 */
export function supervisorAgentRuntimePathForRun(paths, runId, agentId) {
  const agentsDir = agentRuntimeDirForRun(paths, runId);
  const fileName = `${agentId}.runtime.json`;
  return path.join(agentsDir, fileName);
}
|
|
104
|
+
|
|
105
|
+
/**
 * Reconstructs supervisor paths from a run's state file location by walking up
 * the directory tree: state file -> run dir -> runs dir -> supervisor root.
 *
 * @param {string} statePath - Path of a run's `state.json`.
 * @returns {{ rootDir: string, runsDir: string, lockPath: string }}
 */
function supervisorPathsFromStatePath(statePath) {
  const runsDir = path.dirname(path.dirname(statePath));
  const rootDir = path.dirname(runsDir);
  return { rootDir, runsDir, lockPath: path.join(rootDir, "daemon.lock") };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Validates a parsed run-state payload and normalizes its `status`.
 *
 * @param {unknown} payload - Parsed JSON value.
 * @returns {object|null} A copy of the payload with `status` trimmed,
 *   lower-cased and replaced by "pending" when unrecognized; `null` when the
 *   payload is not a non-array object.
 */
function normalizeRunState(payload) {
  const isRecord = Boolean(payload) && typeof payload === "object" && !Array.isArray(payload);
  if (!isRecord) {
    return null;
  }
  const candidate = String(payload.status || "").trim().toLowerCase();
  const status = SUPERVISOR_STATUS_VALUES.has(candidate) ? candidate : "pending";
  return { ...payload, status };
}
|
|
126
|
+
|
|
127
|
+
/**
 * Loads and normalizes a run state file.
 *
 * @param {string} statePath - Path of the run's `state.json`.
 * @returns {object|null} The normalized state, or `null` when normalization
 *   rejects what was read.
 */
function readRunState(statePath) {
  const rawState = readJsonOrNull(statePath);
  return normalizeRunState(rawState);
}
|
|
130
|
+
|
|
131
|
+
/**
 * Persists a run state via `writeJsonAtomic` and hands the same object back so
 * callers can `return writeRunState(...)`.
 *
 * @param {string} statePath - Destination `state.json` path.
 * @param {object} payload - State to persist.
 * @returns {object} The `payload` argument, unchanged.
 */
function writeRunState(statePath, payload) {
  const persisted = payload;
  writeJsonAtomic(statePath, persisted);
  return persisted;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Ensures the supervisor root and runs directories exist and, when a run id is
 * given, the run directory and its agent runtime directory as well.
 *
 * @param {{ rootDir: string, runsDir: string }} paths - Supervisor paths.
 * @param {string|null} [runId=null] - Optional run whose directories to create.
 * @returns {void}
 */
function ensureSupervisorDirectories(paths, runId = null) {
  ensureDirectory(paths.rootDir);
  ensureDirectory(paths.runsDir);
  if (!runId) {
    return;
  }
  // Resolve and create one directory at a time, run directory first.
  [runDirForId, agentRuntimeDirForRun].forEach((resolveDir) => {
    ensureDirectory(resolveDir(paths, runId));
  });
}
|
|
144
|
+
|
|
145
|
+
/**
 * Resolves the `events.jsonl` path inside a run directory.
 *
 * @param {{ runsDir: string }} paths - Supervisor paths.
 * @param {string} runId - Run identifier.
 * @returns {string} Path of the run's event log.
 */
function eventsPathForRun(paths, runId) {
  const runDir = runDirForId(paths, runId);
  return path.join(runDir, "events.jsonl");
}
|
|
148
|
+
|
|
149
|
+
/**
 * Appends one JSON line to the run's event log.
 *
 * `recordedAt` is stamped first and the event is spread over it, so an event
 * carrying its own `recordedAt` wins.
 *
 * @param {{ runsDir: string }} paths - Supervisor paths.
 * @param {string} runId - Run identifier.
 * @param {object} event - Event fields to record.
 * @returns {void}
 */
function appendSupervisorEvent(paths, runId, event) {
  const eventsPath = eventsPathForRun(paths, runId);
  const record = { recordedAt: toIsoTimestamp(), ...event };
  const line = `${JSON.stringify(record)}\n`;
  fs.appendFileSync(eventsPath, line, "utf8");
}
|
|
156
|
+
|
|
157
|
+
/**
 * Extracts `--start-wave` / `--end-wave` values from launcher arguments.
 *
 * Each flag consumes the following argument as its value. When a flag repeats,
 * the last occurrence wins. Non-array input (for example a `null` restored
 * from persisted state) is treated as an empty argument list instead of
 * throwing.
 *
 * @param {string[]} [launcherArgs=[]] - Launcher arguments.
 * @returns {{ startWave: number|null, endWave: number|null }} Parsed bounds;
 *   `null` where the flag is absent or its value is not an integer.
 */
function parseWaveBoundsFromLauncherArgs(launcherArgs = []) {
  const args = Array.isArray(launcherArgs) ? launcherArgs : [];
  // `??` rather than `||` so a numeric 0 value is not discarded.
  const readWave = (raw) => {
    const parsed = Number.parseInt(String(raw ?? ""), 10);
    return Number.isFinite(parsed) ? parsed : null;
  };
  let startWave = null;
  let endWave = null;
  for (let index = 0; index < args.length; index += 1) {
    const arg = args[index];
    if (arg === "--start-wave") {
      startWave = readWave(args[index + 1]);
      index += 1;
    } else if (arg === "--end-wave") {
      endWave = readWave(args[index + 1]);
      index += 1;
    }
  }
  return { startWave, endWave };
}
|
|
175
|
+
|
|
176
|
+
/**
 * Summarizes which waves the launcher arguments select.
 *
 * @param {string[]} [launcherArgs=[]] - Launcher arguments.
 * @returns {{ startWave: number|null, endWave: number|null, autoNext: boolean }}
 *   Wave bounds plus whether `--auto-next` is present.
 */
function parseLauncherWaveSelection(launcherArgs = []) {
  const bounds = parseWaveBoundsFromLauncherArgs(launcherArgs);
  const autoNext = launcherArgsInclude(launcherArgs, "--auto-next");
  return { startWave: bounds.startWave, endWave: bounds.endWave, autoNext };
}
|
|
184
|
+
|
|
185
|
+
/**
 * Derives the wave a launcher starts on from its arguments.
 *
 * A single-wave selection (start equals end) and every other selection with a
 * start bound both resolve to the start wave, so only that bound is consulted.
 *
 * @param {string[]} [launcherArgs=[]] - Launcher arguments.
 * @returns {number|null} The start wave, or `null` when none is specified.
 */
function deriveActiveWaveFromLauncherArgs(launcherArgs = []) {
  const { startWave } = parseWaveBoundsFromLauncherArgs(launcherArgs);
  if (!Number.isFinite(startWave)) {
    return null;
  }
  return startWave;
}
|
|
192
|
+
|
|
193
|
+
/**
 * Tests whether a flag appears in the launcher arguments.
 *
 * @param {string[]} [launcherArgs=[]] - Launcher arguments; non-arrays never match.
 * @param {string} flag - Flag to look for.
 * @returns {boolean} `true` when `launcherArgs` is an array containing `flag`.
 */
function launcherArgsInclude(launcherArgs = [], flag) {
  if (!Array.isArray(launcherArgs)) {
    return false;
  }
  return launcherArgs.includes(flag);
}
|
|
196
|
+
|
|
197
|
+
/**
 * Lists the explicit wave numbers selected by the launcher arguments.
 *
 * @param {string[]} [launcherArgs=[]] - Launcher arguments.
 * @returns {number[]} Every wave from start to end inclusive; empty when
 *   `--auto-next` is set or when the bounds do not form a closed ascending range.
 */
function selectedWavesFromLauncherArgs(launcherArgs = []) {
  const selection = parseLauncherWaveSelection(launcherArgs);
  const { startWave, endWave } = selection;
  const hasClosedRange =
    Number.isFinite(startWave) && Number.isFinite(endWave) && endWave >= startWave;
  if (selection.autoNext || !hasClosedRange) {
    return [];
  }
  const waves = [];
  for (let wave = startWave; wave <= endWave; wave += 1) {
    waves.push(wave);
  }
  return waves;
}
|
|
207
|
+
|
|
208
|
+
/**
 * Resolves the active wave of a run from its persisted state.
 *
 * Prefers the recorded `activeWave`; otherwise derives it from the stored
 * launcher arguments.
 *
 * @param {object|null|undefined} state - Persisted run state.
 * @returns {number|null} The active wave, or `null` when it cannot be determined.
 */
function activeWaveFromState(state) {
  const recorded = state?.activeWave;
  // Only real numbers and non-blank strings are candidates: `Number(null)` and
  // `Number("")` are both 0, which would turn "unknown" into wave 0.
  const isCandidate =
    typeof recorded === "number" || (typeof recorded === "string" && recorded.trim() !== "");
  if (isCandidate && Number.isFinite(Number(recorded))) {
    return Number(recorded);
  }
  // Persisted state may carry `launcherArgs: null`, which a default parameter
  // would not replace.
  return deriveActiveWaveFromLauncherArgs(
    Array.isArray(state?.launcherArgs) ? state.launcherArgs : [],
  );
}
|
|
214
|
+
|
|
215
|
+
/**
 * Projects agent runtime records onto the compact summary stored in run state.
 *
 * Missing fields become `null`, except `sessionBackend` (defaults to
 * "process") and `attachMode` (defaults to "log-tail"). `pid` falls back to
 * `executorPid`, then `runnerPid`.
 *
 * @param {object[]} [runtimeRecords=[]] - Agent runtime records.
 * @returns {object[]} One summary object per record, in input order.
 */
function buildAgentRuntimeSummary(runtimeRecords = []) {
  const summarize = (record) => {
    const effectivePid = record.pid || record.executorPid || record.runnerPid || null;
    return {
      agentId: record.agentId || null,
      pid: effectivePid,
      pgid: record.pgid || null,
      runnerPid: record.runnerPid || null,
      executorPid: record.executorPid || null,
      lastHeartbeatAt: record.lastHeartbeatAt || null,
      // `??` keeps an exit code of 0.
      exitCode: record.exitCode ?? null,
      terminalDisposition: record.terminalDisposition || null,
      sessionBackend: record.sessionBackend || "process",
      attachMode: record.attachMode || "log-tail",
    };
  };
  return runtimeRecords.map((record) => summarize(record));
}
|
|
229
|
+
|
|
230
|
+
/**
 * Builds the JSON payload written into the supervisor daemon lock file.
 *
 * @param {{ project: string, lane: string, adhocRunId?: string|null }} context
 *   Launcher context identifying what the supervisor serves.
 * @param {{ supervisorId?: string|null }} [options] - Existing supervisor id to
 *   reuse; a fresh `supervisor-<pid>-<hex>` id is generated when omitted.
 * @returns {object} Lock payload with identity, timestamps and a lease expiry
 *   `DEFAULT_SUPERVISOR_LEASE_MS` in the future.
 */
function buildSupervisorLockPayload(context, { supervisorId = null } = {}) {
  const now = toIsoTimestamp();
  const resolvedId =
    supervisorId || `supervisor-${process.pid}-${crypto.randomBytes(4).toString("hex")}`;
  const leaseDeadline = new Date(Date.now() + DEFAULT_SUPERVISOR_LEASE_MS);
  return {
    supervisorId: resolvedId,
    pid: process.pid,
    project: context.project,
    lane: context.lane,
    adhocRunId: context.adhocRunId || null,
    acquiredAt: now,
    heartbeatAt: now,
    leaseExpiresAt: leaseDeadline.toISOString(),
  };
}
|
|
243
|
+
|
|
244
|
+
/**
 * Checks whether a lock payload's lease is still in the future.
 *
 * @param {{ leaseExpiresAt?: string }|null|undefined} payload - Lock payload.
 * @returns {boolean} `true` only when `leaseExpiresAt` parses as a date later
 *   than now.
 */
function supervisorLeaseIsFresh(payload) {
  const rawExpiry = payload?.leaseExpiresAt || "";
  const expiresAtMs = Date.parse(String(rawExpiry));
  if (!Number.isFinite(expiresAtMs)) {
    return false;
  }
  return Date.now() < expiresAtMs;
}
|
|
248
|
+
|
|
249
|
+
/**
 * Replaces the contents of the supervisor lock file.
 *
 * The payload is written to a sibling temp file and renamed over the lock so a
 * concurrent reader never observes a truncated or half-written file. That
 * matters because acquireSupervisorLock treats an unparseable lock as stale
 * and removes it.
 *
 * @param {string} lockPath - Path of the lock file.
 * @param {object} payload - Lock payload to serialize (pretty-printed JSON plus
 *   a trailing newline).
 * @returns {object} The `payload` argument, unchanged.
 * @throws {Error} Any filesystem error from writing or renaming; the temp file
 *   is removed before rethrowing.
 */
function writeSupervisorLock(lockPath, payload) {
  const tempPath = `${lockPath}.${process.pid}.${crypto.randomBytes(4).toString("hex")}.tmp`;
  try {
    fs.writeFileSync(tempPath, `${JSON.stringify(payload, null, 2)}\n`, "utf8");
    fs.renameSync(tempPath, lockPath);
  } catch (error) {
    fs.rmSync(tempPath, { force: true });
    throw error;
  }
  return payload;
}
|
|
253
|
+
|
|
254
|
+
/**
 * Tries to take the supervisor daemon lock.
 *
 * The lock file is created exclusively (`wx`). If it already exists, the
 * holder keeps it only while its pid is alive AND its lease is fresh;
 * otherwise the stale file is removed and acquisition is retried.
 *
 * @param {string} lockPath - Path of the lock file.
 * @param {{ project: string, lane: string, adhocRunId?: string|null }} context
 *   Launcher context recorded in the lock payload.
 * @returns {object|null} The written lock payload, or `null` when another live
 *   supervisor holds the lock.
 * @throws {Error} Filesystem errors other than `EEXIST` on creation, and any
 *   error while writing the payload.
 */
function acquireSupervisorLock(lockPath, context) {
  ensureDirectory(path.dirname(lockPath));
  const payload = buildSupervisorLockPayload(context);
  let fd;
  try {
    fd = fs.openSync(lockPath, "wx");
  } catch (error) {
    if (error?.code !== "EEXIST") {
      throw error;
    }
    const existing = readJsonOrNull(lockPath);
    const existingPid = Number.parseInt(String(existing?.pid ?? ""), 10);
    if (isProcessAlive(existingPid) && supervisorLeaseIsFresh(existing)) {
      return null;
    }
    fs.rmSync(lockPath, { force: true });
    return acquireSupervisorLock(lockPath, context);
  }
  try {
    fs.writeFileSync(fd, `${JSON.stringify(payload, null, 2)}\n`, "utf8");
  } finally {
    // Close even when the write throws so the descriptor is not leaked.
    fs.closeSync(fd);
  }
  return payload;
}
|
|
275
|
+
|
|
276
|
+
/**
 * Deletes the supervisor lock file; a missing file is not an error.
 *
 * @param {string} lockPath - Path of the lock file.
 * @returns {void}
 */
function releaseSupervisorLock(lockPath) {
  const removalOptions = { force: true };
  fs.rmSync(lockPath, removalOptions);
}
|
|
279
|
+
|
|
280
|
+
/**
 * Builds the shell script that runs the wave launcher for a supervised run.
 *
 * The script changes into the repo root, runs the launcher with stdout/stderr
 * appended to the log file, captures its exit status, records that status plus
 * a completion timestamp as JSON in the status file through a small inline
 * Node program, and always exits 0 itself.
 *
 * @param {object} options
 * @param {string[]} options.launcherArgs - Arguments forwarded to the launcher.
 * @param {string} options.launcherStatusPath - File receiving `{ exitCode, completedAt }`.
 * @param {string} options.launcherLogPath - File receiving launcher output.
 * @returns {string} Newline-separated shell commands.
 */
function launcherWrapperCommand({ launcherArgs, launcherStatusPath, launcherLogPath }) {
  const launcherEntrypoint = path.join(PACKAGE_ROOT, "scripts", "wave-launcher.mjs");
  const quotedArgs = launcherArgs.map((arg) => shellQuote(arg)).join(" ");
  // Inline program run by `node -e`: argv[1] is the status file, argv[2] the exit code.
  const statusWriterScript =
    "const fs=require('node:fs'); const file=process.argv[1]; const payload={exitCode:Number(process.argv[2]),completedAt:new Date().toISOString()}; fs.writeFileSync(file, JSON.stringify(payload, null, 2)+'\\n', 'utf8');";
  const changeDirectory = `cd ${shellQuote(REPO_ROOT)}`;
  const runLauncher = `node ${shellQuote(launcherEntrypoint)} ${quotedArgs} >> ${shellQuote(launcherLogPath)} 2>&1`;
  const recordStatus = `node -e ${shellQuote(statusWriterScript)} ${shellQuote(launcherStatusPath)} "$status"`;
  const scriptLines = [changeDirectory, runLauncher, "status=$?", recordStatus, "exit 0"];
  return scriptLines.join("\n");
}
|
|
293
|
+
|
|
294
|
+
/**
 * Removes the wave-selection and resume flags from launcher arguments so a
 * resumed launch can append fresh ones.
 *
 * `--start-wave` and `--end-wave` are dropped together with the argument that
 * follows them; `--auto-next` and `--resume-control-state` are dropped alone.
 *
 * @param {string[]} [launcherArgs=[]] - Original launcher arguments.
 * @returns {string[]} A new array with the remaining arguments in order.
 */
function stripLauncherArgsForResume(launcherArgs = []) {
  const flagsWithValue = new Set(["--start-wave", "--end-wave"]);
  const bareFlags = new Set(["--auto-next", "--resume-control-state"]);
  const kept = [];
  let skipNext = false;
  for (const arg of launcherArgs) {
    if (skipNext) {
      // This argument is the value of a preceding wave-bound flag.
      skipNext = false;
      continue;
    }
    if (flagsWithValue.has(arg)) {
      skipNext = true;
      continue;
    }
    if (!bareFlags.has(arg)) {
      kept.push(arg);
    }
  }
  return kept;
}
|
|
309
|
+
|
|
310
|
+
/**
 * Builds the launcher arguments for resuming a run whose launcher was lost.
 *
 * The wave to resume comes from the progress journal, falling back to the
 * state's recorded `activeWave`. Original wave-selection and resume flags are
 * stripped and replaced with `--auto-next` (if originally present) or
 * `--start-wave <wave>`, the original `--end-wave` if any, and
 * `--resume-control-state`.
 *
 * @param {object|null|undefined} state - Persisted run state.
 * @param {object|null|undefined} progressJournal - Launcher progress journal.
 * @returns {string[]|null} Resume arguments, or `null` when the wave is unknown
 *   or already past the original end wave.
 */
function buildResumedLauncherArgs(state, progressJournal) {
  // Only real numbers and non-blank strings count as wave numbers: `Number(null)`
  // and `Number("")` are 0, which would resume an unknown wave as wave 0.
  const toWaveNumber = (value) => {
    const isCandidate =
      typeof value === "number" || (typeof value === "string" && value.trim() !== "");
    if (!isCandidate) {
      return null;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  };
  const baseLauncherArgs = Array.isArray(state?.launcherArgs) ? state.launcherArgs : [];
  const { endWave, autoNext } = parseLauncherWaveSelection(baseLauncherArgs);
  const waveNumber = toWaveNumber(progressJournal?.waveNumber) ?? toWaveNumber(state?.activeWave);
  if (waveNumber === null) {
    return null;
  }
  if (Number.isFinite(endWave) && endWave < waveNumber) {
    return null;
  }
  const resumedArgs = [...stripLauncherArgsForResume(baseLauncherArgs)];
  if (autoNext) {
    resumedArgs.push("--auto-next");
  } else {
    resumedArgs.push("--start-wave", String(waveNumber));
  }
  if (Number.isFinite(endWave)) {
    resumedArgs.push("--end-wave", String(endWave));
  }
  resumedArgs.push("--resume-control-state");
  return resumedArgs;
}
|
|
341
|
+
|
|
342
|
+
/**
 * Resolves the wave a run is currently on.
 *
 * Precedence: progress journal `waveNumber`, then the state's recorded
 * `activeWave`, then the start wave derived from the stored launcher arguments.
 *
 * @param {object|null|undefined} state - Persisted run state.
 * @param {object|null|undefined} progressJournal - Launcher progress journal.
 * @returns {number|null} The active wave, or `null` when it cannot be determined.
 */
function resolvedActiveWave(state, progressJournal) {
  // Only real numbers and non-blank strings count as wave numbers: `Number(null)`
  // and `Number("")` are 0, which would report an unknown wave as wave 0.
  const toWaveNumber = (value) => {
    const isCandidate =
      typeof value === "number" || (typeof value === "string" && value.trim() !== "");
    if (!isCandidate) {
      return null;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  };
  const knownWave = toWaveNumber(progressJournal?.waveNumber) ?? toWaveNumber(state?.activeWave);
  if (knownWave !== null) {
    return knownWave;
  }
  return deriveActiveWaveFromLauncherArgs(
    Array.isArray(state?.launcherArgs) ? state.launcherArgs : [],
  );
}
|
|
353
|
+
|
|
354
|
+
/**
 * Resolves the wave to report for a run that finished successfully.
 *
 * Precedence: progress journal `waveNumber`, then the last wave of the explicit
 * launcher selection, then the state's recorded `activeWave`, then the start
 * wave derived from the stored launcher arguments.
 *
 * @param {object|null|undefined} state - Persisted run state.
 * @param {object|null|undefined} progressJournal - Launcher progress journal.
 * @returns {number|null} The final wave, or `null` when it cannot be determined.
 */
function resolvedCompletedActiveWave(state, progressJournal) {
  // Only real numbers and non-blank strings count as wave numbers: `Number(null)`
  // and `Number("")` are 0, which would report an unknown wave as wave 0.
  const toWaveNumber = (value) => {
    const isCandidate =
      typeof value === "number" || (typeof value === "string" && value.trim() !== "");
    if (!isCandidate) {
      return null;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  };
  const launcherArgs = Array.isArray(state?.launcherArgs) ? state.launcherArgs : [];
  const journalWave = toWaveNumber(progressJournal?.waveNumber);
  if (journalWave !== null) {
    return journalWave;
  }
  const selectedWaves = selectedWavesFromLauncherArgs(launcherArgs);
  if (selectedWaves.length > 0) {
    return selectedWaves[selectedWaves.length - 1];
  }
  const recordedWave = toWaveNumber(state?.activeWave);
  if (recordedWave !== null) {
    return recordedWave;
  }
  return deriveActiveWaveFromLauncherArgs(launcherArgs);
}
|
|
371
|
+
|
|
372
|
+
/**
 * Launches (or relaunches) the wave launcher for a run as a detached process
 * and persists the run state as "running".
 *
 * Any previous launcher status file is removed first, so a stale exit status
 * cannot be mistaken for the new launch. Passing a non-empty `launcherArgs`
 * marks the launch as a resume: those arguments replace the stored ones and
 * `resumeAttempts` is incremented.
 *
 * @param {object} state - Current run state; must carry `runId`.
 * @param {string} statePath - Path of the run's `state.json`.
 * @param {{ runsDir: string }} paths - Supervisor paths.
 * @param {object} [options]
 * @param {string|null} [options.supervisorId=null] - Supervisor taking ownership.
 * @param {string[]|null} [options.launcherArgs=null] - Override arguments (resume).
 * @param {string} [options.recoveryState="healthy"] - Recovery state to record.
 * @param {string|null} [options.resumeAction=null] - Resume action to record.
 * @returns {object} The run state that was written.
 */
export function startSupervisorRun(
  state,
  statePath,
  paths,
  { supervisorId = null, launcherArgs = null, recoveryState = "healthy", resumeAction = null } = {},
) {
  const launcherStatusPath = launcherStatusPathForRun(paths, state.runId);
  const launcherLogPath = launcherLogPathForRun(paths, state.runId);
  fs.rmSync(launcherStatusPath, { force: true });
  const isResume = Array.isArray(launcherArgs) && launcherArgs.length > 0;
  let effectiveLauncherArgs = [];
  if (isResume) {
    effectiveLauncherArgs = launcherArgs;
  } else if (Array.isArray(state.launcherArgs)) {
    effectiveLauncherArgs = state.launcherArgs;
  }
  const child = spawn(
    "bash",
    ["-lc", launcherWrapperCommand({
      launcherArgs: effectiveLauncherArgs,
      launcherStatusPath,
      launcherLogPath,
    })],
    {
      cwd: REPO_ROOT,
      detached: true,
      stdio: "ignore",
      env: {
        ...process.env,
        WAVE_SUPERVISOR_RUN_ID: state.runId,
      },
    },
  );
  // Without an 'error' listener a spawn failure (for example bash missing)
  // is raised as an uncaught exception in the supervisor process. Record it
  // instead; the run then has no live launcher pid for later reconciliation.
  child.on("error", (error) => {
    try {
      appendSupervisorEvent(paths, state.runId, {
        type: "launcher-spawn-failed",
        runId: state.runId,
        detail: String(error?.message || error),
      });
    } catch {
      // Best effort: a logging failure must not crash the supervisor.
    }
  });
  child.unref();
  appendSupervisorEvent(paths, state.runId, {
    type: "launcher-started",
    runId: state.runId,
    launcherPid: child.pid,
    resumed: isResume,
  });
  const previousResumeAttempts = Number(state.resumeAttempts || 0);
  return writeRunState(statePath, {
    ...state,
    launcherArgs: effectiveLauncherArgs,
    status: "running",
    startedAt: state.startedAt || toIsoTimestamp(),
    updatedAt: toIsoTimestamp(),
    supervisorId: supervisorId || state.supervisorId || null,
    leaseExpiresAt: new Date(Date.now() + DEFAULT_SUPERVISOR_LEASE_MS).toISOString(),
    launcherPid: child.pid,
    launcherStatusPath,
    launcherLogPath,
    activeWave: state.activeWave ?? deriveActiveWaveFromLauncherArgs(effectiveLauncherArgs),
    terminalDisposition: "running",
    agentRuntimeSummary: Array.isArray(state.agentRuntimeSummary) ? state.agentRuntimeSummary : [],
    sessionBackend: "process",
    recoveryState,
    resumeAction,
    resumeAttempts: isResume ? previousResumeAttempts + 1 : previousResumeAttempts,
  });
}
|
|
434
|
+
|
|
435
|
+
/**
 * Loads every agent runtime record stored for a run.
 *
 * Only regular files named `*.runtime.json` in the run's agent directory are
 * read; entries that do not yield an object are skipped.
 *
 * @param {{ runsDir: string }} paths - Supervisor paths.
 * @param {string} runId - Run identifier.
 * @returns {object[]} Parsed runtime records; empty when the directory is absent.
 */
function readAgentRuntimeRecords(paths, runId) {
  const runtimeDir = agentRuntimeDirForRun(paths, runId);
  if (!fs.existsSync(runtimeDir)) {
    return [];
  }
  const records = [];
  for (const entry of fs.readdirSync(runtimeDir, { withFileTypes: true })) {
    if (!entry.isFile() || !entry.name.endsWith(".runtime.json")) {
      continue;
    }
    const record = readJsonOrNull(path.join(runtimeDir, entry.name));
    if (record && typeof record === "object") {
      records.push(record);
    }
  }
  return records;
}
|
|
445
|
+
|
|
446
|
+
/**
 * Checks whether an agent runtime record's last heartbeat is recent enough.
 *
 * @param {{ lastHeartbeatAt?: string }|null|undefined} runtimeRecord - Agent runtime record.
 * @param {number} [freshnessMs] - Maximum heartbeat age in milliseconds;
 *   defaults to twice the supervisor lease duration.
 * @returns {boolean} `true` when `lastHeartbeatAt` parses as a date no older
 *   than `freshnessMs`.
 */
function runtimeHeartbeatIsFresh(runtimeRecord, freshnessMs = DEFAULT_SUPERVISOR_LEASE_MS * 2) {
  const heartbeatMs = Date.parse(String(runtimeRecord?.lastHeartbeatAt || ""));
  if (!Number.isFinite(heartbeatMs)) {
    return false;
  }
  const oldestAcceptableMs = Date.now() - freshnessMs;
  return heartbeatMs >= oldestAcceptableMs;
}
|
|
450
|
+
|
|
451
|
+
/**
 * Decides whether an agent runtime record still represents running work.
 *
 * A record is live when its process is alive or when its heartbeat is fresh.
 * The pid checked is the first non-nullish of `executorPid`, `pid`,
 * `runnerPid`.
 *
 * @param {object|null|undefined} runtimeRecord - Agent runtime record.
 * @returns {boolean} `true` when the record indicates live work.
 */
function runtimeRecordIndicatesLiveWork(runtimeRecord) {
  const rawPid =
    runtimeRecord?.executorPid ?? runtimeRecord?.pid ?? runtimeRecord?.runnerPid ?? "";
  const candidatePid = Number.parseInt(String(rawPid), 10);
  if (isProcessAlive(candidatePid)) {
    return true;
  }
  return runtimeHeartbeatIsFresh(runtimeRecord);
}
|
|
462
|
+
|
|
463
|
+
/**
 * Reconciles a run's persisted state with the evidence on disk and writes the
 * resulting state back.
 *
 * Evidence gathered: the launcher status file, the agent runtime records, the
 * launcher progress journal, and whether the recorded launcher pid is alive.
 * The first matching case wins:
 *   1. A launcher status file exists: the run is terminal ("completed" on exit
 *      code 0, otherwise "failed").
 *   2. Status is "running" and the launcher is alive: refresh the state.
 *   3. Status is "running" and some agent runtime record is still live: mark
 *      the run degraded ("launcher-lost-agents-running") and keep waiting.
 *   4. Status is "running" and the progress journal is finalized: recover the
 *      terminal state from the journal.
 *   5. Status is "running" and the launcher is gone: relaunch it if resume
 *      arguments can be built and the resume limit is not reached; otherwise
 *      mark the run failed ("launcher-lost-before-status").
 *   6. Otherwise: return the state untouched when there are no runtime
 *      records, else refresh the runtime summary.
 *
 * @param {object|null|undefined} state - Persisted run state.
 * @param {string} statePath - Path of the run's `state.json`; also used to
 *   derive the supervisor paths and, if `state.runId` is missing, the run id.
 * @returns {object|null|undefined} The state that was written, or the input
 *   `state` when nothing needed to change.
 */
export function reconcileSupervisorRun(state, statePath) {
  // Fall back to the run directory name when the state carries no run id.
  const runId = state?.runId || path.basename(path.dirname(statePath));
  const paths = supervisorPathsFromStatePath(statePath);
  const effectiveLauncherStatusPath =
    state?.launcherStatusPath || launcherStatusPathForRun(paths, runId);
  const launcherStatus = readJsonOrNull(effectiveLauncherStatusPath);
  const runtimeRecords = readAgentRuntimeRecords(paths, runId);
  const liveRuntimeRecords = runtimeRecords.filter((record) => runtimeRecordIndicatesLiveWork(record));
  const progressJournal = readLauncherProgress(
    launcherProgressPathForRun(paths, runId),
    { runId, waveNumber: state?.activeWave ?? null },
  );
  const activeWave = resolvedActiveWave(state, progressJournal);
  const launcherPid = Number.parseInt(String(state?.launcherPid ?? ""), 10);
  const launcherAlive = isProcessAlive(launcherPid);
  // Case 1: the wrapper script recorded the launcher's exit status.
  if (launcherStatus && typeof launcherStatus === "object") {
    const exitCode = Number.parseInt(String(launcherStatus.exitCode ?? ""), 10);
    // On success report the final wave of the selection rather than the current one.
    const terminalActiveWave =
      exitCode === 0 ? resolvedCompletedActiveWave(state, progressJournal) : activeWave;
    appendSupervisorEvent(paths, runId, {
      type: "launcher-status-reconciled",
      runId,
      exitCode: Number.isFinite(exitCode) ? exitCode : null,
    });
    return writeRunState(statePath, {
      ...state,
      // An unparseable exit code is NaN, which is not 0, so it counts as failed.
      status: exitCode === 0 ? "completed" : "failed",
      exitCode: Number.isFinite(exitCode) ? exitCode : null,
      completedAt: launcherStatus.completedAt || toIsoTimestamp(),
      updatedAt: toIsoTimestamp(),
      terminalDisposition: exitCode === 0 ? "completed" : "failed",
      agentRuntimeSummary: buildAgentRuntimeSummary(runtimeRecords),
      activeWave: terminalActiveWave,
      sessionBackend: "process",
      recoveryState: "healthy",
      resumeAction: null,
    });
  }
  // Case 2: launcher still running; refresh the derived fields only.
  if (state?.status === "running" && launcherAlive) {
    return writeRunState(statePath, {
      ...state,
      updatedAt: toIsoTimestamp(),
      activeWave,
      terminalDisposition: "running",
      agentRuntimeSummary: buildAgentRuntimeSummary(runtimeRecords),
      sessionBackend: "process",
      recoveryState: "healthy",
      resumeAction: null,
    });
  }
  // Case 3: launcher is gone but agents are still working.
  if (state?.status === "running" && liveRuntimeRecords.length > 0) {
    // Emit the adoption event only on the transition into this disposition.
    if (state?.terminalDisposition !== "launcher-lost-agents-running") {
      appendSupervisorEvent(paths, runId, {
        type: "agent-runtime-adopted",
        runId,
        agentIds: liveRuntimeRecords.map((record) => record.agentId).filter(Boolean),
      });
    }
    return writeRunState(statePath, {
      ...state,
      updatedAt: toIsoTimestamp(),
      activeWave,
      terminalDisposition: "launcher-lost-agents-running",
      agentRuntimeSummary: buildAgentRuntimeSummary(runtimeRecords),
      sessionBackend: "process",
      recoveryState: "degraded",
      resumeAction: "wait-for-live-agents",
      detail: "Launcher exited while agent runtime work is still live.",
    });
  }
  // Case 4: no status file, but the progress journal recorded a final outcome.
  if (
    state?.status === "running" &&
    progressJournal?.finalized === true &&
    ["completed", "failed"].includes(String(progressJournal.finalDisposition || ""))
  ) {
    const exitCode = Number.parseInt(String(progressJournal.exitCode ?? ""), 10);
    const terminalActiveWave =
      progressJournal.finalDisposition === "completed"
        ? resolvedCompletedActiveWave(state, progressJournal)
        : activeWave;
    appendSupervisorEvent(paths, runId, {
      type: "launcher-status-reconciled",
      runId,
      exitCode: Number.isFinite(exitCode) ? exitCode : null,
      source: "progress-journal",
    });
    return writeRunState(statePath, {
      ...state,
      status: progressJournal.finalDisposition === "completed" ? "completed" : "failed",
      exitCode: Number.isFinite(exitCode) ? exitCode : null,
      completedAt: progressJournal.updatedAt || toIsoTimestamp(),
      updatedAt: toIsoTimestamp(),
      terminalDisposition: progressJournal.finalDisposition,
      agentRuntimeSummary: buildAgentRuntimeSummary(runtimeRecords),
      activeWave: terminalActiveWave,
      sessionBackend: "process",
      recoveryState: "recovered-from-progress",
      resumeAction: null,
      detail:
        progressJournal.finalDisposition === "completed"
          ? "Recovered final supervisor state from launcher progress journal."
          : "Recovered terminal failure from launcher progress journal.",
    });
  }
  // Case 5: launcher died without leaving a status, live agents, or a finalized journal.
  if (state?.status === "running" && !launcherAlive) {
    const resumedArgs = buildResumedLauncherArgs(state, progressJournal);
    // Relaunch only while under the resume limit and when resume args could be built.
    if (resumedArgs && Number(state?.resumeAttempts || 0) < DEFAULT_SUPERVISOR_RESUME_LIMIT) {
      return startSupervisorRun(
        {
          ...state,
          activeWave,
        },
        statePath,
        paths,
        {
          supervisorId: state?.supervisorId || null,
          launcherArgs: resumedArgs,
          recoveryState: "resuming",
          resumeAction: "resume-current-wave",
        },
      );
    }
    appendSupervisorEvent(paths, runId, {
      type: "failed-before-status",
      runId,
      launcherPid: state.launcherPid || null,
    });
    return writeRunState(statePath, {
      ...state,
      status: "failed",
      exitCode: null,
      completedAt: toIsoTimestamp(),
      updatedAt: toIsoTimestamp(),
      detail: "Launcher exited before writing supervisor status.",
      activeWave,
      terminalDisposition: "launcher-lost-before-status",
      agentRuntimeSummary: buildAgentRuntimeSummary(runtimeRecords),
      sessionBackend: "process",
      recoveryState: "failed",
      resumeAction: null,
    });
  }
  // Case 6: the run is not "running"; avoid a write when there is nothing to summarize.
  if (runtimeRecords.length === 0) {
    return state;
  }
  return writeRunState(statePath, {
    ...state,
    updatedAt: toIsoTimestamp(),
    activeWave,
    agentRuntimeSummary: buildAgentRuntimeSummary(runtimeRecords),
    sessionBackend: "process",
  });
}
|
|
616
|
+
|
|
617
|
+
function reconcileSupervisorReadState(state, statePath) {
|
|
618
|
+
const runId = state?.runId || path.basename(path.dirname(statePath));
|
|
619
|
+
const paths = supervisorPathsFromStatePath(statePath);
|
|
620
|
+
const effectiveLauncherStatusPath =
|
|
621
|
+
state?.launcherStatusPath || launcherStatusPathForRun(paths, runId);
|
|
622
|
+
const launcherStatus = readJsonOrNull(effectiveLauncherStatusPath);
|
|
623
|
+
const runtimeRecords = readAgentRuntimeRecords(paths, runId);
|
|
624
|
+
const liveRuntimeRecords = runtimeRecords.filter((record) => runtimeRecordIndicatesLiveWork(record));
|
|
625
|
+
const progressJournal = readLauncherProgress(
|
|
626
|
+
launcherProgressPathForRun(paths, runId),
|
|
627
|
+
{ runId, waveNumber: state?.activeWave ?? null },
|
|
628
|
+
);
|
|
629
|
+
const activeWave = resolvedActiveWave(state, progressJournal);
|
|
630
|
+
const launcherPid = Number.parseInt(String(state?.launcherPid ?? ""), 10);
|
|
631
|
+
const launcherAlive = isProcessAlive(launcherPid);
|
|
632
|
+
if (launcherStatus && typeof launcherStatus === "object") {
|
|
633
|
+
const exitCode = Number.parseInt(String(launcherStatus.exitCode ?? ""), 10);
|
|
634
|
+
const terminalActiveWave =
|
|
635
|
+
exitCode === 0 ? resolvedCompletedActiveWave(state, progressJournal) : activeWave;
|
|
636
|
+
return writeRunState(statePath, {
|
|
637
|
+
...state,
|
|
638
|
+
status: exitCode === 0 ? "completed" : "failed",
|
|
639
|
+
exitCode: Number.isFinite(exitCode) ? exitCode : null,
|
|
640
|
+
completedAt: launcherStatus.completedAt || toIsoTimestamp(),
|
|
641
|
+
updatedAt: toIsoTimestamp(),
|
|
642
|
+
terminalDisposition: exitCode === 0 ? "completed" : "failed",
|
|
643
|
+
agentRuntimeSummary: buildAgentRuntimeSummary(runtimeRecords),
|
|
644
|
+
activeWave: terminalActiveWave,
|
|
645
|
+
sessionBackend: "process",
|
|
646
|
+
recoveryState: "healthy",
|
|
647
|
+
resumeAction: null,
|
|
648
|
+
});
|
|
649
|
+
}
|
|
650
|
+
if (
|
|
651
|
+
state?.status === "running" &&
|
|
652
|
+
progressJournal?.finalized === true &&
|
|
653
|
+
["completed", "failed"].includes(String(progressJournal.finalDisposition || ""))
|
|
654
|
+
) {
|
|
655
|
+
const exitCode = Number.parseInt(String(progressJournal.exitCode ?? ""), 10);
|
|
656
|
+
const terminalActiveWave =
|
|
657
|
+
progressJournal.finalDisposition === "completed"
|
|
658
|
+
? resolvedCompletedActiveWave(state, progressJournal)
|
|
659
|
+
: activeWave;
|
|
660
|
+
return writeRunState(statePath, {
|
|
661
|
+
...state,
|
|
662
|
+
status: progressJournal.finalDisposition === "completed" ? "completed" : "failed",
|
|
663
|
+
exitCode: Number.isFinite(exitCode) ? exitCode : null,
|
|
664
|
+
completedAt: progressJournal.updatedAt || toIsoTimestamp(),
|
|
665
|
+
updatedAt: toIsoTimestamp(),
|
|
666
|
+
terminalDisposition: progressJournal.finalDisposition,
|
|
667
|
+
agentRuntimeSummary: buildAgentRuntimeSummary(runtimeRecords),
|
|
668
|
+
activeWave: terminalActiveWave,
|
|
669
|
+
sessionBackend: "process",
|
|
670
|
+
recoveryState: "recovered-from-progress",
|
|
671
|
+
resumeAction: null,
|
|
672
|
+
detail:
|
|
673
|
+
progressJournal.finalDisposition === "completed"
|
|
674
|
+
? "Recovered final supervisor state from launcher progress journal."
|
|
675
|
+
: "Recovered terminal failure from launcher progress journal.",
|
|
676
|
+
});
|
|
677
|
+
}
|
|
678
|
+
if (state?.status === "running" && launcherAlive) {
|
|
679
|
+
return writeRunState(statePath, {
|
|
680
|
+
...state,
|
|
681
|
+
updatedAt: toIsoTimestamp(),
|
|
682
|
+
activeWave,
|
|
683
|
+
terminalDisposition: "running",
|
|
684
|
+
agentRuntimeSummary: buildAgentRuntimeSummary(runtimeRecords),
|
|
685
|
+
sessionBackend: "process",
|
|
686
|
+
recoveryState: "healthy",
|
|
687
|
+
resumeAction: null,
|
|
688
|
+
});
|
|
689
|
+
}
|
|
690
|
+
if (state?.status === "running" && liveRuntimeRecords.length > 0) {
|
|
691
|
+
return writeRunState(statePath, {
|
|
692
|
+
...state,
|
|
693
|
+
updatedAt: toIsoTimestamp(),
|
|
694
|
+
activeWave,
|
|
695
|
+
terminalDisposition: "launcher-lost-agents-running",
|
|
696
|
+
agentRuntimeSummary: buildAgentRuntimeSummary(runtimeRecords),
|
|
697
|
+
sessionBackend: "process",
|
|
698
|
+
recoveryState: "degraded",
|
|
699
|
+
resumeAction: "wait-for-live-agents",
|
|
700
|
+
detail: "Launcher exited while agent runtime work is still live.",
|
|
701
|
+
});
|
|
702
|
+
}
|
|
703
|
+
if (runtimeRecords.length === 0 && activeWave === state?.activeWave) {
|
|
704
|
+
return state;
|
|
705
|
+
}
|
|
706
|
+
return writeRunState(statePath, {
|
|
707
|
+
...state,
|
|
708
|
+
updatedAt: toIsoTimestamp(),
|
|
709
|
+
activeWave,
|
|
710
|
+
agentRuntimeSummary: buildAgentRuntimeSummary(runtimeRecords),
|
|
711
|
+
sessionBackend: "process",
|
|
712
|
+
});
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
/**
 * Resolve the supervisor state-file path for a run.
 *
 * @param {string} runId - Run identifier; a falsy value yields `null`.
 * @param {object} [context] - Locator carrying `lane`, `project`, and `adhocRunId`.
 * @returns {string|null} Result of `statePathForRun`, or `null` when the run id
 *   or the lane is missing.
 */
function supervisorStatePathForRunId(runId, context = {}) {
  const lane = context?.lane;
  const locatable = Boolean(runId) && Boolean(lane);
  if (!locatable) {
    return null;
  }
  const supervisorPaths = buildSupervisorPaths(
    buildLanePaths(lane, {
      project: context.project,
      adhocRunId: context.adhocRunId,
    }),
  );
  return statePathForRun(supervisorPaths, runId);
}
|
|
725
|
+
|
|
726
|
+
/**
 * Locate a run's state file and load its state.
 *
 * @param {string} runId - Run identifier.
 * @param {object} [context] - Locator (`lane`, `project`, `adhocRunId`).
 * @param {object} [options] - When `options.reconcile` is truthy and a state was
 *   read, the state is passed through `reconcileSupervisorReadState`.
 * @returns {{statePath: string, runDir: string, state: object}|null} `null` when
 *   the path cannot be resolved or the file does not exist.
 */
export function findSupervisorRunState(runId, context = {}, options = {}) {
  const statePath = supervisorStatePathForRunId(runId, context);
  const stateFilePresent = Boolean(statePath) && fs.existsSync(statePath);
  if (!stateFilePresent) {
    return null;
  }
  let state = readRunState(statePath);
  if (options.reconcile && state) {
    state = reconcileSupervisorReadState(state, statePath);
  }
  return { statePath, runDir: path.dirname(statePath), state };
}
|
|
740
|
+
|
|
741
|
+
/**
 * Render a run state as a single line of space-separated `key=value` pairs.
 *
 * Missing fields fall back to "unknown", "none", or "n/a" depending on the key.
 *
 * @param {object|null|undefined} state - Run state record.
 * @returns {string} One-line summary.
 */
function formatRunState(state) {
  const fields = [
    ["run_id", state?.runId || "unknown"],
    ["status", state?.status || "unknown"],
    ["lane", state?.lane || "unknown"],
    ["project", state?.project || "unknown"],
    ["pid", state?.launcherPid || "none"],
    // `??` rather than `||` so an exit code of 0 is still printed.
    ["exit_code", state?.exitCode ?? "n/a"],
    ["terminal_disposition", state?.terminalDisposition || "unknown"],
    ["recovery_state", state?.recoveryState || "unknown"],
    ["resume_action", state?.resumeAction || "none"],
  ];
  return fields.map(([label, value]) => `${label}=${value}`).join(" ");
}
|
|
755
|
+
|
|
756
|
+
/**
 * Split `wave submit` arguments into submit-level flags and launcher arguments.
 *
 * `--json`, `--help`, and `-h` are consumed here; every other token is
 * collected, in order, into `launcherArgs`.
 *
 * @param {string[]} argv - Raw command-line tokens.
 * @returns {{json: boolean, help: boolean, launcherArgs: string[]}}
 */
function parseSubmitArgs(argv) {
  const parsed = { json: false, help: false, launcherArgs: [] };
  for (const token of argv) {
    switch (token) {
      case "--json":
        parsed.json = true;
        break;
      case "--help":
      case "-h":
        parsed.help = true;
        break;
      default:
        parsed.launcherArgs.push(token);
    }
  }
  return parsed;
}
|
|
774
|
+
|
|
775
|
+
/**
 * Parse arguments shared by the `supervise`, `status`, and `wait` commands.
 *
 * The `*Provided` flags record whether `--project`, `--lane`, and `--adhoc-run`
 * were passed explicitly, so callers can tell a default from a user choice.
 *
 * @param {string[]} argv - Raw command-line tokens.
 * @returns {object} Parsed options; `project` defaults to the configured
 *   default project and `lane` to "main".
 * @throws {Error} On an unrecognised argument (empty tokens and "--" are ignored).
 */
function parseSupervisorArgs(argv) {
  const options = {
    project: loadWaveConfig().defaultProject,
    lane: "main",
    adhocRunId: null,
    once: false,
    pollMs: DEFAULT_SUPERVISOR_POLL_MS,
    timeoutSeconds: 30,
    json: false,
    // Initialised explicitly so the option shape matches the other parsers.
    help: false,
    runId: "",
    projectProvided: false,
    laneProvided: false,
    adhocRunProvided: false,
  };
  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    if (arg === "--project") {
      // A missing or blank value keeps the configured default project.
      options.project = String(argv[++index] || "").trim() || options.project;
      options.projectProvided = true;
    } else if (arg === "--lane") {
      options.lane = sanitizeLaneName(argv[++index]);
      options.laneProvided = true;
    } else if (arg === "--adhoc-run") {
      options.adhocRunId = sanitizeAdhocRunId(argv[++index]);
      options.adhocRunProvided = true;
    } else if (arg === "--once") {
      options.once = true;
    } else if (arg === "--poll-ms") {
      options.pollMs = parsePositiveInt(argv[++index], "--poll-ms");
    } else if (arg === "--timeout-seconds") {
      options.timeoutSeconds = parseNonNegativeInt(argv[++index], "--timeout-seconds");
    } else if (arg === "--run-id") {
      options.runId = String(argv[++index] || "").trim();
    } else if (arg === "--json") {
      options.json = true;
    } else if (arg === "--help" || arg === "-h") {
      options.help = true;
    } else if (arg && arg !== "--") {
      throw new Error(`Unknown argument: ${arg}`);
    }
  }
  return options;
}
|
|
818
|
+
|
|
819
|
+
/**
 * Print the one-line usage string for a supervisor sub-command.
 *
 * Commands without a usage entry print nothing.
 *
 * @param {string} command - Sub-command name.
 * @returns {void}
 */
function printUsage(command) {
  const usageByCommand = new Map([
    ["submit", "Usage: wave submit [launcher options] [--json]"],
    [
      "supervise",
      "Usage: wave supervise [--project <id>] [--lane <lane>] [--adhoc-run <id>] [--poll-ms <n>] [--once]",
    ],
    [
      "status",
      "Usage: wave status --run-id <id> --project <id> --lane <lane> [--adhoc-run <id>] [--json]",
    ],
    [
      "wait",
      "Usage: wave wait --run-id <id> --project <id> --lane <lane> [--adhoc-run <id>] [--timeout-seconds <n>] [--json]",
    ],
    [
      "attach",
      "Usage: wave attach --run-id <id> --project <id> --lane <lane> [--adhoc-run <id>] (--agent <id> | --dashboard)",
    ],
  ]);
  const usage = usageByCommand.get(command);
  if (usage !== undefined) {
    console.log(usage);
  }
}
|
|
840
|
+
|
|
841
|
+
/**
 * Parse arguments for the `attach` command.
 *
 * @param {string[]} argv - Raw command-line tokens.
 * @returns {object} Parsed options, including `agentId`, `dashboard`, and the
 *   `*Provided` flags recording which locator flags were passed explicitly.
 * @throws {Error} On an unrecognised argument (empty tokens and "--" are ignored).
 */
function parseAttachArgs(argv) {
  const parsed = {
    project: loadWaveConfig().defaultProject,
    lane: "main",
    adhocRunId: null,
    runId: "",
    json: false,
    help: false,
    projectProvided: false,
    laneProvided: false,
    adhocRunProvided: false,
    agentId: "",
    dashboard: false,
  };
  let cursor = 0;
  // Advance to the token after the current flag and return it as that flag's value.
  const nextValue = () => {
    cursor += 1;
    return argv[cursor];
  };
  while (cursor < argv.length) {
    const token = argv[cursor];
    switch (token) {
      case "--project":
        parsed.project = String(nextValue() || "").trim() || parsed.project;
        parsed.projectProvided = true;
        break;
      case "--lane":
        parsed.lane = sanitizeLaneName(nextValue());
        parsed.laneProvided = true;
        break;
      case "--adhoc-run":
        parsed.adhocRunId = sanitizeAdhocRunId(nextValue());
        parsed.adhocRunProvided = true;
        break;
      case "--run-id":
        parsed.runId = String(nextValue() || "").trim();
        break;
      case "--help":
      case "-h":
        parsed.help = true;
        break;
      case "--agent":
        parsed.agentId = String(nextValue() || "").trim();
        break;
      case "--dashboard":
        parsed.dashboard = true;
        break;
      default:
        if (token && token !== "--") {
          throw new Error(`Unknown argument: ${token}`);
        }
    }
    cursor += 1;
  }
  return parsed;
}
|
|
881
|
+
|
|
882
|
+
/**
 * Make sure a supervisor daemon exists for the given project/lane.
 *
 * If the lock file names a live process whose lease is still fresh, that pid is
 * returned. Otherwise a detached `wave.mjs supervise` process is spawned and its
 * pid is returned.
 *
 * @param {object} context - Locator carrying `project`, `lane`, and `adhocRunId`.
 * @returns {number|undefined} Pid of the existing or newly spawned supervisor.
 */
function ensureSupervisorRunning(context) {
  const supervisorPaths = buildSupervisorPaths(
    buildLanePaths(context.lane, {
      project: context.project,
      adhocRunId: context.adhocRunId,
    }),
  );
  ensureSupervisorDirectories(supervisorPaths);

  const lockRecord = readJsonOrNull(supervisorPaths.lockPath);
  const lockedPid = Number.parseInt(String(lockRecord?.pid ?? ""), 10);
  const daemonIsHealthy = isProcessAlive(lockedPid) && supervisorLeaseIsFresh(lockRecord);
  if (daemonIsHealthy) {
    return lockedPid;
  }

  const daemonArgs = [
    path.join(PACKAGE_ROOT, "scripts", "wave.mjs"),
    "supervise",
    "--project",
    context.project,
    "--lane",
    context.lane,
    "--poll-ms",
    String(DEFAULT_SUPERVISOR_POLL_MS),
    ...(context.adhocRunId ? ["--adhoc-run", context.adhocRunId] : []),
  ];
  // Detached with stdio ignored and unref'd so the daemon does not keep this process alive.
  const daemon = spawn(process.execPath, daemonArgs, {
    cwd: REPO_ROOT,
    detached: true,
    stdio: "ignore",
    env: process.env,
  });
  daemon.unref();
  return daemon.pid;
}
|
|
916
|
+
|
|
917
|
+
/**
 * Queue a launcher run for the supervisor daemon.
 *
 * Writes the request and the initial "pending" state for a fresh run id, makes
 * sure a supervisor is running, and appends a "submitted" event.
 *
 * @param {string[]} argv - Launcher arguments, stored verbatim as `launcherArgs`.
 * @returns {{runId: string, project: string, lane: string, adhocRunId: string|null,
 *   supervisorPid: number|undefined, statePath: string}} Handle for the queued run.
 */
export function submitLauncherRun(argv) {
  const context = parseLauncherContext(argv);
  const supervisorPaths = buildSupervisorPaths(
    buildLanePaths(context.lane, {
      project: context.project,
      adhocRunId: context.adhocRunId,
    }),
  );
  const runId = generateRunId();
  ensureSupervisorDirectories(supervisorPaths, runId);

  // Identity fields shared by the state record, the event, and the return value.
  const identity = {
    runId,
    project: context.project,
    lane: context.lane,
    adhocRunId: context.adhocRunId || null,
  };
  const initialState = {
    ...identity,
    status: "pending",
    submittedAt: toIsoTimestamp(),
    updatedAt: toIsoTimestamp(),
    launcherArgs: argv,
    launcherPid: null,
    exitCode: null,
    activeWave: deriveActiveWaveFromLauncherArgs(argv),
    terminalDisposition: "pending",
    agentRuntimeSummary: [],
    sessionBackend: "process",
    recoveryState: "pending",
    resumeAction: null,
    resumeAttempts: 0,
  };

  // Request and state are written before the daemon is ensured.
  writeJsonAtomic(requestPathForRun(supervisorPaths, runId), initialState);
  writeRunState(statePathForRun(supervisorPaths, runId), initialState);
  const supervisorPid = ensureSupervisorRunning(context);
  appendSupervisorEvent(supervisorPaths, runId, { type: "submitted", ...identity });

  return {
    ...identity,
    supervisorPid,
    statePath: statePathForRun(supervisorPaths, runId),
  };
}
|
|
964
|
+
|
|
965
|
+
/**
 * List every readable run state under the supervisor runs directory.
 *
 * Despite the name, entries of any status are returned; callers filter by
 * status themselves. Entries are ordered by `submittedAt`, ascending.
 *
 * @param {object} paths - Supervisor paths; `paths.runsDir` is scanned.
 * @returns {Array<{statePath: string, state: object}>} Entries whose state file
 *   could be read.
 */
function pendingRunStates(paths) {
  ensureSupervisorDirectories(paths);
  const entries = [];
  for (const dirent of fs.readdirSync(paths.runsDir, { withFileTypes: true })) {
    if (!dirent.isDirectory()) {
      continue;
    }
    const statePath = path.join(paths.runsDir, dirent.name, "state.json");
    const state = readRunState(statePath);
    if (state) {
      entries.push({ statePath, state });
    }
  }
  entries.sort((first, second) =>
    String(first.state.submittedAt || "").localeCompare(String(second.state.submittedAt || "")),
  );
  return entries;
}
|
|
981
|
+
|
|
982
|
+
/**
 * Run the supervisor daemon loop for one project/lane.
 *
 * After acquiring the supervisor lock, the loop:
 *   1. adopts runs already marked "running" (stamps them with this supervisor's
 *      id and lease, then reconciles them);
 *   2. on every tick refreshes the lock heartbeat/lease, reconciles running
 *      runs, and starts the oldest pending run when nothing is running;
 *   3. sleeps `options.pollMs` between ticks.
 *
 * With `options.once`, the loop exits as soon as nothing is running or pending.
 * The lock is released on every exit path.
 *
 * @param {object} options - `project`, `lane`, `adhocRunId`, `pollMs`, `once`.
 * @returns {Promise<{alreadyRunning: boolean}>} `alreadyRunning: true` when the
 *   lock could not be acquired.
 */
export async function runSupervisorLoop(options) {
  const lanePaths = buildLanePaths(options.lane, {
    project: options.project,
    adhocRunId: options.adhocRunId,
  });
  const paths = buildSupervisorPaths(lanePaths);
  ensureSupervisorDirectories(paths);
  const lock = acquireSupervisorLock(paths.lockPath, options);
  if (!lock) {
    return { alreadyRunning: true };
  }
  try {
    const runningStates = pendingRunStates(paths).filter(
      (entry) => entry.state.status === "running",
    );
    for (const entry of runningStates) {
      appendSupervisorEvent(paths, entry.state.runId, {
        type: "daemon-adopted",
        runId: entry.state.runId,
        supervisorId: lock.supervisorId,
      });
      const adoptedState = {
        ...entry.state,
        supervisorId: lock.supervisorId,
        leaseExpiresAt: lock.leaseExpiresAt,
        updatedAt: toIsoTimestamp(),
      };
      writeRunState(entry.statePath, adoptedState);
      // Reconcile from the adopted snapshot, not the stale pre-adoption state;
      // otherwise a reconcile that persists `...state` could drop the
      // supervisorId/lease just written.
      reconcileSupervisorRun(adoptedState, entry.statePath);
    }
    while (true) {
      writeSupervisorLock(paths.lockPath, {
        ...lock,
        heartbeatAt: toIsoTimestamp(),
        leaseExpiresAt: new Date(Date.now() + DEFAULT_SUPERVISOR_LEASE_MS).toISOString(),
      });
      for (const entry of pendingRunStates(paths)) {
        if (entry.state.status === "running") {
          reconcileSupervisorRun(entry.state, entry.statePath);
        }
      }
      // Re-read after reconciling: runs may have moved to a terminal status.
      const refreshedEntries = pendingRunStates(paths);
      let running = refreshedEntries.some((entry) => entry.state.status === "running");
      if (!running) {
        // Entries are ordered by submittedAt, so this picks the oldest pending run.
        const nextPending = refreshedEntries.find((entry) => entry.state.status === "pending");
        if (nextPending) {
          appendSupervisorEvent(paths, nextPending.state.runId, {
            type: "daemon-claimed",
            runId: nextPending.state.runId,
            supervisorId: lock.supervisorId,
          });
          startSupervisorRun(nextPending.state, nextPending.statePath, paths, {
            supervisorId: lock.supervisorId,
          });
          running = true;
        }
      }
      const hasPending = refreshedEntries.some((entry) => entry.state.status === "pending");
      if (options.once && !running && !hasPending) {
        break;
      }
      await sleep(options.pollMs);
    }
    return { alreadyRunning: false };
  } finally {
    releaseSupervisorLock(paths.lockPath);
  }
}
|
|
1053
|
+
|
|
1054
|
+
/**
 * Poll a run until it reaches a terminal status or the timeout elapses.
 *
 * @param {object} options - Run locator plus `timeoutSeconds`.
 * @returns {Promise<{statePath: string, runDir: string, state: object}>} The
 *   last located run; on timeout its status may still be non-terminal.
 * @throws {Error} When the run cannot be found.
 */
export async function waitForRunState(options) {
  const deadline = Date.now() + options.timeoutSeconds * 1000;
  while (true) {
    const located = findSupervisorRunState(options.runId, options, { reconcile: true });
    if (!located?.state) {
      throw new Error(`Run ${options.runId} not found.`);
    }
    if (["completed", "failed"].includes(located.state.status)) {
      return located;
    }
    const remainingMs = deadline - Date.now();
    if (remainingMs <= 0) {
      return located;
    }
    // Never sleep past the deadline: a timeout shorter than the poll interval
    // would otherwise overshoot by up to one full interval. A non-finite
    // deadline (missing or infinite timeout) keeps the regular poll cadence.
    const delayMs = Number.isFinite(remainingMs)
      ? Math.min(DEFAULT_SUPERVISOR_POLL_MS, remainingMs)
      : DEFAULT_SUPERVISOR_POLL_MS;
    await sleep(delayMs);
  }
}
|
|
1070
|
+
|
|
1071
|
+
/**
 * Sort comparator for supervisor run entries.
 *
 * Orders by status (running, pending, failed, completed, then anything else)
 * and, within one status, by most recent `updatedAt` (falling back to
 * `submittedAt`) first.
 *
 * Entries with a missing or unparsable timestamp are treated as the oldest, so
 * the comparator always returns a real number. `Date.parse("")` is NaN, and a
 * NaN result would make the sort order inconsistent.
 *
 * @param {{state?: object}} left
 * @param {{state?: object}} right
 * @returns {number} Negative when `left` sorts first, positive when `right` does.
 */
function compareSupervisorEntries(left, right) {
  // A Map avoids accidental hits on Object.prototype keys such as "constructor".
  const statusRank = new Map([
    ["running", 0],
    ["pending", 1],
    ["failed", 2],
    ["completed", 3],
  ]);
  const rankOf = (entry) => statusRank.get(entry?.state?.status) ?? 99;
  const timestampOf = (entry) => {
    const parsed = Date.parse(String(entry?.state?.updatedAt || entry?.state?.submittedAt || ""));
    return Number.isNaN(parsed) ? 0 : parsed;
  };
  const rankDelta = rankOf(left) - rankOf(right);
  if (rankDelta !== 0) {
    return rankDelta;
  }
  return timestampOf(right) - timestampOf(left);
}
|
|
1087
|
+
|
|
1088
|
+
/**
 * Summarise the most relevant supervisor run for a given wave.
 *
 * Running entries are first passed through `reconcileSupervisorReadState`. The
 * entries whose active wave equals `waveNumber` are then ranked with
 * `compareSupervisorEntries`, and the top one is summarised.
 *
 * @param {object} lanePaths - Lane paths used to derive the supervisor paths.
 * @param {number|string} waveNumber - Wave to match (compared as a number).
 * @param {{agentId?: string}} [filter] - When `agentId` is set, the runtime
 *   summary is narrowed to that agent's records.
 * @returns {object|null} Summary of the selected run, or `null` when the runs
 *   directory does not exist or no run matches.
 */
export function summarizeSupervisorStateForWave(lanePaths, waveNumber, { agentId = "" } = {}) {
  const paths = buildSupervisorPaths(lanePaths);
  if (!fs.existsSync(paths.runsDir)) {
    return null;
  }

  const reconciled = [];
  for (const entry of pendingRunStates(paths)) {
    const isRunning = entry.state?.status === "running";
    reconciled.push({
      ...entry,
      state: isRunning ? reconcileSupervisorReadState(entry.state, entry.statePath) : entry.state,
    });
  }
  const candidates = reconciled.filter(
    (entry) => activeWaveFromState(entry.state) === Number(waveNumber),
  );
  if (candidates.length === 0) {
    return null;
  }

  const [selected] = candidates.sort(compareSupervisorEntries);
  const state = selected.state;
  const allAgentRecords = Array.isArray(state?.agentRuntimeSummary)
    ? state.agentRuntimeSummary
    : [];
  let agentRuntimeSummary = allAgentRecords;
  if (agentId) {
    agentRuntimeSummary = allAgentRecords.filter((record) => record?.agentId === agentId);
  }

  return {
    runId: state.runId,
    status: state.status || "pending",
    terminalDisposition: state.terminalDisposition || null,
    launcherPid: state.launcherPid || null,
    exitCode: state.exitCode ?? null,
    detail: state.detail || null,
    updatedAt: state.updatedAt || null,
    leaseExpiresAt: state.leaseExpiresAt || null,
    supervisorId: state.supervisorId || null,
    sessionBackend: state.sessionBackend || "process",
    recoveryState: state.recoveryState || null,
    resumeAction: state.resumeAction || null,
    agentRuntimeSummary,
  };
}
|
|
1127
|
+
|
|
1128
|
+
/**
 * Load an agent's runtime record from `<runDir>/agents/<agentId>.runtime.json`.
 *
 * @param {string} runDir - Run directory.
 * @param {string} agentId - Agent identifier used to build the file name.
 * @returns {{runtimePath: string, runtime: object|null}|null} `null` when the
 *   file does not exist; otherwise the path plus whatever `readJsonOrNull` returns.
 */
function runtimeRecordForAgent(runDir, agentId) {
  const runtimePath = path.join(runDir, "agents", `${agentId}.runtime.json`);
  return fs.existsSync(runtimePath)
    ? { runtimePath, runtime: readJsonOrNull(runtimePath) }
    : null;
}
|
|
1138
|
+
|
|
1139
|
+
/**
 * Attach the terminal to one agent of a run.
 *
 * Depending on the agent's runtime record this will:
 *   - attach to its tmux session (attach mode "session" with a session name);
 *   - print the last part of its log when the agent looks finished (a status
 *     record is present or the disposition is completed/failed/terminated);
 *   - otherwise follow the log with `tail -F` until that process exits.
 *
 * @param {object} options - Run locator plus `agentId`.
 * @returns {Promise<void>}
 * @throws {Error} When the run, runtime record, or log path is missing, or when
 *   the log follower fails or exits non-zero.
 */
async function attachAgentRuntimeSession(options) {
  const located = findSupervisorRunState(options.runId, options, { reconcile: true });
  if (!located?.state) {
    throw new Error(`Run ${options.runId} not found.`);
  }
  const runtime = runtimeRecordForAgent(located.runDir, options.agentId)?.runtime;
  if (!runtime || typeof runtime !== "object") {
    throw new Error(`No runtime record found for agent ${options.agentId}.`);
  }

  const attachMode = String(runtime.attachMode || "log-tail").trim() || "log-tail";
  const sessionName = String(runtime.tmuxSessionName || runtime.sessionName || "").trim();
  const lanePaths = buildLanePaths(options.lane, {
    project: options.project,
    adhocRunId: options.adhocRunId,
  });
  const wantsSession = attachMode === "session" && Boolean(sessionName);
  if (wantsSession) {
    await attachTmuxSession(lanePaths.tmuxSocketName, sessionName);
    return;
  }

  const logPath = String(runtime.logPath || "").trim();
  if (!logPath) {
    throw new Error(`No log path recorded for agent ${options.agentId}.`);
  }

  const hasStatusRecord = Boolean(
    readStatusRecordIfPresent(String(runtime.statusPath || "").trim()),
  );
  const finished =
    hasStatusRecord ||
    ["completed", "failed", "terminated"].includes(String(runtime.terminalDisposition || ""));
  if (finished) {
    // Nothing left to follow: dump the tail once, newline-terminated.
    const tail = readFileTail(logPath, 12000);
    if (tail) {
      process.stdout.write(tail.endsWith("\n") ? tail : `${tail}\n`);
    }
    return;
  }

  const follower = spawn("tail", ["-n", "200", "-F", logPath], {
    cwd: REPO_ROOT,
    stdio: "inherit",
    env: process.env,
  });
  await new Promise((resolve, reject) => {
    follower.on("exit", (code) => {
      if (code !== 0) {
        reject(new Error(`log follow exited ${code ?? 1}.`));
        return;
      }
      resolve();
    });
    follower.on("error", (error) => {
      reject(new Error(`log follow failed: ${error.message}`));
    });
  });
}
|
|
1191
|
+
|
|
1192
|
+
/**
 * Run `wave-dashboard.mjs --attach current` for a project/lane, sharing this
 * process's stdio.
 *
 * @param {object} options - Provides `project` and `lane`.
 * @returns {Promise<void>} Resolves when the dashboard process exits with code
 *   0; rejects on any other exit code or on a spawn error.
 */
function attachDashboard(options) {
  const dashboardScript = path.join(PACKAGE_ROOT, "scripts", "wave-dashboard.mjs");
  const dashboard = spawn(
    process.execPath,
    [dashboardScript, "--project", options.project, "--lane", options.lane, "--attach", "current"],
    {
      cwd: REPO_ROOT,
      stdio: "inherit",
      env: process.env,
    },
  );
  return new Promise((resolve, reject) => {
    dashboard.on("exit", (code) => {
      if (code !== 0) {
        reject(new Error(`dashboard attach exited ${code ?? 1}.`));
        return;
      }
      resolve();
    });
    dashboard.on("error", (error) => {
      reject(new Error(`dashboard attach failed: ${error.message}`));
    });
  });
}
|
|
1220
|
+
|
|
1221
|
+
/**
 * Entry point for the supervisor sub-commands: `submit`, `supervise`,
 * `status`, `wait`, and `attach`.
 *
 * `status`, `wait`, and `attach` require `--run-id` plus explicit `--project`
 * and `--lane`. `wait` sets `process.exitCode` when the run ended as "failed".
 *
 * @param {string} command - Sub-command name.
 * @param {string[]} argv - Arguments following the sub-command.
 * @returns {Promise<void>}
 * @throws {Error} On missing required flags, an unknown run, or an unknown command.
 */
export async function runSupervisorCli(command, argv) {
  if (command === "submit") {
    const submitOptions = parseSubmitArgs(argv);
    if (submitOptions.help) {
      printUsage(command);
      return;
    }
    const result = submitLauncherRun(submitOptions.launcherArgs);
    const relativeStatePath = path.relative(REPO_ROOT, result.statePath);
    if (submitOptions.json) {
      console.log(JSON.stringify({
        runId: result.runId,
        project: result.project,
        lane: result.lane,
        adhocRunId: result.adhocRunId,
        statePath: relativeStatePath,
      }, null, 2));
      return;
    }
    console.log(`run_id=${result.runId}`);
    console.log(`project=${result.project}`);
    console.log(`lane=${result.lane}`);
    if (result.adhocRunId) {
      console.log(`adhoc_run=${result.adhocRunId}`);
    }
    console.log(`state_path=${relativeStatePath}`);
    return;
  }

  const options = command === "attach" ? parseAttachArgs(argv) : parseSupervisorArgs(argv);
  if (options.help) {
    printUsage(command);
    return;
  }

  if (command === "supervise") {
    const result = await runSupervisorLoop(options);
    if (result.alreadyRunning) {
      console.log(`[supervise] daemon already running for ${options.project}/${options.lane}`);
    }
    return;
  }

  // Shared validation for the commands that address one specific run.
  const requireRunLocator = () => {
    if (!options.runId) {
      throw new Error("--run-id is required");
    }
    if (!options.projectProvided || !options.laneProvided) {
      throw new Error("--project and --lane are required");
    }
  };
  const printState = (state) => {
    console.log(options.json ? JSON.stringify(state, null, 2) : formatRunState(state));
  };

  if (command === "status") {
    requireRunLocator();
    const located = findSupervisorRunState(options.runId, options, { reconcile: true });
    if (!located?.state) {
      throw new Error(`Run ${options.runId} not found.`);
    }
    printState(located.state);
    return;
  }

  if (command === "wait") {
    requireRunLocator();
    const located = await waitForRunState(options);
    printState(located.state);
    if (located.state.status === "failed") {
      // A failed run must never surface as success: use the recorded exit code
      // only when it is a non-zero integer, otherwise fall back to 1.
      const recordedCode = located.state.exitCode;
      process.exitCode = Number.isInteger(recordedCode) && recordedCode !== 0 ? recordedCode : 1;
    }
    return;
  }

  if (command === "attach") {
    requireRunLocator();
    // Exactly one target must be chosen: both or neither is an error.
    if (Boolean(options.dashboard) === Boolean(options.agentId)) {
      throw new Error("Specify exactly one of --agent <id> or --dashboard");
    }
    if (options.dashboard) {
      await attachDashboard(options);
      return;
    }
    await attachAgentRuntimeSession(options);
    return;
  }

  throw new Error(`Unknown supervisor command: ${command}`);
}
|