agent-relay-orchestrator 0.91.3 → 0.92.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -2
- package/src/control.ts +27 -1
- package/src/index.ts +7 -3
- package/src/shared-callmux.ts +44 -5
- package/src/spawn/runtime.ts +7 -4
- package/src/spawn/sessions.ts +10 -2
- package/src/spawn/supervisor.ts +1 -39
- package/src/spawn/systemd.ts +73 -6
- package/src/spawn/types.ts +3 -0
- package/src/workspace-probe/idle-refresh.ts +110 -0
- package/src/workspace-probe/index.ts +1 -0
- package/src/workspace-probe/merge.ts +5 -1
- package/src/workspace-probe/probe.ts +58 -1
- package/src/workspace-probe/types.ts +17 -0
- package/vendor/callmux/bin/callmux.js +47579 -0
- package/vendor/callmux/package.json +11 -0
- package/vendor/callmux/schema.json +868 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-relay-orchestrator",
|
|
3
|
-
"version": "0.91.3",
|
|
3
|
+
"version": "0.92.0",
|
|
4
4
|
"description": "Agent Relay orchestrator — manages agent lifecycle across hosts",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
"files": [
|
|
10
10
|
"src/**/*.ts",
|
|
11
11
|
"!src/**/*.test.ts",
|
|
12
|
+
"vendor/**",
|
|
12
13
|
"README.md"
|
|
13
14
|
],
|
|
14
15
|
"scripts": {
|
|
@@ -16,7 +17,7 @@
|
|
|
16
17
|
"test": "bun test"
|
|
17
18
|
},
|
|
18
19
|
"dependencies": {
|
|
19
|
-
"agent-relay-sdk": "0.2.
|
|
20
|
+
"agent-relay-sdk": "0.2.72"
|
|
20
21
|
},
|
|
21
22
|
"devDependencies": {
|
|
22
23
|
"@types/bun": "latest",
|
package/src/control.ts
CHANGED
|
@@ -4,7 +4,7 @@ import type { ManagedAgentReport, RelayClient, RelayCommand } from "./relay";
|
|
|
4
4
|
import { handleSelfUpgrade } from "./self-upgrade";
|
|
5
5
|
import { readLocalProviderConfigs } from "./provider-config-migration";
|
|
6
6
|
import { spawnAgent, stopSession, type SpawnOptions } from "./spawn";
|
|
7
|
-
import { cleanupWorkspace, discardRecoveryBranch, mergeWorkspace, pruneWorktrees, reconcileWorkspace, refreshWorkspaceDeps, workspacesRoot } from "./workspace-probe";
|
|
7
|
+
import { cleanupWorkspace, discardRecoveryBranch, idleRefreshWorktree, mergeWorkspace, pruneWorktrees, reconcileWorkspace, refreshWorkspaceDeps, workspacesRoot } from "./workspace-probe";
|
|
8
8
|
import { armWorkspacePrAutoMerge, mergeWorkspacePr, refreshWorkspacePrBranch } from "./workspace-pr";
|
|
9
9
|
import type { WorkspaceMergeResult } from "agent-relay-sdk";
|
|
10
10
|
|
|
@@ -195,6 +195,16 @@ export function createControlHandler(
|
|
|
195
195
|
force: command.params.force === true,
|
|
196
196
|
});
|
|
197
197
|
await relay.updateCommand(command.id, "succeeded", result as unknown as Record<string, unknown>);
|
|
198
|
+
} else if (command.type === "workspace.idle-refresh") {
|
|
199
|
+
const result = idleRefreshWorktree({
|
|
200
|
+
id: typeof command.params.workspaceId === "string" ? command.params.workspaceId : undefined,
|
|
201
|
+
repoRoot: typeof command.params.repoRoot === "string" ? command.params.repoRoot : undefined,
|
|
202
|
+
worktreePath: typeof command.params.worktreePath === "string" ? command.params.worktreePath : undefined,
|
|
203
|
+
branch: typeof command.params.branch === "string" ? command.params.branch : undefined,
|
|
204
|
+
baseRef: typeof command.params.baseRef === "string" ? command.params.baseRef : undefined,
|
|
205
|
+
baseSha: typeof command.params.baseSha === "string" ? command.params.baseSha : undefined,
|
|
206
|
+
});
|
|
207
|
+
await relay.updateCommand(command.id, result.error ? "failed" : "succeeded", result as unknown as Record<string, unknown>, result.error);
|
|
198
208
|
} else if (command.type === "workspace.prune") {
|
|
199
209
|
const result = pruneWorktrees({
|
|
200
210
|
repoRoot: typeof command.params.repoRoot === "string" ? command.params.repoRoot : undefined,
|
|
@@ -289,6 +299,7 @@ function spawnOptionsFromRecord(source: Record<string, any>, config: Orchestrato
|
|
|
289
299
|
automationId: typeof source.automationId === "string" ? source.automationId : undefined,
|
|
290
300
|
automationRunId: typeof source.automationRunId === "string" ? source.automationRunId : undefined,
|
|
291
301
|
requestedVia: typeof source.requestedVia === "string" ? source.requestedVia : undefined,
|
|
302
|
+
resumeWorkspace: parseResumeWorkspace(source.resumeWorkspace),
|
|
292
303
|
};
|
|
293
304
|
}
|
|
294
305
|
|
|
@@ -308,3 +319,18 @@ function stringRecord(value: unknown): Record<string, string> | undefined {
|
|
|
308
319
|
/**
 * Extracts the string entries from an untyped payload.
 *
 * @param value - Arbitrary input, typically a field of a command payload.
 * @returns The string elements in their original order, or `undefined` when
 *   `value` is not an array at all (non-string elements are dropped silently).
 */
function stringArray(value: unknown): string[] | undefined {
  if (!Array.isArray(value)) return undefined;
  const strings: string[] = [];
  for (const entry of value) {
    if (typeof entry === "string") strings.push(entry);
  }
  return strings;
}
|
|
322
|
+
|
|
323
|
+
/**
 * Validates an untyped `resumeWorkspace` payload.
 *
 * A target is accepted only when it is a record carrying a non-empty string
 * `branch` and a `mode` of exactly `"attach"` or `"branch-from"`; anything else
 * yields `undefined`. The remaining fields are optional and are kept only when
 * they are strings.
 */
function parseResumeWorkspace(value: unknown): import("./workspace-probe/types").ResumeWorkspaceTarget | undefined {
  if (!isRecord(value)) return undefined;

  const optionalString = (field: unknown): string | undefined => (typeof field === "string" ? field : undefined);

  const rawMode = value.mode;
  const mode = rawMode === "attach" || rawMode === "branch-from" ? rawMode : undefined;
  const branch = optionalString(value.branch);
  // An empty branch name is rejected together with a missing one.
  if (!branch || !mode) return undefined;

  return {
    branch,
    mode,
    worktreePath: optionalString(value.worktreePath),
    workspaceId: optionalString(value.workspaceId),
    baseRef: optionalString(value.baseRef),
    baseSha: optionalString(value.baseSha),
  };
}
|
package/src/index.ts
CHANGED
|
@@ -4,7 +4,7 @@ import { loadConfig, initConfigFile } from "./config";
|
|
|
4
4
|
import { createRelayClient } from "./relay";
|
|
5
5
|
import type { ManagedSessionExitDiagnostics } from "./relay";
|
|
6
6
|
import { createControlHandler } from "./control";
|
|
7
|
-
import { diagnoseSessionExit, hydrateTerminalGuests,
|
|
7
|
+
import { diagnoseSessionExit, hydrateTerminalGuests, managedSessionLiveness, reapTerminalGuests, refreshManagedAgentReport } from "./spawn";
|
|
8
8
|
import { startApiServer } from "./api";
|
|
9
9
|
import { recoverManagedAgents } from "./recovery";
|
|
10
10
|
import { ProviderProbeCache } from "./provider-probe";
|
|
@@ -166,8 +166,12 @@ async function healthCheck(): Promise<void> {
|
|
|
166
166
|
changed = true;
|
|
167
167
|
}
|
|
168
168
|
const sessionName = refreshed.sessionName ?? refreshed.tmuxSession;
|
|
169
|
-
const
|
|
170
|
-
if (
|
|
169
|
+
const liveness = managedSessionLiveness(sessionName);
|
|
170
|
+
if (liveness === "unknown") {
|
|
171
|
+
console.error(`[orchestrator] Session liveness unknown: ${sessionName}; preserving and retrying next health check`);
|
|
172
|
+
continue;
|
|
173
|
+
}
|
|
174
|
+
if (liveness === "dead") {
|
|
171
175
|
const diagnostics = diagnoseSessionExit({
|
|
172
176
|
agentId: refreshed.agentId,
|
|
173
177
|
policyName: refreshed.policyName,
|
package/src/shared-callmux.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
2
|
import { homedir } from "node:os";
|
|
3
|
-
import { dirname, isAbsolute, join } from "node:path";
|
|
3
|
+
import { dirname, isAbsolute, join, resolve } from "node:path";
|
|
4
4
|
import { errMessage, isRecord } from "agent-relay-sdk";
|
|
5
5
|
import type { OrchestratorConfig } from "./config";
|
|
6
6
|
import { agentRelayHome } from "./config";
|
|
@@ -54,6 +54,15 @@ export interface SharedCallmuxSupervisorDeps {
|
|
|
54
54
|
setTimeout(fn: () => void, ms: number): Timer;
|
|
55
55
|
clearTimeout(timer: Timer): void;
|
|
56
56
|
log(message: string): void;
|
|
57
|
+
report(snapshot: SharedCallmuxHealthSnapshot): void;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
export interface SharedCallmuxHealthSnapshot {
|
|
61
|
+
state: "disabled" | "missing" | "starting" | "running" | "unhealthy" | "restarting" | "stopped";
|
|
62
|
+
url: string;
|
|
63
|
+
command?: string;
|
|
64
|
+
reason?: string;
|
|
65
|
+
pid?: number;
|
|
57
66
|
}
|
|
58
67
|
|
|
59
68
|
export function sharedCallmuxOptionsFromEnv(env: Record<string, string | undefined> = process.env): SharedCallmuxOptions {
|
|
@@ -63,14 +72,13 @@ export function sharedCallmuxOptionsFromEnv(env: Record<string, string | undefin
|
|
|
63
72
|
const port = numberEnv(env[SHARED_CALLMUX_PORT_ENV]) ?? parsed?.port ?? DEFAULT_SHARED_CALLMUX_PORT;
|
|
64
73
|
const url = explicitUrl ?? `http://${host}:${port}/mcp`;
|
|
65
74
|
return {
|
|
66
|
-
|
|
67
|
-
command: env[SHARED_CALLMUX_COMMAND_ENV] || "callmux",
|
|
75
|
+
command: env[SHARED_CALLMUX_COMMAND_ENV] || bundledCallmuxCommand() || "callmux",
|
|
68
76
|
host,
|
|
69
77
|
port,
|
|
70
78
|
url,
|
|
71
79
|
configPath: env[SHARED_CALLMUX_CONFIG_ENV] || join(agentRelayHome(), "callmux", "shared-listener.json"),
|
|
72
80
|
sourceConfigPath: env[SHARED_CALLMUX_SOURCE_CONFIG_ENV] || env.CALLMUX_CONFIG || join(homedir(), ".config", "callmux", "config.json"),
|
|
73
|
-
enabled: env[SHARED_CALLMUX_ENABLE_ENV]
|
|
81
|
+
enabled: !envOff(env[SHARED_CALLMUX_ENABLE_ENV]),
|
|
74
82
|
};
|
|
75
83
|
}
|
|
76
84
|
|
|
@@ -118,14 +126,17 @@ export class SharedCallmuxSupervisor {
|
|
|
118
126
|
|
|
119
127
|
start(): void {
|
|
120
128
|
if (!this.opts.enabled) {
|
|
121
|
-
this.deps.log("[orchestrator] shared callmux listener disabled
|
|
129
|
+
this.deps.log("[orchestrator] shared callmux listener disabled by AGENT_RELAY_SHARED_CALLMUX_ENABLE=0");
|
|
130
|
+
this.report("disabled", "global kill-switch");
|
|
122
131
|
return;
|
|
123
132
|
}
|
|
124
133
|
this.deps.log(`[orchestrator] Shared callmux listener: ${this.opts.url}`);
|
|
125
134
|
if (!this.deps.which(this.opts.command)) {
|
|
126
135
|
this.deps.log("[orchestrator] shared callmux not found — shared listener dormant");
|
|
136
|
+
this.report("missing", "command not found");
|
|
127
137
|
return;
|
|
128
138
|
}
|
|
139
|
+
this.report("starting");
|
|
129
140
|
this.spawn();
|
|
130
141
|
this.healthTimer = this.deps.setInterval(() => {
|
|
131
142
|
void this.checkHealth();
|
|
@@ -156,9 +167,11 @@ export class SharedCallmuxSupervisor {
|
|
|
156
167
|
}
|
|
157
168
|
if (ok) {
|
|
158
169
|
this.backoffMs = this.timing.restartBaseMs ?? 1_000;
|
|
170
|
+
this.report("running", undefined, this.proc?.pid);
|
|
159
171
|
return true;
|
|
160
172
|
}
|
|
161
173
|
this.deps.log(`[orchestrator] Shared callmux readiness failed at ${readyUrl}; restarting`);
|
|
174
|
+
this.report("unhealthy", "readiness failed", this.proc?.pid);
|
|
162
175
|
this.restart("readiness failed");
|
|
163
176
|
return false;
|
|
164
177
|
}
|
|
@@ -177,20 +190,24 @@ export class SharedCallmuxSupervisor {
|
|
|
177
190
|
proc = this.deps.spawn(this.opts.command, args, { env, cwd: homedir() });
|
|
178
191
|
} catch (err) {
|
|
179
192
|
this.deps.log(`[orchestrator] Shared callmux listener failed to start: ${errMessage(err)}; scheduling restart`);
|
|
193
|
+
this.report("restarting", errMessage(err));
|
|
180
194
|
this.scheduleRestart();
|
|
181
195
|
return;
|
|
182
196
|
}
|
|
183
197
|
this.proc = proc;
|
|
184
198
|
this.deps.log(`[orchestrator] Started shared callmux listener pid=${proc.pid ?? "unknown"}`);
|
|
199
|
+
this.report("running", undefined, proc.pid);
|
|
185
200
|
proc.exited.then((code) => {
|
|
186
201
|
if (this.proc !== proc || this.stopping) return;
|
|
187
202
|
this.proc = null;
|
|
188
203
|
this.deps.log(`[orchestrator] Shared callmux listener exited (${code ?? "signal"}); scheduling restart`);
|
|
204
|
+
this.report("restarting", `exited ${code ?? "signal"}`);
|
|
189
205
|
this.scheduleRestart();
|
|
190
206
|
}).catch((err) => {
|
|
191
207
|
if (this.proc !== proc || this.stopping) return;
|
|
192
208
|
this.proc = null;
|
|
193
209
|
this.deps.log(`[orchestrator] Shared callmux listener exit watcher failed: ${err}`);
|
|
210
|
+
this.report("restarting", errMessage(err));
|
|
194
211
|
this.scheduleRestart();
|
|
195
212
|
});
|
|
196
213
|
}
|
|
@@ -201,9 +218,20 @@ export class SharedCallmuxSupervisor {
|
|
|
201
218
|
this.proc.kill("SIGTERM");
|
|
202
219
|
this.proc = null;
|
|
203
220
|
}
|
|
221
|
+
this.report("restarting", reason);
|
|
204
222
|
this.scheduleRestart();
|
|
205
223
|
}
|
|
206
224
|
|
|
225
|
+
/**
 * Publishes a health snapshot for the shared callmux listener via `deps.report`.
 *
 * The snapshot always carries the state plus the configured URL and command;
 * `reason` and `pid` are attached only when truthy (so an empty reason or a
 * pid of 0 is omitted).
 */
private report(state: SharedCallmuxHealthSnapshot["state"], reason?: string, pid?: number): void {
  const snapshot: SharedCallmuxHealthSnapshot = {
    state,
    url: this.opts.url,
    command: this.opts.command,
  };
  if (reason) snapshot.reason = reason;
  if (pid) snapshot.pid = pid;
  this.deps.report(snapshot);
}
|
|
234
|
+
|
|
207
235
|
private scheduleRestart(): void {
|
|
208
236
|
if (this.stopping || this.restartTimer) return;
|
|
209
237
|
const delay = this.backoffMs;
|
|
@@ -238,15 +266,26 @@ function defaultDeps(): SharedCallmuxSupervisorDeps {
|
|
|
238
266
|
setTimeout: (fn, ms) => setTimeout(fn, ms),
|
|
239
267
|
clearTimeout: (timer) => clearTimeout(timer),
|
|
240
268
|
log: (message) => console.error(message),
|
|
269
|
+
report: (snapshot) => console.error(`[orchestrator] shared callmux status ${snapshot.state}${snapshot.reason ? `: ${snapshot.reason}` : ""}`),
|
|
241
270
|
};
|
|
242
271
|
}
|
|
243
272
|
|
|
273
|
+
/**
 * Locates the callmux entry point vendored alongside this package.
 *
 * @returns The absolute path to `vendor/callmux/bin/callmux.js` (resolved
 *   relative to this module's directory) when that file exists, else `null`.
 */
export function bundledCallmuxCommand(): string | null {
  const vendoredBin = resolve(import.meta.dir, "../vendor/callmux/bin/callmux.js");
  if (!existsSync(vendoredBin)) return null;
  return vendoredBin;
}
|
|
277
|
+
|
|
244
278
|
/**
 * Resolves a command to something spawnable.
 *
 * Anything that looks like a path (absolute, or containing a `/`) is checked
 * for existence on disk and returned unchanged; a bare command name is looked
 * up with `Bun.which`.
 *
 * @returns The usable command/path, or `null` when it cannot be found.
 */
function resolveCommand(command: string): string | null {
  const looksLikePath = isAbsolute(command) || command.includes("/");
  if (!looksLikePath) {
    return Bun.which(command);
  }
  if (existsSync(command)) {
    return command;
  }
  return null;
}
|
|
249
283
|
|
|
284
|
+
/**
 * Reports whether an environment flag was explicitly switched off.
 *
 * Only the values `0`, `false`, `off` and `no` (case-insensitive, surrounding
 * whitespace ignored) count as "off". An unset, empty or whitespace-only
 * variable — and any other value — returns `false`.
 *
 * @param value - Raw environment variable value, if set.
 * @returns `true` only for an explicit "off" spelling.
 */
function envOff(value: string | undefined): boolean {
  // Unset and empty are both "not switched off"; one falsy check covers them.
  if (!value) return false;
  const normalized = value.trim().toLowerCase();
  return normalized === "0" || normalized === "false" || normalized === "off" || normalized === "no";
}
|
|
288
|
+
|
|
250
289
|
function readJsonObject(path: string): Record<string, unknown> {
|
|
251
290
|
if (!existsSync(path)) return {};
|
|
252
291
|
const parsed = JSON.parse(readFileSync(path, "utf8"));
|
package/src/spawn/runtime.ts
CHANGED
|
@@ -5,7 +5,7 @@ import type { ManagedAgentReport } from "../relay";
|
|
|
5
5
|
import { isPidAlive, parseProcStateIsZombie } from "agent-relay-sdk/process-utils";
|
|
6
6
|
import { tmuxHasSession } from "agent-relay-sdk/tmux-utils";
|
|
7
7
|
import { LOG_DIR, RUNNER_INFO_DIR, SESSION_DIR, STATE_FILE } from "./constants";
|
|
8
|
-
import { systemdMainPid } from "./systemd";
|
|
8
|
+
import { systemdMainPid, systemdUnitLiveness, type SystemdUnitLiveness } from "./systemd";
|
|
9
9
|
import { sanitizeFsName } from "agent-relay-sdk/fs-name";
|
|
10
10
|
import type { RunnerInfo, SessionRecord, SessionSupervisor } from "./types";
|
|
11
11
|
|
|
@@ -66,12 +66,15 @@ export function sessionSupervisor(record?: Pick<SessionRecord, "supervisor">): S
|
|
|
66
66
|
}
|
|
67
67
|
|
|
68
68
|
export function isSessionRecordAlive(record: SessionRecord): boolean {
|
|
69
|
+
return sessionRecordLiveness(record) === "alive";
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
export function sessionRecordLiveness(record: SessionRecord): SystemdUnitLiveness {
|
|
69
73
|
const supervisor = sessionSupervisor(record);
|
|
70
74
|
if (supervisor.type === "systemd" && supervisor.unit) {
|
|
71
|
-
|
|
72
|
-
return pid > 0 && isPidAlive(pid);
|
|
75
|
+
return systemdUnitLiveness(supervisor.unit);
|
|
73
76
|
}
|
|
74
|
-
return isPidAlive(record.pid);
|
|
77
|
+
return isPidAlive(record.pid) ? "alive" : "dead";
|
|
75
78
|
}
|
|
76
79
|
|
|
77
80
|
export function currentSessionPid(record: SessionRecord): number {
|
package/src/spawn/sessions.ts
CHANGED
|
@@ -4,7 +4,7 @@ import { sanitizeFsName } from "agent-relay-sdk/fs-name";
|
|
|
4
4
|
import { shellEscape } from "agent-relay-sdk/shell-utils";
|
|
5
5
|
import { tmuxHasSession } from "agent-relay-sdk/tmux-utils";
|
|
6
6
|
import { cleanupSessionRecord } from "./supervisor";
|
|
7
|
-
import { currentSessionPid, findSessionRecord, isSessionRecordAlive, loadState, readRunnerInfo, saveState, sessionReportFields, sessionSupervisor } from "./runtime";
|
|
7
|
+
import { currentSessionPid, findSessionRecord, isSessionRecordAlive, loadState, readRunnerInfo, saveState, sessionRecordLiveness, sessionReportFields, sessionSupervisor } from "./runtime";
|
|
8
8
|
import type { SessionInfo, SessionRecord } from "./types";
|
|
9
9
|
|
|
10
10
|
export function listSessions(prefix: string): SessionInfo[] {
|
|
@@ -31,6 +31,11 @@ export function isSessionAlive(name: string): boolean {
|
|
|
31
31
|
return record ? isSessionRecordAlive(record) : false;
|
|
32
32
|
}
|
|
33
33
|
|
|
34
|
+
/**
 * Looks up a managed session by name in the persisted state and reports its
 * liveness.
 *
 * @param name - Session name to match against each record's `name`.
 * @returns The liveness of the first matching record; `"dead"` when no record
 *   with that name exists.
 */
export function managedSessionLiveness(name: string): "alive" | "dead" | "unknown" {
  for (const candidate of loadState()) {
    if (candidate.name === name) return sessionRecordLiveness(candidate);
  }
  return "dead";
}
|
|
38
|
+
|
|
34
39
|
export function refreshManagedAgentReport(agent: ManagedAgentReport): ManagedAgentReport {
|
|
35
40
|
const record = findSessionRecord({
|
|
36
41
|
tmuxSession: agent.sessionName ?? agent.tmuxSession,
|
|
@@ -56,7 +61,10 @@ export async function recoverExistingSessions(
|
|
|
56
61
|
const alive: SessionRecord[] = [];
|
|
57
62
|
|
|
58
63
|
for (const record of records) {
|
|
59
|
-
|
|
64
|
+
const liveness = sessionRecordLiveness(record);
|
|
65
|
+
if (liveness === "unknown") {
|
|
66
|
+
console.error(`[orchestrator] Session liveness unknown: ${record.name} (pid ${record.pid}) — preserving`);
|
|
67
|
+
} else if (liveness === "dead") {
|
|
60
68
|
console.error(`[orchestrator] Stale session: ${record.name} (pid ${record.pid} dead) — removing`);
|
|
61
69
|
cleanupSessionRecord(record);
|
|
62
70
|
continue;
|
package/src/spawn/supervisor.ts
CHANGED
|
@@ -11,7 +11,7 @@ import { SESSION_DIR } from "./constants";
|
|
|
11
11
|
import { disableSystemdSupervisor, forceSystemdSupervisor } from "../config";
|
|
12
12
|
import { logLines } from "./log-utils";
|
|
13
13
|
import { currentSessionPid, ensureSessionDir, findSessionRecord, isSessionRecordAlive, loadState, logFilePath, readRunnerInfo, removeSessionRecord, sessionSupervisor } from "./runtime";
|
|
14
|
-
import { systemdMainPid, systemdUnitName } from "./systemd";
|
|
14
|
+
import { systemdMainPid, systemdUnitDiagnostics, systemdUnitName } from "./systemd";
|
|
15
15
|
import type { SessionRecord, SessionSupervisor, SpawnedRunner } from "./types";
|
|
16
16
|
|
|
17
17
|
export function spawnRunner(name: string, command: string[], cwd: string, env: Record<string, string>, logFile: string): SpawnedRunner {
|
|
@@ -123,44 +123,6 @@ function waitForSystemdMainPid(unit: string, timeoutMs: number): number {
|
|
|
123
123
|
return 0;
|
|
124
124
|
}
|
|
125
125
|
|
|
126
|
-
function systemdUnitDiagnostics(unit: string): NonNullable<ManagedSessionExitDiagnostics["systemd"]> {
|
|
127
|
-
const result = Bun.spawnSync([
|
|
128
|
-
"systemctl", "--user", "show", `${unit}.service`,
|
|
129
|
-
"-p", "ActiveState",
|
|
130
|
-
"-p", "SubState",
|
|
131
|
-
"-p", "Result",
|
|
132
|
-
"-p", "ExecMainCode",
|
|
133
|
-
"-p", "ExecMainStatus",
|
|
134
|
-
"-p", "MainPID",
|
|
135
|
-
], {
|
|
136
|
-
stdin: "ignore",
|
|
137
|
-
stdout: "pipe",
|
|
138
|
-
stderr: "pipe",
|
|
139
|
-
});
|
|
140
|
-
if (result.exitCode !== 0) {
|
|
141
|
-
return {
|
|
142
|
-
unit,
|
|
143
|
-
unavailable: result.stderr.toString().trim() || `systemctl show exited with ${result.exitCode}`,
|
|
144
|
-
};
|
|
145
|
-
}
|
|
146
|
-
const props = new Map<string, string>();
|
|
147
|
-
for (const line of result.stdout.toString().split("\n")) {
|
|
148
|
-
const index = line.indexOf("=");
|
|
149
|
-
if (index <= 0) continue;
|
|
150
|
-
props.set(line.slice(0, index), line.slice(index + 1));
|
|
151
|
-
}
|
|
152
|
-
const mainPid = Number(props.get("MainPID"));
|
|
153
|
-
return {
|
|
154
|
-
unit,
|
|
155
|
-
activeState: props.get("ActiveState") || undefined,
|
|
156
|
-
subState: props.get("SubState") || undefined,
|
|
157
|
-
result: props.get("Result") || undefined,
|
|
158
|
-
execMainCode: props.get("ExecMainCode") || undefined,
|
|
159
|
-
execMainStatus: props.get("ExecMainStatus") || undefined,
|
|
160
|
-
mainPid: Number.isFinite(mainPid) && mainPid > 0 ? mainPid : undefined,
|
|
161
|
-
};
|
|
162
|
-
}
|
|
163
|
-
|
|
164
126
|
function logFileDiagnostics(logFile: string): Pick<ManagedSessionExitDiagnostics, "logBytes" | "logEmpty" | "logTail"> & { logUnavailable?: string } {
|
|
165
127
|
try {
|
|
166
128
|
const stat = statSync(logFile);
|
package/src/spawn/systemd.ts
CHANGED
|
@@ -1,17 +1,84 @@
|
|
|
1
1
|
import { sanitizeFsName } from "agent-relay-sdk/fs-name";
|
|
2
|
+
import { isPidAlive } from "agent-relay-sdk/process-utils";
|
|
3
|
+
|
|
4
|
+
export type SystemdUnitLiveness = "alive" | "dead" | "unknown";
|
|
5
|
+
|
|
6
|
+
export interface SystemdUnitDiagnostics {
|
|
7
|
+
unit: string;
|
|
8
|
+
activeState?: string;
|
|
9
|
+
subState?: string;
|
|
10
|
+
result?: string;
|
|
11
|
+
execMainCode?: string;
|
|
12
|
+
execMainStatus?: string;
|
|
13
|
+
mainPid?: number;
|
|
14
|
+
unavailable?: string;
|
|
15
|
+
}
|
|
2
16
|
|
|
3
17
|
export function systemdUnitName(session: string): string {
|
|
4
18
|
const safe = sanitizeFsName(session, { replacement: "-", trimEdge: true, fallback: "agent" });
|
|
5
19
|
return `agent-relay-managed-${safe}`.slice(0, 180);
|
|
6
20
|
}
|
|
7
21
|
|
|
8
|
-
export function
|
|
9
|
-
const result = Bun.spawnSync([
|
|
22
|
+
export function systemdUnitDiagnostics(unit: string): SystemdUnitDiagnostics {
|
|
23
|
+
const result = Bun.spawnSync([
|
|
24
|
+
"systemctl", "--user", "show", `${unit}.service`,
|
|
25
|
+
"-p", "ActiveState",
|
|
26
|
+
"-p", "SubState",
|
|
27
|
+
"-p", "Result",
|
|
28
|
+
"-p", "ExecMainCode",
|
|
29
|
+
"-p", "ExecMainStatus",
|
|
30
|
+
"-p", "MainPID",
|
|
31
|
+
], {
|
|
10
32
|
stdin: "ignore",
|
|
11
33
|
stdout: "pipe",
|
|
12
|
-
stderr: "
|
|
34
|
+
stderr: "pipe",
|
|
13
35
|
});
|
|
14
|
-
if (result.exitCode !== 0)
|
|
15
|
-
|
|
16
|
-
|
|
36
|
+
if (result.exitCode !== 0) {
|
|
37
|
+
return {
|
|
38
|
+
unit,
|
|
39
|
+
unavailable: result.stderr.toString().trim() || `systemctl show exited with ${result.exitCode}`,
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
const props = new Map<string, string>();
|
|
43
|
+
for (const line of result.stdout.toString().split("\n")) {
|
|
44
|
+
const index = line.indexOf("=");
|
|
45
|
+
if (index <= 0) continue;
|
|
46
|
+
props.set(line.slice(0, index), line.slice(index + 1));
|
|
47
|
+
}
|
|
48
|
+
const mainPid = Number(props.get("MainPID"));
|
|
49
|
+
return {
|
|
50
|
+
unit,
|
|
51
|
+
activeState: props.get("ActiveState") || undefined,
|
|
52
|
+
subState: props.get("SubState") || undefined,
|
|
53
|
+
result: props.get("Result") || undefined,
|
|
54
|
+
execMainCode: props.get("ExecMainCode") || undefined,
|
|
55
|
+
execMainStatus: props.get("ExecMainStatus") || undefined,
|
|
56
|
+
mainPid: Number.isFinite(mainPid) && mainPid > 0 ? mainPid : undefined,
|
|
57
|
+
};
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/**
 * Classifies a systemd unit as alive, dead or unknown from a diagnostics
 * snapshot.
 *
 * Evidence is weighed in this order:
 *  1. A reported main PID that `isAlive` confirms → `"alive"`.
 *  2. Diagnostics marked `unavailable` (the query itself failed) → `"unknown"`.
 *  3. `ActiveState` of `inactive`/`failed`, or `SubState` of `dead`/`failed`
 *     (compared case-insensitively) → `"dead"`.
 *  4. Anything else — including an active/activating/reloading/deactivating
 *     unit with no live main PID, or missing state — → `"unknown"`.
 *
 * @param diagnostics - Snapshot of the unit's properties.
 * @param isAlive - Probe used to confirm that a PID is still running.
 * @returns The liveness verdict; `"dead"` is only returned on positive evidence.
 */
export function systemdUnitLivenessFromDiagnostics(
  diagnostics: SystemdUnitDiagnostics,
  isAlive: (pid: number) => boolean,
): SystemdUnitLiveness {
  // A confirmed-running main PID wins over every other signal.
  if (diagnostics.mainPid && isAlive(diagnostics.mainPid)) return "alive";
  // Without usable diagnostics there is no evidence either way.
  if (diagnostics.unavailable) return "unknown";

  const activeState = diagnostics.activeState?.toLowerCase();
  const subState = diagnostics.subState?.toLowerCase();
  const reportsTerminated =
    activeState === "inactive" || activeState === "failed" || subState === "dead" || subState === "failed";

  // Every non-terminal state is inconclusive once the main PID could not be
  // confirmed, so there is no need to enumerate the active-ish states.
  return reportsTerminated ? "dead" : "unknown";
}
|
|
77
|
+
|
|
78
|
+
export function systemdUnitLiveness(unit: string): SystemdUnitLiveness {
|
|
79
|
+
return systemdUnitLivenessFromDiagnostics(systemdUnitDiagnostics(unit), isPidAlive);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
export function systemdMainPid(unit: string): number {
|
|
83
|
+
return systemdUnitDiagnostics(unit).mainPid ?? 0;
|
|
17
84
|
}
|
package/src/spawn/types.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { OrchestratorConfig } from "../config";
|
|
2
2
|
import type { AgentLifecycle, WorkspaceMetadata, WorkspaceMode } from "agent-relay-sdk";
|
|
3
|
+
import type { ResumeWorkspaceTarget } from "../workspace-probe/types";
|
|
3
4
|
|
|
4
5
|
export interface SpawnOptions {
|
|
5
6
|
provider: "claude" | "codex";
|
|
@@ -29,6 +30,8 @@ export interface SpawnOptions {
|
|
|
29
30
|
/** How the spawn was requested (`mcp` = an agent via the MCP surface, else dashboard/CLI). Drives
|
|
30
31
|
* the origin tag so an MCP-spawned worker isn't mislabeled `dashboard-spawned` (#330). */
|
|
31
32
|
requestedVia?: string;
|
|
33
|
+
/** #635 — attach to or branch off an existing worktree instead of creating a fresh one. */
|
|
34
|
+
resumeWorkspace?: ResumeWorkspaceTarget;
|
|
32
35
|
}
|
|
33
36
|
|
|
34
37
|
export interface SessionInfo {
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import { existsSync } from "node:fs";
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
import { git } from "../git";
|
|
4
|
+
import { refreshWorkspaceDeps } from "./deps";
|
|
5
|
+
import { syncBaseFromOrigin } from "./git-state";
|
|
6
|
+
import { nextBranchName } from "./names";
|
|
7
|
+
import { shortBranch } from "./parse";
|
|
8
|
+
import type { WorkspaceDepsRefreshResult } from "agent-relay-sdk";
|
|
9
|
+
|
|
10
|
+
export interface IdleRefreshResult {
|
|
11
|
+
workspaceId?: string;
|
|
12
|
+
/** True when the worktree was successfully refreshed to origin/main. */
|
|
13
|
+
refreshed: boolean;
|
|
14
|
+
/** New branch name after refresh (`nextBranchName` --N). Present when `refreshed`. */
|
|
15
|
+
newBranch?: string;
|
|
16
|
+
/** SHA of origin/main the worktree now sits on. Present when `refreshed`. */
|
|
17
|
+
baseSha?: string;
|
|
18
|
+
/** Human-readable skip reason when `refreshed` is false and there is no error. */
|
|
19
|
+
reason?: string;
|
|
20
|
+
/** Git/system error that prevented the refresh. */
|
|
21
|
+
error?: string;
|
|
22
|
+
depsRefresh?: WorkspaceDepsRefreshResult;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/**
 * Proactively refresh an idle branch-agent worktree to the current upstream tip
 * (origin/main or equivalent), making it current before the agent's next turn.
 *
 * Safety predicates — ALL must hold before the worktree is touched:
 *   1. Worktree is clean (zero uncommitted/untracked changes, re-verified live).
 *   2. No commits ahead of base (nothing to lose on a branch reset).
 *   3. A remote upstream is configured for the base branch (we know who to follow).
 *   4. HEAD is an ancestor of the upstream tip (FF-only; divergence = skip).
 *   5. HEAD is not already AT the upstream tip (skip if already current).
 *
 * On success: `checkout -B <fresh> <upstream>`, then the previously checked-out
 * branch is deleted and deps are refreshed. The old branch is deleted only when
 * it was the branch HEAD actually pointed at, because the predicates above vouch
 * for HEAD alone; a branch name taken from `input.branch` (detached HEAD) is
 * used for naming the fresh branch but is never force-deleted.
 *
 * On any predicate failure: returns `{ refreshed: false, reason }` — never mutates.
 * A failing `git status` or `git checkout` returns `{ refreshed: false, error }`.
 *
 * @param input - Workspace identity and git coordinates. `worktreePath` is
 *   required; `repoRoot` defaults to the worktree path; `baseRef` is required
 *   for any refresh to happen. `baseSha` is accepted but not read here.
 * @returns The refresh outcome, including the new branch and base SHA on success.
 */
export function idleRefreshWorktree(input: {
  id?: string;
  worktreePath?: string;
  repoRoot?: string;
  branch?: string;
  baseRef?: string;
  baseSha?: string;
}): IdleRefreshResult {
  if (!input.worktreePath) return { refreshed: false, error: "worktreePath required" };

  const skip = (reason: string): IdleRefreshResult => ({ workspaceId: input.id, refreshed: false, reason });
  const fail = (error: string): IdleRefreshResult => ({ workspaceId: input.id, refreshed: false, error });

  const worktreePath = resolve(input.worktreePath);
  if (!existsSync(worktreePath)) return skip("worktree missing");
  const repoRoot = input.repoRoot ? resolve(input.repoRoot) : worktreePath;

  // Predicate 1: re-check live dirty count (stored metadata may be stale).
  const status = git(["status", "--porcelain"], worktreePath);
  if (!status.ok) return fail(status.stderr || "git status failed");
  const dirty = status.stdout ? status.stdout.split("\n").filter(Boolean).length : 0;
  if (dirty > 0) return skip("worktree has uncommitted changes");

  const base = input.baseRef;
  if (!base) return skip("no base ref");

  // Predicate 2: no commits ahead of base. The output must be a real number:
  // Number("") is 0, which would wrongly read as "nothing ahead".
  const countResult = git(["rev-list", "--count", `${base}..HEAD`], worktreePath);
  const countText = countResult.ok ? countResult.stdout.trim() : "";
  const ahead = /^\d+$/.test(countText) ? Number(countText) : NaN;
  if (!Number.isFinite(ahead)) return skip("could not determine ahead count");
  if (ahead > 0) return skip("workspace has commits ahead of base");

  // Predicate 3: fetch origin and resolve an upstream ref.
  const startRef = syncBaseFromOrigin(worktreePath, base);
  if (!startRef || startRef === base) return skip("no upstream configured for base branch");

  // Predicate 5: skip if already current (HEAD SHA == upstream tip). If either
  // side cannot be resolved, say so rather than reporting a divergence.
  const head = git(["rev-parse", "HEAD"], worktreePath);
  const upstream = git(["rev-parse", startRef], worktreePath);
  const headSha = head.ok ? head.stdout.trim() : "";
  const upstreamSha = upstream.ok ? upstream.stdout.trim() : "";
  if (!headSha || !upstreamSha) return skip("could not resolve HEAD or upstream tip");
  if (headSha === upstreamSha) return skip("already current with origin");

  // Predicate 4: FF-only guard — HEAD must be an ancestor of the upstream tip.
  // If not, the base has diverged; leave it for the conflict scan.
  if (!git(["merge-base", "--is-ancestor", "HEAD", startRef], worktreePath).ok) {
    return skip("diverged from origin — leaving for conflict scan");
  }

  // All predicates passed — safe to advance.
  const liveBranch = shortBranch(git(["symbolic-ref", "--quiet", "--short", "HEAD"], worktreePath).stdout || undefined);
  const branch = liveBranch ?? input.branch;
  if (!branch) return skip("could not determine current branch");

  const fresh = nextBranchName(repoRoot, branch);
  const checkout = git(["checkout", "-B", fresh, startRef], worktreePath);
  if (!checkout.ok) return fail(checkout.stderr || "git checkout failed");

  // The branch HEAD was on has no commits of its own (predicates 2 and 4), so it
  // is safe to drop. A name that only came from caller metadata was never
  // verified and may hold unique commits, so it is left in place.
  if (liveBranch && fresh !== liveBranch) {
    git(["branch", "-D", liveBranch], repoRoot);
  }

  const baseSha = git(["rev-parse", "HEAD"], worktreePath).stdout.trim() || undefined;
  const depsRefresh = refreshWorkspaceDeps(repoRoot, worktreePath);
  // Only surface the deps result when it carries something worth reporting.
  const reportDeps = depsRefresh.refreshed || depsRefresh.stale || depsRefresh.error;

  return {
    workspaceId: input.id,
    refreshed: true,
    newBranch: fresh,
    ...(baseSha ? { baseSha } : {}),
    ...(reportDeps ? { depsRefresh } : {}),
  };
}
}
|
|
@@ -373,7 +373,11 @@ function resolveNoopMerge(
|
|
|
373
373
|
const base = preview.baseRef;
|
|
374
374
|
if (base && branch) {
|
|
375
375
|
const fresh = nextBranchName(repoRoot, branch);
|
|
376
|
-
|
|
376
|
+
// #478 — cut from the FETCHED upstream tip so even a plain noop recycle (no
|
|
377
|
+
// startRef) advances to origin/main, not the stale local base. The PR-land
|
|
378
|
+
// recycle (#423) already passes startRef (the verified upstream sha); this
|
|
379
|
+
// makes the rebase-ff noop path equally multi-host-correct.
|
|
380
|
+
const start = startRef ?? syncBaseFromOrigin(worktreePath, base) ?? base;
|
|
377
381
|
if (git(["checkout", "-B", fresh, start], worktreePath).ok) {
|
|
378
382
|
// Old branch's tree is already in base (that's what noop means) — safe to drop.
|
|
379
383
|
const oldDeleted = git(["branch", "-D", branch], repoRoot).ok;
|