agent-relay-server 0.27.1 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/openapi.json +14 -1
- package/package.json +2 -2
- package/public/index.html +2522 -660
- package/runner/src/adapter.ts +14 -1
- package/src/branch-landed.ts +2 -0
- package/src/db.ts +15 -2
- package/src/mcp.ts +51 -10
- package/src/notify.ts +8 -0
- package/src/routes.ts +4 -0
- package/src/workspace-orphans.ts +199 -38
package/runner/src/adapter.ts
CHANGED
|
@@ -211,9 +211,19 @@ function isPersistedRelayMessage(message: Message): boolean {
|
|
|
211
211
|
return Number.isSafeInteger(message.id) && message.id > 0;
|
|
212
212
|
}
|
|
213
213
|
|
|
214
|
+
// #283 — one-line nudge that replaces the reply-scaffold footer for notification-class
|
|
215
|
+
// (replyExpected:false) messages. Deliberately tiny so a bloated context can't drown the
|
|
216
|
+
// no-reply rule established at session start. Shared with the Claude delivery path.
|
|
217
|
+
export const NOTIFICATION_NUDGE = "↪ Notification — no reply needed.";
|
|
218
|
+
|
|
219
|
+
// A notification is a persisted message the server marked replyExpected:false.
|
|
220
|
+
export function isNotificationMessage(message: Message): boolean {
|
|
221
|
+
return isPersistedRelayMessage(message) && message.replyExpected === false;
|
|
222
|
+
}
|
|
223
|
+
|
|
214
224
|
function latestReplyableMessage(messages: Message[]): Message | undefined {
|
|
215
225
|
return messages
|
|
216
|
-
.filter((message) => isPersistedRelayMessage(message) && !isMemoryInjection(message) && !isReactionNotification(message))
|
|
226
|
+
.filter((message) => isPersistedRelayMessage(message) && !isMemoryInjection(message) && !isReactionNotification(message) && message.replyExpected !== false)
|
|
217
227
|
.at(-1);
|
|
218
228
|
}
|
|
219
229
|
|
|
@@ -316,6 +326,9 @@ export function providerMessageText(messages: Message[]): string {
|
|
|
316
326
|
"If you already delivered the useful response through Relay, do not send a separate status-only confirmation.",
|
|
317
327
|
"If multiple messages arrived together, cover them in one reply instead of answering each line separately.",
|
|
318
328
|
].join("\n"));
|
|
329
|
+
} else if (messages.some(isNotificationMessage)) {
|
|
330
|
+
// #283 — pure notification batch: no scaffold, just the one-line no-reply nudge.
|
|
331
|
+
sections.push(NOTIFICATION_NUDGE);
|
|
319
332
|
}
|
|
320
333
|
return sections.join("\n\n");
|
|
321
334
|
}
|
package/src/branch-landed.ts
CHANGED
|
@@ -78,6 +78,7 @@ export function notifyBranchLanded(input: BranchLandedInput): void {
|
|
|
78
78
|
subject: "Your branch landed",
|
|
79
79
|
body: `✅ ${branchLabel} landed on \`${base}\`${shaLabel}${subjectLabel}.${continueLabel}`,
|
|
80
80
|
payload,
|
|
81
|
+
replyExpected: false,
|
|
81
82
|
});
|
|
82
83
|
}
|
|
83
84
|
|
|
@@ -93,6 +94,7 @@ export function notifyBranchLanded(input: BranchLandedInput): void {
|
|
|
93
94
|
subject: `Merged to ${base}`,
|
|
94
95
|
body: `🔀 ${branchLabel}${authorLabel} merged to \`${base}\`${shaLabel}${subjectLabel}.`,
|
|
95
96
|
payload,
|
|
97
|
+
replyExpected: false,
|
|
96
98
|
});
|
|
97
99
|
}
|
|
98
100
|
}
|
package/src/db.ts
CHANGED
|
@@ -226,6 +226,7 @@ export function initDb(path: string = "agent-relay.db"): Database {
|
|
|
226
226
|
body TEXT NOT NULL,
|
|
227
227
|
thread_id INTEGER,
|
|
228
228
|
reply_to INTEGER REFERENCES messages(id),
|
|
229
|
+
reply_expected INTEGER NOT NULL DEFAULT 1,
|
|
229
230
|
claimable INTEGER NOT NULL DEFAULT 0,
|
|
230
231
|
claimed_by TEXT,
|
|
231
232
|
claimed_at INTEGER,
|
|
@@ -857,6 +858,9 @@ export function initDb(path: string = "agent-relay.db"): Database {
|
|
|
857
858
|
db.run("ALTER TABLE messages ADD COLUMN thread_id INTEGER");
|
|
858
859
|
db.run("ALTER TABLE messages ADD COLUMN reply_to INTEGER REFERENCES messages(id)");
|
|
859
860
|
}
|
|
861
|
+
if (!colNames.includes("reply_expected")) {
|
|
862
|
+
db.run("ALTER TABLE messages ADD COLUMN reply_expected INTEGER NOT NULL DEFAULT 1");
|
|
863
|
+
}
|
|
860
864
|
if (!colNames.includes("claimable")) {
|
|
861
865
|
db.run("ALTER TABLE messages ADD COLUMN claimable INTEGER NOT NULL DEFAULT 0");
|
|
862
866
|
db.run("ALTER TABLE messages ADD COLUMN claimed_by TEXT");
|
|
@@ -1292,6 +1296,9 @@ function rowToMessage(row: any): Message {
|
|
|
1292
1296
|
body: row.body,
|
|
1293
1297
|
threadId: row.thread_id ?? undefined,
|
|
1294
1298
|
replyTo: row.reply_to ?? undefined,
|
|
1299
|
+
// Default (true) stays absent to match the `claimable` idiom and keep notification-free
|
|
1300
|
+
// messages byte-identical on the wire; only an explicit notification surfaces false (#283).
|
|
1301
|
+
replyExpected: row.reply_expected === 0 ? false : undefined,
|
|
1295
1302
|
claimable: row.claimable === 1 ? true : undefined,
|
|
1296
1303
|
claimedBy: row.claimed_by ?? undefined,
|
|
1297
1304
|
claimedAt: row.claimed_at ?? undefined,
|
|
@@ -3794,12 +3801,12 @@ export function sendMessageWithResult(input: SendMessageInput): { message: Messa
|
|
|
3794
3801
|
|
|
3795
3802
|
const insert = db.query(`
|
|
3796
3803
|
INSERT INTO messages (
|
|
3797
|
-
from_agent, to_target, kind, channel, subject, body, thread_id, reply_to, claimable,
|
|
3804
|
+
from_agent, to_target, kind, channel, subject, body, thread_id, reply_to, reply_expected, claimable,
|
|
3798
3805
|
idempotency_key, delivery_status, queued_at, max_age_seconds, resolved_to_agent,
|
|
3799
3806
|
payload, meta, created_at, occurred_at
|
|
3800
3807
|
)
|
|
3801
3808
|
VALUES (
|
|
3802
|
-
$from, $to, $kind, $channel, $subject, $body, $threadId, $replyTo, $claimable,
|
|
3809
|
+
$from, $to, $kind, $channel, $subject, $body, $threadId, $replyTo, $replyExpected, $claimable,
|
|
3803
3810
|
$idempotencyKey, $deliveryStatus, $queuedAt, $maxAgeSeconds, $resolvedToAgent,
|
|
3804
3811
|
$payload, $meta, $now, $occurredAt
|
|
3805
3812
|
)
|
|
@@ -3833,6 +3840,9 @@ export function sendMessageWithResult(input: SendMessageInput): { message: Messa
|
|
|
3833
3840
|
$body: input.body,
|
|
3834
3841
|
$threadId: threadId,
|
|
3835
3842
|
$replyTo: input.replyTo ?? null,
|
|
3843
|
+
// Server-owned reply obligation (#283): true by default; only an explicit false marks
|
|
3844
|
+
// a notification. Stored 0/1 so the footer renderer + reply tracker key off one column.
|
|
3845
|
+
$replyExpected: input.replyExpected === false ? 0 : 1,
|
|
3836
3846
|
$claimable: claimable ? 1 : 0,
|
|
3837
3847
|
$idempotencyKey: input.idempotencyKey ?? null,
|
|
3838
3848
|
$deliveryStatus: deliveryStatus,
|
|
@@ -4318,6 +4328,9 @@ export function pollMessages(query: PollQuery): Message[] {
|
|
|
4318
4328
|
}
|
|
4319
4329
|
|
|
4320
4330
|
function messageRequiresReply(message: Message): boolean {
|
|
4331
|
+
// Server-owned notification flag (#283) wins over every kind/sender heuristic below: an
|
|
4332
|
+
// explicit replyExpected:false is a fire-and-forget message that must never become an obligation.
|
|
4333
|
+
if (message.replyExpected === false) return false;
|
|
4321
4334
|
if (message.kind === "system" || message.kind === "control" || message.kind === "session") return false;
|
|
4322
4335
|
if (message.from === "user") return true;
|
|
4323
4336
|
if (message.kind === "task" || message.kind === "channel.event") return true;
|
package/src/mcp.ts
CHANGED
|
@@ -247,19 +247,19 @@ const TOOLS: ToolDefinition[] = [
|
|
|
247
247
|
},
|
|
248
248
|
{
|
|
249
249
|
name: "relay_spawn_agent",
|
|
250
|
-
description: "Spawn a long-living provider agent through Relay's orchestrator. Gated: requires the command:spawn scope, granted only to agents whose profile sets maxSpawnedAgents>0, up to that live-children quota. Spawned agents cannot themselves spawn (no grandchildren).",
|
|
250
|
+
description: "Spawn a long-living provider agent through Relay's orchestrator, optionally handing it its first task via `prompt` in the same call. Defaults to your own host (override with orchestratorId) and returns the resolved agent id once it registers. Gated: requires the command:spawn scope, granted only to agents whose profile sets maxSpawnedAgents>0, up to that live-children quota. Spawned agents cannot themselves spawn (no grandchildren).",
|
|
251
251
|
requiredScopes: ["command:spawn"],
|
|
252
252
|
inputSchema: {
|
|
253
253
|
type: "object",
|
|
254
254
|
properties: {
|
|
255
255
|
provider: { type: "string", enum: SPAWN_PROVIDERS },
|
|
256
|
-
orchestratorId: { type: "string" },
|
|
257
|
-
cwd: { type: "string" },
|
|
256
|
+
orchestratorId: { type: "string", description: "Target host. Defaults to the host that owns cwd, else YOUR OWN host — only set it to spawn onto a different machine." },
|
|
257
|
+
cwd: { type: "string", description: "Working directory for the agent. Must resolve within the target orchestrator's base directory (enforced server-side)." },
|
|
258
258
|
label: { type: "string" },
|
|
259
259
|
model: { type: "string" },
|
|
260
260
|
effort: { type: "string", enum: VALID_EFFORTS },
|
|
261
261
|
approvalMode: { type: "string", enum: APPROVAL_MODES },
|
|
262
|
-
prompt: { type: "string" },
|
|
262
|
+
prompt: { type: "string", description: "Initial task/message delivered to the agent on launch — spawn and hand it its first instruction in one call (no separate follow-up message needed)." },
|
|
263
263
|
systemPromptAppend: { type: "string" },
|
|
264
264
|
profile: { type: "string", description: "Agent profile name to apply (env, instructions, permissions, MCP/skills, spawn quota)." },
|
|
265
265
|
tags: { type: "array", items: { type: "string" } },
|
|
@@ -267,6 +267,7 @@ const TOOLS: ToolDefinition[] = [
|
|
|
267
267
|
providerArgs: { type: "array", items: { type: "string" } },
|
|
268
268
|
policyName: { type: "string" },
|
|
269
269
|
spawnRequestId: { type: "string" },
|
|
270
|
+
waitForRegistrationMs: { type: "integer", minimum: 0, maximum: 30000, description: "How long to wait for the spawned agent to register before returning, so the response carries its resolved agent id (default 8000; 0 = return immediately with just spawnRequestId)." },
|
|
270
271
|
},
|
|
271
272
|
required: ["provider"],
|
|
272
273
|
additionalProperties: false,
|
|
@@ -485,7 +486,7 @@ async function callTool(auth: McpAuthContext, params: unknown): Promise<Record<s
|
|
|
485
486
|
else if (name === "relay_agent_status") result = relayAgentStatus(args);
|
|
486
487
|
else if (name === "relay_find_agents") result = relayFindAgents(auth, args);
|
|
487
488
|
else if (name === "relay_whoami") result = relayWhoami(auth);
|
|
488
|
-
else if (name === "relay_spawn_agent") result = relaySpawnAgent(auth, args);
|
|
489
|
+
else if (name === "relay_spawn_agent") result = await relaySpawnAgent(auth, args);
|
|
489
490
|
else if (name === "relay_shutdown_agent") result = relayShutdownAgent(auth, args);
|
|
490
491
|
else if (name === "relay_workspace_status") result = await relayWorkspaceStatus(auth, args);
|
|
491
492
|
else if (name === "relay_workspace_list") result = relayWorkspaceList(auth, args);
|
|
@@ -763,10 +764,12 @@ function relayFindAgents(auth: McpAuthContext, args: Record<string, unknown>): R
|
|
|
763
764
|
return { agents, count: agents.length };
|
|
764
765
|
}
|
|
765
766
|
|
|
766
|
-
function relaySpawnAgent(auth: McpAuthContext, args: Record<string, unknown>): Record<string, unknown> {
|
|
767
|
+
async function relaySpawnAgent(auth: McpAuthContext, args: Record<string, unknown>): Promise<Record<string, unknown>> {
|
|
767
768
|
const provider = enumField(args.provider, "provider", SPAWN_PROVIDERS) as SpawnProvider;
|
|
768
769
|
const cwd = optionalString(args.cwd, "cwd", 500);
|
|
769
|
-
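const orchestrator = selectSpawnOrchestrator(provider, optionalString(args.orchestratorId, "orchestratorId", 200), cwd);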
|
|
770
|
+
const callerId = callerAgentId(auth);
|
|
771
|
+
const preferHost = callerId ? getAgent(callerId)?.machine : undefined;
|
|
772
|
+
const orchestrator = selectSpawnOrchestrator(provider, optionalString(args.orchestratorId, "orchestratorId", 200), cwd, preferHost);
|
|
770
773
|
const resolvedCwd = cwd || orchestrator.baseDir;
|
|
771
774
|
if (cwd && !isPathWithinBase(cwd, orchestrator.baseDir)) {
|
|
772
775
|
throw new ValidationError(`cwd must be within orchestrator base directory: ${orchestrator.baseDir}`);
|
|
@@ -781,7 +784,6 @@ function relaySpawnAgent(auth: McpAuthContext, args: Record<string, unknown>): R
|
|
|
781
784
|
// #221 runtime gate (belt; the coarse `command:spawn` scope is enforced in callTool, and is
|
|
782
785
|
// granted only to agents whose profile sets maxSpawnedAgents>0 and never to children).
|
|
783
786
|
// Server/admin tokens have no caller identity → unrestricted by design.
|
|
784
|
-
const callerId = callerAgentId(auth);
|
|
785
787
|
if (callerId) {
|
|
786
788
|
const me = getAgent(callerId);
|
|
787
789
|
if (me?.spawnedBy) {
|
|
@@ -841,7 +843,27 @@ function relaySpawnAgent(auth: McpAuthContext, args: Record<string, unknown>): R
|
|
|
841
843
|
}),
|
|
842
844
|
});
|
|
843
845
|
emitCommand(command);
|
|
844
|
-
|
|
846
|
+
|
|
847
|
+
// #255: resolve the spawned agent id once it registers. Spawn is a fire-and-forget command
|
|
848
|
+
// over the bus; the child registers back to THIS relay (same DB) with meta.spawnRequestId set,
|
|
849
|
+
// so a bounded poll links the request to the agent without a separate relay_find_agents round
|
|
850
|
+
// trip. waitForRegistrationMs:0 opts out (pure fire-and-forget); the default is short because
|
|
851
|
+
// isolated-worktree spawns register near-instantly (symlinked deps).
|
|
852
|
+
const waitMs = Math.min(optionalNonNegativeInt(args.waitForRegistrationMs, "waitForRegistrationMs") ?? 8000, 30000);
|
|
853
|
+
const agentId = waitMs > 0 ? await waitForSpawnedAgent(spawnRequestId, waitMs) : null;
|
|
854
|
+
return { ok: true, spawnRequestId, orchestratorId: orchestrator.id, provider, agentId, registered: agentId !== null, command };
|
|
855
|
+
}
|
|
856
|
+
|
|
857
|
+
// Poll the agents table for the child that registers with this spawnRequestId (#255). Returns
|
|
858
|
+
// the resolved agent id, or null on timeout (the caller still has spawnRequestId to poll later).
|
|
859
|
+
async function waitForSpawnedAgent(spawnRequestId: string, timeoutMs: number, pollMs = 300): Promise<string | null> {
|
|
860
|
+
const deadline = Date.now() + timeoutMs;
|
|
861
|
+
for (;;) {
|
|
862
|
+
const match = listAgents().find((a) => a.meta?.spawnRequestId === spawnRequestId);
|
|
863
|
+
if (match) return match.id;
|
|
864
|
+
if (Date.now() >= deadline) return null;
|
|
865
|
+
await new Promise<void>((resolve) => setTimeout(resolve, Math.min(pollMs, Math.max(0, deadline - Date.now()))));
|
|
866
|
+
}
|
|
845
867
|
}
|
|
846
868
|
|
|
847
869
|
function relayShutdownAgent(auth: McpAuthContext, args: Record<string, unknown>): Record<string, unknown> {
|
|
@@ -1062,7 +1084,12 @@ function policyStatusPayload(policy: NonNullable<ReturnType<typeof getSpawnPolic
|
|
|
1062
1084
|
};
|
|
1063
1085
|
}
|
|
1064
1086
|
|
|
1065
|
-
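function selectSpawnOrchestrator(provider: SpawnProvider, orchestratorId?: string, cwd?: string): NonNullable<ReturnType<typeof getOrchestrator>> {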
|
|
1087
|
+
function selectSpawnOrchestrator(
|
|
1088
|
+
provider: SpawnProvider,
|
|
1089
|
+
orchestratorId?: string,
|
|
1090
|
+
cwd?: string,
|
|
1091
|
+
preferHost?: string,
|
|
1092
|
+
): NonNullable<ReturnType<typeof getOrchestrator>> {
|
|
1066
1093
|
if (orchestratorId) {
|
|
1067
1094
|
const orchestrator = getOrchestrator(orchestratorId);
|
|
1068
1095
|
if (!orchestrator) throw new McpNotFoundError(`orchestrator ${orchestratorId} not found`);
|
|
@@ -1075,6 +1102,14 @@ function selectSpawnOrchestrator(provider: SpawnProvider, orchestratorId?: strin
|
|
|
1075
1102
|
const match = candidates.find((item) => isPathWithinBase(cwd, item.baseDir));
|
|
1076
1103
|
if (match) return match;
|
|
1077
1104
|
}
|
|
1105
|
+
// #255: with neither an explicit id nor a cwd to pin the host, default to the CALLER's own
|
|
1106
|
+
// host instead of silently grabbing candidates[0] (a foreign host whose baseDir would then
|
|
1107
|
+
// reject the caller's cwd — the footgun the spawn recipe warned about). An agent's `machine`
|
|
1108
|
+
// is its OS hostname; match it against the orchestrator hostname (or id, defensively).
|
|
1109
|
+
if (preferHost) {
|
|
1110
|
+
const own = candidates.find((item) => item.hostname === preferHost || item.id === preferHost);
|
|
1111
|
+
if (own) return own;
|
|
1112
|
+
}
|
|
1078
1113
|
const orchestrator = candidates[0];
|
|
1079
1114
|
if (!orchestrator) throw new McpNotFoundError(`no orchestrator available for provider: ${provider}`);
|
|
1080
1115
|
return orchestrator;
|
|
@@ -1327,6 +1362,12 @@ function optionalPositiveInt(value: unknown, field: string): number | undefined
|
|
|
1327
1362
|
return value;
|
|
1328
1363
|
}
|
|
1329
1364
|
|
|
1365
|
+
function optionalNonNegativeInt(value: unknown, field: string): number | undefined {
|
|
1366
|
+
if (value === undefined || value === null) return undefined;
|
|
1367
|
+
if (typeof value !== "number" || !Number.isSafeInteger(value) || value < 0) throw new ValidationError(`${field} must be a non-negative integer`);
|
|
1368
|
+
return value;
|
|
1369
|
+
}
|
|
1370
|
+
|
|
1330
1371
|
function optionalFutureTimestamp(value: unknown, field: string): number | undefined {
|
|
1331
1372
|
const timestamp = optionalPositiveInt(value, field);
|
|
1332
1373
|
if (timestamp !== undefined && timestamp <= Date.now()) throw new ValidationError(`${field} must be a future unix timestamp in milliseconds`);
|
package/src/notify.ts
CHANGED
|
@@ -10,6 +10,13 @@ export interface SystemNotifyOptions {
|
|
|
10
10
|
kind?: MessageKind;
|
|
11
11
|
/** Sender id; defaults to "system". */
|
|
12
12
|
from?: string;
|
|
13
|
+
/**
|
|
14
|
+
* #283 — set false for a fire-and-forget notification (merge notice, lifecycle event): the
|
|
15
|
+
* server suppresses the reply-scaffold footer and the reply-obligation tracker skips it.
|
|
16
|
+
* Omit (default true) for system messages that genuinely want the agent to act/answer
|
|
17
|
+
* (steward task assignments, conflict handoffs).
|
|
18
|
+
*/
|
|
19
|
+
replyExpected?: boolean;
|
|
13
20
|
}
|
|
14
21
|
|
|
15
22
|
/**
|
|
@@ -25,6 +32,7 @@ export function notifySystemMessage(to: string, opts: SystemNotifyOptions): Mess
|
|
|
25
32
|
subject: opts.subject,
|
|
26
33
|
body: opts.body,
|
|
27
34
|
payload: opts.payload,
|
|
35
|
+
replyExpected: opts.replyExpected,
|
|
28
36
|
});
|
|
29
37
|
emitNewMessage(msg);
|
|
30
38
|
return msg;
|
package/src/routes.ts
CHANGED
|
@@ -520,6 +520,9 @@ function normalizeMessageInput(body: unknown): SendMessageInput {
|
|
|
520
520
|
if (body.claimable !== undefined && typeof body.claimable !== "boolean") {
|
|
521
521
|
throw new ValidationError("claimable must be a boolean");
|
|
522
522
|
}
|
|
523
|
+
if (body.replyExpected !== undefined && typeof body.replyExpected !== "boolean") {
|
|
524
|
+
throw new ValidationError("replyExpected must be a boolean");
|
|
525
|
+
}
|
|
523
526
|
|
|
524
527
|
const input: SendMessageInput = {
|
|
525
528
|
from: cleanString(body.from, "from", { required: true, max: 200 })!,
|
|
@@ -527,6 +530,7 @@ function normalizeMessageInput(body: unknown): SendMessageInput {
|
|
|
527
530
|
body: cleanString(body.body, "body", { required: true, max: MAX_BODY_BYTES })!,
|
|
528
531
|
kind: kind as SendMessageInput["kind"] | undefined,
|
|
529
532
|
replyTo: cleanPositiveId(body.replyTo, "replyTo"),
|
|
533
|
+
replyExpected: body.replyExpected as boolean | undefined,
|
|
530
534
|
claimable: body.claimable as boolean | undefined,
|
|
531
535
|
idempotencyKey: cleanString(body.idempotencyKey, "idempotencyKey", { max: 240 }),
|
|
532
536
|
};
|
package/src/workspace-orphans.ts
CHANGED
|
@@ -5,11 +5,15 @@
|
|
|
5
5
|
// visible to the agent or the dashboard, so unlanded work can sit stranded for
|
|
6
6
|
// weeks (one real casualty: a CI-guard test, recovered by hand).
|
|
7
7
|
//
|
|
8
|
-
// THE invariant (single home, see `worktreeReapable`):
|
|
9
|
-
// "nothing would be lost"
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
8
|
+
// THE invariant (single home, see `worktreeReapable`): a worktree is reaped only
|
|
9
|
+
// when BOTH hold — "nothing would be lost" (landed or empty; un-landed commits are
|
|
10
|
+
// flagged, never force-removed) AND "nobody is using it" (the owner is dead, and no
|
|
11
|
+
// live row claims the path). #278 proved the first half alone is not enough: a
|
|
12
|
+
// post-land recycled worktree (ahead 0) owned by a LIVE session is reap-safe by
|
|
13
|
+
// land-state, yet destroying it kills that session's toolchain mid-flight. Path-keyed
|
|
14
|
+
// row matching (never repoRoot-scoped) plus an owner-liveness guard enforce the
|
|
15
|
+
// second half. This module is the disk⇄DB reconciler the GC's `git worktree prune`
|
|
16
|
+
// (a no-op while the directory exists) never was.
|
|
13
17
|
|
|
14
18
|
import { resolve } from "node:path";
|
|
15
19
|
import { RELAY_TOKEN_HEADER } from "agent-relay-sdk";
|
|
@@ -20,6 +24,7 @@ import { emitRelayEvent } from "./events";
|
|
|
20
24
|
import { isPathWithinBase } from "./utils";
|
|
21
25
|
import { TERMINAL_WORKSPACE_STATUSES, worktreeReapable, type WorktreeReapState } from "./workspace-phase";
|
|
22
26
|
import { isOwnerAlive } from "./workspace-merge";
|
|
27
|
+
import { applyWorkspaceAction } from "./workspace-actions";
|
|
23
28
|
|
|
24
29
|
// Don't re-flag the same un-landed orphan every sweep — surface it once, then
|
|
25
30
|
// stay quiet for this window. In-memory (keyed by worktree path) like the
|
|
@@ -29,10 +34,21 @@ const UNLANDED_FLAG_COOLDOWN_MS = Number(process.env.AGENT_RELAY_ORPHAN_FLAG_COO
|
|
|
29
34
|
// remove them (parity with the session reaper's detect-only switch).
|
|
30
35
|
const orphanWorktreeReapEnabled = () => process.env.AGENT_RELAY_ORPHAN_WORKTREE_REAP !== "0";
|
|
31
36
|
const flaggedAt = new Map<string, number>();
|
|
32
|
-
const
|
|
37
|
+
const IN_FLIGHT_WORKSPACE_STATUSES = new Set<WorkspaceStatus>(["merge_planned", "cleanup_requested"]);
|
|
38
|
+
|
|
39
|
+
// #279 dead-owner grace window: a tracked worktree whose owner has died is only
|
|
40
|
+
// reaped after the owner is observed dead continuously for this window, so a
|
|
41
|
+
// reconnecting agent isn't raced. Keyed by resolved worktree path; in-memory like
|
|
42
|
+
// the session reaper's tracker (a restart re-arms the clock — conservative).
|
|
43
|
+
const orphanGraceMs = (): number => {
|
|
44
|
+
const v = Number(process.env.AGENT_RELAY_ORPHAN_GRACE_MS);
|
|
45
|
+
return Number.isFinite(v) && v >= 0 ? v : 30 * 60 * 1000;
|
|
46
|
+
};
|
|
47
|
+
const deadOwnerTracker = new Map<string, { firstSeenDeadAt: number }>();
|
|
33
48
|
|
|
34
49
|
export function resetOrphanWorktreeStateForTests(): void {
|
|
35
50
|
flaggedAt.clear();
|
|
51
|
+
deadOwnerTracker.clear();
|
|
36
52
|
}
|
|
37
53
|
|
|
38
54
|
interface OnlineOrchestrator {
|
|
@@ -138,6 +154,17 @@ export interface CollectOrphansResult {
|
|
|
138
154
|
baseSha?: string;
|
|
139
155
|
ownerAgentId?: string;
|
|
140
156
|
}>;
|
|
157
|
+
/** Non-terminal isolated rows whose worktree IS present on disk (#279). Tracked,
|
|
158
|
+
* not orphaned — the reaper decides on owner-liveness + grace + land-state. */
|
|
159
|
+
deadOwnerWorkspaces: Array<{
|
|
160
|
+
workspaceId: string;
|
|
161
|
+
worktreePath: string;
|
|
162
|
+
repoRoot: string;
|
|
163
|
+
branch?: string;
|
|
164
|
+
baseRef?: string;
|
|
165
|
+
baseSha?: string;
|
|
166
|
+
ownerAgentId?: string;
|
|
167
|
+
}>;
|
|
141
168
|
reason?: string;
|
|
142
169
|
}
|
|
143
170
|
|
|
@@ -150,11 +177,27 @@ export interface CollectOrphansResult {
|
|
|
150
177
|
*/
|
|
151
178
|
export async function collectWorkspaceOrphans(): Promise<CollectOrphansResult> {
|
|
152
179
|
const orchestrators = onlineOrchestrators();
|
|
153
|
-
if (!orchestrators.length) return { orphans: [], missingWorktrees: [], reason: "no online orchestrators" };
|
|
180
|
+
if (!orchestrators.length) return { orphans: [], missingWorktrees: [], deadOwnerWorkspaces: [], reason: "no online orchestrators" };
|
|
154
181
|
|
|
155
182
|
const all = listWorkspaces();
|
|
156
183
|
const orphans: WorkspaceOrphan[] = [];
|
|
157
184
|
const missingWorktrees: CollectOrphansResult["missingWorktrees"] = [];
|
|
185
|
+
const deadOwnerWorkspaces: CollectOrphansResult["deadOwnerWorkspaces"] = [];
|
|
186
|
+
|
|
187
|
+
// Worktree paths are globally unique, so a row from ANY repoRoot that records a
|
|
188
|
+
// path is the authoritative claim on it. Match disk→DB across ALL rows, NEVER
|
|
189
|
+
// scoped to one repoRoot: the scoped version false-orphaned chained workspaces
|
|
190
|
+
// (managed session → base checkout → isolated worktree, whose row records the base
|
|
191
|
+
// checkout as repoRoot, not the main repo the probe is seeded from), so the reaper
|
|
192
|
+
// destroyed a live session's worktree (#278).
|
|
193
|
+
const rowsByPath = new Map(all.filter((ws) => ws.worktreePath).map((ws) => [resolve(ws.worktreePath), ws]));
|
|
194
|
+
|
|
195
|
+
// Union of every probed repo's on-disk worktrees + the repoRoots we actually
|
|
196
|
+
// reached. The DB→disk (missing-worktree) pass runs ONCE, globally, after the loop
|
|
197
|
+
// against these — per-repo scoping there falsely flagged chained rows as missing
|
|
198
|
+
// (their worktree lives under a different repo's probe than the one iterating). #278
|
|
199
|
+
const onDiskAll = new Set<string>();
|
|
200
|
+
const probedRepoRoots = new Set<string>();
|
|
158
201
|
|
|
159
202
|
for (const repoRoot of knownRepoRoots(all)) {
|
|
160
203
|
const orch = orchestrators.find((candidate) => candidate.apiUrl && isPathWithinBase(repoRoot, candidate.baseDir));
|
|
@@ -162,40 +205,32 @@ export async function collectWorkspaceOrphans(): Promise<CollectOrphansResult> {
|
|
|
162
205
|
const probe = await fetchHostProbe(orch.apiUrl, repoRoot);
|
|
163
206
|
if (!probe?.worktrees) continue;
|
|
164
207
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
.filter((ws) => ws.repoRoot === repoRoot && ws.worktreePath && !TERMINAL_WORKSPACE_STATUSES.has(ws.status))
|
|
168
|
-
.map((ws) => [resolve(ws.worktreePath), ws]),
|
|
169
|
-
);
|
|
170
|
-
const onDisk = new Set(probe.worktrees.map((wt) => (wt.path ? resolve(wt.path) : "")).filter(Boolean));
|
|
171
|
-
|
|
172
|
-
// DB→disk drift: a live isolated row whose worktree is no longer on disk.
|
|
173
|
-
for (const [path, ws] of liveRowsByPath) {
|
|
174
|
-
if (ws.mode === "isolated" && !onDisk.has(path)) {
|
|
175
|
-
missingWorktrees.push({
|
|
176
|
-
workspaceId: ws.id,
|
|
177
|
-
worktreePath: ws.worktreePath,
|
|
178
|
-
repoRoot,
|
|
179
|
-
status: ws.status,
|
|
180
|
-
branch: ws.branch,
|
|
181
|
-
baseRef: ws.baseRef,
|
|
182
|
-
baseSha: ws.baseSha,
|
|
183
|
-
ownerAgentId: ws.ownerAgentId,
|
|
184
|
-
});
|
|
185
|
-
}
|
|
186
|
-
}
|
|
208
|
+
probedRepoRoots.add(resolve(repoRoot));
|
|
209
|
+
for (const wt of probe.worktrees) if (wt.path) onDiskAll.add(resolve(wt.path));
|
|
187
210
|
|
|
188
|
-
// disk→DB drift: a worktree on disk with no live row.
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
);
|
|
211
|
+
// disk→DB drift: a worktree on disk with no live row → orphan. A worktree with a
|
|
212
|
+
// live isolated row is NOT an orphan, but if its owner has died it's the #279
|
|
213
|
+
// dead-owner case — surface it for the reaper to evaluate (liveness + grace).
|
|
192
214
|
for (const worktree of probe.worktrees) {
|
|
193
215
|
if (!worktree.path || resolve(worktree.path) === resolve(repoRoot)) continue;
|
|
194
216
|
// Only agent-relay-created worktrees (agent/* branches) are reclaimable —
|
|
195
217
|
// never touch a user's own linked worktrees.
|
|
196
218
|
if (!worktree.branch?.startsWith("agent/")) continue;
|
|
197
219
|
const row = rowsByPath.get(resolve(worktree.path));
|
|
198
|
-
if (row && !TERMINAL_WORKSPACE_STATUSES.has(row.status))
|
|
220
|
+
if (row && !TERMINAL_WORKSPACE_STATUSES.has(row.status)) {
|
|
221
|
+
if (row.mode === "isolated") {
|
|
222
|
+
deadOwnerWorkspaces.push({
|
|
223
|
+
workspaceId: row.id,
|
|
224
|
+
worktreePath: worktree.path,
|
|
225
|
+
repoRoot: row.repoRoot,
|
|
226
|
+
branch: row.branch ?? worktree.branch,
|
|
227
|
+
baseRef: row.baseRef,
|
|
228
|
+
baseSha: row.baseSha,
|
|
229
|
+
ownerAgentId: row.ownerAgentId,
|
|
230
|
+
});
|
|
231
|
+
}
|
|
232
|
+
continue; // tracked & live — not an orphan
|
|
233
|
+
}
|
|
199
234
|
|
|
200
235
|
const orphan: WorkspaceOrphan = {
|
|
201
236
|
worktreePath: worktree.path,
|
|
@@ -223,7 +258,32 @@ export async function collectWorkspaceOrphans(): Promise<CollectOrphansResult> {
|
|
|
223
258
|
}
|
|
224
259
|
}
|
|
225
260
|
|
|
226
|
-
|
|
261
|
+
// DB→disk drift: a non-terminal isolated row whose worktree is gone from disk. Run
|
|
262
|
+
// once, globally, against the union of probed repos. Flag "missing" only when the
|
|
263
|
+
// path is absent from EVERY probe AND the row was COVERABLE — its repoRoot was
|
|
264
|
+
// probed, OR its repoRoot is itself a worktree we saw on disk (the chained case: an
|
|
265
|
+
// isolated row's repoRoot is its session base checkout, a linked worktree of a
|
|
266
|
+
// probed repo). The coverability gate is what stops rows under an un-probeable host
|
|
267
|
+
// from being falsely flagged missing. #278
|
|
268
|
+
for (const ws of all) {
|
|
269
|
+
if (ws.mode !== "isolated" || !ws.worktreePath) continue;
|
|
270
|
+
if (TERMINAL_WORKSPACE_STATUSES.has(ws.status)) continue;
|
|
271
|
+
if (onDiskAll.has(resolve(ws.worktreePath))) continue;
|
|
272
|
+
const coverable = probedRepoRoots.has(resolve(ws.repoRoot)) || onDiskAll.has(resolve(ws.repoRoot));
|
|
273
|
+
if (!coverable) continue;
|
|
274
|
+
missingWorktrees.push({
|
|
275
|
+
workspaceId: ws.id,
|
|
276
|
+
worktreePath: ws.worktreePath,
|
|
277
|
+
repoRoot: ws.repoRoot,
|
|
278
|
+
status: ws.status,
|
|
279
|
+
branch: ws.branch,
|
|
280
|
+
baseRef: ws.baseRef,
|
|
281
|
+
baseSha: ws.baseSha,
|
|
282
|
+
ownerAgentId: ws.ownerAgentId,
|
|
283
|
+
});
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
return { orphans, missingWorktrees, deadOwnerWorkspaces };
|
|
227
287
|
}
|
|
228
288
|
|
|
229
289
|
function dispatchCleanup(orch: OnlineOrchestrator, orphan: WorkspaceOrphan): string {
|
|
@@ -254,7 +314,7 @@ function dispatchCleanup(orch: OnlineOrchestrator, orphan: WorkspaceOrphan): str
|
|
|
254
314
|
* directions surface. Never removes on uncertainty.
|
|
255
315
|
*/
|
|
256
316
|
export async function reapOrphanedWorktrees(): Promise<Record<string, unknown>> {
|
|
257
|
-
const { orphans, missingWorktrees, reason } = await collectWorkspaceOrphans();
|
|
317
|
+
const { orphans, missingWorktrees, deadOwnerWorkspaces, reason } = await collectWorkspaceOrphans();
|
|
258
318
|
if (reason) return { skipped: reason };
|
|
259
319
|
|
|
260
320
|
const orchestrators = onlineOrchestrators();
|
|
@@ -263,14 +323,27 @@ export async function reapOrphanedWorktrees(): Promise<Record<string, unknown>>
|
|
|
263
323
|
const flagged: string[] = [];
|
|
264
324
|
const autoAbandoned: string[] = [];
|
|
265
325
|
const flaggedMissingWorktrees: string[] = [];
|
|
326
|
+
const deadOwnerReaped: string[] = [];
|
|
327
|
+
const deadOwnerFlagged: string[] = [];
|
|
266
328
|
const now = Date.now();
|
|
267
329
|
|
|
330
|
+
// Defense in depth (#278): the matching fix in collectWorkspaceOrphans should keep
|
|
331
|
+
// any path with a live, non-terminal row out of `orphans`. This is the belt to that
|
|
332
|
+
// suspenders — re-read rows fresh and never reap a path one still claims, even if a
|
|
333
|
+
// future refactor reintroduces a lookup gap. False-reaping a live session is fatal.
|
|
334
|
+
const nonTerminalRowsByPath = new Map(
|
|
335
|
+
listWorkspaces()
|
|
336
|
+
.filter((ws) => ws.worktreePath && !TERMINAL_WORKSPACE_STATUSES.has(ws.status))
|
|
337
|
+
.map((ws) => [resolve(ws.worktreePath), ws]),
|
|
338
|
+
);
|
|
339
|
+
|
|
268
340
|
for (const orphan of orphans) {
|
|
269
341
|
const orch = orchestrators.find((candidate) => isPathWithinBase(orphan.repoRoot, candidate.baseDir));
|
|
270
342
|
if (!orch) continue;
|
|
271
343
|
|
|
272
344
|
if (orphan.safeToReap === true) {
|
|
273
345
|
if (!reapEnabled) continue; // detect-only mode
|
|
346
|
+
if (nonTerminalRowsByPath.has(resolve(orphan.worktreePath))) continue; // still claimed — never reap (#278)
|
|
274
347
|
const commandId = dispatchCleanup(orch, orphan);
|
|
275
348
|
reaped.push(orphan.worktreePath);
|
|
276
349
|
flaggedAt.delete(orphan.worktreePath);
|
|
@@ -315,7 +388,7 @@ export async function reapOrphanedWorktrees(): Promise<Record<string, unknown>>
|
|
|
315
388
|
if (!workspace || TERMINAL_WORKSPACE_STATUSES.has(workspace.status) || workspace.mode !== "isolated" || !workspace.worktreePath) continue;
|
|
316
389
|
const key = `missing:${workspace.worktreePath}`;
|
|
317
390
|
const ownerAlive = isOwnerAlive(workspace.ownerAgentId);
|
|
318
|
-
const inFlight =
|
|
391
|
+
const inFlight = IN_FLIGHT_WORKSPACE_STATUSES.has(workspace.status);
|
|
319
392
|
const last = flaggedAt.get(key) ?? 0;
|
|
320
393
|
if (ownerAlive || inFlight) {
|
|
321
394
|
if (now - last < UNLANDED_FLAG_COOLDOWN_MS) continue;
|
|
@@ -410,9 +483,95 @@ export async function reapOrphanedWorktrees(): Promise<Record<string, unknown>>
|
|
|
410
483
|
});
|
|
411
484
|
}
|
|
412
485
|
|
|
486
|
+
// #279: a tracked worktree whose owner died — the common stale case the reaper
|
|
487
|
+
// never handled (inverse of #278's false kill). Gate on owner-liveness + a grace
|
|
488
|
+
// window + "nothing would be lost", then dispatch through the GUARDED action (the
|
|
489
|
+
// #254 isOwnerAlive re-check at dispatch is the safety net against a stale read).
|
|
490
|
+
const graceMs = orphanGraceMs();
|
|
491
|
+
const liveDeadOwnerKeys = new Set<string>();
|
|
492
|
+
for (const cand of deadOwnerWorkspaces) {
|
|
493
|
+
const workspace = getWorkspace(cand.workspaceId);
|
|
494
|
+
if (!workspace || workspace.mode !== "isolated" || !workspace.worktreePath) continue;
|
|
495
|
+
if (TERMINAL_WORKSPACE_STATUSES.has(workspace.status)) continue;
|
|
496
|
+
if (IN_FLIGHT_WORKSPACE_STATUSES.has(workspace.status)) continue; // cleanup/merge already dispatched
|
|
497
|
+
const key = `dead-owner:${resolve(workspace.worktreePath)}`;
|
|
498
|
+
liveDeadOwnerKeys.add(key);
|
|
499
|
+
|
|
500
|
+
if (isOwnerAlive(workspace.ownerAgentId)) {
|
|
501
|
+
deadOwnerTracker.delete(key); // owner alive — reset the grace window (regression vs #278)
|
|
502
|
+
continue;
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
// Owner observed dead. Require continuous dead-ness for the grace window so a
|
|
506
|
+
// reconnecting agent isn't raced; the in-memory tracker re-arms on restart. A
|
|
507
|
+
// grace of 0 reaps on the first dead observation (the env's escape hatch).
|
|
508
|
+
const tracked = deadOwnerTracker.get(key);
|
|
509
|
+
const firstSeenDeadAt = tracked?.firstSeenDeadAt ?? now;
|
|
510
|
+
if (!tracked) deadOwnerTracker.set(key, { firstSeenDeadAt });
|
|
511
|
+
if (now - firstSeenDeadAt < graceMs) continue;
|
|
512
|
+
|
|
513
|
+
// Grace elapsed + owner dead. Gate on land-state — "nothing would be lost".
|
|
514
|
+
const orch = orchestrators.find((candidate) => candidate.apiUrl && isPathWithinBase(workspace.repoRoot, candidate.baseDir));
|
|
515
|
+
const preview = orch?.apiUrl
|
|
516
|
+
? await fetchWorktreeReapState(orch.apiUrl, workspace.worktreePath, workspace.baseRef)
|
|
517
|
+
: null;
|
|
518
|
+
const safeToReap = preview && !preview.missing && !preview.error
|
|
519
|
+
? worktreeReapable({ landed: preview.landed, ahead: preview.ahead, unmergedAhead: preview.unmergedAhead, dirtyCount: preview.dirtyCount })
|
|
520
|
+
: undefined;
|
|
521
|
+
|
|
522
|
+
if (safeToReap === true) {
|
|
523
|
+
if (!reapEnabled) continue; // detect-only mode
|
|
524
|
+
// Route through the guarded action (#279): no `force`, so a flipped-alive owner
|
|
525
|
+
// is rejected (409) — our safety net. Also does the cleanup_requested transition
|
|
526
|
+
// + audit. Attributed to a distinct requester so incidents are traceable.
|
|
527
|
+
const result = applyWorkspaceAction(workspace, {
|
|
528
|
+
action: "cleanup",
|
|
529
|
+
agentId: "workspace-dead-owner-reaper",
|
|
530
|
+
auditMetadata: { source: "server", maintenanceJobId: "workspace-orphan-reaper", requestedBy: "workspace-dead-owner-reaper", deadOwner: workspace.ownerAgentId },
|
|
531
|
+
});
|
|
532
|
+
if (!result.ok) continue; // guard tripped (owner came back) or no owning orchestrator — re-evaluate next sweep
|
|
533
|
+
if (result.command) {
|
|
534
|
+
emitRelayEvent({ type: `command.${result.command.status}`, source: result.command.source, subject: result.command.id, data: { command: result.command } });
|
|
535
|
+
}
|
|
536
|
+
deadOwnerReaped.push(workspace.id);
|
|
537
|
+
deadOwnerTracker.delete(key);
|
|
538
|
+
continue;
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
// Un-landed or un-probeable: flag as stranded (dead owner), never remove.
|
|
542
|
+
const flagKey = `dead-owner-stranded:${resolve(workspace.worktreePath)}`;
|
|
543
|
+
liveDeadOwnerKeys.add(flagKey);
|
|
544
|
+
const last = flaggedAt.get(flagKey) ?? 0;
|
|
545
|
+
if (now - last < UNLANDED_FLAG_COOLDOWN_MS) continue;
|
|
546
|
+
flaggedAt.set(flagKey, now);
|
|
547
|
+
deadOwnerFlagged.push(workspace.id);
|
|
548
|
+
const detail = safeToReap === undefined
|
|
549
|
+
? "host could not be probed for land-state"
|
|
550
|
+
: (preview && (preview.dirtyCount ?? 0) > 0)
|
|
551
|
+
? "uncommitted changes in the worktree"
|
|
552
|
+
: `${preview?.unmergedAhead ?? preview?.ahead ?? "?"} un-landed commit(s)`;
|
|
553
|
+
createActivityEvent({
|
|
554
|
+
clientId: `workspace-dead-owner-stranded-${workspace.id}-${now}`,
|
|
555
|
+
kind: "state",
|
|
556
|
+
title: "Dead-owner worktree needs attention",
|
|
557
|
+
body: `${workspace.branch ?? workspace.id} in ${workspace.repoRoot} — owning agent ${workspace.ownerAgentId ?? "?"} is gone and the worktree holds work that hasn't landed (${detail}). Recover the commits, then clean it up explicitly.`,
|
|
558
|
+
meta: workspace.branch ?? workspace.id,
|
|
559
|
+
icon: "ti-alert-triangle",
|
|
560
|
+
view: "orchestrators",
|
|
561
|
+
metadata: { source: "server", maintenanceJobId: "workspace-orphan-reaper", workspaceId: workspace.id, worktreePath: workspace.worktreePath, branch: workspace.branch, ownerAgentId: workspace.ownerAgentId, deadOwner: true },
|
|
562
|
+
});
|
|
563
|
+
}
|
|
564
|
+
// Drop grace-tracker entries for worktrees that are gone (reaped/landed) so a
|
|
565
|
+
// future re-orphaning re-arms the window from scratch.
|
|
566
|
+
for (const key of deadOwnerTracker.keys()) if (!liveDeadOwnerKeys.has(key)) deadOwnerTracker.delete(key);
|
|
567
|
+
|
|
413
568
|
// Forget cooldown entries for orphans that are gone (reaped/recovered) so a
|
|
414
569
|
// future re-orphaning of the same path re-announces immediately.
|
|
415
|
-
const liveKeys = new Set([
|
|
570
|
+
const liveKeys = new Set([
|
|
571
|
+
...orphans.map((o) => o.worktreePath),
|
|
572
|
+
...missingWorktrees.map((m) => `missing:${m.worktreePath}`),
|
|
573
|
+
...liveDeadOwnerKeys,
|
|
574
|
+
]);
|
|
416
575
|
for (const key of flaggedAt.keys()) if (!liveKeys.has(key) && !reaped.includes(key)) flaggedAt.delete(key);
|
|
417
576
|
|
|
418
577
|
return {
|
|
@@ -422,6 +581,8 @@ export async function reapOrphanedWorktrees(): Promise<Record<string, unknown>>
|
|
|
422
581
|
autoAbandoned,
|
|
423
582
|
flaggedMissingWorktrees,
|
|
424
583
|
missingWorktrees: missingWorktrees.map((m) => m.workspaceId),
|
|
584
|
+
deadOwnerReaped,
|
|
585
|
+
deadOwnerFlagged,
|
|
425
586
|
reapEnabled,
|
|
426
587
|
};
|
|
427
588
|
}
|