agent-relay-server 0.25.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/routes.ts CHANGED
@@ -167,7 +167,7 @@ import { errMessage, isRecord, SPAWN_PROVIDERS, VALID_WORKSPACE_MODES, VALID_EFF
167
167
  import { effectiveProviderCatalogList } from "./provider-catalog-store";
168
168
  import { buildManagedSpawnParams, effectiveManagedPolicyWorkspaceMode } from "./managed-policy";
169
169
  import { buildSpawnCommand, generateSpawnRequestId, resolveSpawnModelParams, type SpawnModelParams } from "./spawn-command";
170
- import { requestWorkspaceMerge } from "./workspace-merge";
170
+ import { isOwnerAlive, withOwnerOnline } from "./workspace-merge";
171
171
  import { claimMetadataPatch, workspaceActiveClaim } from "./workspace-claim";
172
172
  import {
173
173
  applyWorkspaceAction,
@@ -178,6 +178,7 @@ import {
178
178
  } from "./workspace-actions";
179
179
  import { describeWorkspacePhase, landReceipt, TERMINAL_WORKSPACE_STATUSES } from "./workspace-phase";
180
180
  import { notifyBranchLanded } from "./branch-landed";
181
+ import { collectWorkspaceOrphans } from "./workspace-orphans";
181
182
  import type { WorkspaceDiagnostics, WorkspaceGitState, WorkspaceRecord } from "./types";
182
183
  import {
183
184
  getComponentAuth,
@@ -3812,7 +3813,7 @@ const getWorkspaces: Handler = (req) => {
3812
3813
  const repoRoot = cleanString(url.searchParams.get("repoRoot") ?? undefined, "repoRoot", { max: 1000 });
3813
3814
  const ownerAgentId = cleanString(url.searchParams.get("agentId") ?? undefined, "agentId", { max: 240 });
3814
3815
  const status = optionalEnum(url.searchParams.get("status") ?? undefined, "status", VALID_WORKSPACE_STATUSES) as WorkspaceStatus | undefined;
3815
- return json(listWorkspaces({ repoRoot, ownerAgentId, status }));
3816
+ return json(listWorkspaces({ repoRoot, ownerAgentId, status }).map(withOwnerOnline));
3816
3817
  } catch (e) {
3817
3818
  if (e instanceof ValidationError) return error(e.message, 400);
3818
3819
  throw e;
@@ -3822,7 +3823,7 @@ const getWorkspaces: Handler = (req) => {
3822
3823
  const getWorkspaceById: Handler = (_req, params) => {
3823
3824
  const workspace = getWorkspace(params.id!);
3824
3825
  if (!workspace) return error("workspace not found", 404);
3825
- return json(workspace);
3826
+ return json(withOwnerOnline(workspace));
3826
3827
  };
3827
3828
 
3828
3829
  // Per-repo coordination state: persistent steward records (survive offline gaps)
@@ -3879,41 +3880,14 @@ const getWorkspaceDiff: Handler = (req, params) => {
3879
3880
  };
3880
3881
 
3881
3882
  // Worktrees found on disk (agent/* branches) with no live workspace row — left
3882
- // behind by crashes or failed cleanups. Probes each known repo's owning host
3883
- // and subtracts active DB rows. Reclaim them via POST .../orphans/reclaim.
3883
+ // behind by crashes or failed cleanups. Probes each known repo's owning host and
3884
+ // subtracts live DB rows, enriching each with land-state so reap-safe cruft is
3885
+ // distinguishable from stranded work. Also reports the inverse drift (live rows
3886
+ // whose worktree vanished). Discovery is shared with the scheduled reaper
3887
+ // (workspace-orphan-reaper). Reclaim via POST .../orphans/reclaim.
3884
3888
  const getWorkspaceOrphans: Handler = async () => {
3885
- const orchestrators = listOrchestrators().filter((orch) => orch.status === "online" && orch.apiUrl);
3886
- if (!orchestrators.length) return json({ orphans: [], reason: "no online orchestrators" });
3887
- const all = listWorkspaces();
3888
- const repoRoots = [...new Set(all.map((ws) => ws.repoRoot).filter(Boolean))];
3889
- const headers: Record<string, string> = {};
3890
- const relayToken = process.env.AGENT_RELAY_TOKEN;
3891
- if (relayToken) headers[RELAY_TOKEN_HEADER] = relayToken;
3892
- const orphans: WorkspaceOrphan[] = [];
3893
-
3894
- for (const repoRoot of repoRoots) {
3895
- const orch = orchestrators.find((candidate) => candidate.apiUrl && isPathWithinBase(repoRoot, candidate.baseDir));
3896
- if (!orch?.apiUrl) continue;
3897
- let probe: WorkspaceProbe | undefined;
3898
- try {
3899
- const res = await fetch(`${orch.apiUrl}/api/workspace/probe?path=${encodeURIComponent(repoRoot)}`, { headers, signal: AbortSignal.timeout(10_000) });
3900
- if (!res.ok) continue;
3901
- probe = await res.json() as WorkspaceProbe;
3902
- } catch {
3903
- continue;
3904
- }
3905
- const rowsByPath = new Map(all.filter((ws) => ws.repoRoot === repoRoot && ws.worktreePath).map((ws) => [resolve(ws.worktreePath), ws]));
3906
- for (const worktree of probe?.worktrees ?? []) {
3907
- if (!worktree.path || resolve(worktree.path) === resolve(repoRoot)) continue;
3908
- // Only agent-relay-created worktrees (agent/* branches) are reclaimable —
3909
- // never touch a user's own linked worktrees.
3910
- if (!worktree.branch?.startsWith("agent/")) continue;
3911
- const row = rowsByPath.get(resolve(worktree.path));
3912
- if (row && !TERMINAL_WORKSPACE_STATUSES.has(row.status)) continue; // tracked & live
3913
- orphans.push({ worktreePath: worktree.path, repoRoot, branch: worktree.branch, headSha: worktree.headSha, hadTerminalRow: Boolean(row) });
3914
- }
3915
- }
3916
- return json({ orphans });
3889
+ const { orphans, missingWorktrees, reason } = await collectWorkspaceOrphans();
3890
+ return json(reason ? { orphans, missingWorktrees, reason } : { orphans, missingWorktrees });
3917
3891
  };
3918
3892
 
3919
3893
  const postWorkspaceOrphanReclaim: Handler = async (req) => {
@@ -3927,11 +3901,25 @@ const postWorkspaceOrphanReclaim: Handler = async (req) => {
3927
3901
  const repoRoot = cleanString(parsed.body.repoRoot, "repoRoot", { max: 1000 });
3928
3902
  const branch = cleanString(parsed.body.branch, "branch", { max: 240 });
3929
3903
  if (!worktreePath || !repoRoot) return error("worktreePath and repoRoot required", 400);
3904
+ const force = parsed.body.force === true;
3930
3905
  // Refuse to reclaim a path that still backs a live workspace row.
3931
3906
  const live = listWorkspaces().find((ws) => ws.worktreePath && resolve(ws.worktreePath) === resolve(worktreePath) && !TERMINAL_WORKSPACE_STATUSES.has(ws.status));
3932
3907
  if (live) return error(`path backs live workspace ${live.id}; clean it through the workspace, not orphan reclaim`, 409);
3933
3908
  const orch = listOrchestrators().find((candidate) => candidate.status === "online" && isPathWithinBase(repoRoot, candidate.baseDir));
3934
3909
  if (!orch) return error("no online orchestrator owns this path", 409);
3910
+ // Land-safety gate (#244): reclaim force-removes the worktree, so refuse when
3911
+ // it holds un-landed work unless the caller explicitly opts into discarding
3912
+ // it. Mirrors the scheduled reaper — never destroy work on uncertainty.
3913
+ if (!force) {
3914
+ const { orphans } = await collectWorkspaceOrphans();
3915
+ const target = orphans.find((o) => resolve(o.worktreePath) === resolve(worktreePath));
3916
+ if (target && target.safeToReap !== true) {
3917
+ const why = target.safeToReap === undefined
3918
+ ? "land-state could not be determined"
3919
+ : target.dirty ? "uncommitted changes" : `${target.unmergedAhead ?? target.ahead ?? "?"} un-landed commit(s)`;
3920
+ return error(`worktree holds un-landed work (${why}); recover it first, or pass {"force":true} to discard`, 409);
3921
+ }
3922
+ }
3935
3923
  const command = createCommand({
3936
3924
  type: "workspace.cleanup",
3937
3925
  source: "system",
@@ -4018,7 +4006,7 @@ const getWorkspaceDiagnostics: Handler = async (_req, params) => {
4018
4006
  const workspace = getWorkspace(params.id!);
4019
4007
  if (!workspace) return error("workspace not found", 404);
4020
4008
  const owner = workspace.ownerAgentId ? getAgent(workspace.ownerAgentId) : null;
4021
- const ownerOnline = Boolean(owner) && owner!.status !== "offline";
4009
+ const ownerOnline = isOwnerAlive(workspace.ownerAgentId);
4022
4010
  const orch = listOrchestrators().find((candidate) => isPathWithinBase(workspace.sourceCwd, candidate.baseDir));
4023
4011
  const orchOnline = Boolean(orch) && orch!.status === "online";
4024
4012
  const fetched = await fetchWorkspaceGitState(workspace);
@@ -4067,7 +4055,7 @@ const postWorkspaceCleanupStale: Handler = async (req) => {
4067
4055
  const cleaned: string[] = [];
4068
4056
  for (const ws of candidates) {
4069
4057
  const owner = ws.ownerAgentId ? getAgent(ws.ownerAgentId) : null;
4070
- const ownerOnline = Boolean(owner) && owner!.status !== "offline";
4058
+ const ownerOnline = isOwnerAlive(ws.ownerAgentId);
4071
4059
  if (ownerOnline) continue; // never clean a live owner's worktree
4072
4060
  if (offlineOwnerOnly && !ws.ownerAgentId) { /* no owner recorded — still eligible */ }
4073
4061
  if (workspaceActiveClaim(ws)) continue; // respect steward claims
@@ -4129,7 +4117,7 @@ const postWorkspaceAction: Handler = async (req, params) => {
4129
4117
  // plus the directive projection + land receipt so the CLI `--wait` and any
4130
4118
  // HTTP caller get the same legible answer.
4131
4119
  return json({
4132
- workspace: waited.workspace,
4120
+ workspace: withOwnerOnline(waited.workspace),
4133
4121
  guidance: describeWorkspacePhase(waited.workspace),
4134
4122
  ...(landed ? { landed } : {}),
4135
4123
  fromStatus: waited.fromStatus,
@@ -4137,7 +4125,7 @@ const postWorkspaceAction: Handler = async (req, params) => {
4137
4125
  timedOut: waited.timedOut,
4138
4126
  });
4139
4127
  }
4140
- return json(workspace);
4128
+ return json(withOwnerOnline(workspace));
4141
4129
  }
4142
4130
 
4143
4131
  // Everything else delegates to the shared core (one home, shared with the
@@ -4150,6 +4138,7 @@ const postWorkspaceAction: Handler = async (req, params) => {
4150
4138
  metadata: cleanMeta(parsed.body.metadata) ?? {},
4151
4139
  strategy: optionalEnum(parsed.body.strategy, "strategy", ["pr", "rebase-ff", "auto"] as const, "auto") as WorkspaceMergeStrategy,
4152
4140
  deleteBranch: typeof parsed.body.deleteBranch === "boolean" ? parsed.body.deleteBranch : undefined,
4141
+ force: parsed.body.force === true,
4153
4142
  prTitle: cleanString(parsed.body.prTitle, "prTitle", { max: 240 }),
4154
4143
  prBody: cleanString(parsed.body.prBody, "prBody", { max: 8000 }),
4155
4144
  purpose: cleanString(parsed.body.purpose, "purpose", { max: 120 }),
@@ -4158,7 +4147,7 @@ const postWorkspaceAction: Handler = async (req, params) => {
4158
4147
  });
4159
4148
  if (!result.ok) return error(result.error, result.httpStatus);
4160
4149
  if (result.command) emitCommand(result.command);
4161
- const payload: Record<string, unknown> = { workspace: result.workspace };
4150
+ const payload: Record<string, unknown> = { workspace: withOwnerOnline(result.workspace) };
4162
4151
  if (result.command) payload.command = result.command;
4163
4152
  if (result.claim !== undefined) payload.claim = result.claim;
4164
4153
  return json(payload, result.httpStatus);
@@ -19,8 +19,9 @@ import {
19
19
  updateWorkspaceStatus,
20
20
  } from "./db";
21
21
  import { emitActivityEvent } from "./sse";
22
- import { requestWorkspaceMerge } from "./workspace-merge";
22
+ import { isOwnerAlive, requestWorkspaceMerge } from "./workspace-merge";
23
23
  import { claimMetadataPatch, workspaceActiveClaim } from "./workspace-claim";
24
+ import { TERMINAL_WORKSPACE_STATUSES } from "./workspace-phase";
24
25
  import type { Command, WorkspaceMergeStrategy, WorkspaceRecord, WorkspaceStatus } from "./types";
25
26
 
26
27
  // Single source of truth for the action verb set. The route's `optionalEnum` and
@@ -50,6 +51,8 @@ export interface ApplyWorkspaceActionInput {
50
51
  deleteBranch?: boolean;
51
52
  prTitle?: string;
52
53
  prBody?: string;
54
+ // cleanup
55
+ force?: boolean;
53
56
  // claim / release-claim
54
57
  purpose?: string;
55
58
  // deps-refresh
@@ -203,6 +206,20 @@ export function applyWorkspaceAction(workspace: WorkspaceRecord, input: ApplyWor
203
206
  const nextStatus = STATUS_BY_ACTION[action];
204
207
  if (!nextStatus) return { ok: false, httpStatus: 400, error: `unsupported action: ${action}` };
205
208
 
209
+ if (
210
+ action === "cleanup" &&
211
+ input.force !== true &&
212
+ workspace.mode === "isolated" &&
213
+ !TERMINAL_WORKSPACE_STATUSES.has(workspace.status) &&
214
+ isOwnerAlive(workspace.ownerAgentId)
215
+ ) {
216
+ return {
217
+ ok: false,
218
+ httpStatus: 409,
219
+ error: `workspace ${workspace.id} owner is still online; pass force:true to clean it up intentionally`,
220
+ };
221
+ }
222
+
206
223
  const updated = updateWorkspaceStatus(workspace.id, nextStatus, {
207
224
  ...metadata,
208
225
  ...(detail ? { detail } : {}),
@@ -215,7 +232,7 @@ export function applyWorkspaceAction(workspace: WorkspaceRecord, input: ApplyWor
215
232
  let command: Command | undefined;
216
233
  if (requiresCommand) {
217
234
  // Only `cleanup` reaches here — `merge` returned early via the shared helper.
218
- const built = buildWorkspaceCleanupCommand(workspace, agentId ?? "dashboard");
235
+ const built = buildWorkspaceCleanupCommand(workspace, agentId ?? "dashboard", { force: input.force === true });
219
236
  if (!built.ok) return { ok: false, httpStatus: built.status, error: built.error };
220
237
  command = built.command;
221
238
  }
@@ -237,7 +254,20 @@ export function applyWorkspaceAction(workspace: WorkspaceRecord, input: ApplyWor
237
254
  export function buildWorkspaceCleanupCommand(
238
255
  workspace: WorkspaceRecord,
239
256
  requestedBy: string,
257
+ opts: { force?: boolean } = {},
240
258
  ): { ok: true; command: Command } | { ok: false; status: number; error: string } {
259
+ if (
260
+ opts.force !== true &&
261
+ workspace.mode === "isolated" &&
262
+ !TERMINAL_WORKSPACE_STATUSES.has(workspace.status) &&
263
+ isOwnerAlive(workspace.ownerAgentId)
264
+ ) {
265
+ return {
266
+ ok: false,
267
+ status: 409,
268
+ error: `workspace ${workspace.id} owner is still online; pass force:true to clean it up intentionally`,
269
+ };
270
+ }
241
271
  const owners = listOrchestrators().filter((candidate) => isPathWithinBase(workspace.sourceCwd, candidate.baseDir));
242
272
  const owner = owners.find((candidate) => candidate.status === "online") ?? owners[0];
243
273
  if (!owner) return { ok: false, status: 409, error: "no orchestrator owns this workspace path; use DELETE /api/workspaces/:id to purge the record" };
@@ -32,12 +32,16 @@ export type RequestWorkspaceMergeResult =
32
32
 
33
33
  // The owner is "alive" while its relay agent exists and isn't offline (online or
34
34
  // a borderline-stale disconnect both count — don't nuke a worktree on a blip).
35
- function isOwnerAlive(ownerAgentId: string | undefined): boolean {
35
+ export function isOwnerAlive(ownerAgentId: string | undefined): boolean {
36
36
  if (!ownerAgentId) return false;
37
37
  const agent = getAgent(ownerAgentId);
38
38
  return Boolean(agent) && agent!.status !== "offline";
39
39
  }
40
40
 
41
/**
 * Returns a shallow copy of `workspace` with an added `ownerOnline` flag.
 *
 * The flag is whatever `isOwnerAlive` reports for the record's `ownerAgentId`;
 * the input object itself is not mutated.
 */
export function withOwnerOnline<T extends { ownerAgentId?: string }>(workspace: T): T & { ownerOnline: boolean } {
  const ownerOnline = isOwnerAlive(workspace.ownerAgentId);
  return { ...workspace, ownerOnline };
}
44
+
41
45
  /**
42
46
  * Dispatch a base merge for an isolated workspace, serialized by the per-repo
43
47
  * merge lease (issue #157). Single source of truth shared by the manual
@@ -0,0 +1,427 @@
1
+ // Orphaned-worktree reconciliation (#244). An isolated worktree on disk with no
2
+ // live DB row is "orphaned" — left behind when a session ended without a clean
3
+ // teardown (crash, killed runner, a reaped row). Symmetrically, a live row whose
4
+ // worktree is gone from disk is the other half of the same drift. Neither is
5
+ // visible to the agent or the dashboard, so unlanded work can sit stranded for
6
+ // weeks (one real casualty: a CI-guard test, recovered by hand).
7
+ //
8
+ // THE invariant (single home, see `worktreeReapable`): reaping is gated on
9
+ // "nothing would be lost" — landed or empty — NEVER on session liveness or a
10
+ // timer. A worktree holding un-landed commits is flagged for attention, never
11
+ // force-removed. This module is the disk⇄DB reconciler the GC's `git worktree
12
+ // prune` (a no-op while the directory exists) never was.
13
+
14
+ import { resolve } from "node:path";
15
+ import { RELAY_TOKEN_HEADER } from "agent-relay-sdk";
16
+ import type { WorkspaceMergePreview, WorkspaceOrphan, WorkspaceProbe, WorkspaceRecord, WorkspaceStatus } from "./types";
17
+ import { createActivityEvent, getWorkspace, listOrchestrators, listWorkspaces, updateWorkspaceStatus } from "./db";
18
+ import { createCommand } from "./commands-db";
19
+ import { emitRelayEvent } from "./events";
20
+ import { isPathWithinBase } from "./utils";
21
+ import { TERMINAL_WORKSPACE_STATUSES, worktreeReapable, type WorktreeReapState } from "./workspace-phase";
22
+ import { isOwnerAlive } from "./workspace-merge";
23
+
24
// Cooldown between repeated "needs attention" flags for the same entry, so an
// un-landed orphan is surfaced once and then left quiet for this window.
// Overridable via AGENT_RELAY_ORPHAN_FLAG_COOLDOWN_MS; an unset, non-numeric or
// zero value falls back to six hours.
const UNLANDED_FLAG_COOLDOWN_MS =
  Number(process.env.AGENT_RELAY_ORPHAN_FLAG_COOLDOWN_MS) || 6 * 60 * 60 * 1000;

// Detect-only switch: with AGENT_RELAY_ORPHAN_WORKTREE_REAP=0 orphans are still
// detected and reported but never removed. Read on every call, so the
// environment variable is evaluated at sweep time rather than at module load.
const orphanWorktreeReapEnabled = (): boolean => {
  return process.env.AGENT_RELAY_ORPHAN_WORKTREE_REAP !== "0";
};

// Timestamp (ms) of the last flag per key. In-memory only, so a process restart
// re-announces every entry.
const flaggedAt = new Map<string, number>();

// Statuses for which a missing worktree is only reported, never auto-abandoned.
const IN_FLIGHT_MISSING_WORKTREE_STATUSES = new Set<WorkspaceStatus>([
  "merge_planned",
  "cleanup_requested",
]);
33
+
34
/** Test hook: empties the in-memory flag-cooldown map so each test starts clean. */
export function resetOrphanWorktreeStateForTests(): void {
  flaggedAt.clear();
}
37
+
38
/**
 * The slice of an orchestrator record this module works with.
 *
 * Instances are produced by `onlineOrchestrators()`, which only keeps records
 * that are online and have both an `apiUrl` and an `agentId`; `apiUrl` stays
 * optional in the type, so call sites still guard it.
 */
interface OnlineOrchestrator {
  /** Orchestrator record id. */
  id: string;
  /** Agent id used as the target of dispatched commands. */
  agentId: string;
  /** Base URL of the host's HTTP API. */
  apiUrl?: string;
  /** Directory passed to `isPathWithinBase` to decide which paths this host owns. */
  baseDir?: string;
}
44
+
45
/**
 * Builds the request headers for host API calls: the relay token header when
 * AGENT_RELAY_TOKEN is set to a non-empty value, otherwise no headers at all.
 */
function relayHeaders(): Record<string, string> {
  const relayToken = process.env.AGENT_RELAY_TOKEN;
  return relayToken ? { [RELAY_TOKEN_HEADER]: relayToken } : {};
}
51
+
52
/**
 * Asks a host for its worktree probe of `repoRoot`.
 *
 * @returns the parsed probe, or `null` on any failure (non-2xx response,
 *          timeout after 10 s, network error, or an unparsable body).
 */
async function fetchHostProbe(apiUrl: string, repoRoot: string): Promise<WorkspaceProbe | null> {
  try {
    const endpoint = `${apiUrl}/api/workspace/probe?path=${encodeURIComponent(repoRoot)}`;
    const response = await fetch(endpoint, {
      headers: relayHeaders(),
      signal: AbortSignal.timeout(10_000),
    });
    if (response.ok) {
      return (await response.json()) as WorkspaceProbe;
    }
    return null;
  } catch {
    // Best effort: an unreachable host just means "no probe available".
    return null;
  }
}
64
+
65
/**
 * Fetches the land-state of one worktree path from the host's merge-preview
 * endpoint (always with `checkPr=1`, and with `baseRef` when one is given).
 *
 * @returns the parsed preview, or `null` on any failure (non-2xx response,
 *          timeout after 8 s, network error, or an unparsable body).
 */
async function fetchWorktreeReapState(apiUrl: string, worktreePath: string, baseRef?: string): Promise<WorkspaceMergePreview | null> {
  const params = new URLSearchParams({ path: worktreePath, checkPr: "1" });
  if (baseRef) params.append("baseRef", baseRef);
  try {
    const response = await fetch(`${apiUrl}/api/workspace/merge-preview?${params.toString()}`, {
      headers: relayHeaders(),
      signal: AbortSignal.timeout(8_000),
    });
    if (response.ok) {
      return (await response.json()) as WorkspaceMergePreview;
    }
    return null;
  } catch {
    // Best effort: callers treat a missing preview as "land-state unknown".
    return null;
  }
}
81
+
82
/**
 * Outcome of probing a branch whose worktree is missing from disk:
 * - `gone`: the host answered 404 for the branch;
 * - `preview`: the host returned a merge preview for the branch;
 * - `unavailable`: no branch name, a non-2xx/non-404 response, or a failed request.
 */
type MissingBranchProbe =
  | { kind: "gone" }
  | { kind: "preview"; preview: WorkspaceMergePreview }
  | { kind: "unavailable" };

/**
 * Fetches the land-state of a branch (rather than a worktree path) from the
 * host's branch-merge-preview endpoint, always with `checkPr=1`. `baseRef` and
 * `baseSha` are forwarded only when provided.
 *
 * Never throws: every failure mode is folded into the returned probe kind.
 */
async function fetchBranchReapState(
  apiUrl: string,
  repoRoot: string,
  branch: string | undefined,
  baseRef?: string,
  baseSha?: string,
): Promise<MissingBranchProbe> {
  if (!branch) return { kind: "unavailable" };

  const params = new URLSearchParams({ repoRoot, branch, checkPr: "1" });
  if (baseRef) params.append("baseRef", baseRef);
  if (baseSha) params.append("baseSha", baseSha);

  try {
    const response = await fetch(`${apiUrl}/api/workspace/branch-merge-preview?${params.toString()}`, {
      headers: relayHeaders(),
      signal: AbortSignal.timeout(8_000),
    });
    // 404 is checked before the generic failure path: it is the only response
    // mapped to "gone".
    if (response.status === 404) return { kind: "gone" };
    if (!response.ok) return { kind: "unavailable" };
    const preview = (await response.json()) as WorkspaceMergePreview;
    return { kind: "preview", preview };
  } catch {
    return { kind: "unavailable" };
  }
}
107
+
108
/**
 * Decides from a branch merge preview whether nothing would be lost.
 *
 * @returns `undefined` when the preview carries an error or no usable signal
 *          (neither `landed === true` nor a numeric `ahead`/`unmergedAhead`);
 *          otherwise the verdict of `worktreeReapable`, evaluated with a dirty
 *          count of 0.
 */
function previewReapable(preview: WorkspaceMergePreview): boolean | undefined {
  if (preview.error) return undefined;

  const hasCounts = typeof preview.ahead === "number" || typeof preview.unmergedAhead === "number";
  if (preview.landed !== true && !hasCounts) return undefined;

  return worktreeReapable({
    landed: preview.landed,
    ahead: preview.ahead,
    unmergedAhead: preview.unmergedAhead,
    dirtyCount: 0,
  });
}
114
+
115
/**
 * Lists the orchestrators usable for probing and command dispatch: status
 * "online" with both an API URL and an agent id, reduced to the fields this
 * module needs.
 */
function onlineOrchestrators(): OnlineOrchestrator[] {
  const usable: OnlineOrchestrator[] = [];
  for (const orch of listOrchestrators()) {
    if (orch.status !== "online" || !orch.apiUrl || !orch.agentId) continue;
    usable.push({ id: orch.id, agentId: orch.agentId, apiUrl: orch.apiUrl, baseDir: orch.baseDir });
  }
  return usable;
}
120
+
121
/**
 * Distinct, non-empty repo roots referenced by the given workspace rows, in
 * first-seen order. These are the seeds probed for orphans: one probe per repo
 * returns all of its worktrees, so a single row per repo is enough.
 */
function knownRepoRoots(workspaces: WorkspaceRecord[]): string[] {
  const roots = new Set<string>();
  for (const { repoRoot } of workspaces) {
    if (repoRoot) roots.add(repoRoot);
  }
  return Array.from(roots);
}
127
+
128
/** Result of one disk⇄DB reconcile pass (`collectWorkspaceOrphans`). */
export interface CollectOrphansResult {
  /** Worktrees found on disk that no live workspace row accounts for. */
  orphans: WorkspaceOrphan[];
  /** Live isolated rows whose worktree is missing on disk (DB→disk drift). */
  missingWorktrees: Array<{
    /** Id of the workspace row. */
    workspaceId: string;
    /** Worktree path recorded on the row. */
    worktreePath: string;
    /** Repo root that was probed. */
    repoRoot: string;
    /** Row status at collection time. */
    status: WorkspaceStatus;
    /** Branch, base ref and base sha as recorded on the row, when present. */
    branch?: string;
    baseRef?: string;
    baseSha?: string;
    /** Owning agent recorded on the row, when present. */
    ownerAgentId?: string;
  }>;
  /** Set when the pass could not run at all (e.g. "no online orchestrators"). */
  reason?: string;
}
143
+
144
/**
 * The disk⇄DB reconcile pass. For every known repo with an online owning host:
 * probe its worktrees, subtract live DB rows → orphans (disk without a live row),
 * and the inverse → live rows whose worktree is gone (DB without disk). Each
 * orphan is enriched with land-state so callers can tell reap-safe cruft from
 * stranded work. Shared by the `/orphans` route and the scheduled reaper.
 *
 * Repos without an online owning host, or whose probe fails, are skipped
 * silently. An orphan whose land-state could not be fetched keeps
 * `safeToReap === undefined`, which callers must treat as not safe.
 *
 * @returns the orphans and missing worktrees found; `reason` is set (with both
 *          lists empty) when no orchestrator is online.
 */
export async function collectWorkspaceOrphans(): Promise<CollectOrphansResult> {
  const orchestrators = onlineOrchestrators();
  if (!orchestrators.length) return { orphans: [], missingWorktrees: [], reason: "no online orchestrators" };

  const all = listWorkspaces();
  const orphans: WorkspaceOrphan[] = [];
  const missingWorktrees: CollectOrphansResult["missingWorktrees"] = [];

  for (const repoRoot of knownRepoRoots(all)) {
    const orch = orchestrators.find((candidate) => candidate.apiUrl && isPathWithinBase(repoRoot, candidate.baseDir));
    if (!orch?.apiUrl) continue;
    const probe = await fetchHostProbe(orch.apiUrl, repoRoot);
    if (!probe?.worktrees) continue;

    const resolvedRepoRoot = resolve(repoRoot);
    const repoRows = all.filter((ws) => ws.repoRoot === repoRoot && ws.worktreePath);
    // Every path any row of this repo (live or terminal) has ever pointed at.
    const trackedPaths = new Set(repoRows.map((ws) => resolve(ws.worktreePath)));
    const liveRowsByPath = new Map(
      repoRows
        .filter((ws) => !TERMINAL_WORKSPACE_STATUSES.has(ws.status))
        .map((ws) => [resolve(ws.worktreePath), ws]),
    );
    const onDisk = new Set(probe.worktrees.map((wt) => (wt.path ? resolve(wt.path) : "")).filter(Boolean));

    // DB→disk drift: a live isolated row whose worktree is no longer on disk.
    for (const [path, ws] of liveRowsByPath) {
      if (ws.mode === "isolated" && !onDisk.has(path)) {
        missingWorktrees.push({
          workspaceId: ws.id,
          worktreePath: ws.worktreePath,
          repoRoot,
          status: ws.status,
          branch: ws.branch,
          baseRef: ws.baseRef,
          baseSha: ws.baseSha,
          ownerAgentId: ws.ownerAgentId,
        });
      }
    }

    // disk→DB drift: a worktree on disk with no live row.
    for (const worktree of probe.worktrees) {
      if (!worktree.path) continue;
      const worktreeKey = resolve(worktree.path);
      if (worktreeKey === resolvedRepoRoot) continue;
      // Only agent-relay-created worktrees (agent/* branches) are reclaimable —
      // never touch a user's own linked worktrees.
      if (!worktree.branch?.startsWith("agent/")) continue;
      // Tracked & live. Checked against the live-only map on purpose: when a
      // path has both a terminal row and a live row, a single path→row lookup
      // over all rows could return the terminal one and misreport a worktree
      // that backs a live workspace as an orphan.
      if (liveRowsByPath.has(worktreeKey)) continue;

      const orphan: WorkspaceOrphan = {
        worktreePath: worktree.path,
        repoRoot,
        branch: worktree.branch,
        headSha: worktree.headSha,
        // No live row exists at this point, so any tracked row is a terminal one.
        hadTerminalRow: trackedPaths.has(worktreeKey),
      };
      const preview = await fetchWorktreeReapState(orch.apiUrl, worktree.path, probe.branch);
      if (preview && !preview.missing && !preview.error) {
        const state: WorktreeReapState = {
          landed: preview.landed,
          ahead: preview.ahead,
          unmergedAhead: preview.unmergedAhead,
          dirtyCount: preview.dirtyCount,
        };
        orphan.landed = preview.landed;
        orphan.ahead = preview.ahead;
        orphan.unmergedAhead = preview.unmergedAhead;
        orphan.dirty = (preview.dirtyCount ?? 0) > 0;
        orphan.safeToReap = worktreeReapable(state);
      }
      // No probe → safeToReap stays undefined (treated as not-safe by callers).
      orphans.push(orphan);
    }
  }

  return { orphans, missingWorktrees };
}
228
+
229
/**
 * Creates a `workspace.cleanup` command (branch deletion and `reclaim` enabled)
 * targeting the orchestrator's agent for the given orphan, then emits the
 * matching `command.<status>` relay event.
 *
 * @returns the id of the created command.
 */
function dispatchCleanup(orch: OnlineOrchestrator, orphan: WorkspaceOrphan): string {
  const { worktreePath, repoRoot, branch } = orphan;
  const command = createCommand({
    type: "workspace.cleanup",
    source: "system",
    target: orch.agentId,
    params: {
      action: "cleanup",
      worktreePath,
      repoRoot,
      branch,
      deleteBranch: true,
      reclaim: true,
      requestedBy: "workspace-orphan-reaper",
      requestedAt: Date.now(),
    },
  });
  emitRelayEvent({
    type: `command.${command.status}`,
    source: command.source,
    subject: command.id,
    data: { command },
  });
  return command.id;
}
248
+
249
/**
 * Scheduled reaper (maintenance job). Runs one reconcile pass and then:
 *
 * - orphans that are safe to reap get a cleanup command dispatched (unless the
 *   detect-only switch is on) and a "reaped" activity event;
 * - orphans that are not safe (un-landed work, dirty tree, or unknown
 *   land-state) are only flagged, at most once per cooldown window;
 * - live rows whose worktree vanished are reported while the owner is alive or
 *   the row is in flight; otherwise they are auto-abandoned when the branch is
 *   gone or fully landed, and flagged for attention when that can't be shown.
 *
 * Nothing is removed or abandoned on uncertainty.
 *
 * @returns `{ skipped }` when the pass could not run, otherwise a summary of
 *          what was scanned, reaped, flagged and auto-abandoned.
 */
export async function reapOrphanedWorktrees(): Promise<Record<string, unknown>> {
  const collected = await collectWorkspaceOrphans();
  if (collected.reason) return { skipped: collected.reason };
  const { orphans, missingWorktrees } = collected;

  const hosts = onlineOrchestrators();
  const reapEnabled = orphanWorktreeReapEnabled();
  const reaped: string[] = [];
  const flagged: string[] = [];
  const autoAbandoned: string[] = [];
  const flaggedMissingWorktrees: string[] = [];
  const now = Date.now();

  // ── disk→DB drift: worktrees without a live row ──────────────────────────
  for (const orphan of orphans) {
    const host = hosts.find((candidate) => isPathWithinBase(orphan.repoRoot, candidate.baseDir));
    if (!host) continue;

    if (orphan.safeToReap === true) {
      if (reapEnabled) {
        const commandId = dispatchCleanup(host, orphan);
        reaped.push(orphan.worktreePath);
        flaggedAt.delete(orphan.worktreePath);
        createActivityEvent({
          clientId: `workspace-orphan-reaped-${orphan.worktreePath}-${now}`,
          kind: "state",
          title: "Orphaned worktree reaped",
          body: `${orphan.branch ?? orphan.worktreePath} — ${orphan.landed ? "work already landed" : "no work to preserve"}; removing the stale worktree`,
          meta: orphan.branch ?? orphan.worktreePath,
          icon: "ti-trash",
          view: "orchestrators",
          metadata: { source: "server", maintenanceJobId: "workspace-orphan-reaper", worktreePath: orphan.worktreePath, repoRoot: orphan.repoRoot, commandId, landed: orphan.landed },
        });
      }
      // Detect-only mode falls through to here without touching anything.
      continue;
    }

    // Not safe (un-landed work, dirty tree, or un-probeable) — flag once per
    // cooldown, never remove. This is the stranded-work needs-attention entry.
    const lastFlagged = flaggedAt.get(orphan.worktreePath) ?? 0;
    if (now - lastFlagged < UNLANDED_FLAG_COOLDOWN_MS) continue;
    flaggedAt.set(orphan.worktreePath, now);
    flagged.push(orphan.worktreePath);

    let detail: string;
    if (orphan.safeToReap === undefined) {
      detail = "host could not be probed for land-state";
    } else if (orphan.dirty) {
      detail = "uncommitted changes in the worktree";
    } else {
      detail = `${orphan.unmergedAhead ?? orphan.ahead ?? "?"} un-landed commit(s)`;
    }
    createActivityEvent({
      clientId: `workspace-orphan-stranded-${orphan.worktreePath}-${now}`,
      kind: "state",
      title: "Stranded worktree needs attention",
      body: `${orphan.branch ?? orphan.worktreePath} in ${orphan.repoRoot} is orphaned (no live workspace row) and holds work that hasn't landed — ${detail}. Reclaim with force to discard, or recover the commits before removing.`,
      meta: orphan.branch ?? orphan.worktreePath,
      icon: "ti-alert-triangle",
      view: "orchestrators",
      metadata: { source: "server", maintenanceJobId: "workspace-orphan-reaper", worktreePath: orphan.worktreePath, repoRoot: orphan.repoRoot, branch: orphan.branch, ahead: orphan.ahead, unmergedAhead: orphan.unmergedAhead, dirty: orphan.dirty, headSha: orphan.headSha },
    });
  }

  // ── DB→disk drift: live rows whose worktree is gone ──────────────────────
  for (const missing of missingWorktrees) {
    // Re-read the row: its state may have changed since the collect pass.
    const workspace = getWorkspace(missing.workspaceId);
    if (!workspace) continue;
    if (TERMINAL_WORKSPACE_STATUSES.has(workspace.status)) continue;
    if (workspace.mode !== "isolated" || !workspace.worktreePath) continue;

    const key = `missing:${workspace.worktreePath}`;
    const ownerAlive = isOwnerAlive(workspace.ownerAgentId);
    const inFlight = IN_FLIGHT_MISSING_WORKTREE_STATUSES.has(workspace.status);
    const lastFlagged = flaggedAt.get(key) ?? 0;
    const coolingDown = now - lastFlagged < UNLANDED_FLAG_COOLDOWN_MS;

    // A live owner or an in-flight merge/cleanup: report the drift, change nothing.
    if (ownerAlive || inFlight) {
      if (coolingDown) continue;
      flaggedAt.set(key, now);
      createActivityEvent({
        clientId: `workspace-row-no-worktree-${workspace.id}-${now}`,
        kind: "state",
        title: "Workspace row has no worktree on disk",
        body: `Workspace ${workspace.id} (${workspace.status}) points at ${workspace.worktreePath}, which no longer exists on disk — disk/DB drift.`,
        meta: workspace.id,
        icon: "ti-unlink",
        view: "orchestrators",
        metadata: {
          source: "server",
          maintenanceJobId: "workspace-orphan-reaper",
          workspaceId: workspace.id,
          worktreePath: workspace.worktreePath,
          status: workspace.status,
          ownerAlive,
          inFlight,
        },
      });
      continue;
    }

    const host = hosts.find((candidate) => candidate.apiUrl && isPathWithinBase(workspace.repoRoot, candidate.baseDir));
    const probe: MissingBranchProbe = host?.apiUrl
      ? await fetchBranchReapState(host.apiUrl, workspace.repoRoot, workspace.branch, workspace.baseRef, workspace.baseSha)
      : { kind: "unavailable" };

    let safeToAbandon: boolean | undefined;
    if (probe.kind === "gone") {
      safeToAbandon = true;
    } else if (probe.kind === "preview") {
      safeToAbandon = previewReapable(probe.preview);
    }

    if (safeToAbandon === true) {
      const branchGone = probe.kind === "gone";
      const reasonText = branchGone ? "missing worktree; branch ref gone" : "missing worktree; branch already landed";
      const updated = updateWorkspaceStatus(workspace.id, "abandoned", {
        autoAbandoned: true,
        abandonedReason: reasonText,
        abandonedAt: now,
      });
      if (!updated) continue;
      autoAbandoned.push(workspace.id);
      flaggedAt.delete(key);
      createActivityEvent({
        clientId: `workspace-row-auto-abandoned-${workspace.id}-${now}`,
        kind: "state",
        title: "Workspace auto-abandoned",
        body: `${workspace.branch ?? workspace.id} in ${workspace.repoRoot} — worktree missing and ${branchGone ? "branch ref is gone" : "branch has fully landed"}`,
        meta: workspace.branch ?? workspace.id,
        icon: "ti-clock-x",
        view: "orchestrators",
        metadata: {
          source: "server",
          maintenanceJobId: "workspace-orphan-reaper",
          workspaceId: workspace.id,
          worktreePath: workspace.worktreePath,
          branch: workspace.branch,
          reason: reasonText,
        },
      });
      continue;
    }

    // Cannot prove the branch is gone or landed: flag once per cooldown.
    if (coolingDown) continue;
    flaggedAt.set(key, now);
    flaggedMissingWorktrees.push(workspace.id);

    let detail: string;
    if (probe.kind === "preview") {
      detail = `${probe.preview.unmergedAhead ?? probe.preview.ahead ?? "?"} un-landed commit(s) still recoverable on branch ${workspace.branch ?? workspace.id}`;
    } else if (workspace.branch) {
      detail = `host could not confirm whether branch ${workspace.branch} has landed`;
    } else {
      detail = "host could not confirm whether recoverable branch work remains";
    }
    createActivityEvent({
      clientId: `workspace-row-needs-attention-${workspace.id}-${now}`,
      kind: "state",
      title: "Missing-worktree workspace needs attention",
      body: `${workspace.id} in ${workspace.repoRoot} has no worktree on disk and cannot be auto-abandoned safely — ${detail}. Recover via the branch if needed, then abandon or clean it up explicitly.`,
      meta: workspace.id,
      icon: "ti-alert-triangle",
      view: "orchestrators",
      metadata: {
        source: "server",
        maintenanceJobId: "workspace-orphan-reaper",
        workspaceId: workspace.id,
        worktreePath: workspace.worktreePath,
        branch: workspace.branch,
        status: workspace.status,
        probe: probe.kind,
        ...(probe.kind === "preview" ? { ahead: probe.preview.ahead, unmergedAhead: probe.preview.unmergedAhead, landed: probe.preview.landed } : {}),
      },
    });
  }

  // Forget cooldown entries for orphans that are gone (reaped/recovered) so a
  // future re-orphaning of the same path re-announces immediately.
  const liveKeys = new Set<string>();
  for (const orphan of orphans) liveKeys.add(orphan.worktreePath);
  for (const missing of missingWorktrees) liveKeys.add(`missing:${missing.worktreePath}`);
  for (const key of flaggedAt.keys()) {
    if (liveKeys.has(key) || reaped.includes(key)) continue;
    flaggedAt.delete(key);
  }

  return {
    scanned: orphans.length,
    reaped,
    flagged,
    autoAbandoned,
    flaggedMissingWorktrees,
    missingWorktrees: missingWorktrees.map((entry) => entry.workspaceId),
    reapEnabled,
  };
}