agent-relay-server 0.25.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/branch-landed.ts +53 -19
- package/src/maintenance.ts +14 -0
- package/src/routes.ts +22 -34
- package/src/workspace-orphans.ts +289 -0
- package/src/workspace-phase.ts +29 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-relay-server",
|
|
3
|
-
"version": "0.25.0",
|
|
3
|
+
"version": "0.26.0",
|
|
4
4
|
"description": "Lightweight HTTP message relay for inter-agent communication across machines",
|
|
5
5
|
"module": "src/index.ts",
|
|
6
6
|
"type": "module",
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
"CONTRIBUTING.md"
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
|
-
"agent-relay-sdk": "0.2.
|
|
36
|
+
"agent-relay-sdk": "0.2.15"
|
|
37
37
|
},
|
|
38
38
|
"scripts": {
|
|
39
39
|
"prepack": "bun run build:dashboard:bundle >&2",
|
package/src/branch-landed.ts
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import { emitRelayEvent } from "./events";
|
|
2
2
|
import { getNotificationsConfig } from "./config-store";
|
|
3
3
|
import { notifySystemMessage } from "./notify";
|
|
4
|
-
import
|
|
4
|
+
import { listAgents } from "./db";
|
|
5
|
+
import { isAgentOnline } from "./agent-ref";
|
|
6
|
+
import type { AgentCard, WorkspaceRecord } from "./types";
|
|
5
7
|
|
|
6
8
|
export interface BranchLandedInput {
|
|
7
9
|
/**
|
|
@@ -52,26 +54,58 @@ export function notifyBranchLanded(input: BranchLandedInput): void {
|
|
|
52
54
|
if (!config.enabled || !config.branchLanded) return;
|
|
53
55
|
|
|
54
56
|
const author = workspace.ownerAgentId;
|
|
55
|
-
if (!author) return;
|
|
56
|
-
|
|
57
|
-
const branchLabel = landedBranch ? `\`${landedBranch}\`` : "Your branch";
|
|
58
57
|
const shaLabel = shortSha ? ` as \`${shortSha}\`` : "";
|
|
59
58
|
const subjectLabel = input.subject ? ` — "${input.subject}"` : "";
|
|
60
|
-
const
|
|
61
|
-
|
|
62
|
-
:
|
|
59
|
+
const payload = {
|
|
60
|
+
kind: "branch.landed",
|
|
61
|
+
workspaceId: workspace.id,
|
|
62
|
+
repoRoot: workspace.repoRoot,
|
|
63
|
+
branch: landedBranch,
|
|
64
|
+
base,
|
|
65
|
+
sha: input.mergedSha,
|
|
66
|
+
author,
|
|
67
|
+
newBranch: input.newBranch,
|
|
68
|
+
};
|
|
63
69
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
}
|
|
70
|
+
// The branch author cares most — push regardless of online (store-ahead delivers it on
|
|
71
|
+
// next poll if they've moved on, #234). They land-and-continue onto the recycled branch.
|
|
72
|
+
if (author) {
|
|
73
|
+
const branchLabel = landedBranch ? `\`${landedBranch}\`` : "Your branch";
|
|
74
|
+
const continueLabel = input.newBranch
|
|
75
|
+
? ` You're now on \`${input.newBranch}\` — keep working there.`
|
|
76
|
+
: " Worktree reclaimed.";
|
|
77
|
+
notifySystemMessage(author, {
|
|
78
|
+
subject: "Your branch landed",
|
|
79
|
+
body: `✅ ${branchLabel} landed on \`${base}\`${shaLabel}${subjectLabel}.${continueLabel}`,
|
|
80
|
+
payload,
|
|
81
|
+
});
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// Agents on `main` — those whose cwd IS the main checkout (not an isolated worktree) —
|
|
85
|
+
// get a live "merged" notice so a long-lived main agent's context stays current as work
|
|
86
|
+
// lands under it (#239). Online-only: a stale/exited main session needs no wake, and
|
|
87
|
+
// store-ahead to it would just pile up noise. The author is in a worktree (cwd ≠ repoRoot)
|
|
88
|
+
// so it's naturally excluded; guard anyway for shared-mode owners.
|
|
89
|
+
const branchLabel = landedBranch ? `\`${landedBranch}\`` : "A branch";
|
|
90
|
+
const authorLabel = author ? ` by \`${author}\`` : "";
|
|
91
|
+
for (const agent of agentsOnMain(workspace.repoRoot, author)) {
|
|
92
|
+
notifySystemMessage(agent.id, {
|
|
93
|
+
subject: `Merged to ${base}`,
|
|
94
|
+
body: `🔀 ${branchLabel}${authorLabel} merged to \`${base}\`${shaLabel}${subjectLabel}.`,
|
|
95
|
+
payload,
|
|
96
|
+
});
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
/**
 * Lists the agents that should receive a "merged" notice for `repoRoot`.
 *
 * An agent qualifies when its registered `meta.cwd` is exactly `repoRoot` (it works
 * in the main checkout rather than an isolated worktree) and `isAgentOnline` accepts
 * it. The branch author, the pseudo agents `system` / `user`, and channels are
 * always left out.
 *
 * @param repoRoot Path of the repo's main checkout, compared verbatim against `meta.cwd`.
 * @param author Id of the branch author to exclude; may be undefined.
 * @returns The matching agent cards, in `listAgents()` order.
 */
function agentsOnMain(repoRoot: string, author: string | undefined): AgentCard[] {
  const recipients: AgentCard[] = [];
  for (const agent of listAgents()) {
    // The author is notified separately; system/user are not real sessions.
    if (agent.id === author || agent.id === "system" || agent.id === "user") continue;
    if (agent.kind === "channel" || agent.meta?.kind === "channel") continue;

    const registeredCwd = agent.meta?.cwd;
    const worksInMainCheckout = typeof registeredCwd === "string" && registeredCwd === repoRoot;
    if (!worksInMainCheckout) continue;

    // Liveness is checked last, and only for agents that passed every other filter.
    if (isAgentOnline(agent)) recipients.push(agent);
  }
  return recipients;
}
|
package/src/maintenance.ts
CHANGED
|
@@ -33,6 +33,7 @@ import {
|
|
|
33
33
|
import type { WorkspaceMergePreview, WorkspaceRecord, WorkspaceStatus } from "./types";
|
|
34
34
|
import { requestWorkspaceMerge } from "./workspace-merge";
|
|
35
35
|
import { workspaceActiveClaim } from "./workspace-claim";
|
|
36
|
+
import { reapOrphanedWorktrees } from "./workspace-orphans";
|
|
36
37
|
import { READY_TO_LAND_STATUSES, TERMINAL_WORKSPACE_STATUSES } from "./workspace-phase";
|
|
37
38
|
import { errMessage, RELAY_TOKEN_HEADER } from "agent-relay-sdk";
|
|
38
39
|
import { getStewardConfig } from "./config-store";
|
|
@@ -66,6 +67,10 @@ const DB_VACUUM_EVERY = Number(process.env.AGENT_RELAY_DB_VACUUM_EVERY) || 7;
|
|
|
66
67
|
let dbMaintenanceRuns = 0;
|
|
67
68
|
const WORKSPACE_REVIEW_TTL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_REVIEW_TTL_MS) || 3 * DAY_MS;
|
|
68
69
|
const WORKSPACE_GC_INTERVAL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_GC_INTERVAL_MS) || 60 * 60 * 1000;
|
|
70
|
+
// Disk⇄DB orphan reconcile cadence (#244). Runs on start for a boot-time pass,
|
|
71
|
+
// then periodically — orphans accrue slowly (one per crashed/killed session), so
|
|
72
|
+
// a 30-min sweep is plenty without hammering the hosts with probes.
|
|
73
|
+
const WORKSPACE_ORPHAN_REAPER_INTERVAL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_ORPHAN_REAPER_INTERVAL_MS) || 30 * 60 * 1000;
|
|
69
74
|
// Deterministic auto-land (Layer 0): merge clean fast-forwards with no human in
|
|
70
75
|
// the loop. Default on for the seamless workflow; set AGENT_RELAY_WORKSPACE_AUTO_MERGE=0
|
|
71
76
|
// to require a manual or steward merge per repo. Read at call-time so operators can
|
|
@@ -411,6 +416,15 @@ const definitions: MaintenanceJobDefinition[] = [
|
|
|
411
416
|
timeoutMs: 60 * 1000,
|
|
412
417
|
handler: workspaceGC,
|
|
413
418
|
},
|
|
419
|
+
{
|
|
420
|
+
id: "workspace-orphan-reaper",
|
|
421
|
+
title: "Workspace orphan reaper",
|
|
422
|
+
description: "Reconcile disk⇄DB: reap orphaned worktrees whose work has landed (or is empty), flag orphans holding un-landed work as needs-attention instead of deleting, and report rows whose worktree vanished. git worktree prune can't do this — it no-ops while the directory still exists.",
|
|
423
|
+
intervalMs: WORKSPACE_ORPHAN_REAPER_INTERVAL_MS,
|
|
424
|
+
runOnStart: true,
|
|
425
|
+
timeoutMs: 2 * 60 * 1000,
|
|
426
|
+
handler: reapOrphanedWorktrees,
|
|
427
|
+
},
|
|
414
428
|
];
|
|
415
429
|
|
|
416
430
|
function workspacePathWithinBase(path: string | undefined, baseDir: string | undefined): boolean {
|
package/src/routes.ts
CHANGED
|
@@ -178,6 +178,7 @@ import {
|
|
|
178
178
|
} from "./workspace-actions";
|
|
179
179
|
import { describeWorkspacePhase, landReceipt, TERMINAL_WORKSPACE_STATUSES } from "./workspace-phase";
|
|
180
180
|
import { notifyBranchLanded } from "./branch-landed";
|
|
181
|
+
import { collectWorkspaceOrphans } from "./workspace-orphans";
|
|
181
182
|
import type { WorkspaceDiagnostics, WorkspaceGitState, WorkspaceRecord } from "./types";
|
|
182
183
|
import {
|
|
183
184
|
getComponentAuth,
|
|
@@ -3879,41 +3880,14 @@ const getWorkspaceDiff: Handler = (req, params) => {
|
|
|
3879
3880
|
};
|
|
3880
3881
|
|
|
3881
3882
|
// Worktrees found on disk (agent/* branches) with no live workspace row — left
|
|
3882
|
-
// behind by crashes or failed cleanups. Probes each known repo's owning host
|
|
3883
|
-
//
|
|
3883
|
+
// behind by crashes or failed cleanups. Probes each known repo's owning host and
|
|
3884
|
+
// subtracts live DB rows, enriching each with land-state so reap-safe cruft is
|
|
3885
|
+
// distinguishable from stranded work. Also reports the inverse drift (live rows
|
|
3886
|
+
// whose worktree vanished). Discovery is shared with the scheduled reaper
|
|
3887
|
+
// (workspace-orphan-reaper). Reclaim via POST .../orphans/reclaim.
|
|
3884
3888
|
const getWorkspaceOrphans: Handler = async () => {
|
|
3885
|
-
const
|
|
3886
|
-
|
|
3887
|
-
const all = listWorkspaces();
|
|
3888
|
-
const repoRoots = [...new Set(all.map((ws) => ws.repoRoot).filter(Boolean))];
|
|
3889
|
-
const headers: Record<string, string> = {};
|
|
3890
|
-
const relayToken = process.env.AGENT_RELAY_TOKEN;
|
|
3891
|
-
if (relayToken) headers[RELAY_TOKEN_HEADER] = relayToken;
|
|
3892
|
-
const orphans: WorkspaceOrphan[] = [];
|
|
3893
|
-
|
|
3894
|
-
for (const repoRoot of repoRoots) {
|
|
3895
|
-
const orch = orchestrators.find((candidate) => candidate.apiUrl && isPathWithinBase(repoRoot, candidate.baseDir));
|
|
3896
|
-
if (!orch?.apiUrl) continue;
|
|
3897
|
-
let probe: WorkspaceProbe | undefined;
|
|
3898
|
-
try {
|
|
3899
|
-
const res = await fetch(`${orch.apiUrl}/api/workspace/probe?path=${encodeURIComponent(repoRoot)}`, { headers, signal: AbortSignal.timeout(10_000) });
|
|
3900
|
-
if (!res.ok) continue;
|
|
3901
|
-
probe = await res.json() as WorkspaceProbe;
|
|
3902
|
-
} catch {
|
|
3903
|
-
continue;
|
|
3904
|
-
}
|
|
3905
|
-
const rowsByPath = new Map(all.filter((ws) => ws.repoRoot === repoRoot && ws.worktreePath).map((ws) => [resolve(ws.worktreePath), ws]));
|
|
3906
|
-
for (const worktree of probe?.worktrees ?? []) {
|
|
3907
|
-
if (!worktree.path || resolve(worktree.path) === resolve(repoRoot)) continue;
|
|
3908
|
-
// Only agent-relay-created worktrees (agent/* branches) are reclaimable —
|
|
3909
|
-
// never touch a user's own linked worktrees.
|
|
3910
|
-
if (!worktree.branch?.startsWith("agent/")) continue;
|
|
3911
|
-
const row = rowsByPath.get(resolve(worktree.path));
|
|
3912
|
-
if (row && !TERMINAL_WORKSPACE_STATUSES.has(row.status)) continue; // tracked & live
|
|
3913
|
-
orphans.push({ worktreePath: worktree.path, repoRoot, branch: worktree.branch, headSha: worktree.headSha, hadTerminalRow: Boolean(row) });
|
|
3914
|
-
}
|
|
3915
|
-
}
|
|
3916
|
-
return json({ orphans });
|
|
3889
|
+
const { orphans, missingWorktrees, reason } = await collectWorkspaceOrphans();
|
|
3890
|
+
return json(reason ? { orphans, missingWorktrees, reason } : { orphans, missingWorktrees });
|
|
3917
3891
|
};
|
|
3918
3892
|
|
|
3919
3893
|
const postWorkspaceOrphanReclaim: Handler = async (req) => {
|
|
@@ -3927,11 +3901,25 @@ const postWorkspaceOrphanReclaim: Handler = async (req) => {
|
|
|
3927
3901
|
const repoRoot = cleanString(parsed.body.repoRoot, "repoRoot", { max: 1000 });
|
|
3928
3902
|
const branch = cleanString(parsed.body.branch, "branch", { max: 240 });
|
|
3929
3903
|
if (!worktreePath || !repoRoot) return error("worktreePath and repoRoot required", 400);
|
|
3904
|
+
const force = parsed.body.force === true;
|
|
3930
3905
|
// Refuse to reclaim a path that still backs a live workspace row.
|
|
3931
3906
|
const live = listWorkspaces().find((ws) => ws.worktreePath && resolve(ws.worktreePath) === resolve(worktreePath) && !TERMINAL_WORKSPACE_STATUSES.has(ws.status));
|
|
3932
3907
|
if (live) return error(`path backs live workspace ${live.id}; clean it through the workspace, not orphan reclaim`, 409);
|
|
3933
3908
|
const orch = listOrchestrators().find((candidate) => candidate.status === "online" && isPathWithinBase(repoRoot, candidate.baseDir));
|
|
3934
3909
|
if (!orch) return error("no online orchestrator owns this path", 409);
|
|
3910
|
+
// Land-safety gate (#244): reclaim force-removes the worktree, so refuse when
|
|
3911
|
+
// it holds un-landed work unless the caller explicitly opts into discarding
|
|
3912
|
+
// it. Mirrors the scheduled reaper — never destroy work on uncertainty.
|
|
3913
|
+
if (!force) {
|
|
3914
|
+
const { orphans } = await collectWorkspaceOrphans();
|
|
3915
|
+
const target = orphans.find((o) => resolve(o.worktreePath) === resolve(worktreePath));
|
|
3916
|
+
if (target && target.safeToReap !== true) {
|
|
3917
|
+
const why = target.safeToReap === undefined
|
|
3918
|
+
? "land-state could not be determined"
|
|
3919
|
+
: target.dirty ? "uncommitted changes" : `${target.unmergedAhead ?? target.ahead ?? "?"} un-landed commit(s)`;
|
|
3920
|
+
return error(`worktree holds un-landed work (${why}); recover it first, or pass {"force":true} to discard`, 409);
|
|
3921
|
+
}
|
|
3922
|
+
}
|
|
3935
3923
|
const command = createCommand({
|
|
3936
3924
|
type: "workspace.cleanup",
|
|
3937
3925
|
source: "system",
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
// Orphaned-worktree reconciliation (#244). An isolated worktree on disk with no
|
|
2
|
+
// live DB row is "orphaned" — left behind when a session ended without a clean
|
|
3
|
+
// teardown (crash, killed runner, a reaped row). Symmetrically, a live row whose
|
|
4
|
+
// worktree is gone from disk is the other half of the same drift. Neither is
|
|
5
|
+
// visible to the agent or the dashboard, so unlanded work can sit stranded for
|
|
6
|
+
// weeks (one real casualty: a CI-guard test, recovered by hand).
|
|
7
|
+
//
|
|
8
|
+
// THE invariant (single home, see `worktreeReapable`): reaping is gated on
|
|
9
|
+
// "nothing would be lost" — landed or empty — NEVER on session liveness or a
|
|
10
|
+
// timer. A worktree holding un-landed commits is flagged for attention, never
|
|
11
|
+
// force-removed. This module is the disk⇄DB reconciler the GC's `git worktree
|
|
12
|
+
// prune` (a no-op while the directory exists) never was.
|
|
13
|
+
|
|
14
|
+
import { resolve } from "node:path";
|
|
15
|
+
import { RELAY_TOKEN_HEADER, errMessage } from "agent-relay-sdk";
|
|
16
|
+
import type { WorkspaceMergePreview, WorkspaceOrphan, WorkspaceProbe, WorkspaceRecord } from "./types";
|
|
17
|
+
import { createActivityEvent, listOrchestrators, listWorkspaces } from "./db";
|
|
18
|
+
import { createCommand } from "./commands-db";
|
|
19
|
+
import { emitRelayEvent } from "./events";
|
|
20
|
+
import { isPathWithinBase } from "./utils";
|
|
21
|
+
import { TERMINAL_WORKSPACE_STATUSES, worktreeReapable, type WorktreeReapState } from "./workspace-phase";
|
|
22
|
+
|
|
23
|
+
// Default quiet period between two announcements of the same un-landed orphan.
const DEFAULT_UNLANDED_FLAG_COOLDOWN_MS = 6 * 60 * 60 * 1000;

// An un-landed orphan is surfaced once and then left alone for this window rather
// than being re-flagged on every sweep. Override with
// AGENT_RELAY_ORPHAN_FLAG_COOLDOWN_MS; an unset, non-numeric, or zero value falls
// back to the default.
const UNLANDED_FLAG_COOLDOWN_MS = Number(process.env.AGENT_RELAY_ORPHAN_FLAG_COOLDOWN_MS) || DEFAULT_UNLANDED_FLAG_COOLDOWN_MS;

// Detect-only switch: with AGENT_RELAY_ORPHAN_WORKTREE_REAP=0 orphans are still
// detected and reported but never removed. Read on every call, not cached.
function orphanWorktreeReapEnabled(): boolean {
  return process.env.AGENT_RELAY_ORPHAN_WORKTREE_REAP !== "0";
}

// Last-announced timestamp (ms) per cooldown key. In-memory only, so a process
// restart forgets it and re-announces.
const flaggedAt = new Map<string, number>();

/** Test hook: forgets every recorded announcement so the next sweep flags again. */
export function resetOrphanWorktreeStateForTests(): void {
  flaggedAt.clear();
}
|
|
35
|
+
|
|
36
|
+
/** The slice of an orchestrator record this module needs, as built by `onlineOrchestrators()`. */
interface OnlineOrchestrator {
  /** Orchestrator record id. */
  id: string;
  /** Agent id used as the `target` of dispatched cleanup commands. */
  agentId: string;
  /** Base URL of the host API used for the probe and merge-preview requests. */
  apiUrl?: string;
  /** Directory passed to `isPathWithinBase` to decide whether this host owns a repo root. */
  baseDir?: string;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Builds the headers sent with host API requests.
 *
 * @returns An object carrying the relay token header when AGENT_RELAY_TOKEN is set
 *   to a non-empty value, otherwise an empty object.
 */
function relayHeaders(): Record<string, string> {
  const relayToken = process.env.AGENT_RELAY_TOKEN;
  return relayToken ? { [RELAY_TOKEN_HEADER]: relayToken } : {};
}
|
|
49
|
+
|
|
50
|
+
/**
 * Asks a host for its workspace probe of `repoRoot`.
 *
 * Best-effort: a non-2xx response, a request that exceeds the 10 s timeout, a
 * network failure, or an unparsable body all yield `null` instead of throwing.
 *
 * @param apiUrl Base URL of the host API.
 * @param repoRoot Repo root to probe; sent URL-encoded as the `path` query parameter.
 * @returns The parsed probe, or `null` when it could not be obtained.
 */
async function fetchHostProbe(apiUrl: string, repoRoot: string): Promise<WorkspaceProbe | null> {
  try {
    const probeUrl = `${apiUrl}/api/workspace/probe?path=${encodeURIComponent(repoRoot)}`;
    const response = await fetch(probeUrl, { headers: relayHeaders(), signal: AbortSignal.timeout(10_000) });
    if (response.ok) return await response.json() as WorkspaceProbe;
  } catch {
    // Deliberately swallowed: an unreachable host is reported as "no probe".
  }
  return null;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Fetches the host's merge-preview for one worktree path; the caller reads
 * `landed`, `ahead`, `unmergedAhead` and `dirtyCount` from it.
 *
 * Best-effort: a non-2xx response, a request that exceeds the 8 s timeout, a
 * network failure, or an unparsable body all yield `null` instead of throwing.
 *
 * @param apiUrl Base URL of the host API.
 * @param worktreePath Worktree to inspect, sent as the `path` query parameter.
 * @param baseRef Optional base ref; the `baseRef` parameter is sent only when this is non-empty.
 * @returns The parsed merge-preview, or `null` when it could not be obtained.
 */
async function fetchWorktreeReapState(apiUrl: string, worktreePath: string, baseRef?: string): Promise<WorkspaceMergePreview | null> {
  const params = new URLSearchParams({ path: worktreePath, checkPr: "1" });
  if (baseRef) {
    params.set("baseRef", baseRef);
  }
  const previewUrl = `${apiUrl}/api/workspace/merge-preview?${params.toString()}`;
  try {
    const response = await fetch(previewUrl, { headers: relayHeaders(), signal: AbortSignal.timeout(8_000) });
    return response.ok ? (await response.json() as WorkspaceMergePreview) : null;
  } catch {
    return null;
  }
}
|
|
79
|
+
|
|
80
|
+
/**
 * Lists the orchestrators that can be used right now: status `online`, with both
 * an API URL and an agent id.
 *
 * @returns One `OnlineOrchestrator` per usable record, in `listOrchestrators()` order.
 */
function onlineOrchestrators(): OnlineOrchestrator[] {
  const usable: OnlineOrchestrator[] = [];
  for (const orch of listOrchestrators()) {
    if (orch.status !== "online") continue;
    if (!orch.apiUrl || !orch.agentId) continue;
    usable.push({ id: orch.id, agentId: orch.agentId, apiUrl: orch.apiUrl, baseDir: orch.baseDir });
  }
  return usable;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Collects the distinct, non-empty repo roots referenced by the given workspace
 * rows — the seeds that get probed for orphans. Each repo appears once, in order
 * of first appearance.
 *
 * @param workspaces Workspace rows to scan.
 * @returns Unique truthy `repoRoot` values.
 */
function knownRepoRoots(workspaces: WorkspaceRecord[]): string[] {
  const roots = new Set<string>();
  for (const { repoRoot } of workspaces) {
    if (repoRoot) roots.add(repoRoot);
  }
  return Array.from(roots);
}
|
|
92
|
+
|
|
93
|
+
export interface CollectOrphansResult {
  /** Worktrees on disk (agent/* branches) with no live workspace row. */
  orphans: WorkspaceOrphan[];
  /** Live isolated rows whose worktree is missing on disk (DB→disk drift). */
  missingWorktrees: Array<{ workspaceId: string; worktreePath: string; repoRoot: string; status: string }>;
  /** Set when the pass was skipped entirely (currently: no online orchestrators). */
  reason?: string;
}

/**
 * The disk⇄DB reconcile pass. For every known repo root that an online
 * orchestrator owns (per `isPathWithinBase`), probe the host's worktrees and report
 * both directions of drift:
 *
 *  - `orphans`: `agent/*` worktrees on disk that no live (non-terminal) workspace
 *    row points at. Each is enriched with land-state from the host merge-preview;
 *    when that preview is unavailable, `safeToReap` is left undefined.
 *  - `missingWorktrees`: live isolated rows whose worktree path is not in the probe.
 *
 * Repos whose host cannot be probed are skipped silently, so an empty result does
 * not prove there are no orphans.
 *
 * @returns The orphans and missing worktrees found, plus `reason` when no
 *   orchestrator is online.
 */
export async function collectWorkspaceOrphans(): Promise<CollectOrphansResult> {
  const orchestrators = onlineOrchestrators();
  if (!orchestrators.length) return { orphans: [], missingWorktrees: [], reason: "no online orchestrators" };

  const all = listWorkspaces();
  const orphans: WorkspaceOrphan[] = [];
  const missingWorktrees: CollectOrphansResult["missingWorktrees"] = [];

  for (const repoRoot of knownRepoRoots(all)) {
    const orch = orchestrators.find((candidate) => candidate.apiUrl && isPathWithinBase(repoRoot, candidate.baseDir));
    if (!orch?.apiUrl) continue;
    const probe = await fetchHostProbe(orch.apiUrl, repoRoot);
    if (!probe?.worktrees) continue;

    // Split this repo's rows by liveness. A path can carry several rows over time
    // (e.g. an old terminal row plus a current live one), so liveness must never be
    // decided from a single "last row wins" lookup.
    const repoRows = all.filter((ws) => ws.repoRoot === repoRoot && ws.worktreePath);
    const liveRowsByPath = new Map(
      repoRows
        .filter((ws) => !TERMINAL_WORKSPACE_STATUSES.has(ws.status))
        .map((ws): [string, WorkspaceRecord] => [resolve(ws.worktreePath), ws]),
    );
    const terminalPaths = new Set(
      repoRows
        .filter((ws) => TERMINAL_WORKSPACE_STATUSES.has(ws.status))
        .map((ws) => resolve(ws.worktreePath)),
    );
    const onDisk = new Set(probe.worktrees.map((wt) => (wt.path ? resolve(wt.path) : "")).filter(Boolean));

    // DB→disk drift: a live isolated row whose worktree is no longer on disk.
    for (const [path, ws] of liveRowsByPath) {
      if (ws.mode === "isolated" && !onDisk.has(path)) {
        missingWorktrees.push({ workspaceId: ws.id, worktreePath: ws.worktreePath, repoRoot, status: ws.status });
      }
    }

    // disk→DB drift: a worktree on disk with no live row.
    const resolvedRepoRoot = resolve(repoRoot);
    for (const worktree of probe.worktrees) {
      if (!worktree.path) continue;
      const worktreeKey = resolve(worktree.path);
      if (worktreeKey === resolvedRepoRoot) continue; // the main checkout itself
      // Only agent-relay-created worktrees (agent/* branches) are reclaimable —
      // never touch a user's own linked worktrees.
      if (!worktree.branch?.startsWith("agent/")) continue;
      // Tracked & live: ANY live row on this path protects it, even when a
      // terminal row for the same path also exists.
      if (liveRowsByPath.has(worktreeKey)) continue;

      const orphan: WorkspaceOrphan = {
        worktreePath: worktree.path,
        repoRoot,
        branch: worktree.branch,
        headSha: worktree.headSha,
        hadTerminalRow: terminalPaths.has(worktreeKey),
      };
      const preview = await fetchWorktreeReapState(orch.apiUrl, worktree.path, probe.branch);
      if (preview && !preview.missing && !preview.error) {
        const state: WorktreeReapState = {
          landed: preview.landed,
          ahead: preview.ahead,
          unmergedAhead: preview.unmergedAhead,
          dirtyCount: preview.dirtyCount,
        };
        orphan.landed = preview.landed;
        orphan.ahead = preview.ahead;
        orphan.unmergedAhead = preview.unmergedAhead;
        orphan.dirty = (preview.dirtyCount ?? 0) > 0;
        orphan.safeToReap = worktreeReapable(state);
      }
      // No usable preview → safeToReap stays undefined (treated as not-safe by callers).
      orphans.push(orphan);
    }
  }

  return { orphans, missingWorktrees };
}
|
|
175
|
+
|
|
176
|
+
/**
 * Queues a `workspace.cleanup` command asking the owning orchestrator to remove an
 * orphaned worktree (and delete its branch), then emits the matching
 * `command.<status>` relay event.
 *
 * @param orch Orchestrator that owns the orphan's repo; its `agentId` is the command target.
 * @param orphan The orphaned worktree to remove.
 * @returns Id of the created command.
 */
function dispatchCleanup(orch: OnlineOrchestrator, orphan: WorkspaceOrphan): string {
  const { worktreePath, repoRoot, branch } = orphan;
  const command = createCommand({
    type: "workspace.cleanup",
    source: "system",
    target: orch.agentId,
    params: {
      action: "cleanup",
      worktreePath,
      repoRoot,
      branch,
      deleteBranch: true,
      reclaim: true,
      requestedBy: "workspace-orphan-reaper",
      requestedAt: Date.now(),
    },
  });
  const eventType = `command.${command.status}`;
  emitRelayEvent({ type: eventType, source: command.source, subject: command.id, data: { command } });
  return command.id;
}
|
|
195
|
+
|
|
196
|
+
/**
 * Scheduled reaper (maintenance job handler).
 *
 * For each orphan reported by `collectWorkspaceOrphans()` that an online
 * orchestrator owns:
 *  - `safeToReap === true`: dispatch a cleanup command and log an activity event,
 *    unless detect-only mode is on, in which case the orphan is skipped silently.
 *  - anything else (un-landed commits, dirty tree, or land-state unknown): log a
 *    "needs attention" activity event at most once per cooldown window; never remove.
 *
 * Live rows whose worktree vanished are reported the same way (observability only).
 * Cooldown entries for paths that no longer show up are then dropped so a later
 * re-orphaning announces immediately.
 *
 * @returns `{ skipped }` when collection was skipped, otherwise a summary with
 *   `scanned`, `reaped`, `flagged`, `missingWorktrees` and `reapEnabled`.
 */
export async function reapOrphanedWorktrees(): Promise<Record<string, unknown>> {
  const collected = await collectWorkspaceOrphans();
  if (collected.reason) return { skipped: collected.reason };
  const { orphans, missingWorktrees } = collected;

  const hosts = onlineOrchestrators();
  const reapEnabled = orphanWorktreeReapEnabled();
  const reaped: string[] = [];
  const flagged: string[] = [];
  const now = Date.now();

  // Returns true while `key` is still inside its cooldown window; otherwise stamps
  // it with `now` (starting a new window) and returns false.
  const inCooldown = (key: string): boolean => {
    const lastFlagged = flaggedAt.get(key) ?? 0;
    if (now - lastFlagged < UNLANDED_FLAG_COOLDOWN_MS) return true;
    flaggedAt.set(key, now);
    return false;
  };

  for (const orphan of orphans) {
    const host = hosts.find((candidate) => isPathWithinBase(orphan.repoRoot, candidate.baseDir));
    if (!host) continue;
    const label = orphan.branch ?? orphan.worktreePath;

    if (orphan.safeToReap !== true) {
      // Not safe (un-landed work, dirty tree, or un-probeable) — flag once per
      // cooldown, never remove. This is the stranded-work needs-attention entry.
      if (inCooldown(orphan.worktreePath)) continue;
      flagged.push(orphan.worktreePath);

      let detail: string;
      if (orphan.safeToReap === undefined) {
        detail = "host could not be probed for land-state";
      } else if (orphan.dirty) {
        detail = "uncommitted changes in the worktree";
      } else {
        detail = `${orphan.unmergedAhead ?? orphan.ahead ?? "?"} un-landed commit(s)`;
      }

      createActivityEvent({
        clientId: `workspace-orphan-stranded-${orphan.worktreePath}-${now}`,
        kind: "state",
        title: "Stranded worktree needs attention",
        body: `${label} in ${orphan.repoRoot} is orphaned (no live workspace row) and holds work that hasn't landed — ${detail}. Reclaim with force to discard, or recover the commits before removing.`,
        meta: label,
        icon: "ti-alert-triangle",
        view: "orchestrators",
        metadata: { source: "server", maintenanceJobId: "workspace-orphan-reaper", worktreePath: orphan.worktreePath, repoRoot: orphan.repoRoot, branch: orphan.branch, ahead: orphan.ahead, unmergedAhead: orphan.unmergedAhead, dirty: orphan.dirty, headSha: orphan.headSha },
      });
      continue;
    }

    // Safe to reap from here on.
    if (!reapEnabled) continue; // detect-only mode

    const commandId = dispatchCleanup(host, orphan);
    reaped.push(orphan.worktreePath);
    flaggedAt.delete(orphan.worktreePath);
    createActivityEvent({
      clientId: `workspace-orphan-reaped-${orphan.worktreePath}-${now}`,
      kind: "state",
      title: "Orphaned worktree reaped",
      body: `${label} — ${orphan.landed ? "work already landed" : "no work to preserve"}; removing the stale worktree`,
      meta: label,
      icon: "ti-trash",
      view: "orchestrators",
      metadata: { source: "server", maintenanceJobId: "workspace-orphan-reaper", worktreePath: orphan.worktreePath, repoRoot: orphan.repoRoot, commandId, landed: orphan.landed },
    });
  }

  // DB→disk drift is observability-only: a live row whose worktree vanished is
  // surfaced, not auto-deleted (the row may still be mid-land or recoverable).
  for (const missing of missingWorktrees) {
    if (inCooldown(`missing:${missing.worktreePath}`)) continue;
    createActivityEvent({
      clientId: `workspace-row-no-worktree-${missing.workspaceId}-${now}`,
      kind: "state",
      title: "Workspace row has no worktree on disk",
      body: `Workspace ${missing.workspaceId} (${missing.status}) points at ${missing.worktreePath}, which no longer exists on disk — disk/DB drift.`,
      meta: missing.workspaceId,
      icon: "ti-unlink",
      view: "orchestrators",
      metadata: { source: "server", maintenanceJobId: "workspace-orphan-reaper", workspaceId: missing.workspaceId, worktreePath: missing.worktreePath, status: missing.status },
    });
  }

  // Forget cooldown entries for orphans that are gone (reaped/recovered) so a
  // future re-orphaning of the same path re-announces immediately.
  const stillPresent = new Set<string>();
  for (const orphan of orphans) stillPresent.add(orphan.worktreePath);
  for (const missing of missingWorktrees) stillPresent.add(`missing:${missing.worktreePath}`);
  for (const key of flaggedAt.keys()) {
    if (stillPresent.has(key) || reaped.includes(key)) continue;
    flaggedAt.delete(key);
  }

  return {
    scanned: orphans.length,
    reaped,
    flagged,
    missingWorktrees: missingWorktrees.map((m) => m.workspaceId),
    reapEnabled,
  };
}
|
package/src/workspace-phase.ts
CHANGED
|
@@ -33,6 +33,35 @@ export const TERMINAL_WORKSPACE_STATUSES = new Set<WorkspaceStatus>(["cleaned",
|
|
|
33
33
|
// it's also where a failed auto-merge lands for a retry, see routes.ts.)
|
|
34
34
|
export const READY_TO_LAND_STATUSES = new Set<WorkspaceStatus>(["ready", "review_requested"]);
|
|
35
35
|
|
|
36
|
+
// Land-state shape a host reports for a worktree (subset of WorkspaceMergePreview
// / WorkspaceGitState — the fields that decide reapability). Kept structural so
// both the merge-preview path and a raw git-state probe satisfy it.
export interface WorktreeReapState {
  /** Work already in base (squash/cherry/PR-merged). Detection only under-reports. */
  landed?: boolean;
  /** Commits ahead of base by raw count (a squash-landed branch still shows >0). */
  ahead?: number;
  /** Commits ahead whose patch is NOT already in base — the squash-aware count. */
  unmergedAhead?: number;
  /** Uncommitted working-tree changes. */
  dirtyCount?: number;
}

/**
 * THE reap-safety invariant (#244): a worktree may be removed only when nothing
 * would be lost — clean tree AND (work already landed OR no commits ahead).
 *
 * Uncertainty is never reapable: a missing state, or a state that reports neither
 * `unmergedAhead` nor `ahead` (and is not `landed`), returns false so the worktree
 * is flagged for review instead of removed. A count that is absent is "unknown",
 * not "zero".
 *
 * NOTE(review): an absent `dirtyCount` is still read as "clean", as before —
 * confirm that hosts always report it.
 *
 * @param state Land-state reported for the worktree, or null/undefined when unavailable.
 * @returns true only when removing the worktree provably loses nothing.
 */
export function worktreeReapable(state: WorktreeReapState | null | undefined): boolean {
  if (!state) return false;
  if ((state.dirtyCount ?? 0) > 0) return false;
  if (state.landed === true) return true;
  // Prefer the squash-aware count; fall back to the raw count. When neither is
  // reported the result is undefined, which must not be mistaken for "0 ahead".
  const pendingCommits = state.unmergedAhead ?? state.ahead;
  return pendingCommits === 0;
}
|
|
64
|
+
|
|
36
65
|
// How long a workspace may sit in a ready-to-land status before the directive
|
|
37
66
|
// projection stops saying "healthy, just wait" and surfaces it as needs-attention
|
|
38
67
|
// (#242 watchdog). A clean auto-merge runs ~every 2 min, so a handful of missed
|