agent-relay-server 0.24.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/public/index.html +25 -8
- package/src/branch-landed.ts +111 -0
- package/src/config-store.ts +31 -0
- package/src/maintenance.ts +30 -20
- package/src/notify.ts +31 -0
- package/src/routes.ts +37 -35
- package/src/workspace-orphans.ts +289 -0
- package/src/workspace-phase.ts +80 -3
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-relay-server",
|
|
3
|
-
"version": "0.24.0",
|
|
3
|
+
"version": "0.26.0",
|
|
4
4
|
"description": "Lightweight HTTP message relay for inter-agent communication across machines",
|
|
5
5
|
"module": "src/index.ts",
|
|
6
6
|
"type": "module",
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
"CONTRIBUTING.md"
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
|
-
"agent-relay-sdk": "0.2.
|
|
36
|
+
"agent-relay-sdk": "0.2.15"
|
|
37
37
|
},
|
|
38
38
|
"scripts": {
|
|
39
39
|
"prepack": "bun run build:dashboard:bundle >&2",
|
package/public/index.html
CHANGED
|
@@ -10168,6 +10168,8 @@ function parseSseFrame(frame) {
|
|
|
10168
10168
|
}
|
|
10169
10169
|
//#endregion
|
|
10170
10170
|
//#region src/lib/api.ts
|
|
10171
|
+
var API_TIMEOUT_MS = 2e4;
|
|
10172
|
+
var SSE_STALE_MS = 35e3;
|
|
10171
10173
|
var authToken = "";
|
|
10172
10174
|
function setAuthToken(token) {
|
|
10173
10175
|
authToken = token;
|
|
@@ -10211,11 +10213,11 @@ function openTerminalWebSocket(orchestratorId, session) {
|
|
|
10211
10213
|
return new WebSocket(url);
|
|
10212
10214
|
}
|
|
10213
10215
|
function openRelayEventStream(token, handlers) {
|
|
10214
|
-
const abort = new AbortController();
|
|
10215
10216
|
const eventUrl = new URL("api/events", baseUrl()).toString();
|
|
10216
10217
|
let closed = false;
|
|
10217
10218
|
let retryMs = 5e3;
|
|
10218
10219
|
let reconnectTimer = null;
|
|
10220
|
+
let activeAbort = null;
|
|
10219
10221
|
const scheduleReconnect = () => {
|
|
10220
10222
|
if (closed) return;
|
|
10221
10223
|
reconnectTimer = setTimeout(connect, retryMs);
|
|
@@ -10226,6 +10228,11 @@ function openRelayEventStream(token, handlers) {
|
|
|
10226
10228
|
if (data.length > 0) handlers.message(event, data.join("\n"));
|
|
10227
10229
|
};
|
|
10228
10230
|
const connect = async () => {
|
|
10231
|
+
if (closed) return;
|
|
10232
|
+
const ac = new AbortController();
|
|
10233
|
+
activeAbort = ac;
|
|
10234
|
+
let lastFrameAt = Date.now();
|
|
10235
|
+
let staleTimer = null;
|
|
10229
10236
|
try {
|
|
10230
10237
|
const headers = { Accept: "text/event-stream" };
|
|
10231
10238
|
const effectiveToken = token || getAuthToken();
|
|
@@ -10233,10 +10240,14 @@ function openRelayEventStream(token, handlers) {
|
|
|
10233
10240
|
const response = await fetch(eventUrl, {
|
|
10234
10241
|
headers,
|
|
10235
10242
|
cache: "no-store",
|
|
10236
|
-
signal: abort.signal
|
|
10243
|
+
signal: ac.signal
|
|
10237
10244
|
});
|
|
10238
10245
|
if (!response.ok || !response.body) throw new Error(`SSE failed: ${response.status}`);
|
|
10239
10246
|
handlers.connected?.();
|
|
10247
|
+
lastFrameAt = Date.now();
|
|
10248
|
+
staleTimer = setInterval(() => {
|
|
10249
|
+
if (Date.now() - lastFrameAt > SSE_STALE_MS) ac.abort();
|
|
10250
|
+
}, 5e3);
|
|
10240
10251
|
const reader = response.body.getReader();
|
|
10241
10252
|
const decoder = new TextDecoder();
|
|
10242
10253
|
let buffer = "";
|
|
@@ -10246,6 +10257,7 @@ function openRelayEventStream(token, handlers) {
|
|
|
10246
10257
|
buffer += decoder.decode();
|
|
10247
10258
|
break;
|
|
10248
10259
|
}
|
|
10260
|
+
lastFrameAt = Date.now();
|
|
10249
10261
|
buffer += decoder.decode(value, { stream: true });
|
|
10250
10262
|
let frameEnd = buffer.indexOf("\n\n");
|
|
10251
10263
|
while (frameEnd >= 0) {
|
|
@@ -10256,6 +10268,7 @@ function openRelayEventStream(token, handlers) {
|
|
|
10256
10268
|
}
|
|
10257
10269
|
}
|
|
10258
10270
|
} catch {} finally {
|
|
10271
|
+
if (staleTimer) clearInterval(staleTimer);
|
|
10259
10272
|
if (closed) return;
|
|
10260
10273
|
handlers.disconnected?.();
|
|
10261
10274
|
scheduleReconnect();
|
|
@@ -10265,13 +10278,14 @@ function openRelayEventStream(token, handlers) {
|
|
|
10265
10278
|
return { close() {
|
|
10266
10279
|
closed = true;
|
|
10267
10280
|
if (reconnectTimer) clearTimeout(reconnectTimer);
|
|
10268
|
-
abort.abort();
|
|
10281
|
+
activeAbort?.abort();
|
|
10269
10282
|
} };
|
|
10270
10283
|
}
|
|
10271
10284
|
async function api(method, path, body) {
|
|
10272
10285
|
const opts = {
|
|
10273
10286
|
method,
|
|
10274
|
-
headers: {}
|
|
10287
|
+
headers: {},
|
|
10288
|
+
signal: AbortSignal.timeout(API_TIMEOUT_MS)
|
|
10275
10289
|
};
|
|
10276
10290
|
const headers = opts.headers;
|
|
10277
10291
|
if (authToken) headers["X-Agent-Relay-Token"] = authToken;
|
|
@@ -12991,10 +13005,13 @@ var useRelayStore = create$1()(persist((set, get) => ({
|
|
|
12991
13005
|
connectSSE() {
|
|
12992
13006
|
get().disconnectSSE();
|
|
12993
13007
|
set({ _es: openRelayEventStream(get().authToken, {
|
|
12994
|
-
connected: () =>
|
|
12995
|
-
|
|
12996
|
-
|
|
12997
|
-
|
|
13008
|
+
connected: () => {
|
|
13009
|
+
set(get().connectionError ? {
|
|
13010
|
+
connected: true,
|
|
13011
|
+
connectionError: false
|
|
13012
|
+
} : { connected: true });
|
|
13013
|
+
get().refreshLiveData();
|
|
13014
|
+
},
|
|
12998
13015
|
disconnected: () => set({ connected: false }),
|
|
12999
13016
|
message: (event, data) => {
|
|
13000
13017
|
if (event === "connected") return;
|
package/src/branch-landed.ts
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import { emitRelayEvent } from "./events";
|
|
2
|
+
import { getNotificationsConfig } from "./config-store";
|
|
3
|
+
import { notifySystemMessage } from "./notify";
|
|
4
|
+
import { listAgents } from "./db";
|
|
5
|
+
import { isAgentOnline } from "./agent-ref";
|
|
6
|
+
import type { AgentCard, WorkspaceRecord } from "./types";
|
|
7
|
+
|
|
8
|
+
export interface BranchLandedInput {
|
|
9
|
+
/**
|
|
10
|
+
* The workspace as it was AT land time — `branch` must be the branch that landed,
|
|
11
|
+
* captured before any land-and-continue recycle repoints the row (#206). `ownerAgentId`
|
|
12
|
+
* is the author the "landed" notice is pushed to.
|
|
13
|
+
*/
|
|
14
|
+
workspace: Pick<WorkspaceRecord, "id" | "repoRoot" | "branch" | "baseRef" | "ownerAgentId">;
|
|
15
|
+
/** SHA the base now points at after the land. */
|
|
16
|
+
mergedSha?: string;
|
|
17
|
+
/** Subject line of the landed commit, when the orchestrator reported it. */
|
|
18
|
+
subject?: string;
|
|
19
|
+
/** Fresh branch the worktree was recycled onto (land-and-continue), if any. */
|
|
20
|
+
newBranch?: string;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* #239 — turn an authoritative land completion into a relay-driven push so the author
|
|
25
|
+
* stops polling to learn it merged. Always emits the durable `branch.landed` event (the
|
|
26
|
+
* rest of the bus does the same); only the agent-facing push is gated, since it wakes the
|
|
27
|
+
* recipient. Offline authors get it on next poll via store-ahead (#234).
|
|
28
|
+
*
|
|
29
|
+
* Agents-on-main fan-out (the second #239 recipient class) lands in a follow-up commit.
|
|
30
|
+
*/
|
|
31
|
+
export function notifyBranchLanded(input: BranchLandedInput): void {
|
|
32
|
+
const { workspace } = input;
|
|
33
|
+
const base = workspace.baseRef ?? "base";
|
|
34
|
+
const landedBranch = workspace.branch;
|
|
35
|
+
const shortSha = input.mergedSha ? input.mergedSha.slice(0, 12) : undefined;
|
|
36
|
+
|
|
37
|
+
emitRelayEvent({
|
|
38
|
+
type: "branch.landed",
|
|
39
|
+
source: "server",
|
|
40
|
+
subject: workspace.id,
|
|
41
|
+
data: {
|
|
42
|
+
workspaceId: workspace.id,
|
|
43
|
+
repoRoot: workspace.repoRoot,
|
|
44
|
+
branch: landedBranch,
|
|
45
|
+
base,
|
|
46
|
+
sha: input.mergedSha,
|
|
47
|
+
subject: input.subject,
|
|
48
|
+
author: workspace.ownerAgentId,
|
|
49
|
+
newBranch: input.newBranch,
|
|
50
|
+
},
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
const config = getNotificationsConfig();
|
|
54
|
+
if (!config.enabled || !config.branchLanded) return;
|
|
55
|
+
|
|
56
|
+
const author = workspace.ownerAgentId;
|
|
57
|
+
const shaLabel = shortSha ? ` as \`${shortSha}\`` : "";
|
|
58
|
+
const subjectLabel = input.subject ? ` — "${input.subject}"` : "";
|
|
59
|
+
const payload = {
|
|
60
|
+
kind: "branch.landed",
|
|
61
|
+
workspaceId: workspace.id,
|
|
62
|
+
repoRoot: workspace.repoRoot,
|
|
63
|
+
branch: landedBranch,
|
|
64
|
+
base,
|
|
65
|
+
sha: input.mergedSha,
|
|
66
|
+
author,
|
|
67
|
+
newBranch: input.newBranch,
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
// The branch author cares most — push regardless of online (store-ahead delivers it on
|
|
71
|
+
// next poll if they've moved on, #234). They land-and-continue onto the recycled branch.
|
|
72
|
+
if (author) {
|
|
73
|
+
const branchLabel = landedBranch ? `\`${landedBranch}\`` : "Your branch";
|
|
74
|
+
const continueLabel = input.newBranch
|
|
75
|
+
? ` You're now on \`${input.newBranch}\` — keep working there.`
|
|
76
|
+
: " Worktree reclaimed.";
|
|
77
|
+
notifySystemMessage(author, {
|
|
78
|
+
subject: "Your branch landed",
|
|
79
|
+
body: `✅ ${branchLabel} landed on \`${base}\`${shaLabel}${subjectLabel}.${continueLabel}`,
|
|
80
|
+
payload,
|
|
81
|
+
});
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// Agents on `main` — those whose cwd IS the main checkout (not an isolated worktree) —
|
|
85
|
+
// get a live "merged" notice so a long-lived main agent's context stays current as work
|
|
86
|
+
// lands under it (#239). Online-only: a stale/exited main session needs no wake, and
|
|
87
|
+
// store-ahead to it would just pile up noise. The author is in a worktree (cwd ≠ repoRoot)
|
|
88
|
+
// so it's naturally excluded; guard anyway for shared-mode owners.
|
|
89
|
+
const branchLabel = landedBranch ? `\`${landedBranch}\`` : "A branch";
|
|
90
|
+
const authorLabel = author ? ` by \`${author}\`` : "";
|
|
91
|
+
for (const agent of agentsOnMain(workspace.repoRoot, author)) {
|
|
92
|
+
notifySystemMessage(agent.id, {
|
|
93
|
+
subject: `Merged to ${base}`,
|
|
94
|
+
body: `🔀 ${branchLabel}${authorLabel} merged to \`${base}\`${shaLabel}${subjectLabel}.`,
|
|
95
|
+
payload,
|
|
96
|
+
});
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// An agent is "on `main`" when its registered cwd equals the repo's main checkout — i.e. it
|
|
101
|
+
// works in the base, not an isolated worktree. Excludes the author, pseudo agents (system/
|
|
102
|
+
// user), channels, and offline sessions.
|
|
103
|
+
function agentsOnMain(repoRoot: string, author: string | undefined): AgentCard[] {
|
|
104
|
+
return listAgents().filter((a) => {
|
|
105
|
+
if (a.id === author || a.id === "system" || a.id === "user") return false;
|
|
106
|
+
if (a.kind === "channel" || a.meta?.kind === "channel") return false;
|
|
107
|
+
const cwd = a.meta?.cwd;
|
|
108
|
+
if (typeof cwd !== "string" || cwd !== repoRoot) return false;
|
|
109
|
+
return isAgentOnline(a);
|
|
110
|
+
});
|
|
111
|
+
}
|
package/src/config-store.ts
CHANGED
|
@@ -10,6 +10,7 @@ import type {
|
|
|
10
10
|
InsightsConfig,
|
|
11
11
|
ManagedAgentState,
|
|
12
12
|
ManagedAgentStatus,
|
|
13
|
+
NotificationsConfig,
|
|
13
14
|
SpawnApprovalMode,
|
|
14
15
|
SpawnPolicy,
|
|
15
16
|
SpawnProvider,
|
|
@@ -24,6 +25,8 @@ const STEWARD_NAMESPACE = "steward";
|
|
|
24
25
|
const STEWARD_KEY = "default";
|
|
25
26
|
const INSIGHTS_NAMESPACE = "insights";
|
|
26
27
|
const INSIGHTS_KEY = "default";
|
|
28
|
+
const NOTIFICATIONS_NAMESPACE = "notifications";
|
|
29
|
+
const NOTIFICATIONS_KEY = "default";
|
|
27
30
|
const WORKSPACE_NAMESPACE = "workspace";
|
|
28
31
|
const WORKSPACE_KEY = "default";
|
|
29
32
|
const VALID_PROFILE_PROVIDERS = ["any", "claude", "codex"] as const;
|
|
@@ -460,6 +463,26 @@ function validateInsightsConfig(value: unknown): InsightsConfig {
|
|
|
460
463
|
};
|
|
461
464
|
}
|
|
462
465
|
|
|
466
|
+
// Relay-driven lifecycle push notifications (#239 event bus). Default-on; the
|
|
467
|
+
// operator can flip the master switch or individual events off via the generic
|
|
468
|
+
// config route. Push messages wake recipients, so they must be suppressible.
|
|
469
|
+
const NOTIFICATIONS_CONFIG_DEFAULTS: NotificationsConfig = {
|
|
470
|
+
enabled: true,
|
|
471
|
+
branchLanded: true,
|
|
472
|
+
};
|
|
473
|
+
|
|
474
|
+
function validateNotificationsConfig(value: unknown): NotificationsConfig {
|
|
475
|
+
if (!isRecord(value)) throw new ValidationError("notifications config value must be an object");
|
|
476
|
+
return {
|
|
477
|
+
enabled: value.enabled === undefined
|
|
478
|
+
? NOTIFICATIONS_CONFIG_DEFAULTS.enabled
|
|
479
|
+
: cleanBoolean(value.enabled, "enabled"),
|
|
480
|
+
branchLanded: value.branchLanded === undefined
|
|
481
|
+
? NOTIFICATIONS_CONFIG_DEFAULTS.branchLanded
|
|
482
|
+
: cleanBoolean(value.branchLanded, "branchLanded"),
|
|
483
|
+
};
|
|
484
|
+
}
|
|
485
|
+
|
|
463
486
|
// Global workspace provisioning config for isolated worktrees (#159 follow-up).
|
|
464
487
|
// Defaults seed the two untracked paths an isolated agent almost always needs:
|
|
465
488
|
// the agent guide and the rig config, both gitignored so a fresh worktree lacks them.
|
|
@@ -487,6 +510,7 @@ function normalizeValue(namespace: string, key: string, value: unknown): unknown
|
|
|
487
510
|
if (namespace === AGENT_PROFILE_NAMESPACE) return validateAgentProfile(key, value);
|
|
488
511
|
if (namespace === STEWARD_NAMESPACE) return validateStewardConfig(value);
|
|
489
512
|
if (namespace === INSIGHTS_NAMESPACE) return validateInsightsConfig(value);
|
|
513
|
+
if (namespace === NOTIFICATIONS_NAMESPACE) return validateNotificationsConfig(value);
|
|
490
514
|
if (namespace === WORKSPACE_NAMESPACE) return validateWorkspaceConfig(value);
|
|
491
515
|
if (JSON.stringify(value) === undefined) throw new ValidationError("value must be valid JSON");
|
|
492
516
|
return value;
|
|
@@ -620,6 +644,13 @@ export function getInsightsConfigEntry(): ConfigEntry<InsightsConfig> {
|
|
|
620
644
|
};
|
|
621
645
|
}
|
|
622
646
|
|
|
647
|
+
/** Lifecycle-notification config (#239), merged over defaults (always usable). */
|
|
648
|
+
export function getNotificationsConfig(): NotificationsConfig {
|
|
649
|
+
const entry = getConfig<Partial<NotificationsConfig>>(NOTIFICATIONS_NAMESPACE, NOTIFICATIONS_KEY);
|
|
650
|
+
if (!entry) return { ...NOTIFICATIONS_CONFIG_DEFAULTS };
|
|
651
|
+
return validateNotificationsConfig({ ...NOTIFICATIONS_CONFIG_DEFAULTS, ...entry.value });
|
|
652
|
+
}
|
|
653
|
+
|
|
623
654
|
export function setInsightsConfig(value: unknown, updatedBy?: string): ConfigEntry<InsightsConfig> {
|
|
624
655
|
return setConfig(INSIGHTS_NAMESPACE, INSIGHTS_KEY, value as InsightsConfig, updatedBy);
|
|
625
656
|
}
|
package/src/maintenance.ts
CHANGED
|
@@ -27,14 +27,14 @@ import {
|
|
|
27
27
|
releaseExpiredMergeLeases,
|
|
28
28
|
releaseOrphanedTasks,
|
|
29
29
|
runDbMaintenance,
|
|
30
|
-
sendMessage,
|
|
31
30
|
sweepArtifacts,
|
|
32
31
|
updateWorkspaceStatus,
|
|
33
32
|
} from "./db";
|
|
34
33
|
import type { WorkspaceMergePreview, WorkspaceRecord, WorkspaceStatus } from "./types";
|
|
35
34
|
import { requestWorkspaceMerge } from "./workspace-merge";
|
|
36
35
|
import { workspaceActiveClaim } from "./workspace-claim";
|
|
37
|
-
import {
|
|
36
|
+
import { reapOrphanedWorktrees } from "./workspace-orphans";
|
|
37
|
+
import { READY_TO_LAND_STATUSES, TERMINAL_WORKSPACE_STATUSES } from "./workspace-phase";
|
|
38
38
|
import { errMessage, RELAY_TOKEN_HEADER } from "agent-relay-sdk";
|
|
39
39
|
import { getStewardConfig } from "./config-store";
|
|
40
40
|
import { ensureRepoSteward } from "./steward";
|
|
@@ -46,11 +46,11 @@ import {
|
|
|
46
46
|
emitAgentStatus,
|
|
47
47
|
emitMessageClaimReleased,
|
|
48
48
|
emitMessageExpired,
|
|
49
|
-
emitNewMessage,
|
|
50
49
|
emitOrchestratorStatus,
|
|
51
50
|
emitPoolBindingChanged,
|
|
52
51
|
emitTaskChanged,
|
|
53
52
|
} from "./sse";
|
|
53
|
+
import { notifySystemMessage } from "./notify";
|
|
54
54
|
import { pruneExpiredTokenRecords } from "./token-db";
|
|
55
55
|
import type { Command, MaintenanceJob, MaintenanceJobRun } from "./types";
|
|
56
56
|
|
|
@@ -67,6 +67,10 @@ const DB_VACUUM_EVERY = Number(process.env.AGENT_RELAY_DB_VACUUM_EVERY) || 7;
|
|
|
67
67
|
let dbMaintenanceRuns = 0;
|
|
68
68
|
const WORKSPACE_REVIEW_TTL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_REVIEW_TTL_MS) || 3 * DAY_MS;
|
|
69
69
|
const WORKSPACE_GC_INTERVAL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_GC_INTERVAL_MS) || 60 * 60 * 1000;
|
|
70
|
+
// Disk⇄DB orphan reconcile cadence (#244). Runs on start for a boot-time pass,
|
|
71
|
+
// then periodically — orphans accrue slowly (one per crashed/killed session), so
|
|
72
|
+
// a 30-min sweep is plenty without hammering the hosts with probes.
|
|
73
|
+
const WORKSPACE_ORPHAN_REAPER_INTERVAL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_ORPHAN_REAPER_INTERVAL_MS) || 30 * 60 * 1000;
|
|
70
74
|
// Deterministic auto-land (Layer 0): merge clean fast-forwards with no human in
|
|
71
75
|
// the loop. Default on for the seamless workflow; set AGENT_RELAY_WORKSPACE_AUTO_MERGE=0
|
|
72
76
|
// to require a manual or steward merge per repo. Read at call-time so operators can
|
|
@@ -83,7 +87,10 @@ const STEWARD_WAKE_COOLDOWN_MS = Number(process.env.AGENT_RELAY_STEWARD_WAKE_COO
|
|
|
83
87
|
const stewardEscalationMs = () => Number(process.env.AGENT_RELAY_WORKSPACE_STEWARD_ESCALATION_MS) || 60 * 60 * 1000;
|
|
84
88
|
const stewardFallbackTarget = () => (process.env.AGENT_RELAY_WORKSPACE_STEWARD_FALLBACK || "").trim();
|
|
85
89
|
// Statuses that need an owner — a stranded one of these is what escalation rescues.
|
|
86
|
-
|
|
90
|
+
// Derived from the shared ready-to-land set (#242) plus `conflict`, so a stranded
|
|
91
|
+
// `ready` worktree (no online steward) escalates to the fallback target instead of
|
|
92
|
+
// rotting silently — same gap that left the original #242 branch parked.
|
|
93
|
+
const STRANDABLE_STATUSES = new Set<WorkspaceStatus>([...READY_TO_LAND_STATUSES, "conflict"]);
|
|
87
94
|
// Live statuses worth scanning. Terminal (cleaned/merged/abandoned) and
|
|
88
95
|
// in-flight (cleanup_requested) states are skipped.
|
|
89
96
|
const CONFLICT_SCAN_STATUSES = new Set<WorkspaceStatus>(["active", "ready", "review_requested", "merge_planned", "conflict"]);
|
|
@@ -394,7 +401,7 @@ const definitions: MaintenanceJobDefinition[] = [
|
|
|
394
401
|
{
|
|
395
402
|
id: "workspace-auto-merge",
|
|
396
403
|
title: "Workspace auto-merge",
|
|
397
|
-
description: "Auto-merge any non-conflicting review_requested worktree into base under the per-repo lease (rebasing when the base moved on); only real or unknown conflicts are left for the steward.",
|
|
404
|
+
description: "Auto-merge any non-conflicting ready/review_requested worktree into base under the per-repo lease (rebasing when the base moved on); only real or unknown conflicts are left for the steward.",
|
|
398
405
|
intervalMs: WORKSPACE_AUTO_MERGE_INTERVAL_MS,
|
|
399
406
|
runOnStart: false,
|
|
400
407
|
timeoutMs: 60 * 1000,
|
|
@@ -409,6 +416,15 @@ const definitions: MaintenanceJobDefinition[] = [
|
|
|
409
416
|
timeoutMs: 60 * 1000,
|
|
410
417
|
handler: workspaceGC,
|
|
411
418
|
},
|
|
419
|
+
{
|
|
420
|
+
id: "workspace-orphan-reaper",
|
|
421
|
+
title: "Workspace orphan reaper",
|
|
422
|
+
description: "Reconcile disk⇄DB: reap orphaned worktrees whose work has landed (or is empty), flag orphans holding un-landed work as needs-attention instead of deleting, and report rows whose worktree vanished. git worktree prune can't do this — it no-ops while the directory still exists.",
|
|
423
|
+
intervalMs: WORKSPACE_ORPHAN_REAPER_INTERVAL_MS,
|
|
424
|
+
runOnStart: true,
|
|
425
|
+
timeoutMs: 2 * 60 * 1000,
|
|
426
|
+
handler: reapOrphanedWorktrees,
|
|
427
|
+
},
|
|
412
428
|
];
|
|
413
429
|
|
|
414
430
|
function workspacePathWithinBase(path: string | undefined, baseDir: string | undefined): boolean {
|
|
@@ -532,15 +548,11 @@ function wakeRepoSteward(ws: WorkspaceRecord, reason: string): string | null {
|
|
|
532
548
|
const policyName = ensureRepoSteward(ws.repoRoot);
|
|
533
549
|
if (!policyName) return null;
|
|
534
550
|
try {
|
|
535
|
-
|
|
536
|
-
from: "system",
|
|
537
|
-
to: `policy:${policyName}`,
|
|
538
|
-
kind: "system",
|
|
551
|
+
notifySystemMessage(`policy:${policyName}`, {
|
|
539
552
|
subject: `Steward: ${ws.status} workspace needs attention`,
|
|
540
553
|
body: `Workspace \`${ws.branch ?? ws.id}\` (id ${ws.id}) in ${ws.repoRoot} is ${ws.status} and could not auto-land (${reason}). Claim it first so auto-merge yields: \`agent-relay workspace claim --id ${ws.id} --purpose steward\`. Inspect: \`agent-relay steward inspect ${ws.id}\`. Then cd into ${ws.worktreePath}, rebase onto ${ws.baseRef ?? "base"}, resolve, run checks, and land: \`agent-relay workspace land --id ${ws.id} --strategy rebase-ff\` — or \`agent-relay workspace release --id ${ws.id}\` and escalate if you can't.`,
|
|
541
554
|
payload: { kind: "workspace.steward-task", workspaceId: ws.id, repoRoot: ws.repoRoot, worktreePath: ws.worktreePath, branch: ws.branch, baseRef: ws.baseRef, status: ws.status, reason },
|
|
542
555
|
});
|
|
543
|
-
emitNewMessage(msg);
|
|
544
556
|
getLifecycleManager().onMessageForPolicy(policyName);
|
|
545
557
|
patchWorkspaceMetadata(ws.id, { stewardWokenAt: Date.now(), stewardPolicy: policyName });
|
|
546
558
|
return policyName;
|
|
@@ -631,15 +643,11 @@ async function scanWorkspaceConflicts(): Promise<Record<string, unknown>> {
|
|
|
631
643
|
if (woke) notifiedStewards.push(woke);
|
|
632
644
|
} else if (ws.stewardAgentId) {
|
|
633
645
|
try {
|
|
634
|
-
|
|
635
|
-
from: "system",
|
|
636
|
-
to: ws.stewardAgentId,
|
|
637
|
-
kind: "system",
|
|
646
|
+
notifySystemMessage(ws.stewardAgentId, {
|
|
638
647
|
subject: "Workspace merge conflict",
|
|
639
648
|
body: `Workspace \`${ws.branch ?? ws.id}\` in ${ws.repoRoot} can no longer merge cleanly into ${p.baseRef ?? "base"} (${p.ahead ?? "?"} ahead, ${p.behind ?? "?"} behind). As repo steward, please coordinate resolution.`,
|
|
640
649
|
payload: { kind: "workspace.conflict", workspaceId: ws.id, repoRoot: ws.repoRoot, branch: ws.branch, baseRef: p.baseRef, ahead: p.ahead, behind: p.behind },
|
|
641
650
|
});
|
|
642
|
-
emitNewMessage(msg);
|
|
643
651
|
notifiedStewards.push(ws.stewardAgentId);
|
|
644
652
|
} catch {
|
|
645
653
|
// Steward unregistered/stale — the activity event still records it.
|
|
@@ -657,9 +665,11 @@ async function scanWorkspaceConflicts(): Promise<Record<string, unknown>> {
|
|
|
657
665
|
return { scanned: candidates.length, flagged, cleared, merged, notifiedStewards };
|
|
658
666
|
}
|
|
659
667
|
|
|
660
|
-
// Deterministic auto-land (Layer 0, issue #167 / #207). Walk the "ready to
|
|
661
|
-
// queue (
|
|
662
|
-
//
|
|
668
|
+
// Deterministic auto-land (Layer 0, issue #167 / #207 / #242). Walk the "ready to
|
|
669
|
+
// land" queue (isolated worktrees in any READY_TO_LAND status — `ready` from
|
|
670
|
+
// `relay_workspace_ready`, or `review_requested` from a failed-merge retry) and
|
|
671
|
+
// land any whose merge is predicted conflict-free, via the shared lease-serialized
|
|
672
|
+
// merge helper — even
|
|
663
673
|
// when the base moved on (behind>0): mergeRebaseFf rebases onto the current base
|
|
664
674
|
// before fast-forwarding. Only a predicted conflict or an unknown merge state is
|
|
665
675
|
// left for the steward; clean parallel work lands with no agent in the loop.
|
|
@@ -669,7 +679,7 @@ async function autoMergeCleanFastForwards(): Promise<Record<string, unknown>> {
|
|
|
669
679
|
if (!orchestrators.length) return { scanned: 0, skipped: "no online orchestrators" };
|
|
670
680
|
|
|
671
681
|
const candidates = listWorkspaces().filter(
|
|
672
|
-
(ws) => ws.mode === "isolated" && Boolean(ws.worktreePath) && ws.status
|
|
682
|
+
(ws) => ws.mode === "isolated" && Boolean(ws.worktreePath) && READY_TO_LAND_STATUSES.has(ws.status),
|
|
673
683
|
);
|
|
674
684
|
const stewardEnabled = getStewardConfig().enabled;
|
|
675
685
|
const merged: string[] = [];
|
|
@@ -738,7 +748,7 @@ async function autoMergeCleanFastForwards(): Promise<Record<string, unknown>> {
|
|
|
738
748
|
function notifyTarget(target: string, subject: string, body: string, payload: Record<string, unknown>): string | null {
|
|
739
749
|
if (!target) return null;
|
|
740
750
|
try {
|
|
741
|
-
|
|
751
|
+
notifySystemMessage(target, { subject, body, payload });
|
|
742
752
|
return target;
|
|
743
753
|
} catch {
|
|
744
754
|
return null;
|
package/src/notify.ts
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { sendMessage } from "./db";
|
|
2
|
+
import { emitNewMessage } from "./sse";
|
|
3
|
+
import type { Message, MessageKind } from "./types";
|
|
4
|
+
|
|
5
|
+
export interface SystemNotifyOptions {
|
|
6
|
+
subject?: string;
|
|
7
|
+
body: string;
|
|
8
|
+
payload?: Record<string, unknown>;
|
|
9
|
+
/** Defaults to "system" — a bypass-targeting kind that wakes the recipient like a prompt. */
|
|
10
|
+
kind?: MessageKind;
|
|
11
|
+
/** Sender id; defaults to "system". */
|
|
12
|
+
from?: string;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Post a system DM to one agent and fan it out over the bus. This is the one home for
|
|
17
|
+
* "relay tells an agent something happened" — store-ahead delivers it on next poll if the
|
|
18
|
+
* recipient is offline (#234). Used by the GC sweep (maintenance) and lifecycle events (#239).
|
|
19
|
+
*/
|
|
20
|
+
export function notifySystemMessage(to: string, opts: SystemNotifyOptions): Message {
|
|
21
|
+
const msg = sendMessage({
|
|
22
|
+
from: opts.from ?? "system",
|
|
23
|
+
to,
|
|
24
|
+
kind: opts.kind ?? "system",
|
|
25
|
+
subject: opts.subject,
|
|
26
|
+
body: opts.body,
|
|
27
|
+
payload: opts.payload,
|
|
28
|
+
});
|
|
29
|
+
emitNewMessage(msg);
|
|
30
|
+
return msg;
|
|
31
|
+
}
|
package/src/routes.ts
CHANGED
|
@@ -177,6 +177,8 @@ import {
|
|
|
177
177
|
WORKSPACE_ACTIONS,
|
|
178
178
|
} from "./workspace-actions";
|
|
179
179
|
import { describeWorkspacePhase, landReceipt, TERMINAL_WORKSPACE_STATUSES } from "./workspace-phase";
|
|
180
|
+
import { notifyBranchLanded } from "./branch-landed";
|
|
181
|
+
import { collectWorkspaceOrphans } from "./workspace-orphans";
|
|
180
182
|
import type { WorkspaceDiagnostics, WorkspaceGitState, WorkspaceRecord } from "./types";
|
|
181
183
|
import {
|
|
182
184
|
getComponentAuth,
|
|
@@ -3878,41 +3880,14 @@ const getWorkspaceDiff: Handler = (req, params) => {
|
|
|
3878
3880
|
};
|
|
3879
3881
|
|
|
3880
3882
|
// Worktrees found on disk (agent/* branches) with no live workspace row — left
|
|
3881
|
-
// behind by crashes or failed cleanups. Probes each known repo's owning host
|
|
3882
|
-
//
|
|
3883
|
+
// behind by crashes or failed cleanups. Probes each known repo's owning host and
|
|
3884
|
+
// subtracts live DB rows, enriching each with land-state so reap-safe cruft is
|
|
3885
|
+
// distinguishable from stranded work. Also reports the inverse drift (live rows
|
|
3886
|
+
// whose worktree vanished). Discovery is shared with the scheduled reaper
|
|
3887
|
+
// (workspace-orphan-reaper). Reclaim via POST .../orphans/reclaim.
|
|
3883
3888
|
const getWorkspaceOrphans: Handler = async () => {
|
|
3884
|
-
const
|
|
3885
|
-
|
|
3886
|
-
const all = listWorkspaces();
|
|
3887
|
-
const repoRoots = [...new Set(all.map((ws) => ws.repoRoot).filter(Boolean))];
|
|
3888
|
-
const headers: Record<string, string> = {};
|
|
3889
|
-
const relayToken = process.env.AGENT_RELAY_TOKEN;
|
|
3890
|
-
if (relayToken) headers[RELAY_TOKEN_HEADER] = relayToken;
|
|
3891
|
-
const orphans: WorkspaceOrphan[] = [];
|
|
3892
|
-
|
|
3893
|
-
for (const repoRoot of repoRoots) {
|
|
3894
|
-
const orch = orchestrators.find((candidate) => candidate.apiUrl && isPathWithinBase(repoRoot, candidate.baseDir));
|
|
3895
|
-
if (!orch?.apiUrl) continue;
|
|
3896
|
-
let probe: WorkspaceProbe | undefined;
|
|
3897
|
-
try {
|
|
3898
|
-
const res = await fetch(`${orch.apiUrl}/api/workspace/probe?path=${encodeURIComponent(repoRoot)}`, { headers, signal: AbortSignal.timeout(10_000) });
|
|
3899
|
-
if (!res.ok) continue;
|
|
3900
|
-
probe = await res.json() as WorkspaceProbe;
|
|
3901
|
-
} catch {
|
|
3902
|
-
continue;
|
|
3903
|
-
}
|
|
3904
|
-
const rowsByPath = new Map(all.filter((ws) => ws.repoRoot === repoRoot && ws.worktreePath).map((ws) => [resolve(ws.worktreePath), ws]));
|
|
3905
|
-
for (const worktree of probe?.worktrees ?? []) {
|
|
3906
|
-
if (!worktree.path || resolve(worktree.path) === resolve(repoRoot)) continue;
|
|
3907
|
-
// Only agent-relay-created worktrees (agent/* branches) are reclaimable —
|
|
3908
|
-
// never touch a user's own linked worktrees.
|
|
3909
|
-
if (!worktree.branch?.startsWith("agent/")) continue;
|
|
3910
|
-
const row = rowsByPath.get(resolve(worktree.path));
|
|
3911
|
-
if (row && !TERMINAL_WORKSPACE_STATUSES.has(row.status)) continue; // tracked & live
|
|
3912
|
-
orphans.push({ worktreePath: worktree.path, repoRoot, branch: worktree.branch, headSha: worktree.headSha, hadTerminalRow: Boolean(row) });
|
|
3913
|
-
}
|
|
3914
|
-
}
|
|
3915
|
-
return json({ orphans });
|
|
3889
|
+
const { orphans, missingWorktrees, reason } = await collectWorkspaceOrphans();
|
|
3890
|
+
return json(reason ? { orphans, missingWorktrees, reason } : { orphans, missingWorktrees });
|
|
3916
3891
|
};
|
|
3917
3892
|
|
|
3918
3893
|
const postWorkspaceOrphanReclaim: Handler = async (req) => {
|
|
@@ -3926,11 +3901,25 @@ const postWorkspaceOrphanReclaim: Handler = async (req) => {
|
|
|
3926
3901
|
const repoRoot = cleanString(parsed.body.repoRoot, "repoRoot", { max: 1000 });
|
|
3927
3902
|
const branch = cleanString(parsed.body.branch, "branch", { max: 240 });
|
|
3928
3903
|
if (!worktreePath || !repoRoot) return error("worktreePath and repoRoot required", 400);
|
|
3904
|
+
const force = parsed.body.force === true;
|
|
3929
3905
|
// Refuse to reclaim a path that still backs a live workspace row.
|
|
3930
3906
|
const live = listWorkspaces().find((ws) => ws.worktreePath && resolve(ws.worktreePath) === resolve(worktreePath) && !TERMINAL_WORKSPACE_STATUSES.has(ws.status));
|
|
3931
3907
|
if (live) return error(`path backs live workspace ${live.id}; clean it through the workspace, not orphan reclaim`, 409);
|
|
3932
3908
|
const orch = listOrchestrators().find((candidate) => candidate.status === "online" && isPathWithinBase(repoRoot, candidate.baseDir));
|
|
3933
3909
|
if (!orch) return error("no online orchestrator owns this path", 409);
|
|
3910
|
+
// Land-safety gate (#244): reclaim force-removes the worktree, so refuse when
|
|
3911
|
+
// it holds un-landed work unless the caller explicitly opts into discarding
|
|
3912
|
+
// it. Mirrors the scheduled reaper — never destroy work on uncertainty.
|
|
3913
|
+
if (!force) {
|
|
3914
|
+
const { orphans } = await collectWorkspaceOrphans();
|
|
3915
|
+
const target = orphans.find((o) => resolve(o.worktreePath) === resolve(worktreePath));
|
|
3916
|
+
if (target && target.safeToReap !== true) {
|
|
3917
|
+
const why = target.safeToReap === undefined
|
|
3918
|
+
? "land-state could not be determined"
|
|
3919
|
+
: target.dirty ? "uncommitted changes" : `${target.unmergedAhead ?? target.ahead ?? "?"} un-landed commit(s)`;
|
|
3920
|
+
return error(`worktree holds un-landed work (${why}); recover it first, or pass {"force":true} to discard`, 409);
|
|
3921
|
+
}
|
|
3922
|
+
}
|
|
3934
3923
|
const command = createCommand({
|
|
3935
3924
|
type: "workspace.cleanup",
|
|
3936
3925
|
source: "system",
|
|
@@ -4478,6 +4467,9 @@ const patchCommand: Handler = async (req, params) => {
|
|
|
4478
4467
|
const workspaceId = cleanString(command.result.workspaceId, "result.workspaceId", { max: 160 });
|
|
4479
4468
|
const resultStatus = optionalEnum(command.result.status, "result.status", VALID_WORKSPACE_STATUSES) as WorkspaceStatus | undefined;
|
|
4480
4469
|
if (workspaceId && resultStatus) {
|
|
4470
|
+
// Snapshot the row BEFORE the recycle repoints `branch` (#206) — the landed
|
|
4471
|
+
// branch name + author (#239 branch.landed push) come from this pre-mutation state.
|
|
4472
|
+
const landedWorkspace = getWorkspace(workspaceId);
|
|
4481
4473
|
updateWorkspaceStatus(workspaceId, resultStatus, {
|
|
4482
4474
|
mergeResult: command.result,
|
|
4483
4475
|
mergeCommandId: command.id,
|
|
@@ -4491,10 +4483,20 @@ const patchCommand: Handler = async (req, params) => {
|
|
|
4491
4483
|
// Land-and-continue (#206): the worktree was recycled onto a fresh branch.
|
|
4492
4484
|
// Repoint the row so the next merge targets the live branch, not the deleted one.
|
|
4493
4485
|
const newBranch = cleanString(command.result.newBranch, "result.newBranch", { max: 240 });
|
|
4486
|
+
const mergedSha = cleanString(command.result.mergedSha, "result.mergedSha", { max: 64 });
|
|
4494
4487
|
if (newBranch) {
|
|
4495
|
-
const mergedSha = cleanString(command.result.mergedSha, "result.mergedSha", { max: 64 });
|
|
4496
4488
|
setWorkspaceBranch(workspaceId, newBranch, mergedSha);
|
|
4497
4489
|
}
|
|
4490
|
+
// #239 — push the author a "your branch landed" notice (no polling). Only on a
|
|
4491
|
+
// real land; a no-op resolution (#230) merged nothing, so it earns no notice.
|
|
4492
|
+
if (command.result.merged === true && landedWorkspace) {
|
|
4493
|
+
notifyBranchLanded({
|
|
4494
|
+
workspace: landedWorkspace,
|
|
4495
|
+
mergedSha,
|
|
4496
|
+
subject: cleanString(command.result.subject, "result.subject", { max: 200 }),
|
|
4497
|
+
newBranch,
|
|
4498
|
+
});
|
|
4499
|
+
}
|
|
4498
4500
|
}
|
|
4499
4501
|
} else if (command.status === "failed" && command.correlationId) {
|
|
4500
4502
|
// Merge couldn't complete — don't leave it stuck in merge_planned.
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
// Orphaned-worktree reconciliation (#244). An isolated worktree on disk with no
|
|
2
|
+
// live DB row is "orphaned" — left behind when a session ended without a clean
|
|
3
|
+
// teardown (crash, killed runner, a reaped row). Symmetrically, a live row whose
|
|
4
|
+
// worktree is gone from disk is the other half of the same drift. Neither is
|
|
5
|
+
// visible to the agent or the dashboard, so unlanded work can sit stranded for
|
|
6
|
+
// weeks (one real casualty: a CI-guard test, recovered by hand).
|
|
7
|
+
//
|
|
8
|
+
// THE invariant (single home, see `worktreeReapable`): reaping is gated on
|
|
9
|
+
// "nothing would be lost" — landed or empty — NEVER on session liveness or a
|
|
10
|
+
// timer. A worktree holding un-landed commits is flagged for attention, never
|
|
11
|
+
// force-removed. This module is the disk⇄DB reconciler the GC's `git worktree
|
|
12
|
+
// prune` (a no-op while the directory exists) never was.
|
|
13
|
+
|
|
14
|
+
import { resolve } from "node:path";
|
|
15
|
+
import { RELAY_TOKEN_HEADER, errMessage } from "agent-relay-sdk";
|
|
16
|
+
import type { WorkspaceMergePreview, WorkspaceOrphan, WorkspaceProbe, WorkspaceRecord } from "./types";
|
|
17
|
+
import { createActivityEvent, listOrchestrators, listWorkspaces } from "./db";
|
|
18
|
+
import { createCommand } from "./commands-db";
|
|
19
|
+
import { emitRelayEvent } from "./events";
|
|
20
|
+
import { isPathWithinBase } from "./utils";
|
|
21
|
+
import { TERMINAL_WORKSPACE_STATUSES, worktreeReapable, type WorktreeReapState } from "./workspace-phase";
|
|
22
|
+
|
|
23
|
+
// Quiet window between repeat announcements of the same un-landed orphan: it is
// surfaced once, then not re-flagged on every sweep. Overridable through
// AGENT_RELAY_ORPHAN_FLAG_COOLDOWN_MS; an unset, non-numeric, or zero value
// falls back to six hours.
const UNLANDED_FLAG_COOLDOWN_MS =
  Number(process.env.AGENT_RELAY_ORPHAN_FLAG_COOLDOWN_MS) || 21_600_000;

// Timestamp of the last announcement per cooldown key. In-memory only (like the
// orphaned-session reaper), so a restart re-announces — acceptable noise.
const flaggedAt = new Map<string, number>();

// AGENT_RELAY_ORPHAN_WORKTREE_REAP=0 switches the reaper to detect + report
// only: orphans are never removed (parity with the session reaper's switch).
function orphanWorktreeReapEnabled(): boolean {
  return process.env.AGENT_RELAY_ORPHAN_WORKTREE_REAP !== "0";
}

/** Test hook: drops every remembered cooldown entry. */
export function resetOrphanWorktreeStateForTests(): void {
  flaggedAt.clear();
}
|
|
35
|
+
|
|
36
|
+
// Narrowed view of an orchestrator record holding only the fields this module
// reads. `id` and `agentId` are required; `apiUrl` and `baseDir` are optional,
// so callers must check them before use.
interface OnlineOrchestrator {
|
|
37
|
+
id: string;
|
|
38
|
+
agentId: string;
|
|
39
|
+
apiUrl?: string;
|
|
40
|
+
baseDir?: string;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/**
 * Request headers for calls to an orchestrator host: the relay token header
 * when AGENT_RELAY_TOKEN is set to a non-empty value, otherwise no headers.
 */
function relayHeaders(): Record<string, string> {
  const relayToken = process.env.AGENT_RELAY_TOKEN;
  return relayToken ? { [RELAY_TOKEN_HEADER]: relayToken } : {};
}
|
|
49
|
+
|
|
50
|
+
/**
 * Asks the host at `apiUrl` to probe `repoRoot` (GET /api/workspace/probe).
 * Best-effort: resolves to the parsed JSON body on a 2xx response, and to
 * `null` on a non-2xx response or on any thrown error (network failure, the
 * 10 s timeout, or a body that is not valid JSON).
 */
async function fetchHostProbe(apiUrl: string, repoRoot: string): Promise<WorkspaceProbe | null> {
  try {
    // Built inside the try so an encoding error is also reported as `null`.
    const endpoint = `${apiUrl}/api/workspace/probe?path=${encodeURIComponent(repoRoot)}`;
    const response = await fetch(endpoint, {
      headers: relayHeaders(),
      signal: AbortSignal.timeout(10_000),
    });
    if (response.ok) {
      return await response.json() as WorkspaceProbe;
    }
    return null;
  } catch {
    return null;
  }
}
|
|
62
|
+
|
|
63
|
+
// Land-state for one worktree path, read from the host's merge-preview endpoint
// (the squash-aware `landed`, `unmergedAhead`, `dirtyCount` the conflict scan
// already trusts). `baseRef` is forwarded only when provided. Best-effort:
// resolves to `null` on a non-2xx response or any thrown error, including the
// 8 s timeout.
async function fetchWorktreeReapState(apiUrl: string, worktreePath: string, baseRef?: string): Promise<WorkspaceMergePreview | null> {
  const params = new URLSearchParams(
    baseRef
      ? { path: worktreePath, checkPr: "1", baseRef }
      : { path: worktreePath, checkPr: "1" },
  );
  try {
    const response = await fetch(`${apiUrl}/api/workspace/merge-preview?${params.toString()}`, {
      headers: relayHeaders(),
      signal: AbortSignal.timeout(8_000),
    });
    if (response.ok) {
      return await response.json() as WorkspaceMergePreview;
    }
    return null;
  } catch {
    return null;
  }
}
|
|
79
|
+
|
|
80
|
+
/**
 * Orchestrators that are usable right now: status "online" with both an
 * `apiUrl` and an `agentId`. Each is reduced to the fields this module needs.
 */
function onlineOrchestrators(): OnlineOrchestrator[] {
  const usable: OnlineOrchestrator[] = [];
  for (const orch of listOrchestrators()) {
    if (orch.status !== "online" || !orch.apiUrl || !orch.agentId) continue;
    usable.push({
      id: orch.id,
      agentId: orch.agentId,
      apiUrl: orch.apiUrl,
      baseDir: orch.baseDir,
    });
  }
  return usable;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Distinct, non-empty repo roots referenced by the given workspace rows, in
 * first-seen order. These are the seeds probed for orphans: one probe per repo
 * returns ALL its worktrees, so a single (even shared) row per repo is enough
 * to discover every orphan under it.
 */
function knownRepoRoots(workspaces: WorkspaceRecord[]): string[] {
  const roots = new Set<string>();
  for (const workspace of workspaces) {
    if (workspace.repoRoot) roots.add(workspace.repoRoot);
  }
  return Array.from(roots);
}
|
|
92
|
+
|
|
93
|
+
export interface CollectOrphansResult {
  /** Agent-created worktrees on disk that have no live workspace row (disk→DB drift). */
  orphans: WorkspaceOrphan[];
  /** Live isolated rows whose worktree is missing on disk (DB→disk drift). */
  missingWorktrees: Array<{ workspaceId: string; worktreePath: string; repoRoot: string; status: string }>;
  /** Set only when the whole pass was skipped (currently: no online orchestrators). */
  reason?: string;
}

/**
 * The disk⇄DB reconcile pass. For every known repo with an online owning host:
 * probe its worktrees, subtract live DB rows → orphans (disk without a live row),
 * and the inverse → live rows whose worktree is gone (DB without disk). Each
 * orphan is enriched with land-state so callers can tell reap-safe cruft from
 * stranded work. Shared by the `/orphans` route and the scheduled reaper.
 *
 * Repos with no online owning host, or whose probe fails, are skipped silently.
 * An orphan whose land-state preview is unavailable keeps `safeToReap`
 * undefined, which callers must treat as not safe.
 *
 * @returns The orphans and missing worktrees found, or empty lists plus
 *   `reason` when there is no online orchestrator to ask.
 */
export async function collectWorkspaceOrphans(): Promise<CollectOrphansResult> {
  const orchestrators = onlineOrchestrators();
  if (!orchestrators.length) return { orphans: [], missingWorktrees: [], reason: "no online orchestrators" };

  const all = listWorkspaces();
  const orphans: WorkspaceOrphan[] = [];
  const missingWorktrees: CollectOrphansResult["missingWorktrees"] = [];

  for (const repoRoot of knownRepoRoots(all)) {
    const orch = orchestrators.find((candidate) => candidate.apiUrl && isPathWithinBase(repoRoot, candidate.baseDir));
    if (!orch?.apiUrl) continue;
    const probe = await fetchHostProbe(orch.apiUrl, repoRoot);
    if (!probe?.worktrees) continue;

    const repoRows = all.filter((ws) => ws.repoRoot === repoRoot && ws.worktreePath);
    const liveRowsByPath = new Map(
      repoRows
        .filter((ws) => !TERMINAL_WORKSPACE_STATUSES.has(ws.status))
        .map((ws) => [resolve(ws.worktreePath), ws]),
    );
    // Every path any row (live or terminal) of this repo points at.
    const knownPaths = new Set(repoRows.map((ws) => resolve(ws.worktreePath)));
    const onDisk = new Set(probe.worktrees.map((wt) => (wt.path ? resolve(wt.path) : "")).filter(Boolean));

    // DB→disk drift: a live isolated row whose worktree is no longer on disk.
    for (const [path, ws] of liveRowsByPath) {
      if (ws.mode === "isolated" && !onDisk.has(path)) {
        missingWorktrees.push({ workspaceId: ws.id, worktreePath: ws.worktreePath, repoRoot, status: ws.status });
      }
    }

    // disk→DB drift: a worktree on disk with no live row.
    const resolvedRepoRoot = resolve(repoRoot);
    for (const worktree of probe.worktrees) {
      if (!worktree.path) continue;
      const resolvedPath = resolve(worktree.path);
      if (resolvedPath === resolvedRepoRoot) continue;
      // Only agent-relay-created worktrees (agent/* branches) are reclaimable —
      // never touch a user's own linked worktrees.
      if (!worktree.branch?.startsWith("agent/")) continue;
      // Tracked & live. Liveness is decided from the live-row map, not from a
      // last-row-wins lookup over all rows: a stale terminal row that shares
      // this path must never shadow a live one and expose its worktree to the
      // reaper.
      if (liveRowsByPath.has(resolvedPath)) continue;

      const orphan: WorkspaceOrphan = {
        worktreePath: worktree.path,
        repoRoot,
        branch: worktree.branch,
        headSha: worktree.headSha,
        // No live row owns this path, so any row that does is terminal.
        hadTerminalRow: knownPaths.has(resolvedPath),
      };
      const preview = await fetchWorktreeReapState(orch.apiUrl, worktree.path, probe.branch);
      if (preview && !preview.missing && !preview.error) {
        const state: WorktreeReapState = {
          landed: preview.landed,
          ahead: preview.ahead,
          unmergedAhead: preview.unmergedAhead,
          dirtyCount: preview.dirtyCount,
        };
        orphan.landed = preview.landed;
        orphan.ahead = preview.ahead;
        orphan.unmergedAhead = preview.unmergedAhead;
        orphan.dirty = (preview.dirtyCount ?? 0) > 0;
        orphan.safeToReap = worktreeReapable(state);
      }
      // No usable preview → safeToReap stays undefined (treated as not-safe by callers).
      orphans.push(orphan);
    }
  }

  return { orphans, missingWorktrees };
}
|
|
175
|
+
|
|
176
|
+
/**
 * Creates a system-sourced `workspace.cleanup` command for `orphan`, targeted
 * at the owning orchestrator's agent, and announces it on the relay event
 * stream as `command.<status>`.
 *
 * @returns The id of the created command.
 */
function dispatchCleanup(orch: OnlineOrchestrator, orphan: WorkspaceOrphan): string {
  const { worktreePath, repoRoot, branch } = orphan;
  const command = createCommand({
    type: "workspace.cleanup",
    source: "system",
    target: orch.agentId,
    params: {
      action: "cleanup",
      worktreePath,
      repoRoot,
      branch,
      deleteBranch: true,
      reclaim: true,
      requestedBy: "workspace-orphan-reaper",
      requestedAt: Date.now(),
    },
  });
  const { id, status, source } = command;
  emitRelayEvent({ type: `command.${status}`, source, subject: id, data: { command } });
  return id;
}
|
|
195
|
+
|
|
196
|
+
/**
 * Scheduled reaper (maintenance job). For each orphan that has an online owning
 * orchestrator:
 *  - safe to reap (`safeToReap === true`): dispatches a cleanup command and
 *    records an activity event, unless detect-only mode is on;
 *  - anything else (un-landed work, dirty tree, or unknown land-state): records
 *    a needs-attention activity event at most once per cooldown window and
 *    never removes the worktree.
 * Live rows whose worktree vanished are reported the same way (observability
 * only). Never removes on uncertainty.
 *
 * @returns `{ skipped }` when the collect pass was skipped, otherwise a summary
 *   with `scanned`, `reaped`, `flagged`, `missingWorktrees` (workspace ids) and
 *   `reapEnabled`.
 */
export async function reapOrphanedWorktrees(): Promise<Record<string, unknown>> {
  const collected = await collectWorkspaceOrphans();
  if (collected.reason) return { skipped: collected.reason };
  const { orphans, missingWorktrees } = collected;

  const orchestrators = onlineOrchestrators();
  const reapEnabled = orphanWorktreeReapEnabled();
  const reaped: string[] = [];
  const flagged: string[] = [];
  const now = Date.now();

  // Claims the announcement slot for `key`. Returns false (and records nothing)
  // while the previous announcement is still inside the cooldown window.
  const claimCooldown = (key: string): boolean => {
    const previous = flaggedAt.get(key) ?? 0;
    if (now - previous < UNLANDED_FLAG_COOLDOWN_MS) return false;
    flaggedAt.set(key, now);
    return true;
  };

  for (const orphan of orphans) {
    const { worktreePath, repoRoot } = orphan;
    const owner = orchestrators.find((candidate) => isPathWithinBase(repoRoot, candidate.baseDir));
    if (!owner) continue;
    const label = orphan.branch ?? worktreePath;

    if (orphan.safeToReap !== true) {
      // Not safe (un-landed work, dirty tree, or un-probeable) — flag once per
      // cooldown, never remove. This is the stranded-work needs-attention entry.
      if (!claimCooldown(worktreePath)) continue;
      flagged.push(worktreePath);
      let detail: string;
      if (orphan.safeToReap === undefined) {
        detail = "host could not be probed for land-state";
      } else if (orphan.dirty) {
        detail = "uncommitted changes in the worktree";
      } else {
        detail = `${orphan.unmergedAhead ?? orphan.ahead ?? "?"} un-landed commit(s)`;
      }
      createActivityEvent({
        clientId: `workspace-orphan-stranded-${worktreePath}-${now}`,
        kind: "state",
        title: "Stranded worktree needs attention",
        body: `${label} in ${repoRoot} is orphaned (no live workspace row) and holds work that hasn't landed — ${detail}. Reclaim with force to discard, or recover the commits before removing.`,
        meta: label,
        icon: "ti-alert-triangle",
        view: "orchestrators",
        metadata: { source: "server", maintenanceJobId: "workspace-orphan-reaper", worktreePath, repoRoot, branch: orphan.branch, ahead: orphan.ahead, unmergedAhead: orphan.unmergedAhead, dirty: orphan.dirty, headSha: orphan.headSha },
      });
      continue;
    }

    if (!reapEnabled) continue; // detect-only mode
    const commandId = dispatchCleanup(owner, orphan);
    reaped.push(worktreePath);
    flaggedAt.delete(worktreePath);
    createActivityEvent({
      clientId: `workspace-orphan-reaped-${worktreePath}-${now}`,
      kind: "state",
      title: "Orphaned worktree reaped",
      body: `${label} — ${orphan.landed ? "work already landed" : "no work to preserve"}; removing the stale worktree`,
      meta: label,
      icon: "ti-trash",
      view: "orchestrators",
      metadata: { source: "server", maintenanceJobId: "workspace-orphan-reaper", worktreePath, repoRoot, commandId, landed: orphan.landed },
    });
  }

  // DB→disk drift is observability-only: a live row whose worktree vanished is
  // surfaced, not auto-deleted (the row may still be mid-land or recoverable).
  for (const { workspaceId, worktreePath, status } of missingWorktrees) {
    if (!claimCooldown(`missing:${worktreePath}`)) continue;
    createActivityEvent({
      clientId: `workspace-row-no-worktree-${workspaceId}-${now}`,
      kind: "state",
      title: "Workspace row has no worktree on disk",
      body: `Workspace ${workspaceId} (${status}) points at ${worktreePath}, which no longer exists on disk — disk/DB drift.`,
      meta: workspaceId,
      icon: "ti-unlink",
      view: "orchestrators",
      metadata: { source: "server", maintenanceJobId: "workspace-orphan-reaper", workspaceId, worktreePath, status },
    });
  }

  // Forget cooldown entries for orphans that are gone (reaped/recovered) so a
  // future re-orphaning of the same path re-announces immediately.
  const stillPresent = new Set<string>();
  for (const orphan of orphans) stillPresent.add(orphan.worktreePath);
  for (const missing of missingWorktrees) stillPresent.add(`missing:${missing.worktreePath}`);
  for (const key of [...flaggedAt.keys()]) {
    if (stillPresent.has(key) || reaped.includes(key)) continue;
    flaggedAt.delete(key);
  }

  return {
    scanned: orphans.length,
    reaped,
    flagged,
    missingWorktrees: missingWorktrees.map((m) => m.workspaceId),
    reapEnabled,
  };
}
|
package/src/workspace-phase.ts
CHANGED
|
@@ -21,6 +21,55 @@ import type { WorkspaceRecord, WorkspaceStatus } from "./types";
|
|
|
21
21
|
// initialize primer (don't brief an agent on a dead workspace). Was duplicated.
|
|
22
22
|
export const TERMINAL_WORKSPACE_STATUSES = new Set<WorkspaceStatus>(["cleaned", "merged", "abandoned"]);
|
|
23
23
|
|
|
24
|
+
// The "handed off, waiting to land" statuses — an agent has finished and the
|
|
25
|
+
// auto-merge-back is responsible for getting the branch onto base. SINGLE HOME:
|
|
26
|
+
// the auto-land consumer (maintenance `autoMergeCleanFastForwards`) and the
|
|
27
|
+
// strand-escalation set MUST both derive from this. They drifted before (#242):
|
|
28
|
+
// `relay_workspace_ready` sets `ready`, but the consumer only scanned
|
|
29
|
+
// `review_requested`, so a clean `ready` worktree was never a merge candidate and
|
|
30
|
+
// parked forever while this phase view kept reporting "healthy, wait." Producer
|
|
31
|
+
// and consumer now read the same set so a `ready` can never silently fall out of
|
|
32
|
+
// the land queue again. (`review_requested` is the same healthy hand-off state —
|
|
33
|
+
// it's also where a failed auto-merge lands for a retry, see routes.ts.)
|
|
34
|
+
export const READY_TO_LAND_STATUSES = new Set<WorkspaceStatus>(["ready", "review_requested"]);
|
|
35
|
+
|
|
36
|
+
// Land-state shape a host reports for a worktree (subset of WorkspaceMergePreview
// / WorkspaceGitState — the fields that decide reapability). Kept structural so
// both the merge-preview path and a raw git-state probe satisfy it.
export interface WorktreeReapState {
  /** Work already in base (squash/cherry/PR-merged). Detection only under-reports. */
  landed?: boolean;
  /** Commits ahead of base by raw count (a squash-landed branch still shows >0). */
  ahead?: number;
  /** Commits ahead whose patch is NOT already in base — the squash-aware count. */
  unmergedAhead?: number;
  /** Uncommitted working-tree changes. */
  dirtyCount?: number;
}

// THE reap-safety invariant (#244): a worktree may be removed only when nothing
// would be lost — clean tree AND (no commits ahead OR the work already landed).
// SINGLE HOME: the orphan reaper, the orphan-reclaim gate, and the host's
// exit-time `reconcileWorkspace` "empty" check all mean the same thing; they
// drifted into three private copies and a land-blind force-remove slipped
// through (the recovered NUL-guard test was one keystroke from deletion).
// Mirror `reconcileWorkspace`: landing detection can only under-report, so an
// uncertain worktree is NEVER reapable — it gets flagged for review instead.
/**
 * Decides whether a worktree can be removed without losing work.
 *
 * @param state Land-state reported for the worktree; `null`/`undefined` means
 *   nothing is known about it.
 * @returns `true` only when the tree is not dirty AND either the work is known
 *   to have landed or a reported ahead count (`unmergedAhead`, falling back to
 *   `ahead`) is exactly zero. Returns `false` for a missing state, a dirty
 *   tree, any commits ahead, or when no ahead count was reported at all.
 */
export function worktreeReapable(state: WorktreeReapState | null | undefined): boolean {
  if (!state) return false;
  if ((state.dirtyCount ?? 0) > 0) return false;
  if (state.landed === true) return true;
  // Prefer the squash-aware count; fall back to the raw one.
  const pendingCommits = state.unmergedAhead ?? state.ahead;
  // Neither count reported → the land-state is unknown. That is uncertainty,
  // not emptiness, so it must not default to "zero commits ahead".
  if (typeof pendingCommits !== "number") return false;
  return pendingCommits === 0;
}
|
|
64
|
+
|
|
65
|
+
// How long a workspace may sit in a ready-to-land status before the directive
|
|
66
|
+
// projection stops saying "healthy, just wait" and surfaces it as needs-attention
|
|
67
|
+
// (#242 watchdog). A clean auto-merge runs ~every 2 min, so a handful of missed
|
|
68
|
+
// sweeps means something is wrong (wrong status filter, no online orchestrator,
|
|
69
|
+
// an unpushed branch, a wedged steward) and the agent/human should be told —
|
|
70
|
+
// instead of the old behavior where it looked healthy for 90 minutes.
|
|
71
|
+
export const LAND_PENDING_STALL_MS = 15 * 60 * 1000;
|
|
72
|
+
|
|
24
73
|
export type WorkspacePhase =
|
|
25
74
|
| "working" // active — your turn: commit, then mark ready
|
|
26
75
|
| "land-pending" // ready | review_requested — handed off; auto-merge will land it
|
|
@@ -66,7 +115,17 @@ const READY_ACTION: WorkspaceNextAction = {
|
|
|
66
115
|
// Map every WorkspaceStatus to the branch agent's mental model. Statuses that
|
|
67
116
|
// look scary but are healthy (review_requested, conflict) carry actionNeeded:false
|
|
68
117
|
// and an explicit "not your job" hint.
|
|
69
|
-
|
|
118
|
+
//
|
|
119
|
+
// `opts.now` (defaults to wall-clock) drives the #242 stall watchdog: a workspace
|
|
120
|
+
// pending-to-land past LAND_PENDING_STALL_MS flips from the "healthy, wait" view
|
|
121
|
+
// to needs-attention with a real blocker, so the status surface the agent polls
|
|
122
|
+
// can't keep masking a stuck land. The clock is `readyAt` (set once when the agent
|
|
123
|
+
// marks ready, immune to the heartbeat `updated_at` bump) — not `updatedAt`, which
|
|
124
|
+
// keeps ticking on every heartbeat and made the stall look fresh forever.
|
|
125
|
+
export function describeWorkspacePhase(
|
|
126
|
+
workspace: Pick<WorkspaceRecord, "status" | "branch" | "stewardAgentId" | "readyAt">,
|
|
127
|
+
opts: { now?: number; stallMs?: number } = {},
|
|
128
|
+
): WorkspacePhaseView {
|
|
70
129
|
switch (workspace.status) {
|
|
71
130
|
case "active":
|
|
72
131
|
return {
|
|
@@ -78,10 +137,27 @@ export function describeWorkspacePhase(workspace: Pick<WorkspaceRecord, "status"
|
|
|
78
137
|
blockers: [],
|
|
79
138
|
};
|
|
80
139
|
case "ready":
|
|
81
|
-
case "review_requested":
|
|
140
|
+
case "review_requested": {
|
|
82
141
|
// The #235 crux: these are the SAME healthy "handed off, waiting" state.
|
|
83
142
|
// `review_requested` reads like an escalation but is the normal post-ready
|
|
84
143
|
// node; an absent steward is the healthy case, not a stall.
|
|
144
|
+
const now = opts.now ?? Date.now();
|
|
145
|
+
const stallMs = opts.stallMs ?? LAND_PENDING_STALL_MS;
|
|
146
|
+
const pendingMs = typeof workspace.readyAt === "number" ? now - workspace.readyAt : undefined;
|
|
147
|
+
// #242 watchdog: past the bound this is no longer "healthy, wait." Surface it
|
|
148
|
+
// as needs-attention with a real blocker instead of the anti-panic view, so
|
|
149
|
+
// the agent (and the dashboard) stop reporting a wedged land as healthy.
|
|
150
|
+
if (pendingMs !== undefined && pendingMs > stallMs) {
|
|
151
|
+
const mins = Math.round(pendingMs / 60_000);
|
|
152
|
+
return {
|
|
153
|
+
phase: "land-pending",
|
|
154
|
+
headline: `Stalled — handed off ${mins} min ago but still hasn't landed. A clean auto-merge runs every ~2 min, so this is past the healthy window and likely stuck (no online orchestrator, an unpushed branch, or a wedged merge/steward).`,
|
|
155
|
+
hint: "Do NOT merge, push, rebase, or touch the main checkout yourself. Flag this to a human or the repo steward — the auto-merge/steward path isn't progressing and needs attention.",
|
|
156
|
+
actionNeeded: true,
|
|
157
|
+
nextActions: [WAIT_ACTION],
|
|
158
|
+
blockers: [`pending land for ~${mins} min with no progress — auto-merge/steward isn't landing it`],
|
|
159
|
+
};
|
|
160
|
+
}
|
|
85
161
|
return {
|
|
86
162
|
phase: "land-pending",
|
|
87
163
|
headline: "Handed off — waiting for the auto-merge to land your branch. This is the normal, healthy post-ready state (not an escalation).",
|
|
@@ -90,6 +166,7 @@ export function describeWorkspacePhase(workspace: Pick<WorkspaceRecord, "status"
|
|
|
90
166
|
nextActions: [WAIT_ACTION],
|
|
91
167
|
blockers: [],
|
|
92
168
|
};
|
|
169
|
+
}
|
|
93
170
|
case "merge_planned":
|
|
94
171
|
return {
|
|
95
172
|
phase: "landing",
|
|
@@ -157,7 +234,7 @@ export function worktreeMcpInstructions(workspace: Pick<WorkspaceRecord, "branch
|
|
|
157
234
|
`You are in an isolated git worktree on branch ${branch}, based on ${base} — NOT the main checkout. ${base} moves under you as other agents land in parallel; that's expected.`,
|
|
158
235
|
"Changes reach the base via: commit your work, then call `relay_workspace_ready`. Relay rebases onto the latest base, lands, and pushes for you.",
|
|
159
236
|
"Do NOT push, rebase, merge, resolve conflicts, or `cd` into the main checkout — Relay (and a steward, spawned only if a clean auto-merge isn't possible) own all of that.",
|
|
160
|
-
"After `ready` the status is `
|
|
237
|
+
"After `ready` the status is `ready` (a normal, healthy hand-off state, not a stall). Call `relay_workspace_status` with `wait:true` to block until your branch lands; you'll then continue on a fresh rebased branch (name gains a `--N` suffix).",
|
|
161
238
|
"Call `relay_workspace_status` anytime to see where you are and the exact next step.",
|
|
162
239
|
].join("\n");
|
|
163
240
|
}
|