agent-relay-server 0.24.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-server",
3
- "version": "0.24.0",
3
+ "version": "0.26.0",
4
4
  "description": "Lightweight HTTP message relay for inter-agent communication across machines",
5
5
  "module": "src/index.ts",
6
6
  "type": "module",
@@ -33,7 +33,7 @@
33
33
  "CONTRIBUTING.md"
34
34
  ],
35
35
  "dependencies": {
36
- "agent-relay-sdk": "0.2.13"
36
+ "agent-relay-sdk": "0.2.15"
37
37
  },
38
38
  "scripts": {
39
39
  "prepack": "bun run build:dashboard:bundle >&2",
package/public/index.html CHANGED
@@ -10168,6 +10168,8 @@ function parseSseFrame(frame) {
10168
10168
  }
10169
10169
  //#endregion
10170
10170
  //#region src/lib/api.ts
10171
+ var API_TIMEOUT_MS = 2e4;
10172
+ var SSE_STALE_MS = 35e3;
10171
10173
  var authToken = "";
10172
10174
  function setAuthToken(token) {
10173
10175
  authToken = token;
@@ -10211,11 +10213,11 @@ function openTerminalWebSocket(orchestratorId, session) {
10211
10213
  return new WebSocket(url);
10212
10214
  }
10213
10215
  function openRelayEventStream(token, handlers) {
10214
- const abort = new AbortController();
10215
10216
  const eventUrl = new URL("api/events", baseUrl()).toString();
10216
10217
  let closed = false;
10217
10218
  let retryMs = 5e3;
10218
10219
  let reconnectTimer = null;
10220
+ let activeAbort = null;
10219
10221
  const scheduleReconnect = () => {
10220
10222
  if (closed) return;
10221
10223
  reconnectTimer = setTimeout(connect, retryMs);
@@ -10226,6 +10228,11 @@ function openRelayEventStream(token, handlers) {
10226
10228
  if (data.length > 0) handlers.message(event, data.join("\n"));
10227
10229
  };
10228
10230
  const connect = async () => {
10231
+ if (closed) return;
10232
+ const ac = new AbortController();
10233
+ activeAbort = ac;
10234
+ let lastFrameAt = Date.now();
10235
+ let staleTimer = null;
10229
10236
  try {
10230
10237
  const headers = { Accept: "text/event-stream" };
10231
10238
  const effectiveToken = token || getAuthToken();
@@ -10233,10 +10240,14 @@ function openRelayEventStream(token, handlers) {
10233
10240
  const response = await fetch(eventUrl, {
10234
10241
  headers,
10235
10242
  cache: "no-store",
10236
- signal: abort.signal
10243
+ signal: ac.signal
10237
10244
  });
10238
10245
  if (!response.ok || !response.body) throw new Error(`SSE failed: ${response.status}`);
10239
10246
  handlers.connected?.();
10247
+ lastFrameAt = Date.now();
10248
+ staleTimer = setInterval(() => {
10249
+ if (Date.now() - lastFrameAt > SSE_STALE_MS) ac.abort();
10250
+ }, 5e3);
10240
10251
  const reader = response.body.getReader();
10241
10252
  const decoder = new TextDecoder();
10242
10253
  let buffer = "";
@@ -10246,6 +10257,7 @@ function openRelayEventStream(token, handlers) {
10246
10257
  buffer += decoder.decode();
10247
10258
  break;
10248
10259
  }
10260
+ lastFrameAt = Date.now();
10249
10261
  buffer += decoder.decode(value, { stream: true });
10250
10262
  let frameEnd = buffer.indexOf("\n\n");
10251
10263
  while (frameEnd >= 0) {
@@ -10256,6 +10268,7 @@ function openRelayEventStream(token, handlers) {
10256
10268
  }
10257
10269
  }
10258
10270
  } catch {} finally {
10271
+ if (staleTimer) clearInterval(staleTimer);
10259
10272
  if (closed) return;
10260
10273
  handlers.disconnected?.();
10261
10274
  scheduleReconnect();
@@ -10265,13 +10278,14 @@ function openRelayEventStream(token, handlers) {
10265
10278
  return { close() {
10266
10279
  closed = true;
10267
10280
  if (reconnectTimer) clearTimeout(reconnectTimer);
10268
- abort.abort();
10281
+ activeAbort?.abort();
10269
10282
  } };
10270
10283
  }
10271
10284
  async function api(method, path, body) {
10272
10285
  const opts = {
10273
10286
  method,
10274
- headers: {}
10287
+ headers: {},
10288
+ signal: AbortSignal.timeout(API_TIMEOUT_MS)
10275
10289
  };
10276
10290
  const headers = opts.headers;
10277
10291
  if (authToken) headers["X-Agent-Relay-Token"] = authToken;
@@ -12991,10 +13005,13 @@ var useRelayStore = create$1()(persist((set, get) => ({
12991
13005
  connectSSE() {
12992
13006
  get().disconnectSSE();
12993
13007
  set({ _es: openRelayEventStream(get().authToken, {
12994
- connected: () => set(get().connectionError ? {
12995
- connected: true,
12996
- connectionError: false
12997
- } : { connected: true }),
13008
+ connected: () => {
13009
+ set(get().connectionError ? {
13010
+ connected: true,
13011
+ connectionError: false
13012
+ } : { connected: true });
13013
+ get().refreshLiveData();
13014
+ },
12998
13015
  disconnected: () => set({ connected: false }),
12999
13016
  message: (event, data) => {
13000
13017
  if (event === "connected") return;
@@ -0,0 +1,111 @@
1
+ import { emitRelayEvent } from "./events";
2
+ import { getNotificationsConfig } from "./config-store";
3
+ import { notifySystemMessage } from "./notify";
4
+ import { listAgents } from "./db";
5
+ import { isAgentOnline } from "./agent-ref";
6
+ import type { AgentCard, WorkspaceRecord } from "./types";
7
+
8
+ export interface BranchLandedInput {
9
+ /**
10
+ * The workspace as it was AT land time — `branch` must be the branch that landed,
11
+ * captured before any land-and-continue recycle repoints the row (#206). `ownerAgentId`
12
+ * is the author the "landed" notice is pushed to.
13
+ */
14
+ workspace: Pick<WorkspaceRecord, "id" | "repoRoot" | "branch" | "baseRef" | "ownerAgentId">;
15
+ /** SHA the base now points at after the land. */
16
+ mergedSha?: string;
17
+ /** Subject line of the landed commit, when the orchestrator reported it. */
18
+ subject?: string;
19
+ /** Fresh branch the worktree was recycled onto (land-and-continue), if any. */
20
+ newBranch?: string;
21
+ }
22
+
23
+ /**
24
+ * #239 — turn an authoritative land completion into a relay-driven push so the author
25
+ * stops polling to learn it merged. Always emits the durable `branch.landed` event (the
26
+ * rest of the bus does the same); only the agent-facing push is gated, since it wakes the
27
+ * recipient. Offline authors get it on next poll via store-ahead (#234).
28
+ *
29
+ * The agents-on-main fan-out (the second #239 recipient class) runs after the author push, for online agents only.
30
+ */
31
+ export function notifyBranchLanded(input: BranchLandedInput): void {
32
+ const { workspace } = input;
33
+ const base = workspace.baseRef ?? "base";
34
+ const landedBranch = workspace.branch;
35
+ const shortSha = input.mergedSha ? input.mergedSha.slice(0, 12) : undefined;
36
+
37
+ emitRelayEvent({
38
+ type: "branch.landed",
39
+ source: "server",
40
+ subject: workspace.id,
41
+ data: {
42
+ workspaceId: workspace.id,
43
+ repoRoot: workspace.repoRoot,
44
+ branch: landedBranch,
45
+ base,
46
+ sha: input.mergedSha,
47
+ subject: input.subject,
48
+ author: workspace.ownerAgentId,
49
+ newBranch: input.newBranch,
50
+ },
51
+ });
52
+
53
+ const config = getNotificationsConfig();
54
+ if (!config.enabled || !config.branchLanded) return;
55
+
56
+ const author = workspace.ownerAgentId;
57
+ const shaLabel = shortSha ? ` as \`${shortSha}\`` : "";
58
+ const subjectLabel = input.subject ? ` — "${input.subject}"` : "";
59
+ const payload = {
60
+ kind: "branch.landed",
61
+ workspaceId: workspace.id,
62
+ repoRoot: workspace.repoRoot,
63
+ branch: landedBranch,
64
+ base,
65
+ sha: input.mergedSha,
66
+ author,
67
+ newBranch: input.newBranch,
68
+ };
69
+
70
+ // The branch author cares most — push regardless of online (store-ahead delivers it on
71
+ // next poll if they've moved on, #234). The notice names the recycled branch when there is one.
72
+ if (author) {
73
+ const branchLabel = landedBranch ? `\`${landedBranch}\`` : "Your branch";
74
+ const continueLabel = input.newBranch
75
+ ? ` You're now on \`${input.newBranch}\` — keep working there.`
76
+ : " Worktree reclaimed.";
77
+ notifySystemMessage(author, {
78
+ subject: "Your branch landed",
79
+ body: `✅ ${branchLabel} landed on \`${base}\`${shaLabel}${subjectLabel}.${continueLabel}`,
80
+ payload,
81
+ });
82
+ }
83
+
84
+ // Agents on `main` — those whose cwd IS the main checkout (not an isolated worktree) —
85
+ // get a live "merged" notice so a long-lived main agent's context stays current as work
86
+ // lands under it (#239). Online-only: a stale/exited main session needs no wake, and
87
+ // store-ahead to it would just pile up noise. The author is in a worktree (cwd ≠ repoRoot)
88
+ // so it's naturally excluded; guard anyway for shared-mode owners.
89
+ const branchLabel = landedBranch ? `\`${landedBranch}\`` : "A branch";
90
+ const authorLabel = author ? ` by \`${author}\`` : "";
91
+ for (const agent of agentsOnMain(workspace.repoRoot, author)) {
92
+ notifySystemMessage(agent.id, {
93
+ subject: `Merged to ${base}`,
94
+ body: `🔀 ${branchLabel}${authorLabel} merged to \`${base}\`${shaLabel}${subjectLabel}.`,
95
+ payload,
96
+ });
97
+ }
98
+ }
99
+
100
+ // An agent is "on `main`" when its registered cwd equals the repo's main checkout — i.e. it
101
+ // works in the base, not an isolated worktree. Excludes the author, pseudo agents (system/
102
+ // user), channels, and offline sessions.
103
+ function agentsOnMain(repoRoot: string, author: string | undefined): AgentCard[] {
104
+ return listAgents().filter((a) => {
105
+ if (a.id === author || a.id === "system" || a.id === "user") return false;
106
+ if (a.kind === "channel" || a.meta?.kind === "channel") return false;
107
+ const cwd = a.meta?.cwd;
108
+ if (typeof cwd !== "string" || cwd !== repoRoot) return false;
109
+ return isAgentOnline(a);
110
+ });
111
+ }
@@ -10,6 +10,7 @@ import type {
10
10
  InsightsConfig,
11
11
  ManagedAgentState,
12
12
  ManagedAgentStatus,
13
+ NotificationsConfig,
13
14
  SpawnApprovalMode,
14
15
  SpawnPolicy,
15
16
  SpawnProvider,
@@ -24,6 +25,8 @@ const STEWARD_NAMESPACE = "steward";
24
25
  const STEWARD_KEY = "default";
25
26
  const INSIGHTS_NAMESPACE = "insights";
26
27
  const INSIGHTS_KEY = "default";
28
+ const NOTIFICATIONS_NAMESPACE = "notifications";
29
+ const NOTIFICATIONS_KEY = "default";
27
30
  const WORKSPACE_NAMESPACE = "workspace";
28
31
  const WORKSPACE_KEY = "default";
29
32
  const VALID_PROFILE_PROVIDERS = ["any", "claude", "codex"] as const;
@@ -460,6 +463,26 @@ function validateInsightsConfig(value: unknown): InsightsConfig {
460
463
  };
461
464
  }
462
465
 
466
+ // Relay-driven lifecycle push notifications (#239 event bus). Default-on; the
467
+ // operator can flip the master switch or individual events off via the generic
468
+ // config route. Push messages wake recipients, so they must be suppressible.
469
+ const NOTIFICATIONS_CONFIG_DEFAULTS: NotificationsConfig = {
470
+ enabled: true,
471
+ branchLanded: true,
472
+ };
473
+
474
+ function validateNotificationsConfig(value: unknown): NotificationsConfig {
475
+ if (!isRecord(value)) throw new ValidationError("notifications config value must be an object");
476
+ return {
477
+ enabled: value.enabled === undefined
478
+ ? NOTIFICATIONS_CONFIG_DEFAULTS.enabled
479
+ : cleanBoolean(value.enabled, "enabled"),
480
+ branchLanded: value.branchLanded === undefined
481
+ ? NOTIFICATIONS_CONFIG_DEFAULTS.branchLanded
482
+ : cleanBoolean(value.branchLanded, "branchLanded"),
483
+ };
484
+ }
485
+
463
486
  // Global workspace provisioning config for isolated worktrees (#159 follow-up).
464
487
  // Defaults seed the two untracked paths an isolated agent almost always needs:
465
488
  // the agent guide and the rig config, both gitignored so a fresh worktree lacks them.
@@ -487,6 +510,7 @@ function normalizeValue(namespace: string, key: string, value: unknown): unknown
487
510
  if (namespace === AGENT_PROFILE_NAMESPACE) return validateAgentProfile(key, value);
488
511
  if (namespace === STEWARD_NAMESPACE) return validateStewardConfig(value);
489
512
  if (namespace === INSIGHTS_NAMESPACE) return validateInsightsConfig(value);
513
+ if (namespace === NOTIFICATIONS_NAMESPACE) return validateNotificationsConfig(value);
490
514
  if (namespace === WORKSPACE_NAMESPACE) return validateWorkspaceConfig(value);
491
515
  if (JSON.stringify(value) === undefined) throw new ValidationError("value must be valid JSON");
492
516
  return value;
@@ -620,6 +644,13 @@ export function getInsightsConfigEntry(): ConfigEntry<InsightsConfig> {
620
644
  };
621
645
  }
622
646
 
647
+ /** Lifecycle-notification config (#239), merged over defaults (always usable). */
648
+ export function getNotificationsConfig(): NotificationsConfig {
649
+ const entry = getConfig<Partial<NotificationsConfig>>(NOTIFICATIONS_NAMESPACE, NOTIFICATIONS_KEY);
650
+ if (!entry) return { ...NOTIFICATIONS_CONFIG_DEFAULTS };
651
+ return validateNotificationsConfig({ ...NOTIFICATIONS_CONFIG_DEFAULTS, ...entry.value });
652
+ }
653
+
623
654
  export function setInsightsConfig(value: unknown, updatedBy?: string): ConfigEntry<InsightsConfig> {
624
655
  return setConfig(INSIGHTS_NAMESPACE, INSIGHTS_KEY, value as InsightsConfig, updatedBy);
625
656
  }
@@ -27,14 +27,14 @@ import {
27
27
  releaseExpiredMergeLeases,
28
28
  releaseOrphanedTasks,
29
29
  runDbMaintenance,
30
- sendMessage,
31
30
  sweepArtifacts,
32
31
  updateWorkspaceStatus,
33
32
  } from "./db";
34
33
  import type { WorkspaceMergePreview, WorkspaceRecord, WorkspaceStatus } from "./types";
35
34
  import { requestWorkspaceMerge } from "./workspace-merge";
36
35
  import { workspaceActiveClaim } from "./workspace-claim";
37
- import { TERMINAL_WORKSPACE_STATUSES } from "./workspace-phase";
36
+ import { reapOrphanedWorktrees } from "./workspace-orphans";
37
+ import { READY_TO_LAND_STATUSES, TERMINAL_WORKSPACE_STATUSES } from "./workspace-phase";
38
38
  import { errMessage, RELAY_TOKEN_HEADER } from "agent-relay-sdk";
39
39
  import { getStewardConfig } from "./config-store";
40
40
  import { ensureRepoSteward } from "./steward";
@@ -46,11 +46,11 @@ import {
46
46
  emitAgentStatus,
47
47
  emitMessageClaimReleased,
48
48
  emitMessageExpired,
49
- emitNewMessage,
50
49
  emitOrchestratorStatus,
51
50
  emitPoolBindingChanged,
52
51
  emitTaskChanged,
53
52
  } from "./sse";
53
+ import { notifySystemMessage } from "./notify";
54
54
  import { pruneExpiredTokenRecords } from "./token-db";
55
55
  import type { Command, MaintenanceJob, MaintenanceJobRun } from "./types";
56
56
 
@@ -67,6 +67,10 @@ const DB_VACUUM_EVERY = Number(process.env.AGENT_RELAY_DB_VACUUM_EVERY) || 7;
67
67
  let dbMaintenanceRuns = 0;
68
68
  const WORKSPACE_REVIEW_TTL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_REVIEW_TTL_MS) || 3 * DAY_MS;
69
69
  const WORKSPACE_GC_INTERVAL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_GC_INTERVAL_MS) || 60 * 60 * 1000;
70
+ // Disk⇄DB orphan reconcile cadence (#244). Runs on start for a boot-time pass,
71
+ // then periodically — orphans accrue slowly (one per crashed/killed session), so
72
+ // a 30-min sweep is plenty without hammering the hosts with probes.
73
+ const WORKSPACE_ORPHAN_REAPER_INTERVAL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_ORPHAN_REAPER_INTERVAL_MS) || 30 * 60 * 1000;
70
74
  // Deterministic auto-land (Layer 0): merge clean fast-forwards with no human in
71
75
  // the loop. Default on for the seamless workflow; set AGENT_RELAY_WORKSPACE_AUTO_MERGE=0
72
76
  // to require a manual or steward merge per repo. Read at call-time so operators can
@@ -83,7 +87,10 @@ const STEWARD_WAKE_COOLDOWN_MS = Number(process.env.AGENT_RELAY_STEWARD_WAKE_COO
83
87
  const stewardEscalationMs = () => Number(process.env.AGENT_RELAY_WORKSPACE_STEWARD_ESCALATION_MS) || 60 * 60 * 1000;
84
88
  const stewardFallbackTarget = () => (process.env.AGENT_RELAY_WORKSPACE_STEWARD_FALLBACK || "").trim();
85
89
  // Statuses that need an owner — a stranded one of these is what escalation rescues.
86
- const STRANDABLE_STATUSES = new Set<WorkspaceStatus>(["review_requested", "conflict"]);
90
+ // Derived from the shared ready-to-land set (#242) plus `conflict`, so a stranded
91
+ // `ready` worktree (no online steward) escalates to the fallback target instead of
92
+ // rotting silently — same gap that left the original #242 branch parked.
93
+ const STRANDABLE_STATUSES = new Set<WorkspaceStatus>([...READY_TO_LAND_STATUSES, "conflict"]);
87
94
  // Live statuses worth scanning. Terminal (cleaned/merged/abandoned) and
88
95
  // in-flight (cleanup_requested) states are skipped.
89
96
  const CONFLICT_SCAN_STATUSES = new Set<WorkspaceStatus>(["active", "ready", "review_requested", "merge_planned", "conflict"]);
@@ -394,7 +401,7 @@ const definitions: MaintenanceJobDefinition[] = [
394
401
  {
395
402
  id: "workspace-auto-merge",
396
403
  title: "Workspace auto-merge",
397
- description: "Auto-merge any non-conflicting review_requested worktree into base under the per-repo lease (rebasing when the base moved on); only real or unknown conflicts are left for the steward.",
404
+ description: "Auto-merge any non-conflicting ready/review_requested worktree into base under the per-repo lease (rebasing when the base moved on); only real or unknown conflicts are left for the steward.",
398
405
  intervalMs: WORKSPACE_AUTO_MERGE_INTERVAL_MS,
399
406
  runOnStart: false,
400
407
  timeoutMs: 60 * 1000,
@@ -409,6 +416,15 @@ const definitions: MaintenanceJobDefinition[] = [
409
416
  timeoutMs: 60 * 1000,
410
417
  handler: workspaceGC,
411
418
  },
419
+ {
420
+ id: "workspace-orphan-reaper",
421
+ title: "Workspace orphan reaper",
422
+ description: "Reconcile disk⇄DB: reap orphaned worktrees whose work has landed (or is empty), flag orphans holding un-landed work as needs-attention instead of deleting, and report rows whose worktree vanished. git worktree prune can't do this — it no-ops while the directory still exists.",
423
+ intervalMs: WORKSPACE_ORPHAN_REAPER_INTERVAL_MS,
424
+ runOnStart: true,
425
+ timeoutMs: 2 * 60 * 1000,
426
+ handler: reapOrphanedWorktrees,
427
+ },
412
428
  ];
413
429
 
414
430
  function workspacePathWithinBase(path: string | undefined, baseDir: string | undefined): boolean {
@@ -532,15 +548,11 @@ function wakeRepoSteward(ws: WorkspaceRecord, reason: string): string | null {
532
548
  const policyName = ensureRepoSteward(ws.repoRoot);
533
549
  if (!policyName) return null;
534
550
  try {
535
- const msg = sendMessage({
536
- from: "system",
537
- to: `policy:${policyName}`,
538
- kind: "system",
551
+ notifySystemMessage(`policy:${policyName}`, {
539
552
  subject: `Steward: ${ws.status} workspace needs attention`,
540
553
  body: `Workspace \`${ws.branch ?? ws.id}\` (id ${ws.id}) in ${ws.repoRoot} is ${ws.status} and could not auto-land (${reason}). Claim it first so auto-merge yields: \`agent-relay workspace claim --id ${ws.id} --purpose steward\`. Inspect: \`agent-relay steward inspect ${ws.id}\`. Then cd into ${ws.worktreePath}, rebase onto ${ws.baseRef ?? "base"}, resolve, run checks, and land: \`agent-relay workspace land --id ${ws.id} --strategy rebase-ff\` — or \`agent-relay workspace release --id ${ws.id}\` and escalate if you can't.`,
541
554
  payload: { kind: "workspace.steward-task", workspaceId: ws.id, repoRoot: ws.repoRoot, worktreePath: ws.worktreePath, branch: ws.branch, baseRef: ws.baseRef, status: ws.status, reason },
542
555
  });
543
- emitNewMessage(msg);
544
556
  getLifecycleManager().onMessageForPolicy(policyName);
545
557
  patchWorkspaceMetadata(ws.id, { stewardWokenAt: Date.now(), stewardPolicy: policyName });
546
558
  return policyName;
@@ -631,15 +643,11 @@ async function scanWorkspaceConflicts(): Promise<Record<string, unknown>> {
631
643
  if (woke) notifiedStewards.push(woke);
632
644
  } else if (ws.stewardAgentId) {
633
645
  try {
634
- const msg = sendMessage({
635
- from: "system",
636
- to: ws.stewardAgentId,
637
- kind: "system",
646
+ notifySystemMessage(ws.stewardAgentId, {
638
647
  subject: "Workspace merge conflict",
639
648
  body: `Workspace \`${ws.branch ?? ws.id}\` in ${ws.repoRoot} can no longer merge cleanly into ${p.baseRef ?? "base"} (${p.ahead ?? "?"} ahead, ${p.behind ?? "?"} behind). As repo steward, please coordinate resolution.`,
640
649
  payload: { kind: "workspace.conflict", workspaceId: ws.id, repoRoot: ws.repoRoot, branch: ws.branch, baseRef: p.baseRef, ahead: p.ahead, behind: p.behind },
641
650
  });
642
- emitNewMessage(msg);
643
651
  notifiedStewards.push(ws.stewardAgentId);
644
652
  } catch {
645
653
  // Steward unregistered/stale — the activity event still records it.
@@ -657,9 +665,11 @@ async function scanWorkspaceConflicts(): Promise<Record<string, unknown>> {
657
665
  return { scanned: candidates.length, flagged, cleared, merged, notifiedStewards };
658
666
  }
659
667
 
660
- // Deterministic auto-land (Layer 0, issue #167 / #207). Walk the "ready to land"
661
- // queue (`review_requested` isolated worktrees) and land any whose merge is
662
- // predicted conflict-free, via the shared lease-serialized merge helper — even
668
+ // Deterministic auto-land (Layer 0, issue #167 / #207 / #242). Walk the "ready to
669
+ // land" queue (isolated worktrees in any READY_TO_LAND status — `ready` from
670
+ // `relay_workspace_ready`, or `review_requested` from a failed-merge retry) and
671
+ // land any whose merge is predicted conflict-free, via the shared lease-serialized
672
+ // merge helper — even
663
673
  // when the base moved on (behind>0): mergeRebaseFf rebases onto the current base
664
674
  // before fast-forwarding. Only a predicted conflict or an unknown merge state is
665
675
  // left for the steward; clean parallel work lands with no agent in the loop.
@@ -669,7 +679,7 @@ async function autoMergeCleanFastForwards(): Promise<Record<string, unknown>> {
669
679
  if (!orchestrators.length) return { scanned: 0, skipped: "no online orchestrators" };
670
680
 
671
681
  const candidates = listWorkspaces().filter(
672
- (ws) => ws.mode === "isolated" && Boolean(ws.worktreePath) && ws.status === "review_requested",
682
+ (ws) => ws.mode === "isolated" && Boolean(ws.worktreePath) && READY_TO_LAND_STATUSES.has(ws.status),
673
683
  );
674
684
  const stewardEnabled = getStewardConfig().enabled;
675
685
  const merged: string[] = [];
@@ -738,7 +748,7 @@ async function autoMergeCleanFastForwards(): Promise<Record<string, unknown>> {
738
748
  function notifyTarget(target: string, subject: string, body: string, payload: Record<string, unknown>): string | null {
739
749
  if (!target) return null;
740
750
  try {
741
- emitNewMessage(sendMessage({ from: "system", to: target, kind: "system", subject, body, payload }));
751
+ notifySystemMessage(target, { subject, body, payload });
742
752
  return target;
743
753
  } catch {
744
754
  return null;
package/src/notify.ts ADDED
@@ -0,0 +1,31 @@
1
+ import { sendMessage } from "./db";
2
+ import { emitNewMessage } from "./sse";
3
+ import type { Message, MessageKind } from "./types";
4
+
5
+ export interface SystemNotifyOptions {
6
+ subject?: string;
7
+ body: string;
8
+ payload?: Record<string, unknown>;
9
+ /** Defaults to "system" — a bypass-targeting kind that wakes the recipient like a prompt. */
10
+ kind?: MessageKind;
11
+ /** Sender id; defaults to "system". */
12
+ from?: string;
13
+ }
14
+
15
+ /**
16
+ * Post a system DM to one agent and fan it out over the bus. This is the one home for
17
+ * "relay tells an agent something happened" — store-ahead delivers it on next poll if the
18
+ * recipient is offline (#234). Used by the GC sweep (maintenance) and lifecycle events (#239).
19
+ */
20
+ export function notifySystemMessage(to: string, opts: SystemNotifyOptions): Message {
21
+ const msg = sendMessage({
22
+ from: opts.from ?? "system",
23
+ to,
24
+ kind: opts.kind ?? "system",
25
+ subject: opts.subject,
26
+ body: opts.body,
27
+ payload: opts.payload,
28
+ });
29
+ emitNewMessage(msg);
30
+ return msg;
31
+ }
package/src/routes.ts CHANGED
@@ -177,6 +177,8 @@ import {
177
177
  WORKSPACE_ACTIONS,
178
178
  } from "./workspace-actions";
179
179
  import { describeWorkspacePhase, landReceipt, TERMINAL_WORKSPACE_STATUSES } from "./workspace-phase";
180
+ import { notifyBranchLanded } from "./branch-landed";
181
+ import { collectWorkspaceOrphans } from "./workspace-orphans";
180
182
  import type { WorkspaceDiagnostics, WorkspaceGitState, WorkspaceRecord } from "./types";
181
183
  import {
182
184
  getComponentAuth,
@@ -3878,41 +3880,14 @@ const getWorkspaceDiff: Handler = (req, params) => {
3878
3880
  };
3879
3881
 
3880
3882
  // Worktrees found on disk (agent/* branches) with no live workspace row — left
3881
- // behind by crashes or failed cleanups. Probes each known repo's owning host
3882
- // and subtracts active DB rows. Reclaim them via POST .../orphans/reclaim.
3883
+ // behind by crashes or failed cleanups. Probes each known repo's owning host and
3884
+ // subtracts live DB rows, enriching each with land-state so reap-safe cruft is
3885
+ // distinguishable from stranded work. Also reports the inverse drift (live rows
3886
+ // whose worktree vanished). Discovery is shared with the scheduled reaper
3887
+ // (workspace-orphan-reaper). Reclaim via POST .../orphans/reclaim.
3883
3888
  const getWorkspaceOrphans: Handler = async () => {
3884
- const orchestrators = listOrchestrators().filter((orch) => orch.status === "online" && orch.apiUrl);
3885
- if (!orchestrators.length) return json({ orphans: [], reason: "no online orchestrators" });
3886
- const all = listWorkspaces();
3887
- const repoRoots = [...new Set(all.map((ws) => ws.repoRoot).filter(Boolean))];
3888
- const headers: Record<string, string> = {};
3889
- const relayToken = process.env.AGENT_RELAY_TOKEN;
3890
- if (relayToken) headers[RELAY_TOKEN_HEADER] = relayToken;
3891
- const orphans: WorkspaceOrphan[] = [];
3892
-
3893
- for (const repoRoot of repoRoots) {
3894
- const orch = orchestrators.find((candidate) => candidate.apiUrl && isPathWithinBase(repoRoot, candidate.baseDir));
3895
- if (!orch?.apiUrl) continue;
3896
- let probe: WorkspaceProbe | undefined;
3897
- try {
3898
- const res = await fetch(`${orch.apiUrl}/api/workspace/probe?path=${encodeURIComponent(repoRoot)}`, { headers, signal: AbortSignal.timeout(10_000) });
3899
- if (!res.ok) continue;
3900
- probe = await res.json() as WorkspaceProbe;
3901
- } catch {
3902
- continue;
3903
- }
3904
- const rowsByPath = new Map(all.filter((ws) => ws.repoRoot === repoRoot && ws.worktreePath).map((ws) => [resolve(ws.worktreePath), ws]));
3905
- for (const worktree of probe?.worktrees ?? []) {
3906
- if (!worktree.path || resolve(worktree.path) === resolve(repoRoot)) continue;
3907
- // Only agent-relay-created worktrees (agent/* branches) are reclaimable —
3908
- // never touch a user's own linked worktrees.
3909
- if (!worktree.branch?.startsWith("agent/")) continue;
3910
- const row = rowsByPath.get(resolve(worktree.path));
3911
- if (row && !TERMINAL_WORKSPACE_STATUSES.has(row.status)) continue; // tracked & live
3912
- orphans.push({ worktreePath: worktree.path, repoRoot, branch: worktree.branch, headSha: worktree.headSha, hadTerminalRow: Boolean(row) });
3913
- }
3914
- }
3915
- return json({ orphans });
3889
+ const { orphans, missingWorktrees, reason } = await collectWorkspaceOrphans();
3890
+ return json(reason ? { orphans, missingWorktrees, reason } : { orphans, missingWorktrees });
3916
3891
  };
3917
3892
 
3918
3893
  const postWorkspaceOrphanReclaim: Handler = async (req) => {
@@ -3926,11 +3901,25 @@ const postWorkspaceOrphanReclaim: Handler = async (req) => {
3926
3901
  const repoRoot = cleanString(parsed.body.repoRoot, "repoRoot", { max: 1000 });
3927
3902
  const branch = cleanString(parsed.body.branch, "branch", { max: 240 });
3928
3903
  if (!worktreePath || !repoRoot) return error("worktreePath and repoRoot required", 400);
3904
+ const force = parsed.body.force === true;
3929
3905
  // Refuse to reclaim a path that still backs a live workspace row.
3930
3906
  const live = listWorkspaces().find((ws) => ws.worktreePath && resolve(ws.worktreePath) === resolve(worktreePath) && !TERMINAL_WORKSPACE_STATUSES.has(ws.status));
3931
3907
  if (live) return error(`path backs live workspace ${live.id}; clean it through the workspace, not orphan reclaim`, 409);
3932
3908
  const orch = listOrchestrators().find((candidate) => candidate.status === "online" && isPathWithinBase(repoRoot, candidate.baseDir));
3933
3909
  if (!orch) return error("no online orchestrator owns this path", 409);
3910
+ // Land-safety gate (#244): reclaim force-removes the worktree, so refuse when
3911
+ // it holds un-landed work unless the caller explicitly opts into discarding
3912
+ // it. Mirrors the scheduled reaper — never destroy work on uncertainty.
3913
+ if (!force) {
3914
+ const { orphans } = await collectWorkspaceOrphans();
3915
+ const target = orphans.find((o) => resolve(o.worktreePath) === resolve(worktreePath));
3916
+ if (target && target.safeToReap !== true) {
3917
+ const why = target.safeToReap === undefined
3918
+ ? "land-state could not be determined"
3919
+ : target.dirty ? "uncommitted changes" : `${target.unmergedAhead ?? target.ahead ?? "?"} un-landed commit(s)`;
3920
+ return error(`worktree holds un-landed work (${why}); recover it first, or pass {"force":true} to discard`, 409);
3921
+ }
3922
+ }
3934
3923
  const command = createCommand({
3935
3924
  type: "workspace.cleanup",
3936
3925
  source: "system",
@@ -4478,6 +4467,9 @@ const patchCommand: Handler = async (req, params) => {
4478
4467
  const workspaceId = cleanString(command.result.workspaceId, "result.workspaceId", { max: 160 });
4479
4468
  const resultStatus = optionalEnum(command.result.status, "result.status", VALID_WORKSPACE_STATUSES) as WorkspaceStatus | undefined;
4480
4469
  if (workspaceId && resultStatus) {
4470
+ // Snapshot the row BEFORE the recycle repoints `branch` (#206) — the landed
4471
+ // branch name + author (#239 branch.landed push) come from this pre-mutation state.
4472
+ const landedWorkspace = getWorkspace(workspaceId);
4481
4473
  updateWorkspaceStatus(workspaceId, resultStatus, {
4482
4474
  mergeResult: command.result,
4483
4475
  mergeCommandId: command.id,
@@ -4491,10 +4483,20 @@ const patchCommand: Handler = async (req, params) => {
4491
4483
  // Land-and-continue (#206): the worktree was recycled onto a fresh branch.
4492
4484
  // Repoint the row so the next merge targets the live branch, not the deleted one.
4493
4485
  const newBranch = cleanString(command.result.newBranch, "result.newBranch", { max: 240 });
4486
+ const mergedSha = cleanString(command.result.mergedSha, "result.mergedSha", { max: 64 });
4494
4487
  if (newBranch) {
4495
- const mergedSha = cleanString(command.result.mergedSha, "result.mergedSha", { max: 64 });
4496
4488
  setWorkspaceBranch(workspaceId, newBranch, mergedSha);
4497
4489
  }
4490
+ // #239 — push the author a "your branch landed" notice (no polling). Only on a
4491
+ // real land; a no-op resolution (#230) merged nothing, so it earns no notice.
4492
+ if (command.result.merged === true && landedWorkspace) {
4493
+ notifyBranchLanded({
4494
+ workspace: landedWorkspace,
4495
+ mergedSha,
4496
+ subject: cleanString(command.result.subject, "result.subject", { max: 200 }),
4497
+ newBranch,
4498
+ });
4499
+ }
4498
4500
  }
4499
4501
  } else if (command.status === "failed" && command.correlationId) {
4500
4502
  // Merge couldn't complete — don't leave it stuck in merge_planned.
@@ -0,0 +1,289 @@
1
+ // Orphaned-worktree reconciliation (#244). An isolated worktree on disk with no
2
+ // live DB row is "orphaned" — left behind when a session ended without a clean
3
+ // teardown (crash, killed runner, a reaped row). Symmetrically, a live row whose
4
+ // worktree is gone from disk is the other half of the same drift. Neither is
5
+ // visible to the agent or the dashboard, so unlanded work can sit stranded for
6
+ // weeks (one real casualty: a CI-guard test, recovered by hand).
7
+ //
8
+ // THE invariant (single home, see `worktreeReapable`): reaping is gated on
9
+ // "nothing would be lost" — landed or empty — NEVER on session liveness or a
10
+ // timer. A worktree holding un-landed commits is flagged for attention, never
11
+ // force-removed. This module is the disk⇄DB reconciler the GC's `git worktree
12
+ // prune` (a no-op while the directory exists) never was.
13
+
14
+ import { resolve } from "node:path";
15
+ import { RELAY_TOKEN_HEADER, errMessage } from "agent-relay-sdk";
16
+ import type { WorkspaceMergePreview, WorkspaceOrphan, WorkspaceProbe, WorkspaceRecord } from "./types";
17
+ import { createActivityEvent, listOrchestrators, listWorkspaces } from "./db";
18
+ import { createCommand } from "./commands-db";
19
+ import { emitRelayEvent } from "./events";
20
+ import { isPathWithinBase } from "./utils";
21
+ import { TERMINAL_WORKSPACE_STATUSES, worktreeReapable, type WorktreeReapState } from "./workspace-phase";
22
+
23
+ // Don't re-flag the same un-landed orphan every sweep — surface it once, then
24
+ // stay quiet for this window. In-memory (keyed by worktree path, or by `missing:<path>` for DB→disk drift) like the
25
+ // orphaned-session reaper: a restart re-announces, which is acceptable noise.
26
+ const UNLANDED_FLAG_COOLDOWN_MS = Number(process.env.AGENT_RELAY_ORPHAN_FLAG_COOLDOWN_MS) || 6 * 60 * 60 * 1000;
27
+ // Set AGENT_RELAY_ORPHAN_WORKTREE_REAP=0 to detect + report orphans but never
28
+ // remove them (parity with the session reaper's detect-only switch).
29
+ const orphanWorktreeReapEnabled = () => process.env.AGENT_RELAY_ORPHAN_WORKTREE_REAP !== "0";
30
+ const flaggedAt = new Map<string, number>();
31
+
32
+ export function resetOrphanWorktreeStateForTests(): void {
33
+ flaggedAt.clear();
34
+ }
35
+
36
+ interface OnlineOrchestrator {
37
+ id: string;
38
+ agentId: string;
39
+ apiUrl?: string;
40
+ baseDir?: string;
41
+ }
42
+
43
+ function relayHeaders(): Record<string, string> {
44
+ const headers: Record<string, string> = {};
45
+ const token = process.env.AGENT_RELAY_TOKEN;
46
+ if (token) headers[RELAY_TOKEN_HEADER] = token;
47
+ return headers;
48
+ }
49
+
50
+ async function fetchHostProbe(apiUrl: string, repoRoot: string): Promise<WorkspaceProbe | null> {
51
+ try {
52
+ const res = await fetch(`${apiUrl}/api/workspace/probe?path=${encodeURIComponent(repoRoot)}`, {
53
+ headers: relayHeaders(),
54
+ signal: AbortSignal.timeout(10_000),
55
+ });
56
+ if (!res.ok) return null;
57
+ return await res.json() as WorkspaceProbe;
58
+ } catch {
59
+ return null;
60
+ }
61
+ }
62
+
63
+ // Land-state for a single worktree path. Reuses the host merge-preview (squash-
64
+ // aware `landed`, `unmergedAhead`, `dirtyCount`) the conflict scan already trusts.
65
+ async function fetchWorktreeReapState(apiUrl: string, worktreePath: string, baseRef?: string): Promise<WorkspaceMergePreview | null> {
66
+ const query = new URLSearchParams({ path: worktreePath, checkPr: "1" });
67
+ if (baseRef) query.set("baseRef", baseRef);
68
+ try {
69
+ const res = await fetch(`${apiUrl}/api/workspace/merge-preview?${query.toString()}`, {
70
+ headers: relayHeaders(),
71
+ signal: AbortSignal.timeout(8_000),
72
+ });
73
+ if (!res.ok) return null;
74
+ return await res.json() as WorkspaceMergePreview;
75
+ } catch {
76
+ return null;
77
+ }
78
+ }
79
+
80
+ function onlineOrchestrators(): OnlineOrchestrator[] {
81
+ return listOrchestrators()
82
+ .filter((orch) => orch.status === "online" && orch.apiUrl && orch.agentId)
83
+ .map((orch) => ({ id: orch.id, agentId: orch.agentId!, apiUrl: orch.apiUrl, baseDir: orch.baseDir }));
84
+ }
85
+
86
+ /** Repo roots any workspace row references — the seeds we probe for orphans.
87
+ * One probe per repo returns ALL its worktrees, so a single (even shared) row
88
+ * per repo is enough to discover every orphan under it. */
89
+ function knownRepoRoots(workspaces: WorkspaceRecord[]): string[] {
90
+ return [...new Set(workspaces.map((ws) => ws.repoRoot).filter(Boolean))];
91
+ }
92
+
93
+ export interface CollectOrphansResult {
94
+ orphans: WorkspaceOrphan[];
95
+ /** Live isolated rows whose worktree is missing on disk (DB→disk drift). */
96
+ missingWorktrees: Array<{ workspaceId: string; worktreePath: string; repoRoot: string; status: string }>;
97
+ reason?: string;
98
+ }
99
+
100
+ /**
101
+ * The disk⇄DB reconcile pass. For every known repo with an online owning host:
102
+ * probe its worktrees, subtract live DB rows → orphans (disk without a live row),
103
+ * and the inverse → live rows whose worktree is gone (DB without disk). Each
104
+ * orphan is enriched with land-state so callers can tell reap-safe cruft from
105
+ * stranded work. Shared by the `/orphans` route and the scheduled reaper.
106
+ */
107
+ export async function collectWorkspaceOrphans(): Promise<CollectOrphansResult> {
108
+ const orchestrators = onlineOrchestrators();
109
+ if (!orchestrators.length) return { orphans: [], missingWorktrees: [], reason: "no online orchestrators" };
110
+
111
+ const all = listWorkspaces();
112
+ const orphans: WorkspaceOrphan[] = [];
113
+ const missingWorktrees: CollectOrphansResult["missingWorktrees"] = [];
114
+
115
+ for (const repoRoot of knownRepoRoots(all)) {
116
+ const orch = orchestrators.find((candidate) => candidate.apiUrl && isPathWithinBase(repoRoot, candidate.baseDir));
117
+ if (!orch?.apiUrl) continue;
118
+ const probe = await fetchHostProbe(orch.apiUrl, repoRoot);
119
+ if (!probe?.worktrees) continue;
120
+
121
+ const liveRowsByPath = new Map(
122
+ all
123
+ .filter((ws) => ws.repoRoot === repoRoot && ws.worktreePath && !TERMINAL_WORKSPACE_STATUSES.has(ws.status))
124
+ .map((ws) => [resolve(ws.worktreePath), ws]),
125
+ );
126
+ const onDisk = new Set(probe.worktrees.map((wt) => (wt.path ? resolve(wt.path) : "")).filter(Boolean));
127
+
128
+ // DB→disk drift: a live isolated row whose worktree is no longer on disk.
129
+ for (const [path, ws] of liveRowsByPath) {
130
+ if (ws.mode === "isolated" && !onDisk.has(path)) {
131
+ missingWorktrees.push({ workspaceId: ws.id, worktreePath: ws.worktreePath, repoRoot, status: ws.status });
132
+ }
133
+ }
134
+
135
+ // disk→DB drift: a worktree on disk with no live row.
136
+ const rowsByPath = new Map(
137
+ all.filter((ws) => ws.repoRoot === repoRoot && ws.worktreePath).map((ws) => [resolve(ws.worktreePath), ws]),
138
+ );
139
+ for (const worktree of probe.worktrees) {
140
+ if (!worktree.path || resolve(worktree.path) === resolve(repoRoot)) continue;
141
+ // Only agent-relay-created worktrees (agent/* branches) are reclaimable —
142
+ // never touch a user's own linked worktrees.
143
+ if (!worktree.branch?.startsWith("agent/")) continue;
144
+ const row = rowsByPath.get(resolve(worktree.path));
145
+ if (row && !TERMINAL_WORKSPACE_STATUSES.has(row.status)) continue; // tracked & live
146
+
147
+ const orphan: WorkspaceOrphan = {
148
+ worktreePath: worktree.path,
149
+ repoRoot,
150
+ branch: worktree.branch,
151
+ headSha: worktree.headSha,
152
+ hadTerminalRow: Boolean(row),
153
+ };
154
+ const preview = await fetchWorktreeReapState(orch.apiUrl, worktree.path, probe.branch);
155
+ if (preview && !preview.missing && !preview.error) {
156
+ const state: WorktreeReapState = {
157
+ landed: preview.landed,
158
+ ahead: preview.ahead,
159
+ unmergedAhead: preview.unmergedAhead,
160
+ dirtyCount: preview.dirtyCount,
161
+ };
162
+ orphan.landed = preview.landed;
163
+ orphan.ahead = preview.ahead;
164
+ orphan.unmergedAhead = preview.unmergedAhead;
165
+ orphan.dirty = (preview.dirtyCount ?? 0) > 0;
166
+ orphan.safeToReap = worktreeReapable(state);
167
+ }
168
+ // No usable merge-preview (fetch failed, or it reported missing/error) → safeToReap stays undefined (treated as not-safe by callers).
169
+ orphans.push(orphan);
170
+ }
171
+ }
172
+
173
+ return { orphans, missingWorktrees };
174
+ }
175
+
176
+ function dispatchCleanup(orch: OnlineOrchestrator, orphan: WorkspaceOrphan): string {
177
+ const command = createCommand({
178
+ type: "workspace.cleanup",
179
+ source: "system",
180
+ target: orch.agentId,
181
+ params: {
182
+ action: "cleanup",
183
+ worktreePath: orphan.worktreePath,
184
+ repoRoot: orphan.repoRoot,
185
+ branch: orphan.branch,
186
+ deleteBranch: true,
187
+ reclaim: true,
188
+ requestedBy: "workspace-orphan-reaper",
189
+ requestedAt: Date.now(),
190
+ },
191
+ });
192
+ emitRelayEvent({ type: `command.${command.status}`, source: command.source, subject: command.id, data: { command } });
193
+ return command.id;
194
+ }
195
+
196
+ /**
197
+ * Scheduled reaper (maintenance job). Auto-removes orphaned worktrees that are
198
+ * safe to reap (landed/empty, clean tree) and flags the rest — un-landed work, a
199
+ * dirty tree, or an un-probeable host — as needs-attention instead of destroying them. Also
200
+ * reports the inverse drift (live rows whose worktree vanished) so both
201
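+ * directions surface. Never removes on uncertainty; in detect-only mode (AGENT_RELAY_ORPHAN_WORKTREE_REAP=0) reap-safe orphans are skipped, not removed.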
202
+ */
203
+ export async function reapOrphanedWorktrees(): Promise<Record<string, unknown>> {
204
+ const { orphans, missingWorktrees, reason } = await collectWorkspaceOrphans();
205
+ if (reason) return { skipped: reason };
206
+
207
+ const orchestrators = onlineOrchestrators();
208
+ const reapEnabled = orphanWorktreeReapEnabled();
209
+ const reaped: string[] = [];
210
+ const flagged: string[] = [];
211
+ const now = Date.now();
212
+
213
+ for (const orphan of orphans) {
214
+ const orch = orchestrators.find((candidate) => isPathWithinBase(orphan.repoRoot, candidate.baseDir));
215
+ if (!orch) continue;
216
+
217
+ if (orphan.safeToReap === true) {
218
+ if (!reapEnabled) continue; // detect-only mode
219
+ const commandId = dispatchCleanup(orch, orphan);
220
+ reaped.push(orphan.worktreePath);
221
+ flaggedAt.delete(orphan.worktreePath);
222
+ createActivityEvent({
223
+ clientId: `workspace-orphan-reaped-${orphan.worktreePath}-${now}`,
224
+ kind: "state",
225
+ title: "Orphaned worktree reaped",
226
+ body: `${orphan.branch ?? orphan.worktreePath} — ${orphan.landed ? "work already landed" : "no work to preserve"}; removing the stale worktree`,
227
+ meta: orphan.branch ?? orphan.worktreePath,
228
+ icon: "ti-trash",
229
+ view: "orchestrators",
230
+ metadata: { source: "server", maintenanceJobId: "workspace-orphan-reaper", worktreePath: orphan.worktreePath, repoRoot: orphan.repoRoot, commandId, landed: orphan.landed },
231
+ });
232
+ continue;
233
+ }
234
+
235
+ // Not safe (un-landed work, dirty tree, or un-probeable) — flag once per
236
+ // cooldown, never remove. This is the stranded-work needs-attention entry.
237
+ const last = flaggedAt.get(orphan.worktreePath) ?? 0;
238
+ if (now - last < UNLANDED_FLAG_COOLDOWN_MS) continue;
239
+ flaggedAt.set(orphan.worktreePath, now);
240
+ flagged.push(orphan.worktreePath);
241
+ const detail = orphan.safeToReap === undefined
242
+ ? "host could not be probed for land-state"
243
+ : orphan.dirty
244
+ ? "uncommitted changes in the worktree"
245
+ : `${orphan.unmergedAhead ?? orphan.ahead ?? "?"} un-landed commit(s)`;
246
+ createActivityEvent({
247
+ clientId: `workspace-orphan-stranded-${orphan.worktreePath}-${now}`,
248
+ kind: "state",
249
+ title: "Stranded worktree needs attention",
250
+ body: `${orphan.branch ?? orphan.worktreePath} in ${orphan.repoRoot} is orphaned (no live workspace row) and holds work that hasn't landed — ${detail}. Reclaim with force to discard, or recover the commits before removing.`,
251
+ meta: orphan.branch ?? orphan.worktreePath,
252
+ icon: "ti-alert-triangle",
253
+ view: "orchestrators",
254
+ metadata: { source: "server", maintenanceJobId: "workspace-orphan-reaper", worktreePath: orphan.worktreePath, repoRoot: orphan.repoRoot, branch: orphan.branch, ahead: orphan.ahead, unmergedAhead: orphan.unmergedAhead, dirty: orphan.dirty, headSha: orphan.headSha },
255
+ });
256
+ }
257
+
258
+ // DB→disk drift is observability-only: a live row whose worktree vanished is
259
+ // surfaced, not auto-deleted (the row may still be mid-land or recoverable).
260
+ for (const missing of missingWorktrees) {
261
+ const key = `missing:${missing.worktreePath}`;
262
+ const last = flaggedAt.get(key) ?? 0;
263
+ if (now - last < UNLANDED_FLAG_COOLDOWN_MS) continue;
264
+ flaggedAt.set(key, now);
265
+ createActivityEvent({
266
+ clientId: `workspace-row-no-worktree-${missing.workspaceId}-${now}`,
267
+ kind: "state",
268
+ title: "Workspace row has no worktree on disk",
269
+ body: `Workspace ${missing.workspaceId} (${missing.status}) points at ${missing.worktreePath}, which no longer exists on disk — disk/DB drift.`,
270
+ meta: missing.workspaceId,
271
+ icon: "ti-unlink",
272
+ view: "orchestrators",
273
+ metadata: { source: "server", maintenanceJobId: "workspace-orphan-reaper", workspaceId: missing.workspaceId, worktreePath: missing.worktreePath, status: missing.status },
274
+ });
275
+ }
276
+
277
+ // Forget cooldown entries for orphans that are gone (reaped/recovered) so a
278
+ // future re-orphaning of the same path re-announces immediately.
279
+ const liveKeys = new Set([...orphans.map((o) => o.worktreePath), ...missingWorktrees.map((m) => `missing:${m.worktreePath}`)]);
280
+ for (const key of flaggedAt.keys()) if (!liveKeys.has(key) && !reaped.includes(key)) flaggedAt.delete(key);
281
+
282
+ return {
283
+ scanned: orphans.length,
284
+ reaped,
285
+ flagged,
286
+ missingWorktrees: missingWorktrees.map((m) => m.workspaceId),
287
+ reapEnabled,
288
+ };
289
+ }
@@ -21,6 +21,55 @@ import type { WorkspaceRecord, WorkspaceStatus } from "./types";
21
21
  // initialize primer (don't brief an agent on a dead workspace). Was duplicated.
22
22
  export const TERMINAL_WORKSPACE_STATUSES = new Set<WorkspaceStatus>(["cleaned", "merged", "abandoned"]);
23
23
 
24
+ // The "handed off, waiting to land" statuses — an agent has finished and the
25
+ // auto-merge-back is responsible for getting the branch onto base. SINGLE HOME:
26
+ // the auto-land consumer (maintenance `autoMergeCleanFastForwards`) and the
27
+ // strand-escalation set MUST both derive from this. They drifted before (#242):
28
+ // `relay_workspace_ready` sets `ready`, but the consumer only scanned
29
+ // `review_requested`, so a clean `ready` worktree was never a merge candidate and
30
+ // parked forever while this phase view kept reporting "healthy, wait." Producer
31
+ // and consumer now read the same set so a `ready` can never silently fall out of
32
+ // the land queue again. (`review_requested` is the same healthy hand-off state —
33
+ // it's also where a failed auto-merge lands for a retry, see routes.ts.)
34
+ export const READY_TO_LAND_STATUSES = new Set<WorkspaceStatus>(["ready", "review_requested"]);
35
+
36
+ // Land-state shape a host reports for a worktree (subset of WorkspaceMergePreview
37
+ // / WorkspaceGitState — the fields that decide reapability). Kept structural so
38
+ // both the merge-preview path and a raw git-state probe satisfy it.
39
+ export interface WorktreeReapState {
40
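+ /** Work already in base (squash/cherry/PR-merged). Detection can only under-report: it may miss a landed branch, so `true` is trustworthy but `false`/absent is not proof of un-landed work. */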
41
+ landed?: boolean;
42
+ /** Commits ahead of base by raw count (a squash-landed branch still shows >0). */
43
+ ahead?: number;
44
+ /** Commits ahead whose patch is NOT already in base — the squash-aware count. */
45
+ unmergedAhead?: number;
46
+ /** Uncommitted working-tree changes. */
47
+ dirtyCount?: number;
48
+ }
49
+
50
+ // THE reap-safety invariant (#244): a worktree may be removed only when nothing
51
+ // would be lost — clean tree AND (no commits ahead OR the work already landed).
52
+ // SINGLE HOME: the orphan reaper, the orphan-reclaim gate, and the host's
53
+ // exit-time `reconcileWorkspace` "empty" check all mean the same thing; they
54
+ // drifted into three private copies and a land-blind force-remove slipped
55
+ // through (the recovered NUL-guard test was one keystroke from deletion).
56
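+ // Mirror `reconcileWorkspace`: landing detection can only under-report, so an
57
+ // uncertain worktree (no state at all) is NEVER reapable — it gets flagged for review instead. Absent count fields default to 0 here, so pass only a state taken from a successful probe.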
58
+ export function worktreeReapable(state: WorktreeReapState | null | undefined): boolean {
59
+ if (!state) return false;
60
+ if ((state.dirtyCount ?? 0) > 0) return false;
61
+ if (state.landed === true) return true;
62
+ return (state.unmergedAhead ?? state.ahead ?? 0) === 0;
63
+ }
64
+
65
+ // How long a workspace may sit in a ready-to-land status before the directive
66
+ // projection stops saying "healthy, just wait" and surfaces it as needs-attention
67
+ // (#242 watchdog). A clean auto-merge runs ~every 2 min, so a handful of missed
68
+ // sweeps means something is wrong (wrong status filter, no online orchestrator,
69
+ // an unpushed branch, a wedged steward) and the agent/human should be told —
70
+ // instead of the old behavior where it looked healthy for 90 minutes.
71
+ export const LAND_PENDING_STALL_MS = 15 * 60 * 1000;
72
+
24
73
  export type WorkspacePhase =
25
74
  | "working" // active — your turn: commit, then mark ready
26
75
  | "land-pending" // ready | review_requested — handed off; auto-merge will land it
@@ -66,7 +115,17 @@ const READY_ACTION: WorkspaceNextAction = {
66
115
  // Map every WorkspaceStatus to the branch agent's mental model. Statuses that
67
116
  // look scary but are healthy (review_requested, conflict) carry actionNeeded:false
68
117
  // and an explicit "not your job" hint.
69
- export function describeWorkspacePhase(workspace: Pick<WorkspaceRecord, "status" | "branch" | "stewardAgentId">): WorkspacePhaseView {
118
+ //
119
+ // `opts.now` (defaults to wall-clock) drives the #242 stall watchdog: a workspace
120
+ // pending-to-land past LAND_PENDING_STALL_MS flips from the "healthy, wait" view
121
+ // to needs-attention with a real blocker, so the status surface the agent polls
122
+ // can't keep masking a stuck land. The clock is `readyAt` (set once when the agent
123
+ // marks ready, immune to the heartbeat `updated_at` bump) — not `updatedAt`, which
124
+ // keeps ticking on every heartbeat and made the stall look fresh forever.
125
+ export function describeWorkspacePhase(
126
+ workspace: Pick<WorkspaceRecord, "status" | "branch" | "stewardAgentId" | "readyAt">,
127
+ opts: { now?: number; stallMs?: number } = {},
128
+ ): WorkspacePhaseView {
70
129
  switch (workspace.status) {
71
130
  case "active":
72
131
  return {
@@ -78,10 +137,27 @@ export function describeWorkspacePhase(workspace: Pick<WorkspaceRecord, "status"
78
137
  blockers: [],
79
138
  };
80
139
  case "ready":
81
- case "review_requested":
140
+ case "review_requested": {
82
141
  // The #235 crux: these are the SAME healthy "handed off, waiting" state.
83
142
  // `review_requested` reads like an escalation but is the normal post-ready
84
143
  // node; an absent steward is the healthy case, not a stall.
144
+ const now = opts.now ?? Date.now();
145
+ const stallMs = opts.stallMs ?? LAND_PENDING_STALL_MS;
146
+ const pendingMs = typeof workspace.readyAt === "number" ? now - workspace.readyAt : undefined;
147
+ // #242 watchdog: past the bound this is no longer "healthy, wait." Surface it
148
+ // as needs-attention with a real blocker instead of the anti-panic view, so
149
+ // the agent (and the dashboard) stop reporting a wedged land as healthy.
150
+ if (pendingMs !== undefined && pendingMs > stallMs) {
151
+ const mins = Math.round(pendingMs / 60_000);
152
+ return {
153
+ phase: "land-pending",
154
+ headline: `Stalled — handed off ${mins} min ago but still hasn't landed. A clean auto-merge runs every ~2 min, so this is past the healthy window and likely stuck (no online orchestrator, an unpushed branch, or a wedged merge/steward).`,
155
+ hint: "Do NOT merge, push, rebase, or touch the main checkout yourself. Flag this to a human or the repo steward — the auto-merge/steward path isn't progressing and needs attention.",
156
+ actionNeeded: true,
157
+ nextActions: [WAIT_ACTION],
158
+ blockers: [`pending land for ~${mins} min with no progress — auto-merge/steward isn't landing it`],
159
+ };
160
+ }
85
161
  return {
86
162
  phase: "land-pending",
87
163
  headline: "Handed off — waiting for the auto-merge to land your branch. This is the normal, healthy post-ready state (not an escalation).",
@@ -90,6 +166,7 @@ export function describeWorkspacePhase(workspace: Pick<WorkspaceRecord, "status"
90
166
  nextActions: [WAIT_ACTION],
91
167
  blockers: [],
92
168
  };
169
+ }
93
170
  case "merge_planned":
94
171
  return {
95
172
  phase: "landing",
@@ -157,7 +234,7 @@ export function worktreeMcpInstructions(workspace: Pick<WorkspaceRecord, "branch
157
234
  `You are in an isolated git worktree on branch ${branch}, based on ${base} — NOT the main checkout. ${base} moves under you as other agents land in parallel; that's expected.`,
158
235
  "Changes reach the base via: commit your work, then call `relay_workspace_ready`. Relay rebases onto the latest base, lands, and pushes for you.",
159
236
  "Do NOT push, rebase, merge, resolve conflicts, or `cd` into the main checkout — Relay (and a steward, spawned only if a clean auto-merge isn't possible) own all of that.",
160
- "After `ready` the status is `review_requested` that is the NORMAL, healthy hand-off state, not a stall. Call `relay_workspace_status` with `wait:true` to block until your branch lands; you'll then continue on a fresh rebased branch (name gains a `--N` suffix).",
237
+ "After `ready` the status is `ready` (a normal, healthy hand-off state, not a stall). Call `relay_workspace_status` with `wait:true` to block until your branch lands; you'll then continue on a fresh rebased branch (name gains a `--N` suffix).",
161
238
  "Call `relay_workspace_status` anytime to see where you are and the exact next step.",
162
239
  ].join("\n");
163
240
  }