agent-relay-server 0.11.8 → 0.11.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,10 @@ export interface ProviderStatusEvent {
12
12
  status: string;
13
13
  id?: string;
14
14
  timestamp?: number;
15
+ title?: string;
16
+ body?: string;
17
+ icon?: string;
18
+ metadata?: Record<string, unknown>;
15
19
  };
16
20
  id?: string;
17
21
  label?: string;
package/src/bus.ts CHANGED
@@ -159,6 +159,7 @@ function handleFrame(ws: BusWebSocket, frame: ReturnType<typeof validateClientFr
159
159
  }
160
160
  const after = getAgent(conn.agentId);
161
161
  auditProviderStateTransition(conn.agentId, before, after);
162
+ auditRunnerTimelineEvent(conn.agentId, after?.meta?.timelineEvent);
162
163
  // A real PreCompact/SessionStart hook arrives as a timelineEvent in the
163
164
  // merged meta — clears any pending stall watch (stale events ignored).
164
165
  noteAgentTimelineEvent(conn.agentId, after?.meta?.timelineEvent);
@@ -558,6 +559,47 @@ function auditProviderStateTransition(agentId: string, before: AgentCard | null
558
559
  }
559
560
  }
560
561
 
562
/**
 * Mirrors a runner-originated timeline event into the activity feed and announces it
 * as an `activity.created` relay event.
 *
 * The event is ignored unless it is a record, carries `metadata.source === "runner"`,
 * and has a non-empty string `status`. Missing optional fields fall back to values
 * derived from the status / event type / timestamp.
 *
 * @param agentId - Agent whose meta carried the timeline event.
 * @param timelineEvent - Untrusted value (read from the agent's `meta.timelineEvent`).
 */
function auditRunnerTimelineEvent(agentId: string, timelineEvent: unknown): void {
  if (!isRecord(timelineEvent)) return;

  const runnerMeta = isRecord(timelineEvent.metadata) ? timelineEvent.metadata : {};
  if (runnerMeta.source !== "runner") return;

  const status = stringValue(timelineEvent.status);
  if (!status) return;

  // Derive stable identifiers first; each one falls back to the previous.
  const eventType = stringValue(runnerMeta.eventType) ?? status;
  const timestamp = numberValue(timelineEvent.timestamp) ?? Date.now();
  const timelineId = stringValue(timelineEvent.id) ?? `${eventType}-${timestamp}`;

  // Presentation fields: a missing title is the status with separators turned into spaces.
  const title = stringValue(timelineEvent.title) ?? status.replace(/[._-]+/g, " ");
  const body = stringValue(timelineEvent.body);
  const icon = stringValue(timelineEvent.icon) ?? "ti-activity";

  const activityMetadata = {
    ...runnerMeta,
    eventType,
    timelineStatus: status,
    timelineId,
    timelineTimestamp: timestamp,
  };

  try {
    const activity = createActivityEvent({
      clientId: `runner-timeline-${agentId}-${timelineId}`,
      kind: "state",
      title,
      body,
      meta: agentId,
      icon,
      view: "agents",
      agentId,
      metadata: activityMetadata,
    });
    emitRelayEvent({
      type: "activity.created",
      source: "server",
      subject: String(activity.id),
      data: activity as unknown as Record<string, unknown>,
    });
  } catch {
    // Timeline writes must never block bus status updates.
  }
}
602
+
561
603
  function providerStateFromAgent(agent: AgentCard | null | undefined): Record<string, unknown> | null {
562
604
  const value = agent?.meta?.providerState;
563
605
  if (!isRecord(value) || typeof value.state !== "string") return null;
@@ -7,13 +7,17 @@ import type {
7
7
  ConfigHistoryEntry,
8
8
  ManagedAgentState,
9
9
  ManagedAgentStatus,
10
+ SpawnApprovalMode,
10
11
  SpawnPolicy,
11
12
  SpawnProvider,
13
+ StewardConfig,
12
14
  } from "./types";
13
15
 
14
16
  const CONFIG_HISTORY_LIMIT = 50;
15
17
  const SPAWN_POLICY_NAMESPACE = "spawn-policy";
16
18
  const AGENT_PROFILE_NAMESPACE = "agent-profile";
19
+ const STEWARD_NAMESPACE = "steward";
20
+ const STEWARD_KEY = "default";
17
21
  const VALID_PROVIDERS = ["claude", "codex"] as const;
18
22
  const VALID_PROFILE_PROVIDERS = ["any", "claude", "codex"] as const;
19
23
  const VALID_PROFILE_BASES = ["host", "minimal", "isolated"] as const;
@@ -390,10 +394,42 @@ function cleanBinding(value: unknown): NonNullable<SpawnPolicy["binding"]> {
390
394
  };
391
395
  }
392
396
 
397
+ const STEWARD_CONFIG_DEFAULTS: StewardConfig = {
398
+ enabled: false,
399
+ provider: "claude",
400
+ permissionMode: "open",
401
+ keepaliveSeconds: 300,
402
+ };
403
+
404
/**
 * Validates and normalizes a raw steward config value.
 *
 * Optional fields that are absent fall back to `STEWARD_CONFIG_DEFAULTS`, so the
 * validator and `getStewardConfig()` always agree on what "default" means.
 *
 * @param value - Untrusted config payload.
 * @returns The cleaned steward config.
 * @throws ValidationError when `value` is not an object, a field fails its cleaner,
 *   or the provider/model/effort combination cannot be resolved.
 */
function validateStewardConfig(value: unknown): StewardConfig {
  if (!isRecord(value)) throw new ValidationError("steward config value must be an object");

  const isUnset = (field: unknown): boolean => field === undefined || field === null;

  // Field order is significant: it decides which validation error surfaces first.
  const config: StewardConfig = {
    enabled: value.enabled === undefined ? STEWARD_CONFIG_DEFAULTS.enabled : cleanBoolean(value.enabled, "enabled"),
    provider: cleanEnum(value.provider, "provider", VALID_PROVIDERS) as SpawnProvider,
    model: cleanString(value.model, "model", { max: 120 }),
    effort: isUnset(value.effort) ? undefined : cleanEnum(value.effort, "effort", VALID_EFFORTS) as ProviderEffort,
    permissionMode: (isUnset(value.permissionMode)
      ? STEWARD_CONFIG_DEFAULTS.permissionMode
      : cleanEnum(value.permissionMode, "permissionMode", VALID_PERMISSION_MODES)) as SpawnApprovalMode,
    keepaliveSeconds: isUnset(value.keepaliveSeconds)
      ? STEWARD_CONFIG_DEFAULTS.keepaliveSeconds
      : cleanNumber(value.keepaliveSeconds, "keepaliveSeconds", { min: 0, max: 2_592_000 }),
  };

  // Reject a provider/model/effort combo the catalog can't resolve before it ever
  // reaches a spawn (same guard as spawn policies).
  try {
    resolveProviderSelection({ provider: config.provider, model: config.model, effort: config.effort });
  } catch (error) {
    throw new ValidationError(error instanceof Error ? error.message : String(error));
  }
  return config;
}
427
+
393
428
/**
 * Normalizes a config value before it is stored.
 *
 * Namespaces with a dedicated schema are routed to their validator; every other
 * namespace only has to be JSON-serializable.
 *
 * @param namespace - Config namespace the value is being written to.
 * @param key - Config key (forwarded to validators that need it).
 * @param value - Raw value to normalize.
 * @returns The validated/cleaned value, or `value` itself for schemaless namespaces.
 * @throws ValidationError when `value` is `undefined` or does not serialize to JSON.
 */
function normalizeValue(namespace: string, key: string, value: unknown): unknown {
  if (value === undefined) throw new ValidationError("value required");

  switch (namespace) {
    case SPAWN_POLICY_NAMESPACE:
      return validateSpawnPolicy(key, value);
    case AGENT_PROFILE_NAMESPACE:
      return validateAgentProfile(key, value);
    case STEWARD_NAMESPACE:
      return validateStewardConfig(value);
    default:
      break;
  }

  // JSON.stringify yields undefined for values such as functions or symbols.
  const serialized = JSON.stringify(value);
  if (serialized === undefined) throw new ValidationError("value must be valid JSON");
  return value;
}
@@ -485,6 +521,28 @@ function setSpawnPolicy(policy: SpawnPolicy, updatedBy?: string): ConfigEntry<Sp
485
521
  return setConfig(SPAWN_POLICY_NAMESPACE, policy.name, policy, updatedBy);
486
522
  }
487
523
 
524
/**
 * Global steward config, merged over defaults (always returns a usable value).
 *
 * @returns A fresh object: the stored value layered on `STEWARD_CONFIG_DEFAULTS`,
 *   or a copy of the defaults when nothing is stored.
 */
export function getStewardConfig(): StewardConfig {
  const stored = getConfig<StewardConfig>(STEWARD_NAMESPACE, STEWARD_KEY);
  if (!stored) return { ...STEWARD_CONFIG_DEFAULTS };
  return { ...STEWARD_CONFIG_DEFAULTS, ...stored.value };
}
529
+
530
/**
 * Returns the stored steward config entry, or a synthetic entry when none exists.
 *
 * The synthetic entry carries a copy of the defaults with `version: 0`,
 * `updatedAt: "default"` and `updatedBy: "system"`. Unlike `getStewardConfig`, a
 * stored entry is returned as-is and is not merged over the defaults.
 */
export function getStewardConfigEntry(): ConfigEntry<StewardConfig> {
  const stored = getConfig<StewardConfig>(STEWARD_NAMESPACE, STEWARD_KEY);
  if (stored !== null && stored !== undefined) return stored;

  const placeholder: ConfigEntry<StewardConfig> = {
    namespace: STEWARD_NAMESPACE,
    key: STEWARD_KEY,
    value: { ...STEWARD_CONFIG_DEFAULTS },
    version: 0,
    updatedAt: "default",
    updatedBy: "system",
  };
  return placeholder;
}
541
+
542
/**
 * Stores the global steward config under the fixed steward namespace/key.
 *
 * @param value - Raw config payload; it is only cast here, not checked.
 *   NOTE(review): presumably `setConfig` runs `normalizeValue`, which routes the
 *   steward namespace to `validateStewardConfig` — confirm against `setConfig`.
 * @param updatedBy - Optional identity recorded with the write.
 * @returns The entry returned by `setConfig`.
 */
export function setStewardConfig(value: unknown, updatedBy?: string): ConfigEntry<StewardConfig> {
  const candidate = value as StewardConfig;
  return setConfig(STEWARD_NAMESPACE, STEWARD_KEY, candidate, updatedBy);
}
545
+
488
546
  function builtInProfileEntry(profile: AgentProfile): ConfigEntry<AgentProfile> {
489
547
  return {
490
548
  namespace: AGENT_PROFILE_NAMESPACE,
@@ -31,6 +31,9 @@ import {
31
31
  updateWorkspaceStatus,
32
32
  } from "./db";
33
33
  import type { WorkspaceMergePreview, WorkspaceRecord, WorkspaceStatus } from "./types";
34
+ import { requestWorkspaceMerge } from "./workspace-merge";
35
+ import { getStewardConfig } from "./config-store";
36
+ import { ensureRepoSteward } from "./steward";
34
37
  import { emitRelayEvent } from "./events";
35
38
  import { getLifecycleManager } from "./lifecycle-manager";
36
39
  import { applyCommandToRecipe } from "./recipe-runner";
@@ -55,6 +58,14 @@ const CONFLICT_SCAN_INTERVAL_MS = Number(process.env.AGENT_RELAY_CONFLICT_SCAN_I
55
58
  const WORKSPACE_RETENTION_MS = Number(process.env.AGENT_RELAY_WORKSPACE_RETENTION_MS) || DAY_MS;
56
59
  const WORKSPACE_REVIEW_TTL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_REVIEW_TTL_MS) || 3 * DAY_MS;
57
60
  const WORKSPACE_GC_INTERVAL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_GC_INTERVAL_MS) || 60 * 60 * 1000;
61
+ // Deterministic auto-land (Layer 0): merge clean fast-forwards with no human in
62
+ // the loop. Default on for the seamless workflow; set AGENT_RELAY_WORKSPACE_AUTO_MERGE=0
63
+ // to require a manual or steward merge per repo. Read at call-time so operators can
64
+ // toggle it without a restart.
65
+ const WORKSPACE_AUTO_MERGE_INTERVAL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_AUTO_MERGE_INTERVAL_MS) || CONFLICT_SCAN_INTERVAL_MS;
66
+ // Don't re-wake the managed steward for the same workspace more than once per
67
+ // this window — a persistent conflict/behind row would otherwise re-ping every sweep.
68
+ const STEWARD_WAKE_COOLDOWN_MS = Number(process.env.AGENT_RELAY_STEWARD_WAKE_COOLDOWN_MS) || 10 * 60 * 1000;
58
69
  // How long a stranded review_requested/conflict worktree (no online steward) may
59
70
  // sit before escalating to the configured fallback target, and the durable
60
71
  // escalation target itself (`policy:<name>`, `label:<name>`, `cap:<name>`, an
@@ -68,6 +79,37 @@ const STRANDABLE_STATUSES = new Set<WorkspaceStatus>(["review_requested", "confl
68
79
  // in-flight (cleanup_requested) states are skipped.
69
80
  const CONFLICT_SCAN_STATUSES = new Set<WorkspaceStatus>(["active", "ready", "review_requested", "merge_planned", "conflict"]);
70
81
  const TERMINAL_WORKSPACE_STATUSES = new Set<WorkspaceStatus>(["cleaned", "merged", "abandoned"]);
82
+ // In-flight merge statuses that should reconcile to `merged` once the host
83
+ // reports the branch's work has landed in base (squash/cherry-pick, or a merged
84
+ // PR). Excludes active/ready: an agent still working may have landed an early
85
+ // commit while more work is in flight — don't yank its workspace out from under it.
86
+ const LANDED_RECONCILE_STATUSES = new Set<WorkspaceStatus>(["merge_planned", "review_requested", "conflict"]);
87
+
88
+ // Orphaned-session reaper. A spawned agent's process can outlive its relay
89
+ // presence: the relay agent goes offline/pruned but the orchestrator still
90
+ // reports the session's process running, so it lingers forever (visible under the
91
+ // orchestrator, gone from the Agents panel). Runtime-token self-heal recovers the
92
+ // recoverable ones; this is the backstop that stops the genuinely stuck ones.
93
+ // Conservative by design — a session must be observed continuously orphaned by
94
+ // THIS relay for the grace window before it is reaped, and the tracker is in-memory
95
+ // so a relay restart restarts the clock (giving self-heal first crack every time).
96
+ const ORPHAN_REAPER_INTERVAL_MS = Number(process.env.AGENT_RELAY_ORPHAN_REAPER_INTERVAL_MS) || 5 * 60 * 1000;
97
+ // Read at call-time so changes take effect without a restart (and so tests can tune
98
+ // them). Parsed to allow an explicit 0 (immediate) — `|| default` would reject it.
99
/**
 * Reads a millisecond duration from the environment at call time.
 *
 * An explicit `0` is honoured (that is why `|| fallback` is not used). Unset, blank,
 * non-numeric, non-finite and negative values all yield `fallback`.
 *
 * @param name - Environment variable name.
 * @param fallback - Value returned when the variable holds no usable duration.
 * @returns The parsed duration in milliseconds, or `fallback`.
 */
const envMsOrDefault = (name: string, fallback: number): number => {
  const raw = process.env[name];
  // Number("") and Number("   ") are 0, so a blank variable would otherwise be read
  // as an explicit "0 ms" instead of "not configured".
  if (raw === undefined || raw.trim() === "") return fallback;
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
};
103
+ const orphanGraceMs = () => envMsOrDefault("AGENT_RELAY_ORPHAN_GRACE_MS", 30 * 60 * 1000);
104
+ const orphanReapCooldownMs = () => envMsOrDefault("AGENT_RELAY_ORPHAN_REAP_COOLDOWN_MS", 5 * 60 * 1000);
105
+ // Set AGENT_RELAY_ORPHAN_REAP=0 to detect + log orphans but never stop them.
106
+ const orphanReapEnabled = () => process.env.AGENT_RELAY_ORPHAN_REAP !== "0";
107
+ // orchestratorId + session identity -> when we first saw it orphaned (and last reaped).
108
+ const orphanTracker = new Map<string, { firstOrphanedAt: number; lastReapAt?: number }>();
109
+
110
/**
 * Test hook: empties the in-memory orphan tracker so every session starts with a
 * fresh grace window.
 */
export function resetOrphanTrackerForTests(): void {
  orphanTracker.clear();
}
71
113
 
72
114
  interface MaintenanceJobDefinition {
73
115
  id: string;
@@ -220,6 +262,14 @@ const definitions: MaintenanceJobDefinition[] = [
220
262
  return { prunedAgentIds };
221
263
  },
222
264
  },
265
+ {
266
+ id: "orphaned-session-reaper",
267
+ title: "Orphaned session reaper",
268
+ description: "Stop spawned sessions whose relay agent is offline/gone but whose process the orchestrator still reports running, after a grace period for self-heal.",
269
+ intervalMs: ORPHAN_REAPER_INTERVAL_MS,
270
+ runOnStart: false,
271
+ handler: reapOrphanedSessions,
272
+ },
223
273
  {
224
274
  id: "orchestrator-reaper",
225
275
  title: "Orchestrator reaper",
@@ -320,6 +370,15 @@ const definitions: MaintenanceJobDefinition[] = [
320
370
  timeoutMs: 60 * 1000,
321
371
  handler: scanWorkspaceConflicts,
322
372
  },
373
+ {
374
+ id: "workspace-auto-merge",
375
+ title: "Workspace auto-merge",
376
+ description: "Auto-merge clean fast-forward review_requested worktrees into base under the per-repo lease; conflicts and diverged bases are left for the steward.",
377
+ intervalMs: WORKSPACE_AUTO_MERGE_INTERVAL_MS,
378
+ runOnStart: false,
379
+ timeoutMs: 60 * 1000,
380
+ handler: autoMergeCleanFastForwards,
381
+ },
323
382
  {
324
383
  id: "workspace-gc",
325
384
  title: "Workspace GC",
@@ -338,7 +397,7 @@ function workspacePathWithinBase(path: string | undefined, baseDir: string | und
338
397
  }
339
398
 
340
399
  async function fetchHostMergePreview(apiUrl: string, workspace: WorkspaceRecord): Promise<WorkspaceMergePreview | { available: false } | null> {
341
- const query = new URLSearchParams({ path: workspace.worktreePath });
400
+ const query = new URLSearchParams({ path: workspace.worktreePath, checkPr: "1" });
342
401
  if (workspace.baseRef) query.set("baseRef", workspace.baseRef);
343
402
  if (workspace.baseSha) query.set("baseSha", workspace.baseSha);
344
403
  const headers: Record<string, string> = {};
@@ -357,6 +416,118 @@ async function fetchHostMergePreview(apiUrl: string, workspace: WorkspaceRecord)
357
416
  // cleanly. Auto-flag `conflict` when a clean merge is no longer possible, and
358
417
  // auto-clear conflicts we set ourselves once they resolve (restoring the prior
359
418
  // status). Human-set conflicts are never cleared.
419
/**
 * Stops orphaned spawned sessions: sessions an online orchestrator still lists under
 * `managedAgents` while the matching relay agent is missing or `offline`.
 *
 * Covers both policy-managed and dashboard/ad-hoc spawns by iterating the
 * orchestrators' reported `managedAgents` directly. A session is only shut down once
 * it has been tracked as orphaned for the grace window, reaping is enabled, and the
 * per-session reap cooldown has elapsed.
 *
 * @returns Counters for the run: `orphaned` (seen this pass), `reaped` (tracker keys
 *   a shutdown was issued for), `tracked` (tracker size afterwards) and `reapEnabled`.
 */
function reapOrphanedSessions(): Record<string, unknown> {
  const now = Date.now();
  const graceMs = orphanGraceMs();
  const cooldownMs = orphanReapCooldownMs();
  const reapEnabled = orphanReapEnabled();
  const stillOrphaned = new Set<string>();
  const reaped: string[] = [];
  let orphaned = 0;

  for (const orch of listOrchestrators()) {
    // An orchestrator that is not online can neither be trusted nor receive the stop.
    if (orch.status !== "online") continue;

    for (const agent of orch.managedAgents) {
      const sessionId = agent.spawnRequestId || agent.tmuxSession || agent.sessionName || agent.agentId;
      if (!sessionId) continue;

      const key = `${orch.id}:${sessionId}`;
      const relayAgent = agent.agentId ? getAgent(agent.agentId) : null;

      // Only a missing or "offline" relay agent counts as orphaned. "stale" is a
      // recent/borderline disconnect: treat it as alive — it either recovers or
      // progresses to "offline" and is caught on a later pass.
      const hasLiveRelayAgent = relayAgent ? relayAgent.status !== "offline" : false;
      if (hasLiveRelayAgent) {
        orphanTracker.delete(key);
        continue;
      }

      stillOrphaned.add(key);
      orphaned += 1;

      const tracked = orphanTracker.get(key) ?? { firstOrphanedAt: now };
      orphanTracker.set(key, tracked);

      const orphanAgeMs = now - tracked.firstOrphanedAt;
      if (orphanAgeMs < graceMs) continue; // let self-heal recover it first
      if (!reapEnabled) continue; // detect-only mode
      const inCooldown = tracked.lastReapAt ? now - tracked.lastReapAt < cooldownMs : false;
      if (inCooldown) continue; // don't spam shutdowns

      tracked.lastReapAt = now;

      const command = createCommand({
        type: "agent.shutdown",
        source: "system",
        target: orch.agentId,
        correlationId: agent.spawnRequestId,
        params: {
          action: "shutdown",
          agentId: agent.agentId,
          spawnRequestId: agent.spawnRequestId,
          sessionName: agent.sessionName,
          tmuxSession: agent.tmuxSession,
          policyName: agent.policyName,
          graceful: false,
          timeoutMs: 10_000,
          reason: "orphaned-session-reaper",
          requestedBy: "orphaned-session-reaper",
          requestedAt: now,
          orchestratorId: orch.id,
        },
      });
      emitRelayEvent({ type: "command.requested", source: "system", subject: command.id, data: { command } });

      const displayName = agent.label ?? agent.agentId ?? sessionId;
      const graceMinutes = Math.round(graceMs / 60000);
      createActivityEvent({
        clientId: `orphaned-session-reaper-${key}-${now}`,
        kind: "state",
        title: "Orphaned session reaped",
        body: `${displayName}: process still running on ${orch.id}, but its relay agent has been offline > ${graceMinutes}m and did not self-heal — stopping it`,
        meta: displayName,
        icon: "ti-ghost",
        view: "orchestrators",
        agentId: agent.agentId || undefined,
        metadata: {
          source: "server",
          maintenanceJobId: "orphaned-session-reaper",
          orchestratorId: orch.id,
          agentId: agent.agentId,
          spawnRequestId: agent.spawnRequestId,
          tmuxSession: agent.tmuxSession,
          commandId: command.id,
          orphanAgeMs,
        },
      });
      reaped.push(key);
    }
  }

  // Forget sessions that recovered or are no longer reported, so a future orphaning
  // of the same session starts a fresh grace window.
  orphanTracker.forEach((_entry, key) => {
    if (!stillOrphaned.has(key)) orphanTracker.delete(key);
  });

  return { orphaned, reaped, tracked: orphanTracker.size, reapEnabled };
}
501
+
502
/**
 * Wakes the managed per-repo steward (issue #167) for a workspace it should handle.
 *
 * Resolves the steward policy via `ensureRepoSteward`, sends a system message to
 * `policy:<name>`, notifies the lifecycle manager for that policy, and stamps the
 * workspace metadata with the wake time. A per-workspace cooldown
 * (`STEWARD_WAKE_COOLDOWN_MS`) keeps a persistent conflict/behind row from being
 * re-pinged every sweep.
 *
 * @param ws - Workspace that needs steward attention.
 * @param reason - Short explanation of why it could not auto-land (goes into the message).
 * @returns The steward policy name on a fresh wake; `null` when still cooling down,
 *   when no policy could be resolved, or when sending/notifying threw.
 */
function wakeRepoSteward(ws: WorkspaceRecord, reason: string): string | null {
  const metadata = ws.metadata as Record<string, unknown>;
  const previousWake = typeof metadata.stewardWokenAt === "number" ? metadata.stewardWokenAt : 0;
  const coolingDown = Boolean(previousWake) && Date.now() - previousWake < STEWARD_WAKE_COOLDOWN_MS;
  if (coolingDown) return null;

  const policyName = ensureRepoSteward(ws.repoRoot);
  if (!policyName) return null;

  try {
    const payload = {
      kind: "workspace.steward-task",
      workspaceId: ws.id,
      repoRoot: ws.repoRoot,
      worktreePath: ws.worktreePath,
      branch: ws.branch,
      baseRef: ws.baseRef,
      status: ws.status,
      reason,
    };
    const wakeMessage = sendMessage({
      from: "system",
      to: `policy:${policyName}`,
      kind: "system",
      subject: `Steward: ${ws.status} workspace needs attention`,
      body: `Workspace \`${ws.branch ?? ws.id}\` in ${ws.repoRoot} is ${ws.status} and could not auto-land (${reason}). cd into ${ws.worktreePath}, rebase onto ${ws.baseRef ?? "base"}, resolve, run checks, then land it via POST /api/workspaces/${ws.id}/actions {"action":"merge","strategy":"rebase-ff"} — or escalate if you can't.`,
      payload,
    });
    emitNewMessage(wakeMessage);
    getLifecycleManager().onMessageForPolicy(policyName);
    // Stamp the wake last: the cooldown only starts once the wake actually went out.
    patchWorkspaceMetadata(ws.id, { stewardWokenAt: Date.now(), stewardPolicy: policyName });
    return policyName;
  } catch {
    return null;
  }
}
530
+
360
531
  async function scanWorkspaceConflicts(): Promise<Record<string, unknown>> {
361
532
  const orchestrators = listOrchestrators().filter((orch) => orch.status === "online" && orch.apiUrl);
362
533
  if (!orchestrators.length) return { scanned: 0, skipped: "no online orchestrators" };
@@ -366,6 +537,7 @@ async function scanWorkspaceConflicts(): Promise<Record<string, unknown>> {
366
537
  );
367
538
  const flagged: string[] = [];
368
539
  const cleared: string[] = [];
540
+ const merged: string[] = [];
369
541
  const notifiedStewards: string[] = [];
370
542
 
371
543
  for (const ws of candidates) {
@@ -377,6 +549,37 @@ async function scanWorkspaceConflicts(): Promise<Record<string, unknown>> {
377
549
  if (p.error || p.missing || p.conflict === undefined) continue;
378
550
 
379
551
  const meta = ws.metadata as Record<string, unknown>;
552
+
553
+ // Landing wins over everything else. Once the work is in base — whether the
554
+ // PR was squash/cherry-pick merged on GitHub or fast-forwarded locally — the
555
+ // workspace is done, even if `git merge-tree` still predicts a textual
556
+ // conflict against the now-moved base (a PR-strategy row sits at
557
+ // merge_planned forever otherwise, and the conflict scan can even pin a
558
+ // landed branch to `conflict`). Reconcile to the terminal `merged` status so
559
+ // the dashboard stops showing it as unmerged and GC prunes it on schedule.
560
+ const landed = p.landed === true || p.prMerged === true;
561
+ if (landed && LANDED_RECONCILE_STATUSES.has(ws.status)) {
562
+ updateWorkspaceStatus(ws.id, "merged", {
563
+ autoMerged: true,
564
+ mergedFromStatus: ws.status,
565
+ landedDetectedAt: Date.now(),
566
+ landedVia: p.prMerged === true ? "pr" : "git",
567
+ autoConflict: false,
568
+ });
569
+ merged.push(ws.id);
570
+ createActivityEvent({
571
+ clientId: "server-workspace-" + ws.id + "-merged-" + Date.now(),
572
+ kind: "state",
573
+ title: "Workspace work landed in base",
574
+ body: `${ws.branch ?? ws.id} is ${p.prMerged === true ? "merged on the remote (PR)" : "already merged into base"} ${p.baseRef ? `(${p.baseRef})` : ""} — marking merged`,
575
+ meta: ws.branch ?? ws.id,
576
+ icon: "ti-git-merge",
577
+ view: "orchestrators",
578
+ metadata: { source: "server", maintenanceJobId: "workspace-conflict-scan", workspaceId: ws.id, fromStatus: ws.status },
579
+ });
580
+ continue;
581
+ }
582
+
380
583
  if (p.conflict === true && ws.status !== "conflict") {
381
584
  updateWorkspaceStatus(ws.id, "conflict", {
382
585
  autoConflict: true,
@@ -397,10 +600,15 @@ async function scanWorkspaceConflicts(): Promise<Record<string, unknown>> {
397
600
  view: "orchestrators",
398
601
  metadata: { source: "server", maintenanceJobId: "workspace-conflict-scan", workspaceId: ws.id, ahead: p.ahead, behind: p.behind },
399
602
  });
400
- // The steward is the repo's coordination point ping it so a conflict
401
- // gets resolved instead of silently rotting until merge time. Once-per-
402
- // onset (we only enter this branch on the active→conflict transition).
403
- if (ws.stewardAgentId) {
603
+ // Hand the conflict to a steward so it gets resolved instead of rotting
604
+ // until merge time. Once-per-onset (we only enter this branch on the
605
+ // active→conflict transition). When managed stewards are enabled, wake the
606
+ // auto-provisioned per-repo steward agent (#167); otherwise fall back to the
607
+ // legacy direct ping of the elected steward agent.
608
+ if (getStewardConfig().enabled) {
609
+ const woke = wakeRepoSteward(getWorkspace(ws.id) ?? ws, "conflict");
610
+ if (woke) notifiedStewards.push(woke);
611
+ } else if (ws.stewardAgentId) {
404
612
  try {
405
613
  const msg = sendMessage({
406
614
  from: "system",
@@ -425,7 +633,74 @@ async function scanWorkspaceConflicts(): Promise<Record<string, unknown>> {
425
633
  }
426
634
  }
427
635
 
428
- return { scanned: candidates.length, flagged, cleared, notifiedStewards };
636
+ return { scanned: candidates.length, flagged, cleared, merged, notifiedStewards };
637
+ }
638
+
639
/**
 * Deterministic auto-land (Layer 0, issue #167).
 *
 * Walks the "ready to land" queue (`review_requested` isolated worktrees) and, for any
 * whose work is a strict clean fast-forward (no conflict, base hasn't moved, real
 * commits ahead), requests a merge through the shared merge helper. Conflicts and
 * diverged bases (`behind>0`, even if cleanly rebasable) are deliberately not
 * auto-landed; they are left for the steward, and the managed steward is woken when
 * steward config is enabled.
 *
 * Workspaces the host already reports as landed (`landed` / `prMerged`) are neither
 * merged nor handed to the steward: there is nothing left to rebase or land, and the
 * conflict scan reconciles those rows to `merged`.
 *
 * Set `AGENT_RELAY_WORKSPACE_AUTO_MERGE=0` to disable; the flag is read on every run.
 *
 * @returns Run summary: `scanned`, `merged`, `heldByLease`, `leftForSteward`,
 *   `wokeStewards`, `alreadyLanded` — or `{ skipped }` when the sweep did not run.
 */
async function autoMergeCleanFastForwards(): Promise<Record<string, unknown>> {
  if (process.env.AGENT_RELAY_WORKSPACE_AUTO_MERGE === "0") return { skipped: "disabled" };
  const orchestrators = listOrchestrators().filter((orch) => orch.status === "online" && orch.apiUrl);
  if (!orchestrators.length) return { scanned: 0, skipped: "no online orchestrators" };

  const candidates = listWorkspaces().filter(
    (ws) => ws.mode === "isolated" && Boolean(ws.worktreePath) && ws.status === "review_requested",
  );
  const stewardEnabled = getStewardConfig().enabled;
  const merged: string[] = [];
  const heldByLease: string[] = [];
  const leftForSteward: string[] = [];
  const wokeStewards: string[] = [];
  const alreadyLanded: string[] = [];

  for (const ws of candidates) {
    const orch = orchestrators.find((candidate) => workspacePathWithinBase(ws.sourceCwd, candidate.baseDir));
    if (!orch?.apiUrl) continue;
    const preview = await fetchHostMergePreview(orch.apiUrl, ws);
    if (!preview || (preview as { available?: false }).available === false) continue;
    const p = preview as WorkspaceMergePreview;
    if (p.error || p.missing) continue;

    const ahead = p.unmergedAhead ?? p.ahead ?? 0;
    const behind = p.behind ?? 0;
    const cleanFF = p.cleanFastForward === true && p.conflict !== true && behind === 0 && ahead > 0;
    if (!cleanFF) {
      // Work that is already in base needs no rebase and no steward; asking one to
      // "land" it would only spawn an agent for a finished workspace.
      if (p.landed === true || p.prMerged === true) {
        alreadyLanded.push(ws.id);
        continue;
      }

      // Needs reasoning/rebase, which is the steward's job. Wake the managed steward
      // when enabled (cooldown-guarded); otherwise leave it for conflict-scan's legacy
      // ping / human review.
      leftForSteward.push(ws.id);
      if (stewardEnabled) {
        // Report the real blocker: not every non-fast-forward preview is a conflict.
        let reason: string;
        if (behind > 0) reason = "base moved on (behind>0)";
        else if (p.conflict === true) reason = "conflict";
        else if (ahead <= 0) reason = "no commits ahead of base";
        else reason = "not a clean fast-forward";
        const woke = wakeRepoSteward(ws, reason);
        if (woke) wokeStewards.push(woke);
      }
      continue;
    }

    const result = requestWorkspaceMerge(ws, { strategy: "rebase-ff", requestedBy: "auto-merge" });
    if (!result.ok) {
      // 409 = another merge holds this repo's lease this tick; retry next sweep.
      heldByLease.push(ws.id);
      continue;
    }
    emitCommand(result.command);
    merged.push(ws.id);
    createActivityEvent({
      clientId: `workspace-auto-merge-${ws.id}-${Date.now()}`,
      kind: "state",
      title: "Workspace auto-merging (clean fast-forward)",
      body: `${ws.branch ?? ws.id} → ${p.baseRef ?? "base"} (${ahead} ahead, clean)`,
      meta: ws.branch ?? ws.id,
      icon: "ti-git-merge",
      view: "orchestrators",
      metadata: { source: "server", maintenanceJobId: "workspace-auto-merge", workspaceId: ws.id, commandId: result.command.id, ahead },
    });
  }

  return { scanned: candidates.length, merged, heldByLease, leftForSteward, wokeStewards, alreadyLanded };
}
430
705
 
431
706
  // Send a system DM, swallowing failures (a stale/missing/misconfigured target