agent-relay-server 0.11.8 → 0.11.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/openapi.json +182 -1
- package/package.json +2 -2
- package/public/index.html +6012 -1098
- package/runner/src/adapter.ts +4 -0
- package/src/bus.ts +42 -0
- package/src/config-store.ts +58 -0
- package/src/maintenance.ts +281 -6
- package/src/routes.ts +208 -86
- package/src/runtime-tokens.ts +44 -1
- package/src/security.ts +17 -0
- package/src/steward.ts +117 -0
- package/src/workspace-merge.ts +108 -0
package/runner/src/adapter.ts
CHANGED
package/src/bus.ts
CHANGED
|
@@ -159,6 +159,7 @@ function handleFrame(ws: BusWebSocket, frame: ReturnType<typeof validateClientFr
|
|
|
159
159
|
}
|
|
160
160
|
const after = getAgent(conn.agentId);
|
|
161
161
|
auditProviderStateTransition(conn.agentId, before, after);
|
|
162
|
+
auditRunnerTimelineEvent(conn.agentId, after?.meta?.timelineEvent);
|
|
162
163
|
// A real PreCompact/SessionStart hook arrives as a timelineEvent in the
|
|
163
164
|
// merged meta — clears any pending stall watch (stale events ignored).
|
|
164
165
|
noteAgentTimelineEvent(conn.agentId, after?.meta?.timelineEvent);
|
|
@@ -558,6 +559,47 @@ function auditProviderStateTransition(agentId: string, before: AgentCard | null
|
|
|
558
559
|
}
|
|
559
560
|
}
|
|
560
561
|
|
|
562
|
+
function auditRunnerTimelineEvent(agentId: string, timelineEvent: unknown): void {
|
|
563
|
+
if (!isRecord(timelineEvent)) return;
|
|
564
|
+
const metadata = isRecord(timelineEvent.metadata) ? timelineEvent.metadata : {};
|
|
565
|
+
if (metadata.source !== "runner") return;
|
|
566
|
+
const status = stringValue(timelineEvent.status);
|
|
567
|
+
if (!status) return;
|
|
568
|
+
const eventType = stringValue(metadata.eventType) ?? status;
|
|
569
|
+
const timestamp = numberValue(timelineEvent.timestamp) ?? Date.now();
|
|
570
|
+
const id = stringValue(timelineEvent.id) ?? `${eventType}-${timestamp}`;
|
|
571
|
+
const title = stringValue(timelineEvent.title) ?? status.replace(/[._-]+/g, " ");
|
|
572
|
+
const body = stringValue(timelineEvent.body);
|
|
573
|
+
const icon = stringValue(timelineEvent.icon) ?? "ti-activity";
|
|
574
|
+
try {
|
|
575
|
+
const event = createActivityEvent({
|
|
576
|
+
clientId: `runner-timeline-${agentId}-${id}`,
|
|
577
|
+
kind: "state",
|
|
578
|
+
title,
|
|
579
|
+
body,
|
|
580
|
+
meta: agentId,
|
|
581
|
+
icon,
|
|
582
|
+
view: "agents",
|
|
583
|
+
agentId,
|
|
584
|
+
metadata: {
|
|
585
|
+
...metadata,
|
|
586
|
+
eventType,
|
|
587
|
+
timelineStatus: status,
|
|
588
|
+
timelineId: id,
|
|
589
|
+
timelineTimestamp: timestamp,
|
|
590
|
+
},
|
|
591
|
+
});
|
|
592
|
+
emitRelayEvent({
|
|
593
|
+
type: "activity.created",
|
|
594
|
+
source: "server",
|
|
595
|
+
subject: String(event.id),
|
|
596
|
+
data: event as unknown as Record<string, unknown>,
|
|
597
|
+
});
|
|
598
|
+
} catch {
|
|
599
|
+
// Timeline writes must never block bus status updates.
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
|
|
561
603
|
function providerStateFromAgent(agent: AgentCard | null | undefined): Record<string, unknown> | null {
|
|
562
604
|
const value = agent?.meta?.providerState;
|
|
563
605
|
if (!isRecord(value) || typeof value.state !== "string") return null;
|
package/src/config-store.ts
CHANGED
|
@@ -7,13 +7,17 @@ import type {
|
|
|
7
7
|
ConfigHistoryEntry,
|
|
8
8
|
ManagedAgentState,
|
|
9
9
|
ManagedAgentStatus,
|
|
10
|
+
SpawnApprovalMode,
|
|
10
11
|
SpawnPolicy,
|
|
11
12
|
SpawnProvider,
|
|
13
|
+
StewardConfig,
|
|
12
14
|
} from "./types";
|
|
13
15
|
|
|
14
16
|
const CONFIG_HISTORY_LIMIT = 50;
|
|
15
17
|
const SPAWN_POLICY_NAMESPACE = "spawn-policy";
|
|
16
18
|
const AGENT_PROFILE_NAMESPACE = "agent-profile";
|
|
19
|
+
const STEWARD_NAMESPACE = "steward";
|
|
20
|
+
const STEWARD_KEY = "default";
|
|
17
21
|
const VALID_PROVIDERS = ["claude", "codex"] as const;
|
|
18
22
|
const VALID_PROFILE_PROVIDERS = ["any", "claude", "codex"] as const;
|
|
19
23
|
const VALID_PROFILE_BASES = ["host", "minimal", "isolated"] as const;
|
|
@@ -390,10 +394,42 @@ function cleanBinding(value: unknown): NonNullable<SpawnPolicy["binding"]> {
|
|
|
390
394
|
};
|
|
391
395
|
}
|
|
392
396
|
|
|
397
|
+
const STEWARD_CONFIG_DEFAULTS: StewardConfig = {
|
|
398
|
+
enabled: false,
|
|
399
|
+
provider: "claude",
|
|
400
|
+
permissionMode: "open",
|
|
401
|
+
keepaliveSeconds: 300,
|
|
402
|
+
};
|
|
403
|
+
|
|
404
|
+
function validateStewardConfig(value: unknown): StewardConfig {
|
|
405
|
+
if (!isRecord(value)) throw new ValidationError("steward config value must be an object");
|
|
406
|
+
const config: StewardConfig = {
|
|
407
|
+
enabled: value.enabled === undefined ? false : cleanBoolean(value.enabled, "enabled"),
|
|
408
|
+
provider: cleanEnum(value.provider, "provider", VALID_PROVIDERS) as SpawnProvider,
|
|
409
|
+
model: cleanString(value.model, "model", { max: 120 }),
|
|
410
|
+
effort: value.effort === undefined || value.effort === null ? undefined : cleanEnum(value.effort, "effort", VALID_EFFORTS) as ProviderEffort,
|
|
411
|
+
permissionMode: (value.permissionMode === undefined || value.permissionMode === null
|
|
412
|
+
? "open"
|
|
413
|
+
: cleanEnum(value.permissionMode, "permissionMode", VALID_PERMISSION_MODES)) as SpawnApprovalMode,
|
|
414
|
+
keepaliveSeconds: value.keepaliveSeconds === undefined || value.keepaliveSeconds === null
|
|
415
|
+
? 300
|
|
416
|
+
: cleanNumber(value.keepaliveSeconds, "keepaliveSeconds", { min: 0, max: 2_592_000 }),
|
|
417
|
+
};
|
|
418
|
+
// Reject a provider/model/effort combo the catalog can't resolve before it ever
|
|
419
|
+
// reaches a spawn (same guard as spawn policies).
|
|
420
|
+
try {
|
|
421
|
+
resolveProviderSelection({ provider: config.provider, model: config.model, effort: config.effort });
|
|
422
|
+
} catch (error) {
|
|
423
|
+
throw new ValidationError(error instanceof Error ? error.message : String(error));
|
|
424
|
+
}
|
|
425
|
+
return config;
|
|
426
|
+
}
|
|
427
|
+
|
|
393
428
|
function normalizeValue(namespace: string, key: string, value: unknown): unknown {
|
|
394
429
|
if (value === undefined) throw new ValidationError("value required");
|
|
395
430
|
if (namespace === SPAWN_POLICY_NAMESPACE) return validateSpawnPolicy(key, value);
|
|
396
431
|
if (namespace === AGENT_PROFILE_NAMESPACE) return validateAgentProfile(key, value);
|
|
432
|
+
if (namespace === STEWARD_NAMESPACE) return validateStewardConfig(value);
|
|
397
433
|
if (JSON.stringify(value) === undefined) throw new ValidationError("value must be valid JSON");
|
|
398
434
|
return value;
|
|
399
435
|
}
|
|
@@ -485,6 +521,28 @@ function setSpawnPolicy(policy: SpawnPolicy, updatedBy?: string): ConfigEntry<Sp
|
|
|
485
521
|
return setConfig(SPAWN_POLICY_NAMESPACE, policy.name, policy, updatedBy);
|
|
486
522
|
}
|
|
487
523
|
|
|
524
|
+
/** Global steward config, merged over defaults (always returns a usable value). */
|
|
525
|
+
export function getStewardConfig(): StewardConfig {
|
|
526
|
+
const entry = getConfig<StewardConfig>(STEWARD_NAMESPACE, STEWARD_KEY);
|
|
527
|
+
return entry ? { ...STEWARD_CONFIG_DEFAULTS, ...entry.value } : { ...STEWARD_CONFIG_DEFAULTS };
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
export function getStewardConfigEntry(): ConfigEntry<StewardConfig> {
|
|
531
|
+
const entry = getConfig<StewardConfig>(STEWARD_NAMESPACE, STEWARD_KEY);
|
|
532
|
+
return entry ?? {
|
|
533
|
+
namespace: STEWARD_NAMESPACE,
|
|
534
|
+
key: STEWARD_KEY,
|
|
535
|
+
value: { ...STEWARD_CONFIG_DEFAULTS },
|
|
536
|
+
version: 0,
|
|
537
|
+
updatedAt: "default",
|
|
538
|
+
updatedBy: "system",
|
|
539
|
+
};
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
export function setStewardConfig(value: unknown, updatedBy?: string): ConfigEntry<StewardConfig> {
|
|
543
|
+
return setConfig(STEWARD_NAMESPACE, STEWARD_KEY, value as StewardConfig, updatedBy);
|
|
544
|
+
}
|
|
545
|
+
|
|
488
546
|
function builtInProfileEntry(profile: AgentProfile): ConfigEntry<AgentProfile> {
|
|
489
547
|
return {
|
|
490
548
|
namespace: AGENT_PROFILE_NAMESPACE,
|
package/src/maintenance.ts
CHANGED
|
@@ -31,6 +31,9 @@ import {
|
|
|
31
31
|
updateWorkspaceStatus,
|
|
32
32
|
} from "./db";
|
|
33
33
|
import type { WorkspaceMergePreview, WorkspaceRecord, WorkspaceStatus } from "./types";
|
|
34
|
+
import { requestWorkspaceMerge } from "./workspace-merge";
|
|
35
|
+
import { getStewardConfig } from "./config-store";
|
|
36
|
+
import { ensureRepoSteward } from "./steward";
|
|
34
37
|
import { emitRelayEvent } from "./events";
|
|
35
38
|
import { getLifecycleManager } from "./lifecycle-manager";
|
|
36
39
|
import { applyCommandToRecipe } from "./recipe-runner";
|
|
@@ -55,6 +58,14 @@ const CONFLICT_SCAN_INTERVAL_MS = Number(process.env.AGENT_RELAY_CONFLICT_SCAN_I
|
|
|
55
58
|
const WORKSPACE_RETENTION_MS = Number(process.env.AGENT_RELAY_WORKSPACE_RETENTION_MS) || DAY_MS;
|
|
56
59
|
const WORKSPACE_REVIEW_TTL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_REVIEW_TTL_MS) || 3 * DAY_MS;
|
|
57
60
|
const WORKSPACE_GC_INTERVAL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_GC_INTERVAL_MS) || 60 * 60 * 1000;
|
|
61
|
+
// Deterministic auto-land (Layer 0): merge clean fast-forwards with no human in
|
|
62
|
+
// the loop. Default on for the seamless workflow; set AGENT_RELAY_WORKSPACE_AUTO_MERGE=0
|
|
63
|
+
// to require a manual or steward merge per repo. Read at call-time so operators can
|
|
64
|
+
// toggle it without a restart.
|
|
65
|
+
const WORKSPACE_AUTO_MERGE_INTERVAL_MS = Number(process.env.AGENT_RELAY_WORKSPACE_AUTO_MERGE_INTERVAL_MS) || CONFLICT_SCAN_INTERVAL_MS;
|
|
66
|
+
// Don't re-wake the managed steward for the same workspace more than once per
|
|
67
|
+
// this window — a persistent conflict/behind row would otherwise re-ping every sweep.
|
|
68
|
+
const STEWARD_WAKE_COOLDOWN_MS = Number(process.env.AGENT_RELAY_STEWARD_WAKE_COOLDOWN_MS) || 10 * 60 * 1000;
|
|
58
69
|
// How long a stranded review_requested/conflict worktree (no online steward) may
|
|
59
70
|
// sit before escalating to the configured fallback target, and the durable
|
|
60
71
|
// escalation target itself (`policy:<name>`, `label:<name>`, `cap:<name>`, an
|
|
@@ -68,6 +79,37 @@ const STRANDABLE_STATUSES = new Set<WorkspaceStatus>(["review_requested", "confl
|
|
|
68
79
|
// in-flight (cleanup_requested) states are skipped.
|
|
69
80
|
const CONFLICT_SCAN_STATUSES = new Set<WorkspaceStatus>(["active", "ready", "review_requested", "merge_planned", "conflict"]);
|
|
70
81
|
const TERMINAL_WORKSPACE_STATUSES = new Set<WorkspaceStatus>(["cleaned", "merged", "abandoned"]);
|
|
82
|
+
// In-flight merge statuses that should reconcile to `merged` once the host
|
|
83
|
+
// reports the branch's work has landed in base (squash/cherry-pick, or a merged
|
|
84
|
+
// PR). Excludes active/ready: an agent still working may have landed an early
|
|
85
|
+
// commit while more work is in flight — don't yank its workspace out from under it.
|
|
86
|
+
const LANDED_RECONCILE_STATUSES = new Set<WorkspaceStatus>(["merge_planned", "review_requested", "conflict"]);
|
|
87
|
+
|
|
88
|
+
// Orphaned-session reaper. A spawned agent's process can outlive its relay
|
|
89
|
+
// presence: the relay agent goes offline/pruned but the orchestrator still
|
|
90
|
+
// reports the session's process running, so it lingers forever (visible under the
|
|
91
|
+
// orchestrator, gone from the Agents panel). Runtime-token self-heal recovers the
|
|
92
|
+
// recoverable ones; this is the backstop that stops the genuinely stuck ones.
|
|
93
|
+
// Conservative by design — a session must be observed continuously orphaned by
|
|
94
|
+
// THIS relay for the grace window before it is reaped, and the tracker is in-memory
|
|
95
|
+
// so a relay restart restarts the clock (giving self-heal first crack every time).
|
|
96
|
+
const ORPHAN_REAPER_INTERVAL_MS = Number(process.env.AGENT_RELAY_ORPHAN_REAPER_INTERVAL_MS) || 5 * 60 * 1000;
|
|
97
|
+
// Read at call-time so changes take effect without a restart (and so tests can tune
|
|
98
|
+
// them). Parsed to allow an explicit 0 (immediate) — `|| default` would reject it.
|
|
99
|
+
const envMsOrDefault = (name: string, fallback: number): number => {
|
|
100
|
+
const v = Number(process.env[name]);
|
|
101
|
+
return Number.isFinite(v) && v >= 0 ? v : fallback;
|
|
102
|
+
};
|
|
103
|
+
const orphanGraceMs = () => envMsOrDefault("AGENT_RELAY_ORPHAN_GRACE_MS", 30 * 60 * 1000);
|
|
104
|
+
const orphanReapCooldownMs = () => envMsOrDefault("AGENT_RELAY_ORPHAN_REAP_COOLDOWN_MS", 5 * 60 * 1000);
|
|
105
|
+
// Set AGENT_RELAY_ORPHAN_REAP=0 to detect + log orphans but never stop them.
|
|
106
|
+
const orphanReapEnabled = () => process.env.AGENT_RELAY_ORPHAN_REAP !== "0";
|
|
107
|
+
// orchestratorId + session identity -> when we first saw it orphaned (and last reaped).
|
|
108
|
+
const orphanTracker = new Map<string, { firstOrphanedAt: number; lastReapAt?: number }>();
|
|
109
|
+
|
|
110
|
+
export function resetOrphanTrackerForTests(): void {
|
|
111
|
+
orphanTracker.clear();
|
|
112
|
+
}
|
|
71
113
|
|
|
72
114
|
interface MaintenanceJobDefinition {
|
|
73
115
|
id: string;
|
|
@@ -220,6 +262,14 @@ const definitions: MaintenanceJobDefinition[] = [
|
|
|
220
262
|
return { prunedAgentIds };
|
|
221
263
|
},
|
|
222
264
|
},
|
|
265
|
+
{
|
|
266
|
+
id: "orphaned-session-reaper",
|
|
267
|
+
title: "Orphaned session reaper",
|
|
268
|
+
description: "Stop spawned sessions whose relay agent is offline/gone but whose process the orchestrator still reports running, after a grace period for self-heal.",
|
|
269
|
+
intervalMs: ORPHAN_REAPER_INTERVAL_MS,
|
|
270
|
+
runOnStart: false,
|
|
271
|
+
handler: reapOrphanedSessions,
|
|
272
|
+
},
|
|
223
273
|
{
|
|
224
274
|
id: "orchestrator-reaper",
|
|
225
275
|
title: "Orchestrator reaper",
|
|
@@ -320,6 +370,15 @@ const definitions: MaintenanceJobDefinition[] = [
|
|
|
320
370
|
timeoutMs: 60 * 1000,
|
|
321
371
|
handler: scanWorkspaceConflicts,
|
|
322
372
|
},
|
|
373
|
+
{
|
|
374
|
+
id: "workspace-auto-merge",
|
|
375
|
+
title: "Workspace auto-merge",
|
|
376
|
+
description: "Auto-merge clean fast-forward review_requested worktrees into base under the per-repo lease; conflicts and diverged bases are left for the steward.",
|
|
377
|
+
intervalMs: WORKSPACE_AUTO_MERGE_INTERVAL_MS,
|
|
378
|
+
runOnStart: false,
|
|
379
|
+
timeoutMs: 60 * 1000,
|
|
380
|
+
handler: autoMergeCleanFastForwards,
|
|
381
|
+
},
|
|
323
382
|
{
|
|
324
383
|
id: "workspace-gc",
|
|
325
384
|
title: "Workspace GC",
|
|
@@ -338,7 +397,7 @@ function workspacePathWithinBase(path: string | undefined, baseDir: string | und
|
|
|
338
397
|
}
|
|
339
398
|
|
|
340
399
|
async function fetchHostMergePreview(apiUrl: string, workspace: WorkspaceRecord): Promise<WorkspaceMergePreview | { available: false } | null> {
|
|
341
|
-
const query = new URLSearchParams({ path: workspace.worktreePath });
|
|
400
|
+
const query = new URLSearchParams({ path: workspace.worktreePath, checkPr: "1" });
|
|
342
401
|
if (workspace.baseRef) query.set("baseRef", workspace.baseRef);
|
|
343
402
|
if (workspace.baseSha) query.set("baseSha", workspace.baseSha);
|
|
344
403
|
const headers: Record<string, string> = {};
|
|
@@ -357,6 +416,118 @@ async function fetchHostMergePreview(apiUrl: string, workspace: WorkspaceRecord)
|
|
|
357
416
|
// cleanly. Auto-flag `conflict` when a clean merge is no longer possible, and
|
|
358
417
|
// auto-clear conflicts we set ourselves once they resolve (restoring the prior
|
|
359
418
|
// status). Human-set conflicts are never cleared.
|
|
419
|
+
// Stop orphaned spawned sessions: process reported alive by the orchestrator, but
|
|
420
|
+
// the relay agent is offline/pruned and self-heal has had its chance. See the
|
|
421
|
+
// ORPHAN_* notes above. Covers both policy-managed and dashboard/ad-hoc spawns by
|
|
422
|
+
// iterating the orchestrators' reported managedAgents directly.
|
|
423
|
+
function reapOrphanedSessions(): Record<string, unknown> {
|
|
424
|
+
const now = Date.now();
|
|
425
|
+
const grace = orphanGraceMs();
|
|
426
|
+
const cooldown = orphanReapCooldownMs();
|
|
427
|
+
const reapEnabled = orphanReapEnabled();
|
|
428
|
+
const seen = new Set<string>();
|
|
429
|
+
const reaped: string[] = [];
|
|
430
|
+
let orphaned = 0;
|
|
431
|
+
|
|
432
|
+
for (const orch of listOrchestrators()) {
|
|
433
|
+
if (orch.status !== "online") continue; // can't trust the report or deliver the stop
|
|
434
|
+
for (const agent of orch.managedAgents) {
|
|
435
|
+
const sessionId = agent.spawnRequestId || agent.tmuxSession || agent.sessionName || agent.agentId;
|
|
436
|
+
if (!sessionId) continue;
|
|
437
|
+
const key = `${orch.id}:${sessionId}`;
|
|
438
|
+
const relayAgent = agent.agentId ? getAgent(agent.agentId) : null;
|
|
439
|
+
// Orphan = orchestrator reports the process running, but no live relay agent.
|
|
440
|
+
// "stale" is a recent/borderline disconnect — treat as alive and give it time;
|
|
441
|
+
// it will either recover or progress to "offline" and be caught next pass.
|
|
442
|
+
const isOrphan = !relayAgent || relayAgent.status === "offline";
|
|
443
|
+
if (!isOrphan) { orphanTracker.delete(key); continue; }
|
|
444
|
+
seen.add(key);
|
|
445
|
+
orphaned++;
|
|
446
|
+
const entry = orphanTracker.get(key) ?? { firstOrphanedAt: now };
|
|
447
|
+
orphanTracker.set(key, entry);
|
|
448
|
+
if (now - entry.firstOrphanedAt < grace) continue; // let self-heal recover it first
|
|
449
|
+
if (!reapEnabled) continue; // detect-only mode
|
|
450
|
+
if (entry.lastReapAt && now - entry.lastReapAt < cooldown) continue; // don't spam shutdowns
|
|
451
|
+
entry.lastReapAt = now;
|
|
452
|
+
const command = createCommand({
|
|
453
|
+
type: "agent.shutdown",
|
|
454
|
+
source: "system",
|
|
455
|
+
target: orch.agentId,
|
|
456
|
+
correlationId: agent.spawnRequestId,
|
|
457
|
+
params: {
|
|
458
|
+
action: "shutdown",
|
|
459
|
+
agentId: agent.agentId,
|
|
460
|
+
spawnRequestId: agent.spawnRequestId,
|
|
461
|
+
sessionName: agent.sessionName,
|
|
462
|
+
tmuxSession: agent.tmuxSession,
|
|
463
|
+
policyName: agent.policyName,
|
|
464
|
+
graceful: false,
|
|
465
|
+
timeoutMs: 10_000,
|
|
466
|
+
reason: "orphaned-session-reaper",
|
|
467
|
+
requestedBy: "orphaned-session-reaper",
|
|
468
|
+
requestedAt: now,
|
|
469
|
+
orchestratorId: orch.id,
|
|
470
|
+
},
|
|
471
|
+
});
|
|
472
|
+
emitRelayEvent({ type: "command.requested", source: "system", subject: command.id, data: { command } });
|
|
473
|
+
createActivityEvent({
|
|
474
|
+
clientId: `orphaned-session-reaper-${key}-${now}`,
|
|
475
|
+
kind: "state",
|
|
476
|
+
title: "Orphaned session reaped",
|
|
477
|
+
body: `${agent.label ?? agent.agentId ?? sessionId}: process still running on ${orch.id}, but its relay agent has been offline > ${Math.round(grace / 60000)}m and did not self-heal — stopping it`,
|
|
478
|
+
meta: agent.label ?? agent.agentId ?? sessionId,
|
|
479
|
+
icon: "ti-ghost",
|
|
480
|
+
view: "orchestrators",
|
|
481
|
+
agentId: agent.agentId || undefined,
|
|
482
|
+
metadata: {
|
|
483
|
+
source: "server",
|
|
484
|
+
maintenanceJobId: "orphaned-session-reaper",
|
|
485
|
+
orchestratorId: orch.id,
|
|
486
|
+
agentId: agent.agentId,
|
|
487
|
+
spawnRequestId: agent.spawnRequestId,
|
|
488
|
+
tmuxSession: agent.tmuxSession,
|
|
489
|
+
commandId: command.id,
|
|
490
|
+
orphanAgeMs: now - entry.firstOrphanedAt,
|
|
491
|
+
},
|
|
492
|
+
});
|
|
493
|
+
reaped.push(key);
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
// Forget sessions that recovered or are no longer reported, so a future orphaning
|
|
497
|
+
// of the same session starts a fresh grace window.
|
|
498
|
+
for (const key of orphanTracker.keys()) if (!seen.has(key)) orphanTracker.delete(key);
|
|
499
|
+
return { orphaned, reaped, tracked: orphanTracker.size, reapEnabled };
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
// Wake the managed per-repo steward (issue #167) for a workspace it should handle:
|
|
503
|
+
// auto-provision the policy from global steward config, then queue a `policy:` wake
|
|
504
|
+
// message (which also spawns the on-demand agent now via onMessageForPolicy). Honors a
|
|
505
|
+
// per-workspace cooldown so a persistent conflict/behind row isn't re-pinged every sweep.
|
|
506
|
+
// Returns the steward policy name on a fresh wake, or null (disabled / no owner / cooled down).
|
|
507
|
+
function wakeRepoSteward(ws: WorkspaceRecord, reason: string): string | null {
|
|
508
|
+
const meta = ws.metadata as Record<string, unknown>;
|
|
509
|
+
const lastWoke = typeof meta.stewardWokenAt === "number" ? meta.stewardWokenAt : 0;
|
|
510
|
+
if (lastWoke && Date.now() - lastWoke < STEWARD_WAKE_COOLDOWN_MS) return null;
|
|
511
|
+
const policyName = ensureRepoSteward(ws.repoRoot);
|
|
512
|
+
if (!policyName) return null;
|
|
513
|
+
try {
|
|
514
|
+
const msg = sendMessage({
|
|
515
|
+
from: "system",
|
|
516
|
+
to: `policy:${policyName}`,
|
|
517
|
+
kind: "system",
|
|
518
|
+
subject: `Steward: ${ws.status} workspace needs attention`,
|
|
519
|
+
body: `Workspace \`${ws.branch ?? ws.id}\` in ${ws.repoRoot} is ${ws.status} and could not auto-land (${reason}). cd into ${ws.worktreePath}, rebase onto ${ws.baseRef ?? "base"}, resolve, run checks, then land it via POST /api/workspaces/${ws.id}/actions {"action":"merge","strategy":"rebase-ff"} — or escalate if you can't.`,
|
|
520
|
+
payload: { kind: "workspace.steward-task", workspaceId: ws.id, repoRoot: ws.repoRoot, worktreePath: ws.worktreePath, branch: ws.branch, baseRef: ws.baseRef, status: ws.status, reason },
|
|
521
|
+
});
|
|
522
|
+
emitNewMessage(msg);
|
|
523
|
+
getLifecycleManager().onMessageForPolicy(policyName);
|
|
524
|
+
patchWorkspaceMetadata(ws.id, { stewardWokenAt: Date.now(), stewardPolicy: policyName });
|
|
525
|
+
return policyName;
|
|
526
|
+
} catch {
|
|
527
|
+
return null;
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
|
|
360
531
|
async function scanWorkspaceConflicts(): Promise<Record<string, unknown>> {
|
|
361
532
|
const orchestrators = listOrchestrators().filter((orch) => orch.status === "online" && orch.apiUrl);
|
|
362
533
|
if (!orchestrators.length) return { scanned: 0, skipped: "no online orchestrators" };
|
|
@@ -366,6 +537,7 @@ async function scanWorkspaceConflicts(): Promise<Record<string, unknown>> {
|
|
|
366
537
|
);
|
|
367
538
|
const flagged: string[] = [];
|
|
368
539
|
const cleared: string[] = [];
|
|
540
|
+
const merged: string[] = [];
|
|
369
541
|
const notifiedStewards: string[] = [];
|
|
370
542
|
|
|
371
543
|
for (const ws of candidates) {
|
|
@@ -377,6 +549,37 @@ async function scanWorkspaceConflicts(): Promise<Record<string, unknown>> {
|
|
|
377
549
|
if (p.error || p.missing || p.conflict === undefined) continue;
|
|
378
550
|
|
|
379
551
|
const meta = ws.metadata as Record<string, unknown>;
|
|
552
|
+
|
|
553
|
+
// Landing wins over everything else. Once the work is in base — whether the
|
|
554
|
+
// PR was squash/cherry-pick merged on GitHub or fast-forwarded locally — the
|
|
555
|
+
// workspace is done, even if `git merge-tree` still predicts a textual
|
|
556
|
+
// conflict against the now-moved base (a PR-strategy row sits at
|
|
557
|
+
// merge_planned forever otherwise, and the conflict scan can even pin a
|
|
558
|
+
// landed branch to `conflict`). Reconcile to the terminal `merged` status so
|
|
559
|
+
// the dashboard stops showing it as unmerged and GC prunes it on schedule.
|
|
560
|
+
const landed = p.landed === true || p.prMerged === true;
|
|
561
|
+
if (landed && LANDED_RECONCILE_STATUSES.has(ws.status)) {
|
|
562
|
+
updateWorkspaceStatus(ws.id, "merged", {
|
|
563
|
+
autoMerged: true,
|
|
564
|
+
mergedFromStatus: ws.status,
|
|
565
|
+
landedDetectedAt: Date.now(),
|
|
566
|
+
landedVia: p.prMerged === true ? "pr" : "git",
|
|
567
|
+
autoConflict: false,
|
|
568
|
+
});
|
|
569
|
+
merged.push(ws.id);
|
|
570
|
+
createActivityEvent({
|
|
571
|
+
clientId: "server-workspace-" + ws.id + "-merged-" + Date.now(),
|
|
572
|
+
kind: "state",
|
|
573
|
+
title: "Workspace work landed in base",
|
|
574
|
+
body: `${ws.branch ?? ws.id} is ${p.prMerged === true ? "merged on the remote (PR)" : "already merged into base"} ${p.baseRef ? `(${p.baseRef})` : ""} — marking merged`,
|
|
575
|
+
meta: ws.branch ?? ws.id,
|
|
576
|
+
icon: "ti-git-merge",
|
|
577
|
+
view: "orchestrators",
|
|
578
|
+
metadata: { source: "server", maintenanceJobId: "workspace-conflict-scan", workspaceId: ws.id, fromStatus: ws.status },
|
|
579
|
+
});
|
|
580
|
+
continue;
|
|
581
|
+
}
|
|
582
|
+
|
|
380
583
|
if (p.conflict === true && ws.status !== "conflict") {
|
|
381
584
|
updateWorkspaceStatus(ws.id, "conflict", {
|
|
382
585
|
autoConflict: true,
|
|
@@ -397,10 +600,15 @@ async function scanWorkspaceConflicts(): Promise<Record<string, unknown>> {
|
|
|
397
600
|
view: "orchestrators",
|
|
398
601
|
metadata: { source: "server", maintenanceJobId: "workspace-conflict-scan", workspaceId: ws.id, ahead: p.ahead, behind: p.behind },
|
|
399
602
|
});
|
|
400
|
-
//
|
|
401
|
-
//
|
|
402
|
-
//
|
|
403
|
-
|
|
603
|
+
// Hand the conflict to a steward so it gets resolved instead of rotting
|
|
604
|
+
// until merge time. Once-per-onset (we only enter this branch on the
|
|
605
|
+
// active→conflict transition). When managed stewards are enabled, wake the
|
|
606
|
+
// auto-provisioned per-repo steward agent (#167); otherwise fall back to the
|
|
607
|
+
// legacy direct ping of the elected steward agent.
|
|
608
|
+
if (getStewardConfig().enabled) {
|
|
609
|
+
const woke = wakeRepoSteward(getWorkspace(ws.id) ?? ws, "conflict");
|
|
610
|
+
if (woke) notifiedStewards.push(woke);
|
|
611
|
+
} else if (ws.stewardAgentId) {
|
|
404
612
|
try {
|
|
405
613
|
const msg = sendMessage({
|
|
406
614
|
from: "system",
|
|
@@ -425,7 +633,74 @@ async function scanWorkspaceConflicts(): Promise<Record<string, unknown>> {
|
|
|
425
633
|
}
|
|
426
634
|
}
|
|
427
635
|
|
|
428
|
-
return { scanned: candidates.length, flagged, cleared, notifiedStewards };
|
|
636
|
+
return { scanned: candidates.length, flagged, cleared, merged, notifiedStewards };
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
// Deterministic auto-land (Layer 0, issue #167). Walk the "ready to land" queue
|
|
640
|
+
// (`review_requested` isolated worktrees) and, for any whose work is a strict
|
|
641
|
+
// clean fast-forward (no conflict, base hasn't moved, real commits ahead), land
|
|
642
|
+
// it via the shared merge helper — the same lease-serialized path the merge route
|
|
643
|
+
// uses. Conflicts and diverged bases (`behind>0`, even if cleanly rebasable) are
|
|
644
|
+
// deliberately left for the steward (a human or, later, the managed steward
|
|
645
|
+
// agent): per the chosen "Clean FF immediate" gate, anything needing a rebase or
|
|
646
|
+
// conflict reasoning is not auto-landed. No agent in the loop for the easy case.
|
|
647
|
+
async function autoMergeCleanFastForwards(): Promise<Record<string, unknown>> {
|
|
648
|
+
if (process.env.AGENT_RELAY_WORKSPACE_AUTO_MERGE === "0") return { skipped: "disabled" };
|
|
649
|
+
const orchestrators = listOrchestrators().filter((orch) => orch.status === "online" && orch.apiUrl);
|
|
650
|
+
if (!orchestrators.length) return { scanned: 0, skipped: "no online orchestrators" };
|
|
651
|
+
|
|
652
|
+
const candidates = listWorkspaces().filter(
|
|
653
|
+
(ws) => ws.mode === "isolated" && Boolean(ws.worktreePath) && ws.status === "review_requested",
|
|
654
|
+
);
|
|
655
|
+
const stewardEnabled = getStewardConfig().enabled;
|
|
656
|
+
const merged: string[] = [];
|
|
657
|
+
const heldByLease: string[] = [];
|
|
658
|
+
const leftForSteward: string[] = [];
|
|
659
|
+
const wokeStewards: string[] = [];
|
|
660
|
+
|
|
661
|
+
for (const ws of candidates) {
|
|
662
|
+
const orch = orchestrators.find((candidate) => workspacePathWithinBase(ws.sourceCwd, candidate.baseDir));
|
|
663
|
+
if (!orch?.apiUrl) continue;
|
|
664
|
+
const preview = await fetchHostMergePreview(orch.apiUrl, ws);
|
|
665
|
+
if (!preview || (preview as { available?: false }).available === false) continue;
|
|
666
|
+
const p = preview as WorkspaceMergePreview;
|
|
667
|
+
if (p.error || p.missing) continue;
|
|
668
|
+
|
|
669
|
+
const ahead = p.unmergedAhead ?? p.ahead ?? 0;
|
|
670
|
+
const cleanFF = p.cleanFastForward === true && p.conflict !== true && (p.behind ?? 0) === 0 && ahead > 0;
|
|
671
|
+
if (!cleanFF) {
|
|
672
|
+
// Base moved on (behind>0) or conflict — needs reasoning/rebase, which is the
|
|
673
|
+
// steward's job. Wake the managed steward when enabled (cooldown-guarded);
|
|
674
|
+
// otherwise leave it for conflict-scan's legacy ping / human review.
|
|
675
|
+
leftForSteward.push(ws.id);
|
|
676
|
+
if (stewardEnabled) {
|
|
677
|
+
const woke = wakeRepoSteward(ws, (p.behind ?? 0) > 0 ? "base moved on (behind>0)" : "conflict");
|
|
678
|
+
if (woke) wokeStewards.push(woke);
|
|
679
|
+
}
|
|
680
|
+
continue;
|
|
681
|
+
}
|
|
682
|
+
|
|
683
|
+
const result = requestWorkspaceMerge(ws, { strategy: "rebase-ff", requestedBy: "auto-merge" });
|
|
684
|
+
if (!result.ok) {
|
|
685
|
+
// 409 = another merge holds this repo's lease this tick; retry next sweep.
|
|
686
|
+
heldByLease.push(ws.id);
|
|
687
|
+
continue;
|
|
688
|
+
}
|
|
689
|
+
emitCommand(result.command);
|
|
690
|
+
merged.push(ws.id);
|
|
691
|
+
createActivityEvent({
|
|
692
|
+
clientId: `workspace-auto-merge-${ws.id}-${Date.now()}`,
|
|
693
|
+
kind: "state",
|
|
694
|
+
title: "Workspace auto-merging (clean fast-forward)",
|
|
695
|
+
body: `${ws.branch ?? ws.id} → ${p.baseRef ?? "base"} (${ahead} ahead, clean)`,
|
|
696
|
+
meta: ws.branch ?? ws.id,
|
|
697
|
+
icon: "ti-git-merge",
|
|
698
|
+
view: "orchestrators",
|
|
699
|
+
metadata: { source: "server", maintenanceJobId: "workspace-auto-merge", workspaceId: ws.id, commandId: result.command.id, ahead },
|
|
700
|
+
});
|
|
701
|
+
}
|
|
702
|
+
|
|
703
|
+
return { scanned: candidates.length, merged, heldByLease, leftForSteward, wokeStewards };
|
|
429
704
|
}
|
|
430
705
|
|
|
431
706
|
// Send a system DM, swallowing failures (a stale/missing/misconfigured target
|