agent-relay-server 0.11.6 → 0.11.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/routes.ts CHANGED
@@ -85,6 +85,9 @@ import {
85
85
  getWorkspace,
86
86
  listWorkspaces,
87
87
  updateWorkspaceStatus,
88
+ releaseMergeLease,
89
+ listRepoStewards,
90
+ listMergeLeases,
88
91
  deleteWorkspace,
89
92
  deleteOrchestrator,
90
93
  evaluatePoolBindings,
@@ -102,11 +105,13 @@ import {
102
105
  getAgentProfile,
103
106
  getManagedAgentState,
104
107
  getSpawnPolicy,
108
+ getStewardConfigEntry,
105
109
  listAgentProfiles,
106
110
  listSpawnPolicies,
107
111
  listConfig,
108
112
  setAgentProfile,
109
113
  setConfig,
114
+ setStewardConfig,
110
115
  upsertManagedAgentState,
111
116
  updateManagedAgentState,
112
117
  } from "./config-store";
@@ -135,7 +140,7 @@ import {
135
140
  updateAutomation,
136
141
  type AutomationDispatchResult,
137
142
  } from "./automations";
138
- import type { ActivityEventInput, ActivityKind, AgentCard, AgentKind, AgentProfile, AgentSessionGuard, ChannelBinding, ChannelBindingMode, ChannelDirection, ChannelRouteTarget, ChannelSummary, Command, CommandStatus, CreateCommandInput, CreatePairInput, IntegrationEventInput, IntegrationSummary, ManagedAgent, ManagedSessionExitDiagnostics, Message, OrchestratorRuntimeInput, PairActionInput, PairMessageInput, PairStatus, RegisterAgentInput, RegisterOrchestratorInput, SendMessageInput, SpawnApprovalMode, SpawnPolicy, SpawnProvider, TaskStatus, TaskStatusInput, WorkspaceMetadata, WorkspaceMode, WorkspaceOrphan, WorkspaceProbe, WorkspaceStatus } from "./types";
143
+ import type { ActivityEventInput, ActivityKind, AgentCard, AgentKind, AgentProfile, AgentSessionGuard, ChannelBinding, ChannelBindingMode, ChannelDirection, ChannelRouteTarget, ChannelSummary, Command, CommandStatus, CreateCommandInput, CreatePairInput, IntegrationEventInput, IntegrationSummary, ManagedAgent, ManagedSessionExitDiagnostics, Message, OrchestratorRuntimeInput, PairActionInput, PairMessageInput, PairStatus, RegisterAgentInput, RegisterOrchestratorInput, SendMessageInput, SpawnApprovalMode, SpawnPolicy, SpawnProvider, TaskStatus, TaskStatusInput, WorkspaceMergeStrategy, WorkspaceMetadata, WorkspaceMode, WorkspaceOrphan, WorkspaceProbe, WorkspaceStatus } from "./types";
139
144
  import { getIntegrationTokens, INTEGRATION_RATE_LIMIT_PER_MINUTE, MAX_BODY_BYTES, VERSION, type IntegrationTokenConfig } from "./config";
140
145
  import { CONTRACT_VERSIONS, parseRuntimeCapabilities, parseRuntimeContracts, parseRuntimePackage, type RuntimeCapabilities, type RuntimeContracts, type RuntimePackageMetadata } from "./contracts";
141
146
  import { listHostDirectories } from "./agent-spawn";
@@ -144,6 +149,7 @@ import type { ProviderConfig } from "../runner/src/adapter";
144
149
  import { resolveProviderSelection, type ProviderEffort } from "agent-relay-sdk/provider-catalog";
145
150
  import { effectiveProviderCatalogList } from "./provider-catalog-store";
146
151
  import { buildManagedSpawnParams, effectiveManagedPolicyWorkspaceMode } from "./managed-policy";
152
+ import { requestWorkspaceMerge } from "./workspace-merge";
147
153
  import {
148
154
  getComponentAuth,
149
155
  getIntegrationAuth,
@@ -182,7 +188,7 @@ import { postMcp } from "./mcp";
182
188
  import { readFileSync } from "node:fs";
183
189
  import { isAbsolute, relative, resolve } from "node:path";
184
190
  import type { ArtifactKind, ArtifactSensitivity, AttachmentRef, ContextBudget, CreateMemoryInput, MemoryBrokerContext, MemoryConfidence, MemoryQuery, MemoryRedactionState, MemorySensitivity, MemoryType, MemoryVisibility, TaskRoutingHints, TokenConstraints, UpdateMemoryInput } from "./types";
185
- import { issueIntegrationRuntimeToken, issueInteractiveRunnerRuntimeToken, issueMcpRuntimeToken, issueOrchestratorRuntimeToken, runnerRuntimeTokenEnv } from "./runtime-tokens";
191
+ import { issueIntegrationRuntimeToken, issueInteractiveRunnerRuntimeToken, issueMcpRuntimeToken, issueOrchestratorRuntimeToken, reissueRunnerRuntimeToken, runnerRuntimeTokenEnv } from "./runtime-tokens";
186
192
  import { listMaintenanceJobs, runLegacyMaintenanceReaper, runMaintenanceJobNow } from "./maintenance";
187
193
 
188
194
  type Handler = (
@@ -314,7 +320,8 @@ const VALID_CHANNEL_BINDING_TARGET_TYPES = ["agent", "label", "tag", "capability
314
320
  const VALID_WORKSPACE_MODES = ["isolated", "shared", "inherit"] as const;
315
321
  const VALID_WORKSPACE_STATUSES = ["active", "ready", "conflict", "review_requested", "merge_planned", "merged", "abandoned", "cleanup_requested", "cleaned"] as const;
316
322
  const VALID_CHANNEL_BINDING_MODES = ["exclusive", "broadcast"] as const;
317
- const VALID_AGENT_ACTIONS = ["restart", "shutdown", "reconnect", "compact", "clearContext"] as const;
323
+ const VALID_AGENT_ACTIONS = ["restart", "shutdown", "reconnect", "compact", "clearContext", "resume"] as const;
324
+ const CLAUDE_RESUME_ID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
318
325
  const VALID_AGENT_SPAWN_PROVIDERS = ["codex"] as const;
319
326
  const VALID_CODEX_SPAWN_APPROVALS = ["open", "guarded", "read-only"] as const;
320
327
  const VALID_PROVIDER_EFFORTS = ["low", "medium", "high", "xhigh", "max"] as const;
@@ -1243,7 +1250,10 @@ function auditEvent(input: ActivityEventInput): void {
1243
1250
  function auditCommandOutcome(command: Command): void {
1244
1251
  if (command.status !== "succeeded" && command.status !== "failed") return;
1245
1252
  if (command.type !== "agent.restart" && command.type !== "agent.shutdown" && command.type !== "agent.reconnect") return;
1246
- const action = agentControlActionFromCommandType(command.type);
1253
+ const paramAction = typeof command.params?.action === "string" && (VALID_AGENT_ACTIONS as readonly string[]).includes(command.params.action)
1254
+ ? command.params.action as AgentControlAction
1255
+ : null;
1256
+ const action = paramAction ?? agentControlActionFromCommandType(command.type);
1247
1257
  if (!action) return;
1248
1258
  const agentId = typeof command.params?.agentId === "string" ? command.params.agentId : command.target;
1249
1259
  const succeeded = command.status === "succeeded";
@@ -2138,7 +2148,7 @@ const deleteAgentById: Handler = (_req, params) => {
2138
2148
  type AgentControlAction = (typeof VALID_AGENT_ACTIONS)[number];
2139
2149
 
2140
2150
  function agentControlActionCommandType(action: AgentControlAction): "agent.restart" | "agent.shutdown" | "agent.reconnect" | "agent.compact" | "agent.clearContext" {
2141
- if (action === "restart") return "agent.restart";
2151
+ if (action === "restart" || action === "resume") return "agent.restart";
2142
2152
  if (action === "shutdown") return "agent.shutdown";
2143
2153
  if (action === "compact") return "agent.compact";
2144
2154
  if (action === "clearContext") return "agent.clearContext";
@@ -2156,6 +2166,7 @@ function agentControlActionFromCommandType(type: string): AgentControlAction | n
2156
2166
 
2157
2167
  function agentControlActionRequestedTitle(action: AgentControlAction): string {
2158
2168
  if (action === "restart") return "Agent restart requested";
2169
+ if (action === "resume") return "Agent resume requested";
2159
2170
  if (action === "shutdown") return "Agent shutdown requested";
2160
2171
  if (action === "compact") return "Agent compaction requested";
2161
2172
  if (action === "clearContext") return "Agent context clear requested";
@@ -2164,6 +2175,7 @@ function agentControlActionRequestedTitle(action: AgentControlAction): string {
2164
2175
 
2165
2176
  function agentControlActionCompletedTitle(action: AgentControlAction): string {
2166
2177
  if (action === "restart") return "Agent restarted";
2178
+ if (action === "resume") return "Agent resumed";
2167
2179
  if (action === "shutdown") return "Agent shut down";
2168
2180
  if (action === "compact") return "Agent compacted";
2169
2181
  if (action === "clearContext") return "Agent context cleared";
@@ -2174,6 +2186,7 @@ function agentControlActionIcon(action: AgentControlAction): string {
2174
2186
  if (action === "shutdown") return "ti-power";
2175
2187
  if (action === "compact") return "ti-compress";
2176
2188
  if (action === "clearContext") return "ti-eraser";
2189
+ if (action === "resume") return "ti-player-play";
2177
2190
  return "ti-refresh";
2178
2191
  }
2179
2192
 
@@ -2186,6 +2199,7 @@ function agentIsControlEligible(agent: AgentCard): boolean {
2186
2199
 
2187
2200
  function agentCanReceiveControlAction(agent: AgentCard, action: AgentControlAction): boolean {
2188
2201
  if (!agentIsControlEligible(agent)) return false;
2202
+ if (action === "resume") return agentRuntimeProvider(agent) === "claude" && (agent.status === "offline" || agent.status === "stale");
2189
2203
  const lifecycle = agent.providerCapabilities?.lifecycle;
2190
2204
  if (lifecycle) {
2191
2205
  if (action === "restart") return lifecycle.restartHard === true;
@@ -2197,6 +2211,10 @@ function agentCanReceiveControlAction(agent: AgentCard, action: AgentControlActi
2197
2211
  return agent.meta?.runnerManaged === true && (action === "restart" || action === "shutdown");
2198
2212
  }
2199
2213
 
2214
+ function agentRuntimeProvider(agent: AgentCard): string | undefined {
2215
+ return metaString(agent.meta, "provider") ?? agent.providerCapabilities?.model?.provider;
2216
+ }
2217
+
2200
2218
  function managedControlOrchestrator(agent: AgentCard): NonNullable<ReturnType<typeof getOrchestrator>> | null {
2201
2219
  if (agent.meta?.runnerManaged !== true) return null;
2202
2220
  const metaSessionName = typeof agent.meta.sessionName === "string" ? agent.meta.sessionName : "";
@@ -2221,11 +2239,16 @@ function restartSpawnParamsForAgent(
2221
2239
  orchestrator: NonNullable<ReturnType<typeof getOrchestrator>> | null,
2222
2240
  policyName?: string,
2223
2241
  spawnRequestId?: string,
2242
+ opts: { resumeId?: string } = {},
2224
2243
  ): Record<string, unknown> | undefined {
2225
2244
  if (!orchestrator) return undefined;
2226
2245
  const requestId = spawnRequestId ?? spawnRequestIdForRestart();
2227
2246
  const policy = policyName ? getSpawnPolicy(policyName) : null;
2228
- if (policy) return { ...managedSpawnParams(policy.value, requestId), agentId: agent.id, requestedBy: "dashboard-restart" };
2247
+ const requestedBy = opts.resumeId ? "dashboard-resume" : "dashboard-restart";
2248
+ if (policy) {
2249
+ const params = { ...managedSpawnParams(policy.value, requestId), agentId: agent.id, requestedBy };
2250
+ return opts.resumeId ? withClaudeResumeParams(params, opts.resumeId, agent.id) : params;
2251
+ }
2229
2252
 
2230
2253
  const provider = metaString(agent.meta, "provider");
2231
2254
  if (provider !== "claude" && provider !== "codex") return undefined;
@@ -2254,7 +2277,7 @@ function restartSpawnParamsForAgent(
2254
2277
  };
2255
2278
  }
2256
2279
  }
2257
- return {
2280
+ const params = {
2258
2281
  action: "spawn",
2259
2282
  provider,
2260
2283
  ...resolvedModel,
@@ -2279,17 +2302,57 @@ function restartSpawnParamsForAgent(
2279
2302
  label,
2280
2303
  policyName,
2281
2304
  spawnRequestId: requestId,
2282
- createdBy: "dashboard-restart",
2305
+ createdBy: requestedBy,
2283
2306
  }),
2284
- requestedBy: "dashboard-restart",
2307
+ requestedBy,
2285
2308
  requestedAt: Date.now(),
2286
2309
  };
2310
+ return opts.resumeId ? withClaudeResumeParams(params, opts.resumeId, agent.id) : params;
2287
2311
  }
2288
2312
 
2289
2313
  function spawnRequestIdForRestart(): string {
2290
2314
  return `sp_${crypto.randomUUID()}`;
2291
2315
  }
2292
2316
 
2317
+ function withClaudeResumeParams(params: Record<string, unknown>, resumeId: string, agentId: string): Record<string, unknown> {
2318
+ return {
2319
+ ...params,
2320
+ providerArgs: providerArgsWithClaudeResume(recordStringArray(params.providerArgs), resumeId),
2321
+ resumeOfAgentId: agentId,
2322
+ claudeResumeId: resumeId,
2323
+ };
2324
+ }
2325
+
2326
+ function providerArgsWithClaudeResume(args: string[], resumeId: string): string[] {
2327
+ const cleaned: string[] = [];
2328
+ for (let i = 0; i < args.length; i += 1) {
2329
+ const arg = args[i];
2330
+ if (!arg) continue;
2331
+ if (arg === "--resume") {
2332
+ i += 1;
2333
+ continue;
2334
+ }
2335
+ if (arg.startsWith("--resume=")) continue;
2336
+ cleaned.push(arg);
2337
+ }
2338
+ return [...cleaned, "--resume", resumeId];
2339
+ }
2340
+
2341
+ function recordStringArray(value: unknown): string[] {
2342
+ return Array.isArray(value) ? value.filter((item): item is string => typeof item === "string" && item.trim().length > 0) : [];
2343
+ }
2344
+
2345
+ function latestClaudeResumeIdForAgent(agent: AgentCard): string | undefined {
2346
+ for (const entry of getAgentTimeline(agent.id, { limit: 50 })) {
2347
+ const metadata = entry.metadata;
2348
+ if (!metadata) continue;
2349
+ const provider = metaString(metadata, "provider");
2350
+ const resumeId = metaString(metadata, "claudeResumeId");
2351
+ if (provider === "claude" && resumeId && CLAUDE_RESUME_ID_RE.test(resumeId)) return resumeId;
2352
+ }
2353
+ return undefined;
2354
+ }
2355
+
2293
2356
  function compactMemoryParams(agent: AgentCard): Record<string, unknown> {
2294
2357
  const alwaysReload = agent.tags
2295
2358
  .filter((tag) => tag.startsWith("memory-reload:"))
@@ -2310,11 +2373,14 @@ const postAgentAction: Handler = async (req, params) => {
2310
2373
  if (!agent) return error("agent not found", 404);
2311
2374
  if (!agentCanReceiveControlAction(agent, action)) return error(`agent does not support ${action}`, 400);
2312
2375
 
2313
- const orchestrator = (action === "restart" || action === "shutdown") ? managedControlOrchestrator(agent) : null;
2376
+ const orchestrator = (action === "restart" || action === "shutdown" || action === "resume") ? managedControlOrchestrator(agent) : null;
2314
2377
  const metaSessionName = typeof agent.meta?.sessionName === "string" ? agent.meta.sessionName : undefined;
2315
2378
  const metaTmuxSession = typeof agent.meta?.tmuxSession === "string" ? agent.meta.tmuxSession : undefined;
2316
2379
  const metaPolicyName = typeof agent.meta?.policyName === "string" ? agent.meta.policyName : undefined;
2317
2380
  const metaSpawnRequestId = typeof agent.meta?.spawnRequestId === "string" ? agent.meta.spawnRequestId : undefined;
2381
+ const resumeId = action === "resume" ? latestClaudeResumeIdForAgent(agent) : undefined;
2382
+ if (action === "resume" && !orchestrator) return error("no online orchestrator available to resume agent", 409);
2383
+ if (action === "resume" && !resumeId) return error("no Claude resume id recorded for agent", 422);
2318
2384
  const denied = authorizeRoute(req, {
2319
2385
  scope: "agent:write",
2320
2386
  resource: { agentId: agent.id, orchestratorId: orchestrator?.id, policyName: metaPolicyName, spawnRequestId: metaSpawnRequestId },
@@ -2333,14 +2399,15 @@ const postAgentAction: Handler = async (req, params) => {
2333
2399
  ...(metaTmuxSession ? { tmuxSession: metaTmuxSession } : {}),
2334
2400
  ...(metaPolicyName ? { policyName: metaPolicyName } : {}),
2335
2401
  ...(metaSpawnRequestId ? { spawnRequestId: metaSpawnRequestId } : {}),
2336
- ...(action === "restart" ? { restartSpawn: restartSpawnParamsForAgent(agent, orchestrator, metaPolicyName, metaSpawnRequestId) } : {}),
2402
+ ...(action === "restart" || action === "resume" ? { restartSpawn: restartSpawnParamsForAgent(agent, orchestrator, metaPolicyName, metaSpawnRequestId, { resumeId }) } : {}),
2337
2403
  ...(action === "compact" ? compactMemoryParams(agent) : {}),
2404
+ ...(resumeId ? { claudeResumeId: resumeId } : {}),
2338
2405
  requestedBy: "dashboard",
2339
2406
  requestedAt: Date.now(),
2340
2407
  },
2341
2408
  });
2342
- if (action === "shutdown" || action === "restart") {
2343
- const lifecycleAction = action === "shutdown" ? "shutting-down" : "restarting";
2409
+ if (action === "shutdown" || action === "restart" || action === "resume") {
2410
+ const lifecycleAction = action === "shutdown" ? "shutting-down" : action === "resume" ? "resuming" : "restarting";
2344
2411
  markReady(agent.id, false);
2345
2412
  mergeAgentMeta(agent.id, { lifecycleAction, lifecycleActionAt: Date.now(), lifecycleCommandId: command.id });
2346
2413
  emitAgentStatus(agent.id);
@@ -2821,6 +2888,40 @@ const postRuntimeTokenRenew: Handler = async (req) => {
2821
2888
  }, 201);
2822
2889
  };
2823
2890
 
2891
+ // Orchestrator-mediated runner-token re-mint. A live runner whose runtime token
2892
+ // has expired (e.g. the relay was unreachable across the renewal window) cannot
2893
+ // self-renew — its dead token can't authenticate. Its orchestrator, which holds
2894
+ // a long-lived credential, proxies the runner's expired token here. We verify the
2895
+ // token is a genuine, non-revoked runner token owned by THIS orchestrator and
2896
+ // mint a fresh one cloning its scope, so the session self-heals without a restart.
2897
+ const postOrchestratorRunnerToken: Handler = async (req, params) => {
2898
+ const orch = getOrchestrator(params.id!);
2899
+ if (!orch) return error("orchestrator not found", 404);
2900
+ const denied = authorizeRoute(req, { scope: "command:write", resource: { orchestratorId: orch.id } });
2901
+ if (denied) return denied;
2902
+ const parsed = await parseBody<unknown>(req);
2903
+ if (!parsed.ok) return error(parsed.error, parsed.status);
2904
+ if (!isRecord(parsed.body)) return error("body required");
2905
+ const token = cleanString(parsed.body.token, "token", { required: true, max: 8192 })!;
2906
+ const result = reissueRunnerRuntimeToken({
2907
+ expiredToken: token,
2908
+ orchestratorId: orch.id,
2909
+ createdBy: `orchestrator-remint:${orch.id}`,
2910
+ });
2911
+ if ("error" in result) return error(result.error, 403);
2912
+ auditEvent({
2913
+ clientId: "server-runner-token-remint-" + result.record.jti + "-" + Date.now(),
2914
+ kind: "state",
2915
+ title: "Runner token re-minted",
2916
+ body: result.record.sub,
2917
+ meta: result.record.jti,
2918
+ icon: "ti-key",
2919
+ view: "security",
2920
+ metadata: { orchestratorId: orch.id, jti: result.record.jti, sub: result.record.sub, ...authAuditMetadata(req) },
2921
+ });
2922
+ return json(result, 201);
2923
+ };
2924
+
2824
2925
  const postInteractiveRunnerRuntimeToken: Handler = async (req) => {
2825
2926
  if (!isRootCredentialRequest(req)) return error("root credential required for runtime token exchange", 403);
2826
2927
  const parsed = await parseBody<unknown>(req);
@@ -3799,6 +3900,10 @@ const getWorkspaceById: Handler = (_req, params) => {
3799
3900
  return json(workspace);
3800
3901
  };
3801
3902
 
3903
+ // Per-repo coordination state: persistent steward records (survive offline gaps)
3904
+ // and in-flight merge serialization leases (issue #157).
3905
+ const getWorkspaceStewards: Handler = () => json({ stewards: listRepoStewards(), mergeLeases: listMergeLeases() });
3906
+
3802
3907
  // Proxy a read-only workspace interrogation to the owning orchestrator's host
3803
3908
  // API. Degrades to { available: false } rather than erroring so the dashboard
3804
3909
  // can render a placeholder when the host is offline or there's no worktree.
@@ -3949,13 +4054,39 @@ const postWorkspaceAction: Handler = async (req, params) => {
3949
4054
  const denied = authorizeRoute(req, { scope: requiresCommand ? "command:write" : "agent:write", resource: { agentId, cwd: workspace.worktreePath } });
3950
4055
  if (denied) return denied;
3951
4056
  if (action === "status") return json(workspace);
4057
+ // Base merges go through the shared helper (lease + command + bind), the same
4058
+ // path the auto-merge job uses, so both serialize per repo (issue #157).
4059
+ if (action === "merge") {
4060
+ const strategy = cleanEnum(parsed.body.strategy, "strategy", ["pr", "rebase-ff", "auto"] as const, "auto") as WorkspaceMergeStrategy;
4061
+ const result = requestWorkspaceMerge(workspace, {
4062
+ requestedBy: agentId ?? "dashboard",
4063
+ strategy,
4064
+ deleteBranch: parsed.body.deleteBranch !== false,
4065
+ prTitle: cleanString(parsed.body.prTitle, "prTitle", { max: 240 }),
4066
+ prBody: cleanString(parsed.body.prBody, "prBody", { max: 8000 }),
4067
+ metadata: { ...metadata, ...(detail ? { detail } : {}), ...(agentId ? { updatedByAgentId: agentId } : {}) },
4068
+ });
4069
+ if (!result.ok) return error(result.error, result.status);
4070
+ emitCommand(result.command);
4071
+ auditEvent({
4072
+ clientId: `workspace-merge-${workspace.id}-${Date.now()}`,
4073
+ kind: "state",
4074
+ title: "Workspace merge",
4075
+ body: detail ?? workspace.worktreePath,
4076
+ meta: workspace.branch ?? workspace.id,
4077
+ icon: "ti-git-merge",
4078
+ view: "orchestrators",
4079
+ agentId,
4080
+ metadata: { action: "merge", workspaceId: workspace.id, repoRoot: workspace.repoRoot, worktreePath: workspace.worktreePath, status: result.workspace.status, commandId: result.command.id, ...authAuditMetadata(req) },
4081
+ });
4082
+ return json({ workspace: result.workspace, command: result.command }, 202);
4083
+ }
3952
4084
  const statusByAction: Record<string, WorkspaceStatus | undefined> = {
3953
4085
  status: undefined,
3954
4086
  ready: "ready",
3955
4087
  "conflict-found": "conflict",
3956
4088
  "request-review": "review_requested",
3957
4089
  "merge-plan": "merge_planned",
3958
- merge: "merge_planned",
3959
4090
  abandon: "abandoned",
3960
4091
  cleanup: "cleanup_requested",
3961
4092
  };
@@ -3969,51 +4100,31 @@ const postWorkspaceAction: Handler = async (req, params) => {
3969
4100
  if (!updated) return error("workspace not found", 404);
3970
4101
  let command: Command | undefined;
3971
4102
  if (requiresCommand) {
3972
- // All orchestrators whose baseDir contains the workspace; prefer an online one.
4103
+ // Only `cleanup` reaches here — `merge` returned early via the shared helper.
4104
+ // Cleanup may queue: if the owning orchestrator is offline, the command (no
4105
+ // TTL) waits and reconciles when it reconnects. Only hard-fail when no
4106
+ // orchestrator owns the path at all — then DELETE is the escape.
3973
4107
  const owners = listOrchestrators().filter((candidate) => pathWithinBase(workspace.sourceCwd, candidate.baseDir));
3974
4108
  const onlineOwner = owners.find((candidate) => candidate.status === "online");
3975
- const baseParams = {
3976
- workspaceId: workspace.id,
3977
- repoRoot: workspace.repoRoot,
3978
- worktreePath: workspace.worktreePath,
3979
- branch: workspace.branch,
3980
- requestedBy: agentId ?? "dashboard",
3981
- requestedAt: Date.now(),
3982
- };
3983
- if (action === "merge") {
3984
- // Merge needs a live host: rebasing against a stale base later is unsafe.
3985
- if (!onlineOwner) return error("no online orchestrator available for workspace merge", 409);
3986
- const strategy = cleanEnum(parsed.body.strategy, "strategy", ["pr", "rebase-ff", "auto"] as const, "auto");
3987
- command = createCommand({
3988
- type: "workspace.merge",
3989
- source: "system",
3990
- target: onlineOwner.agentId,
3991
- correlationId: workspace.id,
3992
- params: {
3993
- action: "merge",
3994
- ...baseParams,
3995
- baseRef: workspace.baseRef,
3996
- baseSha: workspace.baseSha,
3997
- strategy,
3998
- deleteBranch: parsed.body.deleteBranch !== false,
3999
- prTitle: cleanString(parsed.body.prTitle, "prTitle", { max: 240 }),
4000
- prBody: cleanString(parsed.body.prBody, "prBody", { max: 8000 }),
4001
- },
4002
- });
4003
- } else {
4004
- // Cleanup may queue: if the owning orchestrator is offline, the command
4005
- // (no TTL) waits and reconciles when it reconnects. Only hard-fail when
4006
- // no orchestrator owns the path at all — then DELETE is the escape.
4007
- const owner = onlineOwner ?? owners[0];
4008
- if (!owner) return error("no orchestrator owns this workspace path; use DELETE /api/workspaces/:id to purge the record", 409);
4009
- command = createCommand({
4010
- type: "workspace.cleanup",
4011
- source: "system",
4012
- target: owner.agentId,
4013
- correlationId: workspace.id,
4014
- params: { action: "cleanup", ...baseParams, deleteBranch: true, queued: owner.status !== "online" },
4015
- });
4016
- }
4109
+ const owner = onlineOwner ?? owners[0];
4110
+ if (!owner) return error("no orchestrator owns this workspace path; use DELETE /api/workspaces/:id to purge the record", 409);
4111
+ command = createCommand({
4112
+ type: "workspace.cleanup",
4113
+ source: "system",
4114
+ target: owner.agentId,
4115
+ correlationId: workspace.id,
4116
+ params: {
4117
+ action: "cleanup",
4118
+ workspaceId: workspace.id,
4119
+ repoRoot: workspace.repoRoot,
4120
+ worktreePath: workspace.worktreePath,
4121
+ branch: workspace.branch,
4122
+ requestedBy: agentId ?? "dashboard",
4123
+ requestedAt: Date.now(),
4124
+ deleteBranch: true,
4125
+ queued: owner.status !== "online",
4126
+ },
4127
+ });
4017
4128
  emitCommand(command);
4018
4129
  }
4019
4130
  auditEvent({
@@ -4022,7 +4133,7 @@ const postWorkspaceAction: Handler = async (req, params) => {
4022
4133
  title: `Workspace ${action}`,
4023
4134
  body: detail ?? workspace.worktreePath,
4024
4135
  meta: workspace.branch ?? workspace.id,
4025
- icon: action === "cleanup" ? "ti-trash" : action === "merge" ? "ti-git-merge" : action === "conflict-found" ? "ti-alert-triangle" : "ti-git-branch",
4136
+ icon: action === "cleanup" ? "ti-trash" : action === "conflict-found" ? "ti-alert-triangle" : "ti-git-branch",
4026
4137
  view: "orchestrators",
4027
4138
  agentId,
4028
4139
  metadata: { action, workspaceId: workspace.id, repoRoot: workspace.repoRoot, worktreePath: workspace.worktreePath, status: updated.status, commandId: command?.id, ...authAuditMetadata(req) },
@@ -4336,6 +4447,11 @@ const patchCommand: Handler = async (req, params) => {
4336
4447
  }
4337
4448
  }
4338
4449
  if (command.type === "workspace.merge") {
4450
+ // Merge settled (either way) — free the per-repo merge lease so the next
4451
+ // base merge can proceed (issue #157).
4452
+ if (command.status === "succeeded" || command.status === "failed") {
4453
+ releaseMergeLease({ commandId: command.id });
4454
+ }
4339
4455
  if (command.status === "succeeded" && isRecord(command.result)) {
4340
4456
  const workspaceId = cleanString(command.result.workspaceId, "result.workspaceId", { max: 160 });
4341
4457
  const resultStatus = cleanEnum(command.result.status, "result.status", VALID_WORKSPACE_STATUSES) as WorkspaceStatus | undefined;
@@ -4360,16 +4476,35 @@ const patchCommand: Handler = async (req, params) => {
4360
4476
  if (command.type === "workspace.reconcile" && command.status === "succeeded" && isRecord(command.result)) {
4361
4477
  const workspaceId = cleanString(command.result.workspaceId, "result.workspaceId", { max: 160 });
4362
4478
  const resultStatus = cleanEnum(command.result.status, "result.status", VALID_WORKSPACE_STATUSES) as WorkspaceStatus | undefined;
4479
+ const removed = command.result.removed === true;
4363
4480
  if (workspaceId && resultStatus) {
4364
4481
  // Only act on workspaces the agent left in a live state; never overwrite
4365
4482
  // a status a human/agent has since moved on (merge_planned, abandoned, …).
4366
4483
  const current = getWorkspace(workspaceId);
4367
4484
  if (current && (current.status === "active" || current.status === "ready")) {
4368
- updateWorkspaceStatus(workspaceId, resultStatus, {
4369
- reconcileResult: command.result,
4370
- reconcileCommandId: command.id,
4371
- reconciledAt: Date.now(),
4372
- });
4485
+ if (removed) {
4486
+ // The owner exited and the worktree had no work — the orchestrator
4487
+ // already deleted it on disk. Drop the DB row immediately instead of
4488
+ // parking it at `cleaned` for 24h: a no-change session is pure junk
4489
+ // from the user's view, so it should leave the Workspaces panel now.
4490
+ deleteWorkspace(workspaceId);
4491
+ auditEvent({
4492
+ clientId: `workspace-reconcile-removed-${workspaceId}-${Date.now()}`,
4493
+ kind: "state",
4494
+ title: "Workspace removed (no changes)",
4495
+ body: current.worktreePath,
4496
+ meta: current.branch ?? workspaceId,
4497
+ icon: "ti-trash",
4498
+ view: "orchestrators",
4499
+ metadata: { reconcileCommandId: command.id, workspaceId, repoRoot: current.repoRoot },
4500
+ });
4501
+ } else {
4502
+ updateWorkspaceStatus(workspaceId, resultStatus, {
4503
+ reconcileResult: command.result,
4504
+ reconcileCommandId: command.id,
4505
+ reconciledAt: Date.now(),
4506
+ });
4507
+ }
4373
4508
  }
4374
4509
  }
4375
4510
  }
@@ -4432,6 +4567,27 @@ const deleteAgentProfileRoute: Handler = (req, params) => {
4432
4567
  }
4433
4568
  };
4434
4569
 
4570
+ // --- Steward config (global, provider-independent — issue #167) ---
4571
+
4572
+ const getStewardConfigRoute: Handler = () => json(getStewardConfigEntry());
4573
+
4574
+ const putStewardConfigRoute: Handler = async (req) => {
4575
+ const parsed = await parseBody<unknown>(req);
4576
+ if (!parsed.ok) return error(parsed.error, parsed.status);
4577
+ try {
4578
+ const value = isRecord(parsed.body) && Object.prototype.hasOwnProperty.call(parsed.body, "value")
4579
+ ? parsed.body.value
4580
+ : parsed.body;
4581
+ const updatedBy = isRecord(parsed.body) ? cleanString(parsed.body.updatedBy, "updatedBy", { max: 200 }) : undefined;
4582
+ const entry = setStewardConfig(value, updatedBy);
4583
+ emitConfigChanged(entry.namespace, entry.key, entry.version);
4584
+ return json(entry, entry.version === 1 ? 201 : 200);
4585
+ } catch (e) {
4586
+ if (e instanceof ValidationError) return error(e.message, 400);
4587
+ throw e;
4588
+ }
4589
+ };
4590
+
4435
4591
  // --- Config routes ---
4436
4592
 
4437
4593
  function normalizeConfigPathParam(raw: string | undefined, field: string): string {
@@ -6239,6 +6395,7 @@ const routes: Route[] = [
6239
6395
  route("POST", "/api/orchestrators/:id/heartbeat", postOrchestratorHeartbeat),
6240
6396
  route("PATCH", "/api/orchestrators/:id/agents", patchOrchestratorAgents),
6241
6397
  route("POST", "/api/orchestrators/:id/spawn", postOrchestratorSpawn),
6398
+ route("POST", "/api/orchestrators/:id/runner-token", postOrchestratorRunnerToken),
6242
6399
  route("POST", "/api/orchestrators/:id/actions", postOrchestratorAction),
6243
6400
  route("GET", "/api/orchestrators/:id/directories", getOrchestratorDirectories),
6244
6401
  route("POST", "/api/orchestrators/:id/directories", postOrchestratorCreateDirectory),
@@ -6259,6 +6416,7 @@ const routes: Route[] = [
6259
6416
  // Static segments before :id so "/workspaces/orphans" isn't captured as an id.
6260
6417
  route("GET", "/api/workspaces/orphans", getWorkspaceOrphans),
6261
6418
  route("POST", "/api/workspaces/orphans/reclaim", postWorkspaceOrphanReclaim),
6419
+ route("GET", "/api/workspaces/stewards", getWorkspaceStewards),
6262
6420
  route("GET", "/api/workspaces/:id", getWorkspaceById),
6263
6421
  route("GET", "/api/workspaces/:id/git-state", getWorkspaceGitState),
6264
6422
  route("GET", "/api/workspaces/:id/merge-preview", getWorkspaceMergePreview),
@@ -6296,6 +6454,8 @@ const routes: Route[] = [
6296
6454
  route("GET", "/api/agent-profiles/:name", getAgentProfileRoute),
6297
6455
  route("PUT", "/api/agent-profiles/:name", putAgentProfileRoute),
6298
6456
  route("DELETE", "/api/agent-profiles/:name", deleteAgentProfileRoute),
6457
+ route("GET", "/api/steward-config", getStewardConfigRoute),
6458
+ route("PUT", "/api/steward-config", putStewardConfigRoute),
6299
6459
  route("GET", "/api/config/:namespace", getConfigNamespace),
6300
6460
  route("GET", "/api/config/:namespace/:key/history", getConfigKeyHistory),
6301
6461
  route("GET", "/api/config/:namespace/:key", getConfigKey),
@@ -1,4 +1,5 @@
1
- import { createToken } from "./token-db";
1
+ import { createToken, revokeToken } from "./token-db";
2
+ import { verifyComponentTokenAllowExpired } from "./security";
2
3
  import type { TokenRecord } from "./types";
3
4
 
4
5
  interface RuntimeTokenResult {
@@ -6,6 +7,48 @@ interface RuntimeTokenResult {
6
7
  record: TokenRecord;
7
8
  }
8
9
 
10
// Grace window, in seconds, during which an already-expired runner token is
// still accepted as input to a re-mint. Anything that expired longer ago than
// this is refused, which caps how stale a replayed token can be; a session
// that is actually alive re-mints long before the window closes.
const REMINT_MAX_EXPIRED_AGE_SECONDS = 60 * 60 * 24 * 30; // 30 days
13
+
14
// Orchestrator-mediated re-mint of a runner token.
//
// Takes the runner's current token (which may already be expired) plus the id
// of the orchestrator making the request. The token is accepted only if it
// verifies (signature + revocation, expiry ignored), is a provider token whose
// subject starts with "runner:", lists this orchestrator in its
// constraints.orchestrators array, and did not expire more than
// REMINT_MAX_EXPIRED_AGE_SECONDS ago. On success a new provider-agent token is
// created with the same sub, scope and constraints, and the old token's jti
// (when it has one) is revoked.
//
// The caller MUST already be authenticated/authorized as the orchestrator:
// this function only checks that the presented token is a genuine, non-revoked
// runner token tied to that orchestrator. It does not authenticate the caller.
//
// Returns the newly created token result, or `{ error }` describing which
// check failed.
export function reissueRunnerRuntimeToken(input: {
  expiredToken: string;
  orchestratorId: string;
  createdBy?: string;
}): RuntimeTokenResult | { error: string } {
  const { expiredToken, orchestratorId, createdBy } = input;

  const claims = verifyComponentTokenAllowExpired(expiredToken);
  if (!claims) return { error: "invalid or revoked runner token" };

  // Only provider tokens issued to a runner subject are eligible.
  const isRunnerToken = claims.role === "provider" && claims.sub.startsWith("runner:");
  if (!isRunnerToken) return { error: "not a runner token" };

  // The token must name the calling orchestrator among its allowed orchestrators.
  const owners = claims.constraints?.orchestrators;
  const ownedByCaller = Array.isArray(owners) && owners.includes(orchestratorId);
  if (!ownedByCaller) return { error: "runner token not owned by this orchestrator" };

  // Tokens without an exp claim skip the staleness check entirely.
  if (claims.exp !== undefined) {
    const nowSeconds = Math.floor(Date.now() / 1000);
    const expiredFor = nowSeconds - claims.exp;
    if (expiredFor > REMINT_MAX_EXPIRED_AGE_SECONDS) {
      return { error: "runner token expired too long ago" };
    }
  }

  const previousJti = claims.jti;
  const fresh = createToken({
    profileId: "provider-agent",
    sub: claims.sub,
    role: "provider",
    scope: claims.scope,
    constraints: claims.constraints,
    createdBy: createdBy ?? `remint:${previousJti ?? "unknown"}`,
  });

  // Revoke only after the replacement exists; a token with no jti cannot be revoked.
  if (previousJti) revokeToken(previousJti);
  return fresh;
}
51
+
9
52
  export function issueOrchestratorRuntimeToken(input: {
10
53
  orchestratorId: string;
11
54
  baseDir: string;
package/src/security.ts CHANGED
@@ -294,6 +294,23 @@ export function verifyComponentToken(token: string, nowSeconds = Math.floor(Date
294
294
  return payload;
295
295
  }
296
296
 
297
// Signature-and-revocation check for a component token that deliberately does
// NOT look at expiry. Intended solely for orchestrator-mediated runner-token
// re-minting: the expired token serves as proof of which runner is being
// re-minted, while the authenticated orchestrator calling in provides the
// authorization. Revoked tokens are still rejected — revocation stays a hard kill.
//
// Returns the decoded payload, or null when the token is not three
// dot-separated segments, the signature does not match, the payload is not a
// component token, or its jti has been revoked.
export function verifyComponentTokenAllowExpired(token: string): ComponentToken | null {
  const segments = token.split(".");
  if (segments.length !== 3) return null;

  const [headerRaw, payloadRaw, signature] = segments as [string, string, string];

  // The signature covers "<header>.<payload>" exactly as presented.
  const expectedSignature = hmac(`${headerRaw}.${payloadRaw}`);
  if (!safeEqual(signature, expectedSignature)) return null;

  const claims = parseBase64urlJson(payloadRaw);
  if (!isComponentToken(claims)) return null;

  // No expiry check here on purpose; only revocation can still reject the token.
  return claims.jti && isTokenRevoked(claims.jti) ? null : claims;
}
313
+
297
314
  export function forbidden(req: Request): Response {
298
315
  return applyCors(req, Response.json({ error: "forbidden" }, { status: 403 }));
299
316
  }