patchrelay 0.75.3 → 0.77.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/agent-input-service.js +40 -26
  2. package/dist/build-info.json +3 -3
  3. package/dist/cli/data.js +3 -1
  4. package/dist/db/issue-session-store.js +44 -9
  5. package/dist/db/issue-store.js +11 -2
  6. package/dist/db/migrations.js +3 -0
  7. package/dist/factory-state.js +23 -0
  8. package/dist/github-webhook-reactive-run.js +15 -11
  9. package/dist/github-webhook-stack-coordination.js +8 -4
  10. package/dist/github-webhook-state-projector.js +204 -139
  11. package/dist/github-webhook-terminal-handler.js +37 -27
  12. package/dist/idle-reconciliation.js +122 -66
  13. package/dist/implementation-outcome-policy.js +5 -1
  14. package/dist/issue-session-projection-invalidator.js +9 -0
  15. package/dist/linear-agent-session-client.js +16 -8
  16. package/dist/linear-issue-projection.js +15 -11
  17. package/dist/linear-status-comment-sync.js +8 -4
  18. package/dist/linear-workflow-state-sync.js +9 -5
  19. package/dist/merged-linear-completion-reconciler.js +39 -17
  20. package/dist/no-pr-completion-check.js +51 -29
  21. package/dist/orchestration-parent-wake.js +15 -8
  22. package/dist/queue-health-monitor.js +17 -8
  23. package/dist/reactive-run-policy.js +5 -1
  24. package/dist/run-budgets.js +40 -6
  25. package/dist/run-completion-policy.js +50 -9
  26. package/dist/run-failure-policy.js +463 -0
  27. package/dist/run-finalizer.js +68 -35
  28. package/dist/run-launcher.js +63 -12
  29. package/dist/run-notification-handler.js +19 -9
  30. package/dist/run-orchestrator.js +70 -78
  31. package/dist/run-reconciler.js +137 -64
  32. package/dist/run-settlement.js +57 -0
  33. package/dist/run-wake-planner.js +39 -29
  34. package/dist/service-issue-actions.js +45 -28
  35. package/dist/service-startup-recovery.js +61 -35
  36. package/dist/telemetry.js +9 -0
  37. package/dist/terminal-wake-reconciler.js +20 -3
  38. package/dist/webhooks/agent-session-handler.js +22 -12
  39. package/dist/webhooks/dependency-readiness-handler.js +17 -10
  40. package/dist/webhooks/desired-stage-recorder.js +32 -13
  41. package/dist/webhooks/issue-removal-handler.js +24 -13
  42. package/package.json +1 -1
  43. package/dist/interrupted-run-recovery.js +0 -227
  44. package/dist/run-recovery-service.js +0 -202
  45. package/dist/zombie-recovery.js +0 -13
@@ -4,6 +4,7 @@ import { buildRunFailureActivity } from "./linear-session-reporting.js";
4
4
  import { loadPatchRelayRepoPrompting } from "./patchrelay-customization.js";
5
5
  import { buildRunPrompt as buildPatchRelayRunPrompt, findDisallowedPatchRelayPromptSectionIds, findUnknownPatchRelayPromptSectionIds, mergePromptCustomizationLayers, resolvePromptLayers, } from "./prompting/patchrelay.js";
6
6
  import { sanitizeDiagnosticText } from "./utils.js";
7
+ const WRITER = "run-launcher";
7
8
  function slugify(value) {
8
9
  return value.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 60);
9
10
  }
@@ -128,7 +129,7 @@ export class RunLauncher {
128
129
  ? params.effectiveContext.failureHeadSha
129
130
  : typeof params.effectiveContext?.headSha === "string" ? params.effectiveContext.headSha : undefined;
130
131
  const failureSignature = typeof params.effectiveContext?.failureSignature === "string" ? params.effectiveContext.failureSignature : undefined;
131
- this.db.issues.upsertIssue({
132
+ const claimUpdate = {
132
133
  projectId: params.item.projectId,
133
134
  linearIssueId: params.item.issueId,
134
135
  pendingRunType: null,
@@ -148,7 +149,18 @@ export class RunLauncher {
148
149
  lastAttemptedFailureAt: new Date().toISOString(),
149
150
  }
150
151
  : {}),
152
+ };
153
+ const claimCommit = this.db.issueSessions.commitIssueState({
154
+ writer: WRITER,
155
+ // `wakeIssue` is the freshest row this claim transaction has seen
156
+ // (materializeLegacyPendingWake may have bumped the version).
157
+ expectedVersion: wakeIssue.version,
158
+ update: claimUpdate,
159
+ // Never steal a slot another writer claimed concurrently.
160
+ onConflict: (current) => (current.activeRunId == null ? claimUpdate : undefined),
151
161
  });
162
+ if (claimCommit.outcome !== "applied")
163
+ return undefined;
152
164
  this.db.issueSessions.consumeIssueSessionEvents(params.item.projectId, params.item.issueId, freshWake.eventIds, created.id);
153
165
  this.db.issueSessions.setIssueSessionLastWakeReason(params.item.projectId, params.item.issueId, freshWake.wakeReason ?? null);
154
166
  return created;
@@ -201,8 +213,17 @@ export class RunLauncher {
201
213
  const thread = await this.codex.startThread({ cwd: params.worktreePath });
202
214
  threadId = thread.id;
203
215
  createdThreadForRun = true;
204
- this.db.issueSessions.upsertIssueWithLease({ projectId: params.project.id, linearIssueId: params.issue.linearIssueId, leaseId: params.leaseId }, { projectId: params.project.id, linearIssueId: params.issue.linearIssueId, threadId });
216
+ this.db.issueSessions.commitIssueState({
217
+ writer: WRITER,
218
+ lease: { projectId: params.project.id, linearIssueId: params.issue.linearIssueId, leaseId: params.leaseId },
219
+ update: { projectId: params.project.id, linearIssueId: params.issue.linearIssueId, threadId },
220
+ });
205
221
  }
222
+ // Plan §B5: persist the thread id on the run row BEFORE startTurn is
223
+ // awaited, so a turn/completed notification arriving while the turn is
224
+ // starting can already resolve the run by thread id. The orchestrator
225
+ // re-records it (with the turn id) after the launch returns.
226
+ this.recordRunThread(params, threadId, parentThreadId);
206
227
  this.db.runs.updateLaunchPhase(params.run.id, "thread_started");
207
228
  try {
208
229
  const turn = await this.codex.startTurn({ threadId, cwd: params.worktreePath, input: params.prompt });
@@ -216,7 +237,14 @@ export class RunLauncher {
216
237
  const thread = await this.codex.startThread({ cwd: params.worktreePath });
217
238
  threadId = thread.id;
218
239
  createdThreadForRun = true;
219
- this.db.issueSessions.upsertIssueWithLease({ projectId: params.project.id, linearIssueId: params.issue.linearIssueId, leaseId: params.leaseId }, { projectId: params.project.id, linearIssueId: params.issue.linearIssueId, threadId });
240
+ this.db.issueSessions.commitIssueState({
241
+ writer: WRITER,
242
+ lease: { projectId: params.project.id, linearIssueId: params.issue.linearIssueId, leaseId: params.leaseId },
243
+ update: { projectId: params.project.id, linearIssueId: params.issue.linearIssueId, threadId },
244
+ });
245
+ // Plan §B5: re-point the run row at the fresh thread before the
246
+ // retried startTurn, for the same notification race.
247
+ this.recordRunThread(params, threadId, parentThreadId);
220
248
  const turn = await this.codex.startTurn({ threadId, cwd: params.worktreePath, input: params.prompt });
221
249
  turnId = turn.turnId;
222
250
  this.db.runs.updateLaunchPhase(params.run.id, "turn_started");
@@ -236,15 +264,25 @@ export class RunLauncher {
236
264
  const lostLease = error instanceof Error && error.name === "IssueSessionLeaseLostError";
237
265
  if (!lostLease) {
238
266
  const nextState = resolveFailureFactoryState(params.runType);
239
- this.db.issueSessions.finishRunWithLease({ projectId: params.project.id, linearIssueId: params.issue.linearIssueId, leaseId: params.leaseId }, params.run.id, {
240
- status: "failed",
241
- failureReason: message,
242
- });
243
- this.db.issueSessions.upsertIssueWithLease({ projectId: params.project.id, linearIssueId: params.issue.linearIssueId, leaseId: params.leaseId }, {
244
- projectId: params.project.id,
245
- linearIssueId: params.issue.linearIssueId,
246
- activeRunId: null,
247
- factoryState: nextState,
267
+ // Issue clear + run-terminal write ride in one transaction; the run
268
+ // finish is gated on the issue commit so a lost lease skips both.
269
+ this.db.transaction(() => {
270
+ const commit = this.db.issueSessions.commitIssueState({
271
+ writer: WRITER,
272
+ lease: { projectId: params.project.id, linearIssueId: params.issue.linearIssueId, leaseId: params.leaseId },
273
+ update: {
274
+ projectId: params.project.id,
275
+ linearIssueId: params.issue.linearIssueId,
276
+ activeRunId: null,
277
+ factoryState: nextState,
278
+ },
279
+ });
280
+ if (commit.outcome !== "applied")
281
+ return;
282
+ this.db.runs.finishRun(params.run.id, {
283
+ status: "failed",
284
+ failureReason: message,
285
+ });
248
286
  });
249
287
  }
250
288
  this.logger.error({ issueKey: params.issue.issueKey, runType: params.runType, error: message }, `Failed to launch ${params.runType} run`);
@@ -255,6 +293,19 @@ export class RunLauncher {
255
293
  throw error;
256
294
  }
257
295
  }
296
+ // Persist the Codex thread id on the run row under the launch lease.
297
+ // Losing the lease here aborts the launch the same way assertLaunchLease
298
+ // does — the run row must not be touched by a worker that no longer owns
299
+ // the session.
300
+ recordRunThread(params, threadId, parentThreadId) {
301
+ const recorded = this.db.issueSessions.updateRunThreadWithLease({ projectId: params.project.id, linearIssueId: params.issue.linearIssueId, leaseId: params.leaseId }, params.run.id, { threadId, ...(parentThreadId ? { parentThreadId } : {}) });
302
+ if (recorded)
303
+ return;
304
+ const error = new Error("Lost issue-session lease while recording the Codex thread id");
305
+ error.name = "IssueSessionLeaseLostError";
306
+ this.logger.warn({ runId: params.run.id, issueId: params.issue.linearIssueId }, "Aborting run launch after losing issue-session lease while recording the Codex thread id");
307
+ throw error;
308
+ }
258
309
  async setInitialImplementationGoal(threadId, issue) {
259
310
  const goalSetter = this.codex.setThreadGoal;
260
311
  if (typeof goalSetter !== "function") {
@@ -1,7 +1,7 @@
1
1
  import { buildRunFailureActivity } from "./linear-session-reporting.js";
2
2
  import { extractTurnId, resolveRunCompletionStatus } from "./run-reporting.js";
3
- import { resolveRecoverablePostRunState } from "./interrupted-run-recovery.js";
4
3
  import { resolveFailureFactoryState } from "./reactive-pr-state.js";
4
+ const WRITER = "run-notification-handler";
5
5
  const DEFAULT_PUBLISH_COMMAND_TIMEOUT_MS = 10 * 60 * 1000;
6
6
  export class RunNotificationHandler {
7
7
  config;
@@ -90,19 +90,30 @@ export class RunNotificationHandler {
90
90
  return;
91
91
  }
92
92
  const nextState = resolveFailureFactoryState(run.runType);
93
+ const failureUpdate = {
94
+ projectId: run.projectId,
95
+ linearIssueId: run.linearIssueId,
96
+ activeRunId: null,
97
+ factoryState: nextState,
98
+ };
93
99
  const updated = this.withHeldIssueSessionLease(run.projectId, run.linearIssueId, (lease) => {
94
- this.db.issueSessions.finishRunWithLease(lease, run.id, {
100
+ const commit = this.db.issueSessions.commitIssueState({
101
+ writer: WRITER,
102
+ lease,
103
+ // The issue row was read before awaiting the failed-run recovery;
104
+ // only clear the slot if it still belongs to this run.
105
+ expectedVersion: issue.version,
106
+ update: failureUpdate,
107
+ onConflict: (current) => (current.activeRunId === run.id ? failureUpdate : undefined),
108
+ });
109
+ if (commit.outcome !== "applied")
110
+ return false;
111
+ this.db.runs.finishRun(run.id, {
95
112
  status: "failed",
96
113
  threadId,
97
114
  ...(completedTurnId ? { turnId: completedTurnId } : {}),
98
115
  failureReason,
99
116
  });
100
- this.db.issueSessions.upsertIssueWithLease(lease, {
101
- projectId: run.projectId,
102
- linearIssueId: run.linearIssueId,
103
- activeRunId: null,
104
- factoryState: nextState,
105
- });
106
117
  return true;
107
118
  });
108
119
  if (!updated) {
@@ -134,7 +145,6 @@ export class RunNotificationHandler {
134
145
  thread,
135
146
  threadId,
136
147
  ...(completedTurnId ? { completedTurnId } : {}),
137
- resolveRecoverableRunState: resolveRecoverablePostRunState,
138
148
  });
139
149
  this.activeThreadId = undefined;
140
150
  }
@@ -9,16 +9,16 @@ import { IdleIssueReconciler } from "./idle-reconciliation.js";
9
9
  import { LinearSessionSync } from "./linear-session-sync.js";
10
10
  import { recoverLinearAgentActivityContext } from "./linear-agent-activity-recovery.js";
11
11
  import { IssueSessionLeaseService } from "./issue-session-lease-service.js";
12
- import { InterruptedRunRecovery } from "./interrupted-run-recovery.js";
13
12
  import { RunCompletionPolicy } from "./run-completion-policy.js";
13
+ import { RunFailurePolicy } from "./run-failure-policy.js";
14
14
  import { RunFinalizer } from "./run-finalizer.js";
15
15
  import { RunLauncher } from "./run-launcher.js";
16
16
  import { RunNotificationHandler } from "./run-notification-handler.js";
17
17
  import { RunReconciler } from "./run-reconciler.js";
18
- import { RunRecoveryService } from "./run-recovery-service.js";
19
18
  import { RunWakePlanner } from "./run-wake-planner.js";
20
19
  import { WakeDispatcher } from "./wake-dispatcher.js";
21
- import { getRemainingZombieRecoveryDelayMs } from "./zombie-recovery.js";
20
+ import { settleRun } from "./run-settlement.js";
21
+ import { getRemainingZombieRecoveryDelayMs } from "./run-budgets.js";
22
22
  import { classifyIssue } from "./issue-class.js";
23
23
  import { buildIssueTriageHash, IssueTriageService } from "./issue-triage.js";
24
24
  import { loadConfig } from "./config.js";
@@ -26,10 +26,7 @@ import { CodexThreadMaterializingError, isThreadMaterializingError } from "./cod
26
26
  import { emitTelemetry, noopTelemetry } from "./telemetry.js";
27
27
  import { LinearIssueProjectionService } from "./linear-issue-projection.js";
28
28
  import { RunAdmissionController } from "./run-admission-controller.js";
29
- // A terminal run must hold the active slot for at least this long before
30
- // the orchestrator force-clears it, so we never race the normal
31
- // notification-driven finalize that runs within seconds of completion.
32
- const DANGLING_ACTIVE_RUN_MIN_AGE_MS = 2 * 60_000;
29
+ const WRITER = "run-orchestrator";
33
30
  function lowerCaseFirst(value) {
34
31
  return value ? `${value.slice(0, 1).toLowerCase()}${value.slice(1)}` : value;
35
32
  }
@@ -61,9 +58,8 @@ export class RunOrchestrator {
61
58
  leaseService;
62
59
  runFinalizer;
63
60
  runLauncher;
64
- runRecovery;
61
+ runFailurePolicy;
65
62
  runWakePlanner;
66
- interruptedRunRecovery;
67
63
  runCompletionPolicy;
68
64
  completionCheck;
69
65
  issueTriage;
@@ -85,7 +81,6 @@ export class RunOrchestrator {
85
81
  recoveryPorts = {
86
82
  failRunAndClear: (run, message, nextState) => this.failRunAndClear(run, message, nextState),
87
83
  restoreIdleWorktree: (issue) => this.restoreIdleWorktree(issue),
88
- recoverOrEscalate: (issue, runType, reason) => this.recoverOrEscalate(issue, runType, reason),
89
84
  };
90
85
  activeSessionLeases;
91
86
  botIdentity;
@@ -137,9 +132,8 @@ export class RunOrchestrator {
137
132
  this.runFinalizer = new RunFinalizer(db, logger, this.linearSync, this.wakeDispatcher, this.leasePorts.withHeldLease, this.leasePorts.releaseLease, (lease, issue, runType, context, dedupeScope) => this.appendWakeEventWithLease(lease, issue, runType, context, dedupeScope), this.recoveryPorts.failRunAndClear, this.runCompletionPolicy, this.completionCheck, feed);
138
133
  this.runLauncher = new RunLauncher(config, db, codex, logger, this.worktreeManager);
139
134
  this.runNotificationHandler = new RunNotificationHandler(config, db, logger, this.linearSync, this.runFinalizer, this.threadPorts.readThreadWithRetry, this.leasePorts.withHeldLease, this.leasePorts.heartbeatLease, this.leasePorts.releaseLease, feed, { interruptTurn: (options) => codex.interruptTurn(options) });
140
- this.runRecovery = new RunRecoveryService(db, logger, this.linearSync, this.leasePorts.withHeldLease, this.leasePorts.getHeldLease, (lease, issue, runType, context, dedupeScope) => this.appendWakeEventWithLease(lease, issue, runType, context, dedupeScope), this.leasePorts.releaseLease, (projectId, issueId) => this.enqueueIssue(projectId, issueId), feed);
141
- this.interruptedRunRecovery = new InterruptedRunRecovery(db, logger, this.linearSync, this.leasePorts.withHeldLease, this.leasePorts.releaseLease, this.recoveryPorts.failRunAndClear, this.recoveryPorts.restoreIdleWorktree, this.runCompletionPolicy, (projectId, issueId) => this.enqueueIssue(projectId, issueId), feed);
142
- this.runReconciler = new RunReconciler(db, logger, linearProvider, this.linearSync, this.interruptedRunRecovery, this.runFinalizer, this.leasePorts.withHeldLease, this.leasePorts.releaseLease, this.threadPorts.readThreadWithRetry, this.recoveryPorts.recoverOrEscalate, (projectId) => this.config.projects.find((project) => project.id === projectId)?.github?.repoFullName, feed);
135
+ this.runFailurePolicy = new RunFailurePolicy(db, logger, this.linearSync, this.leasePorts.withHeldLease, this.leasePorts.releaseLease, (lease, issue, runType, context, dedupeScope) => this.appendWakeEventWithLease(lease, issue, runType, context, dedupeScope), this.wakeDispatcher, this.recoveryPorts.restoreIdleWorktree, this.runCompletionPolicy, (projectId) => this.config.projects.find((project) => project.id === projectId), feed);
136
+ this.runReconciler = new RunReconciler(db, logger, linearProvider, this.linearSync, this.runFailurePolicy, this.runFinalizer, this.leasePorts.withHeldLease, this.leasePorts.releaseLease, this.threadPorts.readThreadWithRetry, (projectId) => this.config.projects.find((project) => project.id === projectId)?.github?.repoFullName, feed, telemetry);
143
137
  this.runWakePlanner = new RunWakePlanner(db);
144
138
  this.linearIssueProjection = new LinearIssueProjectionService(db, linearProvider, logger);
145
139
  this.runAdmission = new RunAdmissionController(db, this.linearIssueProjection);
@@ -224,14 +218,21 @@ export class RunOrchestrator {
224
218
  try {
225
219
  const triage = await this.issueTriage.classify({ issue, childIssues });
226
220
  if (triage) {
227
- return this.db.issues.upsertIssue({
228
- projectId: issue.projectId,
229
- linearIssueId: issue.linearIssueId,
230
- issueClass: triage.issueClass,
231
- issueClassSource: "triage",
232
- issueTriageHash: triageHash,
233
- issueTriageResultJson: JSON.stringify(triage),
221
+ // The triage verdict is an external classifier response; persist it
222
+ // unconditionally so a benign version bump during the (slow) triage
223
+ // call cannot discard the result.
224
+ const triageCommit = this.db.issueSessions.commitIssueState({
225
+ writer: WRITER,
226
+ update: {
227
+ projectId: issue.projectId,
228
+ linearIssueId: issue.linearIssueId,
229
+ issueClass: triage.issueClass,
230
+ issueClassSource: "triage",
231
+ issueTriageHash: triageHash,
232
+ issueTriageResultJson: JSON.stringify(triage),
233
+ },
234
234
  });
235
+ return triageCommit.outcome === "applied" ? triageCommit.issue : issue;
235
236
  }
236
237
  }
237
238
  catch (error) {
@@ -242,12 +243,22 @@ export class RunOrchestrator {
242
243
  const fallbackClassification = classification.issueClassSource === "triage" && !triageCacheFresh
243
244
  ? { issueClass: "implementation", issueClassSource: "heuristic" }
244
245
  : classification;
245
- return this.db.issues.upsertIssue({
246
- projectId: issue.projectId,
247
- linearIssueId: issue.linearIssueId,
248
- issueClass: fallbackClassification.issueClass,
249
- issueClassSource: fallbackClassification.issueClassSource,
246
+ const fallbackCommit = this.db.issueSessions.commitIssueState({
247
+ writer: WRITER,
248
+ expectedVersion: issue.version,
249
+ update: {
250
+ projectId: issue.projectId,
251
+ linearIssueId: issue.linearIssueId,
252
+ issueClass: fallbackClassification.issueClass,
253
+ issueClassSource: fallbackClassification.issueClassSource,
254
+ },
255
+ // A concurrent writer is newer truth; the next pass reclassifies.
256
+ onConflict: () => undefined,
250
257
  });
258
+ if (fallbackCommit.outcome === "applied") {
259
+ return fallbackCommit.issue;
260
+ }
261
+ return (fallbackCommit.outcome === "conflict_skipped" ? fallbackCommit.issue : undefined) ?? issue;
251
262
  }
252
263
  // ─── Run ────────────────────────────────────────────────────────
253
264
  async run(item) {
@@ -309,7 +320,11 @@ export class RunOrchestrator {
309
320
  return;
310
321
  }
311
322
  if (issue.prState === "merged") {
312
- this.db.issueSessions.upsertIssueWithLease({ projectId: issue.projectId, linearIssueId: issue.linearIssueId, leaseId }, { projectId: issue.projectId, linearIssueId: issue.linearIssueId, pendingRunType: null, factoryState: "done" });
323
+ this.db.issueSessions.commitIssueState({
324
+ writer: WRITER,
325
+ lease: { projectId: issue.projectId, linearIssueId: issue.linearIssueId, leaseId },
326
+ update: { projectId: issue.projectId, linearIssueId: issue.linearIssueId, pendingRunType: null, factoryState: "done" },
327
+ });
313
328
  this.leaseService.release(item.projectId, item.issueId);
314
329
  return;
315
330
  }
@@ -479,11 +494,15 @@ export class RunOrchestrator {
479
494
  }
480
495
  // Reset zombie recovery counter — this run started successfully
481
496
  if (issue.zombieRecoveryAttempts > 0) {
482
- this.db.issueSessions.upsertIssueWithLease({ projectId: item.projectId, linearIssueId: item.issueId, leaseId }, {
483
- projectId: item.projectId,
484
- linearIssueId: item.issueId,
485
- zombieRecoveryAttempts: 0,
486
- lastZombieRecoveryAt: null,
497
+ this.db.issueSessions.commitIssueState({
498
+ writer: WRITER,
499
+ lease: { projectId: item.projectId, linearIssueId: item.issueId, leaseId },
500
+ update: {
501
+ projectId: item.projectId,
502
+ linearIssueId: item.issueId,
503
+ zombieRecoveryAttempts: 0,
504
+ lastZombieRecoveryAt: null,
505
+ },
487
506
  });
488
507
  }
489
508
  this.logger.info({ issueKey: issue.issueKey, runType, threadId, turnId }, `Started ${runType} run`);
@@ -563,10 +582,10 @@ export class RunOrchestrator {
563
582
  for (const run of this.db.runs.listRunningRuns()) {
564
583
  await this.reconcileRun(run);
565
584
  }
566
- // Free any issue whose active slot is pinned to an already-terminal
585
+ // Settle any issue whose active slot is pinned to an already-terminal
567
586
  // run (post-run finalize interrupted by restart). Must run before the
568
587
  // idle reconciler so the freed issue is routed in this same pass.
569
- this.finalizeDanglingActiveRuns();
588
+ this.settleDanglingActiveRuns();
570
589
  // Preemptively detect stuck merge-queue PRs (conflicts visible on
571
590
  // GitHub) and dispatch queue_repair before the Steward evicts.
572
591
  await this.queueHealthMonitor.reconcile();
@@ -579,62 +598,35 @@ export class RunOrchestrator {
579
598
  advanceIdleIssue(issue, newState, options) {
580
599
  this.idleReconciler.advanceIdleIssue(issue, newState, options);
581
600
  }
582
- /**
583
- * After a zombie/stale run is cleared, decide whether to re-enqueue
584
- * or escalate. Checks: PR already merged → done; budget exhausted →
585
- * escalate; backoff delay not elapsed → skip.
586
- */
587
- recoverOrEscalate(issue, runType, reason) {
588
- this.runRecovery.recoverOrEscalate({
589
- issue,
590
- runType,
591
- reason,
592
- isRequestedChangesRunType,
593
- });
594
- }
595
- // Clear a dangling active slot: an issue still pointing at an
601
+ // Settle a dangling active slot: an issue still pointing at an
596
602
  // already-terminal run via `activeRunId`. The post-run finalize was
597
603
  // interrupted (almost always a restart between marking the run
598
604
  // terminal and clearing the slot), so the run can never drive the
599
605
  // session forward, yet every idle/recovery pass skips the issue
600
- // because `activeRunId` is set. We re-read under the issue-session
601
- // lease and null the slot; the idle reconciler then routes the issue
602
- // from GitHub truth (e.g. a missed changes_requested → review_fix).
603
- finalizeDanglingActiveRuns() {
606
+ // because `activeRunId` is set. settleRun is idempotent and its slot
607
+ // clear is a predicate-guarded versioned commit, so no age gate is
608
+ // needed — it cannot destructively race the notification finalizer.
609
+ // The idle reconciler then routes the issue from GitHub truth (e.g. a
610
+ // missed changes_requested → review_fix).
611
+ settleDanglingActiveRuns() {
604
612
  for (const issue of this.db.issues.listIssuesWithTerminalActiveRun()) {
605
613
  if (issue.activeRunId === undefined)
606
614
  continue;
607
615
  const run = this.db.runs.getRunById(issue.activeRunId);
608
- // The query already filters to terminal runs; this guards against a
609
- // race where the run advanced back to active between query and read.
610
- if (!run || run.status === "running" || run.status === "queued")
611
- continue;
612
- // Hold off until the run has been terminal long enough that the
613
- // normal notification-driven finalize has demonstrably not run —
614
- // avoids racing a live completion that is milliseconds from clearing
615
- // the slot itself.
616
- const endedAtMs = run.endedAt ? Date.parse(run.endedAt) : Number.NaN;
617
- if (Number.isFinite(endedAtMs) && Date.now() - endedAtMs < DANGLING_ACTIVE_RUN_MIN_AGE_MS)
616
+ if (!run)
618
617
  continue;
619
618
  const lease = this.claimLeaseForReconciliation(run.projectId, run.linearIssueId);
620
- // "skip" → a live lease owns the session (a real run is in flight);
621
- // leave it alone. "owned" → an outer local scope holds it, so we
622
- // must not release it here.
619
+ // "skip" → a live lease owns the session (a worker is mid-finalize or
620
+ // mid-launch); settleRun could not corrupt its writes, but deferring
621
+ // lets the owner land its richer post-run state first. "owned" → an
622
+ // outer local scope holds it, so we must not release it here.
623
623
  if (lease === "skip")
624
624
  continue;
625
625
  try {
626
- const cleared = this.withHeldIssueSessionLease(run.projectId, run.linearIssueId, (held) => {
627
- const fresh = this.db.issues.getIssue(run.projectId, run.linearIssueId);
628
- if (!fresh || fresh.activeRunId !== run.id)
629
- return false;
630
- this.db.issueSessions.upsertIssueWithLease(held, {
631
- projectId: run.projectId,
632
- linearIssueId: run.linearIssueId,
633
- activeRunId: null,
634
- });
635
- return true;
636
- });
637
- if (cleared) {
626
+ // No `finish` outcome: the run is already terminal, and settleRun
627
+ // leaves a run that raced back to non-terminal status untouched.
628
+ const settled = this.withHeldIssueSessionLease(run.projectId, run.linearIssueId, (held) => settleRun({ db: this.db, run, lease: held }));
629
+ if (settled?.slotCleared) {
638
630
  this.logger.warn({ issueKey: issue.issueKey, runId: run.id, runType: run.runType, runStatus: run.status }, "Cleared dangling active-run slot left by a terminal run; idle reconcile will resume the issue");
639
631
  this.feed?.publish({
640
632
  level: "warn",
@@ -667,14 +659,14 @@ export class RunOrchestrator {
667
659
  }
668
660
  // ─── Internal helpers ─────────────────────────────────────────────
669
661
  escalate(issue, runType, reason) {
670
- this.runRecovery.escalate({
662
+ this.runFailurePolicy.escalate({
671
663
  issue,
672
664
  runType,
673
665
  reason,
674
666
  });
675
667
  }
676
668
  failRunAndClear(run, message, nextState = "failed") {
677
- this.runRecovery.failRunAndClear({
669
+ this.runFailurePolicy.failRunAndClear({
678
670
  run,
679
671
  message,
680
672
  nextState,