patchrelay 0.75.3 → 0.77.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/agent-input-service.js +40 -26
  2. package/dist/build-info.json +3 -3
  3. package/dist/cli/data.js +3 -1
  4. package/dist/db/issue-session-store.js +44 -9
  5. package/dist/db/issue-store.js +11 -2
  6. package/dist/db/migrations.js +3 -0
  7. package/dist/factory-state.js +23 -0
  8. package/dist/github-webhook-reactive-run.js +15 -11
  9. package/dist/github-webhook-stack-coordination.js +8 -4
  10. package/dist/github-webhook-state-projector.js +204 -139
  11. package/dist/github-webhook-terminal-handler.js +37 -27
  12. package/dist/idle-reconciliation.js +122 -66
  13. package/dist/implementation-outcome-policy.js +5 -1
  14. package/dist/issue-session-projection-invalidator.js +9 -0
  15. package/dist/linear-agent-session-client.js +16 -8
  16. package/dist/linear-issue-projection.js +15 -11
  17. package/dist/linear-status-comment-sync.js +8 -4
  18. package/dist/linear-workflow-state-sync.js +9 -5
  19. package/dist/merged-linear-completion-reconciler.js +39 -17
  20. package/dist/no-pr-completion-check.js +51 -29
  21. package/dist/orchestration-parent-wake.js +15 -8
  22. package/dist/queue-health-monitor.js +17 -8
  23. package/dist/reactive-run-policy.js +5 -1
  24. package/dist/run-budgets.js +40 -6
  25. package/dist/run-completion-policy.js +50 -9
  26. package/dist/run-failure-policy.js +463 -0
  27. package/dist/run-finalizer.js +68 -35
  28. package/dist/run-launcher.js +63 -12
  29. package/dist/run-notification-handler.js +19 -9
  30. package/dist/run-orchestrator.js +70 -78
  31. package/dist/run-reconciler.js +137 -64
  32. package/dist/run-settlement.js +57 -0
  33. package/dist/run-wake-planner.js +39 -29
  34. package/dist/service-issue-actions.js +45 -28
  35. package/dist/service-startup-recovery.js +61 -35
  36. package/dist/telemetry.js +9 -0
  37. package/dist/terminal-wake-reconciler.js +20 -3
  38. package/dist/webhooks/agent-session-handler.js +22 -12
  39. package/dist/webhooks/dependency-readiness-handler.js +17 -10
  40. package/dist/webhooks/desired-stage-recorder.js +32 -13
  41. package/dist/webhooks/issue-removal-handler.js +24 -13
  42. package/package.json +1 -1
  43. package/dist/interrupted-run-recovery.js +0 -227
  44. package/dist/run-recovery-service.js +0 -202
  45. package/dist/zombie-recovery.js +0 -13
@@ -0,0 +1,463 @@
1
+ import { buildRunFailureActivity } from "./linear-session-reporting.js";
2
+ import { getRemainingZombieRecoveryDelayMs, getZombieRecoveryBudget } from "./run-budgets.js";
3
+ import { resolvePostRunFactoryState } from "./run-completion-policy.js";
4
+ import { isRequestedChangesRunType } from "./reactive-pr-state.js";
5
+ import { settleRun } from "./run-settlement.js";
6
+ const WRITER = "run-failure-policy";
7
+ // Roll back the attempt counter consumed by the interrupted run and clear the
8
+ // attempted-failure provenance for repair runs, as a single issue update so
9
+ // the whole repair commits (and conflict-recomputes) atomically.
10
+ function buildInterruptedAttemptRepairUpdate(runType, issue) {
11
+ const counter = runType === "ci_repair" && issue.ciRepairAttempts > 0
12
+ ? { ciRepairAttempts: issue.ciRepairAttempts - 1 }
13
+ : runType === "queue_repair" && issue.queueRepairAttempts > 0
14
+ ? { queueRepairAttempts: issue.queueRepairAttempts - 1 }
15
+ : isRequestedChangesRunType(runType) && issue.reviewFixAttempts > 0
16
+ ? { reviewFixAttempts: issue.reviewFixAttempts - 1 }
17
+ : undefined;
18
+ const provenance = runType === "ci_repair" || runType === "queue_repair"
19
+ ? {
20
+ lastAttemptedFailureHeadSha: null,
21
+ lastAttemptedFailureSignature: null,
22
+ lastAttemptedFailureAt: null,
23
+ }
24
+ : undefined;
25
+ if (!counter && !provenance)
26
+ return undefined;
27
+ return {
28
+ projectId: issue.projectId,
29
+ linearIssueId: issue.linearIssueId,
30
+ ...counter,
31
+ ...provenance,
32
+ };
33
+ }
34
+ function resolveRetryRunType(runType, context) {
35
+ if (runType === "branch_upkeep") {
36
+ return "branch_upkeep";
37
+ }
38
+ return context?.reviewFixMode === "branch_upkeep" || context?.branchUpkeepRequired === true
39
+ ? "branch_upkeep"
40
+ : "review_fix";
41
+ }
42
+ // Plan §B4: the one run-failure policy. Merges the former
43
+ // RunRecoveryService (zombie retry/escalate + backoff) and
44
+ // InterruptedRunRecovery (interrupted-turn handling, counter decrements,
45
+ // re-enqueue) into a single module that answers: given a stranded or
46
+ // failed run + its issue — retry (with which backoff/budget), re-enqueue
47
+ // (which runType/context), or escalate?
48
+ //
49
+ // Ownership: run-reconciler and service-startup-recovery only DETECT
50
+ // stranded states and hand them here; this policy DECIDES; execution of
51
+ // the run/slot writes goes through settleRun, and dispatch of follow-up
52
+ // work goes through the WakeDispatcher.
53
+ export class RunFailurePolicy {
54
+ db;
55
+ logger;
56
+ linearSync;
57
+ withHeldLease;
58
+ releaseLease;
59
+ appendWakeEventWithLease;
60
+ wakeDispatcher;
61
+ restoreIdleWorktree;
62
+ completionPolicy;
63
+ resolveProject;
64
+ feed;
65
+ constructor(db, logger, linearSync, withHeldLease, releaseLease, appendWakeEventWithLease, wakeDispatcher, restoreIdleWorktree, completionPolicy, resolveProject, feed) {
66
+ this.db = db;
67
+ this.logger = logger;
68
+ this.linearSync = linearSync;
69
+ this.withHeldLease = withHeldLease;
70
+ this.releaseLease = releaseLease;
71
+ this.appendWakeEventWithLease = appendWakeEventWithLease;
72
+ this.wakeDispatcher = wakeDispatcher;
73
+ this.restoreIdleWorktree = restoreIdleWorktree;
74
+ this.completionPolicy = completionPolicy;
75
+ this.resolveProject = resolveProject;
76
+ this.feed = feed;
77
+ }
78
+ // ─── Stranded runs (zombie / stale thread) ───────────────────────
79
+ /**
80
+ * Detector entry point: the reconciler found a run that can never make
81
+ * progress (no Codex thread after a restart, or the thread is gone).
82
+ * Settle the run (mark failed, release the slot) and decide retry vs
83
+ * escalate via the zombie budget/backoff.
84
+ */
85
+ settleStrandedRunAndRecover(params) {
86
+ const { run, issue } = params;
87
+ this.withHeldLease(run.projectId, run.linearIssueId, (lease) => settleRun({
88
+ db: this.db,
89
+ run,
90
+ finish: { status: "failed", failureReason: params.failureReason },
91
+ lease,
92
+ }));
93
+ this.recoverOrEscalate({ issue, runType: run.runType, reason: params.reason });
94
+ }
95
+ /**
96
+ * Decide what happens after a run died without doing its work: PR
97
+ * already merged → done; zombie budget exhausted → escalate; backoff
98
+ * not elapsed → keep the wake but defer; otherwise consume one budget
99
+ * unit, append a recovery wake, and dispatch.
100
+ */
101
+ recoverOrEscalate(params) {
102
+ const { issue, runType, reason } = params;
103
+ const fresh = this.db.issues.getIssue(issue.projectId, issue.linearIssueId);
104
+ if (!fresh)
105
+ return;
106
+ if (isRequestedChangesRunType(runType)) {
107
+ const updated = this.withHeldLease(fresh.projectId, fresh.linearIssueId, (lease) => {
108
+ this.db.issueSessions.clearPendingIssueSessionEventsWithLease(lease);
109
+ this.db.issueSessions.commitIssueState({
110
+ writer: WRITER,
111
+ lease,
112
+ update: {
113
+ projectId: fresh.projectId,
114
+ linearIssueId: fresh.linearIssueId,
115
+ pendingRunType: null,
116
+ pendingRunContextJson: null,
117
+ factoryState: "escalated",
118
+ },
119
+ });
120
+ return true;
121
+ });
122
+ if (!updated) {
123
+ this.logger.warn({ issueKey: fresh.issueKey, reason }, "Skipping review-fix recovery escalation after losing issue-session lease");
124
+ this.releaseLease(fresh.projectId, fresh.linearIssueId);
125
+ return;
126
+ }
127
+ this.logger.warn({ issueKey: fresh.issueKey, reason }, "Requested-changes run failed before a new head was published - escalating");
128
+ this.feed?.publish({
129
+ level: "error",
130
+ kind: "workflow",
131
+ issueKey: fresh.issueKey,
132
+ projectId: fresh.projectId,
133
+ stage: runType,
134
+ status: "escalated",
135
+ summary: `Requested-changes run failed before publishing a new head (${reason})`,
136
+ });
137
+ this.releaseLease(fresh.projectId, fresh.linearIssueId);
138
+ return;
139
+ }
140
+ if (fresh.prState === "merged") {
141
+ const updated = this.withHeldLease(fresh.projectId, fresh.linearIssueId, (lease) => {
142
+ this.db.issueSessions.commitIssueState({
143
+ writer: WRITER,
144
+ lease,
145
+ update: {
146
+ projectId: fresh.projectId,
147
+ linearIssueId: fresh.linearIssueId,
148
+ factoryState: "done",
149
+ zombieRecoveryAttempts: 0,
150
+ lastZombieRecoveryAt: null,
151
+ },
152
+ });
153
+ return true;
154
+ });
155
+ if (!updated) {
156
+ this.logger.warn({ issueKey: fresh.issueKey, reason }, "Skipping merged recovery completion after losing issue-session lease");
157
+ this.releaseLease(fresh.projectId, fresh.linearIssueId);
158
+ return;
159
+ }
160
+ this.logger.info({ issueKey: fresh.issueKey, reason }, "Recovery: PR already merged - transitioning to done");
161
+ this.releaseLease(fresh.projectId, fresh.linearIssueId);
162
+ return;
163
+ }
164
+ const zombieRecoveryBudget = getZombieRecoveryBudget(this.resolveProject(fresh.projectId));
165
+ const attempts = fresh.zombieRecoveryAttempts + 1;
166
+ if (attempts > zombieRecoveryBudget) {
167
+ const updated = this.withHeldLease(fresh.projectId, fresh.linearIssueId, (lease) => {
168
+ this.db.issueSessions.commitIssueState({
169
+ writer: WRITER,
170
+ lease,
171
+ update: {
172
+ projectId: fresh.projectId,
173
+ linearIssueId: fresh.linearIssueId,
174
+ factoryState: "escalated",
175
+ },
176
+ });
177
+ return true;
178
+ });
179
+ if (!updated) {
180
+ this.logger.warn({ issueKey: fresh.issueKey, attempts, reason }, "Skipping recovery escalation after losing issue-session lease");
181
+ this.releaseLease(fresh.projectId, fresh.linearIssueId);
182
+ return;
183
+ }
184
+ this.logger.warn({ issueKey: fresh.issueKey, attempts, reason }, "Recovery: budget exhausted - escalating");
185
+ this.feed?.publish({
186
+ level: "error",
187
+ kind: "workflow",
188
+ issueKey: fresh.issueKey,
189
+ projectId: fresh.projectId,
190
+ stage: "escalated",
191
+ status: "budget_exhausted",
192
+ summary: `${reason} recovery failed after ${zombieRecoveryBudget} attempts`,
193
+ });
194
+ this.releaseLease(fresh.projectId, fresh.linearIssueId);
195
+ return;
196
+ }
197
+ if (fresh.lastZombieRecoveryAt) {
198
+ const remainingDelayMs = getRemainingZombieRecoveryDelayMs(fresh.lastZombieRecoveryAt, fresh.zombieRecoveryAttempts);
199
+ if (remainingDelayMs > 0) {
200
+ this.withHeldLease(fresh.projectId, fresh.linearIssueId, (lease) => {
201
+ this.appendWakeEventWithLease(lease, fresh, runType, undefined, `recovery:${attempts}`);
202
+ });
203
+ this.logger.debug({ issueKey: fresh.issueKey, attempts: fresh.zombieRecoveryAttempts, remainingDelayMs }, "Recovery: backoff not elapsed, deferring retry");
204
+ return;
205
+ }
206
+ }
207
+ const requeued = this.withHeldLease(fresh.projectId, fresh.linearIssueId, (lease) => {
208
+ // `attempts` is read-modify-write against the fresh row read above; on
209
+ // conflict recompute the counter from the current row.
210
+ const buildRequeueUpdate = (record) => ({
211
+ projectId: fresh.projectId,
212
+ linearIssueId: fresh.linearIssueId,
213
+ pendingRunType: null,
214
+ pendingRunContextJson: null,
215
+ zombieRecoveryAttempts: record.zombieRecoveryAttempts + 1,
216
+ lastZombieRecoveryAt: new Date().toISOString(),
217
+ });
218
+ this.db.issueSessions.commitIssueState({
219
+ writer: WRITER,
220
+ lease,
221
+ expectedVersion: fresh.version,
222
+ update: buildRequeueUpdate(fresh),
223
+ onConflict: (current) => buildRequeueUpdate(current),
224
+ });
225
+ return this.appendWakeEventWithLease(lease, fresh, runType, undefined, `recovery:${attempts}`);
226
+ });
227
+ if (!requeued) {
228
+ this.logger.warn({ issueKey: fresh.issueKey, attempts, reason }, "Skipping recovery re-enqueue after losing issue-session lease");
229
+ this.releaseLease(fresh.projectId, fresh.linearIssueId);
230
+ return;
231
+ }
232
+ this.wakeDispatcher.dispatchIfWakePending(fresh.projectId, fresh.linearIssueId);
233
+ this.logger.info({ issueKey: fresh.issueKey, attempts, reason }, "Recovery: re-enqueued with backoff");
234
+ }
235
+ // ─── Terminal decisions ──────────────────────────────────────────
236
+ escalate(params) {
237
+ const { issue, runType, reason } = params;
238
+ this.logger.warn({ issueKey: issue.issueKey, runType, reason }, "Escalating to human");
239
+ const escalated = this.withHeldLease(issue.projectId, issue.linearIssueId, (lease) => {
240
+ // Escalation is an operator-facing decision: the issue write and the
241
+ // run release ride in the held-lease transaction. When a run still
242
+ // holds the slot, settleRun owns the paired run-release + slot-clear;
243
+ // it refuses to clear a slot that was re-pointed at another run.
244
+ const escalateFields = {
245
+ pendingRunType: null,
246
+ pendingRunContextJson: null,
247
+ factoryState: "escalated",
248
+ };
249
+ if (issue.activeRunId !== undefined) {
250
+ const settled = settleRun({
251
+ db: this.db,
252
+ run: { id: issue.activeRunId, projectId: issue.projectId, linearIssueId: issue.linearIssueId },
253
+ finish: { status: "released" },
254
+ lease,
255
+ buildIssueUpdate: () => escalateFields,
256
+ });
257
+ if (!settled.slotCleared)
258
+ return false;
259
+ }
260
+ else {
261
+ const commit = this.db.issueSessions.commitIssueState({
262
+ writer: WRITER,
263
+ lease,
264
+ update: {
265
+ projectId: issue.projectId,
266
+ linearIssueId: issue.linearIssueId,
267
+ ...escalateFields,
268
+ },
269
+ });
270
+ if (commit.outcome !== "applied")
271
+ return false;
272
+ }
273
+ this.db.issueSessions.clearPendingIssueSessionEventsWithLease(lease);
274
+ return true;
275
+ });
276
+ if (!escalated) {
277
+ this.logger.warn({ issueKey: issue.issueKey, runType }, "Skipping escalation write after losing issue-session lease");
278
+ this.releaseLease(issue.projectId, issue.linearIssueId);
279
+ return;
280
+ }
281
+ this.feed?.publish({
282
+ level: "error",
283
+ kind: "workflow",
284
+ issueKey: issue.issueKey,
285
+ projectId: issue.projectId,
286
+ stage: runType,
287
+ status: "escalated",
288
+ summary: `Escalated: ${reason}`,
289
+ });
290
+ const escalatedIssue = this.db.issues.getIssue(issue.projectId, issue.linearIssueId) ?? issue;
291
+ void this.linearSync.emitActivity(escalatedIssue, {
292
+ type: "error",
293
+ body: `PatchRelay needs human help to continue.\n\n${reason}`,
294
+ });
295
+ void this.linearSync.syncSession(escalatedIssue);
296
+ this.releaseLease(issue.projectId, issue.linearIssueId);
297
+ }
298
+ failRunAndClear(params) {
299
+ const { run, message, nextState } = params;
300
+ const updated = this.withHeldLease(run.projectId, run.linearIssueId, (lease) => {
301
+ settleRun({
302
+ db: this.db,
303
+ run,
304
+ finish: { status: "failed", failureReason: message },
305
+ lease,
306
+ buildIssueUpdate: () => ({ factoryState: nextState }),
307
+ });
308
+ if (nextState === "failed" || nextState === "escalated" || nextState === "awaiting_input" || nextState === "done") {
309
+ this.db.issueSessions.clearPendingIssueSessionEventsWithLease(lease);
310
+ }
311
+ return true;
312
+ });
313
+ if (!updated) {
314
+ this.logger.warn({ runId: run.id, issueId: run.linearIssueId }, "Skipping failure cleanup after losing issue-session lease");
315
+ }
316
+ this.releaseLease(run.projectId, run.linearIssueId);
317
+ }
318
+ // ─── Interrupted turns (formerly InterruptedRunRecovery) ─────────
319
+ async handleInterruptedRun(run, issue) {
320
+ this.logger.warn({ issueKey: issue.issueKey, runType: run.runType, threadId: run.threadId }, "Run has interrupted turn - marking as failed");
321
+ const repairedCounters = this.withHeldLease(issue.projectId, issue.linearIssueId, (lease) => {
322
+ // The decrement is read-modify-write against an issue row read before
323
+ // the awaits that led here; on conflict, recompute from the fresh row.
324
+ const update = buildInterruptedAttemptRepairUpdate(run.runType, issue);
325
+ if (update) {
326
+ this.db.issueSessions.commitIssueState({
327
+ writer: WRITER,
328
+ lease,
329
+ expectedVersion: issue.version,
330
+ update,
331
+ onConflict: (current) => buildInterruptedAttemptRepairUpdate(run.runType, current),
332
+ });
333
+ }
334
+ return true;
335
+ });
336
+ if (!repairedCounters) {
337
+ this.logger.warn({ runId: run.id, issueId: run.linearIssueId }, "Skipping interrupted-run recovery after losing issue-session lease");
338
+ this.releaseLease(run.projectId, run.linearIssueId);
339
+ return;
340
+ }
341
+ if (isRequestedChangesRunType(run.runType)) {
342
+ await this.handleInterruptedRequestedChangesRun(run, issue);
343
+ return;
344
+ }
345
+ if (run.runType === "implementation" && !issue.prNumber) {
346
+ await this.handleInterruptedImplementationRun(run, issue);
347
+ return;
348
+ }
349
+ const recoveredState = resolvePostRunFactoryState(this.db.issues.getIssue(run.projectId, run.linearIssueId) ?? issue, run, { outcome: "recovered" });
350
+ this.failRunAndClear({ run, message: "Codex turn was interrupted", nextState: recoveredState ?? "failed" });
351
+ await this.restoreIdleWorktree(issue);
352
+ const failedIssue = this.db.issues.getIssue(run.projectId, run.linearIssueId) ?? issue;
353
+ if (recoveredState) {
354
+ this.feed?.publish({
355
+ level: "info",
356
+ kind: "stage",
357
+ issueKey: issue.issueKey,
358
+ projectId: run.projectId,
359
+ stage: recoveredState,
360
+ status: "reconciled",
361
+ summary: `Interrupted ${run.runType} recovered -> ${recoveredState}`,
362
+ });
363
+ }
364
+ else {
365
+ void this.linearSync.emitActivity(failedIssue, buildRunFailureActivity(run.runType, "The Codex turn was interrupted."));
366
+ }
367
+ void this.linearSync.syncSession(failedIssue, { activeRunType: run.runType });
368
+ this.releaseLease(run.projectId, run.linearIssueId);
369
+ }
370
+ async handleInterruptedImplementationRun(run, issue) {
371
+ const interruptedMessage = "Implementation run was interrupted before PatchRelay could publish a PR";
372
+ this.failRunAndClear({ run, message: "Codex turn was interrupted", nextState: "delegated" });
373
+ await this.restoreIdleWorktree(issue);
374
+ const refreshedIssue = this.db.issues.getIssue(run.projectId, run.linearIssueId) ?? issue;
375
+ this.db.issueSessions.appendIssueSessionEventRespectingActiveLease(run.projectId, run.linearIssueId, {
376
+ projectId: run.projectId,
377
+ linearIssueId: run.linearIssueId,
378
+ eventType: "delegated",
379
+ dedupeKey: `interrupted_implementation:implementation:${run.linearIssueId}`,
380
+ });
381
+ if (!this.db.workflowWakes.peekIssueWake(run.projectId, run.linearIssueId)) {
382
+ const failedIssue = this.db.issues.getIssue(run.projectId, run.linearIssueId) ?? refreshedIssue;
383
+ this.feed?.publish({
384
+ level: "error",
385
+ kind: "workflow",
386
+ issueKey: issue.issueKey,
387
+ projectId: run.projectId,
388
+ stage: run.runType,
389
+ status: "escalated",
390
+ summary: interruptedMessage,
391
+ });
392
+ void this.linearSync.emitActivity(failedIssue, buildRunFailureActivity(run.runType, interruptedMessage));
393
+ void this.linearSync.syncSession(failedIssue, { activeRunType: run.runType });
394
+ this.releaseLease(run.projectId, run.linearIssueId);
395
+ return;
396
+ }
397
+ this.feed?.publish({
398
+ level: "warn",
399
+ kind: "workflow",
400
+ issueKey: issue.issueKey,
401
+ projectId: run.projectId,
402
+ stage: run.runType,
403
+ status: "retry_queued",
404
+ summary: "Implementation run was interrupted; PatchRelay will retry automatically",
405
+ });
406
+ const recoveredIssue = this.db.issues.getIssue(run.projectId, run.linearIssueId) ?? refreshedIssue;
407
+ void this.linearSync.syncSession(recoveredIssue, { activeRunType: run.runType });
408
+ this.wakeDispatcher.dispatchIfWakePending(run.projectId, run.linearIssueId);
409
+ this.releaseLease(run.projectId, run.linearIssueId);
410
+ }
411
+ async handleInterruptedRequestedChangesRun(run, issue) {
412
+ const freshIssue = this.db.issues.getIssue(run.projectId, run.linearIssueId) ?? issue;
413
+ const refreshedIssue = await this.completionPolicy.refreshIssueAfterReactivePublish(run, freshIssue);
414
+ const retryContext = await this.completionPolicy.resolveRequestedChangesWakeContext(refreshedIssue, run.runType, run.runType === "branch_upkeep"
415
+ ? {
416
+ branchUpkeepRequired: true,
417
+ reviewFixMode: "branch_upkeep",
418
+ wakeReason: "branch_upkeep",
419
+ }
420
+ : undefined);
421
+ const retryRunType = resolveRetryRunType(run.runType, retryContext);
422
+ const recoveredState = resolvePostRunFactoryState(refreshedIssue, run, { outcome: "recovered" }) ?? "failed";
423
+ const interruptedMessage = "Requested-changes run was interrupted before PatchRelay could verify that a new PR head was published";
424
+ this.failRunAndClear({ run, message: interruptedMessage, nextState: recoveredState });
425
+ await this.restoreIdleWorktree(issue);
426
+ const recoveredIssue = this.db.issues.getIssue(run.projectId, run.linearIssueId) ?? refreshedIssue;
427
+ if (recoveredState === "changes_requested") {
428
+ this.db.issueSessions.commitIssueState({
429
+ writer: WRITER,
430
+ update: {
431
+ projectId: run.projectId,
432
+ linearIssueId: run.linearIssueId,
433
+ pendingRunType: retryRunType,
434
+ pendingRunContextJson: retryContext ? JSON.stringify(retryContext) : null,
435
+ },
436
+ });
437
+ this.feed?.publish({
438
+ level: "warn",
439
+ kind: "workflow",
440
+ issueKey: issue.issueKey,
441
+ projectId: run.projectId,
442
+ stage: run.runType,
443
+ status: "retry_queued",
444
+ summary: "Requested-changes run was interrupted; PatchRelay will retry from fresh GitHub truth",
445
+ });
446
+ this.wakeDispatcher.dispatchIfWakePending(run.projectId, run.linearIssueId);
447
+ }
448
+ else {
449
+ this.feed?.publish({
450
+ level: "error",
451
+ kind: "workflow",
452
+ issueKey: issue.issueKey,
453
+ projectId: run.projectId,
454
+ stage: run.runType,
455
+ status: "escalated",
456
+ summary: interruptedMessage,
457
+ });
458
+ }
459
+ void this.linearSync.emitActivity(recoveredIssue, buildRunFailureActivity(run.runType, interruptedMessage));
460
+ void this.linearSync.syncSession(recoveredIssue, { activeRunType: run.runType });
461
+ this.releaseLease(run.projectId, run.linearIssueId);
462
+ }
463
+ }
@@ -2,10 +2,12 @@ import { CLEARED_FAILURE_PROVENANCE } from "./failure-provenance.js";
2
2
  import { buildStageReport, countEventMethods } from "./run-reporting.js";
3
3
  import { buildRunCompletedActivity, buildRunFailureActivity } from "./linear-session-reporting.js";
4
4
  import { handleNoPrCompletionCheck } from "./no-pr-completion-check.js";
5
- import { resolveCompletedRunState } from "./run-completion-policy.js";
5
+ import { resolvePostRunFactoryState } from "./run-completion-policy.js";
6
6
  import { computeChangeIdentityFromWorktree } from "./change-identity.js";
7
7
  import { inspectGitWorktreeStatus, isRepairRunType } from "./git-worktree-status.js";
8
8
  import { buildRunOutcomeSummary } from "./run-outcome-summary.js";
9
+ import { settleRun } from "./run-settlement.js";
10
+ const WRITER = "run-finalizer";
9
11
  function parseEventJson(eventJson) {
10
12
  if (!eventJson)
11
13
  return undefined;
@@ -150,12 +152,16 @@ export class RunFinalizer {
150
152
  });
151
153
  if (!identity.patchId && !identity.integrationTreeId)
152
154
  return;
153
- this.db.issues.upsertIssue({
154
- projectId: issue.projectId,
155
- linearIssueId: issue.linearIssueId,
156
- ...(identity.patchId ? { lastPublishedPatchId: identity.patchId } : {}),
157
- ...(identity.integrationTreeId ? { lastPublishedIntegrationTreeId: identity.integrationTreeId } : {}),
158
- lastPublishedHeadSha: issue.prHeadSha,
155
+ this.db.issueSessions.commitIssueState({
156
+ writer: WRITER,
157
+ expectedVersion: issue.version,
158
+ update: {
159
+ projectId: issue.projectId,
160
+ linearIssueId: issue.linearIssueId,
161
+ ...(identity.patchId ? { lastPublishedPatchId: identity.patchId } : {}),
162
+ ...(identity.integrationTreeId ? { lastPublishedIntegrationTreeId: identity.integrationTreeId } : {}),
163
+ lastPublishedHeadSha: issue.prHeadSha,
164
+ },
159
165
  });
160
166
  this.logger.info({
161
167
  issueKey: issue.issueKey,
@@ -194,12 +200,15 @@ export class RunFinalizer {
194
200
  ...(completedTurnId ? { turnId: completedTurnId } : {}),
195
201
  failureReason: run.failureReason ?? "approved on the same head; further publication suppressed",
196
202
  });
197
- this.db.issues.upsertIssue({
198
- projectId: run.projectId,
199
- linearIssueId: run.linearIssueId,
200
- activeRunId: null,
201
- pendingRunType: null,
202
- pendingRunContextJson: null,
203
+ this.db.issueSessions.commitIssueState({
204
+ writer: WRITER,
205
+ update: {
206
+ projectId: run.projectId,
207
+ linearIssueId: run.linearIssueId,
208
+ activeRunId: null,
209
+ pendingRunType: null,
210
+ pendingRunContextJson: null,
211
+ },
203
212
  });
204
213
  });
205
214
  this.clearProgressAndRelease(run);
@@ -274,23 +283,33 @@ export class RunFinalizer {
274
283
  report: params.report,
275
284
  outcomeSummary,
276
285
  }));
277
- this.db.issueSessions.upsertIssueWithLease(lease, {
286
+ // The attempt decrements are read-modify-write against the issue row;
287
+ // on conflict, recompute them from the fresh row instead of writing
288
+ // counters derived from a stale read.
289
+ const buildContinueUpdate = (record) => ({
278
290
  projectId: params.run.projectId,
279
291
  linearIssueId: params.run.linearIssueId,
280
292
  activeRunId: null,
281
293
  factoryState: "delegated",
282
294
  pendingRunType: null,
283
295
  pendingRunContextJson: null,
284
- ...(params.run.runType === "ci_repair" && params.issue.ciRepairAttempts > 0
285
- ? { ciRepairAttempts: params.issue.ciRepairAttempts - 1 }
296
+ ...(params.run.runType === "ci_repair" && record.ciRepairAttempts > 0
297
+ ? { ciRepairAttempts: record.ciRepairAttempts - 1 }
286
298
  : {}),
287
- ...(params.run.runType === "queue_repair" && params.issue.queueRepairAttempts > 0
288
- ? { queueRepairAttempts: params.issue.queueRepairAttempts - 1 }
299
+ ...(params.run.runType === "queue_repair" && record.queueRepairAttempts > 0
300
+ ? { queueRepairAttempts: record.queueRepairAttempts - 1 }
289
301
  : {}),
290
- ...((params.run.runType === "review_fix" || params.run.runType === "branch_upkeep") && params.issue.reviewFixAttempts > 0
291
- ? { reviewFixAttempts: params.issue.reviewFixAttempts - 1 }
302
+ ...((params.run.runType === "review_fix" || params.run.runType === "branch_upkeep") && record.reviewFixAttempts > 0
303
+ ? { reviewFixAttempts: record.reviewFixAttempts - 1 }
292
304
  : {}),
293
305
  });
306
+ this.db.issueSessions.commitIssueState({
307
+ writer: WRITER,
308
+ lease,
309
+ expectedVersion: params.issue.version,
310
+ update: buildContinueUpdate(params.issue),
311
+ onConflict: (current) => buildContinueUpdate(current),
312
+ });
294
313
  return Boolean(this.db.issueSessions.appendIssueSessionEventWithLease(lease, {
295
314
  projectId: params.run.projectId,
296
315
  linearIssueId: params.run.linearIssueId,
@@ -358,7 +377,9 @@ export class RunFinalizer {
358
377
  }
359
378
  const verifiedRepairError = await this.completionPolicy.verifyReactiveRunAdvancedBranch(run, freshIssue);
360
379
  if (verifiedRepairError) {
361
- const holdState = params.resolveRecoverableRunState(freshIssue) ?? "failed";
380
+ // The run failed verification — it did not do its work, so resolve
381
+ // the hold state from GitHub truth like any other recovery path.
382
+ const holdState = resolvePostRunFactoryState(freshIssue, run, { outcome: "recovered" }) ?? "failed";
362
383
  this.failRunAndClear(run, verifiedRepairError, holdState);
363
384
  this.syncFailureOutcome({
364
385
  run,
@@ -428,30 +449,42 @@ export class RunFinalizer {
428
449
  // any git error returns undefined and we leave the cache as-is.
429
450
  this.maybeUpdateLastPublishedIdentity(run, refreshedIssue);
430
451
  const postRunFollowUp = await this.completionPolicy.resolvePostRunFollowUp(run, refreshedIssue);
431
- const postRunState = postRunFollowUp?.factoryState ?? resolveCompletedRunState(refreshedIssue, run);
452
+ const postRunState = postRunFollowUp?.factoryState ?? resolvePostRunFactoryState(refreshedIssue, run);
432
453
  const outcomeSummary = this.buildOutcomeSummary({
433
454
  run,
434
455
  issue: refreshedIssue,
435
456
  postRunState,
436
457
  latestAssistantSummary: report.assistantMessages.at(-1),
437
458
  });
438
- const completed = this.withHeldLease(run.projectId, run.linearIssueId, (lease) => {
439
- this.db.runs.finishRun(run.id, this.buildCompletedRunUpdate({
440
- threadId,
441
- ...(params.completedTurnId ? { completedTurnId: params.completedTurnId } : {}),
442
- report,
443
- outcomeSummary,
444
- }));
445
- this.db.issues.upsertIssue({
446
- projectId: run.projectId,
447
- linearIssueId: run.linearIssueId,
448
- activeRunId: null,
449
- ...(postRunState ? { factoryState: postRunState } : {}),
459
+ // `refreshedIssue` was read before several async policy checks; a webhook
460
+ // may have landed mid-finalize. settleRun re-reads the row inside its
461
+ // transaction and resolves the post-run state from that fresh truth, so
462
+ // we never regress it (e.g. the PR merged while we were verifying the
463
+ // publish). settleRun also owns the slot clear (plan §B1): it refuses to
464
+ // touch a slot that no longer points at this run.
465
+ const buildCompletionUpdate = (record) => {
466
+ const state = postRunFollowUp?.factoryState ?? resolvePostRunFactoryState(record, run);
467
+ return {
468
+ ...(state ? { factoryState: state } : {}),
450
469
  pendingRunType: null,
451
470
  pendingRunContextJson: null,
452
- ...(postRunFollowUp ? {} : (postRunState === "awaiting_queue" || postRunState === "done"
471
+ ...(postRunFollowUp ? {} : (state === "awaiting_queue" || state === "done"
453
472
  ? { ...CLEARED_FAILURE_PROVENANCE }
454
473
  : {})),
474
+ };
475
+ };
476
+ const completed = this.withHeldLease(run.projectId, run.linearIssueId, (lease) => {
477
+ settleRun({
478
+ db: this.db,
479
+ run,
480
+ finish: this.buildCompletedRunUpdate({
481
+ threadId,
482
+ ...(params.completedTurnId ? { completedTurnId: params.completedTurnId } : {}),
483
+ report,
484
+ outcomeSummary,
485
+ }),
486
+ lease,
487
+ buildIssueUpdate: buildCompletionUpdate,
455
488
  });
456
489
  if (postRunFollowUp) {
457
490
  return this.appendWakeEventWithLease(lease, issue, postRunFollowUp.pendingRunType, postRunFollowUp.context, "post_run");