patchrelay 0.75.3 → 0.77.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-input-service.js +40 -26
- package/dist/build-info.json +3 -3
- package/dist/cli/data.js +3 -1
- package/dist/db/issue-session-store.js +44 -9
- package/dist/db/issue-store.js +11 -2
- package/dist/db/migrations.js +3 -0
- package/dist/factory-state.js +23 -0
- package/dist/github-webhook-reactive-run.js +15 -11
- package/dist/github-webhook-stack-coordination.js +8 -4
- package/dist/github-webhook-state-projector.js +204 -139
- package/dist/github-webhook-terminal-handler.js +37 -27
- package/dist/idle-reconciliation.js +122 -66
- package/dist/implementation-outcome-policy.js +5 -1
- package/dist/issue-session-projection-invalidator.js +9 -0
- package/dist/linear-agent-session-client.js +16 -8
- package/dist/linear-issue-projection.js +15 -11
- package/dist/linear-status-comment-sync.js +8 -4
- package/dist/linear-workflow-state-sync.js +9 -5
- package/dist/merged-linear-completion-reconciler.js +39 -17
- package/dist/no-pr-completion-check.js +51 -29
- package/dist/orchestration-parent-wake.js +15 -8
- package/dist/queue-health-monitor.js +17 -8
- package/dist/reactive-run-policy.js +5 -1
- package/dist/run-budgets.js +40 -6
- package/dist/run-completion-policy.js +50 -9
- package/dist/run-failure-policy.js +463 -0
- package/dist/run-finalizer.js +68 -35
- package/dist/run-launcher.js +63 -12
- package/dist/run-notification-handler.js +19 -9
- package/dist/run-orchestrator.js +70 -78
- package/dist/run-reconciler.js +137 -64
- package/dist/run-settlement.js +57 -0
- package/dist/run-wake-planner.js +39 -29
- package/dist/service-issue-actions.js +45 -28
- package/dist/service-startup-recovery.js +61 -35
- package/dist/telemetry.js +9 -0
- package/dist/terminal-wake-reconciler.js +20 -3
- package/dist/webhooks/agent-session-handler.js +22 -12
- package/dist/webhooks/dependency-readiness-handler.js +17 -10
- package/dist/webhooks/desired-stage-recorder.js +32 -13
- package/dist/webhooks/issue-removal-handler.js +24 -13
- package/package.json +1 -1
- package/dist/interrupted-run-recovery.js +0 -227
- package/dist/run-recovery-service.js +0 -202
- package/dist/zombie-recovery.js +0 -13
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
import { buildRunFailureActivity } from "./linear-session-reporting.js";
|
|
2
|
+
import { getRemainingZombieRecoveryDelayMs, getZombieRecoveryBudget } from "./run-budgets.js";
|
|
3
|
+
import { resolvePostRunFactoryState } from "./run-completion-policy.js";
|
|
4
|
+
import { isRequestedChangesRunType } from "./reactive-pr-state.js";
|
|
5
|
+
import { settleRun } from "./run-settlement.js";
|
|
6
|
+
const WRITER = "run-failure-policy";
|
|
7
|
+
// Roll back the attempt counter consumed by the interrupted run and clear the
// attempted-failure provenance for repair runs, as a single issue update so
// the whole repair commits (and conflict-recomputes) atomically.
/**
 * Build the issue update that undoes the bookkeeping of an interrupted run.
 *
 * @param {string} runType - Run type of the interrupted run.
 * @param {object} issue - Issue row; reads `projectId`, `linearIssueId`,
 *   `ciRepairAttempts`, `queueRepairAttempts` and `reviewFixAttempts`.
 * @returns {object|undefined} An update keyed by `projectId`/`linearIssueId`,
 *   or `undefined` when there is neither a counter to decrement nor
 *   provenance to clear.
 */
function buildInterruptedAttemptRepairUpdate(runType, issue) {
    // At most one counter is decremented, and never below zero: each branch
    // requires the current value to be strictly positive.
    const counter = runType === "ci_repair" && issue.ciRepairAttempts > 0
        ? { ciRepairAttempts: issue.ciRepairAttempts - 1 }
        : runType === "queue_repair" && issue.queueRepairAttempts > 0
            ? { queueRepairAttempts: issue.queueRepairAttempts - 1 }
            : isRequestedChangesRunType(runType) && issue.reviewFixAttempts > 0
                ? { reviewFixAttempts: issue.reviewFixAttempts - 1 }
                : undefined;
    // Provenance is cleared for both repair run types regardless of whether a
    // counter was decremented, so a repair run always yields an update.
    const provenance = runType === "ci_repair" || runType === "queue_repair"
        ? {
            lastAttemptedFailureHeadSha: null,
            lastAttemptedFailureSignature: null,
            lastAttemptedFailureAt: null,
        }
        : undefined;
    if (!counter && !provenance)
        return undefined;
    return {
        projectId: issue.projectId,
        linearIssueId: issue.linearIssueId,
        ...counter,
        ...provenance,
    };
}
|
|
34
|
+
/**
 * Pick the run type to use when retrying an interrupted requested-changes run.
 *
 * A `branch_upkeep` run always retries as `branch_upkeep`. Any other run type
 * retries as `branch_upkeep` only when the wake context asks for it
 * (`reviewFixMode === "branch_upkeep"` or `branchUpkeepRequired === true`);
 * otherwise it retries as `review_fix`.
 *
 * @param {string} runType - Run type of the interrupted run.
 * @param {object|null|undefined} context - Resolved wake context; may be absent.
 * @returns {"branch_upkeep"|"review_fix"} The run type to re-enqueue.
 */
function resolveRetryRunType(runType, context) {
    if (runType === "branch_upkeep") {
        return "branch_upkeep";
    }
    return context?.reviewFixMode === "branch_upkeep" || context?.branchUpkeepRequired === true
        ? "branch_upkeep"
        : "review_fix";
}
|
|
42
|
+
// Plan §B4: the one run-failure policy. Merges the former
// RunRecoveryService (zombie retry/escalate + backoff) and
// InterruptedRunRecovery (interrupted-turn handling, counter decrements,
// re-enqueue) into a single module that answers: given a stranded or
// failed run + its issue — retry (with which backoff/budget), re-enqueue
// (which runType/context), or escalate?
//
// Ownership: run-reconciler and service-startup-recovery only DETECT
// stranded states and hand them here; this policy DECIDES; execution of
// the run/slot writes goes through settleRun, and dispatch of follow-up
// work goes through the WakeDispatcher.
export class RunFailurePolicy {
    db;
    logger;
    linearSync;
    withHeldLease;
    releaseLease;
    appendWakeEventWithLease;
    wakeDispatcher;
    restoreIdleWorktree;
    completionPolicy;
    resolveProject;
    feed;
    /**
     * Store the injected collaborators; no other work happens here.
     *
     * `withHeldLease(projectId, linearIssueId, fn)` is called with a callback
     * that receives a lease; throughout this class a falsy return value is
     * treated as "the issue-session lease was lost". `feed` is optional and is
     * only ever used through `this.feed?.publish(...)`.
     */
    constructor(db, logger, linearSync, withHeldLease, releaseLease, appendWakeEventWithLease, wakeDispatcher, restoreIdleWorktree, completionPolicy, resolveProject, feed) {
        this.db = db;
        this.logger = logger;
        this.linearSync = linearSync;
        this.withHeldLease = withHeldLease;
        this.releaseLease = releaseLease;
        this.appendWakeEventWithLease = appendWakeEventWithLease;
        this.wakeDispatcher = wakeDispatcher;
        this.restoreIdleWorktree = restoreIdleWorktree;
        this.completionPolicy = completionPolicy;
        this.resolveProject = resolveProject;
        this.feed = feed;
    }
    // ─── Stranded runs (zombie / stale thread) ───────────────────────
    /**
     * Detector entry point: the reconciler found a run that can never make
     * progress (no Codex thread after a restart, or the thread is gone).
     * Settle the run (mark failed, release the slot) and decide retry vs
     * escalate via the zombie budget/backoff.
     *
     * @param {object} params
     * @param {object} params.run - The stranded run.
     * @param {object} params.issue - The issue the run belongs to.
     * @param {string} params.failureReason - Stored on the failed run.
     * @param {string} params.reason - Passed on to recoverOrEscalate for logs/feed.
     */
    settleStrandedRunAndRecover(params) {
        const { run, issue } = params;
        // The withHeldLease result is not inspected: recoverOrEscalate runs
        // whether or not the settle happened under a held lease.
        this.withHeldLease(run.projectId, run.linearIssueId, (lease) => settleRun({
            db: this.db,
            run,
            finish: { status: "failed", failureReason: params.failureReason },
            lease,
        }));
        this.recoverOrEscalate({ issue, runType: run.runType, reason: params.reason });
    }
    /**
     * Decide what happens after a run died without doing its work: PR
     * already merged → done; zombie budget exhausted → escalate; backoff
     * not elapsed → keep the wake but defer; otherwise consume one budget
     * unit, append a recovery wake, and dispatch.
     *
     * Before any of the above, a requested-changes run type is escalated
     * outright (pending run and pending session events are cleared). The
     * issue is re-read from the database first; if it no longer exists the
     * method returns without doing anything.
     *
     * @param {object} params
     * @param {object} params.issue - Used only for its projectId/linearIssueId.
     * @param {string} params.runType - Run type of the run that died.
     * @param {string} params.reason - Human-readable cause, used in logs and feed.
     */
    recoverOrEscalate(params) {
        const { issue, runType, reason } = params;
        const fresh = this.db.issues.getIssue(issue.projectId, issue.linearIssueId);
        if (!fresh)
            return;
        if (isRequestedChangesRunType(runType)) {
            const updated = this.withHeldLease(fresh.projectId, fresh.linearIssueId, (lease) => {
                this.db.issueSessions.clearPendingIssueSessionEventsWithLease(lease);
                this.db.issueSessions.commitIssueState({
                    writer: WRITER,
                    lease,
                    update: {
                        projectId: fresh.projectId,
                        linearIssueId: fresh.linearIssueId,
                        pendingRunType: null,
                        pendingRunContextJson: null,
                        factoryState: "escalated",
                    },
                });
                return true;
            });
            if (!updated) {
                this.logger.warn({ issueKey: fresh.issueKey, reason }, "Skipping review-fix recovery escalation after losing issue-session lease");
                this.releaseLease(fresh.projectId, fresh.linearIssueId);
                return;
            }
            this.logger.warn({ issueKey: fresh.issueKey, reason }, "Requested-changes run failed before a new head was published - escalating");
            this.feed?.publish({
                level: "error",
                kind: "workflow",
                issueKey: fresh.issueKey,
                projectId: fresh.projectId,
                stage: runType,
                status: "escalated",
                summary: `Requested-changes run failed before publishing a new head (${reason})`,
            });
            this.releaseLease(fresh.projectId, fresh.linearIssueId);
            return;
        }
        if (fresh.prState === "merged") {
            const updated = this.withHeldLease(fresh.projectId, fresh.linearIssueId, (lease) => {
                // Merged PR: nothing left to recover, so the zombie budget is reset too.
                this.db.issueSessions.commitIssueState({
                    writer: WRITER,
                    lease,
                    update: {
                        projectId: fresh.projectId,
                        linearIssueId: fresh.linearIssueId,
                        factoryState: "done",
                        zombieRecoveryAttempts: 0,
                        lastZombieRecoveryAt: null,
                    },
                });
                return true;
            });
            if (!updated) {
                this.logger.warn({ issueKey: fresh.issueKey, reason }, "Skipping merged recovery completion after losing issue-session lease");
                this.releaseLease(fresh.projectId, fresh.linearIssueId);
                return;
            }
            this.logger.info({ issueKey: fresh.issueKey, reason }, "Recovery: PR already merged - transitioning to done");
            this.releaseLease(fresh.projectId, fresh.linearIssueId);
            return;
        }
        const zombieRecoveryBudget = getZombieRecoveryBudget(this.resolveProject(fresh.projectId));
        // `attempts` is the attempt number this recovery would become.
        const attempts = fresh.zombieRecoveryAttempts + 1;
        if (attempts > zombieRecoveryBudget) {
            const updated = this.withHeldLease(fresh.projectId, fresh.linearIssueId, (lease) => {
                this.db.issueSessions.commitIssueState({
                    writer: WRITER,
                    lease,
                    update: {
                        projectId: fresh.projectId,
                        linearIssueId: fresh.linearIssueId,
                        factoryState: "escalated",
                    },
                });
                return true;
            });
            if (!updated) {
                this.logger.warn({ issueKey: fresh.issueKey, attempts, reason }, "Skipping recovery escalation after losing issue-session lease");
                this.releaseLease(fresh.projectId, fresh.linearIssueId);
                return;
            }
            this.logger.warn({ issueKey: fresh.issueKey, attempts, reason }, "Recovery: budget exhausted - escalating");
            this.feed?.publish({
                level: "error",
                kind: "workflow",
                issueKey: fresh.issueKey,
                projectId: fresh.projectId,
                stage: "escalated",
                status: "budget_exhausted",
                summary: `${reason} recovery failed after ${zombieRecoveryBudget} attempts`,
            });
            this.releaseLease(fresh.projectId, fresh.linearIssueId);
            return;
        }
        if (fresh.lastZombieRecoveryAt) {
            const remainingDelayMs = getRemainingZombieRecoveryDelayMs(fresh.lastZombieRecoveryAt, fresh.zombieRecoveryAttempts);
            if (remainingDelayMs > 0) {
                // Backoff still running: keep a wake on record but consume no
                // budget and do not dispatch. This path neither inspects the
                // withHeldLease result nor calls releaseLease.
                // NOTE(review): confirm the missing releaseLease here is intended.
                this.withHeldLease(fresh.projectId, fresh.linearIssueId, (lease) => {
                    this.appendWakeEventWithLease(lease, fresh, runType, undefined, `recovery:${attempts}`);
                });
                this.logger.debug({ issueKey: fresh.issueKey, attempts: fresh.zombieRecoveryAttempts, remainingDelayMs }, "Recovery: backoff not elapsed, deferring retry");
                return;
            }
        }
        const requeued = this.withHeldLease(fresh.projectId, fresh.linearIssueId, (lease) => {
            // `attempts` is read-modify-write against the fresh row read above; on
            // conflict recompute the counter from the current row.
            const buildRequeueUpdate = (record) => ({
                projectId: fresh.projectId,
                linearIssueId: fresh.linearIssueId,
                pendingRunType: null,
                pendingRunContextJson: null,
                zombieRecoveryAttempts: record.zombieRecoveryAttempts + 1,
                lastZombieRecoveryAt: new Date().toISOString(),
            });
            this.db.issueSessions.commitIssueState({
                writer: WRITER,
                lease,
                expectedVersion: fresh.version,
                update: buildRequeueUpdate(fresh),
                onConflict: (current) => buildRequeueUpdate(current),
            });
            // The wake append result doubles as the success signal for the requeue.
            return this.appendWakeEventWithLease(lease, fresh, runType, undefined, `recovery:${attempts}`);
        });
        if (!requeued) {
            this.logger.warn({ issueKey: fresh.issueKey, attempts, reason }, "Skipping recovery re-enqueue after losing issue-session lease");
            this.releaseLease(fresh.projectId, fresh.linearIssueId);
            return;
        }
        this.wakeDispatcher.dispatchIfWakePending(fresh.projectId, fresh.linearIssueId);
        this.logger.info({ issueKey: fresh.issueKey, attempts, reason }, "Recovery: re-enqueued with backoff");
    }
    // ─── Terminal decisions ──────────────────────────────────────────
    /**
     * Hand the issue to a human: move it to `escalated`, drop any pending run
     * and pending session events, publish a feed entry, emit a Linear error
     * activity, sync the Linear session, and release the lease.
     *
     * If the write cannot be applied (lease lost, slot not cleared, or the
     * commit outcome is not "applied") only a warning is logged and the lease
     * is released; no feed entry or Linear activity is produced.
     *
     * @param {object} params
     * @param {object} params.issue - Issue to escalate; `activeRunId` selects the write path.
     * @param {string} params.runType - Run type reported as the feed stage.
     * @param {string} params.reason - Shown to the operator in feed and Linear.
     */
    escalate(params) {
        const { issue, runType, reason } = params;
        this.logger.warn({ issueKey: issue.issueKey, runType, reason }, "Escalating to human");
        const escalated = this.withHeldLease(issue.projectId, issue.linearIssueId, (lease) => {
            // Escalation is an operator-facing decision: the issue write and the
            // run release ride in the held-lease transaction. When a run still
            // holds the slot, settleRun owns the paired run-release + slot-clear;
            // it refuses to clear a slot that was re-pointed at another run.
            const escalateFields = {
                pendingRunType: null,
                pendingRunContextJson: null,
                factoryState: "escalated",
            };
            // NOTE(review): only `undefined` means "no active run" here; a null
            // activeRunId would take the settleRun path. Confirm against the
            // issue row mapping.
            if (issue.activeRunId !== undefined) {
                const settled = settleRun({
                    db: this.db,
                    run: { id: issue.activeRunId, projectId: issue.projectId, linearIssueId: issue.linearIssueId },
                    finish: { status: "released" },
                    lease,
                    buildIssueUpdate: () => escalateFields,
                });
                if (!settled.slotCleared)
                    return false;
            }
            else {
                const commit = this.db.issueSessions.commitIssueState({
                    writer: WRITER,
                    lease,
                    update: {
                        projectId: issue.projectId,
                        linearIssueId: issue.linearIssueId,
                        ...escalateFields,
                    },
                });
                if (commit.outcome !== "applied")
                    return false;
            }
            this.db.issueSessions.clearPendingIssueSessionEventsWithLease(lease);
            return true;
        });
        if (!escalated) {
            this.logger.warn({ issueKey: issue.issueKey, runType }, "Skipping escalation write after losing issue-session lease");
            this.releaseLease(issue.projectId, issue.linearIssueId);
            return;
        }
        this.feed?.publish({
            level: "error",
            kind: "workflow",
            issueKey: issue.issueKey,
            projectId: issue.projectId,
            stage: runType,
            status: "escalated",
            summary: `Escalated: ${reason}`,
        });
        // Re-read so Linear sees the post-escalation row; fall back to the caller's copy.
        const escalatedIssue = this.db.issues.getIssue(issue.projectId, issue.linearIssueId) ?? issue;
        // Linear updates are fire-and-forget: their promises are deliberately not awaited.
        void this.linearSync.emitActivity(escalatedIssue, {
            type: "error",
            body: `PatchRelay needs human help to continue.\n\n${reason}`,
        });
        void this.linearSync.syncSession(escalatedIssue);
        this.releaseLease(issue.projectId, issue.linearIssueId);
    }
    /**
     * Mark `run` as failed via settleRun and move the issue to `nextState`.
     * For the states `failed`, `escalated`, `awaiting_input` and `done` the
     * pending issue-session events are cleared as well. The lease is released
     * on every path, including when the lease was lost (which only logs).
     *
     * @param {object} params
     * @param {object} params.run - Run to fail.
     * @param {string} params.message - Stored as the run's failure reason.
     * @param {string} params.nextState - Factory state written to the issue.
     */
    failRunAndClear(params) {
        const { run, message, nextState } = params;
        const updated = this.withHeldLease(run.projectId, run.linearIssueId, (lease) => {
            settleRun({
                db: this.db,
                run,
                finish: { status: "failed", failureReason: message },
                lease,
                buildIssueUpdate: () => ({ factoryState: nextState }),
            });
            if (nextState === "failed" || nextState === "escalated" || nextState === "awaiting_input" || nextState === "done") {
                this.db.issueSessions.clearPendingIssueSessionEventsWithLease(lease);
            }
            return true;
        });
        if (!updated) {
            this.logger.warn({ runId: run.id, issueId: run.linearIssueId }, "Skipping failure cleanup after losing issue-session lease");
        }
        this.releaseLease(run.projectId, run.linearIssueId);
    }
    // ─── Interrupted turns (formerly InterruptedRunRecovery) ─────────
    /**
     * Handle a run whose Codex turn was interrupted. First repairs the attempt
     * counters/provenance consumed by the run, then delegates:
     * requested-changes run types → handleInterruptedRequestedChangesRun;
     * an `implementation` run with no PR number → handleInterruptedImplementationRun;
     * anything else is failed in place and moved to the recovered state
     * (or `failed` when none resolves).
     *
     * @param {object} run - The interrupted run.
     * @param {object} issue - Issue row as read by the caller (may be stale).
     * @returns {Promise<void>}
     */
    async handleInterruptedRun(run, issue) {
        this.logger.warn({ issueKey: issue.issueKey, runType: run.runType, threadId: run.threadId }, "Run has interrupted turn - marking as failed");
        const repairedCounters = this.withHeldLease(issue.projectId, issue.linearIssueId, (lease) => {
            // The decrement is read-modify-write against an issue row read before
            // the awaits that led here; on conflict, recompute from the fresh row.
            const update = buildInterruptedAttemptRepairUpdate(run.runType, issue);
            if (update) {
                this.db.issueSessions.commitIssueState({
                    writer: WRITER,
                    lease,
                    expectedVersion: issue.version,
                    update,
                    onConflict: (current) => buildInterruptedAttemptRepairUpdate(run.runType, current),
                });
            }
            return true;
        });
        if (!repairedCounters) {
            this.logger.warn({ runId: run.id, issueId: run.linearIssueId }, "Skipping interrupted-run recovery after losing issue-session lease");
            this.releaseLease(run.projectId, run.linearIssueId);
            return;
        }
        if (isRequestedChangesRunType(run.runType)) {
            await this.handleInterruptedRequestedChangesRun(run, issue);
            return;
        }
        if (run.runType === "implementation" && !issue.prNumber) {
            await this.handleInterruptedImplementationRun(run, issue);
            return;
        }
        const recoveredState = resolvePostRunFactoryState(this.db.issues.getIssue(run.projectId, run.linearIssueId) ?? issue, run, { outcome: "recovered" });
        this.failRunAndClear({ run, message: "Codex turn was interrupted", nextState: recoveredState ?? "failed" });
        await this.restoreIdleWorktree(issue);
        const failedIssue = this.db.issues.getIssue(run.projectId, run.linearIssueId) ?? issue;
        if (recoveredState) {
            this.feed?.publish({
                level: "info",
                kind: "stage",
                issueKey: issue.issueKey,
                projectId: run.projectId,
                stage: recoveredState,
                status: "reconciled",
                summary: `Interrupted ${run.runType} recovered -> ${recoveredState}`,
            });
        }
        else {
            // No recovered state: surface the failure in Linear instead of the feed.
            void this.linearSync.emitActivity(failedIssue, buildRunFailureActivity(run.runType, "The Codex turn was interrupted."));
        }
        void this.linearSync.syncSession(failedIssue, { activeRunType: run.runType });
        // failRunAndClear above already released the lease once; this is a second release.
        this.releaseLease(run.projectId, run.linearIssueId);
    }
    /**
     * Interrupted `implementation` run that never produced a PR: fail the run,
     * put the issue back to `delegated`, restore the worktree, and append a
     * `delegated` session event. If no issue wake is pending afterwards the
     * interruption is reported as an escalation; otherwise a retry is
     * announced and dispatched.
     *
     * @param {object} run - The interrupted implementation run.
     * @param {object} issue - Issue row as read by the caller.
     * @returns {Promise<void>}
     */
    async handleInterruptedImplementationRun(run, issue) {
        const interruptedMessage = "Implementation run was interrupted before PatchRelay could publish a PR";
        this.failRunAndClear({ run, message: "Codex turn was interrupted", nextState: "delegated" });
        await this.restoreIdleWorktree(issue);
        const refreshedIssue = this.db.issues.getIssue(run.projectId, run.linearIssueId) ?? issue;
        this.db.issueSessions.appendIssueSessionEventRespectingActiveLease(run.projectId, run.linearIssueId, {
            projectId: run.projectId,
            linearIssueId: run.linearIssueId,
            eventType: "delegated",
            dedupeKey: `interrupted_implementation:implementation:${run.linearIssueId}`,
        });
        if (!this.db.workflowWakes.peekIssueWake(run.projectId, run.linearIssueId)) {
            // No wake is pending after the append, so nothing will retry automatically.
            const failedIssue = this.db.issues.getIssue(run.projectId, run.linearIssueId) ?? refreshedIssue;
            this.feed?.publish({
                level: "error",
                kind: "workflow",
                issueKey: issue.issueKey,
                projectId: run.projectId,
                stage: run.runType,
                status: "escalated",
                summary: interruptedMessage,
            });
            void this.linearSync.emitActivity(failedIssue, buildRunFailureActivity(run.runType, interruptedMessage));
            void this.linearSync.syncSession(failedIssue, { activeRunType: run.runType });
            this.releaseLease(run.projectId, run.linearIssueId);
            return;
        }
        this.feed?.publish({
            level: "warn",
            kind: "workflow",
            issueKey: issue.issueKey,
            projectId: run.projectId,
            stage: run.runType,
            status: "retry_queued",
            summary: "Implementation run was interrupted; PatchRelay will retry automatically",
        });
        const recoveredIssue = this.db.issues.getIssue(run.projectId, run.linearIssueId) ?? refreshedIssue;
        void this.linearSync.syncSession(recoveredIssue, { activeRunType: run.runType });
        this.wakeDispatcher.dispatchIfWakePending(run.projectId, run.linearIssueId);
        this.releaseLease(run.projectId, run.linearIssueId);
    }
    /**
     * Interrupted requested-changes run: refresh the issue through the
     * completion policy, resolve the retry context and run type, fail the run
     * into the recovered state (or `failed`), and restore the worktree. When
     * the recovered state is `changes_requested` the retry is stored as the
     * pending run and dispatched; any other state is reported as escalated.
     * A Linear failure activity and session sync are emitted in both cases.
     *
     * @param {object} run - The interrupted run.
     * @param {object} issue - Issue row as read by the caller (used as fallback).
     * @returns {Promise<void>}
     */
    async handleInterruptedRequestedChangesRun(run, issue) {
        const freshIssue = this.db.issues.getIssue(run.projectId, run.linearIssueId) ?? issue;
        const refreshedIssue = await this.completionPolicy.refreshIssueAfterReactivePublish(run, freshIssue);
        const retryContext = await this.completionPolicy.resolveRequestedChangesWakeContext(refreshedIssue, run.runType, run.runType === "branch_upkeep"
            ? {
                branchUpkeepRequired: true,
                reviewFixMode: "branch_upkeep",
                wakeReason: "branch_upkeep",
            }
            : undefined);
        const retryRunType = resolveRetryRunType(run.runType, retryContext);
        const recoveredState = resolvePostRunFactoryState(refreshedIssue, run, { outcome: "recovered" }) ?? "failed";
        const interruptedMessage = "Requested-changes run was interrupted before PatchRelay could verify that a new PR head was published";
        this.failRunAndClear({ run, message: interruptedMessage, nextState: recoveredState });
        await this.restoreIdleWorktree(issue);
        const recoveredIssue = this.db.issues.getIssue(run.projectId, run.linearIssueId) ?? refreshedIssue;
        if (recoveredState === "changes_requested") {
            // This commit passes no lease (failRunAndClear already released it).
            // NOTE(review): confirm a lease-less commitIssueState is intended here.
            this.db.issueSessions.commitIssueState({
                writer: WRITER,
                update: {
                    projectId: run.projectId,
                    linearIssueId: run.linearIssueId,
                    pendingRunType: retryRunType,
                    pendingRunContextJson: retryContext ? JSON.stringify(retryContext) : null,
                },
            });
            this.feed?.publish({
                level: "warn",
                kind: "workflow",
                issueKey: issue.issueKey,
                projectId: run.projectId,
                stage: run.runType,
                status: "retry_queued",
                summary: "Requested-changes run was interrupted; PatchRelay will retry from fresh GitHub truth",
            });
            this.wakeDispatcher.dispatchIfWakePending(run.projectId, run.linearIssueId);
        }
        else {
            this.feed?.publish({
                level: "error",
                kind: "workflow",
                issueKey: issue.issueKey,
                projectId: run.projectId,
                stage: run.runType,
                status: "escalated",
                summary: interruptedMessage,
            });
        }
        void this.linearSync.emitActivity(recoveredIssue, buildRunFailureActivity(run.runType, interruptedMessage));
        void this.linearSync.syncSession(recoveredIssue, { activeRunType: run.runType });
        this.releaseLease(run.projectId, run.linearIssueId);
    }
}
|
package/dist/run-finalizer.js
CHANGED
|
@@ -2,10 +2,12 @@ import { CLEARED_FAILURE_PROVENANCE } from "./failure-provenance.js";
|
|
|
2
2
|
import { buildStageReport, countEventMethods } from "./run-reporting.js";
|
|
3
3
|
import { buildRunCompletedActivity, buildRunFailureActivity } from "./linear-session-reporting.js";
|
|
4
4
|
import { handleNoPrCompletionCheck } from "./no-pr-completion-check.js";
|
|
5
|
-
import {
|
|
5
|
+
import { resolvePostRunFactoryState } from "./run-completion-policy.js";
|
|
6
6
|
import { computeChangeIdentityFromWorktree } from "./change-identity.js";
|
|
7
7
|
import { inspectGitWorktreeStatus, isRepairRunType } from "./git-worktree-status.js";
|
|
8
8
|
import { buildRunOutcomeSummary } from "./run-outcome-summary.js";
|
|
9
|
+
import { settleRun } from "./run-settlement.js";
|
|
10
|
+
const WRITER = "run-finalizer";
|
|
9
11
|
function parseEventJson(eventJson) {
|
|
10
12
|
if (!eventJson)
|
|
11
13
|
return undefined;
|
|
@@ -150,12 +152,16 @@ export class RunFinalizer {
|
|
|
150
152
|
});
|
|
151
153
|
if (!identity.patchId && !identity.integrationTreeId)
|
|
152
154
|
return;
|
|
153
|
-
this.db.
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
155
|
+
this.db.issueSessions.commitIssueState({
|
|
156
|
+
writer: WRITER,
|
|
157
|
+
expectedVersion: issue.version,
|
|
158
|
+
update: {
|
|
159
|
+
projectId: issue.projectId,
|
|
160
|
+
linearIssueId: issue.linearIssueId,
|
|
161
|
+
...(identity.patchId ? { lastPublishedPatchId: identity.patchId } : {}),
|
|
162
|
+
...(identity.integrationTreeId ? { lastPublishedIntegrationTreeId: identity.integrationTreeId } : {}),
|
|
163
|
+
lastPublishedHeadSha: issue.prHeadSha,
|
|
164
|
+
},
|
|
159
165
|
});
|
|
160
166
|
this.logger.info({
|
|
161
167
|
issueKey: issue.issueKey,
|
|
@@ -194,12 +200,15 @@ export class RunFinalizer {
|
|
|
194
200
|
...(completedTurnId ? { turnId: completedTurnId } : {}),
|
|
195
201
|
failureReason: run.failureReason ?? "approved on the same head; further publication suppressed",
|
|
196
202
|
});
|
|
197
|
-
this.db.
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
+
this.db.issueSessions.commitIssueState({
|
|
204
|
+
writer: WRITER,
|
|
205
|
+
update: {
|
|
206
|
+
projectId: run.projectId,
|
|
207
|
+
linearIssueId: run.linearIssueId,
|
|
208
|
+
activeRunId: null,
|
|
209
|
+
pendingRunType: null,
|
|
210
|
+
pendingRunContextJson: null,
|
|
211
|
+
},
|
|
203
212
|
});
|
|
204
213
|
});
|
|
205
214
|
this.clearProgressAndRelease(run);
|
|
@@ -274,23 +283,33 @@ export class RunFinalizer {
|
|
|
274
283
|
report: params.report,
|
|
275
284
|
outcomeSummary,
|
|
276
285
|
}));
|
|
277
|
-
|
|
286
|
+
// The attempt decrements are read-modify-write against the issue row;
|
|
287
|
+
// on conflict, recompute them from the fresh row instead of writing
|
|
288
|
+
// counters derived from a stale read.
|
|
289
|
+
const buildContinueUpdate = (record) => ({
|
|
278
290
|
projectId: params.run.projectId,
|
|
279
291
|
linearIssueId: params.run.linearIssueId,
|
|
280
292
|
activeRunId: null,
|
|
281
293
|
factoryState: "delegated",
|
|
282
294
|
pendingRunType: null,
|
|
283
295
|
pendingRunContextJson: null,
|
|
284
|
-
...(params.run.runType === "ci_repair" &&
|
|
285
|
-
? { ciRepairAttempts:
|
|
296
|
+
...(params.run.runType === "ci_repair" && record.ciRepairAttempts > 0
|
|
297
|
+
? { ciRepairAttempts: record.ciRepairAttempts - 1 }
|
|
286
298
|
: {}),
|
|
287
|
-
...(params.run.runType === "queue_repair" &&
|
|
288
|
-
? { queueRepairAttempts:
|
|
299
|
+
...(params.run.runType === "queue_repair" && record.queueRepairAttempts > 0
|
|
300
|
+
? { queueRepairAttempts: record.queueRepairAttempts - 1 }
|
|
289
301
|
: {}),
|
|
290
|
-
...((params.run.runType === "review_fix" || params.run.runType === "branch_upkeep") &&
|
|
291
|
-
? { reviewFixAttempts:
|
|
302
|
+
...((params.run.runType === "review_fix" || params.run.runType === "branch_upkeep") && record.reviewFixAttempts > 0
|
|
303
|
+
? { reviewFixAttempts: record.reviewFixAttempts - 1 }
|
|
292
304
|
: {}),
|
|
293
305
|
});
|
|
306
|
+
this.db.issueSessions.commitIssueState({
|
|
307
|
+
writer: WRITER,
|
|
308
|
+
lease,
|
|
309
|
+
expectedVersion: params.issue.version,
|
|
310
|
+
update: buildContinueUpdate(params.issue),
|
|
311
|
+
onConflict: (current) => buildContinueUpdate(current),
|
|
312
|
+
});
|
|
294
313
|
return Boolean(this.db.issueSessions.appendIssueSessionEventWithLease(lease, {
|
|
295
314
|
projectId: params.run.projectId,
|
|
296
315
|
linearIssueId: params.run.linearIssueId,
|
|
@@ -358,7 +377,9 @@ export class RunFinalizer {
|
|
|
358
377
|
}
|
|
359
378
|
const verifiedRepairError = await this.completionPolicy.verifyReactiveRunAdvancedBranch(run, freshIssue);
|
|
360
379
|
if (verifiedRepairError) {
|
|
361
|
-
|
|
380
|
+
// The run failed verification — it did not do its work, so resolve
|
|
381
|
+
// the hold state from GitHub truth like any other recovery path.
|
|
382
|
+
const holdState = resolvePostRunFactoryState(freshIssue, run, { outcome: "recovered" }) ?? "failed";
|
|
362
383
|
this.failRunAndClear(run, verifiedRepairError, holdState);
|
|
363
384
|
this.syncFailureOutcome({
|
|
364
385
|
run,
|
|
@@ -428,30 +449,42 @@ export class RunFinalizer {
|
|
|
428
449
|
// any git error returns undefined and we leave the cache as-is.
|
|
429
450
|
this.maybeUpdateLastPublishedIdentity(run, refreshedIssue);
|
|
430
451
|
const postRunFollowUp = await this.completionPolicy.resolvePostRunFollowUp(run, refreshedIssue);
|
|
431
|
-
const postRunState = postRunFollowUp?.factoryState ??
|
|
452
|
+
const postRunState = postRunFollowUp?.factoryState ?? resolvePostRunFactoryState(refreshedIssue, run);
|
|
432
453
|
const outcomeSummary = this.buildOutcomeSummary({
|
|
433
454
|
run,
|
|
434
455
|
issue: refreshedIssue,
|
|
435
456
|
postRunState,
|
|
436
457
|
latestAssistantSummary: report.assistantMessages.at(-1),
|
|
437
458
|
});
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
activeRunId: null,
|
|
449
|
-
...(postRunState ? { factoryState: postRunState } : {}),
|
|
459
|
+
// `refreshedIssue` was read before several async policy checks; a webhook
|
|
460
|
+
// may have landed mid-finalize. settleRun re-reads the row inside its
|
|
461
|
+
// transaction and resolves the post-run state from that fresh truth, so
|
|
462
|
+
// we never regress it (e.g. the PR merged while we were verifying the
|
|
463
|
+
// publish). settleRun also owns the slot clear (plan §B1): it refuses to
|
|
464
|
+
// touch a slot that no longer points at this run.
|
|
465
|
+
const buildCompletionUpdate = (record) => {
|
|
466
|
+
const state = postRunFollowUp?.factoryState ?? resolvePostRunFactoryState(record, run);
|
|
467
|
+
return {
|
|
468
|
+
...(state ? { factoryState: state } : {}),
|
|
450
469
|
pendingRunType: null,
|
|
451
470
|
pendingRunContextJson: null,
|
|
452
|
-
...(postRunFollowUp ? {} : (
|
|
471
|
+
...(postRunFollowUp ? {} : (state === "awaiting_queue" || state === "done"
|
|
453
472
|
? { ...CLEARED_FAILURE_PROVENANCE }
|
|
454
473
|
: {})),
|
|
474
|
+
};
|
|
475
|
+
};
|
|
476
|
+
const completed = this.withHeldLease(run.projectId, run.linearIssueId, (lease) => {
|
|
477
|
+
settleRun({
|
|
478
|
+
db: this.db,
|
|
479
|
+
run,
|
|
480
|
+
finish: this.buildCompletedRunUpdate({
|
|
481
|
+
threadId,
|
|
482
|
+
...(params.completedTurnId ? { completedTurnId: params.completedTurnId } : {}),
|
|
483
|
+
report,
|
|
484
|
+
outcomeSummary,
|
|
485
|
+
}),
|
|
486
|
+
lease,
|
|
487
|
+
buildIssueUpdate: buildCompletionUpdate,
|
|
455
488
|
});
|
|
456
489
|
if (postRunFollowUp) {
|
|
457
490
|
return this.appendWakeEventWithLease(lease, issue, postRunFollowUp.pendingRunType, postRunFollowUp.context, "post_run");
|