patchrelay 0.75.3 → 0.77.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-input-service.js +40 -26
- package/dist/build-info.json +3 -3
- package/dist/cli/data.js +3 -1
- package/dist/db/issue-session-store.js +44 -9
- package/dist/db/issue-store.js +11 -2
- package/dist/db/migrations.js +3 -0
- package/dist/factory-state.js +23 -0
- package/dist/github-webhook-reactive-run.js +15 -11
- package/dist/github-webhook-stack-coordination.js +8 -4
- package/dist/github-webhook-state-projector.js +204 -139
- package/dist/github-webhook-terminal-handler.js +37 -27
- package/dist/idle-reconciliation.js +122 -66
- package/dist/implementation-outcome-policy.js +5 -1
- package/dist/issue-session-projection-invalidator.js +9 -0
- package/dist/linear-agent-session-client.js +16 -8
- package/dist/linear-issue-projection.js +15 -11
- package/dist/linear-status-comment-sync.js +8 -4
- package/dist/linear-workflow-state-sync.js +9 -5
- package/dist/merged-linear-completion-reconciler.js +39 -17
- package/dist/no-pr-completion-check.js +51 -29
- package/dist/orchestration-parent-wake.js +15 -8
- package/dist/queue-health-monitor.js +17 -8
- package/dist/reactive-run-policy.js +5 -1
- package/dist/run-budgets.js +40 -6
- package/dist/run-completion-policy.js +50 -9
- package/dist/run-failure-policy.js +463 -0
- package/dist/run-finalizer.js +68 -35
- package/dist/run-launcher.js +63 -12
- package/dist/run-notification-handler.js +19 -9
- package/dist/run-orchestrator.js +70 -78
- package/dist/run-reconciler.js +137 -64
- package/dist/run-settlement.js +57 -0
- package/dist/run-wake-planner.js +39 -29
- package/dist/service-issue-actions.js +45 -28
- package/dist/service-startup-recovery.js +61 -35
- package/dist/telemetry.js +9 -0
- package/dist/terminal-wake-reconciler.js +20 -3
- package/dist/webhooks/agent-session-handler.js +22 -12
- package/dist/webhooks/dependency-readiness-handler.js +17 -10
- package/dist/webhooks/desired-stage-recorder.js +32 -13
- package/dist/webhooks/issue-removal-handler.js +24 -13
- package/package.json +1 -1
- package/dist/interrupted-run-recovery.js +0 -227
- package/dist/run-recovery-service.js +0 -202
- package/dist/zombie-recovery.js +0 -13
package/dist/run-launcher.js
CHANGED
|
@@ -4,6 +4,7 @@ import { buildRunFailureActivity } from "./linear-session-reporting.js";
|
|
|
4
4
|
import { loadPatchRelayRepoPrompting } from "./patchrelay-customization.js";
|
|
5
5
|
import { buildRunPrompt as buildPatchRelayRunPrompt, findDisallowedPatchRelayPromptSectionIds, findUnknownPatchRelayPromptSectionIds, mergePromptCustomizationLayers, resolvePromptLayers, } from "./prompting/patchrelay.js";
|
|
6
6
|
import { sanitizeDiagnosticText } from "./utils.js";
|
|
7
|
+
const WRITER = "run-launcher";
|
|
7
8
|
function slugify(value) {
|
|
8
9
|
return value.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 60);
|
|
9
10
|
}
|
|
@@ -128,7 +129,7 @@ export class RunLauncher {
|
|
|
128
129
|
? params.effectiveContext.failureHeadSha
|
|
129
130
|
: typeof params.effectiveContext?.headSha === "string" ? params.effectiveContext.headSha : undefined;
|
|
130
131
|
const failureSignature = typeof params.effectiveContext?.failureSignature === "string" ? params.effectiveContext.failureSignature : undefined;
|
|
131
|
-
|
|
132
|
+
const claimUpdate = {
|
|
132
133
|
projectId: params.item.projectId,
|
|
133
134
|
linearIssueId: params.item.issueId,
|
|
134
135
|
pendingRunType: null,
|
|
@@ -148,7 +149,18 @@ export class RunLauncher {
|
|
|
148
149
|
lastAttemptedFailureAt: new Date().toISOString(),
|
|
149
150
|
}
|
|
150
151
|
: {}),
|
|
152
|
+
};
|
|
153
|
+
const claimCommit = this.db.issueSessions.commitIssueState({
|
|
154
|
+
writer: WRITER,
|
|
155
|
+
// `wakeIssue` is the freshest row this claim transaction has seen
|
|
156
|
+
// (materializeLegacyPendingWake may have bumped the version).
|
|
157
|
+
expectedVersion: wakeIssue.version,
|
|
158
|
+
update: claimUpdate,
|
|
159
|
+
// Never steal a slot another writer claimed concurrently.
|
|
160
|
+
onConflict: (current) => (current.activeRunId == null ? claimUpdate : undefined),
|
|
151
161
|
});
|
|
162
|
+
if (claimCommit.outcome !== "applied")
|
|
163
|
+
return undefined;
|
|
152
164
|
this.db.issueSessions.consumeIssueSessionEvents(params.item.projectId, params.item.issueId, freshWake.eventIds, created.id);
|
|
153
165
|
this.db.issueSessions.setIssueSessionLastWakeReason(params.item.projectId, params.item.issueId, freshWake.wakeReason ?? null);
|
|
154
166
|
return created;
|
|
@@ -201,8 +213,17 @@ export class RunLauncher {
|
|
|
201
213
|
const thread = await this.codex.startThread({ cwd: params.worktreePath });
|
|
202
214
|
threadId = thread.id;
|
|
203
215
|
createdThreadForRun = true;
|
|
204
|
-
this.db.issueSessions.
|
|
216
|
+
this.db.issueSessions.commitIssueState({
|
|
217
|
+
writer: WRITER,
|
|
218
|
+
lease: { projectId: params.project.id, linearIssueId: params.issue.linearIssueId, leaseId: params.leaseId },
|
|
219
|
+
update: { projectId: params.project.id, linearIssueId: params.issue.linearIssueId, threadId },
|
|
220
|
+
});
|
|
205
221
|
}
|
|
222
|
+
// Plan §B5: persist the thread id on the run row BEFORE startTurn is
|
|
223
|
+
// awaited, so a turn/completed notification arriving while the turn is
|
|
224
|
+
// starting can already resolve the run by thread id. The orchestrator
|
|
225
|
+
// re-records it (with the turn id) after the launch returns.
|
|
226
|
+
this.recordRunThread(params, threadId, parentThreadId);
|
|
206
227
|
this.db.runs.updateLaunchPhase(params.run.id, "thread_started");
|
|
207
228
|
try {
|
|
208
229
|
const turn = await this.codex.startTurn({ threadId, cwd: params.worktreePath, input: params.prompt });
|
|
@@ -216,7 +237,14 @@ export class RunLauncher {
|
|
|
216
237
|
const thread = await this.codex.startThread({ cwd: params.worktreePath });
|
|
217
238
|
threadId = thread.id;
|
|
218
239
|
createdThreadForRun = true;
|
|
219
|
-
this.db.issueSessions.
|
|
240
|
+
this.db.issueSessions.commitIssueState({
|
|
241
|
+
writer: WRITER,
|
|
242
|
+
lease: { projectId: params.project.id, linearIssueId: params.issue.linearIssueId, leaseId: params.leaseId },
|
|
243
|
+
update: { projectId: params.project.id, linearIssueId: params.issue.linearIssueId, threadId },
|
|
244
|
+
});
|
|
245
|
+
// Plan §B5: re-point the run row at the fresh thread before the
|
|
246
|
+
// retried startTurn, for the same notification race.
|
|
247
|
+
this.recordRunThread(params, threadId, parentThreadId);
|
|
220
248
|
const turn = await this.codex.startTurn({ threadId, cwd: params.worktreePath, input: params.prompt });
|
|
221
249
|
turnId = turn.turnId;
|
|
222
250
|
this.db.runs.updateLaunchPhase(params.run.id, "turn_started");
|
|
@@ -236,15 +264,25 @@ export class RunLauncher {
|
|
|
236
264
|
const lostLease = error instanceof Error && error.name === "IssueSessionLeaseLostError";
|
|
237
265
|
if (!lostLease) {
|
|
238
266
|
const nextState = resolveFailureFactoryState(params.runType);
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
267
|
+
// Issue clear + run-terminal write ride in one transaction; the run
|
|
268
|
+
// finish is gated on the issue commit so a lost lease skips both.
|
|
269
|
+
this.db.transaction(() => {
|
|
270
|
+
const commit = this.db.issueSessions.commitIssueState({
|
|
271
|
+
writer: WRITER,
|
|
272
|
+
lease: { projectId: params.project.id, linearIssueId: params.issue.linearIssueId, leaseId: params.leaseId },
|
|
273
|
+
update: {
|
|
274
|
+
projectId: params.project.id,
|
|
275
|
+
linearIssueId: params.issue.linearIssueId,
|
|
276
|
+
activeRunId: null,
|
|
277
|
+
factoryState: nextState,
|
|
278
|
+
},
|
|
279
|
+
});
|
|
280
|
+
if (commit.outcome !== "applied")
|
|
281
|
+
return;
|
|
282
|
+
this.db.runs.finishRun(params.run.id, {
|
|
283
|
+
status: "failed",
|
|
284
|
+
failureReason: message,
|
|
285
|
+
});
|
|
248
286
|
});
|
|
249
287
|
}
|
|
250
288
|
this.logger.error({ issueKey: params.issue.issueKey, runType: params.runType, error: message }, `Failed to launch ${params.runType} run`);
|
|
@@ -255,6 +293,19 @@ export class RunLauncher {
|
|
|
255
293
|
throw error;
|
|
256
294
|
}
|
|
257
295
|
}
|
|
296
|
+
// Persist the Codex thread id on the run row under the launch lease.
|
|
297
|
+
// Losing the lease here aborts the launch the same way assertLaunchLease
|
|
298
|
+
// does — the run row must not be touched by a worker that no longer owns
|
|
299
|
+
// the session.
|
|
300
|
+
recordRunThread(params, threadId, parentThreadId) {
|
|
301
|
+
const recorded = this.db.issueSessions.updateRunThreadWithLease({ projectId: params.project.id, linearIssueId: params.issue.linearIssueId, leaseId: params.leaseId }, params.run.id, { threadId, ...(parentThreadId ? { parentThreadId } : {}) });
|
|
302
|
+
if (recorded)
|
|
303
|
+
return;
|
|
304
|
+
const error = new Error("Lost issue-session lease while recording the Codex thread id");
|
|
305
|
+
error.name = "IssueSessionLeaseLostError";
|
|
306
|
+
this.logger.warn({ runId: params.run.id, issueId: params.issue.linearIssueId }, "Aborting run launch after losing issue-session lease while recording the Codex thread id");
|
|
307
|
+
throw error;
|
|
308
|
+
}
|
|
258
309
|
async setInitialImplementationGoal(threadId, issue) {
|
|
259
310
|
const goalSetter = this.codex.setThreadGoal;
|
|
260
311
|
if (typeof goalSetter !== "function") {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { buildRunFailureActivity } from "./linear-session-reporting.js";
|
|
2
2
|
import { extractTurnId, resolveRunCompletionStatus } from "./run-reporting.js";
|
|
3
|
-
import { resolveRecoverablePostRunState } from "./interrupted-run-recovery.js";
|
|
4
3
|
import { resolveFailureFactoryState } from "./reactive-pr-state.js";
|
|
4
|
+
const WRITER = "run-notification-handler";
|
|
5
5
|
const DEFAULT_PUBLISH_COMMAND_TIMEOUT_MS = 10 * 60 * 1000;
|
|
6
6
|
export class RunNotificationHandler {
|
|
7
7
|
config;
|
|
@@ -90,19 +90,30 @@ export class RunNotificationHandler {
|
|
|
90
90
|
return;
|
|
91
91
|
}
|
|
92
92
|
const nextState = resolveFailureFactoryState(run.runType);
|
|
93
|
+
const failureUpdate = {
|
|
94
|
+
projectId: run.projectId,
|
|
95
|
+
linearIssueId: run.linearIssueId,
|
|
96
|
+
activeRunId: null,
|
|
97
|
+
factoryState: nextState,
|
|
98
|
+
};
|
|
93
99
|
const updated = this.withHeldIssueSessionLease(run.projectId, run.linearIssueId, (lease) => {
|
|
94
|
-
this.db.issueSessions.
|
|
100
|
+
const commit = this.db.issueSessions.commitIssueState({
|
|
101
|
+
writer: WRITER,
|
|
102
|
+
lease,
|
|
103
|
+
// The issue row was read before awaiting the failed-run recovery;
|
|
104
|
+
// only clear the slot if it still belongs to this run.
|
|
105
|
+
expectedVersion: issue.version,
|
|
106
|
+
update: failureUpdate,
|
|
107
|
+
onConflict: (current) => (current.activeRunId === run.id ? failureUpdate : undefined),
|
|
108
|
+
});
|
|
109
|
+
if (commit.outcome !== "applied")
|
|
110
|
+
return false;
|
|
111
|
+
this.db.runs.finishRun(run.id, {
|
|
95
112
|
status: "failed",
|
|
96
113
|
threadId,
|
|
97
114
|
...(completedTurnId ? { turnId: completedTurnId } : {}),
|
|
98
115
|
failureReason,
|
|
99
116
|
});
|
|
100
|
-
this.db.issueSessions.upsertIssueWithLease(lease, {
|
|
101
|
-
projectId: run.projectId,
|
|
102
|
-
linearIssueId: run.linearIssueId,
|
|
103
|
-
activeRunId: null,
|
|
104
|
-
factoryState: nextState,
|
|
105
|
-
});
|
|
106
117
|
return true;
|
|
107
118
|
});
|
|
108
119
|
if (!updated) {
|
|
@@ -134,7 +145,6 @@ export class RunNotificationHandler {
|
|
|
134
145
|
thread,
|
|
135
146
|
threadId,
|
|
136
147
|
...(completedTurnId ? { completedTurnId } : {}),
|
|
137
|
-
resolveRecoverableRunState: resolveRecoverablePostRunState,
|
|
138
148
|
});
|
|
139
149
|
this.activeThreadId = undefined;
|
|
140
150
|
}
|
package/dist/run-orchestrator.js
CHANGED
|
@@ -9,16 +9,16 @@ import { IdleIssueReconciler } from "./idle-reconciliation.js";
|
|
|
9
9
|
import { LinearSessionSync } from "./linear-session-sync.js";
|
|
10
10
|
import { recoverLinearAgentActivityContext } from "./linear-agent-activity-recovery.js";
|
|
11
11
|
import { IssueSessionLeaseService } from "./issue-session-lease-service.js";
|
|
12
|
-
import { InterruptedRunRecovery } from "./interrupted-run-recovery.js";
|
|
13
12
|
import { RunCompletionPolicy } from "./run-completion-policy.js";
|
|
13
|
+
import { RunFailurePolicy } from "./run-failure-policy.js";
|
|
14
14
|
import { RunFinalizer } from "./run-finalizer.js";
|
|
15
15
|
import { RunLauncher } from "./run-launcher.js";
|
|
16
16
|
import { RunNotificationHandler } from "./run-notification-handler.js";
|
|
17
17
|
import { RunReconciler } from "./run-reconciler.js";
|
|
18
|
-
import { RunRecoveryService } from "./run-recovery-service.js";
|
|
19
18
|
import { RunWakePlanner } from "./run-wake-planner.js";
|
|
20
19
|
import { WakeDispatcher } from "./wake-dispatcher.js";
|
|
21
|
-
import {
|
|
20
|
+
import { settleRun } from "./run-settlement.js";
|
|
21
|
+
import { getRemainingZombieRecoveryDelayMs } from "./run-budgets.js";
|
|
22
22
|
import { classifyIssue } from "./issue-class.js";
|
|
23
23
|
import { buildIssueTriageHash, IssueTriageService } from "./issue-triage.js";
|
|
24
24
|
import { loadConfig } from "./config.js";
|
|
@@ -26,10 +26,7 @@ import { CodexThreadMaterializingError, isThreadMaterializingError } from "./cod
|
|
|
26
26
|
import { emitTelemetry, noopTelemetry } from "./telemetry.js";
|
|
27
27
|
import { LinearIssueProjectionService } from "./linear-issue-projection.js";
|
|
28
28
|
import { RunAdmissionController } from "./run-admission-controller.js";
|
|
29
|
-
|
|
30
|
-
// the orchestrator force-clears it, so we never race the normal
|
|
31
|
-
// notification-driven finalize that runs within seconds of completion.
|
|
32
|
-
const DANGLING_ACTIVE_RUN_MIN_AGE_MS = 2 * 60_000;
|
|
29
|
+
const WRITER = "run-orchestrator";
|
|
33
30
|
function lowerCaseFirst(value) {
|
|
34
31
|
return value ? `${value.slice(0, 1).toLowerCase()}${value.slice(1)}` : value;
|
|
35
32
|
}
|
|
@@ -61,9 +58,8 @@ export class RunOrchestrator {
|
|
|
61
58
|
leaseService;
|
|
62
59
|
runFinalizer;
|
|
63
60
|
runLauncher;
|
|
64
|
-
|
|
61
|
+
runFailurePolicy;
|
|
65
62
|
runWakePlanner;
|
|
66
|
-
interruptedRunRecovery;
|
|
67
63
|
runCompletionPolicy;
|
|
68
64
|
completionCheck;
|
|
69
65
|
issueTriage;
|
|
@@ -85,7 +81,6 @@ export class RunOrchestrator {
|
|
|
85
81
|
recoveryPorts = {
|
|
86
82
|
failRunAndClear: (run, message, nextState) => this.failRunAndClear(run, message, nextState),
|
|
87
83
|
restoreIdleWorktree: (issue) => this.restoreIdleWorktree(issue),
|
|
88
|
-
recoverOrEscalate: (issue, runType, reason) => this.recoverOrEscalate(issue, runType, reason),
|
|
89
84
|
};
|
|
90
85
|
activeSessionLeases;
|
|
91
86
|
botIdentity;
|
|
@@ -137,9 +132,8 @@ export class RunOrchestrator {
|
|
|
137
132
|
this.runFinalizer = new RunFinalizer(db, logger, this.linearSync, this.wakeDispatcher, this.leasePorts.withHeldLease, this.leasePorts.releaseLease, (lease, issue, runType, context, dedupeScope) => this.appendWakeEventWithLease(lease, issue, runType, context, dedupeScope), this.recoveryPorts.failRunAndClear, this.runCompletionPolicy, this.completionCheck, feed);
|
|
138
133
|
this.runLauncher = new RunLauncher(config, db, codex, logger, this.worktreeManager);
|
|
139
134
|
this.runNotificationHandler = new RunNotificationHandler(config, db, logger, this.linearSync, this.runFinalizer, this.threadPorts.readThreadWithRetry, this.leasePorts.withHeldLease, this.leasePorts.heartbeatLease, this.leasePorts.releaseLease, feed, { interruptTurn: (options) => codex.interruptTurn(options) });
|
|
140
|
-
this.
|
|
141
|
-
this.
|
|
142
|
-
this.runReconciler = new RunReconciler(db, logger, linearProvider, this.linearSync, this.interruptedRunRecovery, this.runFinalizer, this.leasePorts.withHeldLease, this.leasePorts.releaseLease, this.threadPorts.readThreadWithRetry, this.recoveryPorts.recoverOrEscalate, (projectId) => this.config.projects.find((project) => project.id === projectId)?.github?.repoFullName, feed);
|
|
135
|
+
this.runFailurePolicy = new RunFailurePolicy(db, logger, this.linearSync, this.leasePorts.withHeldLease, this.leasePorts.releaseLease, (lease, issue, runType, context, dedupeScope) => this.appendWakeEventWithLease(lease, issue, runType, context, dedupeScope), this.wakeDispatcher, this.recoveryPorts.restoreIdleWorktree, this.runCompletionPolicy, (projectId) => this.config.projects.find((project) => project.id === projectId), feed);
|
|
136
|
+
this.runReconciler = new RunReconciler(db, logger, linearProvider, this.linearSync, this.runFailurePolicy, this.runFinalizer, this.leasePorts.withHeldLease, this.leasePorts.releaseLease, this.threadPorts.readThreadWithRetry, (projectId) => this.config.projects.find((project) => project.id === projectId)?.github?.repoFullName, feed, telemetry);
|
|
143
137
|
this.runWakePlanner = new RunWakePlanner(db);
|
|
144
138
|
this.linearIssueProjection = new LinearIssueProjectionService(db, linearProvider, logger);
|
|
145
139
|
this.runAdmission = new RunAdmissionController(db, this.linearIssueProjection);
|
|
@@ -224,14 +218,21 @@ export class RunOrchestrator {
|
|
|
224
218
|
try {
|
|
225
219
|
const triage = await this.issueTriage.classify({ issue, childIssues });
|
|
226
220
|
if (triage) {
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
221
|
+
// The triage verdict is an external classifier response; persist it
|
|
222
|
+
// unconditionally so a benign version bump during the (slow) triage
|
|
223
|
+
// call cannot discard the result.
|
|
224
|
+
const triageCommit = this.db.issueSessions.commitIssueState({
|
|
225
|
+
writer: WRITER,
|
|
226
|
+
update: {
|
|
227
|
+
projectId: issue.projectId,
|
|
228
|
+
linearIssueId: issue.linearIssueId,
|
|
229
|
+
issueClass: triage.issueClass,
|
|
230
|
+
issueClassSource: "triage",
|
|
231
|
+
issueTriageHash: triageHash,
|
|
232
|
+
issueTriageResultJson: JSON.stringify(triage),
|
|
233
|
+
},
|
|
234
234
|
});
|
|
235
|
+
return triageCommit.outcome === "applied" ? triageCommit.issue : issue;
|
|
235
236
|
}
|
|
236
237
|
}
|
|
237
238
|
catch (error) {
|
|
@@ -242,12 +243,22 @@ export class RunOrchestrator {
|
|
|
242
243
|
const fallbackClassification = classification.issueClassSource === "triage" && !triageCacheFresh
|
|
243
244
|
? { issueClass: "implementation", issueClassSource: "heuristic" }
|
|
244
245
|
: classification;
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
246
|
+
const fallbackCommit = this.db.issueSessions.commitIssueState({
|
|
247
|
+
writer: WRITER,
|
|
248
|
+
expectedVersion: issue.version,
|
|
249
|
+
update: {
|
|
250
|
+
projectId: issue.projectId,
|
|
251
|
+
linearIssueId: issue.linearIssueId,
|
|
252
|
+
issueClass: fallbackClassification.issueClass,
|
|
253
|
+
issueClassSource: fallbackClassification.issueClassSource,
|
|
254
|
+
},
|
|
255
|
+
// A concurrent writer is newer truth; the next pass reclassifies.
|
|
256
|
+
onConflict: () => undefined,
|
|
250
257
|
});
|
|
258
|
+
if (fallbackCommit.outcome === "applied") {
|
|
259
|
+
return fallbackCommit.issue;
|
|
260
|
+
}
|
|
261
|
+
return (fallbackCommit.outcome === "conflict_skipped" ? fallbackCommit.issue : undefined) ?? issue;
|
|
251
262
|
}
|
|
252
263
|
// ─── Run ────────────────────────────────────────────────────────
|
|
253
264
|
async run(item) {
|
|
@@ -309,7 +320,11 @@ export class RunOrchestrator {
|
|
|
309
320
|
return;
|
|
310
321
|
}
|
|
311
322
|
if (issue.prState === "merged") {
|
|
312
|
-
this.db.issueSessions.
|
|
323
|
+
this.db.issueSessions.commitIssueState({
|
|
324
|
+
writer: WRITER,
|
|
325
|
+
lease: { projectId: issue.projectId, linearIssueId: issue.linearIssueId, leaseId },
|
|
326
|
+
update: { projectId: issue.projectId, linearIssueId: issue.linearIssueId, pendingRunType: null, factoryState: "done" },
|
|
327
|
+
});
|
|
313
328
|
this.leaseService.release(item.projectId, item.issueId);
|
|
314
329
|
return;
|
|
315
330
|
}
|
|
@@ -479,11 +494,15 @@ export class RunOrchestrator {
|
|
|
479
494
|
}
|
|
480
495
|
// Reset zombie recovery counter — this run started successfully
|
|
481
496
|
if (issue.zombieRecoveryAttempts > 0) {
|
|
482
|
-
this.db.issueSessions.
|
|
483
|
-
|
|
484
|
-
linearIssueId: item.issueId,
|
|
485
|
-
|
|
486
|
-
|
|
497
|
+
this.db.issueSessions.commitIssueState({
|
|
498
|
+
writer: WRITER,
|
|
499
|
+
lease: { projectId: item.projectId, linearIssueId: item.issueId, leaseId },
|
|
500
|
+
update: {
|
|
501
|
+
projectId: item.projectId,
|
|
502
|
+
linearIssueId: item.issueId,
|
|
503
|
+
zombieRecoveryAttempts: 0,
|
|
504
|
+
lastZombieRecoveryAt: null,
|
|
505
|
+
},
|
|
487
506
|
});
|
|
488
507
|
}
|
|
489
508
|
this.logger.info({ issueKey: issue.issueKey, runType, threadId, turnId }, `Started ${runType} run`);
|
|
@@ -563,10 +582,10 @@ export class RunOrchestrator {
|
|
|
563
582
|
for (const run of this.db.runs.listRunningRuns()) {
|
|
564
583
|
await this.reconcileRun(run);
|
|
565
584
|
}
|
|
566
|
-
//
|
|
585
|
+
// Settle any issue whose active slot is pinned to an already-terminal
|
|
567
586
|
// run (post-run finalize interrupted by restart). Must run before the
|
|
568
587
|
// idle reconciler so the freed issue is routed in this same pass.
|
|
569
|
-
this.
|
|
588
|
+
this.settleDanglingActiveRuns();
|
|
570
589
|
// Preemptively detect stuck merge-queue PRs (conflicts visible on
|
|
571
590
|
// GitHub) and dispatch queue_repair before the Steward evicts.
|
|
572
591
|
await this.queueHealthMonitor.reconcile();
|
|
@@ -579,62 +598,35 @@ export class RunOrchestrator {
|
|
|
579
598
|
advanceIdleIssue(issue, newState, options) {
|
|
580
599
|
this.idleReconciler.advanceIdleIssue(issue, newState, options);
|
|
581
600
|
}
|
|
582
|
-
|
|
583
|
-
* After a zombie/stale run is cleared, decide whether to re-enqueue
|
|
584
|
-
* or escalate. Checks: PR already merged → done; budget exhausted →
|
|
585
|
-
* escalate; backoff delay not elapsed → skip.
|
|
586
|
-
*/
|
|
587
|
-
recoverOrEscalate(issue, runType, reason) {
|
|
588
|
-
this.runRecovery.recoverOrEscalate({
|
|
589
|
-
issue,
|
|
590
|
-
runType,
|
|
591
|
-
reason,
|
|
592
|
-
isRequestedChangesRunType,
|
|
593
|
-
});
|
|
594
|
-
}
|
|
595
|
-
// Clear a dangling active slot: an issue still pointing at an
|
|
601
|
+
// Settle a dangling active slot: an issue still pointing at an
|
|
596
602
|
// already-terminal run via `activeRunId`. The post-run finalize was
|
|
597
603
|
// interrupted (almost always a restart between marking the run
|
|
598
604
|
// terminal and clearing the slot), so the run can never drive the
|
|
599
605
|
// session forward, yet every idle/recovery pass skips the issue
|
|
600
|
-
// because `activeRunId` is set.
|
|
601
|
-
//
|
|
602
|
-
//
|
|
603
|
-
|
|
606
|
+
// because `activeRunId` is set. settleRun is idempotent and its slot
|
|
607
|
+
// clear is a predicate-guarded versioned commit, so no age gate is
|
|
608
|
+
// needed — it cannot destructively race the notification finalizer.
|
|
609
|
+
// The idle reconciler then routes the issue from GitHub truth (e.g. a
|
|
610
|
+
// missed changes_requested → review_fix).
|
|
611
|
+
settleDanglingActiveRuns() {
|
|
604
612
|
for (const issue of this.db.issues.listIssuesWithTerminalActiveRun()) {
|
|
605
613
|
if (issue.activeRunId === undefined)
|
|
606
614
|
continue;
|
|
607
615
|
const run = this.db.runs.getRunById(issue.activeRunId);
|
|
608
|
-
|
|
609
|
-
// race where the run advanced back to active between query and read.
|
|
610
|
-
if (!run || run.status === "running" || run.status === "queued")
|
|
611
|
-
continue;
|
|
612
|
-
// Hold off until the run has been terminal long enough that the
|
|
613
|
-
// normal notification-driven finalize has demonstrably not run —
|
|
614
|
-
// avoids racing a live completion that is milliseconds from clearing
|
|
615
|
-
// the slot itself.
|
|
616
|
-
const endedAtMs = run.endedAt ? Date.parse(run.endedAt) : Number.NaN;
|
|
617
|
-
if (Number.isFinite(endedAtMs) && Date.now() - endedAtMs < DANGLING_ACTIVE_RUN_MIN_AGE_MS)
|
|
616
|
+
if (!run)
|
|
618
617
|
continue;
|
|
619
618
|
const lease = this.claimLeaseForReconciliation(run.projectId, run.linearIssueId);
|
|
620
|
-
// "skip" → a live lease owns the session (a
|
|
621
|
-
//
|
|
622
|
-
//
|
|
619
|
+
// "skip" → a live lease owns the session (a worker is mid-finalize or
|
|
620
|
+
// mid-launch); settleRun could not corrupt its writes, but deferring
|
|
621
|
+
// lets the owner land its richer post-run state first. "owned" → an
|
|
622
|
+
// outer local scope holds it, so we must not release it here.
|
|
623
623
|
if (lease === "skip")
|
|
624
624
|
continue;
|
|
625
625
|
try {
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
this.db.issueSessions.upsertIssueWithLease(held, {
|
|
631
|
-
projectId: run.projectId,
|
|
632
|
-
linearIssueId: run.linearIssueId,
|
|
633
|
-
activeRunId: null,
|
|
634
|
-
});
|
|
635
|
-
return true;
|
|
636
|
-
});
|
|
637
|
-
if (cleared) {
|
|
626
|
+
// No `finish` outcome: the run is already terminal, and settleRun
|
|
627
|
+
// leaves a run that raced back to non-terminal status untouched.
|
|
628
|
+
const settled = this.withHeldIssueSessionLease(run.projectId, run.linearIssueId, (held) => settleRun({ db: this.db, run, lease: held }));
|
|
629
|
+
if (settled?.slotCleared) {
|
|
638
630
|
this.logger.warn({ issueKey: issue.issueKey, runId: run.id, runType: run.runType, runStatus: run.status }, "Cleared dangling active-run slot left by a terminal run; idle reconcile will resume the issue");
|
|
639
631
|
this.feed?.publish({
|
|
640
632
|
level: "warn",
|
|
@@ -667,14 +659,14 @@ export class RunOrchestrator {
|
|
|
667
659
|
}
|
|
668
660
|
// ─── Internal helpers ─────────────────────────────────────────────
|
|
669
661
|
escalate(issue, runType, reason) {
|
|
670
|
-
this.
|
|
662
|
+
this.runFailurePolicy.escalate({
|
|
671
663
|
issue,
|
|
672
664
|
runType,
|
|
673
665
|
reason,
|
|
674
666
|
});
|
|
675
667
|
}
|
|
676
668
|
failRunAndClear(run, message, nextState = "failed") {
|
|
677
|
-
this.
|
|
669
|
+
this.runFailurePolicy.failRunAndClear({
|
|
678
670
|
run,
|
|
679
671
|
message,
|
|
680
672
|
nextState,
|