patchrelay 0.75.2 → 0.75.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-info.json +3 -3
- package/dist/db/issue-store.js +20 -0
- package/dist/run-orchestrator.js +69 -0
- package/package.json +1 -1
package/dist/build-info.json
CHANGED
package/dist/db/issue-store.js
CHANGED
|
@@ -92,6 +92,26 @@ export class IssueStore {
|
|
|
92
92
|
.all();
|
|
93
93
|
return rows.map(mapIssueRow);
|
|
94
94
|
}
|
|
95
|
+
// Recovery net for a dangling active slot: an issue whose
|
|
96
|
+
// `active_run_id` still points at a run that has already reached a
|
|
97
|
+
// terminal status. This happens when the post-run finalize never ran
|
|
98
|
+
// to completion — almost always a service restart landing between
|
|
99
|
+
// `finishRun` (which marks the run terminal) and the issue write that
|
|
100
|
+
// clears `active_run_id` and arms the next wake. The Codex
|
|
101
|
+
// `turn/completed` notification that would finalize it never re-fires
|
|
102
|
+
// after restart, and every idle/recovery pass gates on
|
|
103
|
+
// `active_run_id IS NULL`, so the issue is invisible to all of them
|
|
104
|
+
// and freezes indefinitely. The orchestrator clears the slot so the
|
|
105
|
+
// idle reconciler can route the issue forward (review_fix, etc.).
|
|
106
|
+
listIssuesWithTerminalActiveRun() {
|
|
107
|
+
const rows = this.connection
|
|
108
|
+
.prepare(`SELECT i.* FROM issues i
|
|
109
|
+
JOIN runs r ON r.id = i.active_run_id
|
|
110
|
+
WHERE i.active_run_id IS NOT NULL
|
|
111
|
+
AND r.status IN ('completed', 'failed', 'released', 'superseded')`)
|
|
112
|
+
.all();
|
|
113
|
+
return rows.map(mapIssueRow);
|
|
114
|
+
}
|
|
95
115
|
// Safety net for orphaned wakes: any delegated, non-terminal issue
|
|
96
116
|
// with at least one unprocessed session event but no active run.
|
|
97
117
|
// The orchestrator's enqueueIssue is the only path that drains these
|
package/dist/run-orchestrator.js
CHANGED
|
@@ -26,6 +26,10 @@ import { CodexThreadMaterializingError, isThreadMaterializingError } from "./cod
|
|
|
26
26
|
import { emitTelemetry, noopTelemetry } from "./telemetry.js";
|
|
27
27
|
import { LinearIssueProjectionService } from "./linear-issue-projection.js";
|
|
28
28
|
import { RunAdmissionController } from "./run-admission-controller.js";
|
|
29
|
+
// A terminal run must hold the active slot for at least this long before
|
|
30
|
+
// the orchestrator force-clears it, so we never race the normal
|
|
31
|
+
// notification-driven finalize that runs within seconds of completion.
|
|
32
|
+
// Two minutes, expressed in milliseconds (2 * 60_000 ms). It is compared
// against `Date.now() - Date.parse(run.endedAt)` in finalizeDanglingActiveRuns.
const DANGLING_ACTIVE_RUN_MIN_AGE_MS = 2 * 60_000;
|
|
29
33
|
/**
 * Lower-cases the first character of a string and leaves the rest untouched.
 *
 * The first character is taken as a whole Unicode code point, so a leading
 * character outside the Basic Multilingual Plane (stored as a surrogate pair)
 * is lower-cased as one unit instead of being split in half.
 *
 * @param {string} value - Text to adjust. Falsy values ("" / null / undefined)
 *   are returned unchanged.
 * @returns {string} `value` with its first code point lower-cased, or the
 *   original falsy value.
 */
function lowerCaseFirst(value) {
    if (!value) {
        return value;
    }
    // codePointAt(0) reads a full surrogate pair when one is present, so
    // `first` is one or two UTF-16 code units long.
    const first = String.fromCodePoint(value.codePointAt(0));
    return `${first.toLowerCase()}${value.slice(first.length)}`;
}
|
|
@@ -559,6 +563,10 @@ export class RunOrchestrator {
|
|
|
559
563
|
for (const run of this.db.runs.listRunningRuns()) {
|
|
560
564
|
await this.reconcileRun(run);
|
|
561
565
|
}
|
|
566
|
+
// Free any issue whose active slot is pinned to an already-terminal
|
|
567
|
+
// run (post-run finalize interrupted by restart). Must run before the
|
|
568
|
+
// idle reconciler so the freed issue is routed in this same pass.
|
|
569
|
+
this.finalizeDanglingActiveRuns();
|
|
562
570
|
// Preemptively detect stuck merge-queue PRs (conflicts visible on
|
|
563
571
|
// GitHub) and dispatch queue_repair before the Steward evicts.
|
|
564
572
|
await this.queueHealthMonitor.reconcile();
|
|
@@ -584,6 +592,67 @@ export class RunOrchestrator {
|
|
|
584
592
|
isRequestedChangesRunType,
|
|
585
593
|
});
|
|
586
594
|
}
|
|
595
|
+
// Clear a dangling active slot: an issue still pointing at an
|
|
596
|
+
// already-terminal run via `activeRunId`. The post-run finalize was
|
|
597
|
+
// interrupted (almost always a restart between marking the run
|
|
598
|
+
// terminal and clearing the slot), so the run can never drive the
|
|
599
|
+
// session forward, yet every idle/recovery pass skips the issue
|
|
600
|
+
// because `activeRunId` is set. We re-read under the issue-session
|
|
601
|
+
// lease and null the slot; the idle reconciler then routes the issue
|
|
602
|
+
// from GitHub truth (e.g. a missed changes_requested → review_fix).
|
|
603
|
+
finalizeDanglingActiveRuns() {
|
|
604
|
+
for (const issue of this.db.issues.listIssuesWithTerminalActiveRun()) {
|
|
605
|
+
if (issue.activeRunId === undefined)
|
|
606
|
+
continue;
|
|
607
|
+
const run = this.db.runs.getRunById(issue.activeRunId);
|
|
608
|
+
// The query already filters to terminal runs; this guards against a
|
|
609
|
+
// race where the run advanced back to active between query and read.
|
|
610
|
+
if (!run || run.status === "running" || run.status === "queued")
|
|
611
|
+
continue;
|
|
612
|
+
// Hold off until the run has been terminal long enough that the
|
|
613
|
+
// normal notification-driven finalize has demonstrably not run —
|
|
614
|
+
// avoids racing a live completion that is milliseconds from clearing
|
|
615
|
+
// the slot itself.
|
|
616
|
+
const endedAtMs = run.endedAt ? Date.parse(run.endedAt) : Number.NaN;
|
|
617
|
+
if (Number.isFinite(endedAtMs) && Date.now() - endedAtMs < DANGLING_ACTIVE_RUN_MIN_AGE_MS)
|
|
618
|
+
continue;
|
|
619
|
+
const lease = this.claimLeaseForReconciliation(run.projectId, run.linearIssueId);
|
|
620
|
+
// "skip" → a live lease owns the session (a real run is in flight);
|
|
621
|
+
// leave it alone. "owned" → an outer local scope holds it, so we
|
|
622
|
+
// must not release it here.
|
|
623
|
+
if (lease === "skip")
|
|
624
|
+
continue;
|
|
625
|
+
try {
|
|
626
|
+
const cleared = this.withHeldIssueSessionLease(run.projectId, run.linearIssueId, (held) => {
|
|
627
|
+
const fresh = this.db.issues.getIssue(run.projectId, run.linearIssueId);
|
|
628
|
+
if (!fresh || fresh.activeRunId !== run.id)
|
|
629
|
+
return false;
|
|
630
|
+
this.db.issueSessions.upsertIssueWithLease(held, {
|
|
631
|
+
projectId: run.projectId,
|
|
632
|
+
linearIssueId: run.linearIssueId,
|
|
633
|
+
activeRunId: null,
|
|
634
|
+
});
|
|
635
|
+
return true;
|
|
636
|
+
});
|
|
637
|
+
if (cleared) {
|
|
638
|
+
this.logger.warn({ issueKey: issue.issueKey, runId: run.id, runType: run.runType, runStatus: run.status }, "Cleared dangling active-run slot left by a terminal run; idle reconcile will resume the issue");
|
|
639
|
+
this.feed?.publish({
|
|
640
|
+
level: "warn",
|
|
641
|
+
kind: "workflow",
|
|
642
|
+
issueKey: issue.issueKey,
|
|
643
|
+
projectId: run.projectId,
|
|
644
|
+
stage: run.runType,
|
|
645
|
+
status: "recovered",
|
|
646
|
+
summary: `Cleared stuck active slot: run #${run.id} was ${run.status} but still held the issue`,
|
|
647
|
+
});
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
finally {
|
|
651
|
+
if (lease !== "owned")
|
|
652
|
+
this.releaseIssueSessionLease(run.projectId, run.linearIssueId);
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
}
|
|
587
656
|
async reconcileRun(run) {
|
|
588
657
|
const issue = this.db.issues.getIssue(run.projectId, run.linearIssueId);
|
|
589
658
|
if (!issue)
|