patchrelay 0.32.3 → 0.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-info.json +3 -3
- package/dist/db.js +14 -0
- package/dist/run-orchestrator.js +110 -0
- package/package.json +1 -1
package/dist/build-info.json
CHANGED
package/dist/db.js
CHANGED
|
@@ -484,6 +484,20 @@ export class PatchRelayDatabase {
|
|
|
484
484
|
.all();
|
|
485
485
|
return rows.map(mapIssueRow);
|
|
486
486
|
}
|
|
487
|
+
/**
|
|
488
|
+
* Issues waiting in the merge queue with no active or pending run.
|
|
489
|
+
* Used by the queue health monitor to probe GitHub for stuck PRs.
|
|
490
|
+
*/
|
|
491
|
+
listAwaitingQueueIssues() {
|
|
492
|
+
const rows = this.connection
|
|
493
|
+
.prepare(`SELECT * FROM issues
|
|
494
|
+
WHERE factory_state = 'awaiting_queue'
|
|
495
|
+
AND active_run_id IS NULL
|
|
496
|
+
AND pending_run_type IS NULL
|
|
497
|
+
AND pr_number IS NOT NULL`)
|
|
498
|
+
.all();
|
|
499
|
+
return rows.map(mapIssueRow);
|
|
500
|
+
}
|
|
487
501
|
listIssuesByState(projectId, state) {
|
|
488
502
|
const rows = this.connection
|
|
489
503
|
.prepare("SELECT * FROM issues WHERE project_id = ? AND factory_state = ? ORDER BY pr_number ASC")
|
package/dist/run-orchestrator.js
CHANGED
|
@@ -17,6 +17,12 @@ const DEFAULT_QUEUE_REPAIR_BUDGET = 3;
|
|
|
17
17
|
const DEFAULT_REVIEW_FIX_BUDGET = 3;
const DEFAULT_ZOMBIE_RECOVERY_BUDGET = 5;
// Base delay for zombie recovery. Original note lists the resulting schedule
// as 15s, 30s, 60s, 120s, 240s (the doubling itself is not visible here).
const ZOMBIE_RECOVERY_BASE_DELAY_MS = 15 * 1000;
// Queue health monitor: how long a freshly-queued PR is left alone before it
// is probed (2 minutes).
// TODO: replace updatedAt with a true factory_state_changed_at timestamp —
// updatedAt can reset on unrelated row mutations (e.g. webhook metadata).
const QUEUE_HEALTH_GRACE_MS = 2 * 60 * 1000;
// Minimum spacing between probe-failure feed events for one issue (5 minutes),
// so repeated failures do not flood the feed.
const QUEUE_HEALTH_PROBE_FAILURE_COOLDOWN_MS = 5 * 60 * 1000;
|
|
20
26
|
/**
 * Turn free-form text into a lowercase, hyphen-separated slug of at most
 * 60 characters.
 *
 * Every run of characters outside `a-z0-9` becomes a single `-`, and hyphens
 * at either end are removed. The trailing hyphen is stripped a second time
 * after truncation, because cutting at 60 characters can land directly after
 * a separator and would otherwise leave the slug ending in `-`.
 *
 * @param {string} value Text to slugify.
 * @returns {string} The slug; empty when `value` has no alphanumeric characters.
 */
function slugify(value) {
    return value
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, "-")
        .replace(/^-+|-+$/g, "")
        .slice(0, 60)
        .replace(/-+$/, "");
}
|
|
@@ -98,6 +104,8 @@ export class RunOrchestrator {
|
|
|
98
104
|
feed;
|
|
99
105
|
worktreeManager;
|
|
100
106
|
progressThrottle = new Map();
|
|
107
|
+
/** Tracks last probe-failure feed event per issue to avoid spamming the operator feed. */
|
|
108
|
+
probeFailureFeedTimes = new Map();
|
|
101
109
|
activeThreadId;
|
|
102
110
|
botIdentity;
|
|
103
111
|
constructor(config, db, codex, linearProvider, enqueueIssue, logger, feed) {
|
|
@@ -555,10 +563,112 @@ export class RunOrchestrator {
|
|
|
555
563
|
for (const run of this.db.listRunningRuns()) {
|
|
556
564
|
await this.reconcileRun(run);
|
|
557
565
|
}
|
|
566
|
+
// Preemptively detect stuck merge-queue PRs (conflicts visible on
|
|
567
|
+
// GitHub) and dispatch queue_repair before the Steward evicts.
|
|
568
|
+
await this.reconcileQueueHealth();
|
|
558
569
|
// Advance issues stuck in pr_open whose stored PR metadata already
|
|
559
570
|
// shows they should transition (e.g. approved PR, missed webhook).
|
|
560
571
|
await this.reconcileIdleIssues();
|
|
561
572
|
}
|
|
573
|
+
// ─── Queue Health Monitor ──────────────────────────────────────────
|
|
574
|
+
async reconcileQueueHealth() {
|
|
575
|
+
for (const issue of this.db.listAwaitingQueueIssues()) {
|
|
576
|
+
await this.probeQueuedIssue(issue);
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
async probeQueuedIssue(issue) {
|
|
580
|
+
if (!issue.prNumber)
|
|
581
|
+
return;
|
|
582
|
+
const project = this.config.projects.find((p) => p.id === issue.projectId);
|
|
583
|
+
if (!project?.github?.repoFullName)
|
|
584
|
+
return;
|
|
585
|
+
// Grace period — don't probe PRs that just entered the queue.
|
|
586
|
+
const age = Date.now() - Date.parse(issue.updatedAt);
|
|
587
|
+
if (age < QUEUE_HEALTH_GRACE_MS)
|
|
588
|
+
return;
|
|
589
|
+
const protocol = resolveMergeQueueProtocol(project);
|
|
590
|
+
let pr;
|
|
591
|
+
try {
|
|
592
|
+
const { stdout } = await execCommand("gh", [
|
|
593
|
+
"pr", "view", String(issue.prNumber),
|
|
594
|
+
"--repo", project.github.repoFullName,
|
|
595
|
+
"--json", "state,mergeable,mergeStateStatus,headRefOid,labels",
|
|
596
|
+
], { timeoutMs: 10_000 });
|
|
597
|
+
pr = JSON.parse(stdout);
|
|
598
|
+
}
|
|
599
|
+
catch (error) {
|
|
600
|
+
this.logger.debug({ issueKey: issue.issueKey, prNumber: issue.prNumber, error: error instanceof Error ? error.message : String(error) }, "Queue health: failed to probe GitHub PR state");
|
|
601
|
+
// Throttle feed events — at most one per issue per cooldown window.
|
|
602
|
+
const issueKey = `${issue.projectId}::${issue.linearIssueId}`;
|
|
603
|
+
const lastFeedAt = this.probeFailureFeedTimes.get(issueKey) ?? 0;
|
|
604
|
+
if (Date.now() - lastFeedAt >= QUEUE_HEALTH_PROBE_FAILURE_COOLDOWN_MS) {
|
|
605
|
+
this.probeFailureFeedTimes.set(issueKey, Date.now());
|
|
606
|
+
this.feed?.publish({
|
|
607
|
+
level: "info",
|
|
608
|
+
kind: "github",
|
|
609
|
+
issueKey: issue.issueKey,
|
|
610
|
+
projectId: issue.projectId,
|
|
611
|
+
stage: "awaiting_queue",
|
|
612
|
+
status: "queue_health_probe_failed",
|
|
613
|
+
summary: `Queue health: failed to probe PR #${issue.prNumber}`,
|
|
614
|
+
});
|
|
615
|
+
}
|
|
616
|
+
return;
|
|
617
|
+
}
|
|
618
|
+
// Successful probe — clear any probe-failure throttle for this issue.
|
|
619
|
+
this.probeFailureFeedTimes.delete(`${issue.projectId}::${issue.linearIssueId}`);
|
|
620
|
+
// Missed merge webhook — advance to done.
|
|
621
|
+
if (pr.state === "MERGED") {
|
|
622
|
+
this.db.upsertIssue({ projectId: issue.projectId, linearIssueId: issue.linearIssueId, prState: "merged" });
|
|
623
|
+
this.advanceIdleIssue(issue, "done", { clearFailureProvenance: true });
|
|
624
|
+
return;
|
|
625
|
+
}
|
|
626
|
+
// Non-open PRs (closed, draft) — don't enter repair logic.
|
|
627
|
+
if (pr.state !== "OPEN")
|
|
628
|
+
return;
|
|
629
|
+
// Verify admission label is still present — if the Steward removed it
|
|
630
|
+
// (eviction, dequeue) but PatchRelay missed the webhook, we should not
|
|
631
|
+
// treat a DIRTY PR as a queue-health problem.
|
|
632
|
+
const hasQueueLabel = pr.labels?.some((l) => l.name === protocol.admissionLabel) ?? false;
|
|
633
|
+
if (!hasQueueLabel)
|
|
634
|
+
return;
|
|
635
|
+
// Conflict detected — dispatch preemptive queue repair.
|
|
636
|
+
if (pr.mergeStateStatus === "DIRTY" || pr.mergeable === "CONFLICTING") {
|
|
637
|
+
const headRefOid = pr.headRefOid ?? "unknown";
|
|
638
|
+
// TODO: include baseSha in signature (headRefOid + baseSha) so that a
|
|
639
|
+
// main-only advance with the same PR head is recognized as a new conflict.
|
|
640
|
+
const signature = `preemptive_queue_conflict:${headRefOid}`;
|
|
641
|
+
const pendingRunContext = {
|
|
642
|
+
source: "queue_health_monitor",
|
|
643
|
+
failureReason: "preemptive_conflict",
|
|
644
|
+
failureHeadSha: headRefOid,
|
|
645
|
+
failureSignature: signature,
|
|
646
|
+
};
|
|
647
|
+
if (isDuplicateRepairAttempt(issue, pendingRunContext)) {
|
|
648
|
+
return;
|
|
649
|
+
}
|
|
650
|
+
this.db.upsertIssue({
|
|
651
|
+
projectId: issue.projectId,
|
|
652
|
+
linearIssueId: issue.linearIssueId,
|
|
653
|
+
lastAttemptedFailureHeadSha: headRefOid,
|
|
654
|
+
lastAttemptedFailureSignature: signature,
|
|
655
|
+
});
|
|
656
|
+
this.advanceIdleIssue(issue, "repairing_queue", {
|
|
657
|
+
pendingRunType: "queue_repair",
|
|
658
|
+
pendingRunContext,
|
|
659
|
+
});
|
|
660
|
+
this.logger.info({ issueKey: issue.issueKey, prNumber: issue.prNumber, headRefOid }, "Queue health: merge conflict detected, dispatching preemptive repair");
|
|
661
|
+
this.feed?.publish({
|
|
662
|
+
level: "warn",
|
|
663
|
+
kind: "github",
|
|
664
|
+
issueKey: issue.issueKey,
|
|
665
|
+
projectId: issue.projectId,
|
|
666
|
+
stage: "repairing_queue",
|
|
667
|
+
status: "queue_health_conflict_detected",
|
|
668
|
+
summary: `Queue health: merge conflict detected on PR #${issue.prNumber}, dispatching preemptive repair`,
|
|
669
|
+
});
|
|
670
|
+
}
|
|
671
|
+
}
|
|
562
672
|
async reconcileIdleIssues() {
|
|
563
673
|
for (const issue of this.db.listIdleNonTerminalIssues()) {
|
|
564
674
|
// PR already merged — advance to done regardless of current state
|