patchrelay 0.23.4 → 0.23.5
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/dist/build-info.json +3 -3
- package/dist/db/migrations.js +3 -0
- package/dist/db.js +10 -0
- package/dist/github-webhook-handler.js +5 -1
- package/dist/run-orchestrator.js +100 -24
- package/dist/worktree-manager.js +7 -1
- package/package.json +1 -1
package/dist/build-info.json
CHANGED
package/dist/db/migrations.js
CHANGED
|
@@ -141,6 +141,9 @@ export function runPatchRelayMigrations(connection) {
|
|
|
141
141
|
addColumnIfMissing(connection, "issues", "description", "TEXT");
|
|
142
142
|
addColumnIfMissing(connection, "issues", "priority", "INTEGER");
|
|
143
143
|
addColumnIfMissing(connection, "issues", "estimate", "REAL");
|
|
144
|
+
// Zombie/stale recovery backoff
|
|
145
|
+
addColumnIfMissing(connection, "issues", "zombie_recovery_attempts", "INTEGER NOT NULL DEFAULT 0");
|
|
146
|
+
addColumnIfMissing(connection, "issues", "last_zombie_recovery_at", "TEXT");
|
|
144
147
|
}
|
|
145
148
|
function addColumnIfMissing(connection, table, column, definition) {
|
|
146
149
|
const cols = connection.prepare(`PRAGMA table_info(${table})`).all();
|
package/dist/db.js
CHANGED
|
@@ -173,6 +173,14 @@ export class PatchRelayDatabase {
|
|
|
173
173
|
sets.push("pending_merge_prep = @pendingMergePrep");
|
|
174
174
|
values.pendingMergePrep = params.pendingMergePrep ? 1 : 0;
|
|
175
175
|
}
|
|
176
|
+
if (params.zombieRecoveryAttempts !== undefined) {
|
|
177
|
+
sets.push("zombie_recovery_attempts = @zombieRecoveryAttempts");
|
|
178
|
+
values.zombieRecoveryAttempts = params.zombieRecoveryAttempts;
|
|
179
|
+
}
|
|
180
|
+
if (params.lastZombieRecoveryAt !== undefined) {
|
|
181
|
+
sets.push("last_zombie_recovery_at = @lastZombieRecoveryAt");
|
|
182
|
+
values.lastZombieRecoveryAt = params.lastZombieRecoveryAt;
|
|
183
|
+
}
|
|
176
184
|
this.connection.prepare(`UPDATE issues SET ${sets.join(", ")} WHERE project_id = @projectId AND linear_issue_id = @linearIssueId`).run(values);
|
|
177
185
|
}
|
|
178
186
|
else {
|
|
@@ -424,6 +432,8 @@ function mapIssueRow(row) {
|
|
|
424
432
|
reviewFixAttempts: Number(row.review_fix_attempts ?? 0),
|
|
425
433
|
mergePrepAttempts: Number(row.merge_prep_attempts ?? 0),
|
|
426
434
|
pendingMergePrep: Boolean(row.pending_merge_prep),
|
|
435
|
+
zombieRecoveryAttempts: Number(row.zombie_recovery_attempts ?? 0),
|
|
436
|
+
...(row.last_zombie_recovery_at !== null && row.last_zombie_recovery_at !== undefined ? { lastZombieRecoveryAt: String(row.last_zombie_recovery_at) } : {}),
|
|
427
437
|
};
|
|
428
438
|
}
|
|
429
439
|
function mapRunRow(row) {
|
package/dist/github-webhook-handler.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { resolveFactoryStateFromGitHub } from "./factory-state.js";
|
|
1
|
+
import { resolveFactoryStateFromGitHub, TERMINAL_STATES } from "./factory-state.js";
|
|
2
2
|
import { normalizeGitHubWebhook, verifyGitHubWebhookSignature } from "./github-webhooks.js";
|
|
3
3
|
import { buildAgentSessionPlanForIssue } from "./agent-session-plan.js";
|
|
4
4
|
import { buildAgentSessionExternalUrls } from "./agent-session-presentation.js";
|
|
@@ -202,6 +202,10 @@ export class GitHubWebhookHandler {
|
|
|
202
202
|
// Don't trigger if there's already an active run
|
|
203
203
|
if (issue.activeRunId !== undefined)
|
|
204
204
|
return;
|
|
205
|
+
// Don't trigger on terminal issues — late-arriving webhooks (e.g.
|
|
206
|
+
// merge_group_failed after pr_merged) must not resurrect done issues.
|
|
207
|
+
if (TERMINAL_STATES.has(issue.factoryState))
|
|
208
|
+
return;
|
|
205
209
|
if (event.triggerEvent === "check_failed" && issue.prState === "open") {
|
|
206
210
|
this.db.upsertIssue({
|
|
207
211
|
projectId: issue.projectId,
|
package/dist/run-orchestrator.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { existsSync, readFileSync } from "node:fs";
|
|
2
2
|
import path from "node:path";
|
|
3
|
-
import { ACTIVE_RUN_STATES } from "./factory-state.js";
|
|
3
|
+
import { ACTIVE_RUN_STATES, TERMINAL_STATES } from "./factory-state.js";
|
|
4
4
|
import { buildHookEnv, runProjectHook } from "./hook-runner.js";
|
|
5
5
|
import { buildAgentSessionPlanForIssue, } from "./agent-session-plan.js";
|
|
6
6
|
import { buildStageReport, countEventMethods, extractTurnId, resolveRunCompletionStatus, summarizeCurrentThread, } from "./run-reporting.js";
|
|
@@ -12,6 +12,8 @@ import { execCommand } from "./utils.js";
|
|
|
12
12
|
const DEFAULT_CI_REPAIR_BUDGET = 3;
|
|
13
13
|
const DEFAULT_QUEUE_REPAIR_BUDGET = 3;
|
|
14
14
|
const DEFAULT_REVIEW_FIX_BUDGET = 3;
|
|
15
|
+
const DEFAULT_ZOMBIE_RECOVERY_BUDGET = 5;
|
|
16
|
+
const ZOMBIE_RECOVERY_BASE_DELAY_MS = 15_000; // 15s, 30s, 60s, 120s, 240s
|
|
15
17
|
function slugify(value) {
|
|
16
18
|
return value.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 60);
|
|
17
19
|
}
|
|
@@ -191,7 +193,10 @@ export class RunOrchestrator {
|
|
|
191
193
|
// Freshen the worktree: fetch + rebase onto latest base branch.
|
|
192
194
|
// This prevents branch contamination when local main has drifted
|
|
193
195
|
// and avoids scope-bundling review rejections from stale commits.
|
|
194
|
-
|
|
196
|
+
// Skip for queue_repair — its entire purpose is to resolve rebase conflicts.
|
|
197
|
+
if (runType !== "queue_repair") {
|
|
198
|
+
await this.freshenWorktree(worktreePath, project, issue);
|
|
199
|
+
}
|
|
195
200
|
// Run prepare-worktree hook
|
|
196
201
|
const hookEnv = buildHookEnv(issue.issueKey ?? issue.linearIssueId, branchName, runType, worktreePath);
|
|
197
202
|
const prepareResult = await runProjectHook(project.repoPath, "prepare-worktree", { cwd: worktreePath, env: hookEnv });
|
|
@@ -244,6 +249,15 @@ export class RunOrchestrator {
|
|
|
244
249
|
throw error;
|
|
245
250
|
}
|
|
246
251
|
this.db.updateRunThread(run.id, { threadId, turnId });
|
|
252
|
+
// Reset zombie recovery counter — this run started successfully
|
|
253
|
+
if (issue.zombieRecoveryAttempts > 0) {
|
|
254
|
+
this.db.upsertIssue({
|
|
255
|
+
projectId: item.projectId,
|
|
256
|
+
linearIssueId: item.issueId,
|
|
257
|
+
zombieRecoveryAttempts: 0,
|
|
258
|
+
lastZombieRecoveryAt: null,
|
|
259
|
+
});
|
|
260
|
+
}
|
|
247
261
|
this.logger.info({ issueKey: issue.issueKey, runType, threadId, turnId }, `Started ${runType} run`);
|
|
248
262
|
// Emit Linear activity + plan
|
|
249
263
|
const freshIssue = this.db.getIssue(item.projectId, item.issueId) ?? issue;
|
|
@@ -286,11 +300,14 @@ export class RunOrchestrator {
|
|
|
286
300
|
// Rebase onto latest base
|
|
287
301
|
const rebaseResult = await execCommand(gitBin, ["-C", worktreePath, "rebase", `origin/${baseBranch}`], { timeoutMs: 120_000 });
|
|
288
302
|
if (rebaseResult.exitCode !== 0) {
|
|
289
|
-
// Abort the failed rebase and restore state
|
|
303
|
+
// Abort the failed rebase and restore state — then let the agent run
|
|
304
|
+
// proceed. The agent can resolve the conflict itself (the workflow
|
|
305
|
+
// prompt tells it to rebase and handle conflicts).
|
|
290
306
|
await execCommand(gitBin, ["-C", worktreePath, "rebase", "--abort"], { timeoutMs: 10_000 });
|
|
291
307
|
if (didStash)
|
|
292
308
|
await execCommand(gitBin, ["-C", worktreePath, "stash", "pop"], { timeoutMs: 10_000 });
|
|
293
|
-
|
|
309
|
+
this.logger.warn({ issueKey: issue.issueKey, baseBranch }, "Pre-run freshen: rebase conflict, agent will resolve");
|
|
310
|
+
return;
|
|
294
311
|
}
|
|
295
312
|
// Push the rebased branch (force-with-lease to protect against concurrent pushes)
|
|
296
313
|
const pushResult = await execCommand(gitBin, ["-C", worktreePath, "push", "--force-with-lease"], { timeoutMs: 60_000 });
|
|
@@ -563,26 +580,91 @@ export class RunOrchestrator {
|
|
|
563
580
|
this.enqueueIssue(issue.projectId, issue.linearIssueId);
|
|
564
581
|
}
|
|
565
582
|
}
|
|
583
|
+
/**
|
|
584
|
+
* After a zombie/stale run is cleared, decide whether to re-enqueue
|
|
585
|
+
* or escalate. Checks: PR already merged → done; budget exhausted →
|
|
586
|
+
* escalate; backoff delay not elapsed → skip.
|
|
587
|
+
*/
|
|
588
|
+
recoverOrEscalate(issue, runType, reason) {
|
|
589
|
+
// Re-read issue after the run was cleared (activeRunId is now null)
|
|
590
|
+
const fresh = this.db.getIssue(issue.projectId, issue.linearIssueId);
|
|
591
|
+
if (!fresh)
|
|
592
|
+
return;
|
|
593
|
+
// If PR already merged, transition to done — no retry needed
|
|
594
|
+
if (fresh.prState === "merged") {
|
|
595
|
+
this.db.upsertIssue({
|
|
596
|
+
projectId: fresh.projectId,
|
|
597
|
+
linearIssueId: fresh.linearIssueId,
|
|
598
|
+
factoryState: "done",
|
|
599
|
+
zombieRecoveryAttempts: 0,
|
|
600
|
+
lastZombieRecoveryAt: null,
|
|
601
|
+
});
|
|
602
|
+
this.logger.info({ issueKey: fresh.issueKey, reason }, "Recovery: PR already merged — transitioning to done");
|
|
603
|
+
return;
|
|
604
|
+
}
|
|
605
|
+
// Budget check
|
|
606
|
+
const attempts = fresh.zombieRecoveryAttempts + 1;
|
|
607
|
+
if (attempts > DEFAULT_ZOMBIE_RECOVERY_BUDGET) {
|
|
608
|
+
this.db.upsertIssue({
|
|
609
|
+
projectId: fresh.projectId,
|
|
610
|
+
linearIssueId: fresh.linearIssueId,
|
|
611
|
+
factoryState: "escalated",
|
|
612
|
+
});
|
|
613
|
+
this.logger.warn({ issueKey: fresh.issueKey, attempts, reason }, "Recovery: budget exhausted — escalating");
|
|
614
|
+
this.feed?.publish({
|
|
615
|
+
level: "error",
|
|
616
|
+
kind: "workflow",
|
|
617
|
+
issueKey: fresh.issueKey,
|
|
618
|
+
projectId: fresh.projectId,
|
|
619
|
+
stage: "escalated",
|
|
620
|
+
status: "budget_exhausted",
|
|
621
|
+
summary: `${reason} recovery failed after ${DEFAULT_ZOMBIE_RECOVERY_BUDGET} attempts`,
|
|
622
|
+
});
|
|
623
|
+
return;
|
|
624
|
+
}
|
|
625
|
+
// Exponential backoff — skip if delay hasn't elapsed
|
|
626
|
+
if (fresh.lastZombieRecoveryAt) {
|
|
627
|
+
const elapsed = Date.now() - new Date(fresh.lastZombieRecoveryAt).getTime();
|
|
628
|
+
const delay = ZOMBIE_RECOVERY_BASE_DELAY_MS * Math.pow(2, fresh.zombieRecoveryAttempts);
|
|
629
|
+
if (elapsed < delay) {
|
|
630
|
+
this.logger.debug({ issueKey: fresh.issueKey, attempts: fresh.zombieRecoveryAttempts, delay, elapsed }, "Recovery: backoff not elapsed, skipping");
|
|
631
|
+
return;
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
// Re-enqueue with backoff tracking
|
|
635
|
+
this.db.upsertIssue({
|
|
636
|
+
projectId: fresh.projectId,
|
|
637
|
+
linearIssueId: fresh.linearIssueId,
|
|
638
|
+
pendingRunType: runType,
|
|
639
|
+
pendingRunContextJson: null,
|
|
640
|
+
zombieRecoveryAttempts: attempts,
|
|
641
|
+
lastZombieRecoveryAt: new Date().toISOString(),
|
|
642
|
+
});
|
|
643
|
+
this.enqueueIssue(fresh.projectId, fresh.linearIssueId);
|
|
644
|
+
this.logger.info({ issueKey: fresh.issueKey, attempts, reason }, "Recovery: re-enqueued with backoff");
|
|
645
|
+
}
|
|
566
646
|
async reconcileRun(run) {
|
|
567
647
|
const issue = this.db.getIssue(run.projectId, run.linearIssueId);
|
|
568
648
|
if (!issue)
|
|
569
649
|
return;
|
|
650
|
+
// If the issue reached a terminal state while this run was active
|
|
651
|
+
// (e.g. pr_merged processed, DB manually edited), just release the run.
|
|
652
|
+
if (TERMINAL_STATES.has(issue.factoryState)) {
|
|
653
|
+
this.db.transaction(() => {
|
|
654
|
+
this.db.finishRun(run.id, { status: "released", failureReason: "Issue reached terminal state during active run" });
|
|
655
|
+
this.db.upsertIssue({ projectId: run.projectId, linearIssueId: run.linearIssueId, activeRunId: null });
|
|
656
|
+
});
|
|
657
|
+
this.logger.info({ issueKey: issue.issueKey, runId: run.id, factoryState: issue.factoryState }, "Reconciliation: released run on terminal issue");
|
|
658
|
+
return;
|
|
659
|
+
}
|
|
570
660
|
// Zombie run: claimed in DB but Codex never started (no thread).
|
|
571
|
-
// This happens when the service crashes between claiming the run
|
|
572
|
-
// and starting the Codex turn. Re-enqueue instead of failing.
|
|
573
661
|
if (!run.threadId) {
|
|
574
|
-
this.logger.warn({ issueKey: issue.issueKey, runId: run.id, runType: run.runType }, "Zombie run detected (no thread)
|
|
662
|
+
this.logger.warn({ issueKey: issue.issueKey, runId: run.id, runType: run.runType }, "Zombie run detected (no thread)");
|
|
575
663
|
this.db.transaction(() => {
|
|
576
664
|
this.db.finishRun(run.id, { status: "failed", failureReason: "Zombie: never started (no thread after restart)" });
|
|
577
|
-
this.db.upsertIssue({
|
|
578
|
-
projectId: run.projectId,
|
|
579
|
-
linearIssueId: run.linearIssueId,
|
|
580
|
-
activeRunId: null,
|
|
581
|
-
pendingRunType: run.runType,
|
|
582
|
-
pendingRunContextJson: null,
|
|
583
|
-
});
|
|
665
|
+
this.db.upsertIssue({ projectId: run.projectId, linearIssueId: run.linearIssueId, activeRunId: null });
|
|
584
666
|
});
|
|
585
|
-
this.
|
|
667
|
+
this.recoverOrEscalate(issue, run.runType, "zombie");
|
|
586
668
|
return;
|
|
587
669
|
}
|
|
588
670
|
// Read Codex state — thread may not exist after app-server restart.
|
|
@@ -591,18 +673,12 @@ export class RunOrchestrator {
|
|
|
591
673
|
thread = await this.readThreadWithRetry(run.threadId);
|
|
592
674
|
}
|
|
593
675
|
catch {
|
|
594
|
-
this.logger.warn({ issueKey: issue.issueKey, runId: run.id, runType: run.runType, threadId: run.threadId }, "Stale thread during reconciliation
|
|
676
|
+
this.logger.warn({ issueKey: issue.issueKey, runId: run.id, runType: run.runType, threadId: run.threadId }, "Stale thread during reconciliation");
|
|
595
677
|
this.db.transaction(() => {
|
|
596
678
|
this.db.finishRun(run.id, { status: "failed", failureReason: "Stale thread after restart" });
|
|
597
|
-
this.db.upsertIssue({
|
|
598
|
-
projectId: run.projectId,
|
|
599
|
-
linearIssueId: run.linearIssueId,
|
|
600
|
-
activeRunId: null,
|
|
601
|
-
pendingRunType: run.runType,
|
|
602
|
-
pendingRunContextJson: null,
|
|
603
|
-
});
|
|
679
|
+
this.db.upsertIssue({ projectId: run.projectId, linearIssueId: run.linearIssueId, activeRunId: null });
|
|
604
680
|
});
|
|
605
|
-
this.
|
|
681
|
+
this.recoverOrEscalate(issue, run.runType, "stale_thread");
|
|
606
682
|
return;
|
|
607
683
|
}
|
|
608
684
|
// Check Linear state (non-fatal — token refresh may fail)
|
package/dist/worktree-manager.js
CHANGED
|
@@ -12,7 +12,13 @@ export class WorktreeManager {
|
|
|
12
12
|
return;
|
|
13
13
|
}
|
|
14
14
|
await ensureDir(path.dirname(worktreePath));
|
|
15
|
-
|
|
15
|
+
// Fetch latest main so the branch forks from a clean, up-to-date base.
|
|
16
|
+
// This prevents branch contamination when local HEAD has drifted.
|
|
17
|
+
// freshenWorktree in run-orchestrator acts as a secondary safety net.
|
|
18
|
+
await execCommand(this.config.runner.gitBin, ["-C", repoPath, "fetch", "origin", "main"], {
|
|
19
|
+
timeoutMs: 60_000,
|
|
20
|
+
});
|
|
21
|
+
await execCommand(this.config.runner.gitBin, ["-C", repoPath, "worktree", "add", "--force", "-B", branchName, worktreePath, "origin/main"], {
|
|
16
22
|
timeoutMs: 120_000,
|
|
17
23
|
});
|
|
18
24
|
}
|