agent-relay-orchestrator 0.78.5 → 0.78.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/control.ts +2 -0
- package/src/quota-poller.ts +55 -11
- package/src/workspace-probe/cleanup.ts +52 -5
- package/src/workspace-probe/git-state.ts +1 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-relay-orchestrator",
|
|
3
|
-
"version": "0.78.5",
|
|
3
|
+
"version": "0.78.7",
|
|
4
4
|
"description": "Agent Relay orchestrator — manages agent lifecycle across hosts",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
"test": "bun test"
|
|
17
17
|
},
|
|
18
18
|
"dependencies": {
|
|
19
|
-
"agent-relay-sdk": "0.2.
|
|
19
|
+
"agent-relay-sdk": "0.2.57"
|
|
20
20
|
},
|
|
21
21
|
"devDependencies": {
|
|
22
22
|
"@types/bun": "latest",
|
package/src/control.ts
CHANGED
|
@@ -95,6 +95,8 @@ export function createControlHandler(
|
|
|
95
95
|
repoRoot: typeof command.params.repoRoot === "string" ? command.params.repoRoot : undefined,
|
|
96
96
|
worktreePath: typeof command.params.worktreePath === "string" ? command.params.worktreePath : undefined,
|
|
97
97
|
branch: typeof command.params.branch === "string" ? command.params.branch : undefined,
|
|
98
|
+
baseRef: typeof command.params.baseRef === "string" ? command.params.baseRef : undefined,
|
|
99
|
+
baseSha: typeof command.params.baseSha === "string" ? command.params.baseSha : undefined,
|
|
98
100
|
deleteBranch: command.params.deleteBranch !== false,
|
|
99
101
|
workspacesRoot: workspacesRoot(config.baseDir),
|
|
100
102
|
});
|
package/src/quota-poller.ts
CHANGED
|
@@ -25,6 +25,8 @@ import { codexCommandFromEnv, providerHomeRootFromEnv, type OrchestratorConfig }
|
|
|
25
25
|
|
|
26
26
|
const QUOTA_LEASE_TTL_MS = 90_000;
|
|
27
27
|
const QUOTA_LEASE_RENEW_MS = 30_000;
|
|
28
|
+
const QUOTA_RETRY_BACKOFF_MAX_MS = 15 * 60_000;
|
|
29
|
+
const QUOTA_RETRY_BACKOFF_MIN_MS = 1_000;
|
|
28
30
|
const CODEX_APP_SERVER_CONNECT_ATTEMPTS = 40;
|
|
29
31
|
const CODEX_APP_SERVER_CONNECT_RETRY_MS = 250;
|
|
30
32
|
|
|
@@ -62,6 +64,7 @@ type QuotaPollState = {
|
|
|
62
64
|
leaseExpiresAt?: number;
|
|
63
65
|
nextPollAt?: number;
|
|
64
66
|
lastAttemptAt?: number;
|
|
67
|
+
consecutiveFailures?: number;
|
|
65
68
|
lastLog?: { key: string; at: number };
|
|
66
69
|
};
|
|
67
70
|
|
|
@@ -115,14 +118,18 @@ export class OrchestratorQuotaPoller {
|
|
|
115
118
|
const { candidates, skips } = await this.discoverCandidates();
|
|
116
119
|
await this.releaseRemovedCandidates(candidates);
|
|
117
120
|
for (const candidate of candidates) {
|
|
118
|
-
|
|
121
|
+
try {
|
|
122
|
+
await this.processCandidate(candidate);
|
|
123
|
+
} catch (error) {
|
|
124
|
+
await this.handleCandidateFailure(candidate, error);
|
|
125
|
+
}
|
|
119
126
|
}
|
|
120
127
|
for (const skip of skips) {
|
|
121
128
|
await this.reportSkip(skip);
|
|
122
129
|
}
|
|
123
130
|
} finally {
|
|
124
131
|
this.inFlight = false;
|
|
125
|
-
this.schedule(this.options.intervalMs ?? QUOTA_LEASE_RENEW_MS);
|
|
132
|
+
this.schedule(this.nextScheduleDelay(this.options.intervalMs ?? QUOTA_LEASE_RENEW_MS));
|
|
126
133
|
}
|
|
127
134
|
}
|
|
128
135
|
|
|
@@ -136,6 +143,18 @@ export class OrchestratorQuotaPoller {
|
|
|
136
143
|
}, Math.max(1_000, delayMs));
|
|
137
144
|
}
|
|
138
145
|
|
|
146
|
+
private nextScheduleDelay(defaultDelayMs: number): number {
|
|
147
|
+
const now = this.now();
|
|
148
|
+
let delayMs = defaultDelayMs;
|
|
149
|
+
for (const state of this.states.values()) {
|
|
150
|
+
if (state.nextPollAt !== undefined) delayMs = Math.min(delayMs, state.nextPollAt - now);
|
|
151
|
+
if (state.leaseToken && state.leaseExpiresAt !== undefined) {
|
|
152
|
+
delayMs = Math.min(delayMs, state.leaseExpiresAt - now - QUOTA_LEASE_RENEW_MS);
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
return Math.max(QUOTA_RETRY_BACKOFF_MIN_MS, delayMs);
|
|
156
|
+
}
|
|
157
|
+
|
|
139
158
|
// Refresh per-provider quota config (#605). Best-effort: on failure we keep the
|
|
140
159
|
// last known config (defaults for any unset provider), so a transient relay blip
|
|
141
160
|
// never silently stops collection.
|
|
@@ -270,16 +289,14 @@ export class OrchestratorQuotaPoller {
|
|
|
270
289
|
await this.relay.reportProviderQuota(update);
|
|
271
290
|
state.lastAttemptAt = update.lastAttemptAt;
|
|
272
291
|
state.nextPollAt = now + pollIntervalMs;
|
|
292
|
+
state.consecutiveFailures = 0;
|
|
273
293
|
} catch (error) {
|
|
274
294
|
const retryAfterMs = quotaRetryAfterMs(error);
|
|
275
295
|
const lastError = providerQuotaErrorFromCollectorError(error, retryAfterMs);
|
|
276
|
-
const retryDelayMs =
|
|
296
|
+
const retryDelayMs = this.retryDelayMs(state, retryAfterMs);
|
|
277
297
|
state.lastAttemptAt = lastAttemptAt;
|
|
278
298
|
state.nextPollAt = now + retryDelayMs;
|
|
279
|
-
|
|
280
|
-
this.logFailure(candidate, error, retryAfterMs);
|
|
281
|
-
return;
|
|
282
|
-
}
|
|
299
|
+
state.consecutiveFailures = (state.consecutiveFailures ?? 0) + 1;
|
|
283
300
|
await this.relay.reportProviderQuota({
|
|
284
301
|
provider: candidate.provider,
|
|
285
302
|
accountKey: candidate.accountKey,
|
|
@@ -287,10 +304,37 @@ export class OrchestratorQuotaPoller {
|
|
|
287
304
|
lastError,
|
|
288
305
|
sourceAgentId: this.sourceAgentId(),
|
|
289
306
|
}).catch((publishError) => this.log(`quota status publish failed: ${errMessage(publishError)}`));
|
|
290
|
-
this.logFailure(candidate, error, retryAfterMs);
|
|
307
|
+
this.logFailure(candidate, error, retryAfterMs, retryDelayMs);
|
|
291
308
|
}
|
|
292
309
|
}
|
|
293
310
|
|
|
311
|
+
private async handleCandidateFailure(candidate: QuotaCandidate, error: unknown): Promise<void> {
|
|
312
|
+
const state = this.stateFor(candidate);
|
|
313
|
+
const now = this.now();
|
|
314
|
+
const retryAfterMs = quotaRetryAfterMs(error);
|
|
315
|
+
const retryDelayMs = this.retryDelayMs(state, retryAfterMs);
|
|
316
|
+
state.lastAttemptAt = now;
|
|
317
|
+
state.nextPollAt = now + retryDelayMs;
|
|
318
|
+
state.consecutiveFailures = (state.consecutiveFailures ?? 0) + 1;
|
|
319
|
+
await this.relay.reportProviderQuota({
|
|
320
|
+
provider: candidate.provider,
|
|
321
|
+
accountKey: candidate.accountKey,
|
|
322
|
+
lastAttemptAt: now,
|
|
323
|
+
lastError: providerQuotaErrorFromCollectorError(error, retryAfterMs),
|
|
324
|
+
sourceAgentId: this.sourceAgentId(),
|
|
325
|
+
}).catch((publishError) => this.log(`quota status publish failed: ${errMessage(publishError)}`));
|
|
326
|
+
this.logFailure(candidate, error, retryAfterMs, retryDelayMs);
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
private retryDelayMs(state: QuotaPollState, retryAfterMs: number | undefined): number {
|
|
330
|
+
const baseDelayMs = retryAfterMs ?? QUOTA_FAST_RETRY_MS;
|
|
331
|
+
const multiplier = 2 ** Math.min(state.consecutiveFailures ?? 0, 10);
|
|
332
|
+
return Math.min(
|
|
333
|
+
QUOTA_RETRY_BACKOFF_MAX_MS,
|
|
334
|
+
Math.max(QUOTA_RETRY_BACKOFF_MIN_MS, Math.round(baseDelayMs * multiplier)),
|
|
335
|
+
);
|
|
336
|
+
}
|
|
337
|
+
|
|
294
338
|
private async ensureLease(candidate: QuotaCandidate, state: QuotaPollState, now: number): Promise<boolean> {
|
|
295
339
|
if (state.leaseToken && state.leaseExpiresAt && state.leaseExpiresAt - now > QUOTA_LEASE_RENEW_MS) return true;
|
|
296
340
|
const result = await this.relay.acquireProviderQuotaLease(this.config.id, {
|
|
@@ -353,13 +397,13 @@ export class OrchestratorQuotaPoller {
|
|
|
353
397
|
(this.options.log ?? ((line) => console.error(`[orchestrator] ${line}`)))(message);
|
|
354
398
|
}
|
|
355
399
|
|
|
356
|
-
private logFailure(candidate: QuotaCandidate, error: unknown, retryAfterMs: number | undefined): void {
|
|
400
|
+
private logFailure(candidate: QuotaCandidate, error: unknown, retryAfterMs: number | undefined, retryDelayMs: number): void {
|
|
357
401
|
const state = this.stateFor(candidate);
|
|
358
|
-
const key = retryAfterMs !== undefined ? `retry-after:${retryAfterMs}` : errMessage(error)
|
|
402
|
+
const key = retryAfterMs !== undefined ? `retry-after:${retryAfterMs}:delay:${retryDelayMs}` : `${errMessage(error)}:delay:${retryDelayMs}`;
|
|
359
403
|
const now = this.now();
|
|
360
404
|
if (state.lastLog?.key === key && now - state.lastLog.at < QUOTA_FAILURE_LOG_INTERVAL_MS) return;
|
|
361
405
|
state.lastLog = { key, at: now };
|
|
362
|
-
const suffix =
|
|
406
|
+
const suffix = `; retrying in ${Math.round(retryDelayMs / 1000)}s`;
|
|
363
407
|
this.log(`quota refresh failed for ${candidate.provider}/${candidate.accountKey}${suffix}: ${errMessage(error)}`);
|
|
364
408
|
}
|
|
365
409
|
|
|
package/src/workspace-probe/cleanup.ts
CHANGED
|
@@ -22,7 +22,7 @@ function owningRepoRoot(worktreePath: string, fallback: string): string {
|
|
|
22
22
|
return existsSync(root) ? root : fallback;
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
-
export function cleanupWorkspace(workspace: { repoRoot?: string; worktreePath?: string; id?: string; branch?: string; deleteBranch?: boolean; workspacesRoot?: string }): { workspaceId?: string; removed: boolean; worktreePath?: string; branchDeleted?: boolean; containerRemoved?: boolean } {
|
|
25
|
+
export function cleanupWorkspace(workspace: { repoRoot?: string; worktreePath?: string; id?: string; branch?: string; baseRef?: string; baseSha?: string; deleteBranch?: boolean; workspacesRoot?: string }): { workspaceId?: string; removed: boolean; worktreePath?: string; branchDeleted?: boolean; branchPreservedReason?: string; containerRemoved?: boolean } {
|
|
26
26
|
if (!workspace.worktreePath) throw new Error("worktreePath required");
|
|
27
27
|
const path = resolve(workspace.worktreePath);
|
|
28
28
|
const recordedRepo = workspace.repoRoot ? resolve(workspace.repoRoot) : path;
|
|
@@ -38,14 +38,61 @@ export function cleanupWorkspace(workspace: { repoRoot?: string; worktreePath?:
|
|
|
38
38
|
throw new Error(result.stderr || `worktree ${path} still present after \`git worktree remove\` (repo ${repo})`);
|
|
39
39
|
}
|
|
40
40
|
// Once the worktree is gone the agent/... branch is litter — delete it so
|
|
41
|
-
// branches don't accumulate.
|
|
42
|
-
//
|
|
41
|
+
// branches don't accumulate. First prove it has no unlanded commits, because
|
|
42
|
+
// deleting the branch ref is what can make committed-but-unlanded work
|
|
43
|
+
// unreachable after a crashed worker (#614).
|
|
43
44
|
let branchDeleted = false;
|
|
45
|
+
let branchPreservedReason: string | undefined;
|
|
44
46
|
if (workspace.branch && workspace.deleteBranch !== false) {
|
|
45
|
-
|
|
47
|
+
const safety = branchSafeToDelete(repo, workspace.branch, workspace.baseRef, workspace.baseSha);
|
|
48
|
+
if (safety.safe) {
|
|
49
|
+
branchDeleted = git(["branch", "-D", workspace.branch], repo).ok;
|
|
50
|
+
if (!branchDeleted) branchPreservedReason = "branch delete failed";
|
|
51
|
+
} else {
|
|
52
|
+
branchPreservedReason = safety.reason;
|
|
53
|
+
}
|
|
46
54
|
}
|
|
47
55
|
const containerRemoved = workspace.workspacesRoot ? removeEmptyContainer(dirname(path), resolve(workspace.workspacesRoot)) : false;
|
|
48
|
-
return { workspaceId: workspace.id, removed: true, worktreePath: path, branchDeleted, containerRemoved };
|
|
56
|
+
return { workspaceId: workspace.id, removed: true, worktreePath: path, branchDeleted, ...(branchPreservedReason ? { branchPreservedReason } : {}), containerRemoved };
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
function branchSafeToDelete(repo: string, branch: string, baseRef?: string, baseSha?: string): { safe: boolean; reason?: string } {
|
|
60
|
+
const branchRef = resolveCommit(repo, branch) ? branch : resolveCommit(repo, `refs/heads/${branch}`) ? `refs/heads/${branch}` : undefined;
|
|
61
|
+
if (!branchRef) return { safe: true };
|
|
62
|
+
|
|
63
|
+
const base = resolveCleanupBase(repo, baseRef, baseSha);
|
|
64
|
+
if (!base) return { safe: false, reason: "base ref unavailable" };
|
|
65
|
+
|
|
66
|
+
const counts = git(["rev-list", "--left-right", "--count", `${base}...${branchRef}`], repo);
|
|
67
|
+
if (!counts.ok || !counts.stdout) return { safe: false, reason: counts.stderr || "ahead count unavailable" };
|
|
68
|
+
const ahead = Number(counts.stdout.split(/\s+/)[1]);
|
|
69
|
+
if (!Number.isFinite(ahead)) return { safe: false, reason: "ahead count unavailable" };
|
|
70
|
+
if (ahead === 0) return { safe: true };
|
|
71
|
+
|
|
72
|
+
const cherryBase = upstreamRef(repo, base) ?? base;
|
|
73
|
+
if (git(["diff", "--quiet", cherryBase, branchRef], repo).ok) return { safe: true };
|
|
74
|
+
const cherry = git(["cherry", cherryBase, branchRef], repo);
|
|
75
|
+
if (!cherry.ok) return { safe: false, reason: cherry.stderr || "unmerged commit check unavailable" };
|
|
76
|
+
const unmergedAhead = cherry.stdout ? cherry.stdout.split("\n").filter((line) => line.startsWith("+")).length : 0;
|
|
77
|
+
return unmergedAhead === 0
|
|
78
|
+
? { safe: true }
|
|
79
|
+
: { safe: false, reason: `${unmergedAhead} unlanded commit(s)` };
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
function resolveCleanupBase(repo: string, baseRef?: string, baseSha?: string): string | undefined {
|
|
83
|
+
for (const candidate of [baseRef, baseSha, "main", "master"]) {
|
|
84
|
+
if (candidate && resolveCommit(repo, candidate)) return candidate;
|
|
85
|
+
}
|
|
86
|
+
return undefined;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
function resolveCommit(repo: string, ref: string): boolean {
|
|
90
|
+
return git(["rev-parse", "--verify", "--quiet", `${ref}^{commit}`], repo).ok;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
function upstreamRef(repo: string, base: string): string | undefined {
|
|
94
|
+
const res = git(["rev-parse", "--abbrev-ref", `${base}@{upstream}`], repo);
|
|
95
|
+
return res.ok && res.stdout ? res.stdout : undefined;
|
|
49
96
|
}
|
|
50
97
|
|
|
51
98
|
export function sweepEmptyWorkspaceContainers(wsRoot: string): string[] {
|
|
package/src/workspace-probe/git-state.ts
CHANGED
|
@@ -138,7 +138,7 @@ export function reconcileWorkspace(workspace: { id?: string; repoRoot?: string;
|
|
|
138
138
|
// detection can only under-report, so this never deletes unmerged work.
|
|
139
139
|
const empty = gitState.error === undefined && gitState.dirtyCount === 0 && ((gitState.ahead ?? 0) === 0 || gitState.landed === true);
|
|
140
140
|
if (empty) {
|
|
141
|
-
cleanupWorkspace({ id: workspace.id, repoRoot: workspace.repoRoot, worktreePath: workspace.worktreePath, branch: workspace.branch });
|
|
141
|
+
cleanupWorkspace({ id: workspace.id, repoRoot: workspace.repoRoot, worktreePath: workspace.worktreePath, branch: workspace.branch, baseRef: workspace.baseRef, baseSha: workspace.baseSha });
|
|
142
142
|
return { workspaceId: workspace.id, removed: true, status: "cleaned", gitState };
|
|
143
143
|
}
|
|
144
144
|
return { workspaceId: workspace.id, removed: false, status: "review_requested", gitState };
|