agent-relay-orchestrator 0.78.5 → 0.78.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/quota-poller.ts +55 -11
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-orchestrator",
3
- "version": "0.78.5",
3
+ "version": "0.78.6",
4
4
  "description": "Agent Relay orchestrator — manages agent lifecycle across hosts",
5
5
  "type": "module",
6
6
  "bin": {
package/src/quota-poller.ts CHANGED
@@ -25,6 +25,8 @@ import { codexCommandFromEnv, providerHomeRootFromEnv, type OrchestratorConfig }
25
25
 
26
26
  const QUOTA_LEASE_TTL_MS = 90_000;
27
27
  const QUOTA_LEASE_RENEW_MS = 30_000;
28
+ const QUOTA_RETRY_BACKOFF_MAX_MS = 15 * 60_000;
29
+ const QUOTA_RETRY_BACKOFF_MIN_MS = 1_000;
28
30
  const CODEX_APP_SERVER_CONNECT_ATTEMPTS = 40;
29
31
  const CODEX_APP_SERVER_CONNECT_RETRY_MS = 250;
30
32
 
@@ -62,6 +64,7 @@ type QuotaPollState = {
62
64
  leaseExpiresAt?: number;
63
65
  nextPollAt?: number;
64
66
  lastAttemptAt?: number;
67
+ consecutiveFailures?: number;
65
68
  lastLog?: { key: string; at: number };
66
69
  };
67
70
 
@@ -115,14 +118,18 @@ export class OrchestratorQuotaPoller {
115
118
  const { candidates, skips } = await this.discoverCandidates();
116
119
  await this.releaseRemovedCandidates(candidates);
117
120
  for (const candidate of candidates) {
118
- await this.processCandidate(candidate);
121
+ try {
122
+ await this.processCandidate(candidate);
123
+ } catch (error) {
124
+ await this.handleCandidateFailure(candidate, error);
125
+ }
119
126
  }
120
127
  for (const skip of skips) {
121
128
  await this.reportSkip(skip);
122
129
  }
123
130
  } finally {
124
131
  this.inFlight = false;
125
- this.schedule(this.options.intervalMs ?? QUOTA_LEASE_RENEW_MS);
132
+ this.schedule(this.nextScheduleDelay(this.options.intervalMs ?? QUOTA_LEASE_RENEW_MS));
126
133
  }
127
134
  }
128
135
 
@@ -136,6 +143,18 @@ export class OrchestratorQuotaPoller {
136
143
  }, Math.max(1_000, delayMs));
137
144
  }
138
145
 
146
+ private nextScheduleDelay(defaultDelayMs: number): number {
147
+ const now = this.now();
148
+ let delayMs = defaultDelayMs;
149
+ for (const state of this.states.values()) {
150
+ if (state.nextPollAt !== undefined) delayMs = Math.min(delayMs, state.nextPollAt - now);
151
+ if (state.leaseToken && state.leaseExpiresAt !== undefined) {
152
+ delayMs = Math.min(delayMs, state.leaseExpiresAt - now - QUOTA_LEASE_RENEW_MS);
153
+ }
154
+ }
155
+ return Math.max(QUOTA_RETRY_BACKOFF_MIN_MS, delayMs);
156
+ }
157
+
139
158
  // Refresh per-provider quota config (#605). Best-effort: on failure we keep the
140
159
  // last known config (defaults for any unset provider), so a transient relay blip
141
160
  // never silently stops collection.
@@ -270,16 +289,14 @@ export class OrchestratorQuotaPoller {
270
289
  await this.relay.reportProviderQuota(update);
271
290
  state.lastAttemptAt = update.lastAttemptAt;
272
291
  state.nextPollAt = now + pollIntervalMs;
292
+ state.consecutiveFailures = 0;
273
293
  } catch (error) {
274
294
  const retryAfterMs = quotaRetryAfterMs(error);
275
295
  const lastError = providerQuotaErrorFromCollectorError(error, retryAfterMs);
276
- const retryDelayMs = retryAfterMs ?? (state.lastAttemptAt ? pollIntervalMs : QUOTA_FAST_RETRY_MS);
296
+ const retryDelayMs = this.retryDelayMs(state, retryAfterMs);
277
297
  state.lastAttemptAt = lastAttemptAt;
278
298
  state.nextPollAt = now + retryDelayMs;
279
- if (candidate.provider === "claude" && retryAfterMs !== undefined) {
280
- this.logFailure(candidate, error, retryAfterMs);
281
- return;
282
- }
299
+ state.consecutiveFailures = (state.consecutiveFailures ?? 0) + 1;
283
300
  await this.relay.reportProviderQuota({
284
301
  provider: candidate.provider,
285
302
  accountKey: candidate.accountKey,
@@ -287,10 +304,37 @@ export class OrchestratorQuotaPoller {
287
304
  lastError,
288
305
  sourceAgentId: this.sourceAgentId(),
289
306
  }).catch((publishError) => this.log(`quota status publish failed: ${errMessage(publishError)}`));
290
- this.logFailure(candidate, error, retryAfterMs);
307
+ this.logFailure(candidate, error, retryAfterMs, retryDelayMs);
291
308
  }
292
309
  }
293
310
 
311
+ private async handleCandidateFailure(candidate: QuotaCandidate, error: unknown): Promise<void> {
312
+ const state = this.stateFor(candidate);
313
+ const now = this.now();
314
+ const retryAfterMs = quotaRetryAfterMs(error);
315
+ const retryDelayMs = this.retryDelayMs(state, retryAfterMs);
316
+ state.lastAttemptAt = now;
317
+ state.nextPollAt = now + retryDelayMs;
318
+ state.consecutiveFailures = (state.consecutiveFailures ?? 0) + 1;
319
+ await this.relay.reportProviderQuota({
320
+ provider: candidate.provider,
321
+ accountKey: candidate.accountKey,
322
+ lastAttemptAt: now,
323
+ lastError: providerQuotaErrorFromCollectorError(error, retryAfterMs),
324
+ sourceAgentId: this.sourceAgentId(),
325
+ }).catch((publishError) => this.log(`quota status publish failed: ${errMessage(publishError)}`));
326
+ this.logFailure(candidate, error, retryAfterMs, retryDelayMs);
327
+ }
328
+
329
+ private retryDelayMs(state: QuotaPollState, retryAfterMs: number | undefined): number {
330
+ const baseDelayMs = retryAfterMs ?? QUOTA_FAST_RETRY_MS;
331
+ const multiplier = 2 ** Math.min(state.consecutiveFailures ?? 0, 10);
332
+ return Math.min(
333
+ QUOTA_RETRY_BACKOFF_MAX_MS,
334
+ Math.max(QUOTA_RETRY_BACKOFF_MIN_MS, Math.round(baseDelayMs * multiplier)),
335
+ );
336
+ }
337
+
294
338
  private async ensureLease(candidate: QuotaCandidate, state: QuotaPollState, now: number): Promise<boolean> {
295
339
  if (state.leaseToken && state.leaseExpiresAt && state.leaseExpiresAt - now > QUOTA_LEASE_RENEW_MS) return true;
296
340
  const result = await this.relay.acquireProviderQuotaLease(this.config.id, {
@@ -353,13 +397,13 @@ export class OrchestratorQuotaPoller {
353
397
  (this.options.log ?? ((line) => console.error(`[orchestrator] ${line}`)))(message);
354
398
  }
355
399
 
356
- private logFailure(candidate: QuotaCandidate, error: unknown, retryAfterMs: number | undefined): void {
400
+ private logFailure(candidate: QuotaCandidate, error: unknown, retryAfterMs: number | undefined, retryDelayMs: number): void {
357
401
  const state = this.stateFor(candidate);
358
- const key = retryAfterMs !== undefined ? `retry-after:${retryAfterMs}` : errMessage(error);
402
+ const key = retryAfterMs !== undefined ? `retry-after:${retryAfterMs}:delay:${retryDelayMs}` : `${errMessage(error)}:delay:${retryDelayMs}`;
359
403
  const now = this.now();
360
404
  if (state.lastLog?.key === key && now - state.lastLog.at < QUOTA_FAILURE_LOG_INTERVAL_MS) return;
361
405
  state.lastLog = { key, at: now };
362
- const suffix = retryAfterMs !== undefined ? `; retrying in ${Math.round(retryAfterMs / 1000)}s` : "";
406
+ const suffix = `; retrying in ${Math.round(retryDelayMs / 1000)}s`;
363
407
  this.log(`quota refresh failed for ${candidate.provider}/${candidate.accountKey}${suffix}: ${errMessage(error)}`);
364
408
  }
365
409