pi-antigravity-rotator 1.3.7 → 1.3.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,17 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [1.3.9] - 2026-04-27
6
+
7
+ ### Fixed
8
+ - Fixed `ERR_MODULE_NOT_FOUND` crash on `pi-antigravity-rotator login` (and all other CLI commands) when installed globally via npm or Volta. The binary entry point was resolving `src/cli.ts` relative to `bin/` instead of the package root, causing Node to look for `bin/src/cli.ts`, which does not exist in any install layout. Changed the import path from `./src/cli.ts` to `../src/cli.ts`.
9
+
10
+ ## [1.3.8] - 2026-04-26
11
+
12
+ ### Fixed
13
+ - Persist per-model request-count rotation counters across restarts so configured request thresholds continue to work after service reloads.
14
+ - Keep serving from the current healthy account when request-count rotation reaches its threshold but no replacement account is available, avoiding unnecessary `503` responses while usable quota remains.
15
+
5
16
  ## [1.3.7] - 2026-04-25
6
17
 
7
18
  ### Fixed
package/README.md CHANGED
@@ -16,7 +16,7 @@ Multi-account rotation proxy for Google Antigravity. Distributes API usage acros
16
16
  - **Token auto-refresh** -- Tokens are refreshed automatically before expiry; no manual management
17
17
  - **Endpoint cascade** -- Tries daily, autopush, and prod API endpoints for resilience
18
18
  - **Web dashboard** -- Real-time view of model routing table, per-account quota bars with per-model timers, and flagged account alerts
19
- - **State persistence** -- Survives restarts; routing assignments, cooldowns, and flags are saved to disk
19
+ - **State persistence** -- Survives restarts; routing assignments, per-model request counters, cooldowns, and flags are saved to disk
20
20
 
21
21
  ## Quick Start
22
22
 
@@ -148,7 +148,7 @@ Three mechanisms trigger rotation, scoped to the specific model:
148
148
 
149
149
  1. **Quota-based** (primary) -- Polls the Google quota API every 5 minutes. When a model's remaining quota drops by `rotateOnQuotaDrop` percentage points (default: 20%), that model rotates to the next account. Other models stay on their current accounts.
150
150
 
151
- 2. **Request-count** (fallback) -- Before forwarding a request, the rotator checks how many requests the current account has already served for that specific model and rotates once it reaches `requestsPerRotation` (default: 5). By default this fallback is only used when quota data for that model is still unknown.
151
+ 2. **Request-count** (fallback) -- Before forwarding a request, the rotator checks how many requests the current account has already served for that specific model and rotates once it reaches `requestsPerRotation` (default: 5). Per-model counters are persisted so restarts do not reset the threshold. By default this fallback is only used when quota data for that model is still unknown; set `useRequestCountRotationWhenQuotaUnknownOnly` to `false` to keep request-count rotation active even when quota telemetry exists. If the threshold is reached but every replacement account is cooling down, flagged, disabled, busy, blocked by fresh-window policy, or out of quota for that model, the rotator stays on the current healthy account instead of returning `503`.
152
152
 
153
153
  3. **429 failover** (reactive) -- On rate limit, the account is marked exhausted with a parsed retry cooldown and the affected model immediately switches.
154
154
 
@@ -253,12 +253,12 @@ pi-antigravity-rotator start --config-dir /path/to/config
253
253
  | Field | Default | Description |
254
254
  |-------|---------|-------------|
255
255
  | `proxyPort` | `51200` | Port the proxy listens on |
256
- | `requestsPerRotation` | `5` | Max requests before rotating (fallback trigger) |
256
+ | `requestsPerRotation` | `5` | Max per-model requests before attempting request-count rotation |
257
257
  | `rotateOnQuotaDrop` | `20` | Rotate when a model's quota drops this many %. Set to `0` to disable |
258
258
  | `quotaPollIntervalMs` | `300000` | Quota poll interval in ms (5 minutes) |
259
259
  | `maxConcurrentRequestsPerAccount` | `1` | Max simultaneous requests allowed per account |
260
260
  | `protectivePauseMs` | `21600000` | Global routing pause after a serious provider enforcement signal |
261
- | `useRequestCountRotationWhenQuotaUnknownOnly` | `true` | Use request-count rotation only until quota telemetry exists for the request's model |
261
+ | `useRequestCountRotationWhenQuotaUnknownOnly` | `true` | Use request-count rotation only until quota telemetry exists for the request's model. Set to `false` to keep rotating by request count even with known quotas |
262
262
 
263
263
  ### Account Fields
264
264
 
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
2
  import("tsx/esm/api").then(({ register }) => {
3
3
  register();
4
- return import("./src/cli.ts");
4
+ return import("../src/cli.ts");
5
5
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-antigravity-rotator",
3
- "version": "1.3.7",
3
+ "version": "1.3.9",
4
4
  "description": "Multi-account rotation proxy for Google Antigravity with per-model routing, real-time quota tracking, and infringement detection",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -11,13 +11,13 @@
11
11
  "start": "tsx src/cli.ts start",
12
12
  "login": "tsx src/cli.ts login"
13
13
  },
14
- "files": [
15
- "bin/",
16
- "src/",
17
- "CHANGELOG.md",
18
- "README.md",
19
- "LICENSE"
20
- ],
14
+ "files": [
15
+ "bin/",
16
+ "src/",
17
+ "CHANGELOG.md",
18
+ "README.md",
19
+ "LICENSE"
20
+ ],
21
21
  "keywords": [
22
22
  "pi-package",
23
23
  "pi",
package/src/dashboard.ts CHANGED
@@ -39,6 +39,12 @@ export function serveAccountFreshWindowStartsApi(
39
39
  res.end(JSON.stringify({ ok, email, allowFreshWindowStartsOverride: enabled }));
40
40
  }
41
41
 
42
+ export function serveClearInFlightApi(res: ServerResponse, rotator: AccountRotator, email: string, modelKey?: string): void {
43
+ const ok = rotator.clearInFlightRequests(email, modelKey);
44
+ res.writeHead(ok ? 200 : 404, { "Content-Type": "application/json" });
45
+ res.end(JSON.stringify({ ok, email, modelKey }));
46
+ }
47
+
42
48
  const DASHBOARD_HTML = `<!DOCTYPE html>
43
49
  <html lang="en">
44
50
  <head>
@@ -412,6 +418,33 @@ const DASHBOARD_HTML = `<!DOCTYPE html>
412
418
  flex-shrink: 0;
413
419
  }
414
420
 
421
+ .quota-action {
422
+ width: 54px;
423
+ flex-shrink: 0;
424
+ }
425
+
426
+ .btn-clear-flight {
427
+ width: 54px;
428
+ border: 1px solid rgba(96, 165, 250, 0.28);
429
+ background: rgba(96, 165, 250, 0.08);
430
+ color: var(--blue);
431
+ border-radius: 4px;
432
+ font-size: 9px;
433
+ font-family: var(--font);
434
+ font-weight: 700;
435
+ padding: 2px 4px;
436
+ cursor: pointer;
437
+ }
438
+
439
+ .btn-clear-flight:hover { background: rgba(96, 165, 250, 0.16); }
440
+ .btn-clear-flight:disabled {
441
+ border-color: var(--border);
442
+ background: rgba(255,255,255,0.03);
443
+ color: var(--text-dim);
444
+ cursor: not-allowed;
445
+ opacity: 0.55;
446
+ }
447
+
415
448
  .pulse { animation: pulse 2s ease-in-out infinite; }
416
449
  @keyframes pulse { 0%, 100% { opacity: 1; } 50% { opacity: 0.6; } }
417
450
 
@@ -851,9 +884,14 @@ function timerDisplayLabel(timerType) {
851
884
  return timerType === 'fresh' ? 'idle' : timerType;
852
885
  }
853
886
 
854
- function renderQuotaBars(quota) {
887
+ function renderQuotaBars(account) {
888
+ var quota = account.quota;
855
889
  if (!quota || quota.length === 0) return '';
856
890
  var rows = quota.map(function(q) {
891
+ var inFlightForModel = (account.inFlightByModel || {})[q.modelKey] || 0;
892
+ var clearButton = inFlightForModel > 0
893
+ ? '<button class="btn-clear-flight" title="Clear in-flight counter for ' + q.displayName + '" onclick="clearInFlight(\\'' + account.email + '\\', \\'' + q.modelKey + '\\')">Clear</button>'
894
+ : '<button class="btn-clear-flight" title="No in-flight requests for ' + q.displayName + '" disabled>Clear</button>';
857
895
  var color = quotaBarColor(q.percentRemaining);
858
896
  var timerClass = 'timer-' + q.timerType;
859
897
  var resetLabel = '';
@@ -867,6 +905,7 @@ function renderQuotaBars(quota) {
867
905
  '<div class="quota-bar-bg"><div class="quota-bar-fill" style="width:' + q.percentRemaining + '%;background:' + color + '"></div></div>' +
868
906
  '<span class="quota-pct" style="color:' + color + '">' + q.percentRemaining + '%</span>' +
869
907
  '<span class="quota-reset">' + (resetLabel || '--') + '</span>' +
908
+ '<span class="quota-action">' + clearButton + '</span>' +
870
909
  '</div>';
871
910
  }).join('');
872
911
  return '<div class="quota-section"><div class="quota-section-title">Quota (per model)</div>' + rows + '</div>';
@@ -967,7 +1006,7 @@ function renderAccounts(data) {
967
1006
  '</div>' +
968
1007
  '</div>' +
969
1008
  '<div class="card-email">' + maskEmail(a.email) + '</div>' +
970
- (a.quota && a.quota.length > 0 ? renderQuotaBars(a.quota) : '') +
1009
+ (a.quota && a.quota.length > 0 ? renderQuotaBars(a) : '') +
971
1010
  '<div class="card-stats">' +
972
1011
  '<div class="card-stat"><div class="stat-label">Requests</div><div class="stat-value">' +
973
1012
  a.requestsSinceRotation + ' / ' + a.totalRequests + ' total</div></div>' +
@@ -1163,6 +1202,12 @@ async function setAccountFreshWindowOverride(email, enabled) {
1163
1202
  refresh();
1164
1203
  }
1165
1204
 
1205
+ async function clearInFlight(email, modelKey) {
1206
+ if (!confirm('Clear in-flight counter for this account/model? Use only when you are sure the request is stuck.')) return;
1207
+ await fetch('/api/clear-inflight/' + encodeURIComponent(email) + '/' + encodeURIComponent(modelKey), { method: 'POST' });
1208
+ refresh();
1209
+ }
1210
+
1166
1211
  function renderProAdvisor(advisor) {
1167
1212
  var panel = document.getElementById('proAdvisor');
1168
1213
  var button = document.getElementById('advisorBtn');
package/src/proxy.ts CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  import { createServer, type IncomingMessage, type ServerResponse } from "node:http";
4
4
  import { Readable } from "node:stream";
5
- import { ANTIGRAVITY_ENDPOINTS } from "./types.js";
5
+ import { ANTIGRAVITY_ENDPOINTS, resolveQuotaModelKey } from "./types.js";
6
6
  import type { AccountRuntime } from "./types.js";
7
7
  import type { AccountRotator } from "./rotator.js";
8
8
  import {
@@ -11,6 +11,7 @@ import {
11
11
  serveEnableApi,
12
12
  serveFreshWindowStartsApi,
13
13
  serveAccountFreshWindowStartsApi,
14
+ serveClearInFlightApi,
14
15
  } from "./dashboard.js";
15
16
  import { handleHostedCallback, serveLoginLanding, startHostedLogin } from "./onboarding.js";
16
17
 
@@ -304,7 +305,7 @@ async function handleProxyRequest(
304
305
  const rotateAndRelease = async (): Promise<AccountRuntime | null> => {
305
306
  const nextAccount = await rotator.rotateToNext(body.model);
306
307
  if (nextAccount) {
307
- rotator.finishRequest(nextAccount);
308
+ rotator.finishRequest(nextAccount, resolveQuotaModelKey(body.model) ?? undefined);
308
309
  }
309
310
  return nextAccount;
310
311
  };
@@ -327,12 +328,17 @@ async function handleProxyRequest(
327
328
  const cooldownMs = capCooldown(extractRetryDelay(errorText, response.headers));
328
329
  proxyLog(`[${label}] 429 rate limited, cooldown ${Math.ceil(cooldownMs / 1000)}s`, "warn");
329
330
  rotator.markExhausted(account, cooldownMs);
330
- const nextAccount = await rotateAndRelease();
331
- if (!nextAccount) {
332
- sendNoAccountsAvailable(`all candidate accounts are cooling down after ${label} was rate limited`);
333
- return;
334
- }
335
- continue;
331
+ res.writeHead(503, {
332
+ "Content-Type": "application/json",
333
+ "Retry-After": String(Math.ceil(cooldownMs / 1000)),
334
+ });
335
+ res.end(JSON.stringify({
336
+ error: "Rate limited",
337
+ reason: `${label} was rate limited; not retrying another account for this request`,
338
+ model: body.model,
339
+ retryAfterMs: cooldownMs,
340
+ }));
341
+ return;
336
342
  }
337
343
 
338
344
  if (response.status === 401) {
@@ -427,7 +433,7 @@ async function handleProxyRequest(
427
433
  }
428
434
  continue;
429
435
  } finally {
430
- rotator.finishRequest(account);
436
+ rotator.finishRequest(account, resolveQuotaModelKey(body.model) ?? undefined);
431
437
  }
432
438
  }
433
439
 
@@ -490,6 +496,15 @@ export function startProxy(rotator: AccountRotator, port: number): void {
490
496
  return;
491
497
  }
492
498
 
499
+ if (method === "POST" && url.startsWith("/api/clear-inflight/")) {
500
+ const rest = url.slice("/api/clear-inflight/".length);
501
+ const firstSlash = rest.indexOf("/");
502
+ const email = decodeURIComponent(firstSlash >= 0 ? rest.slice(0, firstSlash) : rest);
503
+ const modelKey = firstSlash >= 0 ? decodeURIComponent(rest.slice(firstSlash + 1)) : undefined;
504
+ serveClearInFlightApi(res, rotator, email, modelKey);
505
+ return;
506
+ }
507
+
493
508
  if (method === "POST" && (url === "/api/settings/fresh-window-starts/on" || url === "/api/settings/fresh-window-starts/off")) {
494
509
  serveFreshWindowStartsApi(res, rotator, url.endsWith("/on"));
495
510
  return;
package/src/rotator.ts CHANGED
@@ -62,6 +62,7 @@ export class AccountRotator {
62
62
  disabled: false,
63
63
  flagged: false,
64
64
  inFlightRequests: 0,
65
+ inFlightByModel: {},
65
66
  allowFreshWindowStartsOverride: false,
66
67
  }));
67
68
  }
@@ -78,7 +79,7 @@ export class AccountRotator {
78
79
  this.modelState.set(model, {
79
80
  activeAccountIndex: Math.min(idx, this.accounts.length - 1),
80
81
  quotaAtRotationStart: -1,
81
- requestsOnActiveAccount: 0,
82
+ requestsOnActiveAccount: state.modelRequestCounts?.[model] ?? 0,
82
83
  });
83
84
  }
84
85
  }
@@ -117,12 +118,15 @@ export class AccountRotator {
117
118
 
118
119
  saveState(): void {
119
120
  const modelAccounts: Record<string, number> = {};
121
+ const modelRequestCounts: Record<string, number> = {};
120
122
  for (const [model, state] of this.modelState.entries()) {
121
123
  modelAccounts[model] = state.activeAccountIndex;
124
+ modelRequestCounts[model] = state.requestsOnActiveAccount;
122
125
  }
123
126
 
124
127
  const state: PersistedState = {
125
128
  modelAccounts,
129
+ modelRequestCounts,
126
130
  currentIndex: this.defaultIndex,
127
131
  protectivePauseUntil: this.protectivePauseUntil,
128
132
  protectivePauseReason: this.protectivePauseReason,
@@ -201,7 +205,7 @@ export class AccountRotator {
201
205
  if (drop >= this.config.rotateOnQuotaDrop) {
202
206
  // Only rotate if there's a healthy account to rotate to
203
207
  const hasHealthy = this.accounts.some(
204
- (a, idx) => idx !== mState.activeAccountIndex && this.isAvailable(a, Date.now()),
208
+ (a, idx) => idx !== mState.activeAccountIndex && this.isRoutableForModel(a, modelKey, Date.now()),
205
209
  );
206
210
  if (hasHealthy) {
207
211
  this.log(
@@ -331,7 +335,7 @@ export class AccountRotator {
331
335
  private hasTimedCandidate(modelKey: string, now: number, excludeIdx: number = -1): boolean {
332
336
  return this.accounts.some((account, idx) => {
333
337
  if (idx === excludeIdx) return false;
334
- if (!this.isAvailable(account, now)) return false;
338
+ if (!this.isAvailableForModel(account, modelKey, now)) return false;
335
339
  if (this.getModelQuota(account, modelKey) === 0) return false;
336
340
  return this.isTimedWindow(account, modelKey);
337
341
  });
@@ -346,7 +350,7 @@ export class AccountRotator {
346
350
  for (let i = 0; i < this.accounts.length; i++) {
347
351
  if (i === excludeIdx) continue;
348
352
  const account = this.accounts[i];
349
- if (!this.isAvailable(account, now)) continue;
353
+ if (!this.isAvailableForModel(account, modelKey, now)) continue;
350
354
 
351
355
  const quota = this.getModelQuota(account, modelKey);
352
356
  if (quota === 0) continue;
@@ -374,6 +378,7 @@ export class AccountRotator {
374
378
  const state = this.modelState.get(modelKey);
375
379
  if (state) {
376
380
  state.requestsOnActiveAccount++;
381
+ this.saveState();
377
382
  }
378
383
  }
379
384
 
@@ -409,7 +414,7 @@ export class AccountRotator {
409
414
  const idx = state?.activeAccountIndex ?? this.defaultIndex;
410
415
 
411
416
  const current = this.accounts[idx];
412
- if (current && this.isAvailable(current, now)) {
417
+ if (current && (!modelKey ? this.isAvailable(current, now) : this.isAvailableForModel(current, modelKey, now))) {
413
418
  // Check if this account has quota for the requested model
414
419
  if (modelKey) {
415
420
  if (this.shouldRotateBeforeRequest(current, modelKey, state ?? null)) {
@@ -422,7 +427,7 @@ export class AccountRotator {
422
427
  return rotated;
423
428
  }
424
429
  this.log(
425
- `${current.config.label || current.config.email} [${modelKey}]: threshold reached but no replacement is available, staying`,
430
+ `${current.config.label || current.config.email} [${modelKey}]: threshold reached but no replacement is available, staying on current account`,
426
431
  "warn",
427
432
  );
428
433
  }
@@ -444,13 +449,13 @@ export class AccountRotator {
444
449
  return this.rotateModelForRequest(modelKey);
445
450
  }
446
451
  }
447
- this.startRequest(current);
452
+ this.startRequest(current, modelKey ?? undefined);
448
453
  try {
449
454
  await this.ensureValidToken(current);
450
455
  if (modelKey) this.countModelAssignment(modelKey);
451
456
  return current;
452
457
  } catch (err) {
453
- this.finishRequest(current);
458
+ this.finishRequest(current, modelKey ?? undefined);
454
459
  throw err;
455
460
  }
456
461
  }
@@ -483,19 +488,19 @@ export class AccountRotator {
483
488
  `[${modelKey}] Rotated to ${best.config.label || best.config.email} [${timerType}] (quota: ${quota >= 0 ? quota + "%" : "unknown"})`,
484
489
  );
485
490
  this.saveState();
486
- this.startRequest(best);
491
+ this.startRequest(best, modelKey);
487
492
  try {
488
493
  await this.ensureValidToken(best);
489
494
  return best;
490
495
  } catch (err) {
491
- this.finishRequest(best);
496
+ this.finishRequest(best, modelKey);
492
497
  throw err;
493
498
  }
494
499
  }
495
500
 
496
- if (!this.allowFreshWindowStarts && this.accounts.some((account, idx) => {
497
- if (idx === excludeIdx) return false;
498
- if (!this.isAvailable(account, now)) return false;
501
+ if (!this.allowFreshWindowStarts && this.accounts.some((account, idx) => {
502
+ if (idx === excludeIdx) return false;
503
+ if (!this.isAvailableForModel(account, modelKey, now)) return false;
499
504
  if (this.getModelQuota(account, modelKey) === 0) return false;
500
505
  return this.getModelTimerType(account, modelKey) === "fresh";
501
506
  })) {
@@ -580,8 +585,22 @@ export class AccountRotator {
580
585
  account.consecutiveErrors = 0;
581
586
  account.lastError = null;
582
587
 
588
+ const modelKey = model ? resolveQuotaModelKey(model) : null;
589
+ const state = modelKey ? this.modelState.get(modelKey) : null;
590
+ const shouldRotate =
591
+ !!modelKey &&
592
+ !!state &&
593
+ this.accounts[state.activeAccountIndex] === account &&
594
+ this.shouldUseRequestCountRotation(account, modelKey) &&
595
+ state.requestsOnActiveAccount >= this.config.requestsPerRotation;
596
+
583
597
  this.saveState();
584
- return false;
598
+ if (shouldRotate) {
599
+ this.log(
600
+ `${account.config.label || account.config.email} [${modelKey}]: hit rotation threshold (${state.requestsOnActiveAccount}/${this.config.requestsPerRotation})`,
601
+ );
602
+ }
603
+ return shouldRotate;
585
604
  }
586
605
 
587
606
  // Mark an account as exhausted (429 or quota exceeded)
@@ -652,6 +671,23 @@ export class AccountRotator {
652
671
  return true;
653
672
  }
654
673
 
674
+ clearInFlightRequests(email: string, modelKey?: string): boolean {
675
+ const account = this.accounts.find((a) => a.config.email === email);
676
+ if (!account) return false;
677
+ if (modelKey) {
678
+ const previous = account.inFlightByModel[modelKey] ?? 0;
679
+ account.inFlightByModel[modelKey] = 0;
680
+ this.recalculateInFlightRequests(account);
681
+ this.log(`${email}: operator cleared ${previous} in-flight request(s) for ${modelKey}`, "warn");
682
+ return true;
683
+ }
684
+ const previous = account.inFlightRequests;
685
+ account.inFlightRequests = 0;
686
+ account.inFlightByModel = {};
687
+ this.log(`${email}: operator cleared ${previous} in-flight request(s)`, "warn");
688
+ return true;
689
+ }
690
+
655
691
  async ensureValidToken(account: AccountRuntime): Promise<void> {
656
692
  const now = Date.now();
657
693
  if (account.accessToken && account.tokenExpires > now) {
@@ -702,7 +738,12 @@ export class AccountRotator {
702
738
  if (account.disabled) return false;
703
739
  if (account.flagged) return false;
704
740
  if (account.cooldownUntil > now) return false;
705
- if (account.inFlightRequests >= (this.config.maxConcurrentRequestsPerAccount ?? 1)) return false;
741
+ return true;
742
+ }
743
+
744
+ private isAvailableForModel(account: AccountRuntime, modelKey: string, now: number): boolean {
745
+ if (!this.isAvailable(account, now)) return false;
746
+ if ((account.inFlightByModel[modelKey] ?? 0) >= (this.config.maxConcurrentRequestsPerAccount ?? 1)) return false;
706
747
  return true;
707
748
  }
708
749
 
@@ -711,6 +752,7 @@ export class AccountRotator {
711
752
  account.flagged = true;
712
753
  account.lastError = reason;
713
754
  account.inFlightRequests = 0;
755
+ account.inFlightByModel = {};
714
756
  this.log(`${account.config.email}: FLAGGED - ${reason}`, "error");
715
757
  if (this.shouldTriggerProtectivePause(reason)) {
716
758
  this.protectivePauseUntil = Date.now() + (this.config.protectivePauseMs ?? 6 * 60 * 60 * 1000);
@@ -723,22 +765,38 @@ export class AccountRotator {
723
765
  this.saveState();
724
766
  }
725
767
 
726
- startRequest(account: AccountRuntime): void {
727
- account.inFlightRequests++;
768
+ startRequest(account: AccountRuntime, modelKey?: string): void {
769
+ const key = modelKey ?? "__default__";
770
+ account.inFlightByModel[key] = (account.inFlightByModel[key] ?? 0) + 1;
771
+ this.recalculateInFlightRequests(account);
772
+ }
773
+
774
+ finishRequest(account: AccountRuntime, modelKey?: string): void {
775
+ const key = modelKey ?? "__default__";
776
+ account.inFlightByModel[key] = Math.max(0, (account.inFlightByModel[key] ?? 0) - 1);
777
+ if (account.inFlightByModel[key] === 0) delete account.inFlightByModel[key];
778
+ this.recalculateInFlightRequests(account);
728
779
  }
729
780
 
730
- finishRequest(account: AccountRuntime): void {
731
- account.inFlightRequests = Math.max(0, account.inFlightRequests - 1);
781
+ private recalculateInFlightRequests(account: AccountRuntime): void {
782
+ account.inFlightRequests = Object.values(account.inFlightByModel).reduce((sum, count) => sum + count, 0);
783
+ }
784
+
785
+ private isRoutableForModel(account: AccountRuntime, modelKey: string, now: number): boolean {
786
+ if (!this.isAvailableForModel(account, modelKey, now)) return false;
787
+ if (this.getModelQuota(account, modelKey) === 0) return false;
788
+ if (!this.isFreshWindowAllowed(account, modelKey)) return false;
789
+ return true;
732
790
  }
733
791
 
734
792
  getStatus(): StatusResponse {
735
793
  const now = Date.now();
736
794
 
737
- // Build per-model active account map
795
+ // Build per-model active account map from accounts that can actually serve now.
738
796
  const activeAccounts: Record<string, string> = {};
739
797
  for (const [model, mState] of this.modelState.entries()) {
740
798
  const account = this.accounts[mState.activeAccountIndex];
741
- if (account) {
799
+ if (account && this.isRoutableForModel(account, model, now)) {
742
800
  activeAccounts[model] = account.config.email;
743
801
  }
744
802
  }
@@ -747,7 +805,7 @@ export class AccountRotator {
747
805
  // Determine which models this account is active for
748
806
  const activeForModels: string[] = [];
749
807
  for (const [model, mState] of this.modelState.entries()) {
750
- if (this.accounts[mState.activeAccountIndex] === a) {
808
+ if (this.accounts[mState.activeAccountIndex] === a && this.isRoutableForModel(a, model, now)) {
751
809
  activeForModels.push(model);
752
810
  }
753
811
  }
@@ -778,11 +836,12 @@ export class AccountRotator {
778
836
  cooldownRemaining: Math.max(0, a.cooldownUntil - now),
779
837
  lastUsed: a.lastUsed,
780
838
  lastError: a.lastError,
781
- consecutiveErrors: a.consecutiveErrors,
782
- hasValidToken: !!(a.accessToken && a.tokenExpires > now),
783
- quota: a.quota,
784
- inFlightRequests: a.inFlightRequests,
785
- proDetected: this.isProAccount(a),
839
+ consecutiveErrors: a.consecutiveErrors,
840
+ hasValidToken: !!(a.accessToken && a.tokenExpires > now),
841
+ quota: a.quota,
842
+ inFlightRequests: a.inFlightRequests,
843
+ inFlightByModel: a.inFlightByModel,
844
+ proDetected: this.isProAccount(a),
786
845
  familyManager: !!a.config.familyManager,
787
846
  allowFreshWindowStartsOverride: a.allowFreshWindowStartsOverride,
788
847
  effectiveFreshWindowStartsAllowed: this.isEffectiveFreshWindowAllowed(a),
@@ -842,8 +901,9 @@ export class AccountRotator {
842
901
  lastError: null,
843
902
  consecutiveErrors: 0,
844
903
  disabled: false,
845
- flagged: false,
846
- inFlightRequests: 0,
904
+ flagged: false,
905
+ inFlightRequests: 0,
906
+ inFlightByModel: {},
847
907
  allowFreshWindowStartsOverride: false,
848
908
  };
849
909
  this.accounts.push(runtime);
package/src/types.ts CHANGED
@@ -110,6 +110,7 @@ export interface AccountRuntime {
110
110
  disabled: boolean; // permanently disabled (revoked token, etc.)
111
111
  flagged: boolean; // flagged for infringement/abuse by Google
112
112
  inFlightRequests: number;
113
+ inFlightByModel: Record<string, number>;
113
114
  allowFreshWindowStartsOverride: boolean;
114
115
  }
115
116
 
@@ -124,6 +125,8 @@ export interface ModelRotationState {
124
125
  export interface PersistedState {
125
126
  // Per-model active account index
126
127
  modelAccounts: Record<string, number>;
128
+ // Per-model request count on the active account
129
+ modelRequestCounts?: Record<string, number>;
127
130
  // Legacy fallback
128
131
  currentIndex?: number;
129
132
  protectivePauseUntil?: number;
@@ -195,6 +198,7 @@ export interface AccountStatus {
195
198
  hasValidToken: boolean;
196
199
  quota: ModelQuota[];
197
200
  inFlightRequests: number;
201
+ inFlightByModel: Record<string, number>;
198
202
  // Pro family sharing
199
203
  proDetected: boolean;
200
204
  familyManager: boolean;