pi-antigravity-rotator 1.3.6 → 1.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,127 @@
1
+ # Changelog
2
+
3
+ ## [Unreleased]
4
+
5
+ ## [1.3.8] - 2026-04-26
6
+
7
+ ### Fixed
8
+ - Persist per-model request-count rotation counters across restarts so configured request thresholds continue to work after service reloads.
9
+ - Keep serving from the current healthy account when request-count rotation reaches its threshold but no replacement account is available, avoiding unnecessary `503` responses while usable quota remains.
10
+
11
+ ## [1.3.7] - 2026-04-25
12
+
13
+ ### Fixed
14
+ - Release in-flight account reservations when a streaming response closes early, the client disconnects, or the upstream stream goes idle, preventing accounts from getting stuck as busy indefinitely.
15
+
16
+ ## [1.3.6] - 2026-04-24
17
+
18
+ ### Fixed
19
+ - Treat Node `fetch failed` transport errors as transient upstream/network failures instead of account health errors, avoiding false account disables during stalled requests.
20
+
21
+ ## [1.3.5] - 2026-04-24
22
+
23
+ ### Fixed
24
+ - Make request-count rotation deterministic by counting per-model account assignments before the next request is forwarded, instead of rotating only after a successful response completes.
25
+
26
+ ## [1.3.4] - 2026-04-24
27
+
28
+ ### Fixed
29
+ - Rotate fairly among accounts that tie on model timer priority and remaining quota instead of repeatedly selecting the first matching candidate.
30
+
31
+ ## [1.3.3] - 2026-04-23
32
+
33
+ ### Added
34
+ - Hosted Antigravity login flow so operators can complete Google account linking from a browser and feed the callback URL back into the rotator workflow.
35
+ - Global fresh-window operator control plus per-account override so dormant quota windows can be blocked pool-wide and selectively re-enabled account by account.
36
+ - Header modal launchers for Attention Needed and Pro Family Advisor to keep operator actions available without taking permanent dashboard space.
37
+
38
+ ### Changed
39
+ - Reworked the dashboard layout to prioritize the account grid above the fold: request totals moved into the header, bulky summary widgets were removed, and Recent Events now sits at the bottom.
40
+ - Simplified the header by moving the PII visibility toggle next to the title and removing the inline model-routing pills.
41
+ - Tightened the routing health strip with denser pills, single-line counters, and clearer spacing between major dashboard sections.
42
+
43
+ ## [1.3.2] - 2026-04-23
44
+
45
+ ### Added
46
+ - Routing health panel in the dashboard with current state, stop reason, retry window, and pool blocker counts.
47
+ - Attention Needed summary panel for flagged, cooling, disabled, and error accounts.
48
+ - Recent Events feed showing the latest rotator and proxy incidents that led to the current state.
49
+ - In-memory event buffer exposed through the status API for dashboard diagnostics.
50
+ - Conservative concurrency guardrail to cap each account to one in-flight request by default.
51
+ - Protective pause after serious provider ToS/abuse-style flags to stop the rest of the pool from being burned.
52
+
53
+ ### Changed
54
+ - Dashboard now focuses on operator visibility so the service can be monitored without relying on `journalctl`.
55
+ - Request-count rotation is now only used when quota data is still unknown, reducing unnecessary account churn.
56
+ - Flagged accounts remain quarantined until the provider explicitly restores access.
57
+
58
+ ### Fixed
59
+ - Fixed the exhausted fallback path so cooled-down accounts are no longer selected again when all accounts are exhausted.
60
+ - Fixed proxy retry behavior so it returns `503` immediately when no healthy replacement account exists instead of continuing to hammer the pool.
61
+ - Fixed quota polling so flagged accounts are no longer re-polled every cycle after a provider `403`.
62
+ - Fixed bursty same-account pressure by reserving accounts during selection and request handling.
63
+
64
+ ## [1.3.1] - 2026-04-22
65
+
66
+ ### Changed
67
+ - Prioritize Pro 5h accounts in rotation. Accounts with active 5h timers are now drained first to maximize the +40% recharge benefit when the timer expires. Previously they were saved for last, wasting quota.
68
+
69
+ ## [1.3.0] - 2026-04-22
70
+
71
+ ### Added
72
+ - Pro Family Sharing Advisor: dashboard panel suggests when to add/remove accounts from Pro family sharing.
73
+ - Pro/Free/Family Manager badges on account cards (auto-detected from 5h/7d timer type).
74
+ - `familyManager` config flag for the account that owns the family plan.
75
+ - `proSlots` config option for max simultaneous Pro accounts (default 6).
76
+ - Advisor prioritizes accounts by longest reset time when suggesting Pro upgrades.
77
+ - Only G3Pro and Claude quotas are considered for remove-pro decisions (Flash is ignored).
78
+
79
+
80
+ ## [1.2.0] - 2026-04-22
81
+
82
+ ### Added
83
+ - PII masking mode for dashboard (`?mask` URL param or toggle button). Masks emails and labels for screen recordings.
84
+ - Contextual help hints for flagged accounts (verification instructions, Google Account Recovery link).
85
+ - Model-aware quota rotation: accounts with 0% quota for the requested model are skipped instead of wasting requests.
86
+
87
+ ### Fixed
88
+ - Fixed `ReadableStream is locked` crash by using `Response.text()` and `Readable.fromWeb()` instead of raw ReadableStream API.
89
+ - Fixed `ERR_HTTP_HEADERS_SENT` crash when retrying after response headers were already sent.
90
+ - Fixed 403 fallthrough bug: non-flagging 403 responses consumed the body then fell through to streaming, causing locked stream errors.
91
+ - Accounts needing verification (`Verify your account`) are now flagged immediately instead of being retried.
92
+ - Dashboard URL routing now handles query parameters correctly.
93
+
94
+ ## [1.1.0] - 2026-04-22
95
+
96
+ ### Changed
97
+ - Use prod endpoint only (`cloudcode-pa.googleapis.com`). Removed daily/autopush endpoints that caused multi-minute hangs.
98
+ - 503 errors (no capacity) are now returned directly to the agent for its own retry/backoff instead of burning through all accounts.
99
+ - Quota-based rotation only triggers if a healthy account is available. The proxy won't rotate away from a working account if there's no better alternative.
100
+ - Dashboard accounts are sorted by total quota (highest first), flagged/disabled last.
101
+ - Config files now default to `~/.pi-antigravity-rotator/` (overridable via `PI_ROTATOR_DIR` env or `--config-dir` flag).
102
+
103
+ ### Added
104
+ - `POST /api/reset-cooldowns` endpoint to clear all cooldowns at once.
105
+ - CLI entry point with `start`, `login`, and `status` commands.
106
+ - 30-minute max cooldown cap on all exhaustions (prevents multi-day cooldowns).
107
+ - Stale cooldowns from `state.json` are capped to 30 minutes on startup.
108
+ - Case-insensitive authorization header handling (fixes duplicate header bug with pi agent).
109
+ - MIT License.
110
+
111
+ ### Fixed
112
+ - Fixed duplicate `Authorization` header causing 401 on all accounts. Pi sends lowercase `authorization`; the proxy was keeping both the original and the new one.
113
+ - Fixed infinite retry loop when all accounts are exhausted or returning 503 (no capacity).
114
+ - Fixed quota rotation moving away from the only working account when no alternatives are available.
115
+
116
+ ## [1.0.0] - 2026-04-22
117
+
118
+ ### Added
119
+ - Initial release.
120
+ - Per-model routing (Gemini Pro, Flash, Claude).
121
+ - Quota-based rotation with configurable drop threshold.
122
+ - Request-count-based rotation (fallback).
123
+ - 429 failover with automatic cooldown.
124
+ - Account protection: quota API 403, API 401, API 403 keyword detection.
125
+ - Real-time dashboard with account cards, quota bars, and model routing table.
126
+ - OAuth login helper with automatic pi agent configuration.
127
+ - State persistence across restarts.
package/README.md CHANGED
@@ -16,7 +16,7 @@ Multi-account rotation proxy for Google Antigravity. Distributes API usage acros
16
16
  - **Token auto-refresh** -- Tokens are refreshed automatically before expiry; no manual management
17
17
  - **Endpoint cascade** -- Tries daily, autopush, and prod API endpoints for resilience
18
18
  - **Web dashboard** -- Real-time view of model routing table, per-account quota bars with per-model timers, and flagged account alerts
19
- - **State persistence** -- Survives restarts; routing assignments, cooldowns, and flags are saved to disk
19
+ - **State persistence** -- Survives restarts; routing assignments, per-model request counters, cooldowns, and flags are saved to disk
20
20
 
21
21
  ## Quick Start
22
22
 
@@ -148,7 +148,7 @@ Three mechanisms trigger rotation, scoped to the specific model:
148
148
 
149
149
  1. **Quota-based** (primary) -- Polls the Google quota API every 5 minutes. When a model's remaining quota drops by `rotateOnQuotaDrop` percentage points (default: 20%), that model rotates to the next account. Other models stay on their current accounts.
150
150
 
151
- 2. **Request-count** (fallback) -- Before forwarding a request, the rotator checks how many requests the current account has already served for that specific model and rotates once it reaches `requestsPerRotation` (default: 5). By default this fallback is only used when quota data for that model is still unknown.
151
+ 2. **Request-count** (fallback) -- Before forwarding a request, the rotator checks how many requests the current account has already served for that specific model and rotates once it reaches `requestsPerRotation` (default: 5). Per-model counters are persisted so restarts do not reset the threshold. By default this fallback is only used when quota data for that model is still unknown; set `useRequestCountRotationWhenQuotaUnknownOnly` to `false` to keep request-count rotation active even when quota telemetry exists. If the threshold is reached but every replacement account is cooling down, flagged, disabled, busy, blocked by fresh-window policy, or out of quota for that model, the rotator stays on the current healthy account instead of returning `503`.
152
152
 
153
153
  3. **429 failover** (reactive) -- On rate limit, the account is marked exhausted with a parsed retry cooldown and the affected model immediately switches.
154
154
 
@@ -253,12 +253,12 @@ pi-antigravity-rotator start --config-dir /path/to/config
253
253
  | Field | Default | Description |
254
254
  |-------|---------|-------------|
255
255
  | `proxyPort` | `51200` | Port the proxy listens on |
256
- | `requestsPerRotation` | `5` | Max requests before rotating (fallback trigger) |
256
+ | `requestsPerRotation` | `5` | Max per-model requests before attempting request-count rotation |
257
257
  | `rotateOnQuotaDrop` | `20` | Rotate when a model's quota drops this many %. Set to `0` to disable |
258
258
  | `quotaPollIntervalMs` | `300000` | Quota poll interval in ms (5 minutes) |
259
259
  | `maxConcurrentRequestsPerAccount` | `1` | Max simultaneous requests allowed per account |
260
260
  | `protectivePauseMs` | `21600000` | Global routing pause after a serious provider enforcement signal |
261
- | `useRequestCountRotationWhenQuotaUnknownOnly` | `true` | Use request-count rotation only until quota telemetry exists for the request's model |
261
+ | `useRequestCountRotationWhenQuotaUnknownOnly` | `true` | Use request-count rotation only until quota telemetry exists for the request's model. Set to `false` to keep rotating by request count even with known quotas |
262
262
 
263
263
  ### Account Fields
264
264
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-antigravity-rotator",
3
- "version": "1.3.6",
3
+ "version": "1.3.8",
4
4
  "description": "Multi-account rotation proxy for Google Antigravity with per-model routing, real-time quota tracking, and infringement detection",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -14,6 +14,7 @@
14
14
  "files": [
15
15
  "bin/",
16
16
  "src/",
17
+ "CHANGELOG.md",
17
18
  "README.md",
18
19
  "LICENSE"
19
20
  ],
package/src/dashboard.ts CHANGED
@@ -39,6 +39,12 @@ export function serveAccountFreshWindowStartsApi(
39
39
  res.end(JSON.stringify({ ok, email, allowFreshWindowStartsOverride: enabled }));
40
40
  }
41
41
 
42
+ export function serveClearInFlightApi(res: ServerResponse, rotator: AccountRotator, email: string, modelKey?: string): void {
43
+ const ok = rotator.clearInFlightRequests(email, modelKey);
44
+ res.writeHead(ok ? 200 : 404, { "Content-Type": "application/json" });
45
+ res.end(JSON.stringify({ ok, email, modelKey }));
46
+ }
47
+
42
48
  const DASHBOARD_HTML = `<!DOCTYPE html>
43
49
  <html lang="en">
44
50
  <head>
@@ -412,6 +418,33 @@ const DASHBOARD_HTML = `<!DOCTYPE html>
412
418
  flex-shrink: 0;
413
419
  }
414
420
 
421
+ .quota-action {
422
+ width: 54px;
423
+ flex-shrink: 0;
424
+ }
425
+
426
+ .btn-clear-flight {
427
+ width: 54px;
428
+ border: 1px solid rgba(96, 165, 250, 0.28);
429
+ background: rgba(96, 165, 250, 0.08);
430
+ color: var(--blue);
431
+ border-radius: 4px;
432
+ font-size: 9px;
433
+ font-family: var(--font);
434
+ font-weight: 700;
435
+ padding: 2px 4px;
436
+ cursor: pointer;
437
+ }
438
+
439
+ .btn-clear-flight:hover { background: rgba(96, 165, 250, 0.16); }
440
+ .btn-clear-flight:disabled {
441
+ border-color: var(--border);
442
+ background: rgba(255,255,255,0.03);
443
+ color: var(--text-dim);
444
+ cursor: not-allowed;
445
+ opacity: 0.55;
446
+ }
447
+
415
448
  .pulse { animation: pulse 2s ease-in-out infinite; }
416
449
  @keyframes pulse { 0%, 100% { opacity: 1; } 50% { opacity: 0.6; } }
417
450
 
@@ -851,9 +884,14 @@ function timerDisplayLabel(timerType) {
851
884
  return timerType === 'fresh' ? 'idle' : timerType;
852
885
  }
853
886
 
854
- function renderQuotaBars(quota) {
887
+ function renderQuotaBars(account) {
888
+ var quota = account.quota;
855
889
  if (!quota || quota.length === 0) return '';
856
890
  var rows = quota.map(function(q) {
891
+ var inFlightForModel = (account.inFlightByModel || {})[q.modelKey] || 0;
892
+ var clearButton = inFlightForModel > 0
893
+ ? '<button class="btn-clear-flight" title="Clear in-flight counter for ' + q.displayName + '" onclick="clearInFlight(\\'' + account.email + '\\', \\'' + q.modelKey + '\\')">Clear</button>'
894
+ : '<button class="btn-clear-flight" title="No in-flight requests for ' + q.displayName + '" disabled>Clear</button>';
857
895
  var color = quotaBarColor(q.percentRemaining);
858
896
  var timerClass = 'timer-' + q.timerType;
859
897
  var resetLabel = '';
@@ -867,6 +905,7 @@ function renderQuotaBars(quota) {
867
905
  '<div class="quota-bar-bg"><div class="quota-bar-fill" style="width:' + q.percentRemaining + '%;background:' + color + '"></div></div>' +
868
906
  '<span class="quota-pct" style="color:' + color + '">' + q.percentRemaining + '%</span>' +
869
907
  '<span class="quota-reset">' + (resetLabel || '--') + '</span>' +
908
+ '<span class="quota-action">' + clearButton + '</span>' +
870
909
  '</div>';
871
910
  }).join('');
872
911
  return '<div class="quota-section"><div class="quota-section-title">Quota (per model)</div>' + rows + '</div>';
@@ -967,7 +1006,7 @@ function renderAccounts(data) {
967
1006
  '</div>' +
968
1007
  '</div>' +
969
1008
  '<div class="card-email">' + maskEmail(a.email) + '</div>' +
970
- (a.quota && a.quota.length > 0 ? renderQuotaBars(a.quota) : '') +
1009
+ (a.quota && a.quota.length > 0 ? renderQuotaBars(a) : '') +
971
1010
  '<div class="card-stats">' +
972
1011
  '<div class="card-stat"><div class="stat-label">Requests</div><div class="stat-value">' +
973
1012
  a.requestsSinceRotation + ' / ' + a.totalRequests + ' total</div></div>' +
@@ -1163,6 +1202,12 @@ async function setAccountFreshWindowOverride(email, enabled) {
1163
1202
  refresh();
1164
1203
  }
1165
1204
 
1205
+ async function clearInFlight(email, modelKey) {
1206
+ if (!confirm('Clear in-flight counter for this account/model? Use only when you are sure the request is stuck.')) return;
1207
+ await fetch('/api/clear-inflight/' + encodeURIComponent(email) + '/' + encodeURIComponent(modelKey), { method: 'POST' });
1208
+ refresh();
1209
+ }
1210
+
1166
1211
  function renderProAdvisor(advisor) {
1167
1212
  var panel = document.getElementById('proAdvisor');
1168
1213
  var button = document.getElementById('advisorBtn');
package/src/proxy.ts CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  import { createServer, type IncomingMessage, type ServerResponse } from "node:http";
4
4
  import { Readable } from "node:stream";
5
- import { ANTIGRAVITY_ENDPOINTS } from "./types.js";
5
+ import { ANTIGRAVITY_ENDPOINTS, resolveQuotaModelKey } from "./types.js";
6
6
  import type { AccountRuntime } from "./types.js";
7
7
  import type { AccountRotator } from "./rotator.js";
8
8
  import {
@@ -11,11 +11,13 @@ import {
11
11
  serveEnableApi,
12
12
  serveFreshWindowStartsApi,
13
13
  serveAccountFreshWindowStartsApi,
14
+ serveClearInFlightApi,
14
15
  } from "./dashboard.js";
15
16
  import { handleHostedCallback, serveLoginLanding, startHostedLogin } from "./onboarding.js";
16
17
 
17
18
  const MAX_ENDPOINT_RETRIES = 3;
18
19
  const MAX_COOLDOWN_MS = 30 * 60 * 1000; // 30 minutes max cooldown
20
+ const STREAM_IDLE_TIMEOUT_MS = 10 * 60 * 1000; // Release account if a stream goes silent.
19
21
 
20
22
  interface RequestBody {
21
23
  project: string;
@@ -103,6 +105,91 @@ function isFetchTransportError(err: unknown): boolean {
103
105
  return err instanceof TypeError && err.message === "fetch failed";
104
106
  }
105
107
 
108
+ async function streamResponseBody(
109
+ body: Response["body"],
110
+ req: IncomingMessage,
111
+ res: ServerResponse,
112
+ label: string,
113
+ proxyLog: (msg: string, level?: "info" | "warn" | "error") => void,
114
+ ): Promise<void> {
115
+ if (!body) return;
116
+
117
+ const nodeStream = Readable.fromWeb(body as import("node:stream/web").ReadableStream);
118
+
119
+ await new Promise<void>((resolve) => {
120
+ let settled = false;
121
+ let idleTimer: ReturnType<typeof setTimeout> | null = null;
122
+
123
+ const cleanup = (): void => {
124
+ if (idleTimer) clearTimeout(idleTimer);
125
+ nodeStream.off("data", onData);
126
+ nodeStream.off("end", onEnd);
127
+ nodeStream.off("error", onError);
128
+ nodeStream.off("close", onClose);
129
+ req.off("aborted", onClientAbort);
130
+ req.off("close", onClientClose);
131
+ res.off("close", onResponseClose);
132
+ res.off("error", onResponseError);
133
+ };
134
+
135
+ const finish = (reason?: string): void => {
136
+ if (settled) return;
137
+ settled = true;
138
+ if (reason) proxyLog(`[${label}] Stream closed: ${reason}`, "warn");
139
+ cleanup();
140
+ resolve();
141
+ };
142
+
143
+ const resetIdleTimer = (): void => {
144
+ if (idleTimer) clearTimeout(idleTimer);
145
+ idleTimer = setTimeout(() => {
146
+ nodeStream.destroy(new Error(`stream idle for ${Math.round(STREAM_IDLE_TIMEOUT_MS / 1000)}s`));
147
+ finish(`idle timeout after ${Math.round(STREAM_IDLE_TIMEOUT_MS / 1000)}s`);
148
+ }, STREAM_IDLE_TIMEOUT_MS);
149
+ };
150
+
151
+ const onData = (chunk: Buffer): void => {
152
+ resetIdleTimer();
153
+ if (!res.destroyed && !res.writableEnded) {
154
+ res.write(chunk);
155
+ }
156
+ };
157
+ const onEnd = (): void => finish();
158
+ const onError = (err: Error): void => finish(String(err));
159
+ const onClose = (): void => finish();
160
+ const onClientAbort = (): void => {
161
+ nodeStream.destroy();
162
+ finish("client aborted");
163
+ };
164
+ const onClientClose = (): void => {
165
+ if (!res.writableEnded) {
166
+ nodeStream.destroy();
167
+ finish("client closed connection");
168
+ }
169
+ };
170
+ const onResponseClose = (): void => {
171
+ if (!res.writableEnded) {
172
+ nodeStream.destroy();
173
+ finish("response closed before completion");
174
+ }
175
+ };
176
+ const onResponseError = (err: Error): void => {
177
+ nodeStream.destroy(err);
178
+ finish(String(err));
179
+ };
180
+
181
+ nodeStream.on("data", onData);
182
+ nodeStream.once("end", onEnd);
183
+ nodeStream.once("error", onError);
184
+ nodeStream.once("close", onClose);
185
+ req.once("aborted", onClientAbort);
186
+ req.once("close", onClientClose);
187
+ res.once("close", onResponseClose);
188
+ res.once("error", onResponseError);
189
+ resetIdleTimer();
190
+ });
191
+ }
192
+
106
193
  /**
107
194
  * Read the full request body from an IncomingMessage.
108
195
  */
@@ -218,7 +305,7 @@ async function handleProxyRequest(
218
305
  const rotateAndRelease = async (): Promise<AccountRuntime | null> => {
219
306
  const nextAccount = await rotator.rotateToNext(body.model);
220
307
  if (nextAccount) {
221
- rotator.finishRequest(nextAccount);
308
+ rotator.finishRequest(nextAccount, resolveQuotaModelKey(body.model) ?? undefined);
222
309
  }
223
310
  return nextAccount;
224
311
  };
@@ -241,12 +328,17 @@ async function handleProxyRequest(
241
328
  const cooldownMs = capCooldown(extractRetryDelay(errorText, response.headers));
242
329
  proxyLog(`[${label}] 429 rate limited, cooldown ${Math.ceil(cooldownMs / 1000)}s`, "warn");
243
330
  rotator.markExhausted(account, cooldownMs);
244
- const nextAccount = await rotateAndRelease();
245
- if (!nextAccount) {
246
- sendNoAccountsAvailable(`all candidate accounts are cooling down after ${label} was rate limited`);
247
- return;
248
- }
249
- continue;
331
+ res.writeHead(503, {
332
+ "Content-Type": "application/json",
333
+ "Retry-After": String(Math.ceil(cooldownMs / 1000)),
334
+ });
335
+ res.end(JSON.stringify({
336
+ error: "Rate limited",
337
+ reason: `${label} was rate limited; not retrying another account for this request`,
338
+ model: body.model,
339
+ retryAfterMs: cooldownMs,
340
+ }));
341
+ return;
250
342
  }
251
343
 
252
344
  if (response.status === 401) {
@@ -313,23 +405,12 @@ async function handleProxyRequest(
313
405
 
314
406
  res.writeHead(response.status, responseHeaders);
315
407
 
316
- // Stream body using Node.js Readable (avoids ReadableStream locking issues)
317
- if (response.body) {
318
408
  try {
319
- const nodeStream = Readable.fromWeb(response.body as import("node:stream/web").ReadableStream);
320
- await new Promise<void>((resolve) => {
321
- nodeStream.on("data", (chunk: Buffer) => res.write(chunk));
322
- nodeStream.on("end", resolve);
323
- nodeStream.on("error", (err) => {
324
- proxyLog(`[${label}] Stream error: ${err}`, "warn");
325
- resolve();
326
- });
327
- });
328
- } catch (err) {
329
- proxyLog(`[${label}] Stream setup error: ${err}`, "warn");
330
- }
409
+ await streamResponseBody(response.body, req, res, label, proxyLog);
410
+ } catch (err) {
411
+ proxyLog(`[${label}] Stream setup error: ${err}`, "warn");
331
412
  }
332
- res.end();
413
+ res.end();
333
414
 
334
415
  if (shouldRotate) {
335
416
  await rotateAndRelease();
@@ -352,7 +433,7 @@ async function handleProxyRequest(
352
433
  }
353
434
  continue;
354
435
  } finally {
355
- rotator.finishRequest(account);
436
+ rotator.finishRequest(account, resolveQuotaModelKey(body.model) ?? undefined);
356
437
  }
357
438
  }
358
439
 
@@ -415,6 +496,15 @@ export function startProxy(rotator: AccountRotator, port: number): void {
415
496
  return;
416
497
  }
417
498
 
499
+ if (method === "POST" && url.startsWith("/api/clear-inflight/")) {
500
+ const rest = url.slice("/api/clear-inflight/".length);
501
+ const firstSlash = rest.indexOf("/");
502
+ const email = decodeURIComponent(firstSlash >= 0 ? rest.slice(0, firstSlash) : rest);
503
+ const modelKey = firstSlash >= 0 ? decodeURIComponent(rest.slice(firstSlash + 1)) : undefined;
504
+ serveClearInFlightApi(res, rotator, email, modelKey);
505
+ return;
506
+ }
507
+
418
508
  if (method === "POST" && (url === "/api/settings/fresh-window-starts/on" || url === "/api/settings/fresh-window-starts/off")) {
419
509
  serveFreshWindowStartsApi(res, rotator, url.endsWith("/on"));
420
510
  return;
package/src/rotator.ts CHANGED
@@ -62,6 +62,7 @@ export class AccountRotator {
62
62
  disabled: false,
63
63
  flagged: false,
64
64
  inFlightRequests: 0,
65
+ inFlightByModel: {},
65
66
  allowFreshWindowStartsOverride: false,
66
67
  }));
67
68
  }
@@ -78,7 +79,7 @@ export class AccountRotator {
78
79
  this.modelState.set(model, {
79
80
  activeAccountIndex: Math.min(idx, this.accounts.length - 1),
80
81
  quotaAtRotationStart: -1,
81
- requestsOnActiveAccount: 0,
82
+ requestsOnActiveAccount: state.modelRequestCounts?.[model] ?? 0,
82
83
  });
83
84
  }
84
85
  }
@@ -117,12 +118,15 @@ export class AccountRotator {
117
118
 
118
119
  saveState(): void {
119
120
  const modelAccounts: Record<string, number> = {};
121
+ const modelRequestCounts: Record<string, number> = {};
120
122
  for (const [model, state] of this.modelState.entries()) {
121
123
  modelAccounts[model] = state.activeAccountIndex;
124
+ modelRequestCounts[model] = state.requestsOnActiveAccount;
122
125
  }
123
126
 
124
127
  const state: PersistedState = {
125
128
  modelAccounts,
129
+ modelRequestCounts,
126
130
  currentIndex: this.defaultIndex,
127
131
  protectivePauseUntil: this.protectivePauseUntil,
128
132
  protectivePauseReason: this.protectivePauseReason,
@@ -201,7 +205,7 @@ export class AccountRotator {
201
205
  if (drop >= this.config.rotateOnQuotaDrop) {
202
206
  // Only rotate if there's a healthy account to rotate to
203
207
  const hasHealthy = this.accounts.some(
204
- (a, idx) => idx !== mState.activeAccountIndex && this.isAvailable(a, Date.now()),
208
+ (a, idx) => idx !== mState.activeAccountIndex && this.isRoutableForModel(a, modelKey, Date.now()),
205
209
  );
206
210
  if (hasHealthy) {
207
211
  this.log(
@@ -331,7 +335,7 @@ export class AccountRotator {
331
335
  private hasTimedCandidate(modelKey: string, now: number, excludeIdx: number = -1): boolean {
332
336
  return this.accounts.some((account, idx) => {
333
337
  if (idx === excludeIdx) return false;
334
- if (!this.isAvailable(account, now)) return false;
338
+ if (!this.isAvailableForModel(account, modelKey, now)) return false;
335
339
  if (this.getModelQuota(account, modelKey) === 0) return false;
336
340
  return this.isTimedWindow(account, modelKey);
337
341
  });
@@ -346,7 +350,7 @@ export class AccountRotator {
346
350
  for (let i = 0; i < this.accounts.length; i++) {
347
351
  if (i === excludeIdx) continue;
348
352
  const account = this.accounts[i];
349
- if (!this.isAvailable(account, now)) continue;
353
+ if (!this.isAvailableForModel(account, modelKey, now)) continue;
350
354
 
351
355
  const quota = this.getModelQuota(account, modelKey);
352
356
  if (quota === 0) continue;
@@ -374,6 +378,7 @@ export class AccountRotator {
374
378
  const state = this.modelState.get(modelKey);
375
379
  if (state) {
376
380
  state.requestsOnActiveAccount++;
381
+ this.saveState();
377
382
  }
378
383
  }
379
384
 
@@ -409,7 +414,7 @@ export class AccountRotator {
409
414
  const idx = state?.activeAccountIndex ?? this.defaultIndex;
410
415
 
411
416
  const current = this.accounts[idx];
412
- if (current && this.isAvailable(current, now)) {
417
+ if (current && (!modelKey ? this.isAvailable(current, now) : this.isAvailableForModel(current, modelKey, now))) {
413
418
  // Check if this account has quota for the requested model
414
419
  if (modelKey) {
415
420
  if (this.shouldRotateBeforeRequest(current, modelKey, state ?? null)) {
@@ -422,7 +427,7 @@ export class AccountRotator {
422
427
  return rotated;
423
428
  }
424
429
  this.log(
425
- `${current.config.label || current.config.email} [${modelKey}]: threshold reached but no replacement is available, staying`,
430
+ `${current.config.label || current.config.email} [${modelKey}]: threshold reached but no replacement is available, staying on current account`,
426
431
  "warn",
427
432
  );
428
433
  }
@@ -444,13 +449,13 @@ export class AccountRotator {
444
449
  return this.rotateModelForRequest(modelKey);
445
450
  }
446
451
  }
447
- this.startRequest(current);
452
+ this.startRequest(current, modelKey ?? undefined);
448
453
  try {
449
454
  await this.ensureValidToken(current);
450
455
  if (modelKey) this.countModelAssignment(modelKey);
451
456
  return current;
452
457
  } catch (err) {
453
- this.finishRequest(current);
458
+ this.finishRequest(current, modelKey ?? undefined);
454
459
  throw err;
455
460
  }
456
461
  }
@@ -483,19 +488,19 @@ export class AccountRotator {
483
488
  `[${modelKey}] Rotated to ${best.config.label || best.config.email} [${timerType}] (quota: ${quota >= 0 ? quota + "%" : "unknown"})`,
484
489
  );
485
490
  this.saveState();
486
- this.startRequest(best);
491
+ this.startRequest(best, modelKey);
487
492
  try {
488
493
  await this.ensureValidToken(best);
489
494
  return best;
490
495
  } catch (err) {
491
- this.finishRequest(best);
496
+ this.finishRequest(best, modelKey);
492
497
  throw err;
493
498
  }
494
499
  }
495
500
 
496
- if (!this.allowFreshWindowStarts && this.accounts.some((account, idx) => {
497
- if (idx === excludeIdx) return false;
498
- if (!this.isAvailable(account, now)) return false;
501
+ if (!this.allowFreshWindowStarts && this.accounts.some((account, idx) => {
502
+ if (idx === excludeIdx) return false;
503
+ if (!this.isAvailableForModel(account, modelKey, now)) return false;
499
504
  if (this.getModelQuota(account, modelKey) === 0) return false;
500
505
  return this.getModelTimerType(account, modelKey) === "fresh";
501
506
  })) {
@@ -580,8 +585,22 @@ export class AccountRotator {
580
585
  account.consecutiveErrors = 0;
581
586
  account.lastError = null;
582
587
 
588
+ const modelKey = model ? resolveQuotaModelKey(model) : null;
589
+ const state = modelKey ? this.modelState.get(modelKey) : null;
590
+ const shouldRotate =
591
+ !!modelKey &&
592
+ !!state &&
593
+ this.accounts[state.activeAccountIndex] === account &&
594
+ this.shouldUseRequestCountRotation(account, modelKey) &&
595
+ state.requestsOnActiveAccount >= this.config.requestsPerRotation;
596
+
583
597
  this.saveState();
584
- return false;
598
+ if (shouldRotate) {
599
+ this.log(
600
+ `${account.config.label || account.config.email} [${modelKey}]: hit rotation threshold (${state.requestsOnActiveAccount}/${this.config.requestsPerRotation})`,
601
+ );
602
+ }
603
+ return shouldRotate;
585
604
  }
586
605
 
587
606
  // Mark an account as exhausted (429 or quota exceeded)
@@ -652,6 +671,23 @@ export class AccountRotator {
652
671
  return true;
653
672
  }
654
673
 
674
+ clearInFlightRequests(email: string, modelKey?: string): boolean {
675
+ const account = this.accounts.find((a) => a.config.email === email);
676
+ if (!account) return false;
677
+ if (modelKey) {
678
+ const previous = account.inFlightByModel[modelKey] ?? 0;
679
+ account.inFlightByModel[modelKey] = 0;
680
+ this.recalculateInFlightRequests(account);
681
+ this.log(`${email}: operator cleared ${previous} in-flight request(s) for ${modelKey}`, "warn");
682
+ return true;
683
+ }
684
+ const previous = account.inFlightRequests;
685
+ account.inFlightRequests = 0;
686
+ account.inFlightByModel = {};
687
+ this.log(`${email}: operator cleared ${previous} in-flight request(s)`, "warn");
688
+ return true;
689
+ }
690
+
655
691
  async ensureValidToken(account: AccountRuntime): Promise<void> {
656
692
  const now = Date.now();
657
693
  if (account.accessToken && account.tokenExpires > now) {
@@ -702,7 +738,12 @@ export class AccountRotator {
702
738
  if (account.disabled) return false;
703
739
  if (account.flagged) return false;
704
740
  if (account.cooldownUntil > now) return false;
705
- if (account.inFlightRequests >= (this.config.maxConcurrentRequestsPerAccount ?? 1)) return false;
741
+ return true;
742
+ }
743
+
744
/**
 * Reports whether the account may take another request for `modelKey`.
 *
 * The account must pass the general `isAvailable` check and its in-flight
 * count for this model must be below `maxConcurrentRequestsPerAccount`
 * (treated as 1 when unset).
 */
private isAvailableForModel(account: AccountRuntime, modelKey: string, now: number): boolean {
  if (!this.isAvailable(account, now)) {
    return false;
  }
  const concurrencyLimit = this.config.maxConcurrentRequestsPerAccount ?? 1;
  const activeForModel = account.inFlightByModel[modelKey] ?? 0;
  const atCapacity = activeForModel >= concurrencyLimit;
  return !atCapacity;
}
708
749
 
@@ -711,6 +752,7 @@ export class AccountRotator {
711
752
  account.flagged = true;
712
753
  account.lastError = reason;
713
754
  account.inFlightRequests = 0;
755
+ account.inFlightByModel = {};
714
756
  this.log(`${account.config.email}: FLAGGED - ${reason}`, "error");
715
757
  if (this.shouldTriggerProtectivePause(reason)) {
716
758
  this.protectivePauseUntil = Date.now() + (this.config.protectivePauseMs ?? 6 * 60 * 60 * 1000);
@@ -723,22 +765,38 @@ export class AccountRotator {
723
765
  this.saveState();
724
766
  }
725
767
 
726
- startRequest(account: AccountRuntime): void {
727
- account.inFlightRequests++;
768
/**
 * Records one more in-flight request on the account.
 *
 * The request is counted under `modelKey`, or under `"__default__"` when no
 * key is supplied, and the account-wide total is then recomputed.
 */
startRequest(account: AccountRuntime, modelKey?: string): void {
  const bucket = modelKey ?? "__default__";
  const current = account.inFlightByModel[bucket] ?? 0;
  account.inFlightByModel[bucket] = current + 1;
  this.recalculateInFlightRequests(account);
}
773
+
774
/**
 * Releases one in-flight request from the account.
 *
 * The counter for `modelKey` (or `"__default__"` when omitted) is decremented
 * without going below zero; a counter that reaches zero is removed from the
 * map. The account-wide total is then recomputed.
 */
finishRequest(account: AccountRuntime, modelKey?: string): void {
  const bucket = modelKey ?? "__default__";
  const remaining = Math.max(0, (account.inFlightByModel[bucket] ?? 0) - 1);
  if (remaining === 0) {
    delete account.inFlightByModel[bucket];
  } else {
    account.inFlightByModel[bucket] = remaining;
  }
  this.recalculateInFlightRequests(account);
}
729
780
 
730
- finishRequest(account: AccountRuntime): void {
731
- account.inFlightRequests = Math.max(0, account.inFlightRequests - 1);
781
/**
 * Sets `account.inFlightRequests` to the sum of all per-model in-flight
 * counters currently stored in `account.inFlightByModel`.
 */
private recalculateInFlightRequests(account: AccountRuntime): void {
  let total = 0;
  for (const count of Object.values(account.inFlightByModel)) {
    total += count;
  }
  account.inFlightRequests = total;
}
784
+
785
/**
 * Reports whether the account can serve `modelKey` right now.
 *
 * All three checks must pass, evaluated in this order: the account is
 * available for the model, its quota for the model is not exactly 0, and the
 * fresh-window check allows it.
 */
private isRoutableForModel(account: AccountRuntime, modelKey: string, now: number): boolean {
  return (
    this.isAvailableForModel(account, modelKey, now) &&
    this.getModelQuota(account, modelKey) !== 0 &&
    this.isFreshWindowAllowed(account, modelKey)
  );
}
733
791
 
734
792
  getStatus(): StatusResponse {
735
793
  const now = Date.now();
736
794
 
737
- // Build per-model active account map
795
+ // Build per-model active account map from accounts that can actually serve now.
738
796
  const activeAccounts: Record<string, string> = {};
739
797
  for (const [model, mState] of this.modelState.entries()) {
740
798
  const account = this.accounts[mState.activeAccountIndex];
741
- if (account) {
799
+ if (account && this.isRoutableForModel(account, model, now)) {
742
800
  activeAccounts[model] = account.config.email;
743
801
  }
744
802
  }
@@ -747,7 +805,7 @@ export class AccountRotator {
747
805
  // Determine which models this account is active for
748
806
  const activeForModels: string[] = [];
749
807
  for (const [model, mState] of this.modelState.entries()) {
750
- if (this.accounts[mState.activeAccountIndex] === a) {
808
+ if (this.accounts[mState.activeAccountIndex] === a && this.isRoutableForModel(a, model, now)) {
751
809
  activeForModels.push(model);
752
810
  }
753
811
  }
@@ -778,11 +836,12 @@ export class AccountRotator {
778
836
  cooldownRemaining: Math.max(0, a.cooldownUntil - now),
779
837
  lastUsed: a.lastUsed,
780
838
  lastError: a.lastError,
781
- consecutiveErrors: a.consecutiveErrors,
782
- hasValidToken: !!(a.accessToken && a.tokenExpires > now),
783
- quota: a.quota,
784
- inFlightRequests: a.inFlightRequests,
785
- proDetected: this.isProAccount(a),
839
+ consecutiveErrors: a.consecutiveErrors,
840
+ hasValidToken: !!(a.accessToken && a.tokenExpires > now),
841
+ quota: a.quota,
842
+ inFlightRequests: a.inFlightRequests,
843
+ inFlightByModel: a.inFlightByModel,
844
+ proDetected: this.isProAccount(a),
786
845
  familyManager: !!a.config.familyManager,
787
846
  allowFreshWindowStartsOverride: a.allowFreshWindowStartsOverride,
788
847
  effectiveFreshWindowStartsAllowed: this.isEffectiveFreshWindowAllowed(a),
@@ -842,8 +901,9 @@ export class AccountRotator {
842
901
  lastError: null,
843
902
  consecutiveErrors: 0,
844
903
  disabled: false,
845
- flagged: false,
846
- inFlightRequests: 0,
904
+ flagged: false,
905
+ inFlightRequests: 0,
906
+ inFlightByModel: {},
847
907
  allowFreshWindowStartsOverride: false,
848
908
  };
849
909
  this.accounts.push(runtime);
package/src/types.ts CHANGED
@@ -110,6 +110,7 @@ export interface AccountRuntime {
110
110
  disabled: boolean; // permanently disabled (revoked token, etc.)
111
111
  flagged: boolean; // flagged for infringement/abuse by Google
112
112
  inFlightRequests: number;
113
+ inFlightByModel: Record<string, number>;
113
114
  allowFreshWindowStartsOverride: boolean;
114
115
  }
115
116
 
@@ -124,6 +125,8 @@ export interface ModelRotationState {
124
125
  export interface PersistedState {
125
126
  // Per-model active account index
126
127
  modelAccounts: Record<string, number>;
128
+ // Per-model request count on the active account
129
+ modelRequestCounts?: Record<string, number>;
127
130
  // Legacy fallback
128
131
  currentIndex?: number;
129
132
  protectivePauseUntil?: number;
@@ -195,6 +198,7 @@ export interface AccountStatus {
195
198
  hasValidToken: boolean;
196
199
  quota: ModelQuota[];
197
200
  inFlightRequests: number;
201
+ inFlightByModel: Record<string, number>;
198
202
  // Pro family sharing
199
203
  proDetected: boolean;
200
204
  familyManager: boolean;