antigravity-claude-proxy 2.0.8 → 2.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "antigravity-claude-proxy",
3
- "version": "2.0.8",
3
+ "version": "2.0.9",
4
4
  "description": "Proxy server to use Antigravity's Claude models with Claude Code CLI",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -14,7 +14,8 @@ import {
14
14
  resetAllRateLimits as resetLimits,
15
15
  markRateLimited as markLimited,
16
16
  markInvalid as markAccountInvalid,
17
- getMinWaitTimeMs as getMinWait
17
+ getMinWaitTimeMs as getMinWait,
18
+ getRateLimitInfo as getLimitInfo
18
19
  } from './rate-limits.js';
19
20
  import {
20
21
  getTokenForAccount as fetchToken,
@@ -214,6 +215,16 @@ export class AccountManager {
214
215
  return getMinWait(this.#accounts, modelId);
215
216
  }
216
217
 
218
+ /**
219
+ * Get rate limit info for a specific account and model
220
+ * @param {string} email - Email of the account
221
+ * @param {string} modelId - Model ID to check
222
+ * @returns {{isRateLimited: boolean, actualResetMs: number|null, waitMs: number}} Rate limit info
223
+ */
224
+ getRateLimitInfo(email, modelId) {
225
+ return getLimitInfo(this.#accounts, email, modelId);
226
+ }
227
+
217
228
  /**
218
229
  * Get OAuth token for an account
219
230
  * @param {Object} account - Account object with email and credentials
@@ -22,6 +22,7 @@ export function isAllRateLimited(accounts, modelId) {
22
22
 
23
23
  return accounts.every(acc => {
24
24
  if (acc.isInvalid) return true; // Invalid accounts count as unavailable
25
+ if (acc.enabled === false) return true; // Disabled accounts count as unavailable
25
26
  const modelLimits = acc.modelRateLimits || {};
26
27
  const limit = modelLimits[modelId];
27
28
  return limit && limit.isRateLimited && limit.resetTime > Date.now();
@@ -118,18 +119,9 @@ export function markRateLimited(accounts, email, resetMs = null, modelId) {
118
119
  const account = accounts.find(a => a.email === email);
119
120
  if (!account) return false;
120
121
 
121
- // Use configured cooldown as the maximum wait time
122
- // If API returns a reset time, cap it at DEFAULT_COOLDOWN_MS
123
- // If API doesn't return a reset time, use DEFAULT_COOLDOWN_MS
124
- let cooldownMs;
125
- if (resetMs && resetMs > 0) {
126
- // API provided a reset time - cap it at configured maximum
127
- cooldownMs = Math.min(resetMs, DEFAULT_COOLDOWN_MS);
128
- } else {
129
- // No reset time from API - use configured default
130
- cooldownMs = DEFAULT_COOLDOWN_MS;
131
- }
132
- const resetTime = Date.now() + cooldownMs;
122
+ // Store the ACTUAL reset time from the API
123
+ // This is used to decide whether to wait (short) or switch accounts (long)
124
+ const actualResetMs = (resetMs && resetMs > 0) ? resetMs : DEFAULT_COOLDOWN_MS;
133
125
 
134
126
  if (!account.modelRateLimits) {
135
127
  account.modelRateLimits = {};
@@ -137,12 +129,20 @@ export function markRateLimited(accounts, email, resetMs = null, modelId) {
137
129
 
138
130
  account.modelRateLimits[modelId] = {
139
131
  isRateLimited: true,
140
- resetTime: resetTime
132
+ resetTime: Date.now() + actualResetMs, // Actual reset time for decisions
133
+ actualResetMs: actualResetMs // Original duration from API
141
134
  };
142
135
 
143
- logger.warn(
144
- `[AccountManager] Rate limited: ${email} (model: ${modelId}). Available in ${formatDuration(cooldownMs)}`
145
- );
136
+ // Log appropriately based on duration
137
+ if (actualResetMs > DEFAULT_COOLDOWN_MS) {
138
+ logger.warn(
139
+ `[AccountManager] Quota exhausted: ${email} (model: ${modelId}). Resets in ${formatDuration(actualResetMs)}`
140
+ );
141
+ } else {
142
+ logger.warn(
143
+ `[AccountManager] Rate limited: ${email} (model: ${modelId}). Available in ${formatDuration(actualResetMs)}`
144
+ );
145
+ }
146
146
 
147
147
  return true;
148
148
  }
@@ -209,3 +209,29 @@ export function getMinWaitTimeMs(accounts, modelId) {
209
209
 
210
210
  return minWait === Infinity ? DEFAULT_COOLDOWN_MS : minWait;
211
211
  }
212
+
213
+ /**
214
+ * Get the rate limit info for a specific account and model
215
+ * Returns the actual reset time from API, not capped
216
+ *
217
+ * @param {Array} accounts - Array of account objects
218
+ * @param {string} email - Email of the account
219
+ * @param {string} modelId - Model ID to check
220
+ * @returns {{isRateLimited: boolean, actualResetMs: number|null, waitMs: number}} Rate limit info
221
+ */
222
+ export function getRateLimitInfo(accounts, email, modelId) {
223
+ const account = accounts.find(a => a.email === email);
224
+ if (!account || !account.modelRateLimits || !account.modelRateLimits[modelId]) {
225
+ return { isRateLimited: false, actualResetMs: null, waitMs: 0 };
226
+ }
227
+
228
+ const limit = account.modelRateLimits[modelId];
229
+ const now = Date.now();
230
+ const waitMs = limit.resetTime ? Math.max(0, limit.resetTime - now) : 0;
231
+
232
+ return {
233
+ isRateLimited: limit.isRateLimited && waitMs > 0,
234
+ actualResetMs: limit.actualResetMs || null,
235
+ waitMs
236
+ };
237
+ }
@@ -9,6 +9,7 @@ import {
9
9
  ANTIGRAVITY_ENDPOINT_FALLBACKS,
10
10
  MAX_RETRIES,
11
11
  MAX_WAIT_BEFORE_ERROR_MS,
12
+ DEFAULT_COOLDOWN_MS,
12
13
  isThinkingModel
13
14
  } from '../constants.js';
14
15
  import { convertGoogleToAnthropic } from '../format/index.js';
@@ -39,67 +40,56 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
39
40
 
40
41
  // Retry loop with account failover
41
42
  // Ensure we try at least as many times as there are accounts to cycle through everyone
42
- // +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop
43
43
  const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);
44
44
 
45
45
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
46
- // Use sticky account selection for cache continuity
47
- const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount(model);
48
- let account = stickyAccount;
49
-
50
- // Handle waiting for sticky account
51
- if (!account && waitMs > 0) {
52
- logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`);
53
- await sleep(waitMs);
54
- accountManager.clearExpiredLimits();
55
- account = accountManager.getCurrentStickyAccount(model);
56
- }
46
+ // Clear any expired rate limits before picking
47
+ accountManager.clearExpiredLimits();
57
48
 
58
- // Handle all accounts rate-limited
59
- if (!account) {
49
+ // Get available accounts for this model
50
+ const availableAccounts = accountManager.getAvailableAccounts(model);
51
+
52
+ // If no accounts available, check if we should wait or throw error
53
+ if (availableAccounts.length === 0) {
60
54
  if (accountManager.isAllRateLimited(model)) {
61
- const allWaitMs = accountManager.getMinWaitTimeMs(model);
62
- const resetTime = new Date(Date.now() + allWaitMs).toISOString();
55
+ const minWaitMs = accountManager.getMinWaitTimeMs(model);
56
+ const resetTime = new Date(Date.now() + minWaitMs).toISOString();
63
57
 
64
58
  // If wait time is too long (> 2 minutes), throw error immediately
65
- if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
59
+ if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
66
60
  throw new Error(
67
- `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}`
61
+ `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
68
62
  );
69
63
  }
70
64
 
71
- // Wait for reset (applies to both single and multi-account modes)
65
+ // Wait for shortest reset time
72
66
  const accountCount = accountManager.getAccountCount();
73
- logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(allWaitMs)}...`);
74
- await sleep(allWaitMs);
75
-
76
- // Add small buffer after waiting to ensure rate limits have truly expired
77
- await sleep(500);
67
+ logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`);
68
+ await sleep(minWaitMs + 500); // Add 500ms buffer
78
69
  accountManager.clearExpiredLimits();
79
- account = accountManager.pickNext(model);
80
-
81
- // If still no account after waiting, try optimistic reset
82
- // This handles cases where the API rate limit is transient
83
- if (!account) {
84
- logger.warn('[CloudCode] No account available after wait, attempting optimistic reset...');
85
- accountManager.resetAllRateLimits();
86
- account = accountManager.pickNext(model);
87
- }
70
+ continue; // Retry the loop
88
71
  }
89
72
 
90
- if (!account) {
91
- // Check if fallback is enabled and available
92
- if (fallbackEnabled) {
93
- const fallbackModel = getFallbackModel(model);
94
- if (fallbackModel) {
95
- logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
96
- // Retry with fallback model
97
- const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
98
- return await sendMessage(fallbackRequest, accountManager, false); // Disable fallback for recursive call
99
- }
73
+ // Check if fallback is enabled and available
74
+ if (fallbackEnabled) {
75
+ const fallbackModel = getFallbackModel(model);
76
+ if (fallbackModel) {
77
+ logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
78
+ const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
79
+ return await sendMessage(fallbackRequest, accountManager, false);
100
80
  }
101
- throw new Error('No accounts available');
102
81
  }
82
+ throw new Error('No accounts available');
83
+ }
84
+
85
+ // Pick sticky account (prefers current for cache continuity)
86
+ let account = accountManager.getCurrentStickyAccount(model);
87
+ if (!account) {
88
+ account = accountManager.pickNext(model);
89
+ }
90
+
91
+ if (!account) {
92
+ continue; // Shouldn't happen, but safety check
103
93
  }
104
94
 
105
95
  try {
@@ -112,6 +102,8 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
112
102
 
113
103
  // Try each endpoint
114
104
  let lastError = null;
105
+ let retriedOnce = false; // Track if we've already retried for short rate limit
106
+
115
107
  for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
116
108
  try {
117
109
  const url = isThinking
@@ -137,14 +129,51 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
137
129
  }
138
130
 
139
131
  if (response.status === 429) {
140
- // Rate limited on this endpoint - try next endpoint first (DAILY → PROD)
141
- logger.debug(`[CloudCode] Rate limited at ${endpoint}, trying next endpoint...`);
142
132
  const resetMs = parseResetTime(response, errorText);
143
- // Keep minimum reset time across all 429 responses
144
- if (!lastError?.is429 || (resetMs && (!lastError.resetMs || resetMs < lastError.resetMs))) {
145
- lastError = { is429: true, response, errorText, resetMs };
133
+
134
+ // Decision: wait and retry OR switch account
135
+ if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
136
+ // Long-term quota exhaustion (> 10s) - switch to next account
137
+ logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(resetMs)}), switching account...`);
138
+ accountManager.markRateLimited(account.email, resetMs, model);
139
+ throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
140
+ } else {
141
+ // Short-term rate limit (<= 10s) - wait and retry once
142
+ const waitMs = resetMs || DEFAULT_COOLDOWN_MS;
143
+
144
+ if (!retriedOnce) {
145
+ retriedOnce = true;
146
+ logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
147
+ await sleep(waitMs);
148
+ // Retry same endpoint
149
+ const retryResponse = await fetch(url, {
150
+ method: 'POST',
151
+ headers: buildHeaders(token, model, isThinking ? 'text/event-stream' : 'application/json'),
152
+ body: JSON.stringify(payload)
153
+ });
154
+
155
+ if (retryResponse.ok) {
156
+ // Process retry response
157
+ if (isThinking) {
158
+ return await parseThinkingSSEResponse(retryResponse, anthropicRequest.model);
159
+ }
160
+ const data = await retryResponse.json();
161
+ logger.debug('[CloudCode] Response received after retry');
162
+ return convertGoogleToAnthropic(data, anthropicRequest.model);
163
+ }
164
+
165
+ // Retry also failed - parse new reset time
166
+ const retryErrorText = await retryResponse.text();
167
+ const retryResetMs = parseResetTime(retryResponse, retryErrorText);
168
+ logger.warn(`[CloudCode] Retry also failed, marking and switching...`);
169
+ accountManager.markRateLimited(account.email, retryResetMs || waitMs, model);
170
+ throw new Error(`RATE_LIMITED_AFTER_RETRY: ${retryErrorText}`);
171
+ } else {
172
+ // Already retried once, mark and switch
173
+ accountManager.markRateLimited(account.email, waitMs, model);
174
+ throw new Error(`RATE_LIMITED: ${errorText}`);
175
+ }
146
176
  }
147
- continue;
148
177
  }
149
178
 
150
179
  if (response.status >= 400) {
@@ -179,7 +208,6 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
179
208
 
180
209
  // If all endpoints failed for this account
181
210
  if (lastError) {
182
- // If all endpoints returned 429, mark account as rate-limited
183
211
  if (lastError.is429) {
184
212
  logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
185
213
  accountManager.markRateLimited(account.email, lastError.resetMs, model);
@@ -199,18 +227,17 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
199
227
  logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
200
228
  continue;
201
229
  }
202
- // Non-rate-limit error: throw immediately
203
- // UNLESS it's a 500 error, then we treat it as a "soft" failure for this account and try the next one
230
+ // Handle 5xx errors
204
231
  if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
205
232
  logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`);
206
- accountManager.pickNext(model); // Force advance to next account
233
+ accountManager.pickNext(model);
207
234
  continue;
208
235
  }
209
236
 
210
237
  if (isNetworkError(error)) {
211
238
  logger.warn(`[CloudCode] Network error for ${account.email}, trying next account... (${error.message})`);
212
- await sleep(1000); // Brief pause before retry
213
- accountManager.pickNext(model); // Advance to next account
239
+ await sleep(1000);
240
+ accountManager.pickNext(model);
214
241
  continue;
215
242
  }
216
243
 
@@ -224,7 +251,7 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
224
251
  if (fallbackModel) {
225
252
  logger.warn(`[CloudCode] All retries exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
226
253
  const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
227
- return await sendMessage(fallbackRequest, accountManager, false); // Disable fallback for recursive call
254
+ return await sendMessage(fallbackRequest, accountManager, false);
228
255
  }
229
256
  }
230
257
 
@@ -57,22 +57,26 @@ export async function listModels(token) {
57
57
  * Returns model quotas including remaining fraction and reset time
58
58
  *
59
59
  * @param {string} token - OAuth access token
60
+ * @param {string} [projectId] - Optional project ID for accurate quota info
60
61
  * @returns {Promise<Object>} Raw response from fetchAvailableModels API
61
62
  */
62
- export async function fetchAvailableModels(token) {
63
+ export async function fetchAvailableModels(token, projectId = null) {
63
64
  const headers = {
64
65
  'Authorization': `Bearer ${token}`,
65
66
  'Content-Type': 'application/json',
66
67
  ...ANTIGRAVITY_HEADERS
67
68
  };
68
69
 
70
+ // Include project ID in body for accurate quota info (per Quotio implementation)
71
+ const body = projectId ? { project: projectId } : {};
72
+
69
73
  for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
70
74
  try {
71
75
  const url = `${endpoint}/v1internal:fetchAvailableModels`;
72
76
  const response = await fetch(url, {
73
77
  method: 'POST',
74
78
  headers,
75
- body: JSON.stringify({})
79
+ body: JSON.stringify(body)
76
80
  });
77
81
 
78
82
  if (!response.ok) {
@@ -95,10 +99,11 @@ export async function fetchAvailableModels(token) {
95
99
  * Extracts quota info (remaining fraction and reset time) for each model
96
100
  *
97
101
  * @param {string} token - OAuth access token
102
+ * @param {string} [projectId] - Optional project ID for accurate quota info
98
103
  * @returns {Promise<Object>} Map of modelId -> { remainingFraction, resetTime }
99
104
  */
100
- export async function getModelQuotas(token) {
101
- const data = await fetchAvailableModels(token);
105
+ export async function getModelQuotas(token, projectId = null) {
106
+ const data = await fetchAvailableModels(token, projectId);
102
107
  if (!data || !data.models) return {};
103
108
 
104
109
  const quotas = {};
@@ -108,7 +113,8 @@ export async function getModelQuotas(token) {
108
113
 
109
114
  if (modelData.quotaInfo) {
110
115
  quotas[modelId] = {
111
- remainingFraction: modelData.quotaInfo.remainingFraction ?? null,
116
+ // When remainingFraction is missing but resetTime is present, quota is exhausted (0%)
117
+ remainingFraction: modelData.quotaInfo.remainingFraction ?? (modelData.quotaInfo.resetTime ? 0 : null),
112
118
  resetTime: modelData.quotaInfo.resetTime ?? null
113
119
  };
114
120
  }
@@ -78,7 +78,7 @@ export function parseResetTime(responseOrError, errorText = '') {
78
78
 
79
79
  // Try to extract "quotaResetDelay" first (e.g. "754.431528ms" or "1.5s")
80
80
  // This is Google's preferred format for rate limit reset delay
81
- const quotaDelayMatch = msg.match(/quotaResetDelay[:\s"]+(\\d+(?:\\.\\d+)?)(ms|s)/i);
81
+ const quotaDelayMatch = msg.match(/quotaResetDelay[:\s"]+(\d+(?:\.\d+)?)(ms|s)/i);
82
82
  if (quotaDelayMatch) {
83
83
  const value = parseFloat(quotaDelayMatch[1]);
84
84
  const unit = quotaDelayMatch[2].toLowerCase();
@@ -103,7 +103,7 @@ export function parseResetTime(responseOrError, errorText = '') {
103
103
  // Try to extract "retry-after-ms" or "retryDelay" - check seconds format first (e.g. "7739.23s")
104
104
  // Added stricter regex to avoid partial matches
105
105
  if (!resetMs) {
106
- const secMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+([\\d\\.]+)(?:s\b|s")/i);
106
+ const secMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+([\d.]+)(?:s\b|s")/i);
107
107
  if (secMatch) {
108
108
  resetMs = Math.ceil(parseFloat(secMatch[1]) * 1000);
109
109
  logger.debug(`[CloudCode] Parsed retry seconds from body (precise): ${resetMs}ms`);
@@ -9,7 +9,8 @@ import {
9
9
  ANTIGRAVITY_ENDPOINT_FALLBACKS,
10
10
  MAX_RETRIES,
11
11
  MAX_EMPTY_RESPONSE_RETRIES,
12
- MAX_WAIT_BEFORE_ERROR_MS
12
+ MAX_WAIT_BEFORE_ERROR_MS,
13
+ DEFAULT_COOLDOWN_MS
13
14
  } from '../constants.js';
14
15
  import { isRateLimitError, isAuthError, isEmptyResponseError } from '../errors.js';
15
16
  import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
@@ -38,68 +39,57 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
38
39
 
39
40
  // Retry loop with account failover
40
41
  // Ensure we try at least as many times as there are accounts to cycle through everyone
41
- // +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop
42
42
  const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);
43
43
 
44
44
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
45
- // Use sticky account selection for cache continuity
46
- const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount(model);
47
- let account = stickyAccount;
48
-
49
- // Handle waiting for sticky account
50
- if (!account && waitMs > 0) {
51
- logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`);
52
- await sleep(waitMs);
53
- accountManager.clearExpiredLimits();
54
- account = accountManager.getCurrentStickyAccount(model);
55
- }
45
+ // Clear any expired rate limits before picking
46
+ accountManager.clearExpiredLimits();
56
47
 
57
- // Handle all accounts rate-limited
58
- if (!account) {
48
+ // Get available accounts for this model
49
+ const availableAccounts = accountManager.getAvailableAccounts(model);
50
+
51
+ // If no accounts available, check if we should wait or throw error
52
+ if (availableAccounts.length === 0) {
59
53
  if (accountManager.isAllRateLimited(model)) {
60
- const allWaitMs = accountManager.getMinWaitTimeMs(model);
61
- const resetTime = new Date(Date.now() + allWaitMs).toISOString();
54
+ const minWaitMs = accountManager.getMinWaitTimeMs(model);
55
+ const resetTime = new Date(Date.now() + minWaitMs).toISOString();
62
56
 
63
57
  // If wait time is too long (> 2 minutes), throw error immediately
64
- if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
58
+ if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
65
59
  throw new Error(
66
- `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}`
60
+ `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
67
61
  );
68
62
  }
69
63
 
70
- // Wait for reset (applies to both single and multi-account modes)
64
+ // Wait for shortest reset time
71
65
  const accountCount = accountManager.getAccountCount();
72
- logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(allWaitMs)}...`);
73
- await sleep(allWaitMs);
74
-
75
- // Add small buffer after waiting to ensure rate limits have truly expired
76
- await sleep(500);
66
+ logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`);
67
+ await sleep(minWaitMs + 500); // Add 500ms buffer
77
68
  accountManager.clearExpiredLimits();
78
- account = accountManager.pickNext(model);
79
-
80
- // If still no account after waiting, try optimistic reset
81
- // This handles cases where the API rate limit is transient
82
- if (!account) {
83
- logger.warn('[CloudCode] No account available after wait, attempting optimistic reset...');
84
- accountManager.resetAllRateLimits();
85
- account = accountManager.pickNext(model);
86
- }
69
+ continue; // Retry the loop
87
70
  }
88
71
 
89
- if (!account) {
90
- // Check if fallback is enabled and available
91
- if (fallbackEnabled) {
92
- const fallbackModel = getFallbackModel(model);
93
- if (fallbackModel) {
94
- logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
95
- // Retry with fallback model
96
- const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
97
- yield* sendMessageStream(fallbackRequest, accountManager, false); // Disable fallback for recursive call
98
- return;
99
- }
72
+ // Check if fallback is enabled and available
73
+ if (fallbackEnabled) {
74
+ const fallbackModel = getFallbackModel(model);
75
+ if (fallbackModel) {
76
+ logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
77
+ const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
78
+ yield* sendMessageStream(fallbackRequest, accountManager, false);
79
+ return;
100
80
  }
101
- throw new Error('No accounts available');
102
81
  }
82
+ throw new Error('No accounts available');
83
+ }
84
+
85
+ // Pick sticky account (prefers current for cache continuity)
86
+ let account = accountManager.getCurrentStickyAccount(model);
87
+ if (!account) {
88
+ account = accountManager.pickNext(model);
89
+ }
90
+
91
+ if (!account) {
92
+ continue; // Shouldn't happen, but safety check
103
93
  }
104
94
 
105
95
  try {
@@ -112,6 +102,8 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
112
102
 
113
103
  // Try each endpoint for streaming
114
104
  let lastError = null;
105
+ let retriedOnce = false; // Track if we've already retried for short rate limit
106
+
115
107
  for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
116
108
  try {
117
109
  const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
@@ -134,14 +126,48 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
134
126
  }
135
127
 
136
128
  if (response.status === 429) {
137
- // Rate limited on this endpoint - try next endpoint first (DAILY → PROD)
138
- logger.debug(`[CloudCode] Stream rate limited at ${endpoint}, trying next endpoint...`);
139
129
  const resetMs = parseResetTime(response, errorText);
140
- // Keep minimum reset time across all 429 responses
141
- if (!lastError?.is429 || (resetMs && (!lastError.resetMs || resetMs < lastError.resetMs))) {
142
- lastError = { is429: true, response, errorText, resetMs };
130
+
131
+ // Decision: wait and retry OR switch account
132
+ if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
133
+ // Long-term quota exhaustion (> 10s) - switch to next account
134
+ logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(resetMs)}), switching account...`);
135
+ accountManager.markRateLimited(account.email, resetMs, model);
136
+ throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
137
+ } else {
138
+ // Short-term rate limit (<= 10s) - wait and retry once
139
+ const waitMs = resetMs || DEFAULT_COOLDOWN_MS;
140
+
141
+ if (!retriedOnce) {
142
+ retriedOnce = true;
143
+ logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
144
+ await sleep(waitMs);
145
+ // Retry same endpoint
146
+ const retryResponse = await fetch(url, {
147
+ method: 'POST',
148
+ headers: buildHeaders(token, model, 'text/event-stream'),
149
+ body: JSON.stringify(payload)
150
+ });
151
+
152
+ if (retryResponse.ok) {
153
+ // Stream the retry response
154
+ yield* streamSSEResponse(retryResponse, anthropicRequest.model);
155
+ logger.debug('[CloudCode] Stream completed after retry');
156
+ return;
157
+ }
158
+
159
+ // Retry also failed - parse new reset time
160
+ const retryErrorText = await retryResponse.text();
161
+ const retryResetMs = parseResetTime(retryResponse, retryErrorText);
162
+ logger.warn(`[CloudCode] Retry also failed, marking and switching...`);
163
+ accountManager.markRateLimited(account.email, retryResetMs || waitMs, model);
164
+ throw new Error(`RATE_LIMITED_AFTER_RETRY: ${retryErrorText}`);
165
+ } else {
166
+ // Already retried once, mark and switch
167
+ accountManager.markRateLimited(account.email, waitMs, model);
168
+ throw new Error(`RATE_LIMITED: ${errorText}`);
169
+ }
143
170
  }
144
- continue;
145
171
  }
146
172
 
147
173
  lastError = new Error(`API error ${response.status}: ${errorText}`);
@@ -156,7 +182,6 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
156
182
  }
157
183
 
158
184
  // Stream the response with retry logic for empty responses
159
- // Uses a for-loop for clearer retry semantics
160
185
  let currentResponse = response;
161
186
 
162
187
  for (let emptyRetries = 0; emptyRetries <= MAX_EMPTY_RESPONSE_RETRIES; emptyRetries++) {
@@ -207,28 +232,22 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
207
232
  throw new Error(`401 AUTH_INVALID during retry: ${retryErrorText}`);
208
233
  }
209
234
 
210
- // For 5xx errors, don't pass to streamer - just continue to next retry
235
+ // For 5xx errors, continue retrying
211
236
  if (currentResponse.status >= 500) {
212
237
  logger.warn(`[CloudCode] Retry got ${currentResponse.status}, will retry...`);
213
- // Don't continue here - let the loop increment and refetch
214
- // Set currentResponse to null to force refetch at loop start
215
- emptyRetries--; // Compensate for loop increment since we didn't actually try
216
238
  await sleep(1000);
217
- // Refetch immediately for 5xx
218
239
  currentResponse = await fetch(url, {
219
240
  method: 'POST',
220
241
  headers: buildHeaders(token, model, 'text/event-stream'),
221
242
  body: JSON.stringify(payload)
222
243
  });
223
244
  if (currentResponse.ok) {
224
- continue; // Try streaming with new response
245
+ continue;
225
246
  }
226
- // If still failing, let it fall through to throw
227
247
  }
228
248
 
229
249
  throw new Error(`Empty response retry failed: ${currentResponse.status} - ${retryErrorText}`);
230
250
  }
231
- // Response is OK, loop will continue to try streamSSEResponse
232
251
  }
233
252
  }
234
253
 
@@ -237,7 +256,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
237
256
  throw endpointError; // Re-throw to trigger account switch
238
257
  }
239
258
  if (isEmptyResponseError(endpointError)) {
240
- throw endpointError; // Re-throw empty response errors to outer handler
259
+ throw endpointError;
241
260
  }
242
261
  logger.warn(`[CloudCode] Stream error at ${endpoint}:`, endpointError.message);
243
262
  lastError = endpointError;
@@ -246,7 +265,6 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
246
265
 
247
266
  // If all endpoints failed for this account
248
267
  if (lastError) {
249
- // If all endpoints returned 429, mark account as rate-limited
250
268
  if (lastError.is429) {
251
269
  logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
252
270
  accountManager.markRateLimited(account.email, lastError.resetMs, model);
@@ -266,18 +284,17 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
266
284
  logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
267
285
  continue;
268
286
  }
269
- // Non-rate-limit error: throw immediately
270
- // UNLESS it's a 500 error, then we treat it as a "soft" failure for this account and try the next one
287
+ // Handle 5xx errors
271
288
  if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
272
289
  logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error, trying next...`);
273
- accountManager.pickNext(model); // Force advance to next account
290
+ accountManager.pickNext(model);
274
291
  continue;
275
292
  }
276
293
 
277
294
  if (isNetworkError(error)) {
278
295
  logger.warn(`[CloudCode] Network error for ${account.email} (stream), trying next account... (${error.message})`);
279
- await sleep(1000); // Brief pause before retry
280
- accountManager.pickNext(model); // Advance to next account
296
+ await sleep(1000);
297
+ accountManager.pickNext(model);
281
298
  continue;
282
299
  }
283
300
 
@@ -291,7 +308,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
291
308
  if (fallbackModel) {
292
309
  logger.warn(`[CloudCode] All retries exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
293
310
  const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
294
- yield* sendMessageStream(fallbackRequest, accountManager, false); // Disable fallback for recursive call
311
+ yield* sendMessageStream(fallbackRequest, accountManager, false);
295
312
  return;
296
313
  }
297
314
  }
package/src/constants.js CHANGED
@@ -69,15 +69,16 @@ export const ONBOARD_USER_ENDPOINTS = ANTIGRAVITY_ENDPOINT_FALLBACKS;
69
69
 
70
70
  // Hybrid headers specifically for loadCodeAssist
71
71
  // Uses google-api-nodejs-client User-Agent (required for project discovery on some accounts)
72
- export const LOAD_CODE_ASSIST_HEADERS = {
73
- 'User-Agent': 'google-api-nodejs-client/9.15.1',
74
- 'X-Goog-Api-Client': 'google-cloud-sdk vscode_cloudshelleditor/0.1',
75
- 'Client-Metadata': JSON.stringify({
76
- ideType: 'IDE_UNSPECIFIED',
77
- platform: 'PLATFORM_UNSPECIFIED',
78
- pluginType: 'GEMINI'
79
- })
80
- };
72
+ // export const LOAD_CODE_ASSIST_HEADERS = {
73
+ // 'User-Agent': 'google-api-nodejs-client/9.15.1',
74
+ // 'X-Goog-Api-Client': 'google-cloud-sdk vscode_cloudshelleditor/0.1',
75
+ // 'Client-Metadata': JSON.stringify({
76
+ // ideType: 'IDE_UNSPECIFIED',
77
+ // platform: 'PLATFORM_UNSPECIFIED',
78
+ // pluginType: 'GEMINI'
79
+ // })
80
+ // };
81
+ export const LOAD_CODE_ASSIST_HEADERS = ANTIGRAVITY_HEADERS;
81
82
 
82
83
  // Default project ID if none can be discovered
83
84
  export const DEFAULT_PROJECT_ID = 'rising-fact-p41fc';
package/src/index.js CHANGED
@@ -71,7 +71,7 @@ app.listen(PORT, () => {
71
71
  ║ Antigravity Claude Proxy Server ║
72
72
  ╠══════════════════════════════════════════════════════════════╣
73
73
  ║ ║
74
- ${border} ${align(`Server running at: http://localhost:${PORT}`)}${border}
74
+ ${border} ${align(`Server and WebUI running at: http://localhost:${PORT}`)}${border}
75
75
  ${statusSection}║ ║
76
76
  ${controlSection}
77
77
  ║ ║
package/src/server.js CHANGED
@@ -214,7 +214,8 @@ app.get('/health', async (req, res) => {
214
214
 
215
215
  try {
216
216
  const token = await accountManager.getTokenForAccount(account);
217
- const quotas = await getModelQuotas(token);
217
+ const projectId = account.subscription?.projectId || null;
218
+ const quotas = await getModelQuotas(token, projectId);
218
219
 
219
220
  // Format quotas for readability
220
221
  const formattedQuotas = {};
@@ -309,11 +310,11 @@ app.get('/account-limits', async (req, res) => {
309
310
  try {
310
311
  const token = await accountManager.getTokenForAccount(account);
311
312
 
312
- // Fetch both quotas and subscription tier in parallel
313
- const [quotas, subscription] = await Promise.all([
314
- getModelQuotas(token),
315
- getSubscriptionTier(token)
316
- ]);
313
+ // Fetch subscription tier first to get project ID
314
+ const subscription = await getSubscriptionTier(token);
315
+
316
+ // Then fetch quotas with project ID for accurate quota info
317
+ const quotas = await getModelQuotas(token, subscription.projectId);
317
318
 
318
319
  // Update account object with fresh data
319
320
  account.subscription = {