npm - antigravity-claude-proxy - Versions diffs - 2.0.8 → 2.0.9 - Mend

antigravity-claude-proxy 2.0.8 → 2.0.9

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (10)

package/package.json +1 -1
package/src/account-manager/index.js +12 -1
package/src/account-manager/rate-limits.js +42 -16
package/src/cloudcode/message-handler.js +84 -57
package/src/cloudcode/model-api.js +11 -5
package/src/cloudcode/rate-limit-parser.js +2 -2
package/src/cloudcode/streaming-handler.js +86 -69
package/src/constants.js +10 -9
package/src/index.js +1 -1
package/src/server.js +7 -6

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "antigravity-claude-proxy",
-  "version": "2.0.8",
+  "version": "2.0.9",
   "description": "Proxy server to use Antigravity's Claude models with Claude Code CLI",
   "main": "src/index.js",
   "type": "module",

package/src/account-manager/index.js CHANGED Viewed

@@ -14,7 +14,8 @@ import {
     resetAllRateLimits as resetLimits,
     markRateLimited as markLimited,
     markInvalid as markAccountInvalid,
-    getMinWaitTimeMs as getMinWait
+    getMinWaitTimeMs as getMinWait,
+    getRateLimitInfo as getLimitInfo
 } from './rate-limits.js';
 import {
     getTokenForAccount as fetchToken,
@@ -214,6 +215,16 @@ export class AccountManager {
         return getMinWait(this.#accounts, modelId);
     }
+    /**
+     * Get rate limit info for a specific account and model
+     * @param {string} email - Email of the account
+     * @param {string} modelId - Model ID to check
+     * @returns {{isRateLimited: boolean, actualResetMs: number|null, waitMs: number}} Rate limit info
+     */
+    getRateLimitInfo(email, modelId) {
+        return getLimitInfo(this.#accounts, email, modelId);
+    }
     /**
      * Get OAuth token for an account
      * @param {Object} account - Account object with email and credentials

package/src/account-manager/rate-limits.js CHANGED Viewed

@@ -22,6 +22,7 @@ export function isAllRateLimited(accounts, modelId) {
     return accounts.every(acc => {
         if (acc.isInvalid) return true; // Invalid accounts count as unavailable
+        if (acc.enabled === false) return true; // Disabled accounts count as unavailable
         const modelLimits = acc.modelRateLimits || {};
         const limit = modelLimits[modelId];
         return limit && limit.isRateLimited && limit.resetTime > Date.now();
@@ -118,18 +119,9 @@ export function markRateLimited(accounts, email, resetMs = null, modelId) {
     const account = accounts.find(a => a.email === email);
     if (!account) return false;
-    // Use configured cooldown as the maximum wait time
-    // If API returns a reset time, cap it at DEFAULT_COOLDOWN_MS
-    // If API doesn't return a reset time, use DEFAULT_COOLDOWN_MS
-    let cooldownMs;
-    if (resetMs && resetMs > 0) {
-        // API provided a reset time - cap it at configured maximum
-        cooldownMs = Math.min(resetMs, DEFAULT_COOLDOWN_MS);
-    } else {
-        // No reset time from API - use configured default
-        cooldownMs = DEFAULT_COOLDOWN_MS;
-    }
-    const resetTime = Date.now() + cooldownMs;
+    // Store the ACTUAL reset time from the API
+    // This is used to decide whether to wait (short) or switch accounts (long)
+    const actualResetMs = (resetMs && resetMs > 0) ? resetMs : DEFAULT_COOLDOWN_MS;
     if (!account.modelRateLimits) {
         account.modelRateLimits = {};
@@ -137,12 +129,20 @@ export function markRateLimited(accounts, email, resetMs = null, modelId) {
     account.modelRateLimits[modelId] = {
         isRateLimited: true,
-        resetTime: resetTime
+        resetTime: Date.now() + actualResetMs,  // Actual reset time for decisions
+        actualResetMs: actualResetMs             // Original duration from API
     };
-    logger.warn(
-        `[AccountManager] Rate limited: ${email} (model: ${modelId}). Available in ${formatDuration(cooldownMs)}`
-    );
+    // Log appropriately based on duration
+    if (actualResetMs > DEFAULT_COOLDOWN_MS) {
+        logger.warn(
+            `[AccountManager] Quota exhausted: ${email} (model: ${modelId}). Resets in ${formatDuration(actualResetMs)}`
+        );
+    } else {
+        logger.warn(
+            `[AccountManager] Rate limited: ${email} (model: ${modelId}). Available in ${formatDuration(actualResetMs)}`
+        );
+    }
     return true;
 }
@@ -209,3 +209,29 @@ export function getMinWaitTimeMs(accounts, modelId) {
     return minWait === Infinity ? DEFAULT_COOLDOWN_MS : minWait;
 }
+/**
+ * Get the rate limit info for a specific account and model
+ * Returns the actual reset time from API, not capped
+ *
+ * @param {Array} accounts - Array of account objects
+ * @param {string} email - Email of the account
+ * @param {string} modelId - Model ID to check
+ * @returns {{isRateLimited: boolean, actualResetMs: number|null, waitMs: number}} Rate limit info
+ */
+export function getRateLimitInfo(accounts, email, modelId) {
+    const account = accounts.find(a => a.email === email);
+    if (!account || !account.modelRateLimits || !account.modelRateLimits[modelId]) {
+        return { isRateLimited: false, actualResetMs: null, waitMs: 0 };
+    }
+    const limit = account.modelRateLimits[modelId];
+    const now = Date.now();
+    const waitMs = limit.resetTime ? Math.max(0, limit.resetTime - now) : 0;
+    return {
+        isRateLimited: limit.isRateLimited && waitMs > 0,
+        actualResetMs: limit.actualResetMs || null,
+        waitMs
+    };
+}

package/src/cloudcode/message-handler.js CHANGED Viewed

@@ -9,6 +9,7 @@ import {
     ANTIGRAVITY_ENDPOINT_FALLBACKS,
     MAX_RETRIES,
     MAX_WAIT_BEFORE_ERROR_MS,
+    DEFAULT_COOLDOWN_MS,
     isThinkingModel
 } from '../constants.js';
 import { convertGoogleToAnthropic } from '../format/index.js';
@@ -39,67 +40,56 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
     // Retry loop with account failover
     // Ensure we try at least as many times as there are accounts to cycle through everyone
-    // +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop
     const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);
     for (let attempt = 0; attempt < maxAttempts; attempt++) {
-        // Use sticky account selection for cache continuity
-        const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount(model);
-        let account = stickyAccount;
-        // Handle waiting for sticky account
-        if (!account && waitMs > 0) {
-            logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`);
-            await sleep(waitMs);
-            accountManager.clearExpiredLimits();
-            account = accountManager.getCurrentStickyAccount(model);
-        }
+        // Clear any expired rate limits before picking
+        accountManager.clearExpiredLimits();
-        // Handle all accounts rate-limited
-        if (!account) {
+        // Get available accounts for this model
+        const availableAccounts = accountManager.getAvailableAccounts(model);
+        // If no accounts available, check if we should wait or throw error
+        if (availableAccounts.length === 0) {
             if (accountManager.isAllRateLimited(model)) {
-                const allWaitMs = accountManager.getMinWaitTimeMs(model);
-                const resetTime = new Date(Date.now() + allWaitMs).toISOString();
+                const minWaitMs = accountManager.getMinWaitTimeMs(model);
+                const resetTime = new Date(Date.now() + minWaitMs).toISOString();
                 // If wait time is too long (> 2 minutes), throw error immediately
-                if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
+                if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
                     throw new Error(
-                        `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}`
+                        `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
                     );
                 }
-                // Wait for reset (applies to both single and multi-account modes)
+                // Wait for shortest reset time
                 const accountCount = accountManager.getAccountCount();
-                logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(allWaitMs)}...`);
-                await sleep(allWaitMs);
-                // Add small buffer after waiting to ensure rate limits have truly expired
-                await sleep(500);
+                logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`);
+                await sleep(minWaitMs + 500); // Add 500ms buffer
                 accountManager.clearExpiredLimits();
-                account = accountManager.pickNext(model);
-                // If still no account after waiting, try optimistic reset
-                // This handles cases where the API rate limit is transient
-                if (!account) {
-                    logger.warn('[CloudCode] No account available after wait, attempting optimistic reset...');
-                    accountManager.resetAllRateLimits();
-                    account = accountManager.pickNext(model);
-                }
+                continue; // Retry the loop
             }
-            if (!account) {
-                // Check if fallback is enabled and available
-                if (fallbackEnabled) {
-                    const fallbackModel = getFallbackModel(model);
-                    if (fallbackModel) {
-                        logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
-                        // Retry with fallback model
-                        const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
-                        return await sendMessage(fallbackRequest, accountManager, false); // Disable fallback for recursive call
-                    }
+            // Check if fallback is enabled and available
+            if (fallbackEnabled) {
+                const fallbackModel = getFallbackModel(model);
+                if (fallbackModel) {
+                    logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
+                    const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
+                    return await sendMessage(fallbackRequest, accountManager, false);
                 }
-                throw new Error('No accounts available');
             }
+            throw new Error('No accounts available');
+        }
+        // Pick sticky account (prefers current for cache continuity)
+        let account = accountManager.getCurrentStickyAccount(model);
+        if (!account) {
+            account = accountManager.pickNext(model);
+        }
+        if (!account) {
+            continue; // Shouldn't happen, but safety check
         }
         try {
@@ -112,6 +102,8 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
             // Try each endpoint
             let lastError = null;
+            let retriedOnce = false; // Track if we've already retried for short rate limit
             for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
                 try {
                     const url = isThinking
@@ -137,14 +129,51 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
                         }
                         if (response.status === 429) {
-                            // Rate limited on this endpoint - try next endpoint first (DAILY → PROD)
-                            logger.debug(`[CloudCode] Rate limited at ${endpoint}, trying next endpoint...`);
                             const resetMs = parseResetTime(response, errorText);
-                            // Keep minimum reset time across all 429 responses
-                            if (!lastError?.is429 || (resetMs && (!lastError.resetMs || resetMs < lastError.resetMs))) {
-                                lastError = { is429: true, response, errorText, resetMs };
+                            // Decision: wait and retry OR switch account
+                            if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
+                                // Long-term quota exhaustion (> 10s) - switch to next account
+                                logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(resetMs)}), switching account...`);
+                                accountManager.markRateLimited(account.email, resetMs, model);
+                                throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
+                            } else {
+                                // Short-term rate limit (<= 10s) - wait and retry once
+                                const waitMs = resetMs || DEFAULT_COOLDOWN_MS;
+                                if (!retriedOnce) {
+                                    retriedOnce = true;
+                                    logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
+                                    await sleep(waitMs);
+                                    // Retry same endpoint
+                                    const retryResponse = await fetch(url, {
+                                        method: 'POST',
+                                        headers: buildHeaders(token, model, isThinking ? 'text/event-stream' : 'application/json'),
+                                        body: JSON.stringify(payload)
+                                    });
+                                    if (retryResponse.ok) {
+                                        // Process retry response
+                                        if (isThinking) {
+                                            return await parseThinkingSSEResponse(retryResponse, anthropicRequest.model);
+                                        }
+                                        const data = await retryResponse.json();
+                                        logger.debug('[CloudCode] Response received after retry');
+                                        return convertGoogleToAnthropic(data, anthropicRequest.model);
+                                    }
+                                    // Retry also failed - parse new reset time
+                                    const retryErrorText = await retryResponse.text();
+                                    const retryResetMs = parseResetTime(retryResponse, retryErrorText);
+                                    logger.warn(`[CloudCode] Retry also failed, marking and switching...`);
+                                    accountManager.markRateLimited(account.email, retryResetMs || waitMs, model);
+                                    throw new Error(`RATE_LIMITED_AFTER_RETRY: ${retryErrorText}`);
+                                } else {
+                                    // Already retried once, mark and switch
+                                    accountManager.markRateLimited(account.email, waitMs, model);
+                                    throw new Error(`RATE_LIMITED: ${errorText}`);
+                                }
                             }
-                            continue;
                         }
                         if (response.status >= 400) {
@@ -179,7 +208,6 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
             // If all endpoints failed for this account
             if (lastError) {
-                // If all endpoints returned 429, mark account as rate-limited
                 if (lastError.is429) {
                     logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
                     accountManager.markRateLimited(account.email, lastError.resetMs, model);
@@ -199,18 +227,17 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
                 logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
                 continue;
             }
-            // Non-rate-limit error: throw immediately
-            // UNLESS it's a 500 error, then we treat it as a "soft" failure for this account and try the next one
+            // Handle 5xx errors
             if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
                 logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`);
-                accountManager.pickNext(model); // Force advance to next account
+                accountManager.pickNext(model);
                 continue;
             }
             if (isNetworkError(error)) {
                 logger.warn(`[CloudCode] Network error for ${account.email}, trying next account... (${error.message})`);
-                await sleep(1000); // Brief pause before retry
-                accountManager.pickNext(model); // Advance to next account
+                await sleep(1000);
+                accountManager.pickNext(model);
                 continue;
             }
@@ -224,7 +251,7 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
         if (fallbackModel) {
             logger.warn(`[CloudCode] All retries exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
             const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
-            return await sendMessage(fallbackRequest, accountManager, false); // Disable fallback for recursive call
+            return await sendMessage(fallbackRequest, accountManager, false);
         }
     }

package/src/cloudcode/model-api.js CHANGED Viewed

@@ -57,22 +57,26 @@ export async function listModels(token) {
  * Returns model quotas including remaining fraction and reset time
  *
  * @param {string} token - OAuth access token
+ * @param {string} [projectId] - Optional project ID for accurate quota info
  * @returns {Promise<Object>} Raw response from fetchAvailableModels API
  */
-export async function fetchAvailableModels(token) {
+export async function fetchAvailableModels(token, projectId = null) {
     const headers = {
         'Authorization': `Bearer ${token}`,
         'Content-Type': 'application/json',
         ...ANTIGRAVITY_HEADERS
     };
+    // Include project ID in body for accurate quota info (per Quotio implementation)
+    const body = projectId ? { project: projectId } : {};
     for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
         try {
             const url = `${endpoint}/v1internal:fetchAvailableModels`;
             const response = await fetch(url, {
                 method: 'POST',
                 headers,
-                body: JSON.stringify({})
+                body: JSON.stringify(body)
             });
             if (!response.ok) {
@@ -95,10 +99,11 @@ export async function fetchAvailableModels(token) {
  * Extracts quota info (remaining fraction and reset time) for each model
  *
  * @param {string} token - OAuth access token
+ * @param {string} [projectId] - Optional project ID for accurate quota info
  * @returns {Promise<Object>} Map of modelId -> { remainingFraction, resetTime }
  */
-export async function getModelQuotas(token) {
-    const data = await fetchAvailableModels(token);
+export async function getModelQuotas(token, projectId = null) {
+    const data = await fetchAvailableModels(token, projectId);
     if (!data || !data.models) return {};
     const quotas = {};
@@ -108,7 +113,8 @@ export async function getModelQuotas(token) {
         if (modelData.quotaInfo) {
             quotas[modelId] = {
-                remainingFraction: modelData.quotaInfo.remainingFraction ?? null,
+                // When remainingFraction is missing but resetTime is present, quota is exhausted (0%)
+                remainingFraction: modelData.quotaInfo.remainingFraction ?? (modelData.quotaInfo.resetTime ? 0 : null),
                 resetTime: modelData.quotaInfo.resetTime ?? null
             };
         }

package/src/cloudcode/rate-limit-parser.js CHANGED Viewed

@@ -78,7 +78,7 @@ export function parseResetTime(responseOrError, errorText = '') {
         // Try to extract "quotaResetDelay" first (e.g. "754.431528ms" or "1.5s")
         // This is Google's preferred format for rate limit reset delay
-        const quotaDelayMatch = msg.match(/quotaResetDelay[:\s"]+(\\d+(?:\\.\\d+)?)(ms|s)/i);
+        const quotaDelayMatch = msg.match(/quotaResetDelay[:\s"]+(\d+(?:\.\d+)?)(ms|s)/i);
         if (quotaDelayMatch) {
             const value = parseFloat(quotaDelayMatch[1]);
             const unit = quotaDelayMatch[2].toLowerCase();
@@ -103,7 +103,7 @@ export function parseResetTime(responseOrError, errorText = '') {
         // Try to extract "retry-after-ms" or "retryDelay" - check seconds format first (e.g. "7739.23s")
         // Added stricter regex to avoid partial matches
         if (!resetMs) {
-             const secMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+([\\d\\.]+)(?:s\b|s")/i);
+             const secMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+([\d.]+)(?:s\b|s")/i);
              if (secMatch) {
                  resetMs = Math.ceil(parseFloat(secMatch[1]) * 1000);
                  logger.debug(`[CloudCode] Parsed retry seconds from body (precise): ${resetMs}ms`);

package/src/cloudcode/streaming-handler.js CHANGED Viewed

@@ -9,7 +9,8 @@ import {
     ANTIGRAVITY_ENDPOINT_FALLBACKS,
     MAX_RETRIES,
     MAX_EMPTY_RESPONSE_RETRIES,
-    MAX_WAIT_BEFORE_ERROR_MS
+    MAX_WAIT_BEFORE_ERROR_MS,
+    DEFAULT_COOLDOWN_MS
 } from '../constants.js';
 import { isRateLimitError, isAuthError, isEmptyResponseError } from '../errors.js';
 import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
@@ -38,68 +39,57 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
     // Retry loop with account failover
     // Ensure we try at least as many times as there are accounts to cycle through everyone
-    // +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop
     const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);
     for (let attempt = 0; attempt < maxAttempts; attempt++) {
-        // Use sticky account selection for cache continuity
-        const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount(model);
-        let account = stickyAccount;
-        // Handle waiting for sticky account
-        if (!account && waitMs > 0) {
-            logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`);
-            await sleep(waitMs);
-            accountManager.clearExpiredLimits();
-            account = accountManager.getCurrentStickyAccount(model);
-        }
+        // Clear any expired rate limits before picking
+        accountManager.clearExpiredLimits();
-        // Handle all accounts rate-limited
-        if (!account) {
+        // Get available accounts for this model
+        const availableAccounts = accountManager.getAvailableAccounts(model);
+        // If no accounts available, check if we should wait or throw error
+        if (availableAccounts.length === 0) {
             if (accountManager.isAllRateLimited(model)) {
-                const allWaitMs = accountManager.getMinWaitTimeMs(model);
-                const resetTime = new Date(Date.now() + allWaitMs).toISOString();
+                const minWaitMs = accountManager.getMinWaitTimeMs(model);
+                const resetTime = new Date(Date.now() + minWaitMs).toISOString();
                 // If wait time is too long (> 2 minutes), throw error immediately
-                if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
+                if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
                     throw new Error(
-                        `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}`
+                        `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
                     );
                 }
-                // Wait for reset (applies to both single and multi-account modes)
+                // Wait for shortest reset time
                 const accountCount = accountManager.getAccountCount();
-                logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(allWaitMs)}...`);
-                await sleep(allWaitMs);
-                // Add small buffer after waiting to ensure rate limits have truly expired
-                await sleep(500);
+                logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`);
+                await sleep(minWaitMs + 500); // Add 500ms buffer
                 accountManager.clearExpiredLimits();
-                account = accountManager.pickNext(model);
-                // If still no account after waiting, try optimistic reset
-                // This handles cases where the API rate limit is transient
-                if (!account) {
-                    logger.warn('[CloudCode] No account available after wait, attempting optimistic reset...');
-                    accountManager.resetAllRateLimits();
-                    account = accountManager.pickNext(model);
-                }
+                continue; // Retry the loop
             }
-            if (!account) {
-                // Check if fallback is enabled and available
-                if (fallbackEnabled) {
-                    const fallbackModel = getFallbackModel(model);
-                    if (fallbackModel) {
-                        logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
-                        // Retry with fallback model
-                        const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
-                        yield* sendMessageStream(fallbackRequest, accountManager, false); // Disable fallback for recursive call
-                        return;
-                    }
+            // Check if fallback is enabled and available
+            if (fallbackEnabled) {
+                const fallbackModel = getFallbackModel(model);
+                if (fallbackModel) {
+                    logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
+                    const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
+                    yield* sendMessageStream(fallbackRequest, accountManager, false);
+                    return;
                 }
-                throw new Error('No accounts available');
             }
+            throw new Error('No accounts available');
+        }
+        // Pick sticky account (prefers current for cache continuity)
+        let account = accountManager.getCurrentStickyAccount(model);
+        if (!account) {
+            account = accountManager.pickNext(model);
+        }
+        if (!account) {
+            continue; // Shouldn't happen, but safety check
         }
         try {
@@ -112,6 +102,8 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
             // Try each endpoint for streaming
             let lastError = null;
+            let retriedOnce = false; // Track if we've already retried for short rate limit
             for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
                 try {
                     const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
@@ -134,14 +126,48 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
                         }
                         if (response.status === 429) {
-                            // Rate limited on this endpoint - try next endpoint first (DAILY → PROD)
-                            logger.debug(`[CloudCode] Stream rate limited at ${endpoint}, trying next endpoint...`);
                             const resetMs = parseResetTime(response, errorText);
-                            // Keep minimum reset time across all 429 responses
-                            if (!lastError?.is429 || (resetMs && (!lastError.resetMs || resetMs < lastError.resetMs))) {
-                                lastError = { is429: true, response, errorText, resetMs };
+                            // Decision: wait and retry OR switch account
+                            if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
+                                // Long-term quota exhaustion (> 10s) - switch to next account
+                                logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(resetMs)}), switching account...`);
+                                accountManager.markRateLimited(account.email, resetMs, model);
+                                throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
+                            } else {
+                                // Short-term rate limit (<= 10s) - wait and retry once
+                                const waitMs = resetMs || DEFAULT_COOLDOWN_MS;
+                                if (!retriedOnce) {
+                                    retriedOnce = true;
+                                    logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
+                                    await sleep(waitMs);
+                                    // Retry same endpoint
+                                    const retryResponse = await fetch(url, {
+                                        method: 'POST',
+                                        headers: buildHeaders(token, model, 'text/event-stream'),
+                                        body: JSON.stringify(payload)
+                                    });
+                                    if (retryResponse.ok) {
+                                        // Stream the retry response
+                                        yield* streamSSEResponse(retryResponse, anthropicRequest.model);
+                                        logger.debug('[CloudCode] Stream completed after retry');
+                                        return;
+                                    }
+                                    // Retry also failed - parse new reset time
+                                    const retryErrorText = await retryResponse.text();
+                                    const retryResetMs = parseResetTime(retryResponse, retryErrorText);
+                                    logger.warn(`[CloudCode] Retry also failed, marking and switching...`);
+                                    accountManager.markRateLimited(account.email, retryResetMs || waitMs, model);
+                                    throw new Error(`RATE_LIMITED_AFTER_RETRY: ${retryErrorText}`);
+                                } else {
+                                    // Already retried once, mark and switch
+                                    accountManager.markRateLimited(account.email, waitMs, model);
+                                    throw new Error(`RATE_LIMITED: ${errorText}`);
+                                }
                             }
-                            continue;
                         }
                         lastError = new Error(`API error ${response.status}: ${errorText}`);
@@ -156,7 +182,6 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
                     }
                     // Stream the response with retry logic for empty responses
-                    // Uses a for-loop for clearer retry semantics
                     let currentResponse = response;
                     for (let emptyRetries = 0; emptyRetries <= MAX_EMPTY_RESPONSE_RETRIES; emptyRetries++) {
@@ -207,28 +232,22 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
                                     throw new Error(`401 AUTH_INVALID during retry: ${retryErrorText}`);
                                 }
-                                // For 5xx errors, don't pass to streamer - just continue to next retry
+                                // For 5xx errors, continue retrying
                                 if (currentResponse.status >= 500) {
                                     logger.warn(`[CloudCode] Retry got ${currentResponse.status}, will retry...`);
-                                    // Don't continue here - let the loop increment and refetch
-                                    // Set currentResponse to null to force refetch at loop start
-                                    emptyRetries--; // Compensate for loop increment since we didn't actually try
                                     await sleep(1000);
-                                    // Refetch immediately for 5xx
                                     currentResponse = await fetch(url, {
                                         method: 'POST',
                                         headers: buildHeaders(token, model, 'text/event-stream'),
                                         body: JSON.stringify(payload)
                                     });
                                     if (currentResponse.ok) {
-                                        continue; // Try streaming with new response
+                                        continue;
                                     }
-                                    // If still failing, let it fall through to throw
                                 }
                                 throw new Error(`Empty response retry failed: ${currentResponse.status} - ${retryErrorText}`);
                             }
-                            // Response is OK, loop will continue to try streamSSEResponse
                         }
                     }
@@ -237,7 +256,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
                         throw endpointError; // Re-throw to trigger account switch
                     }
                     if (isEmptyResponseError(endpointError)) {
-                        throw endpointError; // Re-throw empty response errors to outer handler
+                        throw endpointError;
                     }
                     logger.warn(`[CloudCode] Stream error at ${endpoint}:`, endpointError.message);
                     lastError = endpointError;
@@ -246,7 +265,6 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
             // If all endpoints failed for this account
             if (lastError) {
-                // If all endpoints returned 429, mark account as rate-limited
                 if (lastError.is429) {
                     logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
                     accountManager.markRateLimited(account.email, lastError.resetMs, model);
@@ -266,18 +284,17 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
                 logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
                 continue;
             }
-            // Non-rate-limit error: throw immediately
-            // UNLESS it's a 500 error, then we treat it as a "soft" failure for this account and try the next one
+            // Handle 5xx errors
             if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
                 logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error, trying next...`);
-                accountManager.pickNext(model); // Force advance to next account
+                accountManager.pickNext(model);
                 continue;
             }
             if (isNetworkError(error)) {
                 logger.warn(`[CloudCode] Network error for ${account.email} (stream), trying next account... (${error.message})`);
-                await sleep(1000); // Brief pause before retry
-                accountManager.pickNext(model); // Advance to next account
+                await sleep(1000);
+                accountManager.pickNext(model);
                 continue;
             }
@@ -291,7 +308,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
         if (fallbackModel) {
             logger.warn(`[CloudCode] All retries exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
             const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
-            yield* sendMessageStream(fallbackRequest, accountManager, false); // Disable fallback for recursive call
+            yield* sendMessageStream(fallbackRequest, accountManager, false);
             return;
         }
     }

package/src/constants.js CHANGED Viewed

@@ -69,15 +69,16 @@ export const ONBOARD_USER_ENDPOINTS = ANTIGRAVITY_ENDPOINT_FALLBACKS;
 // Hybrid headers specifically for loadCodeAssist
 // Uses google-api-nodejs-client User-Agent (required for project discovery on some accounts)
-export const LOAD_CODE_ASSIST_HEADERS = {
-    'User-Agent': 'google-api-nodejs-client/9.15.1',
-    'X-Goog-Api-Client': 'google-cloud-sdk vscode_cloudshelleditor/0.1',
-    'Client-Metadata': JSON.stringify({
-        ideType: 'IDE_UNSPECIFIED',
-        platform: 'PLATFORM_UNSPECIFIED',
-        pluginType: 'GEMINI'
-    })
-};
+// export const LOAD_CODE_ASSIST_HEADERS = {
+//     'User-Agent': 'google-api-nodejs-client/9.15.1',
+//     'X-Goog-Api-Client': 'google-cloud-sdk vscode_cloudshelleditor/0.1',
+//     'Client-Metadata': JSON.stringify({
+//         ideType: 'IDE_UNSPECIFIED',
+//         platform: 'PLATFORM_UNSPECIFIED',
+//         pluginType: 'GEMINI'
+//     })
+// };
+export const LOAD_CODE_ASSIST_HEADERS = ANTIGRAVITY_HEADERS;
 // Default project ID if none can be discovered
 export const DEFAULT_PROJECT_ID = 'rising-fact-p41fc';

package/src/index.js CHANGED Viewed

@@ -71,7 +71,7 @@ app.listen(PORT, () => {
 ║           Antigravity Claude Proxy Server                    ║
 ╠══════════════════════════════════════════════════════════════╣
 ║                                                              ║
-${border}  ${align(`Server running at: http://localhost:${PORT}`)}${border}
+${border}  ${align(`Server and WebUI running at: http://localhost:${PORT}`)}${border}
 ${statusSection}║                                                              ║
 ${controlSection}
 ║                                                              ║

package/src/server.js CHANGED Viewed

@@ -214,7 +214,8 @@ app.get('/health', async (req, res) => {
                 try {
                     const token = await accountManager.getTokenForAccount(account);
-                    const quotas = await getModelQuotas(token);
+                    const projectId = account.subscription?.projectId || null;
+                    const quotas = await getModelQuotas(token, projectId);
                     // Format quotas for readability
                     const formattedQuotas = {};
@@ -309,11 +310,11 @@ app.get('/account-limits', async (req, res) => {
                 try {
                     const token = await accountManager.getTokenForAccount(account);
-                    // Fetch both quotas and subscription tier in parallel
-                    const [quotas, subscription] = await Promise.all([
-                        getModelQuotas(token),
-                        getSubscriptionTier(token)
-                    ]);
+                    // Fetch subscription tier first to get project ID
+                    const subscription = await getSubscriptionTier(token);
+                    // Then fetch quotas with project ID for accurate quota info
+                    const quotas = await getModelQuotas(token, subscription.projectId);
                     // Update account object with fresh data
                     account.subscription = {