npm - commons-proxy - Versions diffs - 2.0.0 - Mend

commons-proxy 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (99) hide show

package/LICENSE +21 -0
package/README.md +757 -0
package/bin/cli.js +146 -0
package/package.json +97 -0
package/public/Complaint Details.pdf +0 -0
package/public/Cyber Crime Portal.pdf +0 -0
package/public/app.js +229 -0
package/public/css/src/input.css +523 -0
package/public/css/style.css +1 -0
package/public/favicon.png +0 -0
package/public/index.html +549 -0
package/public/js/components/account-manager.js +356 -0
package/public/js/components/add-account-modal.js +414 -0
package/public/js/components/claude-config.js +420 -0
package/public/js/components/dashboard/charts.js +605 -0
package/public/js/components/dashboard/filters.js +362 -0
package/public/js/components/dashboard/stats.js +110 -0
package/public/js/components/dashboard.js +236 -0
package/public/js/components/logs-viewer.js +100 -0
package/public/js/components/models.js +36 -0
package/public/js/components/server-config.js +349 -0
package/public/js/config/constants.js +102 -0
package/public/js/data-store.js +375 -0
package/public/js/settings-store.js +58 -0
package/public/js/store.js +99 -0
package/public/js/translations/en.js +367 -0
package/public/js/translations/id.js +412 -0
package/public/js/translations/pt.js +308 -0
package/public/js/translations/tr.js +358 -0
package/public/js/translations/zh.js +373 -0
package/public/js/utils/account-actions.js +189 -0
package/public/js/utils/error-handler.js +96 -0
package/public/js/utils/model-config.js +42 -0
package/public/js/utils/ui-logger.js +143 -0
package/public/js/utils/validators.js +77 -0
package/public/js/utils.js +69 -0
package/public/proxy-server-64.png +0 -0
package/public/views/accounts.html +361 -0
package/public/views/dashboard.html +484 -0
package/public/views/logs.html +97 -0
package/public/views/models.html +331 -0
package/public/views/settings.html +1327 -0
package/src/account-manager/credentials.js +378 -0
package/src/account-manager/index.js +462 -0
package/src/account-manager/onboarding.js +112 -0
package/src/account-manager/rate-limits.js +369 -0
package/src/account-manager/storage.js +160 -0
package/src/account-manager/strategies/base-strategy.js +109 -0
package/src/account-manager/strategies/hybrid-strategy.js +339 -0
package/src/account-manager/strategies/index.js +79 -0
package/src/account-manager/strategies/round-robin-strategy.js +76 -0
package/src/account-manager/strategies/sticky-strategy.js +138 -0
package/src/account-manager/strategies/trackers/health-tracker.js +162 -0
package/src/account-manager/strategies/trackers/index.js +9 -0
package/src/account-manager/strategies/trackers/quota-tracker.js +120 -0
package/src/account-manager/strategies/trackers/token-bucket-tracker.js +155 -0
package/src/auth/database.js +169 -0
package/src/auth/oauth.js +548 -0
package/src/auth/token-extractor.js +117 -0
package/src/cli/accounts.js +648 -0
package/src/cloudcode/index.js +29 -0
package/src/cloudcode/message-handler.js +510 -0
package/src/cloudcode/model-api.js +248 -0
package/src/cloudcode/rate-limit-parser.js +235 -0
package/src/cloudcode/request-builder.js +93 -0
package/src/cloudcode/session-manager.js +47 -0
package/src/cloudcode/sse-parser.js +121 -0
package/src/cloudcode/sse-streamer.js +293 -0
package/src/cloudcode/streaming-handler.js +615 -0
package/src/config.js +125 -0
package/src/constants.js +407 -0
package/src/errors.js +242 -0
package/src/fallback-config.js +29 -0
package/src/format/content-converter.js +193 -0
package/src/format/index.js +20 -0
package/src/format/request-converter.js +255 -0
package/src/format/response-converter.js +120 -0
package/src/format/schema-sanitizer.js +673 -0
package/src/format/signature-cache.js +88 -0
package/src/format/thinking-utils.js +648 -0
package/src/index.js +148 -0
package/src/modules/usage-stats.js +205 -0
package/src/providers/anthropic-provider.js +258 -0
package/src/providers/base-provider.js +157 -0
package/src/providers/cloudcode.js +94 -0
package/src/providers/copilot.js +399 -0
package/src/providers/github-provider.js +287 -0
package/src/providers/google-provider.js +192 -0
package/src/providers/index.js +211 -0
package/src/providers/openai-compatible.js +265 -0
package/src/providers/openai-provider.js +271 -0
package/src/providers/openrouter-provider.js +325 -0
package/src/providers/setup.js +83 -0
package/src/server.js +870 -0
package/src/utils/claude-config.js +245 -0
package/src/utils/helpers.js +51 -0
package/src/utils/logger.js +142 -0
package/src/utils/native-module-helper.js +162 -0
package/src/webui/index.js +1134 -0

package/src/cloudcode/message-handler.js ADDED Viewed

@@ -0,0 +1,510 @@
+/**
+ * Message Handler for Cloud Code
+ *
+ * Handles non-streaming message requests with multi-account support,
+ * retry logic, and endpoint failover.
+ */
+import {
+    CLOUDCODE_ENDPOINT_FALLBACKS,
+    MAX_RETRIES,
+    MAX_WAIT_BEFORE_ERROR_MS,
+    DEFAULT_COOLDOWN_MS,
+    RATE_LIMIT_DEDUP_WINDOW_MS,
+    RATE_LIMIT_STATE_RESET_MS,
+    FIRST_RETRY_DELAY_MS,
+    SWITCH_ACCOUNT_DELAY_MS,
+    MAX_CONSECUTIVE_FAILURES,
+    EXTENDED_COOLDOWN_MS,
+    CAPACITY_BACKOFF_TIERS_MS,
+    MAX_CAPACITY_RETRIES,
+    BACKOFF_BY_ERROR_TYPE,
+    QUOTA_EXHAUSTED_BACKOFF_TIERS_MS,
+    MIN_BACKOFF_MS,
+    isThinkingModel
+} from '../constants.js';
+import { convertGoogleToAnthropic } from '../format/index.js';
+import { isRateLimitError, isAuthError } from '../errors.js';
+import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
+import { logger } from '../utils/logger.js';
+import { parseResetTime, parseRateLimitReason } from './rate-limit-parser.js';
+import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
+import { parseThinkingSSEResponse } from './sse-parser.js';
+import { getFallbackModel } from '../fallback-config.js';
+/**
+ * Rate limit deduplication - prevents thundering herd on concurrent rate limits.
+ *
+ * In-memory state tracked per account+model pair:
+ *   key   - `${email}:${model}` (built by getDedupKey)
+ *   value - { consecutive429, lastAt } where `consecutive429` is the running
+ *           count of recorded rate-limit hits and `lastAt` is the Date.now()
+ *           timestamp (ms) of the most recently recorded hit.
+ *
+ * getRateLimitBackoff reads and writes entries, clearRateLimitState deletes
+ * an entry after a successful request, and the module-level interval timer
+ * prunes entries older than RATE_LIMIT_STATE_RESET_MS.
+ */
+const rateLimitStateByAccountModel = new Map(); // `${email}:${model}` -> { consecutive429, lastAt }
+/**
+ * Get deduplication key for rate limit tracking
+ * @param {string} email - Account email
+ * @param {string} model - Model ID
+ * @returns {string} Dedup key
+ */
+function getDedupKey(email, model) {
+    return `${email}:${model}`;
+}
+/**
+ * Get rate limit backoff with deduplication and exponential backoff (matches opencode-cloudcode-auth)
+ * @param {string} email - Account email
+ * @param {string} model - Model ID
+ * @param {number|null} serverRetryAfterMs - Server-provided retry time
+ * @returns {{attempt: number, delayMs: number, isDuplicate: boolean}} Backoff info
+ */
+function getRateLimitBackoff(email, model, serverRetryAfterMs) {
+    const now = Date.now();
+    const stateKey = getDedupKey(email, model);
+    const previous = rateLimitStateByAccountModel.get(stateKey);
+    // Check if within dedup window - return duplicate status
+    if (previous && (now - previous.lastAt < RATE_LIMIT_DEDUP_WINDOW_MS)) {
+        const baseDelay = serverRetryAfterMs ?? FIRST_RETRY_DELAY_MS;
+        const backoffDelay = Math.min(baseDelay * Math.pow(2, previous.consecutive429 - 1), 60000);
+        logger.debug(`[CloudCode] Rate limit on ${email}:${model} within dedup window, attempt=${previous.consecutive429}, isDuplicate=true`);
+        return { attempt: previous.consecutive429, delayMs: Math.max(baseDelay, backoffDelay), isDuplicate: true };
+    }
+    // Determine attempt number - reset after RATE_LIMIT_STATE_RESET_MS of inactivity
+    const attempt = previous && (now - previous.lastAt < RATE_LIMIT_STATE_RESET_MS)
+        ? previous.consecutive429 + 1
+        : 1;
+    // Update state
+    rateLimitStateByAccountModel.set(stateKey, { consecutive429: attempt, lastAt: now });
+    // Calculate exponential backoff
+    const baseDelay = serverRetryAfterMs ?? FIRST_RETRY_DELAY_MS;
+    const backoffDelay = Math.min(baseDelay * Math.pow(2, attempt - 1), 60000);
+    logger.debug(`[CloudCode] Rate limit backoff for ${email}:${model}: attempt=${attempt}, delayMs=${Math.max(baseDelay, backoffDelay)}`);
+    return { attempt, delayMs: Math.max(baseDelay, backoffDelay), isDuplicate: false };
+}
+/**
+ * Clear rate limit state after successful request
+ * @param {string} email - Account email
+ * @param {string} model - Model ID
+ */
+function clearRateLimitState(email, model) {
+    const key = getDedupKey(email, model);
+    rateLimitStateByAccountModel.delete(key);
+}
+/**
+ * Detect permanent authentication failures that require re-authentication.
+ * These should mark the account as invalid rather than just clearing cache.
+ * @param {string} errorText - Error message from API
+ * @returns {boolean} True if permanent auth failure
+ */
+function isPermanentAuthFailure(errorText) {
+    const lower = (errorText || '').toLowerCase();
+    return lower.includes('invalid_grant') ||
+        lower.includes('token revoked') ||
+        lower.includes('token has been expired or revoked') ||
+        lower.includes('token_revoked') ||
+        lower.includes('invalid_client') ||
+        lower.includes('credentials are invalid');
+}
+/**
+ * Detect if 429 error is due to model capacity (not user quota).
+ * Capacity issues should retry on same account with shorter delay.
+ * @param {string} errorText - Error message from API
+ * @returns {boolean} True if capacity exhausted (not quota)
+ */
+function isModelCapacityExhausted(errorText) {
+    const lower = (errorText || '').toLowerCase();
+    return lower.includes('model_capacity_exhausted') ||
+        lower.includes('capacity_exhausted') ||
+        lower.includes('model is currently overloaded') ||
+        lower.includes('service temporarily unavailable');
+}
+// Periodically clean up stale rate limit state (every 60 seconds)
+setInterval(() => {
+    const cutoff = Date.now() - RATE_LIMIT_STATE_RESET_MS;
+    for (const [key, state] of rateLimitStateByAccountModel.entries()) {
+        if (state.lastAt < cutoff) {
+            rateLimitStateByAccountModel.delete(key);
+        }
+    }
+}, 60000);
+/**
+ * Calculate smart backoff based on error type (matches opencode-cloudcode-auth)
+ * @param {string} errorText - Error message
+ * @param {number|null} serverResetMs - Reset time from server
+ * @param {number} consecutiveFailures - Number of consecutive failures
+ * @returns {number} Backoff time in milliseconds
+ */
+function calculateSmartBackoff(errorText, serverResetMs, consecutiveFailures = 0) {
+    // If server provides a reset time, use it (with minimum floor to prevent loops)
+    if (serverResetMs && serverResetMs > 0) {
+        return Math.max(serverResetMs, MIN_BACKOFF_MS);
+    }
+    const reason = parseRateLimitReason(errorText);
+    switch (reason) {
+        case 'QUOTA_EXHAUSTED':
+            // Progressive backoff: [60s, 5m, 30m, 2h]
+            const tierIndex = Math.min(consecutiveFailures, QUOTA_EXHAUSTED_BACKOFF_TIERS_MS.length - 1);
+            return QUOTA_EXHAUSTED_BACKOFF_TIERS_MS[tierIndex];
+        case 'RATE_LIMIT_EXCEEDED':
+            return BACKOFF_BY_ERROR_TYPE.RATE_LIMIT_EXCEEDED;
+        case 'MODEL_CAPACITY_EXHAUSTED':
+            return BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED;
+        case 'SERVER_ERROR':
+            return BACKOFF_BY_ERROR_TYPE.SERVER_ERROR;
+        default:
+            return BACKOFF_BY_ERROR_TYPE.UNKNOWN;
+    }
+}
+/**
+ * Send a non-streaming request to Cloud Code with multi-account support.
+ * Thinking models (per isThinkingModel) are sent to the SSE endpoint
+ * (streamGenerateContent?alt=sse) and the stream is accumulated with
+ * parseThinkingSSEResponse; all other models use generateContent and the
+ * JSON body is converted with convertGoogleToAnthropic.
+ *
+ * Flow of each attempt:
+ *  1. Expired rate limits are cleared and an account is selected through
+ *     accountManager.selectAccount(model).
+ *  2. Each entry of CLOUDCODE_ENDPOINT_FALLBACKS is tried in order. Capacity
+ *     errors (429/503) and short rate limits sleep and retry the same
+ *     endpoint; other non-OK responses advance to the next endpoint or throw
+ *     to switch account.
+ *  3. Rate-limit, auth, 5xx and network errors move on to the next attempt;
+ *     any other error is rethrown to the caller.
+ * Sleeping until a rate limit resets, or for a strategy-requested wait without
+ * an account, does not consume an attempt (the loop counter is decremented).
+ *
+ * @param {Object} anthropicRequest - The Anthropic-format request
+ * @param {string} anthropicRequest.model - Model name to use
+ * @param {Array} anthropicRequest.messages - Array of message objects
+ * @param {number} [anthropicRequest.max_tokens] - Maximum tokens to generate
+ * @param {Object} [anthropicRequest.thinking] - Thinking configuration
+ * @param {import('../account-manager/index.js').default} accountManager - The account manager instance
+ * @param {boolean} [fallbackEnabled=false] - When true, the request is re-sent once
+ *   with the model returned by getFallbackModel(model) (with fallback disabled on
+ *   that nested call) if all accounts are rate-limited for longer than
+ *   MAX_WAIT_BEFORE_ERROR_MS or if all attempts are exhausted
+ * @returns {Promise<Object>} Anthropic-format response object
+ * @throws {Error} 'RESOURCE_EXHAUSTED: ...' when every account is rate-limited for
+ *   longer than MAX_WAIT_BEFORE_ERROR_MS and no fallback applies;
+ *   'No accounts available' when no account is available and not all are
+ *   rate-limited; 'Max retries exceeded' when all attempts are used up;
+ *   or the original error when it is not a rate-limit/auth/5xx/network error
+ */
+export async function sendMessage(anthropicRequest, accountManager, fallbackEnabled = false) {
+    const model = anthropicRequest.model;
+    const isThinking = isThinkingModel(model);
+    // Retry loop with account failover
+    // Ensure we try at least as many times as there are accounts to cycle through everyone
+    const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+        // Clear any expired rate limits before picking
+        accountManager.clearExpiredLimits();
+        // Get available accounts for this model
+        const availableAccounts = accountManager.getAvailableAccounts(model);
+        // If no accounts available, check if we should wait or throw error
+        if (availableAccounts.length === 0) {
+            if (accountManager.isAllRateLimited(model)) {
+                const minWaitMs = accountManager.getMinWaitTimeMs(model);
+                const resetTime = new Date(Date.now() + minWaitMs).toISOString();
+                // If wait time exceeds MAX_WAIT_BEFORE_ERROR_MS, try fallback first, then throw error
+                if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
+                    // Check if fallback is enabled and available
+                    if (fallbackEnabled) {
+                        const fallbackModel = getFallbackModel(model);
+                        if (fallbackModel) {
+                            logger.warn(`[CloudCode] All accounts exhausted for ${model} (${formatDuration(minWaitMs)} wait). Attempting fallback to ${fallbackModel}`);
+                            const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
+                            return await sendMessage(fallbackRequest, accountManager, false);
+                        }
+                    }
+                    throw new Error(
+                        `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
+                    );
+                }
+                // Wait for shortest reset time
+                const accountCount = accountManager.getAccountCount();
+                logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`);
+                await sleep(minWaitMs + 500); // Add 500ms buffer
+                accountManager.clearExpiredLimits();
+                // CRITICAL FIX: Don't count waiting for rate limits as a failed attempt
+                // This prevents "Max retries exceeded" when we are just patiently waiting
+                attempt--;
+                continue; // Retry the loop
+            }
+            // No accounts available and not rate-limited (shouldn't happen normally)
+            throw new Error('No accounts available');
+        }
+        // Select account using configured strategy
+        const { account, waitMs } = accountManager.selectAccount(model);
+        // If strategy returns a wait time without an account, sleep and retry
+        if (!account && waitMs > 0) {
+            logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
+            await sleep(waitMs + 500);
+            attempt--; // CRITICAL FIX: Don't count strategy wait as failure
+            continue;
+        }
+        // If strategy returns an account with throttle wait (fallback mode), apply delay
+        // This prevents overwhelming the API when using emergency/lastResort fallbacks
+        if (account && waitMs > 0) {
+            logger.debug(`[CloudCode] Throttling request (${waitMs}ms) - fallback mode active`);
+            await sleep(waitMs);
+        }
+        if (!account) {
+            logger.warn(`[CloudCode] Strategy returned no account for ${model} (attempt ${attempt + 1}/${maxAttempts})`);
+            continue;
+        }
+        try {
+            // Get token and project for this account
+            const token = await accountManager.getTokenForAccount(account);
+            const project = await accountManager.getProjectForAccount(account, token);
+            const payload = buildCloudCodeRequest(anthropicRequest, project);
+            logger.debug(`[CloudCode] Sending request for model: ${model}`);
+            // Try each endpoint with index-based loop for capacity retry support
+            let lastError = null;
+            let capacityRetryCount = 0;
+            let endpointIndex = 0;
+            while (endpointIndex < CLOUDCODE_ENDPOINT_FALLBACKS.length) {
+                const endpoint = CLOUDCODE_ENDPOINT_FALLBACKS[endpointIndex];
+                try {
+                    const url = isThinking
+                        ? `${endpoint}/v1internal:streamGenerateContent?alt=sse`
+                        : `${endpoint}/v1internal:generateContent`;
+                    const response = await fetch(url, {
+                        method: 'POST',
+                        headers: buildHeaders(token, model, isThinking ? 'text/event-stream' : 'application/json'),
+                        body: JSON.stringify(payload)
+                    });
+                    if (!response.ok) {
+                        const errorText = await response.text();
+                        logger.warn(`[CloudCode] Error at ${endpoint}: ${response.status} - ${errorText}`);
+                        if (response.status === 401) {
+                            // Check for permanent auth failures
+                            if (isPermanentAuthFailure(errorText)) {
+                                logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
+                                accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
+                                throw new Error(`AUTH_INVALID_PERMANENT: ${errorText}`);
+                            }
+                            // Transient auth error - clear caches and move to the next endpoint. NOTE(review): the `token` local fetched above is reused for the remaining endpoints; a fresh token is only fetched on the next outer attempt - confirm this is intended
+                            logger.warn('[CloudCode] Transient auth error, refreshing token...');
+                            accountManager.clearTokenCache(account.email);
+                            accountManager.clearProjectCache(account.email);
+                            endpointIndex++;
+                            continue;
+                        }
+                        if (response.status === 429) {
+                            const resetMs = parseResetTime(response, errorText);
+                            const consecutiveFailures = accountManager.getConsecutiveFailures?.(account.email) || 0;
+                            // Check if capacity issue (NOT quota) - retry same endpoint with progressive backoff
+                            if (isModelCapacityExhausted(errorText)) {
+                                if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
+                                    // Progressive capacity backoff tiers
+                                    const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
+                                    const waitMs = resetMs || CAPACITY_BACKOFF_TIERS_MS[tierIndex];
+                                    capacityRetryCount++;
+                                    // Track failures for progressive backoff escalation (matches opencode-cloudcode-auth)
+                                    accountManager.incrementConsecutiveFailures(account.email);
+                                    logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
+                                    await sleep(waitMs);
+                                    // Don't increment endpointIndex - retry same endpoint
+                                    continue;
+                                }
+                                // Max capacity retries exceeded - treat as quota exhaustion
+                                logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
+                            }
+                            // Get rate limit backoff with exponential backoff and state reset
+                            const backoff = getRateLimitBackoff(account.email, model, resetMs);
+                            // For very short rate limits (< 1 second), always wait and retry
+                            // Switching accounts won't help when all accounts have per-second rate limits
+                            if (resetMs !== null && resetMs < 1000) {
+                                const waitMs = resetMs;
+                                logger.info(`[CloudCode] Short rate limit on ${account.email} (${resetMs}ms), waiting and retrying...`);
+                                await sleep(waitMs);
+                                // Don't increment endpointIndex - retry same endpoint. NOTE(review): this path has no retry counter of its own, so it repeats for as long as the server keeps answering with a sub-second reset
+                                continue;
+                            }
+                            // If within dedup window AND reset time is >= 1s, switch account
+                            if (backoff.isDuplicate) {
+                                const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);
+                                logger.info(`[CloudCode] Skipping retry due to recent rate limit on ${account.email} (attempt ${backoff.attempt}), switching account...`);
+                                accountManager.markRateLimited(account.email, smartBackoffMs, model);
+                                throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
+                            }
+                            // Calculate smart backoff based on error type
+                            const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);
+                            // Decision: wait and retry OR switch account
+                            // First 429 gets a quick 1s retry (FIRST_RETRY_DELAY_MS)
+                            if (backoff.attempt === 1 && smartBackoffMs <= DEFAULT_COOLDOWN_MS) {
+                                // Quick 1s retry on first 429 (matches opencode-cloudcode-auth)
+                                const waitMs = backoff.delayMs;
+                                // markRateLimited already increments consecutiveFailures internally
+                                // This prevents concurrent retry storms and ensures progressive backoff escalation
+                                accountManager.markRateLimited(account.email, waitMs, model);
+                                logger.info(`[CloudCode] First rate limit on ${account.email}, quick retry after ${formatDuration(waitMs)}...`);
+                                await sleep(waitMs);
+                                // Don't increment endpointIndex - retry same endpoint
+                                continue;
+                            } else if (smartBackoffMs > DEFAULT_COOLDOWN_MS) {
+                                // Long-term quota exhaustion (> DEFAULT_COOLDOWN_MS) - wait SWITCH_ACCOUNT_DELAY_MS then switch
+                                logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(smartBackoffMs)}), switching account after ${formatDuration(SWITCH_ACCOUNT_DELAY_MS)} delay...`);
+                                await sleep(SWITCH_ACCOUNT_DELAY_MS);
+                                accountManager.markRateLimited(account.email, smartBackoffMs, model);
+                                throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
+                            } else {
+                                // Short-term rate limit but not first attempt - use exponential backoff delay
+                                const waitMs = backoff.delayMs;
+                                // markRateLimited already increments consecutiveFailures internally
+                                accountManager.markRateLimited(account.email, waitMs, model);
+                                logger.info(`[CloudCode] Rate limit on ${account.email} (attempt ${backoff.attempt}), waiting ${formatDuration(waitMs)}...`);
+                                await sleep(waitMs);
+                                // Don't increment endpointIndex - retry same endpoint
+                                continue;
+                            }
+                        }
+                        if (response.status >= 400) {
+                            // Check for 503 MODEL_CAPACITY_EXHAUSTED - use progressive backoff like 429 capacity
+                            if (response.status === 503 && isModelCapacityExhausted(errorText)) {
+                                if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
+                                    // Progressive capacity backoff tiers (same as 429 capacity handling)
+                                    const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
+                                    const waitMs = CAPACITY_BACKOFF_TIERS_MS[tierIndex];
+                                    capacityRetryCount++;
+                                    accountManager.incrementConsecutiveFailures(account.email);
+                                    logger.info(`[CloudCode] 503 Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
+                                    await sleep(waitMs);
+                                    // Don't increment endpointIndex - retry same endpoint
+                                    continue;
+                                }
+                                // Max capacity retries exceeded - switch account
+                                logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded on 503, switching account`);
+                                accountManager.markRateLimited(account.email, BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED, model);
+                                throw new Error(`CAPACITY_EXHAUSTED: ${errorText}`);
+                            }
+                            lastError = new Error(`API error ${response.status}: ${errorText}`);
+                            // Try next endpoint for 403/404/5xx errors (matches opencode-cloudcode-auth behavior)
+                            if (response.status === 403 || response.status === 404) {
+                                logger.warn(`[CloudCode] ${response.status} at ${endpoint}...`);
+                            } else if (response.status >= 500) {
+                                logger.warn(`[CloudCode] ${response.status} error, waiting 1s before retry...`);
+                                await sleep(1000);
+                            }
+                            endpointIndex++;
+                            continue;
+                        }
+                    }
+                    // For thinking models, parse SSE and accumulate all parts
+                    if (isThinking) {
+                        const result = await parseThinkingSSEResponse(response, anthropicRequest.model);
+                        // Clear rate limit state on success
+                        clearRateLimitState(account.email, model);
+                        accountManager.notifySuccess(account, model);
+                        return result;
+                    }
+                    // Non-thinking models use regular JSON
+                    const data = await response.json();
+                    logger.debug('[CloudCode] Response received');
+                    // Clear rate limit state on success
+                    clearRateLimitState(account.email, model);
+                    accountManager.notifySuccess(account, model);
+                    return convertGoogleToAnthropic(data, anthropicRequest.model);
+                } catch (endpointError) {
+                    if (isRateLimitError(endpointError)) {
+                        throw endpointError; // Re-throw to trigger account switch
+                    }
+                    logger.warn(`[CloudCode] Error at ${endpoint}:`, endpointError.message);
+                    lastError = endpointError;
+                    endpointIndex++;
+                }
+            }
+            // If all endpoints failed for this account. NOTE(review): nothing visible in this function sets is429/resetMs/errorText on lastError - confirm whether the is429 branch below is reachable
+            if (lastError) {
+                if (lastError.is429) {
+                    logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
+                    accountManager.markRateLimited(account.email, lastError.resetMs, model);
+                    throw new Error(`Rate limited: ${lastError.errorText}`);
+                }
+                throw lastError;
+            }
+        } catch (error) {
+            if (isRateLimitError(error)) {
+                // Rate limited - already marked, notify strategy and continue to next account
+                accountManager.notifyRateLimit(account, model);
+                logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
+                continue;
+            }
+            if (isAuthError(error)) {
+                // Auth invalid - already marked, continue to next account
+                logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
+                continue;
+            }
+            // Handle 5xx errors. NOTE(review): detection is by substring, so any error message containing '500' or '503' (e.g. inside a 4xx response body) is also treated as a 5xx
+            if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
+                accountManager.notifyFailure(account, model);
+                // Track 5xx errors for extended cooldown
+                // Note: markRateLimited already increments consecutiveFailures internally
+                const currentFailures = accountManager.getConsecutiveFailures(account.email);
+                if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
+                    logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
+                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
+                } else {
+                    accountManager.incrementConsecutiveFailures(account.email);
+                    logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next...`);
+                }
+                continue;
+            }
+            if (isNetworkError(error)) {
+                accountManager.notifyFailure(account, model);
+                // Track network errors for extended cooldown
+                // Note: markRateLimited already increments consecutiveFailures internally
+                const currentFailures = accountManager.getConsecutiveFailures(account.email);
+                if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
+                    logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
+                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
+                } else {
+                    accountManager.incrementConsecutiveFailures(account.email);
+                    logger.warn(`[CloudCode] Network error for ${account.email} (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next account... (${error.message})`);
+                }
+                await sleep(1000);
+                continue;
+            }
+            throw error;
+        }
+    }
+    // All retries exhausted - try fallback model if enabled
+    if (fallbackEnabled) {
+        const fallbackModel = getFallbackModel(model);
+        if (fallbackModel) {
+            logger.warn(`[CloudCode] All retries exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
+            const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
+            return await sendMessage(fallbackRequest, accountManager, false);
+        }
+    }
+    throw new Error('Max retries exceeded');
+}