npm - commons-proxy - Versions diffs - 2.0.0 - Mend

commons-proxy 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (99)

package/LICENSE +21 -0
package/README.md +757 -0
package/bin/cli.js +146 -0
package/package.json +97 -0
package/public/Complaint Details.pdf +0 -0
package/public/Cyber Crime Portal.pdf +0 -0
package/public/app.js +229 -0
package/public/css/src/input.css +523 -0
package/public/css/style.css +1 -0
package/public/favicon.png +0 -0
package/public/index.html +549 -0
package/public/js/components/account-manager.js +356 -0
package/public/js/components/add-account-modal.js +414 -0
package/public/js/components/claude-config.js +420 -0
package/public/js/components/dashboard/charts.js +605 -0
package/public/js/components/dashboard/filters.js +362 -0
package/public/js/components/dashboard/stats.js +110 -0
package/public/js/components/dashboard.js +236 -0
package/public/js/components/logs-viewer.js +100 -0
package/public/js/components/models.js +36 -0
package/public/js/components/server-config.js +349 -0
package/public/js/config/constants.js +102 -0
package/public/js/data-store.js +375 -0
package/public/js/settings-store.js +58 -0
package/public/js/store.js +99 -0
package/public/js/translations/en.js +367 -0
package/public/js/translations/id.js +412 -0
package/public/js/translations/pt.js +308 -0
package/public/js/translations/tr.js +358 -0
package/public/js/translations/zh.js +373 -0
package/public/js/utils/account-actions.js +189 -0
package/public/js/utils/error-handler.js +96 -0
package/public/js/utils/model-config.js +42 -0
package/public/js/utils/ui-logger.js +143 -0
package/public/js/utils/validators.js +77 -0
package/public/js/utils.js +69 -0
package/public/proxy-server-64.png +0 -0
package/public/views/accounts.html +361 -0
package/public/views/dashboard.html +484 -0
package/public/views/logs.html +97 -0
package/public/views/models.html +331 -0
package/public/views/settings.html +1327 -0
package/src/account-manager/credentials.js +378 -0
package/src/account-manager/index.js +462 -0
package/src/account-manager/onboarding.js +112 -0
package/src/account-manager/rate-limits.js +369 -0
package/src/account-manager/storage.js +160 -0
package/src/account-manager/strategies/base-strategy.js +109 -0
package/src/account-manager/strategies/hybrid-strategy.js +339 -0
package/src/account-manager/strategies/index.js +79 -0
package/src/account-manager/strategies/round-robin-strategy.js +76 -0
package/src/account-manager/strategies/sticky-strategy.js +138 -0
package/src/account-manager/strategies/trackers/health-tracker.js +162 -0
package/src/account-manager/strategies/trackers/index.js +9 -0
package/src/account-manager/strategies/trackers/quota-tracker.js +120 -0
package/src/account-manager/strategies/trackers/token-bucket-tracker.js +155 -0
package/src/auth/database.js +169 -0
package/src/auth/oauth.js +548 -0
package/src/auth/token-extractor.js +117 -0
package/src/cli/accounts.js +648 -0
package/src/cloudcode/index.js +29 -0
package/src/cloudcode/message-handler.js +510 -0
package/src/cloudcode/model-api.js +248 -0
package/src/cloudcode/rate-limit-parser.js +235 -0
package/src/cloudcode/request-builder.js +93 -0
package/src/cloudcode/session-manager.js +47 -0
package/src/cloudcode/sse-parser.js +121 -0
package/src/cloudcode/sse-streamer.js +293 -0
package/src/cloudcode/streaming-handler.js +615 -0
package/src/config.js +125 -0
package/src/constants.js +407 -0
package/src/errors.js +242 -0
package/src/fallback-config.js +29 -0
package/src/format/content-converter.js +193 -0
package/src/format/index.js +20 -0
package/src/format/request-converter.js +255 -0
package/src/format/response-converter.js +120 -0
package/src/format/schema-sanitizer.js +673 -0
package/src/format/signature-cache.js +88 -0
package/src/format/thinking-utils.js +648 -0
package/src/index.js +148 -0
package/src/modules/usage-stats.js +205 -0
package/src/providers/anthropic-provider.js +258 -0
package/src/providers/base-provider.js +157 -0
package/src/providers/cloudcode.js +94 -0
package/src/providers/copilot.js +399 -0
package/src/providers/github-provider.js +287 -0
package/src/providers/google-provider.js +192 -0
package/src/providers/index.js +211 -0
package/src/providers/openai-compatible.js +265 -0
package/src/providers/openai-provider.js +271 -0
package/src/providers/openrouter-provider.js +325 -0
package/src/providers/setup.js +83 -0
package/src/server.js +870 -0
package/src/utils/claude-config.js +245 -0
package/src/utils/helpers.js +51 -0
package/src/utils/logger.js +142 -0
package/src/utils/native-module-helper.js +162 -0
package/src/webui/index.js +1134 -0

package/src/cloudcode/streaming-handler.js ADDED

@@ -0,0 +1,615 @@
+/**
+ * Streaming Handler for Cloud Code
+ *
+ * Handles streaming message requests with multi-account support,
+ * retry logic, and endpoint failover.
+ */
+import {
+    CLOUDCODE_ENDPOINT_FALLBACKS,
+    MAX_RETRIES,
+    MAX_EMPTY_RESPONSE_RETRIES,
+    MAX_WAIT_BEFORE_ERROR_MS,
+    DEFAULT_COOLDOWN_MS,
+    RATE_LIMIT_DEDUP_WINDOW_MS,
+    RATE_LIMIT_STATE_RESET_MS,
+    FIRST_RETRY_DELAY_MS,
+    SWITCH_ACCOUNT_DELAY_MS,
+    MAX_CONSECUTIVE_FAILURES,
+    EXTENDED_COOLDOWN_MS,
+    CAPACITY_BACKOFF_TIERS_MS,
+    MAX_CAPACITY_RETRIES,
+    BACKOFF_BY_ERROR_TYPE,
+    QUOTA_EXHAUSTED_BACKOFF_TIERS_MS,
+    MIN_BACKOFF_MS
+} from '../constants.js';
+import { isRateLimitError, isAuthError, isEmptyResponseError } from '../errors.js';
+import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
+import { logger } from '../utils/logger.js';
+import { parseResetTime, parseRateLimitReason } from './rate-limit-parser.js';
+import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
+import { streamSSEResponse } from './sse-streamer.js';
+import { getFallbackModel } from '../fallback-config.js';
+import crypto from 'crypto';
+/**
+ * Rate limit deduplication - prevents thundering herd on concurrent rate limits.
+ * Tracks rate limit state per account+model including consecutive429 count and timestamps.
+ *
+ * This is module-level state shared by every caller in this file: entries are
+ * written by getRateLimitBackoff(), removed by clearRateLimitState(), and pruned
+ * by the periodic cleanup timer once they are older than RATE_LIMIT_STATE_RESET_MS.
+ */
+const rateLimitStateByAccountModel = new Map(); // `${email}:${model}` -> { consecutive429, lastAt }
+/**
+ * Get deduplication key for rate limit tracking.
+ *
+ * The two values are joined with a ':' separator; neither part is escaped or
+ * normalized, so the key is exactly the interpolation of the arguments.
+ * @param {string} email - Account email
+ * @param {string} model - Model ID
+ * @returns {string} Dedup key in the form "<email>:<model>"
+ */
+function getDedupKey(email, model) {
+    return `${email}:${model}`;
+}
+/**
+ * Get rate limit backoff with deduplication and exponential backoff (matches opencode-cloudcode-auth)
+ *
+ * Two paths:
+ *  - Duplicate: if the previous rate limit for this account+model was recorded less
+ *    than RATE_LIMIT_DEDUP_WINDOW_MS ago, the stored state is left untouched and the
+ *    stored attempt number is returned with isDuplicate=true.
+ *  - New attempt: otherwise the attempt counter is incremented (or restarted at 1 when
+ *    there is no previous entry or it is at least RATE_LIMIT_STATE_RESET_MS old), the
+ *    state is updated with the current time, and isDuplicate=false is returned.
+ *
+ * In both paths the delay is max(base, min(base * 2^(attempt - 1), 60000)), where base
+ * is serverRetryAfterMs when it is not null/undefined and FIRST_RETRY_DELAY_MS otherwise.
+ * Because of the outer max(), a base above 60000 is returned unchanged rather than capped.
+ * @param {string} email - Account email
+ * @param {string} model - Model ID
+ * @param {number|null} serverRetryAfterMs - Server-provided retry time
+ * @returns {{attempt: number, delayMs: number, isDuplicate: boolean}} Backoff info
+ */
+function getRateLimitBackoff(email, model, serverRetryAfterMs) {
+    const now = Date.now();
+    const stateKey = getDedupKey(email, model);
+    const previous = rateLimitStateByAccountModel.get(stateKey);
+    // Check if within dedup window - return duplicate status
+    if (previous && (now - previous.lastAt < RATE_LIMIT_DEDUP_WINDOW_MS)) {
+        const baseDelay = serverRetryAfterMs ?? FIRST_RETRY_DELAY_MS; // `??` keeps a server value of 0, which yields a 0ms delay
+        const backoffDelay = Math.min(baseDelay * Math.pow(2, previous.consecutive429 - 1), 60000); // doubling per attempt, capped at 60s
+        logger.debug(`[CloudCode] Rate limit on ${email}:${model} within dedup window, attempt=${previous.consecutive429}, isDuplicate=true`);
+        return { attempt: previous.consecutive429, delayMs: Math.max(baseDelay, backoffDelay), isDuplicate: true }; // state is intentionally not updated on this path
+    }
+    // Determine attempt number - reset after RATE_LIMIT_STATE_RESET_MS of inactivity
+    const attempt = previous && (now - previous.lastAt < RATE_LIMIT_STATE_RESET_MS)
+        ? previous.consecutive429 + 1
+        : 1;
+    // Update state
+    rateLimitStateByAccountModel.set(stateKey, { consecutive429: attempt, lastAt: now });
+    // Calculate exponential backoff
+    const baseDelay = serverRetryAfterMs ?? FIRST_RETRY_DELAY_MS; // same base selection as the duplicate path above
+    const backoffDelay = Math.min(baseDelay * Math.pow(2, attempt - 1), 60000); // doubling per attempt, capped at 60s
+    logger.debug(`[CloudCode] Rate limit backoff for ${email}:${model}: attempt=${attempt}, delayMs=${Math.max(baseDelay, backoffDelay)}`);
+    return { attempt, delayMs: Math.max(baseDelay, backoffDelay), isDuplicate: false }; // never below the base delay
+}
+/**
+ * Clear rate limit state after successful request.
+ *
+ * Deletes the account+model entry from rateLimitStateByAccountModel, so the next
+ * rate limit for that pair starts again at attempt 1. Deleting a key that is not
+ * present is a no-op.
+ * @param {string} email - Account email
+ * @param {string} model - Model ID
+ */
+function clearRateLimitState(email, model) {
+    const key = getDedupKey(email, model);
+    rateLimitStateByAccountModel.delete(key);
+}
+/**
+ * Detect permanent authentication failures that require re-authentication.
+ *
+ * Performs a case-insensitive substring search of the error text for a fixed set
+ * of markers (invalid_grant, token revoked, token_revoked, invalid_client, ...).
+ * A null, undefined or empty errorText is treated as an empty string and yields false.
+ * @param {string} errorText - Error message from API
+ * @returns {boolean} True if permanent auth failure
+ */
+function isPermanentAuthFailure(errorText) {
+    const lower = (errorText || '').toLowerCase(); // normalize once so every check below is case-insensitive
+    return lower.includes('invalid_grant') ||
+        lower.includes('token revoked') ||
+        lower.includes('token has been expired or revoked') ||
+        lower.includes('token_revoked') ||
+        lower.includes('invalid_client') ||
+        lower.includes('credentials are invalid');
+}
+/**
+ * Detect if 429 error is due to model capacity (not user quota).
+ *
+ * Performs a case-insensitive substring search of the error text. A null,
+ * undefined or empty errorText is treated as an empty string and yields false.
+ * Note that the 'capacity_exhausted' check also matches any text containing
+ * 'model_capacity_exhausted', so the first check is redundant with the second.
+ * @param {string} errorText - Error message from API
+ * @returns {boolean} True if capacity exhausted (not quota)
+ */
+function isModelCapacityExhausted(errorText) {
+    const lower = (errorText || '').toLowerCase(); // normalize once so every check below is case-insensitive
+    return lower.includes('model_capacity_exhausted') ||
+        lower.includes('capacity_exhausted') ||
+        lower.includes('model is currently overloaded') ||
+        lower.includes('service temporarily unavailable');
+}
+// Periodically clean up stale rate limit state (every 60 seconds); the timer starts as a side effect of importing this module
+setInterval(() => {
+    const cutoff = Date.now() - RATE_LIMIT_STATE_RESET_MS; // entries last touched before this instant are stale
+    for (const [key, state] of rateLimitStateByAccountModel.entries()) {
+        if (state.lastAt < cutoff) {
+            rateLimitStateByAccountModel.delete(key); // deleting the current entry while iterating a Map is permitted
+        }
+    }
+}, 60000); // NOTE(review): the returned timer handle is not stored, cleared or unref()'d, so it may keep the Node process alive - confirm this is intended
+/**
+ * Calculate smart backoff based on error type (matches opencode-cloudcode-auth)
+ *
+ * Resolution order:
+ *  1. A truthy, positive serverResetMs wins and is returned, raised to at least
+ *     MIN_BACKOFF_MS.
+ *  2. Otherwise the reason returned by parseRateLimitReason(errorText) selects the
+ *     backoff: QUOTA_EXHAUSTED uses the QUOTA_EXHAUSTED_BACKOFF_TIERS_MS tier indexed
+ *     by consecutiveFailures (clamped to the last tier); the other recognized reasons
+ *     and the default case read fixed values from BACKOFF_BY_ERROR_TYPE.
+ *
+ * consecutiveFailures only influences the QUOTA_EXHAUSTED branch.
+ * @param {string} errorText - Error message
+ * @param {number|null} serverResetMs - Reset time from server
+ * @param {number} [consecutiveFailures=0] - Number of consecutive failures
+ * @returns {number} Backoff time in milliseconds
+ */
+function calculateSmartBackoff(errorText, serverResetMs, consecutiveFailures = 0) {
+    // If server provides a reset time, use it (with minimum floor to prevent loops)
+    if (serverResetMs && serverResetMs > 0) {
+        return Math.max(serverResetMs, MIN_BACKOFF_MS);
+    }
+    const reason = parseRateLimitReason(errorText);
+    switch (reason) {
+        case 'QUOTA_EXHAUSTED':
+            // Progressive backoff: [60s, 5m, 30m, 2h]
+            const tierIndex = Math.min(consecutiveFailures, QUOTA_EXHAUSTED_BACKOFF_TIERS_MS.length - 1); // NOTE(review): `const` declared directly in a case clause is scoped to the whole switch; consider wrapping this case in braces
+            return QUOTA_EXHAUSTED_BACKOFF_TIERS_MS[tierIndex];
+        case 'RATE_LIMIT_EXCEEDED':
+            return BACKOFF_BY_ERROR_TYPE.RATE_LIMIT_EXCEEDED;
+        case 'MODEL_CAPACITY_EXHAUSTED':
+            return BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED;
+        case 'SERVER_ERROR':
+            return BACKOFF_BY_ERROR_TYPE.SERVER_ERROR;
+        default:
+            return BACKOFF_BY_ERROR_TYPE.UNKNOWN; // any reason not listed above falls back to the UNKNOWN backoff
+    }
+}
+/**
+ * Send a streaming request to Cloud Code with multi-account support
+ * Streams events in real-time as they arrive from the server
+ *
+ * @param {Object} anthropicRequest - The Anthropic-format request
+ * @param {string} anthropicRequest.model - Model name to use
+ * @param {Array} anthropicRequest.messages - Array of message objects
+ * @param {number} [anthropicRequest.max_tokens] - Maximum tokens to generate
+ * @param {Object} [anthropicRequest.thinking] - Thinking configuration
+ * @param {import('../account-manager/index.js').default} accountManager - The account manager instance
+ * @yields {Object} Anthropic-format SSE events (message_start, content_block_start, content_block_delta, etc.)
+ * @throws {Error} If max retries exceeded or no accounts available
+ */
+export async function* sendMessageStream(anthropicRequest, accountManager, fallbackEnabled = false) {
+    const model = anthropicRequest.model;
+    // Retry loop with account failover
+    // Ensure we try at least as many times as there are accounts to cycle through everyone
+    const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+        // Clear any expired rate limits before picking
+        accountManager.clearExpiredLimits();
+        // Get available accounts for this model
+        const availableAccounts = accountManager.getAvailableAccounts(model);
+        // If no accounts available, check if we should wait or throw error
+        if (availableAccounts.length === 0) {
+            if (accountManager.isAllRateLimited(model)) {
+                const minWaitMs = accountManager.getMinWaitTimeMs(model);
+                const resetTime = new Date(Date.now() + minWaitMs).toISOString();
+                // If wait time is too long (> 2 minutes), try fallback first, then throw error
+                if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
+                    // Check if fallback is enabled and available
+                    if (fallbackEnabled) {
+                        const fallbackModel = getFallbackModel(model);
+                        if (fallbackModel) {
+                            logger.warn(`[CloudCode] All accounts exhausted for ${model} (${formatDuration(minWaitMs)} wait). Attempting fallback to ${fallbackModel} (streaming)`);
+                            const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
+                            yield* sendMessageStream(fallbackRequest, accountManager, false);
+                            return;
+                        }
+                    }
+                    throw new Error(
+                        `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
+                    );
+                }
+                // Wait for shortest reset time
+                const accountCount = accountManager.getAccountCount();
+                logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`);
+                await sleep(minWaitMs + 500); // Add 500ms buffer
+                accountManager.clearExpiredLimits();
+                // CRITICAL FIX: Don't count waiting for rate limits as a failed attempt
+                // This prevents "Max retries exceeded" when we are just patiently waiting
+                attempt--;
+                continue; // Retry the loop
+            }
+            // No accounts available and not rate-limited (shouldn't happen normally)
+            throw new Error('No accounts available');
+        }
+        // Select account using configured strategy
+        const { account, waitMs } = accountManager.selectAccount(model);
+        // If strategy returns a wait time without an account, sleep and retry
+        if (!account && waitMs > 0) {
+            logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
+            await sleep(waitMs + 500);
+            attempt--; // CRITICAL FIX: Don't count strategy wait as failure
+            continue;
+        }
+        // If strategy returns an account with throttle wait (fallback mode), apply delay
+        // This prevents overwhelming the API when using emergency/lastResort fallbacks
+        if (account && waitMs > 0) {
+            logger.debug(`[CloudCode] Throttling request (${waitMs}ms) - fallback mode active`);
+            await sleep(waitMs);
+        }
+        if (!account) {
+            logger.warn(`[CloudCode] Strategy returned no account for ${model} (attempt ${attempt + 1}/${maxAttempts})`);
+            continue;
+        }
+        try {
+            // Get token and project for this account
+            const token = await accountManager.getTokenForAccount(account);
+            const project = await accountManager.getProjectForAccount(account, token);
+            const payload = buildCloudCodeRequest(anthropicRequest, project);
+            logger.debug(`[CloudCode] Starting stream for model: ${model}`);
+            // Try each endpoint with index-based loop for capacity retry support
+            let lastError = null;
+            let capacityRetryCount = 0;
+            let endpointIndex = 0;
+            while (endpointIndex < CLOUDCODE_ENDPOINT_FALLBACKS.length) {
+                const endpoint = CLOUDCODE_ENDPOINT_FALLBACKS[endpointIndex];
+                try {
+                    const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
+                    const response = await fetch(url, {
+                        method: 'POST',
+                        headers: buildHeaders(token, model, 'text/event-stream'),
+                        body: JSON.stringify(payload)
+                    });
+                    if (!response.ok) {
+                        const errorText = await response.text();
+                        logger.warn(`[CloudCode] Stream error at ${endpoint}: ${response.status} - ${errorText}`);
+                        if (response.status === 401) {
+                            // Check for permanent auth failures
+                            if (isPermanentAuthFailure(errorText)) {
+                                logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
+                                accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
+                                throw new Error(`AUTH_INVALID_PERMANENT: ${errorText}`);
+                            }
+                            // Transient auth error - clear caches and retry
+                            accountManager.clearTokenCache(account.email);
+                            accountManager.clearProjectCache(account.email);
+                            endpointIndex++;
+                            continue;
+                        }
+                        if (response.status === 429) {
+                            const resetMs = parseResetTime(response, errorText);
+                            const consecutiveFailures = accountManager.getConsecutiveFailures?.(account.email) || 0;
+                            // Check if capacity issue (NOT quota) - retry same endpoint with progressive backoff
+                            if (isModelCapacityExhausted(errorText)) {
+                                if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
+                                    // Progressive capacity backoff tiers
+                                    const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
+                                    const waitMs = resetMs || CAPACITY_BACKOFF_TIERS_MS[tierIndex];
+                                    capacityRetryCount++;
+                                    // Track failures for progressive backoff escalation (matches opencode-cloudcode-auth)
+                                    accountManager.incrementConsecutiveFailures(account.email);
+                                    logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
+                                    await sleep(waitMs);
+                                    // Don't increment endpointIndex - retry same endpoint
+                                    continue;
+                                }
+                                // Max capacity retries exceeded - treat as quota exhaustion
+                                logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
+                            }
+                            // Get rate limit backoff with exponential backoff and state reset
+                            const backoff = getRateLimitBackoff(account.email, model, resetMs);
+                            // For very short rate limits (< 1 second), always wait and retry
+                            // Switching accounts won't help when all accounts have per-second rate limits
+                            if (resetMs !== null && resetMs < 1000) {
+                                const waitMs = resetMs;
+                                logger.info(`[CloudCode] Short rate limit on ${account.email} (${resetMs}ms), waiting and retrying...`);
+                                await sleep(waitMs);
+                                // Don't increment endpointIndex - retry same endpoint
+                                continue;
+                            }
+                            // If within dedup window AND reset time is >= 1s, switch account
+                            if (backoff.isDuplicate) {
+                                const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);
+                                logger.info(`[CloudCode] Skipping retry due to recent rate limit on ${account.email} (attempt ${backoff.attempt}), switching account...`);
+                                accountManager.markRateLimited(account.email, smartBackoffMs, model);
+                                throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
+                            }
+                            // Calculate smart backoff based on error type
+                            const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);
+                            // Decision: wait and retry OR switch account
+                            // First 429 gets a quick 1s retry (FIRST_RETRY_DELAY_MS)
+                            if (backoff.attempt === 1 && smartBackoffMs <= DEFAULT_COOLDOWN_MS) {
+                                // Quick 1s retry on first 429 (matches opencode-cloudcode-auth)
+                                const waitMs = backoff.delayMs;
+                                // markRateLimited already increments consecutiveFailures internally
+                                accountManager.markRateLimited(account.email, waitMs, model);
+                                logger.info(`[CloudCode] First rate limit on ${account.email}, quick retry after ${formatDuration(waitMs)}...`);
+                                await sleep(waitMs);
+                                // Don't increment endpointIndex - retry same endpoint
+                                continue;
+                            } else if (smartBackoffMs > DEFAULT_COOLDOWN_MS) {
+                                // Long-term quota exhaustion (> 10s) - wait SWITCH_ACCOUNT_DELAY_MS then switch
+                                logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(smartBackoffMs)}), switching account after ${formatDuration(SWITCH_ACCOUNT_DELAY_MS)} delay...`);
+                                await sleep(SWITCH_ACCOUNT_DELAY_MS);
+                                accountManager.markRateLimited(account.email, smartBackoffMs, model);
+                                throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
+                            } else {
+                                // Short-term rate limit but not first attempt - use exponential backoff delay
+                                const waitMs = backoff.delayMs;
+                                // markRateLimited already increments consecutiveFailures internally
+                                accountManager.markRateLimited(account.email, waitMs, model);
+                                logger.info(`[CloudCode] Rate limit on ${account.email} (attempt ${backoff.attempt}), waiting ${formatDuration(waitMs)}...`);
+                                await sleep(waitMs);
+                                // Don't increment endpointIndex - retry same endpoint
+                                continue;
+                            }
+                        }
+                        // Check for 503 MODEL_CAPACITY_EXHAUSTED - use progressive backoff like 429 capacity
+                        if (response.status === 503 && isModelCapacityExhausted(errorText)) {
+                            if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
+                                // Progressive capacity backoff tiers (same as 429 capacity handling)
+                                const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
+                                const waitMs = CAPACITY_BACKOFF_TIERS_MS[tierIndex];
+                                capacityRetryCount++;
+                                accountManager.incrementConsecutiveFailures(account.email);
+                                logger.info(`[CloudCode] 503 Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
+                                await sleep(waitMs);
+                                // Don't increment endpointIndex - retry same endpoint
+                                continue;
+                            }
+                            // Max capacity retries exceeded - switch account
+                            logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded on 503, switching account`);
+                            accountManager.markRateLimited(account.email, BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED, model);
+                            throw new Error(`CAPACITY_EXHAUSTED: ${errorText}`);
+                        }
+                        lastError = new Error(`API error ${response.status}: ${errorText}`);
+                        // Try next endpoint for 403/404/5xx errors (matches opencode-cloudcode-auth behavior)
+                        if (response.status === 403 || response.status === 404) {
+                            logger.warn(`[CloudCode] ${response.status} at ${endpoint}..`);
+                        } else if (response.status >= 500) {
+                            logger.warn(`[CloudCode] ${response.status} stream error, waiting 1s before retry...`);
+                            await sleep(1000);
+                        }
+                        endpointIndex++;
+                        continue;
+                    }
+                    // Stream the response with retry logic for empty responses
+                    let currentResponse = response;
+                    for (let emptyRetries = 0; emptyRetries <= MAX_EMPTY_RESPONSE_RETRIES; emptyRetries++) {
+                        try {
+                            yield* streamSSEResponse(currentResponse, anthropicRequest.model);
+                            logger.debug('[CloudCode] Stream completed');
+                            // Clear rate limit state on success
+                            clearRateLimitState(account.email, model);
+                            accountManager.notifySuccess(account, model);
+                            return;
+                        } catch (streamError) {
+                            // Only retry on EmptyResponseError
+                            if (!isEmptyResponseError(streamError)) {
+                                throw streamError;
+                            }
+                            // Check if we have retries left
+                            if (emptyRetries >= MAX_EMPTY_RESPONSE_RETRIES) {
+                                logger.error(`[CloudCode] Empty response after ${MAX_EMPTY_RESPONSE_RETRIES} retries`);
+                                yield* emitEmptyResponseFallback(anthropicRequest.model);
+                                return;
+                            }
+                            // Exponential backoff: 500ms, 1000ms, 2000ms
+                            const backoffMs = 500 * Math.pow(2, emptyRetries);
+                            logger.warn(`[CloudCode] Empty response, retry ${emptyRetries + 1}/${MAX_EMPTY_RESPONSE_RETRIES} after ${backoffMs}ms...`);
+                            await sleep(backoffMs);
+                            // Refetch the response
+                            currentResponse = await fetch(url, {
+                                method: 'POST',
+                                headers: buildHeaders(token, model, 'text/event-stream'),
+                                body: JSON.stringify(payload)
+                            });
+                            // Handle specific error codes on retry
+                            if (!currentResponse.ok) {
+                                const retryErrorText = await currentResponse.text();
+                                // Rate limit error - mark account and throw to trigger account switch
+                                if (currentResponse.status === 429) {
+                                    const resetMs = parseResetTime(currentResponse, retryErrorText);
+                                    accountManager.markRateLimited(account.email, resetMs, model);
+                                    throw new Error(`429 RESOURCE_EXHAUSTED during retry: ${retryErrorText}`);
+                                }
+                                // Auth error - check for permanent failure
+                                if (currentResponse.status === 401) {
+                                    if (isPermanentAuthFailure(retryErrorText)) {
+                                        logger.error(`[CloudCode] Permanent auth failure during retry for ${account.email}`);
+                                        accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
+                                        throw new Error(`AUTH_INVALID_PERMANENT: ${retryErrorText}`);
+                                    }
+                                    accountManager.clearTokenCache(account.email);
+                                    accountManager.clearProjectCache(account.email);
+                                    throw new Error(`401 AUTH_INVALID during retry: ${retryErrorText}`);
+                                }
+                                // For 5xx errors, continue retrying
+                                if (currentResponse.status >= 500) {
+                                    logger.warn(`[CloudCode] Retry got ${currentResponse.status}, will retry...`);
+                                    await sleep(1000);
+                                    currentResponse = await fetch(url, {
+                                        method: 'POST',
+                                        headers: buildHeaders(token, model, 'text/event-stream'),
+                                        body: JSON.stringify(payload)
+                                    });
+                                    if (currentResponse.ok) {
+                                        continue;
+                                    }
+                                }
+                                throw new Error(`Empty response retry failed: ${currentResponse.status} - ${retryErrorText}`);
+                            }
+                        }
+                    }
+                } catch (endpointError) {
+                    if (isRateLimitError(endpointError)) {
+                        throw endpointError; // Re-throw to trigger account switch
+                    }
+                    if (isEmptyResponseError(endpointError)) {
+                        throw endpointError;
+                    }
+                    logger.warn(`[CloudCode] Stream error at ${endpoint}:`, endpointError.message);
+                    lastError = endpointError;
+                    endpointIndex++;
+                }
+            }
+            // If all endpoints failed for this account
+            if (lastError) {
+                if (lastError.is429) {
+                    logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
+                    accountManager.markRateLimited(account.email, lastError.resetMs, model);
+                    throw new Error(`Rate limited: ${lastError.errorText}`);
+                }
+                throw lastError;
+            }
+        } catch (error) {
+            if (isRateLimitError(error)) {
+                // Rate limited - already marked, notify strategy and continue to next account
+                accountManager.notifyRateLimit(account, model);
+                logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
+                continue;
+            }
+            if (isAuthError(error)) {
+                // Auth invalid - already marked, continue to next account
+                logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
+                continue;
+            }
+            // Handle 5xx errors
+            if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
+                accountManager.notifyFailure(account, model);
+                // Track 5xx errors for extended cooldown
+                // Note: markRateLimited already increments consecutiveFailures internally
+                const currentFailures = accountManager.getConsecutiveFailures(account.email);
+                if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
+                    logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
+                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
+                } else {
+                    accountManager.incrementConsecutiveFailures(account.email);
+                    logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next...`);
+                }
+                continue;
+            }
+            if (isNetworkError(error)) {
+                accountManager.notifyFailure(account, model);
+                // Track network errors for extended cooldown
+                // Note: markRateLimited already increments consecutiveFailures internally
+                const currentFailures = accountManager.getConsecutiveFailures(account.email);
+                if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
+                    logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
+                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
+                } else {
+                    accountManager.incrementConsecutiveFailures(account.email);
+                    logger.warn(`[CloudCode] Network error for ${account.email} (stream) (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next account... (${error.message})`);
+                }
+                await sleep(1000);
+                continue;
+            }
+            throw error;
+        }
+    }
+    // All retries exhausted - try fallback model if enabled
+    if (fallbackEnabled) {
+        const fallbackModel = getFallbackModel(model);
+        if (fallbackModel) {
+            logger.warn(`[CloudCode] All retries exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
+            const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
+            yield* sendMessageStream(fallbackRequest, accountManager, false);
+            return;
+        }
+    }
+    throw new Error('Max retries exceeded');
+}
+/**
+ * Emit a fallback message when all retry attempts fail with empty response
+ * @param {string} model - The model name
+ * @yields {Object} Anthropic-format SSE events for empty response fallback
+ */
+function* emitEmptyResponseFallback(model) {
+    // Use proper message ID format consistent with Anthropic API
+    const messageId = `msg_${crypto.randomBytes(16).toString('hex')}`;
+    yield {
+        type: 'message_start',
+        message: {
+            id: messageId,
+            type: 'message',
+            role: 'assistant',
+            content: [],
+            model: model,
+            stop_reason: null,
+            stop_sequence: null,
+            usage: { input_tokens: 0, output_tokens: 0 }
+        }
+    };
+    yield {
+        type: 'content_block_start',
+        index: 0,
+        content_block: { type: 'text', text: '' }
+    };
+    yield {
+        type: 'content_block_delta',
+        index: 0,
+        delta: { type: 'text_delta', text: '[No response after retries - please try again]' }
+    };
+    yield { type: 'content_block_stop', index: 0 };
+    yield {
+        type: 'message_delta',
+        delta: { stop_reason: 'end_turn', stop_sequence: null },
+        usage: { output_tokens: 0 }
+    };
+    yield { type: 'message_stop' };
+}