commons-proxy 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99):
  1. package/LICENSE +21 -0
  2. package/README.md +757 -0
  3. package/bin/cli.js +146 -0
  4. package/package.json +97 -0
  5. package/public/Complaint Details.pdf +0 -0
  6. package/public/Cyber Crime Portal.pdf +0 -0
  7. package/public/app.js +229 -0
  8. package/public/css/src/input.css +523 -0
  9. package/public/css/style.css +1 -0
  10. package/public/favicon.png +0 -0
  11. package/public/index.html +549 -0
  12. package/public/js/components/account-manager.js +356 -0
  13. package/public/js/components/add-account-modal.js +414 -0
  14. package/public/js/components/claude-config.js +420 -0
  15. package/public/js/components/dashboard/charts.js +605 -0
  16. package/public/js/components/dashboard/filters.js +362 -0
  17. package/public/js/components/dashboard/stats.js +110 -0
  18. package/public/js/components/dashboard.js +236 -0
  19. package/public/js/components/logs-viewer.js +100 -0
  20. package/public/js/components/models.js +36 -0
  21. package/public/js/components/server-config.js +349 -0
  22. package/public/js/config/constants.js +102 -0
  23. package/public/js/data-store.js +375 -0
  24. package/public/js/settings-store.js +58 -0
  25. package/public/js/store.js +99 -0
  26. package/public/js/translations/en.js +367 -0
  27. package/public/js/translations/id.js +412 -0
  28. package/public/js/translations/pt.js +308 -0
  29. package/public/js/translations/tr.js +358 -0
  30. package/public/js/translations/zh.js +373 -0
  31. package/public/js/utils/account-actions.js +189 -0
  32. package/public/js/utils/error-handler.js +96 -0
  33. package/public/js/utils/model-config.js +42 -0
  34. package/public/js/utils/ui-logger.js +143 -0
  35. package/public/js/utils/validators.js +77 -0
  36. package/public/js/utils.js +69 -0
  37. package/public/proxy-server-64.png +0 -0
  38. package/public/views/accounts.html +361 -0
  39. package/public/views/dashboard.html +484 -0
  40. package/public/views/logs.html +97 -0
  41. package/public/views/models.html +331 -0
  42. package/public/views/settings.html +1327 -0
  43. package/src/account-manager/credentials.js +378 -0
  44. package/src/account-manager/index.js +462 -0
  45. package/src/account-manager/onboarding.js +112 -0
  46. package/src/account-manager/rate-limits.js +369 -0
  47. package/src/account-manager/storage.js +160 -0
  48. package/src/account-manager/strategies/base-strategy.js +109 -0
  49. package/src/account-manager/strategies/hybrid-strategy.js +339 -0
  50. package/src/account-manager/strategies/index.js +79 -0
  51. package/src/account-manager/strategies/round-robin-strategy.js +76 -0
  52. package/src/account-manager/strategies/sticky-strategy.js +138 -0
  53. package/src/account-manager/strategies/trackers/health-tracker.js +162 -0
  54. package/src/account-manager/strategies/trackers/index.js +9 -0
  55. package/src/account-manager/strategies/trackers/quota-tracker.js +120 -0
  56. package/src/account-manager/strategies/trackers/token-bucket-tracker.js +155 -0
  57. package/src/auth/database.js +169 -0
  58. package/src/auth/oauth.js +548 -0
  59. package/src/auth/token-extractor.js +117 -0
  60. package/src/cli/accounts.js +648 -0
  61. package/src/cloudcode/index.js +29 -0
  62. package/src/cloudcode/message-handler.js +510 -0
  63. package/src/cloudcode/model-api.js +248 -0
  64. package/src/cloudcode/rate-limit-parser.js +235 -0
  65. package/src/cloudcode/request-builder.js +93 -0
  66. package/src/cloudcode/session-manager.js +47 -0
  67. package/src/cloudcode/sse-parser.js +121 -0
  68. package/src/cloudcode/sse-streamer.js +293 -0
  69. package/src/cloudcode/streaming-handler.js +615 -0
  70. package/src/config.js +125 -0
  71. package/src/constants.js +407 -0
  72. package/src/errors.js +242 -0
  73. package/src/fallback-config.js +29 -0
  74. package/src/format/content-converter.js +193 -0
  75. package/src/format/index.js +20 -0
  76. package/src/format/request-converter.js +255 -0
  77. package/src/format/response-converter.js +120 -0
  78. package/src/format/schema-sanitizer.js +673 -0
  79. package/src/format/signature-cache.js +88 -0
  80. package/src/format/thinking-utils.js +648 -0
  81. package/src/index.js +148 -0
  82. package/src/modules/usage-stats.js +205 -0
  83. package/src/providers/anthropic-provider.js +258 -0
  84. package/src/providers/base-provider.js +157 -0
  85. package/src/providers/cloudcode.js +94 -0
  86. package/src/providers/copilot.js +399 -0
  87. package/src/providers/github-provider.js +287 -0
  88. package/src/providers/google-provider.js +192 -0
  89. package/src/providers/index.js +211 -0
  90. package/src/providers/openai-compatible.js +265 -0
  91. package/src/providers/openai-provider.js +271 -0
  92. package/src/providers/openrouter-provider.js +325 -0
  93. package/src/providers/setup.js +83 -0
  94. package/src/server.js +870 -0
  95. package/src/utils/claude-config.js +245 -0
  96. package/src/utils/helpers.js +51 -0
  97. package/src/utils/logger.js +142 -0
  98. package/src/utils/native-module-helper.js +162 -0
  99. package/src/webui/index.js +1134 -0
@@ -0,0 +1,510 @@
1
+ /**
2
+ * Message Handler for Cloud Code
3
+ *
4
+ * Handles non-streaming message requests with multi-account support,
5
+ * retry logic, and endpoint failover.
6
+ */
7
+
8
+ import {
9
+ CLOUDCODE_ENDPOINT_FALLBACKS,
10
+ MAX_RETRIES,
11
+ MAX_WAIT_BEFORE_ERROR_MS,
12
+ DEFAULT_COOLDOWN_MS,
13
+ RATE_LIMIT_DEDUP_WINDOW_MS,
14
+ RATE_LIMIT_STATE_RESET_MS,
15
+ FIRST_RETRY_DELAY_MS,
16
+ SWITCH_ACCOUNT_DELAY_MS,
17
+ MAX_CONSECUTIVE_FAILURES,
18
+ EXTENDED_COOLDOWN_MS,
19
+ CAPACITY_BACKOFF_TIERS_MS,
20
+ MAX_CAPACITY_RETRIES,
21
+ BACKOFF_BY_ERROR_TYPE,
22
+ QUOTA_EXHAUSTED_BACKOFF_TIERS_MS,
23
+ MIN_BACKOFF_MS,
24
+ isThinkingModel
25
+ } from '../constants.js';
26
+ import { convertGoogleToAnthropic } from '../format/index.js';
27
+ import { isRateLimitError, isAuthError } from '../errors.js';
28
+ import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
29
+ import { logger } from '../utils/logger.js';
30
+ import { parseResetTime, parseRateLimitReason } from './rate-limit-parser.js';
31
+ import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
32
+ import { parseThinkingSSEResponse } from './sse-parser.js';
33
+ import { getFallbackModel } from '../fallback-config.js';
34
+
/**
 * Rate limit deduplication - prevents thundering herd on concurrent rate limits.
 * Tracks rate limit state per account+model: the consecutive 429 count and the
 * timestamp of the most recently counted 429.
 *
 * Key:   `${email}:${model}` (built by getDedupKey)
 * Value: { consecutive429: number, lastAt: number } where lastAt is a Date.now() value
 *
 * Module-level and in-memory only. Entries are written by getRateLimitBackoff,
 * removed by clearRateLimitState after a successful request, and pruned by the
 * periodic cleanup timer in this module.
 */
const rateLimitStateByAccountModel = new Map(); // `${email}:${model}` -> { consecutive429, lastAt }
40
+
/**
 * Build the map key under which rate limit state for one account/model pair is stored.
 * @param {string} email - Account email
 * @param {string} model - Model ID
 * @returns {string} Key of the form `<email>:<model>`
 */
function getDedupKey(email, model) {
    // String.prototype.concat stringifies each argument the same way a template literal does.
    return ''.concat(email, ':', model);
}
50
+
/**
 * Work out how long to back off after a 429 for one account/model pair,
 * de-duplicating 429s that arrive close together (matches opencode-cloudcode-auth).
 *
 * A 429 that arrives within RATE_LIMIT_DEDUP_WINDOW_MS of the last counted one is
 * reported as a duplicate and does not touch the stored state. Otherwise the
 * consecutive counter is advanced (or restarted at 1 once RATE_LIMIT_STATE_RESET_MS
 * has passed since the last counted 429) and the state is saved.
 *
 * @param {string} email - Account email
 * @param {string} model - Model ID
 * @param {number|null} serverRetryAfterMs - Server-provided retry time; FIRST_RETRY_DELAY_MS is used when null/undefined
 * @returns {{attempt: number, delayMs: number, isDuplicate: boolean}} Backoff info
 */
function getRateLimitBackoff(email, model, serverRetryAfterMs) {
    const timestamp = Date.now();
    const key = getDedupKey(email, model);
    const prior = rateLimitStateByAccountModel.get(key);
    const floorMs = serverRetryAfterMs ?? FIRST_RETRY_DELAY_MS;

    // Delay doubles with each counted 429, is capped at 60s, and never drops below the floor.
    const delayFor = (count) => {
        const doubled = Math.min(floorMs * 2 ** (count - 1), 60000);
        return Math.max(floorMs, doubled);
    };

    if (prior) {
        const sinceLast = timestamp - prior.lastAt;
        if (sinceLast < RATE_LIMIT_DEDUP_WINDOW_MS) {
            // Another request already counted this burst: report it, leave state untouched.
            logger.debug(`[CloudCode] Rate limit on ${email}:${model} within dedup window, attempt=${prior.consecutive429}, isDuplicate=true`);
            return {
                attempt: prior.consecutive429,
                delayMs: delayFor(prior.consecutive429),
                isDuplicate: true
            };
        }
    }

    // Continue the streak only if the previous 429 is recent enough; otherwise start over.
    let attempt = 1;
    if (prior && timestamp - prior.lastAt < RATE_LIMIT_STATE_RESET_MS) {
        attempt = prior.consecutive429 + 1;
    }

    rateLimitStateByAccountModel.set(key, { consecutive429: attempt, lastAt: timestamp });

    const delayMs = delayFor(attempt);
    logger.debug(`[CloudCode] Rate limit backoff for ${email}:${model}: attempt=${attempt}, delayMs=${delayMs}`);
    return { attempt, delayMs, isDuplicate: false };
}
86
+
/**
 * Forget the tracked 429 streak for an account/model pair (called after a successful request).
 * @param {string} email - Account email
 * @param {string} model - Model ID
 */
function clearRateLimitState(email, model) {
    rateLimitStateByAccountModel.delete(getDedupKey(email, model));
}
96
+
/**
 * Decide whether an auth error message indicates credentials that cannot be
 * recovered by refreshing (the account has to be re-authenticated).
 * Matching is a case-insensitive substring search over a fixed list of markers.
 * @param {string} errorText - Error message from API (null/undefined/empty is treated as no match)
 * @returns {boolean} True if permanent auth failure
 */
function isPermanentAuthFailure(errorText) {
    const haystack = (errorText || '').toLowerCase();
    const permanentMarkers = [
        'invalid_grant',
        'token revoked',
        'token has been expired or revoked',
        'token_revoked',
        'invalid_client',
        'credentials are invalid'
    ];
    return permanentMarkers.some((marker) => haystack.includes(marker));
}
112
+
/**
 * Tell a capacity-related error (the model itself is overloaded) apart from a
 * per-user quota error. Capacity issues should retry on same account with shorter delay.
 * Matching is a case-insensitive substring search.
 * @param {string} errorText - Error message from API (null/undefined/empty is treated as no match)
 * @returns {boolean} True if capacity exhausted (not quota)
 */
function isModelCapacityExhausted(errorText) {
    const normalized = (errorText || '').toLowerCase();
    const capacityPhrases = [
        'model_capacity_exhausted',
        'capacity_exhausted',
        'model is currently overloaded',
        'service temporarily unavailable'
    ];
    for (const phrase of capacityPhrases) {
        if (normalized.includes(phrase)) {
            return true;
        }
    }
    return false;
}
126
+
// Periodically clean up stale rate limit state (every 60 seconds).
// An entry is stale once its last counted 429 is older than RATE_LIMIT_STATE_RESET_MS.
const rateLimitCleanupTimer = setInterval(() => {
    const cutoff = Date.now() - RATE_LIMIT_STATE_RESET_MS;
    for (const [key, state] of rateLimitStateByAccountModel) {
        if (state.lastAt < cutoff) {
            rateLimitStateByAccountModel.delete(key);
        }
    }
}, 60000);
// Housekeeping must not keep the process alive on its own: without unref(), merely
// importing this module would hold the Node event loop open forever.
rateLimitCleanupTimer.unref?.();
136
+
/**
 * Calculate smart backoff based on error type (matches opencode-cloudcode-auth).
 *
 * A positive server-provided reset time always wins (floored at MIN_BACKOFF_MS to
 * prevent tight retry loops). Otherwise the error text is classified with
 * parseRateLimitReason and mapped to a configured backoff.
 *
 * @param {string} errorText - Error message
 * @param {number|null} serverResetMs - Reset time from server
 * @param {number} consecutiveFailures - Number of consecutive failures; selects the
 *   QUOTA_EXHAUSTED tier. Negative or NaN values are treated as 0, fractions are truncated.
 * @returns {number} Backoff time in milliseconds
 */
function calculateSmartBackoff(errorText, serverResetMs, consecutiveFailures = 0) {
    // If server provides a reset time, use it (with minimum floor to prevent loops)
    if (serverResetMs && serverResetMs > 0) {
        return Math.max(serverResetMs, MIN_BACKOFF_MS);
    }

    const reason = parseRateLimitReason(errorText);

    switch (reason) {
        case 'QUOTA_EXHAUSTED': {
            // Progressive backoff: each further consecutive failure moves one tier up,
            // staying on the last tier once the list is exhausted. The index is clamped
            // to a whole number >= 0 so a bad counter can never yield `undefined`.
            const lastTier = QUOTA_EXHAUSTED_BACKOFF_TIERS_MS.length - 1;
            const failures = Math.trunc(Number(consecutiveFailures));
            const tierIndex = Number.isNaN(failures)
                ? 0
                : Math.min(Math.max(failures, 0), lastTier);
            return QUOTA_EXHAUSTED_BACKOFF_TIERS_MS[tierIndex];
        }
        case 'RATE_LIMIT_EXCEEDED':
            return BACKOFF_BY_ERROR_TYPE.RATE_LIMIT_EXCEEDED;
        case 'MODEL_CAPACITY_EXHAUSTED':
            return BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED;
        case 'SERVER_ERROR':
            return BACKOFF_BY_ERROR_TYPE.SERVER_ERROR;
        default:
            return BACKOFF_BY_ERROR_TYPE.UNKNOWN;
    }
}
167
+
/**
 * Send a non-streaming request to Cloud Code with multi-account support.
 * Uses SSE endpoint for thinking models (non-streaming doesn't return thinking blocks).
 *
 * Overall shape:
 *   - Outer `for` loop: one iteration per account attempt (waits for rate limits
 *     or for the selection strategy do not consume an attempt).
 *   - Inner `while` loop: walks CLOUDCODE_ENDPOINT_FALLBACKS for the selected account.
 *     Some 429/503 paths sleep and retry the same endpoint; others advance to the
 *     next endpoint or throw to make the outer loop move on to another account.
 *   - Outer `catch`: classifies the error (rate limit, auth, 5xx, network) and
 *     either continues with the next attempt or rethrows.
 *
 * @param {Object} anthropicRequest - The Anthropic-format request
 * @param {string} anthropicRequest.model - Model name to use
 * @param {Array} anthropicRequest.messages - Array of message objects
 * @param {number} [anthropicRequest.max_tokens] - Maximum tokens to generate
 * @param {Object} [anthropicRequest.thinking] - Thinking configuration
 * @param {import('../account-manager/index.js').default} accountManager - The account manager instance
 * @param {boolean} [fallbackEnabled=false] - When true, a single retry with the model
 *   returned by getFallbackModel(model) is made if all accounts are rate-limited for
 *   too long or all attempts are used up. The fallback call itself runs with
 *   fallbackEnabled = false, so fallbacks do not chain.
 * @returns {Promise<Object>} Anthropic-format response object (the result of
 *   parseThinkingSSEResponse for thinking models, of convertGoogleToAnthropic otherwise)
 * @throws {Error} 'RESOURCE_EXHAUSTED: ...' when every account is rate-limited for longer
 *   than MAX_WAIT_BEFORE_ERROR_MS and no fallback is used; 'No accounts available';
 *   'Max retries exceeded'; or any error not classified as rate-limit/auth/5xx/network.
 */
export async function sendMessage(anthropicRequest, accountManager, fallbackEnabled = false) {
    const model = anthropicRequest.model;
    const isThinking = isThinkingModel(model);

    // Retry loop with account failover
    // Ensure we try at least as many times as there are accounts to cycle through everyone
    const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
        // Clear any expired rate limits before picking
        accountManager.clearExpiredLimits();

        // Get available accounts for this model
        const availableAccounts = accountManager.getAvailableAccounts(model);

        // If no accounts available, check if we should wait or throw error
        if (availableAccounts.length === 0) {
            if (accountManager.isAllRateLimited(model)) {
                const minWaitMs = accountManager.getMinWaitTimeMs(model);
                const resetTime = new Date(Date.now() + minWaitMs).toISOString();

                // If wait time is too long (> 2 minutes), try fallback first, then throw error
                if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
                    // Check if fallback is enabled and available
                    if (fallbackEnabled) {
                        const fallbackModel = getFallbackModel(model);
                        if (fallbackModel) {
                            logger.warn(`[CloudCode] All accounts exhausted for ${model} (${formatDuration(minWaitMs)} wait). Attempting fallback to ${fallbackModel}`);
                            const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
                            // Recursive call with fallback disabled: at most one level of fallback.
                            return await sendMessage(fallbackRequest, accountManager, false);
                        }
                    }
                    throw new Error(
                        `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
                    );
                }

                // Wait for shortest reset time
                const accountCount = accountManager.getAccountCount();
                logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`);
                await sleep(minWaitMs + 500); // Add 500ms buffer
                accountManager.clearExpiredLimits();

                // CRITICAL FIX: Don't count waiting for rate limits as a failed attempt
                // This prevents "Max retries exceeded" when we are just patiently waiting
                // NOTE(review): because the attempt is handed back, this wait path is not
                // bounded by maxAttempts - confirm the account manager guarantees progress.
                attempt--;
                continue; // Retry the loop
            }

            // No accounts available and not rate-limited (shouldn't happen normally)
            throw new Error('No accounts available');
        }

        // Select account using configured strategy
        const { account, waitMs } = accountManager.selectAccount(model);

        // If strategy returns a wait time without an account, sleep and retry
        if (!account && waitMs > 0) {
            logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
            await sleep(waitMs + 500);
            attempt--; // CRITICAL FIX: Don't count strategy wait as failure
            continue;
        }

        // If strategy returns an account with throttle wait (fallback mode), apply delay
        // This prevents overwhelming the API when using emergency/lastResort fallbacks
        if (account && waitMs > 0) {
            logger.debug(`[CloudCode] Throttling request (${waitMs}ms) - fallback mode active`);
            await sleep(waitMs);
        }

        // No account and no wait hint: this consumes an attempt and loops immediately.
        if (!account) {
            logger.warn(`[CloudCode] Strategy returned no account for ${model} (attempt ${attempt + 1}/${maxAttempts})`);
            continue;
        }

        try {
            // Get token and project for this account
            const token = await accountManager.getTokenForAccount(account);
            const project = await accountManager.getProjectForAccount(account, token);
            const payload = buildCloudCodeRequest(anthropicRequest, project);

            logger.debug(`[CloudCode] Sending request for model: ${model}`);

            // Try each endpoint with index-based loop for capacity retry support
            let lastError = null;
            // Shared by the 429 and 503 capacity paths; counts same-endpoint capacity
            // retries for this account attempt (not reset when the endpoint changes).
            let capacityRetryCount = 0;
            let endpointIndex = 0;

            while (endpointIndex < CLOUDCODE_ENDPOINT_FALLBACKS.length) {
                const endpoint = CLOUDCODE_ENDPOINT_FALLBACKS[endpointIndex];
                try {
                    // Thinking models go through the SSE streaming endpoint even for
                    // this non-streaming API; the stream is accumulated below.
                    const url = isThinking
                        ? `${endpoint}/v1internal:streamGenerateContent?alt=sse`
                        : `${endpoint}/v1internal:generateContent`;

                    const response = await fetch(url, {
                        method: 'POST',
                        headers: buildHeaders(token, model, isThinking ? 'text/event-stream' : 'application/json'),
                        body: JSON.stringify(payload)
                    });

                    if (!response.ok) {
                        const errorText = await response.text();
                        logger.warn(`[CloudCode] Error at ${endpoint}: ${response.status} - ${errorText}`);

                        if (response.status === 401) {
                            // Check for permanent auth failures
                            if (isPermanentAuthFailure(errorText)) {
                                logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
                                accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
                                throw new Error(`AUTH_INVALID_PERMANENT: ${errorText}`);
                            }

                            // Transient auth error - clear caches and move on to the next endpoint
                            // NOTE(review): `token` and `payload` were resolved before this loop and
                            // are not re-fetched here, so the next endpoint is called with the same
                            // token; the cleared caches only matter on a later account attempt.
                            // `lastError` is also left unset on this path - confirm this is intended.
                            logger.warn('[CloudCode] Transient auth error, refreshing token...');
                            accountManager.clearTokenCache(account.email);
                            accountManager.clearProjectCache(account.email);
                            endpointIndex++;
                            continue;
                        }

                        if (response.status === 429) {
                            const resetMs = parseResetTime(response, errorText);
                            const consecutiveFailures = accountManager.getConsecutiveFailures?.(account.email) || 0;

                            // Check if capacity issue (NOT quota) - retry same endpoint with progressive backoff
                            if (isModelCapacityExhausted(errorText)) {
                                if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
                                    // Progressive capacity backoff tiers
                                    const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
                                    // A truthy server-provided reset time takes precedence over the tier.
                                    const waitMs = resetMs || CAPACITY_BACKOFF_TIERS_MS[tierIndex];
                                    capacityRetryCount++;
                                    // Track failures for progressive backoff escalation (matches opencode-cloudcode-auth)
                                    accountManager.incrementConsecutiveFailures(account.email);
                                    logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
                                    await sleep(waitMs);
                                    // Don't increment endpointIndex - retry same endpoint
                                    continue;
                                }
                                // Max capacity retries exceeded - fall through to the generic
                                // 429 handling below (treated like quota exhaustion)
                                logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
                            }

                            // Get rate limit backoff with exponential backoff and state reset
                            // (this call updates the per account+model 429 state even when the
                            // short-rate-limit path below is taken)
                            const backoff = getRateLimitBackoff(account.email, model, resetMs);

                            // For very short rate limits (< 1 second), always wait and retry
                            // Switching accounts won't help when all accounts have per-second rate limits
                            if (resetMs !== null && resetMs < 1000) {
                                const waitMs = resetMs;
                                logger.info(`[CloudCode] Short rate limit on ${account.email} (${resetMs}ms), waiting and retrying...`);
                                await sleep(waitMs);
                                // Don't increment endpointIndex - retry same endpoint
                                continue;
                            }

                            // If within dedup window AND reset time is >= 1s, switch account
                            if (backoff.isDuplicate) {
                                const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);
                                logger.info(`[CloudCode] Skipping retry due to recent rate limit on ${account.email} (attempt ${backoff.attempt}), switching account...`);
                                accountManager.markRateLimited(account.email, smartBackoffMs, model);
                                throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
                            }

                            // Calculate smart backoff based on error type
                            const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);

                            // Decision: wait and retry OR switch account
                            // First 429 gets a quick retry (FIRST_RETRY_DELAY_MS unless the server gave a time)
                            if (backoff.attempt === 1 && smartBackoffMs <= DEFAULT_COOLDOWN_MS) {
                                // Quick retry on first 429 (matches opencode-cloudcode-auth)
                                const waitMs = backoff.delayMs;
                                // markRateLimited already increments consecutiveFailures internally
                                // This prevents concurrent retry storms and ensures progressive backoff escalation
                                accountManager.markRateLimited(account.email, waitMs, model);
                                logger.info(`[CloudCode] First rate limit on ${account.email}, quick retry after ${formatDuration(waitMs)}...`);
                                await sleep(waitMs);
                                // Don't increment endpointIndex - retry same endpoint
                                continue;
                            } else if (smartBackoffMs > DEFAULT_COOLDOWN_MS) {
                                // Long-term quota exhaustion (backoff above DEFAULT_COOLDOWN_MS) -
                                // wait SWITCH_ACCOUNT_DELAY_MS then switch
                                logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(smartBackoffMs)}), switching account after ${formatDuration(SWITCH_ACCOUNT_DELAY_MS)} delay...`);
                                await sleep(SWITCH_ACCOUNT_DELAY_MS);
                                accountManager.markRateLimited(account.email, smartBackoffMs, model);
                                throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
                            } else {
                                // Short-term rate limit but not first attempt - use exponential backoff delay
                                const waitMs = backoff.delayMs;
                                // markRateLimited already increments consecutiveFailures internally
                                accountManager.markRateLimited(account.email, waitMs, model);
                                logger.info(`[CloudCode] Rate limit on ${account.email} (attempt ${backoff.attempt}), waiting ${formatDuration(waitMs)}...`);
                                await sleep(waitMs);
                                // Don't increment endpointIndex - retry same endpoint
                                continue;
                            }
                        }

                        if (response.status >= 400) {
                            // Check for 503 MODEL_CAPACITY_EXHAUSTED - use progressive backoff like 429 capacity
                            if (response.status === 503 && isModelCapacityExhausted(errorText)) {
                                if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
                                    // Progressive capacity backoff tiers (same as 429 capacity handling,
                                    // except no server-provided reset time is consulted here)
                                    const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
                                    const waitMs = CAPACITY_BACKOFF_TIERS_MS[tierIndex];
                                    capacityRetryCount++;
                                    accountManager.incrementConsecutiveFailures(account.email);
                                    logger.info(`[CloudCode] 503 Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
                                    await sleep(waitMs);
                                    // Don't increment endpointIndex - retry same endpoint
                                    continue;
                                }
                                // Max capacity retries exceeded - switch account
                                logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded on 503, switching account`);
                                accountManager.markRateLimited(account.email, BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED, model);
                                throw new Error(`CAPACITY_EXHAUSTED: ${errorText}`);
                            }

                            // The 'API error <status>' prefix is what the outer catch inspects
                            // to recognize 5xx failures.
                            lastError = new Error(`API error ${response.status}: ${errorText}`);
                            // Try next endpoint for 403/404/5xx errors (matches opencode-cloudcode-auth behavior)
                            if (response.status === 403 || response.status === 404) {
                                logger.warn(`[CloudCode] ${response.status} at ${endpoint}...`);
                            } else if (response.status >= 500) {
                                logger.warn(`[CloudCode] ${response.status} error, waiting 1s before retry...`);
                                await sleep(1000);
                            }
                            endpointIndex++;
                            continue;
                        }
                    }

                    // For thinking models, parse SSE and accumulate all parts
                    if (isThinking) {
                        const result = await parseThinkingSSEResponse(response, anthropicRequest.model);
                        // Clear rate limit state on success
                        clearRateLimitState(account.email, model);
                        accountManager.notifySuccess(account, model);
                        return result;
                    }

                    // Non-thinking models use regular JSON
                    const data = await response.json();
                    logger.debug('[CloudCode] Response received');
                    // Clear rate limit state on success
                    clearRateLimitState(account.email, model);
                    accountManager.notifySuccess(account, model);
                    return convertGoogleToAnthropic(data, anthropicRequest.model);

                } catch (endpointError) {
                    if (isRateLimitError(endpointError)) {
                        throw endpointError; // Re-throw to trigger account switch
                    }
                    // Any other error (including the AUTH_INVALID_PERMANENT / CAPACITY_EXHAUSTED
                    // errors thrown above, unless isRateLimitError matches them) is recorded and
                    // the next endpoint is tried.
                    logger.warn(`[CloudCode] Error at ${endpoint}:`, endpointError.message);
                    lastError = endpointError;
                    endpointIndex++;
                }
            }

            // If all endpoints failed for this account
            if (lastError) {
                // NOTE(review): nothing in this function sets `is429`, `resetMs` or `errorText`
                // on an error object; this branch can only trigger if a callee throws an error
                // carrying those properties - confirm it is still reachable.
                if (lastError.is429) {
                    logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
                    accountManager.markRateLimited(account.email, lastError.resetMs, model);
                    throw new Error(`Rate limited: ${lastError.errorText}`);
                }
                throw lastError;
            }
            // If lastError is still null here (e.g. every endpoint hit the transient 401
            // path), the try block ends normally and the outer loop starts the next attempt.

        } catch (error) {
            if (isRateLimitError(error)) {
                // Rate limited - already marked, notify strategy and continue to next account
                accountManager.notifyRateLimit(account, model);
                logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
                continue;
            }
            if (isAuthError(error)) {
                // Auth invalid - already marked, continue to next account
                logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
                continue;
            }
            // Handle 5xx errors
            // NOTE(review): this is a substring match; any message that merely contains
            // '500' or '503' (for example inside a 4xx response body) is treated as a 5xx.
            if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
                accountManager.notifyFailure(account, model);

                // Track 5xx errors for extended cooldown
                // Note: markRateLimited already increments consecutiveFailures internally
                const currentFailures = accountManager.getConsecutiveFailures(account.email);
                if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
                    logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
                } else {
                    accountManager.incrementConsecutiveFailures(account.email);
                    logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next...`);
                }
                continue;
            }

            if (isNetworkError(error)) {
                accountManager.notifyFailure(account, model);

                // Track network errors for extended cooldown
                // Note: markRateLimited already increments consecutiveFailures internally
                const currentFailures = accountManager.getConsecutiveFailures(account.email);
                if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
                    logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
                } else {
                    accountManager.incrementConsecutiveFailures(account.email);
                    logger.warn(`[CloudCode] Network error for ${account.email} (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next account... (${error.message})`);
                }
                // Brief pause before the next attempt after a network failure.
                await sleep(1000);
                continue;
            }

            // Anything else is not retryable here: surface it to the caller.
            throw error;
        }
    }

    // All retries exhausted - try fallback model if enabled
    if (fallbackEnabled) {
        const fallbackModel = getFallbackModel(model);
        if (fallbackModel) {
            logger.warn(`[CloudCode] All retries exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
            const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
            return await sendMessage(fallbackRequest, accountManager, false);
        }
    }

    throw new Error('Max retries exceeded');
}