commons-proxy 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/LICENSE +21 -0
  2. package/README.md +757 -0
  3. package/bin/cli.js +146 -0
  4. package/package.json +97 -0
  5. package/public/Complaint Details.pdf +0 -0
  6. package/public/Cyber Crime Portal.pdf +0 -0
  7. package/public/app.js +229 -0
  8. package/public/css/src/input.css +523 -0
  9. package/public/css/style.css +1 -0
  10. package/public/favicon.png +0 -0
  11. package/public/index.html +549 -0
  12. package/public/js/components/account-manager.js +356 -0
  13. package/public/js/components/add-account-modal.js +414 -0
  14. package/public/js/components/claude-config.js +420 -0
  15. package/public/js/components/dashboard/charts.js +605 -0
  16. package/public/js/components/dashboard/filters.js +362 -0
  17. package/public/js/components/dashboard/stats.js +110 -0
  18. package/public/js/components/dashboard.js +236 -0
  19. package/public/js/components/logs-viewer.js +100 -0
  20. package/public/js/components/models.js +36 -0
  21. package/public/js/components/server-config.js +349 -0
  22. package/public/js/config/constants.js +102 -0
  23. package/public/js/data-store.js +375 -0
  24. package/public/js/settings-store.js +58 -0
  25. package/public/js/store.js +99 -0
  26. package/public/js/translations/en.js +367 -0
  27. package/public/js/translations/id.js +412 -0
  28. package/public/js/translations/pt.js +308 -0
  29. package/public/js/translations/tr.js +358 -0
  30. package/public/js/translations/zh.js +373 -0
  31. package/public/js/utils/account-actions.js +189 -0
  32. package/public/js/utils/error-handler.js +96 -0
  33. package/public/js/utils/model-config.js +42 -0
  34. package/public/js/utils/ui-logger.js +143 -0
  35. package/public/js/utils/validators.js +77 -0
  36. package/public/js/utils.js +69 -0
  37. package/public/proxy-server-64.png +0 -0
  38. package/public/views/accounts.html +361 -0
  39. package/public/views/dashboard.html +484 -0
  40. package/public/views/logs.html +97 -0
  41. package/public/views/models.html +331 -0
  42. package/public/views/settings.html +1327 -0
  43. package/src/account-manager/credentials.js +378 -0
  44. package/src/account-manager/index.js +462 -0
  45. package/src/account-manager/onboarding.js +112 -0
  46. package/src/account-manager/rate-limits.js +369 -0
  47. package/src/account-manager/storage.js +160 -0
  48. package/src/account-manager/strategies/base-strategy.js +109 -0
  49. package/src/account-manager/strategies/hybrid-strategy.js +339 -0
  50. package/src/account-manager/strategies/index.js +79 -0
  51. package/src/account-manager/strategies/round-robin-strategy.js +76 -0
  52. package/src/account-manager/strategies/sticky-strategy.js +138 -0
  53. package/src/account-manager/strategies/trackers/health-tracker.js +162 -0
  54. package/src/account-manager/strategies/trackers/index.js +9 -0
  55. package/src/account-manager/strategies/trackers/quota-tracker.js +120 -0
  56. package/src/account-manager/strategies/trackers/token-bucket-tracker.js +155 -0
  57. package/src/auth/database.js +169 -0
  58. package/src/auth/oauth.js +548 -0
  59. package/src/auth/token-extractor.js +117 -0
  60. package/src/cli/accounts.js +648 -0
  61. package/src/cloudcode/index.js +29 -0
  62. package/src/cloudcode/message-handler.js +510 -0
  63. package/src/cloudcode/model-api.js +248 -0
  64. package/src/cloudcode/rate-limit-parser.js +235 -0
  65. package/src/cloudcode/request-builder.js +93 -0
  66. package/src/cloudcode/session-manager.js +47 -0
  67. package/src/cloudcode/sse-parser.js +121 -0
  68. package/src/cloudcode/sse-streamer.js +293 -0
  69. package/src/cloudcode/streaming-handler.js +615 -0
  70. package/src/config.js +125 -0
  71. package/src/constants.js +407 -0
  72. package/src/errors.js +242 -0
  73. package/src/fallback-config.js +29 -0
  74. package/src/format/content-converter.js +193 -0
  75. package/src/format/index.js +20 -0
  76. package/src/format/request-converter.js +255 -0
  77. package/src/format/response-converter.js +120 -0
  78. package/src/format/schema-sanitizer.js +673 -0
  79. package/src/format/signature-cache.js +88 -0
  80. package/src/format/thinking-utils.js +648 -0
  81. package/src/index.js +148 -0
  82. package/src/modules/usage-stats.js +205 -0
  83. package/src/providers/anthropic-provider.js +258 -0
  84. package/src/providers/base-provider.js +157 -0
  85. package/src/providers/cloudcode.js +94 -0
  86. package/src/providers/copilot.js +399 -0
  87. package/src/providers/github-provider.js +287 -0
  88. package/src/providers/google-provider.js +192 -0
  89. package/src/providers/index.js +211 -0
  90. package/src/providers/openai-compatible.js +265 -0
  91. package/src/providers/openai-provider.js +271 -0
  92. package/src/providers/openrouter-provider.js +325 -0
  93. package/src/providers/setup.js +83 -0
  94. package/src/server.js +870 -0
  95. package/src/utils/claude-config.js +245 -0
  96. package/src/utils/helpers.js +51 -0
  97. package/src/utils/logger.js +142 -0
  98. package/src/utils/native-module-helper.js +162 -0
  99. package/src/webui/index.js +1134 -0
@@ -0,0 +1,615 @@
1
+ /**
2
+ * Streaming Handler for Cloud Code
3
+ *
4
+ * Handles streaming message requests with multi-account support,
5
+ * retry logic, and endpoint failover.
6
+ */
7
+
8
+ import {
9
+ CLOUDCODE_ENDPOINT_FALLBACKS,
10
+ MAX_RETRIES,
11
+ MAX_EMPTY_RESPONSE_RETRIES,
12
+ MAX_WAIT_BEFORE_ERROR_MS,
13
+ DEFAULT_COOLDOWN_MS,
14
+ RATE_LIMIT_DEDUP_WINDOW_MS,
15
+ RATE_LIMIT_STATE_RESET_MS,
16
+ FIRST_RETRY_DELAY_MS,
17
+ SWITCH_ACCOUNT_DELAY_MS,
18
+ MAX_CONSECUTIVE_FAILURES,
19
+ EXTENDED_COOLDOWN_MS,
20
+ CAPACITY_BACKOFF_TIERS_MS,
21
+ MAX_CAPACITY_RETRIES,
22
+ BACKOFF_BY_ERROR_TYPE,
23
+ QUOTA_EXHAUSTED_BACKOFF_TIERS_MS,
24
+ MIN_BACKOFF_MS
25
+ } from '../constants.js';
26
+ import { isRateLimitError, isAuthError, isEmptyResponseError } from '../errors.js';
27
+ import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
28
+ import { logger } from '../utils/logger.js';
29
+ import { parseResetTime, parseRateLimitReason } from './rate-limit-parser.js';
30
+ import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
31
+ import { streamSSEResponse } from './sse-streamer.js';
32
+ import { getFallbackModel } from '../fallback-config.js';
33
+ import crypto from 'crypto';
34
+
/**
 * Rate limit deduplication - prevents thundering herd on concurrent rate limits.
 *
 * Tracks rate limit state per account+model. Keys have the form
 * `${email}:${model}`; values have the shape `{ consecutive429, lastAt }`,
 * where `consecutive429` is the number of consecutive 429 responses recorded
 * and `lastAt` is the Date.now() timestamp of the most recently recorded one.
 *
 * @type {Map<string, {consecutive429: number, lastAt: number}>}
 */
const rateLimitStateByAccountModel = new Map();
40
+
/**
 * Get deduplication key for rate limit tracking.
 *
 * The key is the email and model joined by a single colon.
 *
 * @param {string} email - Account email
 * @param {string} model - Model ID
 * @returns {string} Dedup key in the form `${email}:${model}`
 */
function getDedupKey(email, model) {
    return `${email}:${model}`;
}
50
+
/**
 * Get rate limit backoff with deduplication and exponential backoff (matches opencode-cloudcode-auth).
 *
 * Behaviour:
 * - If a 429 was already recorded for this account+model less than
 *   RATE_LIMIT_DEDUP_WINDOW_MS ago, the stored state is left untouched and the
 *   result is flagged `isDuplicate: true` with the previously recorded attempt.
 * - Otherwise the attempt counter is incremented (or restarted at 1 when the
 *   previous 429 is older than RATE_LIMIT_STATE_RESET_MS, or absent) and stored.
 * - The delay doubles with every attempt, starting at the base delay, and the
 *   doubling is capped at 60 seconds. The result is never below the base delay,
 *   so a server hint above 60 seconds is honoured as-is.
 *
 * @param {string} email - Account email
 * @param {string} model - Model ID
 * @param {number|null} serverRetryAfterMs - Server-provided retry time; anything
 *   that is not a positive finite number falls back to FIRST_RETRY_DELAY_MS
 * @returns {{attempt: number, delayMs: number, isDuplicate: boolean}} Backoff info
 */
function getRateLimitBackoff(email, model, serverRetryAfterMs) {
    const now = Date.now();
    const stateKey = getDedupKey(email, model);
    const previous = rateLimitStateByAccountModel.get(stateKey);

    // A 0, negative or NaN hint would otherwise produce a zero/NaN delay and
    // let the caller spin without waiting.
    const hasUsableHint = Number.isFinite(serverRetryAfterMs) && serverRetryAfterMs > 0;
    const baseDelay = hasUsableHint ? serverRetryAfterMs : FIRST_RETRY_DELAY_MS;

    // Exponential growth capped at 60s, but never shorter than the base delay.
    const delayForAttempt = (attemptNumber) =>
        Math.max(baseDelay, Math.min(baseDelay * Math.pow(2, attemptNumber - 1), 60000));

    // Check if within dedup window - return duplicate status without touching state
    if (previous && (now - previous.lastAt < RATE_LIMIT_DEDUP_WINDOW_MS)) {
        logger.debug(`[CloudCode] Rate limit on ${email}:${model} within dedup window, attempt=${previous.consecutive429}, isDuplicate=true`);
        return {
            attempt: previous.consecutive429,
            delayMs: delayForAttempt(previous.consecutive429),
            isDuplicate: true
        };
    }

    // Determine attempt number - reset after RATE_LIMIT_STATE_RESET_MS of inactivity
    const attempt = previous && (now - previous.lastAt < RATE_LIMIT_STATE_RESET_MS)
        ? previous.consecutive429 + 1
        : 1;

    // Update state
    rateLimitStateByAccountModel.set(stateKey, { consecutive429: attempt, lastAt: now });

    const delayMs = delayForAttempt(attempt);
    logger.debug(`[CloudCode] Rate limit backoff for ${email}:${model}: attempt=${attempt}, delayMs=${delayMs}`);
    return { attempt, delayMs, isDuplicate: false };
}
86
+
/**
 * Clear rate limit state after successful request.
 *
 * Removes the entry for this account+model from the rate limit state map; a
 * missing entry is a no-op.
 *
 * @param {string} email - Account email
 * @param {string} model - Model ID
 */
function clearRateLimitState(email, model) {
    const key = getDedupKey(email, model);
    rateLimitStateByAccountModel.delete(key);
}
96
+
/**
 * Lower-case substrings that identify an auth failure as permanent.
 */
const PERMANENT_AUTH_FAILURE_MARKERS = Object.freeze([
    'invalid_grant',
    'token revoked',
    'token has been expired or revoked',
    'token_revoked',
    'invalid_client',
    'credentials are invalid'
]);

/**
 * Detect permanent authentication failures that require re-authentication.
 *
 * The match is a case-insensitive substring search against a fixed list of
 * markers. Empty, missing or non-string input is treated as "not permanent"
 * instead of throwing.
 *
 * @param {string} errorText - Error message from API
 * @returns {boolean} True if permanent auth failure
 */
function isPermanentAuthFailure(errorText) {
    if (typeof errorText !== 'string' || errorText.length === 0) {
        return false;
    }
    const lower = errorText.toLowerCase();
    return PERMANENT_AUTH_FAILURE_MARKERS.some((marker) => lower.includes(marker));
}
111
+
/**
 * Lower-case substrings that identify a capacity (not quota) problem.
 * 'capacity_exhausted' also covers 'model_capacity_exhausted'; both are listed
 * so the recognised error codes stay visible.
 */
const MODEL_CAPACITY_MARKERS = Object.freeze([
    'model_capacity_exhausted',
    'capacity_exhausted',
    'model is currently overloaded',
    'service temporarily unavailable'
]);

/**
 * Detect if 429 error is due to model capacity (not user quota).
 *
 * The match is a case-insensitive substring search against a fixed list of
 * markers. Empty, missing or non-string input is treated as "not a capacity
 * error" instead of throwing.
 *
 * @param {string} errorText - Error message from API
 * @returns {boolean} True if capacity exhausted (not quota)
 */
function isModelCapacityExhausted(errorText) {
    if (typeof errorText !== 'string' || errorText.length === 0) {
        return false;
    }
    const lower = errorText.toLowerCase();
    return MODEL_CAPACITY_MARKERS.some((marker) => lower.includes(marker));
}
124
+
/**
 * Remove rate limit entries whose last recorded 429 is older than
 * RATE_LIMIT_STATE_RESET_MS relative to `now`.
 *
 * @param {number} [now=Date.now()] - Reference timestamp in milliseconds
 * @returns {number} Number of entries removed
 */
function pruneStaleRateLimitState(now = Date.now()) {
    const cutoff = now - RATE_LIMIT_STATE_RESET_MS;
    let removed = 0;
    // Deleting the current entry while iterating a Map is safe in JavaScript.
    for (const [key, state] of rateLimitStateByAccountModel) {
        if (state.lastAt < cutoff) {
            rateLimitStateByAccountModel.delete(key);
            removed++;
        }
    }
    return removed;
}

// Periodically clean up stale rate limit state (every 60 seconds)
const rateLimitCleanupTimer = setInterval(() => {
    pruneStaleRateLimitState();
}, 60000);

// Housekeeping must not keep the process alive on its own: without unref() a
// plain import of this module would prevent Node from exiting.
rateLimitCleanupTimer.unref?.();
134
+
/**
 * Calculate smart backoff based on error type (matches opencode-cloudcode-auth).
 *
 * A positive server-provided reset time always wins, floored at MIN_BACKOFF_MS
 * to prevent retry loops. Otherwise the reason returned by
 * parseRateLimitReason(errorText) selects the backoff:
 * - 'QUOTA_EXHAUSTED' walks QUOTA_EXHAUSTED_BACKOFF_TIERS_MS by
 *   consecutiveFailures, clamped to the first and last tier;
 * - 'RATE_LIMIT_EXCEEDED', 'MODEL_CAPACITY_EXHAUSTED' and 'SERVER_ERROR' use the
 *   matching BACKOFF_BY_ERROR_TYPE entry;
 * - anything else uses BACKOFF_BY_ERROR_TYPE.UNKNOWN.
 *
 * @param {string} errorText - Error message
 * @param {number|null} serverResetMs - Reset time from server
 * @param {number} consecutiveFailures - Number of consecutive failures
 * @returns {number} Backoff time in milliseconds
 */
function calculateSmartBackoff(errorText, serverResetMs, consecutiveFailures = 0) {
    // If server provides a reset time, use it (with minimum floor to prevent loops)
    if (serverResetMs && serverResetMs > 0) {
        return Math.max(serverResetMs, MIN_BACKOFF_MS);
    }

    const reason = parseRateLimitReason(errorText);

    switch (reason) {
        case 'QUOTA_EXHAUSTED': {
            // Clamp to a valid whole index: a negative, fractional or
            // non-numeric failure count must not index outside the tier table.
            const lastTier = QUOTA_EXHAUSTED_BACKOFF_TIERS_MS.length - 1;
            const failures = Number.isFinite(consecutiveFailures) ? Math.trunc(consecutiveFailures) : 0;
            const tierIndex = Math.min(Math.max(failures, 0), lastTier);
            return QUOTA_EXHAUSTED_BACKOFF_TIERS_MS[tierIndex];
        }
        case 'RATE_LIMIT_EXCEEDED':
            return BACKOFF_BY_ERROR_TYPE.RATE_LIMIT_EXCEEDED;
        case 'MODEL_CAPACITY_EXHAUSTED':
            return BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED;
        case 'SERVER_ERROR':
            return BACKOFF_BY_ERROR_TYPE.SERVER_ERROR;
        default:
            return BACKOFF_BY_ERROR_TYPE.UNKNOWN;
    }
}
165
+
/**
 * POST the Cloud Code payload to a streaming endpoint URL.
 *
 * @param {string} url - Full streamGenerateContent URL
 * @param {string} token - Access token passed to buildHeaders
 * @param {string} model - Model name passed to buildHeaders
 * @param {Object} payload - Request payload; serialized with JSON.stringify
 * @returns {Promise<Response>} The fetch response (not yet checked for `ok`)
 */
function postStreamRequest(url, token, model, payload) {
    return fetch(url, {
        method: 'POST',
        headers: buildHeaders(token, model, 'text/event-stream'),
        body: JSON.stringify(payload)
    });
}

/**
 * Send a streaming request to Cloud Code with multi-account support.
 * Streams events in real-time as they arrive from the server.
 *
 * Control flow, outermost first:
 * 1. Attempt loop: picks an account per attempt. Waiting for rate limits or for
 *    the selection strategy does not consume an attempt.
 * 2. Endpoint loop: walks CLOUDCODE_ENDPOINT_FALLBACKS for the chosen account.
 *    Capacity errors and short rate limits retry the same endpoint; other HTTP
 *    errors advance to the next endpoint; errors recognised by isRateLimitError
 *    are re-thrown so the attempt loop switches account.
 * 3. Empty-response loop: re-issues the request when streaming fails with an
 *    empty-response error, and emits a placeholder message once
 *    MAX_EMPTY_RESPONSE_RETRIES is used up.
 *
 * @param {Object} anthropicRequest - The Anthropic-format request
 * @param {string} anthropicRequest.model - Model name to use
 * @param {Array} anthropicRequest.messages - Array of message objects
 * @param {number} [anthropicRequest.max_tokens] - Maximum tokens to generate
 * @param {Object} [anthropicRequest.thinking] - Thinking configuration
 * @param {import('../account-manager/index.js').default} accountManager - The account manager instance
 * @param {boolean} [fallbackEnabled=false] - When true, retry once with the model
 *   returned by getFallbackModel if this model cannot be served; the fallback
 *   call itself runs with fallback disabled
 * @yields {Object} Anthropic-format SSE events (message_start, content_block_start, content_block_delta, etc.)
 * @throws {Error} If max retries exceeded or no accounts available
 */
export async function* sendMessageStream(anthropicRequest, accountManager, fallbackEnabled = false) {
    const model = anthropicRequest.model;

    // Retry loop with account failover
    // Ensure we try at least as many times as there are accounts to cycle through everyone
    const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
        // Clear any expired rate limits before picking
        accountManager.clearExpiredLimits();

        // Get available accounts for this model
        const availableAccounts = accountManager.getAvailableAccounts(model);

        // If no accounts available, check if we should wait or throw error
        if (availableAccounts.length === 0) {
            if (accountManager.isAllRateLimited(model)) {
                const minWaitMs = accountManager.getMinWaitTimeMs(model);
                const resetTime = new Date(Date.now() + minWaitMs).toISOString();

                // If the wait exceeds MAX_WAIT_BEFORE_ERROR_MS, try fallback first, then throw error
                if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
                    // Check if fallback is enabled and available
                    if (fallbackEnabled) {
                        const fallbackModel = getFallbackModel(model);
                        if (fallbackModel) {
                            logger.warn(`[CloudCode] All accounts exhausted for ${model} (${formatDuration(minWaitMs)} wait). Attempting fallback to ${fallbackModel} (streaming)`);
                            const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
                            yield* sendMessageStream(fallbackRequest, accountManager, false);
                            return;
                        }
                    }
                    throw new Error(
                        `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
                    );
                }

                // Wait for shortest reset time
                const accountCount = accountManager.getAccountCount();
                logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`);
                await sleep(minWaitMs + 500); // Add 500ms buffer
                accountManager.clearExpiredLimits();

                // Don't count waiting for rate limits as a failed attempt.
                // This prevents "Max retries exceeded" when we are just patiently waiting.
                attempt--;
                continue; // Retry the loop
            }

            // No accounts available and not rate-limited (shouldn't happen normally)
            throw new Error('No accounts available');
        }

        // Select account using configured strategy
        const { account, waitMs } = accountManager.selectAccount(model);

        // If strategy returns a wait time without an account, sleep and retry
        if (!account && waitMs > 0) {
            logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
            await sleep(waitMs + 500);
            attempt--; // Don't count strategy wait as failure
            continue;
        }

        // If strategy returns an account with throttle wait (fallback mode), apply delay
        // This prevents overwhelming the API when using emergency/lastResort fallbacks
        if (account && waitMs > 0) {
            logger.debug(`[CloudCode] Throttling request (${waitMs}ms) - fallback mode active`);
            await sleep(waitMs);
        }

        if (!account) {
            logger.warn(`[CloudCode] Strategy returned no account for ${model} (attempt ${attempt + 1}/${maxAttempts})`);
            continue;
        }

        try {
            // Get token and project for this account
            const token = await accountManager.getTokenForAccount(account);
            const project = await accountManager.getProjectForAccount(account, token);
            const payload = buildCloudCodeRequest(anthropicRequest, project);

            logger.debug(`[CloudCode] Starting stream for model: ${model}`);

            // Try each endpoint with index-based loop for capacity retry support
            let lastError = null;
            let capacityRetryCount = 0;
            let endpointIndex = 0;

            while (endpointIndex < CLOUDCODE_ENDPOINT_FALLBACKS.length) {
                const endpoint = CLOUDCODE_ENDPOINT_FALLBACKS[endpointIndex];
                try {
                    const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;

                    const response = await postStreamRequest(url, token, model, payload);

                    if (!response.ok) {
                        const errorText = await response.text();
                        logger.warn(`[CloudCode] Stream error at ${endpoint}: ${response.status} - ${errorText}`);

                        if (response.status === 401) {
                            // Check for permanent auth failures
                            if (isPermanentAuthFailure(errorText)) {
                                logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
                                accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
                                throw new Error(`AUTH_INVALID_PERMANENT: ${errorText}`);
                            }

                            // Transient auth error - clear caches and move on. The token held in
                            // this attempt is not re-fetched; a fresh one is only obtained on the
                            // next pass of the attempt loop.
                            accountManager.clearTokenCache(account.email);
                            accountManager.clearProjectCache(account.email);
                            endpointIndex++;
                            continue;
                        }

                        if (response.status === 429) {
                            const resetMs = parseResetTime(response, errorText);
                            const consecutiveFailures = accountManager.getConsecutiveFailures?.(account.email) || 0;

                            // Check if capacity issue (NOT quota) - retry same endpoint with progressive backoff
                            if (isModelCapacityExhausted(errorText)) {
                                if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
                                    // Progressive capacity backoff tiers
                                    const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
                                    const capacityWaitMs = resetMs || CAPACITY_BACKOFF_TIERS_MS[tierIndex];
                                    capacityRetryCount++;
                                    // Track failures for progressive backoff escalation (matches opencode-cloudcode-auth)
                                    accountManager.incrementConsecutiveFailures(account.email);
                                    logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(capacityWaitMs)}...`);
                                    await sleep(capacityWaitMs);
                                    // Don't increment endpointIndex - retry same endpoint
                                    continue;
                                }
                                // Max capacity retries exceeded - fall through and treat as a regular rate limit
                                logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
                            }

                            // Get rate limit backoff with exponential backoff and state reset
                            const backoff = getRateLimitBackoff(account.email, model, resetMs);

                            // For very short rate limits (< 1 second), always wait and retry
                            // Switching accounts won't help when all accounts have per-second rate limits
                            // NOTE(review): this path has no retry cap; a server that keeps answering
                            // with a sub-second (or zero) reset time keeps this loop going - confirm
                            // that is acceptable.
                            if (resetMs !== null && resetMs < 1000) {
                                logger.info(`[CloudCode] Short rate limit on ${account.email} (${resetMs}ms), waiting and retrying...`);
                                await sleep(resetMs);
                                // Don't increment endpointIndex - retry same endpoint
                                continue;
                            }

                            // If within dedup window AND reset time is >= 1s, switch account
                            if (backoff.isDuplicate) {
                                const dedupBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);
                                logger.info(`[CloudCode] Skipping retry due to recent rate limit on ${account.email} (attempt ${backoff.attempt}), switching account...`);
                                accountManager.markRateLimited(account.email, dedupBackoffMs, model);
                                throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
                            }

                            // Calculate smart backoff based on error type
                            const smartBackoffMs = calculateSmartBackoff(errorText, resetMs, consecutiveFailures);

                            // Decision: wait and retry OR switch account
                            if (backoff.attempt === 1 && smartBackoffMs <= DEFAULT_COOLDOWN_MS) {
                                // Quick retry on first 429 (matches opencode-cloudcode-auth)
                                const retryWaitMs = backoff.delayMs;
                                // markRateLimited already increments consecutiveFailures internally
                                accountManager.markRateLimited(account.email, retryWaitMs, model);
                                logger.info(`[CloudCode] First rate limit on ${account.email}, quick retry after ${formatDuration(retryWaitMs)}...`);
                                await sleep(retryWaitMs);
                                // Don't increment endpointIndex - retry same endpoint
                                continue;
                            } else if (smartBackoffMs > DEFAULT_COOLDOWN_MS) {
                                // Backoff longer than DEFAULT_COOLDOWN_MS - wait SWITCH_ACCOUNT_DELAY_MS then switch
                                logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(smartBackoffMs)}), switching account after ${formatDuration(SWITCH_ACCOUNT_DELAY_MS)} delay...`);
                                await sleep(SWITCH_ACCOUNT_DELAY_MS);
                                accountManager.markRateLimited(account.email, smartBackoffMs, model);
                                throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
                            } else {
                                // Short-term rate limit but not first attempt - use exponential backoff delay
                                const retryWaitMs = backoff.delayMs;
                                // markRateLimited already increments consecutiveFailures internally
                                accountManager.markRateLimited(account.email, retryWaitMs, model);
                                logger.info(`[CloudCode] Rate limit on ${account.email} (attempt ${backoff.attempt}), waiting ${formatDuration(retryWaitMs)}...`);
                                await sleep(retryWaitMs);
                                // Don't increment endpointIndex - retry same endpoint
                                continue;
                            }
                        }

                        // Check for 503 MODEL_CAPACITY_EXHAUSTED - use progressive backoff like 429 capacity
                        if (response.status === 503 && isModelCapacityExhausted(errorText)) {
                            if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
                                // Progressive capacity backoff tiers (same as 429 capacity handling)
                                const tierIndex = Math.min(capacityRetryCount, CAPACITY_BACKOFF_TIERS_MS.length - 1);
                                const capacityWaitMs = CAPACITY_BACKOFF_TIERS_MS[tierIndex];
                                capacityRetryCount++;
                                accountManager.incrementConsecutiveFailures(account.email);
                                logger.info(`[CloudCode] 503 Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(capacityWaitMs)}...`);
                                await sleep(capacityWaitMs);
                                // Don't increment endpointIndex - retry same endpoint
                                continue;
                            }
                            // Max capacity retries exceeded - switch account
                            logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded on 503, switching account`);
                            accountManager.markRateLimited(account.email, BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED, model);
                            throw new Error(`CAPACITY_EXHAUSTED: ${errorText}`);
                        }

                        lastError = new Error(`API error ${response.status}: ${errorText}`);

                        // Try next endpoint for 403/404/5xx errors (matches opencode-cloudcode-auth behavior)
                        if (response.status === 403 || response.status === 404) {
                            logger.warn(`[CloudCode] ${response.status} at ${endpoint}..`);
                        } else if (response.status >= 500) {
                            logger.warn(`[CloudCode] ${response.status} stream error, waiting 1s before retry...`);
                            await sleep(1000);
                        }

                        endpointIndex++;
                        continue;
                    }

                    // Stream the response with retry logic for empty responses
                    let currentResponse = response;

                    for (let emptyRetries = 0; emptyRetries <= MAX_EMPTY_RESPONSE_RETRIES; emptyRetries++) {
                        try {
                            yield* streamSSEResponse(currentResponse, anthropicRequest.model);
                            logger.debug('[CloudCode] Stream completed');
                            // Clear rate limit state on success
                            clearRateLimitState(account.email, model);
                            accountManager.notifySuccess(account, model);
                            return;
                        } catch (streamError) {
                            // Only retry on EmptyResponseError
                            if (!isEmptyResponseError(streamError)) {
                                throw streamError;
                            }

                            // Check if we have retries left
                            if (emptyRetries >= MAX_EMPTY_RESPONSE_RETRIES) {
                                logger.error(`[CloudCode] Empty response after ${MAX_EMPTY_RESPONSE_RETRIES} retries`);
                                yield* emitEmptyResponseFallback(anthropicRequest.model);
                                return;
                            }

                            // Exponential backoff: 500ms, 1000ms, 2000ms, ...
                            const backoffMs = 500 * Math.pow(2, emptyRetries);
                            logger.warn(`[CloudCode] Empty response, retry ${emptyRetries + 1}/${MAX_EMPTY_RESPONSE_RETRIES} after ${backoffMs}ms...`);
                            await sleep(backoffMs);

                            // Refetch the response
                            currentResponse = await postStreamRequest(url, token, model, payload);

                            // Handle specific error codes on retry
                            if (!currentResponse.ok) {
                                const retryErrorText = await currentResponse.text();

                                // Rate limit error - mark account and throw to trigger account switch
                                if (currentResponse.status === 429) {
                                    const resetMs = parseResetTime(currentResponse, retryErrorText);
                                    accountManager.markRateLimited(account.email, resetMs, model);
                                    throw new Error(`429 RESOURCE_EXHAUSTED during retry: ${retryErrorText}`);
                                }

                                // Auth error - check for permanent failure
                                if (currentResponse.status === 401) {
                                    if (isPermanentAuthFailure(retryErrorText)) {
                                        logger.error(`[CloudCode] Permanent auth failure during retry for ${account.email}`);
                                        accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
                                        throw new Error(`AUTH_INVALID_PERMANENT: ${retryErrorText}`);
                                    }
                                    accountManager.clearTokenCache(account.email);
                                    accountManager.clearProjectCache(account.email);
                                    throw new Error(`401 AUTH_INVALID during retry: ${retryErrorText}`);
                                }

                                // For 5xx errors, give the request one more immediate try
                                if (currentResponse.status >= 500) {
                                    logger.warn(`[CloudCode] Retry got ${currentResponse.status}, will retry...`);
                                    await sleep(1000);
                                    currentResponse = await postStreamRequest(url, token, model, payload);
                                    if (currentResponse.ok) {
                                        continue;
                                    }
                                    // Report the body that belongs to this second failure rather than
                                    // the previous response's body; reading it also consumes the
                                    // response instead of leaving it unread.
                                    const secondErrorText = await currentResponse.text();
                                    throw new Error(`Empty response retry failed: ${currentResponse.status} - ${secondErrorText}`);
                                }

                                throw new Error(`Empty response retry failed: ${currentResponse.status} - ${retryErrorText}`);
                            }
                        }
                    }

                } catch (endpointError) {
                    if (isRateLimitError(endpointError)) {
                        throw endpointError; // Re-throw to trigger account switch
                    }
                    if (isEmptyResponseError(endpointError)) {
                        throw endpointError;
                    }
                    // NOTE(review): auth errors thrown above (e.g. AUTH_INVALID_PERMANENT) are not
                    // re-thrown here, so the remaining endpoints are still tried with the same
                    // token before the attempt loop sees the error - confirm this is intended.
                    logger.warn(`[CloudCode] Stream error at ${endpoint}:`, endpointError.message);
                    lastError = endpointError;
                    endpointIndex++;
                }
            }

            // If all endpoints failed for this account
            if (lastError) {
                // Errors created in this function never set `is429`; this branch only applies
                // to errors thrown by callees that carry that flag.
                if (lastError.is429) {
                    logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
                    accountManager.markRateLimited(account.email, lastError.resetMs, model);
                    throw new Error(`Rate limited: ${lastError.errorText}`);
                }
                throw lastError;
            }

        } catch (error) {
            if (isRateLimitError(error)) {
                // Rate limited - already marked, notify strategy and continue to next account
                accountManager.notifyRateLimit(account, model);
                logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
                continue;
            }
            if (isAuthError(error)) {
                // Auth invalid - already marked, continue to next account
                logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
                continue;
            }

            // A thrown value without a string message must not crash the classifier below
            // and mask the original error.
            const errorMessage = typeof error?.message === 'string' ? error.message : '';

            // Handle 5xx errors
            if (errorMessage.includes('API error 5') || errorMessage.includes('500') || errorMessage.includes('503')) {
                accountManager.notifyFailure(account, model);

                // Track 5xx errors for extended cooldown
                // Note: markRateLimited already increments consecutiveFailures internally
                const currentFailures = accountManager.getConsecutiveFailures?.(account.email) || 0;
                if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
                    logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
                } else {
                    accountManager.incrementConsecutiveFailures(account.email);
                    logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next...`);
                }
                continue;
            }

            if (isNetworkError(error)) {
                accountManager.notifyFailure(account, model);

                // Track network errors for extended cooldown
                // Note: markRateLimited already increments consecutiveFailures internally
                const currentFailures = accountManager.getConsecutiveFailures?.(account.email) || 0;
                if (currentFailures + 1 >= MAX_CONSECUTIVE_FAILURES) {
                    logger.warn(`[CloudCode] Account ${account.email} has ${currentFailures + 1} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
                } else {
                    accountManager.incrementConsecutiveFailures(account.email);
                    logger.warn(`[CloudCode] Network error for ${account.email} (stream) (${currentFailures + 1}/${MAX_CONSECUTIVE_FAILURES}), trying next account... (${error.message})`);
                }
                await sleep(1000);
                continue;
            }

            throw error;
        }
    }

    // All retries exhausted - try fallback model if enabled
    if (fallbackEnabled) {
        const fallbackModel = getFallbackModel(model);
        if (fallbackModel) {
            logger.warn(`[CloudCode] All retries exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
            const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
            yield* sendMessageStream(fallbackRequest, accountManager, false);
            return;
        }
    }

    throw new Error('Max retries exceeded');
}
570
+
/**
 * Emit a fallback message when all retry attempts fail with empty response.
 *
 * Produces a complete, minimal Anthropic-style event sequence: message_start,
 * one text content block (start, delta, stop) carrying a fixed notice,
 * message_delta with stop_reason 'end_turn', and message_stop. All token
 * counts are reported as 0.
 *
 * @param {string} model - The model name
 * @yields {Object} Anthropic-format SSE events for empty response fallback
 */
function* emitEmptyResponseFallback(model) {
    // Use proper message ID format consistent with Anthropic API:
    // "msg_" followed by 32 hex characters (16 random bytes).
    const messageId = `msg_${crypto.randomBytes(16).toString('hex')}`;

    yield {
        type: 'message_start',
        message: {
            id: messageId,
            type: 'message',
            role: 'assistant',
            content: [],
            model: model,
            stop_reason: null,
            stop_sequence: null,
            usage: { input_tokens: 0, output_tokens: 0 }
        }
    };

    yield {
        type: 'content_block_start',
        index: 0,
        content_block: { type: 'text', text: '' }
    };

    yield {
        type: 'content_block_delta',
        index: 0,
        delta: { type: 'text_delta', text: '[No response after retries - please try again]' }
    };

    yield { type: 'content_block_stop', index: 0 };

    yield {
        type: 'message_delta',
        delta: { stop_reason: 'end_turn', stop_sequence: null },
        usage: { output_tokens: 0 }
    };

    yield { type: 'message_stop' };
}