@kamel-ahmed/proxy-claude 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +622 -0
- package/bin/cli.js +124 -0
- package/package.json +80 -0
- package/public/app.js +228 -0
- package/public/css/src/input.css +523 -0
- package/public/css/style.css +1 -0
- package/public/favicon.svg +10 -0
- package/public/index.html +381 -0
- package/public/js/components/account-manager.js +245 -0
- package/public/js/components/claude-config.js +420 -0
- package/public/js/components/dashboard/charts.js +589 -0
- package/public/js/components/dashboard/filters.js +362 -0
- package/public/js/components/dashboard/stats.js +110 -0
- package/public/js/components/dashboard.js +236 -0
- package/public/js/components/logs-viewer.js +100 -0
- package/public/js/components/models.js +36 -0
- package/public/js/components/server-config.js +349 -0
- package/public/js/config/constants.js +102 -0
- package/public/js/data-store.js +386 -0
- package/public/js/settings-store.js +58 -0
- package/public/js/store.js +78 -0
- package/public/js/translations/en.js +351 -0
- package/public/js/translations/id.js +396 -0
- package/public/js/translations/pt.js +287 -0
- package/public/js/translations/tr.js +342 -0
- package/public/js/translations/zh.js +357 -0
- package/public/js/utils/account-actions.js +189 -0
- package/public/js/utils/error-handler.js +96 -0
- package/public/js/utils/model-config.js +42 -0
- package/public/js/utils/validators.js +77 -0
- package/public/js/utils.js +69 -0
- package/public/views/accounts.html +329 -0
- package/public/views/dashboard.html +484 -0
- package/public/views/logs.html +97 -0
- package/public/views/models.html +331 -0
- package/public/views/settings.html +1329 -0
- package/src/account-manager/credentials.js +243 -0
- package/src/account-manager/index.js +380 -0
- package/src/account-manager/onboarding.js +117 -0
- package/src/account-manager/rate-limits.js +237 -0
- package/src/account-manager/storage.js +136 -0
- package/src/account-manager/strategies/base-strategy.js +104 -0
- package/src/account-manager/strategies/hybrid-strategy.js +195 -0
- package/src/account-manager/strategies/index.js +79 -0
- package/src/account-manager/strategies/round-robin-strategy.js +76 -0
- package/src/account-manager/strategies/sticky-strategy.js +138 -0
- package/src/account-manager/strategies/trackers/health-tracker.js +162 -0
- package/src/account-manager/strategies/trackers/index.js +8 -0
- package/src/account-manager/strategies/trackers/token-bucket-tracker.js +121 -0
- package/src/auth/database.js +169 -0
- package/src/auth/oauth.js +419 -0
- package/src/auth/token-extractor.js +117 -0
- package/src/cli/accounts.js +512 -0
- package/src/cli/refresh.js +201 -0
- package/src/cli/setup.js +338 -0
- package/src/cloudcode/index.js +29 -0
- package/src/cloudcode/message-handler.js +386 -0
- package/src/cloudcode/model-api.js +248 -0
- package/src/cloudcode/rate-limit-parser.js +181 -0
- package/src/cloudcode/request-builder.js +93 -0
- package/src/cloudcode/session-manager.js +47 -0
- package/src/cloudcode/sse-parser.js +121 -0
- package/src/cloudcode/sse-streamer.js +293 -0
- package/src/cloudcode/streaming-handler.js +492 -0
- package/src/config.js +107 -0
- package/src/constants.js +278 -0
- package/src/errors.js +238 -0
- package/src/fallback-config.js +29 -0
- package/src/format/content-converter.js +193 -0
- package/src/format/index.js +20 -0
- package/src/format/request-converter.js +248 -0
- package/src/format/response-converter.js +120 -0
- package/src/format/schema-sanitizer.js +673 -0
- package/src/format/signature-cache.js +88 -0
- package/src/format/thinking-utils.js +558 -0
- package/src/index.js +146 -0
- package/src/modules/usage-stats.js +205 -0
- package/src/server.js +861 -0
- package/src/utils/claude-config.js +245 -0
- package/src/utils/helpers.js +51 -0
- package/src/utils/logger.js +142 -0
- package/src/utils/native-module-helper.js +162 -0
- package/src/webui/index.js +707 -0
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Message Handler for Cloud Code
|
|
3
|
+
*
|
|
4
|
+
* Handles non-streaming message requests with multi-account support,
|
|
5
|
+
* retry logic, and endpoint failover.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
ANTIGRAVITY_ENDPOINT_FALLBACKS,
|
|
10
|
+
MAX_RETRIES,
|
|
11
|
+
MAX_WAIT_BEFORE_ERROR_MS,
|
|
12
|
+
DEFAULT_COOLDOWN_MS,
|
|
13
|
+
RATE_LIMIT_DEDUP_WINDOW_MS,
|
|
14
|
+
MAX_CONSECUTIVE_FAILURES,
|
|
15
|
+
EXTENDED_COOLDOWN_MS,
|
|
16
|
+
CAPACITY_RETRY_DELAY_MS,
|
|
17
|
+
MAX_CAPACITY_RETRIES,
|
|
18
|
+
isThinkingModel
|
|
19
|
+
} from '../constants.js';
|
|
20
|
+
import { convertGoogleToAnthropic } from '../format/index.js';
|
|
21
|
+
import { isRateLimitError, isAuthError } from '../errors.js';
|
|
22
|
+
import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
|
|
23
|
+
import { logger } from '../utils/logger.js';
|
|
24
|
+
import { parseResetTime } from './rate-limit-parser.js';
|
|
25
|
+
import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
|
|
26
|
+
import { parseThinkingSSEResponse } from './sse-parser.js';
|
|
27
|
+
import { getFallbackModel } from '../fallback-config.js';
|
|
28
|
+
|
|
29
|
+
/**
 * Gap 1: Rate limit deduplication - prevents thundering herd on concurrent rate limits.
 * Tracks the last rate limit timestamp per model so duplicate retries can be skipped.
 * Keys are model IDs; values are Date.now() millisecond timestamps.
 */
const lastRateLimitTimestamps = new Map(); // modelId -> timestamp
|
|
34
|
+
|
|
35
|
+
/**
 * Decide whether a rate-limit retry for this model should be skipped because
 * another rate limit was recorded for it very recently.
 *
 * @param {string} model - Model ID
 * @returns {boolean} True when a timestamp exists for the model and it is
 *   younger than RATE_LIMIT_DEDUP_WINDOW_MS; false otherwise
 */
function shouldSkipRetryDueToDedup(model) {
    const recordedAt = lastRateLimitTimestamps.get(model);
    if (!recordedAt) {
        // Nothing recorded for this model - no reason to skip
        return false;
    }

    const ageMs = Date.now() - recordedAt;
    const withinWindow = ageMs < RATE_LIMIT_DEDUP_WINDOW_MS;
    if (withinWindow) {
        logger.debug(`[CloudCode] Rate limit on ${model} within dedup window (${ageMs}ms ago), skipping retry`);
    }
    return withinWindow;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Remember "now" as the most recent rate-limit time for a model.
 * Overwrites any timestamp previously stored for the same model.
 *
 * @param {string} model - Model ID
 */
function recordRateLimitTimestamp(model) {
    const observedAt = Date.now();
    lastRateLimitTimestamps.set(model, observedAt);
}
|
|
59
|
+
|
|
60
|
+
/**
 * Forget the stored rate-limit timestamp for a model (no-op when none is stored).
 *
 * @param {string} model - Model ID
 */
function clearRateLimitTimestamp(model) {
    if (lastRateLimitTimestamps.has(model)) {
        lastRateLimitTimestamps.delete(model);
    }
}
|
|
67
|
+
|
|
68
|
+
/**
 * Gap 3: Recognise authentication failures that cannot be fixed by refreshing
 * a token and therefore require the user to re-authenticate.
 *
 * Matching is a case-insensitive substring search over a fixed marker list.
 *
 * @param {string} errorText - Error message from API (null/undefined/empty are treated as "")
 * @returns {boolean} True if the text contains any permanent-failure marker
 */
function isPermanentAuthFailure(errorText) {
    const permanentMarkers = [
        'invalid_grant',
        'token revoked',
        'token has been expired or revoked',
        'token_revoked',
        'invalid_client',
        'credentials are invalid'
    ];
    const haystack = (errorText || '').toLowerCase();
    return permanentMarkers.some((marker) => haystack.includes(marker));
}
|
|
83
|
+
|
|
84
|
+
/**
 * Gap 4: Tell apart a 429 caused by model capacity from one caused by user quota.
 * Capacity problems are retried on the same account after a short delay.
 *
 * Matching is a case-insensitive substring search over a fixed marker list.
 *
 * @param {string} errorText - Error message from API (null/undefined/empty are treated as "")
 * @returns {boolean} True if the text indicates capacity exhaustion (not quota)
 */
function isModelCapacityExhausted(errorText) {
    const capacityMarkers = [
        'model_capacity_exhausted',
        'capacity_exhausted',
        'model is currently overloaded',
        'service temporarily unavailable'
    ];
    const haystack = (errorText || '').toLowerCase();
    return capacityMarkers.some((marker) => haystack.includes(marker));
}
|
|
97
|
+
|
|
98
|
+
// Periodically evict stale dedup timestamps so the map cannot grow without bound.
// Both the sweep period and the staleness threshold are one minute.
const DEDUP_CLEANUP_INTERVAL_MS = 60000;

const dedupCleanupTimer = setInterval(() => {
    const cutoff = Date.now() - DEDUP_CLEANUP_INTERVAL_MS;
    // Deleting the current entry while iterating a Map is safe in JavaScript
    for (const [model, timestamp] of lastRateLimitTimestamps) {
        if (timestamp < cutoff) {
            lastRateLimitTimestamps.delete(model);
        }
    }
}, DEDUP_CLEANUP_INTERVAL_MS);

// This timer is created as an import side effect. Without unref() it would keep
// the Node.js event loop alive forever, so any script that merely imports this
// module could never exit on its own.
dedupCleanupTimer.unref?.();
|
|
107
|
+
|
|
108
|
+
/**
 * Send a non-streaming request to Cloud Code with multi-account support.
 * Uses the SSE endpoint for thinking models (non-streaming doesn't return thinking blocks).
 *
 * Per attempt the function: clears expired limits, picks an account, then walks
 * ANTIGRAVITY_ENDPOINT_FALLBACKS in order. Rate-limit and auth errors abandon the
 * current account immediately; other errors move on to the next endpoint.
 *
 * @param {Object} anthropicRequest - The Anthropic-format request
 * @param {string} anthropicRequest.model - Model name to use
 * @param {Array} anthropicRequest.messages - Array of message objects
 * @param {number} [anthropicRequest.max_tokens] - Maximum tokens to generate
 * @param {Object} [anthropicRequest.thinking] - Thinking configuration
 * @param {import('../account-manager/index.js').default} accountManager - The account manager instance
 * @param {boolean} [fallbackEnabled=false] - When true, retry once with the model returned by
 *   getFallbackModel() if the requested model cannot be served (the recursive call passes
 *   false, so fallbacks never chain)
 * @returns {Promise<Object>} Anthropic-format response object
 * @throws {Error} If max retries exceeded, no accounts are available, or a non-retryable error occurs
 */
export async function sendMessage(anthropicRequest, accountManager, fallbackEnabled = false) {
    const model = anthropicRequest.model;
    const isThinking = isThinkingModel(model);

    // Retry loop with account failover
    // Ensure we try at least as many times as there are accounts to cycle through everyone
    const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
        // Clear any expired rate limits before picking
        accountManager.clearExpiredLimits();

        // Get available accounts for this model
        const availableAccounts = accountManager.getAvailableAccounts(model);

        // If no accounts available, check if we should wait or throw error
        if (availableAccounts.length === 0) {
            if (accountManager.isAllRateLimited(model)) {
                const minWaitMs = accountManager.getMinWaitTimeMs(model);
                const resetTime = new Date(Date.now() + minWaitMs).toISOString();

                // If the wait exceeds MAX_WAIT_BEFORE_ERROR_MS, try fallback first, then throw error
                if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
                    // Check if fallback is enabled and available
                    if (fallbackEnabled) {
                        const fallbackModel = getFallbackModel(model);
                        if (fallbackModel) {
                            logger.warn(`[CloudCode] All accounts exhausted for ${model} (${formatDuration(minWaitMs)} wait). Attempting fallback to ${fallbackModel}`);
                            const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
                            return await sendMessage(fallbackRequest, accountManager, false);
                        }
                    }
                    throw new Error(
                        `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
                    );
                }

                // Wait for shortest reset time
                const accountCount = accountManager.getAccountCount();
                logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`);
                await sleep(minWaitMs + 500); // Add 500ms buffer
                accountManager.clearExpiredLimits();
                continue; // Retry the loop
            }

            // No accounts available and not rate-limited (shouldn't happen normally)
            throw new Error('No accounts available');
        }

        // Select account using configured strategy
        const { account, waitMs } = accountManager.selectAccount(model);

        // If strategy returns a wait time, sleep and retry
        if (!account && waitMs > 0) {
            logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
            await sleep(waitMs + 500);
            continue;
        }

        if (!account) {
            continue; // Shouldn't happen, but safety check
        }

        try {
            // Get token and project for this account
            const token = await accountManager.getTokenForAccount(account);
            const project = await accountManager.getProjectForAccount(account, token);
            const payload = buildCloudCodeRequest(anthropicRequest, project);

            logger.debug(`[CloudCode] Sending request for model: ${model}`);

            // Try each endpoint with index-based loop for capacity retry support
            let lastError = null;
            let retriedOnce = false; // Track if we've already retried for short rate limit
            let capacityRetryCount = 0; // Gap 4: Track capacity exhaustion retries
            let endpointIndex = 0;

            while (endpointIndex < ANTIGRAVITY_ENDPOINT_FALLBACKS.length) {
                const endpoint = ANTIGRAVITY_ENDPOINT_FALLBACKS[endpointIndex];
                try {
                    const url = isThinking
                        ? `${endpoint}/v1internal:streamGenerateContent?alt=sse`
                        : `${endpoint}/v1internal:generateContent`;

                    const response = await fetch(url, {
                        method: 'POST',
                        headers: buildHeaders(token, model, isThinking ? 'text/event-stream' : 'application/json'),
                        body: JSON.stringify(payload)
                    });

                    if (!response.ok) {
                        const errorText = await response.text();
                        logger.warn(`[CloudCode] Error at ${endpoint}: ${response.status} - ${errorText}`);

                        if (response.status === 401) {
                            // Gap 3: Check for permanent auth failures
                            if (isPermanentAuthFailure(errorText)) {
                                logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
                                accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
                                throw new Error(`AUTH_INVALID_PERMANENT: ${errorText}`);
                            }

                            // Transient auth error - clear caches so a later attempt fetches fresh credentials.
                            // NOTE(review): the next endpoint is still called with the token fetched above;
                            // only a subsequent attempt of the outer loop obtains a new one.
                            logger.warn('[CloudCode] Transient auth error, refreshing token...');
                            accountManager.clearTokenCache(account.email);
                            accountManager.clearProjectCache(account.email);
                            endpointIndex++;
                            continue;
                        }

                        if (response.status === 429) {
                            const resetMs = parseResetTime(response, errorText);

                            // Gap 4: Check if capacity issue (NOT quota) - retry SAME endpoint
                            if (isModelCapacityExhausted(errorText)) {
                                if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
                                    capacityRetryCount++;
                                    const waitMs = resetMs || CAPACITY_RETRY_DELAY_MS;
                                    logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
                                    await sleep(waitMs);
                                    // Don't increment endpointIndex - retry same endpoint
                                    continue;
                                }
                                // Max capacity retries exceeded - treat as quota exhaustion
                                logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
                            }

                            // Gap 1: Check deduplication window to prevent thundering herd
                            if (shouldSkipRetryDueToDedup(model)) {
                                logger.info(`[CloudCode] Skipping retry due to recent rate limit, switching account...`);
                                accountManager.markRateLimited(account.email, resetMs || DEFAULT_COOLDOWN_MS, model);
                                throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
                            }

                            // Decision: wait and retry OR switch account
                            if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
                                // Long-term quota exhaustion (reset beyond DEFAULT_COOLDOWN_MS) - switch to next account
                                logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(resetMs)}), switching account...`);
                                accountManager.markRateLimited(account.email, resetMs, model);
                                throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
                            } else {
                                // Short-term rate limit (within DEFAULT_COOLDOWN_MS, or unknown) - wait and retry once
                                const waitMs = resetMs || DEFAULT_COOLDOWN_MS;

                                if (!retriedOnce) {
                                    retriedOnce = true;
                                    recordRateLimitTimestamp(model); // Gap 1: Record before retry
                                    logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
                                    await sleep(waitMs);
                                    // Don't increment endpointIndex - retry same endpoint
                                    continue;
                                } else {
                                    // Already retried once, mark and switch
                                    accountManager.markRateLimited(account.email, waitMs, model);
                                    throw new Error(`RATE_LIMITED: ${errorText}`);
                                }
                            }
                        }

                        if (response.status >= 400) {
                            lastError = new Error(`API error ${response.status}: ${errorText}`);
                            // If it's a 5xx error, wait a bit before trying the next endpoint
                            if (response.status >= 500) {
                                logger.warn(`[CloudCode] ${response.status} error, waiting 1s before retry...`);
                                await sleep(1000);
                            }
                            endpointIndex++;
                            continue;
                        }
                    }

                    // For thinking models, parse SSE and accumulate all parts
                    if (isThinking) {
                        const result = await parseThinkingSSEResponse(response, anthropicRequest.model);
                        // Gap 1: Clear timestamp on success
                        clearRateLimitTimestamp(model);
                        accountManager.notifySuccess(account, model);
                        return result;
                    }

                    // Non-thinking models use regular JSON
                    const data = await response.json();
                    logger.debug('[CloudCode] Response received');
                    // Gap 1: Clear timestamp on success
                    clearRateLimitTimestamp(model);
                    accountManager.notifySuccess(account, model);
                    return convertGoogleToAnthropic(data, anthropicRequest.model);

                } catch (endpointError) {
                    // Rate-limit and auth errors are account-level problems: another endpoint
                    // cannot fix them, so re-throw to let the outer handler switch accounts.
                    // Without the auth check, an account just marked invalid above would be
                    // retried against every remaining endpoint.
                    if (isRateLimitError(endpointError) || isAuthError(endpointError)) {
                        throw endpointError;
                    }
                    logger.warn(`[CloudCode] Error at ${endpoint}:`, endpointError.message);
                    lastError = endpointError;
                    endpointIndex++;
                }
            }

            // If all endpoints failed for this account.
            // When every endpoint answered with a transient 401, lastError is still null and
            // control falls through to the next attempt of the outer loop.
            if (lastError) {
                // NOTE(review): nothing in this function sets is429/resetMs/errorText on lastError;
                // this branch only fires if an error thrown by a helper carries them - confirm.
                if (lastError.is429) {
                    logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
                    accountManager.markRateLimited(account.email, lastError.resetMs, model);
                    throw new Error(`Rate limited: ${lastError.errorText}`);
                }
                throw lastError;
            }

        } catch (error) {
            if (isRateLimitError(error)) {
                // Rate limited - already marked, notify strategy and continue to next account
                accountManager.notifyRateLimit(account, model);
                logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
                continue;
            }
            if (isAuthError(error)) {
                // Auth invalid - already marked, continue to next account
                logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
                continue;
            }
            // Handle 5xx errors
            if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
                accountManager.notifyFailure(account, model);

                // Gap 2: Check consecutive failures for extended cooldown
                const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
                if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
                    logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
                } else {
                    logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`);
                }
                continue;
            }

            if (isNetworkError(error)) {
                accountManager.notifyFailure(account, model);

                // Gap 2: Check consecutive failures for extended cooldown
                const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
                if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
                    logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
                } else {
                    logger.warn(`[CloudCode] Network error for ${account.email}, trying next account... (${error.message})`);
                }
                await sleep(1000);
                continue;
            }

            // Anything else is not retryable here
            throw error;
        }
    }

    // All retries exhausted - try fallback model if enabled
    if (fallbackEnabled) {
        const fallbackModel = getFallbackModel(model);
        if (fallbackModel) {
            logger.warn(`[CloudCode] All retries exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
            const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
            return await sendMessage(fallbackRequest, accountManager, false);
        }
    }

    throw new Error('Max retries exceeded');
}
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model API for Cloud Code
|
|
3
|
+
*
|
|
4
|
+
* Handles model listing and quota retrieval from the Cloud Code API.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
ANTIGRAVITY_ENDPOINT_FALLBACKS,
|
|
9
|
+
ANTIGRAVITY_HEADERS,
|
|
10
|
+
LOAD_CODE_ASSIST_ENDPOINTS,
|
|
11
|
+
LOAD_CODE_ASSIST_HEADERS,
|
|
12
|
+
getModelFamily
|
|
13
|
+
} from '../constants.js';
|
|
14
|
+
import { logger } from '../utils/logger.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Report whether a model belongs to one of the families this proxy exposes
 * (Claude or Gemini), as classified by getModelFamily().
 *
 * @param {string} modelId - Model ID to check
 * @returns {boolean} True if the model's family is 'claude' or 'gemini'
 */
function isSupportedModel(modelId) {
    const supportedFamilies = ['claude', 'gemini'];
    return supportedFamilies.includes(getModelFamily(modelId));
}
|
|
25
|
+
|
|
26
|
+
/**
 * List available models in Anthropic API format.
 * The catalogue is fetched live via fetchAvailableModels() and reduced to
 * supported (Claude/Gemini) entries.
 *
 * @param {string} token - OAuth access token
 * @returns {Promise<{object: string, data: Array<{id: string, object: string, created: number, owned_by: string, description: string}>}>}
 *   List wrapper; `data` is empty when the API response carries no `models`
 */
export async function listModels(token) {
    const response = await fetchAvailableModels(token);
    const catalogue = response?.models;
    if (!catalogue) {
        return { object: 'list', data: [] };
    }

    const supportedEntries = Object.entries(catalogue)
        .filter(([modelId]) => isSupportedModel(modelId));

    const data = [];
    for (const [modelId, modelData] of supportedEntries) {
        data.push({
            id: modelId,
            object: 'model',
            // No creation date is read from the response; the current time (in seconds) is used
            created: Math.floor(Date.now() / 1000),
            owned_by: 'anthropic',
            description: modelData.displayName || modelId
        });
    }

    return { object: 'list', data };
}
|
|
54
|
+
|
|
55
|
+
/**
 * Fetch available models with quota info from Cloud Code API.
 * Tries each entry of ANTIGRAVITY_ENDPOINT_FALLBACKS in order and returns the
 * parsed JSON body of the first successful response.
 *
 * @param {string} token - OAuth access token
 * @param {string} [projectId] - Optional project ID for accurate quota info
 * @returns {Promise<Object>} Raw response from fetchAvailableModels API
 * @throws {Error} If no endpoint returned a usable response; the last underlying
 *   failure (if any) is attached as `cause`
 */
export async function fetchAvailableModels(token, projectId = null) {
    const headers = {
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
        ...ANTIGRAVITY_HEADERS
    };

    // Include project ID in body for accurate quota info (per Quotio implementation)
    const body = JSON.stringify(projectId ? { project: projectId } : {});

    // Remember the most recent failure so the final error is diagnosable
    let lastError = null;

    for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
        try {
            const url = `${endpoint}/v1internal:fetchAvailableModels`;
            const response = await fetch(url, {
                method: 'POST',
                headers,
                body
            });

            if (!response.ok) {
                // Read the body so it can be reported; truncate in the log because it may be large
                const errorText = await response.text();
                logger.warn(`[CloudCode] fetchAvailableModels error at ${endpoint}: ${response.status} - ${errorText.substring(0, 200)}`);
                lastError = new Error(`API error ${response.status}: ${errorText}`);
                continue;
            }

            return await response.json();
        } catch (error) {
            // Network or JSON-parse failure: log and move on to the next endpoint
            logger.warn(`[CloudCode] fetchAvailableModels failed at ${endpoint}:`, error.message);
            lastError = error;
        }
    }

    throw new Error(
        'Failed to fetch available models from all endpoints',
        lastError ? { cause: lastError } : undefined
    );
}
|
|
96
|
+
|
|
97
|
+
/**
 * Get model quotas for an account.
 * Builds a per-model quota summary from the fetchAvailableModels() response,
 * keeping only supported (Claude/Gemini) models that carry `quotaInfo`.
 *
 * @param {string} token - OAuth access token
 * @param {string} [projectId] - Optional project ID for accurate quota info
 * @returns {Promise<Object>} Map of modelId -> { remainingFraction, resetTime };
 *   empty object when the response has no `models`
 */
export async function getModelQuotas(token, projectId = null) {
    const response = await fetchAvailableModels(token, projectId);
    if (!response?.models) {
        return {};
    }

    const quotas = {};
    for (const [modelId, modelData] of Object.entries(response.models)) {
        // Skip unsupported families first, then models without quota information
        if (!isSupportedModel(modelId)) continue;

        const info = modelData.quotaInfo;
        if (!info) continue;

        // A missing remainingFraction alongside a resetTime means the quota is exhausted (0%)
        const fallbackFraction = info.resetTime ? 0 : null;
        quotas[modelId] = {
            remainingFraction: info.remainingFraction ?? fallbackFraction,
            resetTime: info.resetTime ?? null
        };
    }

    return quotas;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Parse tier ID string to determine subscription level.
 *
 * Matching is case-insensitive and checked in this order: anything containing
 * "ultra" -> 'ultra'; exactly "standard-tier", or anything containing "pro" or
 * "premium" -> 'pro'; anything containing "free" -> 'free'.
 *
 * @param {string} tierId - The tier ID from the API
 * @returns {'free' | 'pro' | 'ultra' | 'unknown'} The subscription tier;
 *   'unknown' for empty, missing, non-string, or unrecognised values
 */
function parseTierId(tierId) {
    // The value comes from API JSON; a non-string would throw on toLowerCase()
    // and be misreported by the caller as an endpoint failure.
    if (typeof tierId !== 'string' || tierId === '') {
        return 'unknown';
    }

    const lower = tierId.toLowerCase();

    if (lower.includes('ultra')) {
        return 'ultra';
    }
    // standard-tier = "Gemini Code Assist" (paid, project-based)
    if (lower === 'standard-tier' || lower.includes('pro') || lower.includes('premium')) {
        return 'pro';
    }
    if (lower.includes('free')) {
        return 'free';
    }
    return 'unknown';
}
|
|
150
|
+
|
|
151
|
+
/**
 * Get subscription tier for an account.
 * Calls the loadCodeAssist API on each of LOAD_CODE_ASSIST_ENDPOINTS in turn and
 * derives the project ID and subscription tier from the first successful response.
 *
 * @param {string} token - OAuth access token
 * @returns {Promise<{tier: string, projectId: string|null}>} Subscription tier
 *   (free/pro/ultra/unknown) and project ID; `{ tier: 'free', projectId: null }`
 *   when every endpoint fails
 */
export async function getSubscriptionTier(token) {
    const requestInit = {
        method: 'POST',
        headers: {
            'Authorization': `Bearer ${token}`,
            'Content-Type': 'application/json',
            ...LOAD_CODE_ASSIST_HEADERS
        },
        body: JSON.stringify({
            metadata: {
                ideType: 'IDE_UNSPECIFIED',
                platform: 'PLATFORM_UNSPECIFIED',
                pluginType: 'GEMINI',
                duetProject: 'rising-fact-p41fc'
            }
        })
    };

    for (const endpoint of LOAD_CODE_ASSIST_ENDPOINTS) {
        try {
            const response = await fetch(`${endpoint}/v1internal:loadCodeAssist`, requestInit);

            if (!response.ok) {
                logger.warn(`[CloudCode] loadCodeAssist error at ${endpoint}: ${response.status}`);
                continue;
            }

            const data = await response.json();

            // Debug: Log all tier-related fields from the response
            logger.debug(`[CloudCode] loadCodeAssist tier data: paidTier=${JSON.stringify(data.paidTier)}, currentTier=${JSON.stringify(data.currentTier)}, allowedTiers=${JSON.stringify(data.allowedTiers?.map(t => ({ id: t?.id, isDefault: t?.isDefault })))}`);

            // The project may be given either as a plain string or as an object with an `id`
            const project = data.cloudaicompanionProject;
            const projectId = typeof project === 'string'
                ? project
                : (project?.id || null);

            // Tier resolution. Priority: paidTier > currentTier > allowedTiers
            // - paidTier.id: "g1-pro-tier", "g1-ultra-tier" (Google One subscription)
            // - currentTier.id: "standard-tier" (pro), "free-tier" (free)
            // - allowedTiers: fallback when currentTier is missing
            // Note: paidTier is sometimes missing from the response even for Pro accounts
            let tier = 'unknown';
            let tierId = null;
            let tierSource = null;

            // Adopt a candidate only while no earlier source has produced a known tier.
            // A candidate that parses to 'unknown' still overwrites tierId/tierSource,
            // so the debug line below reports the last source that was consulted.
            const considerCandidate = (candidateId, source) => {
                if (tier !== 'unknown' || !candidateId) return;
                tierId = candidateId;
                tier = parseTierId(candidateId);
                tierSource = source;
            };

            considerCandidate(data.paidTier?.id, 'paidTier');
            considerCandidate(data.currentTier?.id, 'currentTier');

            if (tier === 'unknown' && Array.isArray(data.allowedTiers) && data.allowedTiers.length > 0) {
                // Prefer the tier flagged as default; otherwise take the first entry
                const fallbackTier = data.allowedTiers.find(t => t?.isDefault) || data.allowedTiers[0];
                considerCandidate(fallbackTier?.id, 'allowedTiers');
            }

            logger.debug(`[CloudCode] Subscription detected: ${tier} (tierId: ${tierId}, source: ${tierSource}), Project: ${projectId}`);

            return { tier, projectId };
        } catch (error) {
            // Network, JSON, or response-shape failure: log and try the next endpoint
            logger.warn(`[CloudCode] loadCodeAssist failed at ${endpoint}:`, error.message);
        }
    }

    // Fallback: return default values if all endpoints fail
    logger.warn('[CloudCode] Failed to detect subscription tier from all endpoints. Defaulting to free.');
    return { tier: 'free', projectId: null };
}
|