antigravity-claude-proxy 2.0.8 → 2.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/account-manager/index.js +12 -1
- package/src/account-manager/rate-limits.js +42 -16
- package/src/cloudcode/message-handler.js +84 -57
- package/src/cloudcode/model-api.js +11 -5
- package/src/cloudcode/rate-limit-parser.js +2 -2
- package/src/cloudcode/streaming-handler.js +86 -69
- package/src/constants.js +10 -9
- package/src/index.js +1 -1
- package/src/server.js +7 -6
package/package.json
CHANGED
|
package/src/account-manager/index.js
CHANGED
|
@@ -14,7 +14,8 @@ import {
|
|
|
14
14
|
resetAllRateLimits as resetLimits,
|
|
15
15
|
markRateLimited as markLimited,
|
|
16
16
|
markInvalid as markAccountInvalid,
|
|
17
|
-
getMinWaitTimeMs as getMinWait
|
|
17
|
+
getMinWaitTimeMs as getMinWait,
|
|
18
|
+
getRateLimitInfo as getLimitInfo
|
|
18
19
|
} from './rate-limits.js';
|
|
19
20
|
import {
|
|
20
21
|
getTokenForAccount as fetchToken,
|
|
@@ -214,6 +215,16 @@ export class AccountManager {
|
|
|
214
215
|
return getMinWait(this.#accounts, modelId);
|
|
215
216
|
}
|
|
216
217
|
|
|
218
|
+
/**
|
|
219
|
+
* Get rate limit info for a specific account and model
|
|
220
|
+
* @param {string} email - Email of the account
|
|
221
|
+
* @param {string} modelId - Model ID to check
|
|
222
|
+
* @returns {{isRateLimited: boolean, actualResetMs: number|null, waitMs: number}} Rate limit info
|
|
223
|
+
*/
|
|
224
|
+
getRateLimitInfo(email, modelId) {
|
|
225
|
+
return getLimitInfo(this.#accounts, email, modelId);
|
|
226
|
+
}
|
|
227
|
+
|
|
217
228
|
/**
|
|
218
229
|
* Get OAuth token for an account
|
|
219
230
|
* @param {Object} account - Account object with email and credentials
|
|
package/src/account-manager/rate-limits.js
CHANGED
|
@@ -22,6 +22,7 @@ export function isAllRateLimited(accounts, modelId) {
|
|
|
22
22
|
|
|
23
23
|
return accounts.every(acc => {
|
|
24
24
|
if (acc.isInvalid) return true; // Invalid accounts count as unavailable
|
|
25
|
+
if (acc.enabled === false) return true; // Disabled accounts count as unavailable
|
|
25
26
|
const modelLimits = acc.modelRateLimits || {};
|
|
26
27
|
const limit = modelLimits[modelId];
|
|
27
28
|
return limit && limit.isRateLimited && limit.resetTime > Date.now();
|
|
@@ -118,18 +119,9 @@ export function markRateLimited(accounts, email, resetMs = null, modelId) {
|
|
|
118
119
|
const account = accounts.find(a => a.email === email);
|
|
119
120
|
if (!account) return false;
|
|
120
121
|
|
|
121
|
-
//
|
|
122
|
-
//
|
|
123
|
-
|
|
124
|
-
let cooldownMs;
|
|
125
|
-
if (resetMs && resetMs > 0) {
|
|
126
|
-
// API provided a reset time - cap it at configured maximum
|
|
127
|
-
cooldownMs = Math.min(resetMs, DEFAULT_COOLDOWN_MS);
|
|
128
|
-
} else {
|
|
129
|
-
// No reset time from API - use configured default
|
|
130
|
-
cooldownMs = DEFAULT_COOLDOWN_MS;
|
|
131
|
-
}
|
|
132
|
-
const resetTime = Date.now() + cooldownMs;
|
|
122
|
+
// Store the ACTUAL reset time from the API
|
|
123
|
+
// This is used to decide whether to wait (short) or switch accounts (long)
|
|
124
|
+
const actualResetMs = (resetMs && resetMs > 0) ? resetMs : DEFAULT_COOLDOWN_MS;
|
|
133
125
|
|
|
134
126
|
if (!account.modelRateLimits) {
|
|
135
127
|
account.modelRateLimits = {};
|
|
@@ -137,12 +129,20 @@ export function markRateLimited(accounts, email, resetMs = null, modelId) {
|
|
|
137
129
|
|
|
138
130
|
account.modelRateLimits[modelId] = {
|
|
139
131
|
isRateLimited: true,
|
|
140
|
-
resetTime:
|
|
132
|
+
resetTime: Date.now() + actualResetMs, // Actual reset time for decisions
|
|
133
|
+
actualResetMs: actualResetMs // Original duration from API
|
|
141
134
|
};
|
|
142
135
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
136
|
+
// Log appropriately based on duration
|
|
137
|
+
if (actualResetMs > DEFAULT_COOLDOWN_MS) {
|
|
138
|
+
logger.warn(
|
|
139
|
+
`[AccountManager] Quota exhausted: ${email} (model: ${modelId}). Resets in ${formatDuration(actualResetMs)}`
|
|
140
|
+
);
|
|
141
|
+
} else {
|
|
142
|
+
logger.warn(
|
|
143
|
+
`[AccountManager] Rate limited: ${email} (model: ${modelId}). Available in ${formatDuration(actualResetMs)}`
|
|
144
|
+
);
|
|
145
|
+
}
|
|
146
146
|
|
|
147
147
|
return true;
|
|
148
148
|
}
|
|
@@ -209,3 +209,29 @@ export function getMinWaitTimeMs(accounts, modelId) {
|
|
|
209
209
|
|
|
210
210
|
return minWait === Infinity ? DEFAULT_COOLDOWN_MS : minWait;
|
|
211
211
|
}
|
|
212
|
+
|
|
213
|
+
/**
|
|
214
|
+
* Get the rate limit info for a specific account and model
|
|
215
|
+
* Returns the actual reset time from API, not capped
|
|
216
|
+
*
|
|
217
|
+
* @param {Array} accounts - Array of account objects
|
|
218
|
+
* @param {string} email - Email of the account
|
|
219
|
+
* @param {string} modelId - Model ID to check
|
|
220
|
+
* @returns {{isRateLimited: boolean, actualResetMs: number|null, waitMs: number}} Rate limit info
|
|
221
|
+
*/
|
|
222
|
+
export function getRateLimitInfo(accounts, email, modelId) {
|
|
223
|
+
const account = accounts.find(a => a.email === email);
|
|
224
|
+
if (!account || !account.modelRateLimits || !account.modelRateLimits[modelId]) {
|
|
225
|
+
return { isRateLimited: false, actualResetMs: null, waitMs: 0 };
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
const limit = account.modelRateLimits[modelId];
|
|
229
|
+
const now = Date.now();
|
|
230
|
+
const waitMs = limit.resetTime ? Math.max(0, limit.resetTime - now) : 0;
|
|
231
|
+
|
|
232
|
+
return {
|
|
233
|
+
isRateLimited: limit.isRateLimited && waitMs > 0,
|
|
234
|
+
actualResetMs: limit.actualResetMs || null,
|
|
235
|
+
waitMs
|
|
236
|
+
};
|
|
237
|
+
}
|
|
package/src/cloudcode/message-handler.js
CHANGED
|
@@ -9,6 +9,7 @@ import {
|
|
|
9
9
|
ANTIGRAVITY_ENDPOINT_FALLBACKS,
|
|
10
10
|
MAX_RETRIES,
|
|
11
11
|
MAX_WAIT_BEFORE_ERROR_MS,
|
|
12
|
+
DEFAULT_COOLDOWN_MS,
|
|
12
13
|
isThinkingModel
|
|
13
14
|
} from '../constants.js';
|
|
14
15
|
import { convertGoogleToAnthropic } from '../format/index.js';
|
|
@@ -39,67 +40,56 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|
|
39
40
|
|
|
40
41
|
// Retry loop with account failover
|
|
41
42
|
// Ensure we try at least as many times as there are accounts to cycle through everyone
|
|
42
|
-
// +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop
|
|
43
43
|
const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);
|
|
44
44
|
|
|
45
45
|
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
46
|
-
//
|
|
47
|
-
|
|
48
|
-
let account = stickyAccount;
|
|
49
|
-
|
|
50
|
-
// Handle waiting for sticky account
|
|
51
|
-
if (!account && waitMs > 0) {
|
|
52
|
-
logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`);
|
|
53
|
-
await sleep(waitMs);
|
|
54
|
-
accountManager.clearExpiredLimits();
|
|
55
|
-
account = accountManager.getCurrentStickyAccount(model);
|
|
56
|
-
}
|
|
46
|
+
// Clear any expired rate limits before picking
|
|
47
|
+
accountManager.clearExpiredLimits();
|
|
57
48
|
|
|
58
|
-
//
|
|
59
|
-
|
|
49
|
+
// Get available accounts for this model
|
|
50
|
+
const availableAccounts = accountManager.getAvailableAccounts(model);
|
|
51
|
+
|
|
52
|
+
// If no accounts available, check if we should wait or throw error
|
|
53
|
+
if (availableAccounts.length === 0) {
|
|
60
54
|
if (accountManager.isAllRateLimited(model)) {
|
|
61
|
-
const
|
|
62
|
-
const resetTime = new Date(Date.now() +
|
|
55
|
+
const minWaitMs = accountManager.getMinWaitTimeMs(model);
|
|
56
|
+
const resetTime = new Date(Date.now() + minWaitMs).toISOString();
|
|
63
57
|
|
|
64
58
|
// If wait time is too long (> 2 minutes), throw error immediately
|
|
65
|
-
if (
|
|
59
|
+
if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
|
|
66
60
|
throw new Error(
|
|
67
|
-
`RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(
|
|
61
|
+
`RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
|
|
68
62
|
);
|
|
69
63
|
}
|
|
70
64
|
|
|
71
|
-
// Wait for reset
|
|
65
|
+
// Wait for shortest reset time
|
|
72
66
|
const accountCount = accountManager.getAccountCount();
|
|
73
|
-
logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(
|
|
74
|
-
await sleep(
|
|
75
|
-
|
|
76
|
-
// Add small buffer after waiting to ensure rate limits have truly expired
|
|
77
|
-
await sleep(500);
|
|
67
|
+
logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`);
|
|
68
|
+
await sleep(minWaitMs + 500); // Add 500ms buffer
|
|
78
69
|
accountManager.clearExpiredLimits();
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
// If still no account after waiting, try optimistic reset
|
|
82
|
-
// This handles cases where the API rate limit is transient
|
|
83
|
-
if (!account) {
|
|
84
|
-
logger.warn('[CloudCode] No account available after wait, attempting optimistic reset...');
|
|
85
|
-
accountManager.resetAllRateLimits();
|
|
86
|
-
account = accountManager.pickNext(model);
|
|
87
|
-
}
|
|
70
|
+
continue; // Retry the loop
|
|
88
71
|
}
|
|
89
72
|
|
|
90
|
-
if
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
|
|
98
|
-
return await sendMessage(fallbackRequest, accountManager, false); // Disable fallback for recursive call
|
|
99
|
-
}
|
|
73
|
+
// Check if fallback is enabled and available
|
|
74
|
+
if (fallbackEnabled) {
|
|
75
|
+
const fallbackModel = getFallbackModel(model);
|
|
76
|
+
if (fallbackModel) {
|
|
77
|
+
logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
|
|
78
|
+
const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
|
|
79
|
+
return await sendMessage(fallbackRequest, accountManager, false);
|
|
100
80
|
}
|
|
101
|
-
throw new Error('No accounts available');
|
|
102
81
|
}
|
|
82
|
+
throw new Error('No accounts available');
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
// Pick sticky account (prefers current for cache continuity)
|
|
86
|
+
let account = accountManager.getCurrentStickyAccount(model);
|
|
87
|
+
if (!account) {
|
|
88
|
+
account = accountManager.pickNext(model);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
if (!account) {
|
|
92
|
+
continue; // Shouldn't happen, but safety check
|
|
103
93
|
}
|
|
104
94
|
|
|
105
95
|
try {
|
|
@@ -112,6 +102,8 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|
|
112
102
|
|
|
113
103
|
// Try each endpoint
|
|
114
104
|
let lastError = null;
|
|
105
|
+
let retriedOnce = false; // Track if we've already retried for short rate limit
|
|
106
|
+
|
|
115
107
|
for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
|
|
116
108
|
try {
|
|
117
109
|
const url = isThinking
|
|
@@ -137,14 +129,51 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|
|
137
129
|
}
|
|
138
130
|
|
|
139
131
|
if (response.status === 429) {
|
|
140
|
-
// Rate limited on this endpoint - try next endpoint first (DAILY → PROD)
|
|
141
|
-
logger.debug(`[CloudCode] Rate limited at ${endpoint}, trying next endpoint...`);
|
|
142
132
|
const resetMs = parseResetTime(response, errorText);
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
133
|
+
|
|
134
|
+
// Decision: wait and retry OR switch account
|
|
135
|
+
if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
|
|
136
|
+
// Long-term quota exhaustion (> 10s) - switch to next account
|
|
137
|
+
logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(resetMs)}), switching account...`);
|
|
138
|
+
accountManager.markRateLimited(account.email, resetMs, model);
|
|
139
|
+
throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
|
|
140
|
+
} else {
|
|
141
|
+
// Short-term rate limit (<= 10s) - wait and retry once
|
|
142
|
+
const waitMs = resetMs || DEFAULT_COOLDOWN_MS;
|
|
143
|
+
|
|
144
|
+
if (!retriedOnce) {
|
|
145
|
+
retriedOnce = true;
|
|
146
|
+
logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
|
|
147
|
+
await sleep(waitMs);
|
|
148
|
+
// Retry same endpoint
|
|
149
|
+
const retryResponse = await fetch(url, {
|
|
150
|
+
method: 'POST',
|
|
151
|
+
headers: buildHeaders(token, model, isThinking ? 'text/event-stream' : 'application/json'),
|
|
152
|
+
body: JSON.stringify(payload)
|
|
153
|
+
});
|
|
154
|
+
|
|
155
|
+
if (retryResponse.ok) {
|
|
156
|
+
// Process retry response
|
|
157
|
+
if (isThinking) {
|
|
158
|
+
return await parseThinkingSSEResponse(retryResponse, anthropicRequest.model);
|
|
159
|
+
}
|
|
160
|
+
const data = await retryResponse.json();
|
|
161
|
+
logger.debug('[CloudCode] Response received after retry');
|
|
162
|
+
return convertGoogleToAnthropic(data, anthropicRequest.model);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
// Retry also failed - parse new reset time
|
|
166
|
+
const retryErrorText = await retryResponse.text();
|
|
167
|
+
const retryResetMs = parseResetTime(retryResponse, retryErrorText);
|
|
168
|
+
logger.warn(`[CloudCode] Retry also failed, marking and switching...`);
|
|
169
|
+
accountManager.markRateLimited(account.email, retryResetMs || waitMs, model);
|
|
170
|
+
throw new Error(`RATE_LIMITED_AFTER_RETRY: ${retryErrorText}`);
|
|
171
|
+
} else {
|
|
172
|
+
// Already retried once, mark and switch
|
|
173
|
+
accountManager.markRateLimited(account.email, waitMs, model);
|
|
174
|
+
throw new Error(`RATE_LIMITED: ${errorText}`);
|
|
175
|
+
}
|
|
146
176
|
}
|
|
147
|
-
continue;
|
|
148
177
|
}
|
|
149
178
|
|
|
150
179
|
if (response.status >= 400) {
|
|
@@ -179,7 +208,6 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|
|
179
208
|
|
|
180
209
|
// If all endpoints failed for this account
|
|
181
210
|
if (lastError) {
|
|
182
|
-
// If all endpoints returned 429, mark account as rate-limited
|
|
183
211
|
if (lastError.is429) {
|
|
184
212
|
logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
|
|
185
213
|
accountManager.markRateLimited(account.email, lastError.resetMs, model);
|
|
@@ -199,18 +227,17 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|
|
199
227
|
logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
|
|
200
228
|
continue;
|
|
201
229
|
}
|
|
202
|
-
//
|
|
203
|
-
// UNLESS it's a 500 error, then we treat it as a "soft" failure for this account and try the next one
|
|
230
|
+
// Handle 5xx errors
|
|
204
231
|
if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
|
|
205
232
|
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx error, trying next...`);
|
|
206
|
-
accountManager.pickNext(model);
|
|
233
|
+
accountManager.pickNext(model);
|
|
207
234
|
continue;
|
|
208
235
|
}
|
|
209
236
|
|
|
210
237
|
if (isNetworkError(error)) {
|
|
211
238
|
logger.warn(`[CloudCode] Network error for ${account.email}, trying next account... (${error.message})`);
|
|
212
|
-
await sleep(1000);
|
|
213
|
-
accountManager.pickNext(model);
|
|
239
|
+
await sleep(1000);
|
|
240
|
+
accountManager.pickNext(model);
|
|
214
241
|
continue;
|
|
215
242
|
}
|
|
216
243
|
|
|
@@ -224,7 +251,7 @@ export async function sendMessage(anthropicRequest, accountManager, fallbackEnab
|
|
|
224
251
|
if (fallbackModel) {
|
|
225
252
|
logger.warn(`[CloudCode] All retries exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
|
|
226
253
|
const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
|
|
227
|
-
return await sendMessage(fallbackRequest, accountManager, false);
|
|
254
|
+
return await sendMessage(fallbackRequest, accountManager, false);
|
|
228
255
|
}
|
|
229
256
|
}
|
|
230
257
|
|
|
package/src/cloudcode/model-api.js
CHANGED
|
@@ -57,22 +57,26 @@ export async function listModels(token) {
|
|
|
57
57
|
* Returns model quotas including remaining fraction and reset time
|
|
58
58
|
*
|
|
59
59
|
* @param {string} token - OAuth access token
|
|
60
|
+
* @param {string} [projectId] - Optional project ID for accurate quota info
|
|
60
61
|
* @returns {Promise<Object>} Raw response from fetchAvailableModels API
|
|
61
62
|
*/
|
|
62
|
-
export async function fetchAvailableModels(token) {
|
|
63
|
+
export async function fetchAvailableModels(token, projectId = null) {
|
|
63
64
|
const headers = {
|
|
64
65
|
'Authorization': `Bearer ${token}`,
|
|
65
66
|
'Content-Type': 'application/json',
|
|
66
67
|
...ANTIGRAVITY_HEADERS
|
|
67
68
|
};
|
|
68
69
|
|
|
70
|
+
// Include project ID in body for accurate quota info (per Quotio implementation)
|
|
71
|
+
const body = projectId ? { project: projectId } : {};
|
|
72
|
+
|
|
69
73
|
for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
|
|
70
74
|
try {
|
|
71
75
|
const url = `${endpoint}/v1internal:fetchAvailableModels`;
|
|
72
76
|
const response = await fetch(url, {
|
|
73
77
|
method: 'POST',
|
|
74
78
|
headers,
|
|
75
|
-
body: JSON.stringify(
|
|
79
|
+
body: JSON.stringify(body)
|
|
76
80
|
});
|
|
77
81
|
|
|
78
82
|
if (!response.ok) {
|
|
@@ -95,10 +99,11 @@ export async function fetchAvailableModels(token) {
|
|
|
95
99
|
* Extracts quota info (remaining fraction and reset time) for each model
|
|
96
100
|
*
|
|
97
101
|
* @param {string} token - OAuth access token
|
|
102
|
+
* @param {string} [projectId] - Optional project ID for accurate quota info
|
|
98
103
|
* @returns {Promise<Object>} Map of modelId -> { remainingFraction, resetTime }
|
|
99
104
|
*/
|
|
100
|
-
export async function getModelQuotas(token) {
|
|
101
|
-
const data = await fetchAvailableModels(token);
|
|
105
|
+
export async function getModelQuotas(token, projectId = null) {
|
|
106
|
+
const data = await fetchAvailableModels(token, projectId);
|
|
102
107
|
if (!data || !data.models) return {};
|
|
103
108
|
|
|
104
109
|
const quotas = {};
|
|
@@ -108,7 +113,8 @@ export async function getModelQuotas(token) {
|
|
|
108
113
|
|
|
109
114
|
if (modelData.quotaInfo) {
|
|
110
115
|
quotas[modelId] = {
|
|
111
|
-
|
|
116
|
+
// When remainingFraction is missing but resetTime is present, quota is exhausted (0%)
|
|
117
|
+
remainingFraction: modelData.quotaInfo.remainingFraction ?? (modelData.quotaInfo.resetTime ? 0 : null),
|
|
112
118
|
resetTime: modelData.quotaInfo.resetTime ?? null
|
|
113
119
|
};
|
|
114
120
|
}
|
|
package/src/cloudcode/rate-limit-parser.js
CHANGED
|
@@ -78,7 +78,7 @@ export function parseResetTime(responseOrError, errorText = '') {
|
|
|
78
78
|
|
|
79
79
|
// Try to extract "quotaResetDelay" first (e.g. "754.431528ms" or "1.5s")
|
|
80
80
|
// This is Google's preferred format for rate limit reset delay
|
|
81
|
-
const quotaDelayMatch = msg.match(/quotaResetDelay[:\s"]+(
|
|
81
|
+
const quotaDelayMatch = msg.match(/quotaResetDelay[:\s"]+(\d+(?:\.\d+)?)(ms|s)/i);
|
|
82
82
|
if (quotaDelayMatch) {
|
|
83
83
|
const value = parseFloat(quotaDelayMatch[1]);
|
|
84
84
|
const unit = quotaDelayMatch[2].toLowerCase();
|
|
@@ -103,7 +103,7 @@ export function parseResetTime(responseOrError, errorText = '') {
|
|
|
103
103
|
// Try to extract "retry-after-ms" or "retryDelay" - check seconds format first (e.g. "7739.23s")
|
|
104
104
|
// Added stricter regex to avoid partial matches
|
|
105
105
|
if (!resetMs) {
|
|
106
|
-
const secMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+([
|
|
106
|
+
const secMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+([\d.]+)(?:s\b|s")/i);
|
|
107
107
|
if (secMatch) {
|
|
108
108
|
resetMs = Math.ceil(parseFloat(secMatch[1]) * 1000);
|
|
109
109
|
logger.debug(`[CloudCode] Parsed retry seconds from body (precise): ${resetMs}ms`);
|
|
package/src/cloudcode/streaming-handler.js
CHANGED
|
@@ -9,7 +9,8 @@ import {
|
|
|
9
9
|
ANTIGRAVITY_ENDPOINT_FALLBACKS,
|
|
10
10
|
MAX_RETRIES,
|
|
11
11
|
MAX_EMPTY_RESPONSE_RETRIES,
|
|
12
|
-
MAX_WAIT_BEFORE_ERROR_MS
|
|
12
|
+
MAX_WAIT_BEFORE_ERROR_MS,
|
|
13
|
+
DEFAULT_COOLDOWN_MS
|
|
13
14
|
} from '../constants.js';
|
|
14
15
|
import { isRateLimitError, isAuthError, isEmptyResponseError } from '../errors.js';
|
|
15
16
|
import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
|
|
@@ -38,68 +39,57 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|
|
38
39
|
|
|
39
40
|
// Retry loop with account failover
|
|
40
41
|
// Ensure we try at least as many times as there are accounts to cycle through everyone
|
|
41
|
-
// +1 to ensure we hit the "all accounts rate-limited" check at the start of the next loop
|
|
42
42
|
const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);
|
|
43
43
|
|
|
44
44
|
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
45
|
-
//
|
|
46
|
-
|
|
47
|
-
let account = stickyAccount;
|
|
48
|
-
|
|
49
|
-
// Handle waiting for sticky account
|
|
50
|
-
if (!account && waitMs > 0) {
|
|
51
|
-
logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`);
|
|
52
|
-
await sleep(waitMs);
|
|
53
|
-
accountManager.clearExpiredLimits();
|
|
54
|
-
account = accountManager.getCurrentStickyAccount(model);
|
|
55
|
-
}
|
|
45
|
+
// Clear any expired rate limits before picking
|
|
46
|
+
accountManager.clearExpiredLimits();
|
|
56
47
|
|
|
57
|
-
//
|
|
58
|
-
|
|
48
|
+
// Get available accounts for this model
|
|
49
|
+
const availableAccounts = accountManager.getAvailableAccounts(model);
|
|
50
|
+
|
|
51
|
+
// If no accounts available, check if we should wait or throw error
|
|
52
|
+
if (availableAccounts.length === 0) {
|
|
59
53
|
if (accountManager.isAllRateLimited(model)) {
|
|
60
|
-
const
|
|
61
|
-
const resetTime = new Date(Date.now() +
|
|
54
|
+
const minWaitMs = accountManager.getMinWaitTimeMs(model);
|
|
55
|
+
const resetTime = new Date(Date.now() + minWaitMs).toISOString();
|
|
62
56
|
|
|
63
57
|
// If wait time is too long (> 2 minutes), throw error immediately
|
|
64
|
-
if (
|
|
58
|
+
if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
|
|
65
59
|
throw new Error(
|
|
66
|
-
`RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(
|
|
60
|
+
`RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
|
|
67
61
|
);
|
|
68
62
|
}
|
|
69
63
|
|
|
70
|
-
// Wait for reset
|
|
64
|
+
// Wait for shortest reset time
|
|
71
65
|
const accountCount = accountManager.getAccountCount();
|
|
72
|
-
logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(
|
|
73
|
-
await sleep(
|
|
74
|
-
|
|
75
|
-
// Add small buffer after waiting to ensure rate limits have truly expired
|
|
76
|
-
await sleep(500);
|
|
66
|
+
logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`);
|
|
67
|
+
await sleep(minWaitMs + 500); // Add 500ms buffer
|
|
77
68
|
accountManager.clearExpiredLimits();
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
// If still no account after waiting, try optimistic reset
|
|
81
|
-
// This handles cases where the API rate limit is transient
|
|
82
|
-
if (!account) {
|
|
83
|
-
logger.warn('[CloudCode] No account available after wait, attempting optimistic reset...');
|
|
84
|
-
accountManager.resetAllRateLimits();
|
|
85
|
-
account = accountManager.pickNext(model);
|
|
86
|
-
}
|
|
69
|
+
continue; // Retry the loop
|
|
87
70
|
}
|
|
88
71
|
|
|
89
|
-
if
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
yield* sendMessageStream(fallbackRequest, accountManager, false); // Disable fallback for recursive call
|
|
98
|
-
return;
|
|
99
|
-
}
|
|
72
|
+
// Check if fallback is enabled and available
|
|
73
|
+
if (fallbackEnabled) {
|
|
74
|
+
const fallbackModel = getFallbackModel(model);
|
|
75
|
+
if (fallbackModel) {
|
|
76
|
+
logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
|
|
77
|
+
const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
|
|
78
|
+
yield* sendMessageStream(fallbackRequest, accountManager, false);
|
|
79
|
+
return;
|
|
100
80
|
}
|
|
101
|
-
throw new Error('No accounts available');
|
|
102
81
|
}
|
|
82
|
+
throw new Error('No accounts available');
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
// Pick sticky account (prefers current for cache continuity)
|
|
86
|
+
let account = accountManager.getCurrentStickyAccount(model);
|
|
87
|
+
if (!account) {
|
|
88
|
+
account = accountManager.pickNext(model);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
if (!account) {
|
|
92
|
+
continue; // Shouldn't happen, but safety check
|
|
103
93
|
}
|
|
104
94
|
|
|
105
95
|
try {
|
|
@@ -112,6 +102,8 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|
|
112
102
|
|
|
113
103
|
// Try each endpoint for streaming
|
|
114
104
|
let lastError = null;
|
|
105
|
+
let retriedOnce = false; // Track if we've already retried for short rate limit
|
|
106
|
+
|
|
115
107
|
for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
|
|
116
108
|
try {
|
|
117
109
|
const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
|
|
@@ -134,14 +126,48 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|
|
134
126
|
}
|
|
135
127
|
|
|
136
128
|
if (response.status === 429) {
|
|
137
|
-
// Rate limited on this endpoint - try next endpoint first (DAILY → PROD)
|
|
138
|
-
logger.debug(`[CloudCode] Stream rate limited at ${endpoint}, trying next endpoint...`);
|
|
139
129
|
const resetMs = parseResetTime(response, errorText);
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
130
|
+
|
|
131
|
+
// Decision: wait and retry OR switch account
|
|
132
|
+
if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
|
|
133
|
+
// Long-term quota exhaustion (> 10s) - switch to next account
|
|
134
|
+
logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(resetMs)}), switching account...`);
|
|
135
|
+
accountManager.markRateLimited(account.email, resetMs, model);
|
|
136
|
+
throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
|
|
137
|
+
} else {
|
|
138
|
+
// Short-term rate limit (<= 10s) - wait and retry once
|
|
139
|
+
const waitMs = resetMs || DEFAULT_COOLDOWN_MS;
|
|
140
|
+
|
|
141
|
+
if (!retriedOnce) {
|
|
142
|
+
retriedOnce = true;
|
|
143
|
+
logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
|
|
144
|
+
await sleep(waitMs);
|
|
145
|
+
// Retry same endpoint
|
|
146
|
+
const retryResponse = await fetch(url, {
|
|
147
|
+
method: 'POST',
|
|
148
|
+
headers: buildHeaders(token, model, 'text/event-stream'),
|
|
149
|
+
body: JSON.stringify(payload)
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
if (retryResponse.ok) {
|
|
153
|
+
// Stream the retry response
|
|
154
|
+
yield* streamSSEResponse(retryResponse, anthropicRequest.model);
|
|
155
|
+
logger.debug('[CloudCode] Stream completed after retry');
|
|
156
|
+
return;
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
// Retry also failed - parse new reset time
|
|
160
|
+
const retryErrorText = await retryResponse.text();
|
|
161
|
+
const retryResetMs = parseResetTime(retryResponse, retryErrorText);
|
|
162
|
+
logger.warn(`[CloudCode] Retry also failed, marking and switching...`);
|
|
163
|
+
accountManager.markRateLimited(account.email, retryResetMs || waitMs, model);
|
|
164
|
+
throw new Error(`RATE_LIMITED_AFTER_RETRY: ${retryErrorText}`);
|
|
165
|
+
} else {
|
|
166
|
+
// Already retried once, mark and switch
|
|
167
|
+
accountManager.markRateLimited(account.email, waitMs, model);
|
|
168
|
+
throw new Error(`RATE_LIMITED: ${errorText}`);
|
|
169
|
+
}
|
|
143
170
|
}
|
|
144
|
-
continue;
|
|
145
171
|
}
|
|
146
172
|
|
|
147
173
|
lastError = new Error(`API error ${response.status}: ${errorText}`);
|
|
@@ -156,7 +182,6 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|
|
156
182
|
}
|
|
157
183
|
|
|
158
184
|
// Stream the response with retry logic for empty responses
|
|
159
|
-
// Uses a for-loop for clearer retry semantics
|
|
160
185
|
let currentResponse = response;
|
|
161
186
|
|
|
162
187
|
for (let emptyRetries = 0; emptyRetries <= MAX_EMPTY_RESPONSE_RETRIES; emptyRetries++) {
|
|
@@ -207,28 +232,22 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|
|
207
232
|
throw new Error(`401 AUTH_INVALID during retry: ${retryErrorText}`);
|
|
208
233
|
}
|
|
209
234
|
|
|
210
|
-
// For 5xx errors,
|
|
235
|
+
// For 5xx errors, continue retrying
|
|
211
236
|
if (currentResponse.status >= 500) {
|
|
212
237
|
logger.warn(`[CloudCode] Retry got ${currentResponse.status}, will retry...`);
|
|
213
|
-
// Don't continue here - let the loop increment and refetch
|
|
214
|
-
// Set currentResponse to null to force refetch at loop start
|
|
215
|
-
emptyRetries--; // Compensate for loop increment since we didn't actually try
|
|
216
238
|
await sleep(1000);
|
|
217
|
-
// Refetch immediately for 5xx
|
|
218
239
|
currentResponse = await fetch(url, {
|
|
219
240
|
method: 'POST',
|
|
220
241
|
headers: buildHeaders(token, model, 'text/event-stream'),
|
|
221
242
|
body: JSON.stringify(payload)
|
|
222
243
|
});
|
|
223
244
|
if (currentResponse.ok) {
|
|
224
|
-
continue;
|
|
245
|
+
continue;
|
|
225
246
|
}
|
|
226
|
-
// If still failing, let it fall through to throw
|
|
227
247
|
}
|
|
228
248
|
|
|
229
249
|
throw new Error(`Empty response retry failed: ${currentResponse.status} - ${retryErrorText}`);
|
|
230
250
|
}
|
|
231
|
-
// Response is OK, loop will continue to try streamSSEResponse
|
|
232
251
|
}
|
|
233
252
|
}
|
|
234
253
|
|
|
@@ -237,7 +256,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|
|
237
256
|
throw endpointError; // Re-throw to trigger account switch
|
|
238
257
|
}
|
|
239
258
|
if (isEmptyResponseError(endpointError)) {
|
|
240
|
-
throw endpointError;
|
|
259
|
+
throw endpointError;
|
|
241
260
|
}
|
|
242
261
|
logger.warn(`[CloudCode] Stream error at ${endpoint}:`, endpointError.message);
|
|
243
262
|
lastError = endpointError;
|
|
@@ -246,7 +265,6 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|
|
246
265
|
|
|
247
266
|
// If all endpoints failed for this account
|
|
248
267
|
if (lastError) {
|
|
249
|
-
// If all endpoints returned 429, mark account as rate-limited
|
|
250
268
|
if (lastError.is429) {
|
|
251
269
|
logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
|
|
252
270
|
accountManager.markRateLimited(account.email, lastError.resetMs, model);
|
|
@@ -266,18 +284,17 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|
|
266
284
|
logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
|
|
267
285
|
continue;
|
|
268
286
|
}
|
|
269
|
-
//
|
|
270
|
-
// UNLESS it's a 500 error, then we treat it as a "soft" failure for this account and try the next one
|
|
287
|
+
// Handle 5xx errors
|
|
271
288
|
if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
|
|
272
289
|
logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error, trying next...`);
|
|
273
|
-
accountManager.pickNext(model);
|
|
290
|
+
accountManager.pickNext(model);
|
|
274
291
|
continue;
|
|
275
292
|
}
|
|
276
293
|
|
|
277
294
|
if (isNetworkError(error)) {
|
|
278
295
|
logger.warn(`[CloudCode] Network error for ${account.email} (stream), trying next account... (${error.message})`);
|
|
279
|
-
await sleep(1000);
|
|
280
|
-
accountManager.pickNext(model);
|
|
296
|
+
await sleep(1000);
|
|
297
|
+
accountManager.pickNext(model);
|
|
281
298
|
continue;
|
|
282
299
|
}
|
|
283
300
|
|
|
@@ -291,7 +308,7 @@ export async function* sendMessageStream(anthropicRequest, accountManager, fallb
|
|
|
291
308
|
if (fallbackModel) {
|
|
292
309
|
logger.warn(`[CloudCode] All retries exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
|
|
293
310
|
const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
|
|
294
|
-
yield* sendMessageStream(fallbackRequest, accountManager, false);
|
|
311
|
+
yield* sendMessageStream(fallbackRequest, accountManager, false);
|
|
295
312
|
return;
|
|
296
313
|
}
|
|
297
314
|
}
|
package/src/constants.js
CHANGED
|
@@ -69,15 +69,16 @@ export const ONBOARD_USER_ENDPOINTS = ANTIGRAVITY_ENDPOINT_FALLBACKS;
|
|
|
69
69
|
|
|
70
70
|
// Hybrid headers specifically for loadCodeAssist
|
|
71
71
|
// Uses google-api-nodejs-client User-Agent (required for project discovery on some accounts)
|
|
72
|
-
export const LOAD_CODE_ASSIST_HEADERS = {
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
};
|
|
72
|
+
// export const LOAD_CODE_ASSIST_HEADERS = {
|
|
73
|
+
// 'User-Agent': 'google-api-nodejs-client/9.15.1',
|
|
74
|
+
// 'X-Goog-Api-Client': 'google-cloud-sdk vscode_cloudshelleditor/0.1',
|
|
75
|
+
// 'Client-Metadata': JSON.stringify({
|
|
76
|
+
// ideType: 'IDE_UNSPECIFIED',
|
|
77
|
+
// platform: 'PLATFORM_UNSPECIFIED',
|
|
78
|
+
// pluginType: 'GEMINI'
|
|
79
|
+
// })
|
|
80
|
+
// };
|
|
81
|
+
export const LOAD_CODE_ASSIST_HEADERS = ANTIGRAVITY_HEADERS;
|
|
81
82
|
|
|
82
83
|
// Default project ID if none can be discovered
|
|
83
84
|
export const DEFAULT_PROJECT_ID = 'rising-fact-p41fc';
|
package/src/index.js
CHANGED
|
@@ -71,7 +71,7 @@ app.listen(PORT, () => {
|
|
|
71
71
|
║ Antigravity Claude Proxy Server ║
|
|
72
72
|
╠══════════════════════════════════════════════════════════════╣
|
|
73
73
|
║ ║
|
|
74
|
-
${border} ${align(`Server running at: http://localhost:${PORT}`)}${border}
|
|
74
|
+
${border} ${align(`Server and WebUI running at: http://localhost:${PORT}`)}${border}
|
|
75
75
|
${statusSection}║ ║
|
|
76
76
|
${controlSection}
|
|
77
77
|
║ ║
|
package/src/server.js
CHANGED
|
@@ -214,7 +214,8 @@ app.get('/health', async (req, res) => {
|
|
|
214
214
|
|
|
215
215
|
try {
|
|
216
216
|
const token = await accountManager.getTokenForAccount(account);
|
|
217
|
-
const
|
|
217
|
+
const projectId = account.subscription?.projectId || null;
|
|
218
|
+
const quotas = await getModelQuotas(token, projectId);
|
|
218
219
|
|
|
219
220
|
// Format quotas for readability
|
|
220
221
|
const formattedQuotas = {};
|
|
@@ -309,11 +310,11 @@ app.get('/account-limits', async (req, res) => {
|
|
|
309
310
|
try {
|
|
310
311
|
const token = await accountManager.getTokenForAccount(account);
|
|
311
312
|
|
|
312
|
-
// Fetch
|
|
313
|
-
const
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
313
|
+
// Fetch subscription tier first to get project ID
|
|
314
|
+
const subscription = await getSubscriptionTier(token);
|
|
315
|
+
|
|
316
|
+
// Then fetch quotas with project ID for accurate quota info
|
|
317
|
+
const quotas = await getModelQuotas(token, subscription.projectId);
|
|
317
318
|
|
|
318
319
|
// Update account object with fresh data
|
|
319
320
|
account.subscription = {
|