@kamel-ahmed/proxy-claude 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +622 -0
- package/bin/cli.js +124 -0
- package/package.json +80 -0
- package/public/app.js +228 -0
- package/public/css/src/input.css +523 -0
- package/public/css/style.css +1 -0
- package/public/favicon.svg +10 -0
- package/public/index.html +381 -0
- package/public/js/components/account-manager.js +245 -0
- package/public/js/components/claude-config.js +420 -0
- package/public/js/components/dashboard/charts.js +589 -0
- package/public/js/components/dashboard/filters.js +362 -0
- package/public/js/components/dashboard/stats.js +110 -0
- package/public/js/components/dashboard.js +236 -0
- package/public/js/components/logs-viewer.js +100 -0
- package/public/js/components/models.js +36 -0
- package/public/js/components/server-config.js +349 -0
- package/public/js/config/constants.js +102 -0
- package/public/js/data-store.js +386 -0
- package/public/js/settings-store.js +58 -0
- package/public/js/store.js +78 -0
- package/public/js/translations/en.js +351 -0
- package/public/js/translations/id.js +396 -0
- package/public/js/translations/pt.js +287 -0
- package/public/js/translations/tr.js +342 -0
- package/public/js/translations/zh.js +357 -0
- package/public/js/utils/account-actions.js +189 -0
- package/public/js/utils/error-handler.js +96 -0
- package/public/js/utils/model-config.js +42 -0
- package/public/js/utils/validators.js +77 -0
- package/public/js/utils.js +69 -0
- package/public/views/accounts.html +329 -0
- package/public/views/dashboard.html +484 -0
- package/public/views/logs.html +97 -0
- package/public/views/models.html +331 -0
- package/public/views/settings.html +1329 -0
- package/src/account-manager/credentials.js +243 -0
- package/src/account-manager/index.js +380 -0
- package/src/account-manager/onboarding.js +117 -0
- package/src/account-manager/rate-limits.js +237 -0
- package/src/account-manager/storage.js +136 -0
- package/src/account-manager/strategies/base-strategy.js +104 -0
- package/src/account-manager/strategies/hybrid-strategy.js +195 -0
- package/src/account-manager/strategies/index.js +79 -0
- package/src/account-manager/strategies/round-robin-strategy.js +76 -0
- package/src/account-manager/strategies/sticky-strategy.js +138 -0
- package/src/account-manager/strategies/trackers/health-tracker.js +162 -0
- package/src/account-manager/strategies/trackers/index.js +8 -0
- package/src/account-manager/strategies/trackers/token-bucket-tracker.js +121 -0
- package/src/auth/database.js +169 -0
- package/src/auth/oauth.js +419 -0
- package/src/auth/token-extractor.js +117 -0
- package/src/cli/accounts.js +512 -0
- package/src/cli/refresh.js +201 -0
- package/src/cli/setup.js +338 -0
- package/src/cloudcode/index.js +29 -0
- package/src/cloudcode/message-handler.js +386 -0
- package/src/cloudcode/model-api.js +248 -0
- package/src/cloudcode/rate-limit-parser.js +181 -0
- package/src/cloudcode/request-builder.js +93 -0
- package/src/cloudcode/session-manager.js +47 -0
- package/src/cloudcode/sse-parser.js +121 -0
- package/src/cloudcode/sse-streamer.js +293 -0
- package/src/cloudcode/streaming-handler.js +492 -0
- package/src/config.js +107 -0
- package/src/constants.js +278 -0
- package/src/errors.js +238 -0
- package/src/fallback-config.js +29 -0
- package/src/format/content-converter.js +193 -0
- package/src/format/index.js +20 -0
- package/src/format/request-converter.js +248 -0
- package/src/format/response-converter.js +120 -0
- package/src/format/schema-sanitizer.js +673 -0
- package/src/format/signature-cache.js +88 -0
- package/src/format/thinking-utils.js +558 -0
- package/src/index.js +146 -0
- package/src/modules/usage-stats.js +205 -0
- package/src/server.js +861 -0
- package/src/utils/claude-config.js +245 -0
- package/src/utils/helpers.js +51 -0
- package/src/utils/logger.js +142 -0
- package/src/utils/native-module-helper.js +162 -0
- package/src/webui/index.js +707 -0
|
@@ -0,0 +1,492 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Streaming Handler for Cloud Code
|
|
3
|
+
*
|
|
4
|
+
* Handles streaming message requests with multi-account support,
|
|
5
|
+
* retry logic, and endpoint failover.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
ANTIGRAVITY_ENDPOINT_FALLBACKS,
|
|
10
|
+
MAX_RETRIES,
|
|
11
|
+
MAX_EMPTY_RESPONSE_RETRIES,
|
|
12
|
+
MAX_WAIT_BEFORE_ERROR_MS,
|
|
13
|
+
DEFAULT_COOLDOWN_MS,
|
|
14
|
+
RATE_LIMIT_DEDUP_WINDOW_MS,
|
|
15
|
+
MAX_CONSECUTIVE_FAILURES,
|
|
16
|
+
EXTENDED_COOLDOWN_MS,
|
|
17
|
+
CAPACITY_RETRY_DELAY_MS,
|
|
18
|
+
MAX_CAPACITY_RETRIES
|
|
19
|
+
} from '../constants.js';
|
|
20
|
+
import { isRateLimitError, isAuthError, isEmptyResponseError } from '../errors.js';
|
|
21
|
+
import { formatDuration, sleep, isNetworkError } from '../utils/helpers.js';
|
|
22
|
+
import { logger } from '../utils/logger.js';
|
|
23
|
+
import { parseResetTime } from './rate-limit-parser.js';
|
|
24
|
+
import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
|
|
25
|
+
import { streamSSEResponse } from './sse-streamer.js';
|
|
26
|
+
import { getFallbackModel } from '../fallback-config.js';
|
|
27
|
+
import crypto from 'crypto';
|
|
28
|
+
|
|
29
|
+
/**
 * Gap 1: Rate limit deduplication state.
 *
 * Maps a model ID to the Date.now() value stored when a rate limit was last
 * recorded for that model. Used to avoid a thundering herd of duplicate
 * retries when several requests hit the same limit at once.
 */
const lastRateLimitTimestamps = new Map(); // modelId -> timestamp (ms)

/**
 * Decide whether a retry should be skipped because a rate limit was already
 * recorded for this model inside the deduplication window.
 *
 * @param {string} model - Model ID
 * @returns {boolean} True when the recorded timestamp is younger than
 *   RATE_LIMIT_DEDUP_WINDOW_MS; false when nothing is recorded or it is older
 */
function shouldSkipRetryDueToDedup(model) {
    const recordedAt = lastRateLimitTimestamps.get(model);

    if (recordedAt) {
        const ageMs = Date.now() - recordedAt;
        if (ageMs < RATE_LIMIT_DEDUP_WINDOW_MS) {
            logger.debug(`[CloudCode] Rate limit on ${model} within dedup window (${ageMs}ms ago), skipping retry`);
            return true;
        }
    }

    // Nothing recorded (or a falsy timestamp), or the window has elapsed.
    return false;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Remember the current time as the moment this model was last rate-limited.
 * Overwrites any timestamp previously stored for the same model.
 *
 * @param {string} model - Model ID
 */
function recordRateLimitTimestamp(model) {
    const now = Date.now();
    lastRateLimitTimestamps.set(model, now);
}
|
|
59
|
+
|
|
60
|
+
/**
 * Forget the rate-limit timestamp stored for a model (called once a stream
 * for that model completes successfully). A no-op when nothing is stored.
 *
 * @param {string} model - Model ID
 */
function clearRateLimitTimestamp(model) {
    if (lastRateLimitTimestamps.has(model)) {
        lastRateLimitTimestamps.delete(model);
    }
}
|
|
67
|
+
|
|
68
|
+
/**
 * Gap 3: Detect permanent authentication failures that require re-authentication.
 *
 * The check is a case-insensitive substring match against a fixed list of
 * markers. Non-string input (null, undefined, an Error, a parsed JSON body)
 * is coerced with String() instead of throwing, so callers may pass whatever
 * they received from the API.
 *
 * @param {string} errorText - Error message from API
 * @returns {boolean} True if permanent auth failure
 */
function isPermanentAuthFailure(errorText) {
    const permanentMarkers = [
        'invalid_grant',
        'token revoked',
        'token has been expired or revoked',
        'token_revoked',
        'invalid_client',
        'credentials are invalid'
    ];

    // `?? ''` keeps null/undefined from becoming the strings "null"/"undefined".
    const lower = String(errorText ?? '').toLowerCase();
    return permanentMarkers.some((marker) => lower.includes(marker));
}
|
|
82
|
+
|
|
83
|
+
/**
 * Gap 4: Detect if 429 error is due to model capacity (not user quota).
 *
 * Case-insensitive substring match against a fixed list of markers.
 * Non-string input is coerced with String() instead of throwing.
 *
 * @param {string} errorText - Error message from API
 * @returns {boolean} True if capacity exhausted (not quota)
 */
function isModelCapacityExhausted(errorText) {
    const capacityMarkers = [
        // Also matches 'model_capacity_exhausted', which contains this substring.
        'capacity_exhausted',
        'model is currently overloaded',
        'service temporarily unavailable'
    ];

    // `?? ''` keeps null/undefined from becoming the strings "null"/"undefined".
    const lower = String(errorText ?? '').toLowerCase();
    return capacityMarkers.some((marker) => lower.includes(marker));
}
|
|
95
|
+
|
|
96
|
+
// Periodically clean up stale dedup timestamps (every 60 seconds).
// Entries older than one minute are dropped; deleting from a Map while
// iterating over it is well-defined in JavaScript.
// The timer is unref()'d so that merely importing this module does not keep
// the Node.js process alive; the optional call keeps this safe in runtimes
// whose setInterval returns a plain number.
setInterval(() => {
    const cutoff = Date.now() - 60000; // 1 minute
    for (const [model, timestamp] of lastRateLimitTimestamps.entries()) {
        if (timestamp < cutoff) {
            lastRateLimitTimestamps.delete(model);
        }
    }
}, 60000).unref?.();
|
|
105
|
+
|
|
106
|
+
/**
 * Send a streaming request to Cloud Code with multi-account support.
 * Streams events in real-time as they arrive from the server.
 *
 * Control flow, outermost to innermost:
 *   1. Attempt loop - picks an account per attempt; rate-limit, auth, 5xx and
 *      network errors move on to the next attempt.
 *   2. Endpoint loop - walks ANTIGRAVITY_ENDPOINT_FALLBACKS for the chosen
 *      account; capacity errors and the first short rate limit retry the same
 *      endpoint, other HTTP errors advance to the next endpoint.
 *   3. Empty-response loop - re-issues the request with exponential backoff
 *      when the stream raises an empty-response error, and finally emits a
 *      synthetic fallback message.
 *
 * @param {Object} anthropicRequest - The Anthropic-format request
 * @param {string} anthropicRequest.model - Model name to use
 * @param {Array} anthropicRequest.messages - Array of message objects
 * @param {number} [anthropicRequest.max_tokens] - Maximum tokens to generate
 * @param {Object} [anthropicRequest.thinking] - Thinking configuration
 * @param {import('../account-manager/index.js').default} accountManager - The account manager instance
 * @param {boolean} [fallbackEnabled=false] - When true, retry once with the model returned by
 *   getFallbackModel() if all accounts are exhausted or all attempts fail (the nested call
 *   always passes false, so fallback never chains)
 * @yields {Object} Anthropic-format SSE events (message_start, content_block_start, content_block_delta, etc.)
 * @throws {Error} If max retries exceeded or no accounts available
 */
export async function* sendMessageStream(anthropicRequest, accountManager, fallbackEnabled = false) {
    const model = anthropicRequest.model;

    // Retry loop with account failover.
    // Ensure we try at least as many times as there are accounts to cycle through everyone.
    const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
        // Clear any expired rate limits before picking
        accountManager.clearExpiredLimits();

        // Get available accounts for this model
        const availableAccounts = accountManager.getAvailableAccounts(model);

        // If no accounts available, check if we should wait or throw error
        if (availableAccounts.length === 0) {
            if (accountManager.isAllRateLimited(model)) {
                const minWaitMs = accountManager.getMinWaitTimeMs(model);
                const resetTime = new Date(Date.now() + minWaitMs).toISOString();

                // If wait time is too long, try fallback first, then throw error
                if (minWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
                    // Check if fallback is enabled and available
                    if (fallbackEnabled) {
                        const fallbackModel = getFallbackModel(model);
                        if (fallbackModel) {
                            logger.warn(`[CloudCode] All accounts exhausted for ${model} (${formatDuration(minWaitMs)} wait). Attempting fallback to ${fallbackModel} (streaming)`);
                            const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
                            yield* sendMessageStream(fallbackRequest, accountManager, false);
                            return;
                        }
                    }
                    throw new Error(
                        `RESOURCE_EXHAUSTED: Rate limited on ${model}. Quota will reset after ${formatDuration(minWaitMs)}. Next available: ${resetTime}`
                    );
                }

                // Wait for shortest reset time
                const accountCount = accountManager.getAccountCount();
                logger.warn(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(minWaitMs)}...`);
                await sleep(minWaitMs + 500); // Add 500ms buffer
                accountManager.clearExpiredLimits();
                continue; // Retry the loop
            }

            // No accounts available and not rate-limited (shouldn't happen normally)
            throw new Error('No accounts available');
        }

        // Select account using configured strategy
        const { account, waitMs } = accountManager.selectAccount(model);

        // If strategy returns a wait time, sleep and retry
        if (!account && waitMs > 0) {
            logger.info(`[CloudCode] Waiting ${formatDuration(waitMs)} for account...`);
            await sleep(waitMs + 500);
            continue;
        }

        if (!account) {
            continue; // Shouldn't happen, but safety check
        }

        try {
            // Get token and project for this account
            const token = await accountManager.getTokenForAccount(account);
            const project = await accountManager.getProjectForAccount(account, token);
            const payload = buildCloudCodeRequest(anthropicRequest, project);

            // Single place that issues the streaming POST; headers and body are
            // rebuilt on every call, exactly as each call site did before.
            const postStream = (url) => fetch(url, {
                method: 'POST',
                headers: buildHeaders(token, model, 'text/event-stream'),
                body: JSON.stringify(payload)
            });

            logger.debug(`[CloudCode] Starting stream for model: ${model}`);

            // Try each endpoint with index-based loop for capacity retry support
            let lastError = null;
            let retriedOnce = false; // Track if we've already retried for short rate limit
            let capacityRetryCount = 0; // Gap 4: Track capacity exhaustion retries
            let endpointIndex = 0;

            while (endpointIndex < ANTIGRAVITY_ENDPOINT_FALLBACKS.length) {
                const endpoint = ANTIGRAVITY_ENDPOINT_FALLBACKS[endpointIndex];
                try {
                    const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;

                    const response = await postStream(url);

                    if (!response.ok) {
                        const errorText = await response.text();
                        logger.warn(`[CloudCode] Stream error at ${endpoint}: ${response.status} - ${errorText}`);

                        if (response.status === 401) {
                            // Gap 3: Check for permanent auth failures
                            if (isPermanentAuthFailure(errorText)) {
                                logger.error(`[CloudCode] Permanent auth failure for ${account.email}: ${errorText.substring(0, 100)}`);
                                accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
                                throw new Error(`AUTH_INVALID_PERMANENT: ${errorText}`);
                            }

                            // Transient auth error - clear caches and move to the next endpoint.
                            // NOTE(review): the next endpoint is still called with the `token`
                            // obtained above; the cleared cache only takes effect on a later
                            // attempt - confirm this is intended.
                            accountManager.clearTokenCache(account.email);
                            accountManager.clearProjectCache(account.email);
                            endpointIndex++;
                            continue;
                        }

                        if (response.status === 429) {
                            const resetMs = parseResetTime(response, errorText);

                            // Gap 4: Check if capacity issue (NOT quota) - retry SAME endpoint
                            if (isModelCapacityExhausted(errorText)) {
                                if (capacityRetryCount < MAX_CAPACITY_RETRIES) {
                                    capacityRetryCount++;
                                    const waitMs = resetMs || CAPACITY_RETRY_DELAY_MS;
                                    logger.info(`[CloudCode] Model capacity exhausted, retry ${capacityRetryCount}/${MAX_CAPACITY_RETRIES} after ${formatDuration(waitMs)}...`);
                                    await sleep(waitMs);
                                    // Don't increment endpointIndex - retry same endpoint
                                    continue;
                                }
                                // Max capacity retries exceeded - treat as quota exhaustion
                                logger.warn(`[CloudCode] Max capacity retries (${MAX_CAPACITY_RETRIES}) exceeded, switching account`);
                            }

                            // Gap 1: Check deduplication window to prevent thundering herd
                            if (shouldSkipRetryDueToDedup(model)) {
                                logger.info(`[CloudCode] Skipping retry due to recent rate limit, switching account...`);
                                accountManager.markRateLimited(account.email, resetMs || DEFAULT_COOLDOWN_MS, model);
                                throw new Error(`RATE_LIMITED_DEDUP: ${errorText}`);
                            }

                            // Decision: wait and retry OR switch account
                            if (resetMs && resetMs > DEFAULT_COOLDOWN_MS) {
                                // Long-term quota exhaustion (> DEFAULT_COOLDOWN_MS) - switch to next account
                                logger.info(`[CloudCode] Quota exhausted for ${account.email} (${formatDuration(resetMs)}), switching account...`);
                                accountManager.markRateLimited(account.email, resetMs, model);
                                throw new Error(`QUOTA_EXHAUSTED: ${errorText}`);
                            } else {
                                // Short-term rate limit (<= DEFAULT_COOLDOWN_MS) - wait and retry once
                                const waitMs = resetMs || DEFAULT_COOLDOWN_MS;

                                if (!retriedOnce) {
                                    retriedOnce = true;
                                    recordRateLimitTimestamp(model); // Gap 1: Record before retry
                                    logger.info(`[CloudCode] Short rate limit (${formatDuration(waitMs)}), waiting and retrying...`);
                                    await sleep(waitMs);
                                    // Don't increment endpointIndex - retry same endpoint
                                    continue;
                                } else {
                                    // Already retried once, mark and switch
                                    accountManager.markRateLimited(account.email, waitMs, model);
                                    throw new Error(`RATE_LIMITED: ${errorText}`);
                                }
                            }
                        }

                        lastError = new Error(`API error ${response.status}: ${errorText}`);

                        // If it's a 5xx error, wait a bit before trying the next endpoint
                        if (response.status >= 500) {
                            logger.warn(`[CloudCode] ${response.status} stream error, waiting 1s before retry...`);
                            await sleep(1000);
                        }

                        endpointIndex++;
                        continue;
                    }

                    // Stream the response with retry logic for empty responses
                    let currentResponse = response;

                    for (let emptyRetries = 0; emptyRetries <= MAX_EMPTY_RESPONSE_RETRIES; emptyRetries++) {
                        try {
                            yield* streamSSEResponse(currentResponse, anthropicRequest.model);
                            logger.debug('[CloudCode] Stream completed');
                            // Gap 1: Clear timestamp on success
                            clearRateLimitTimestamp(model);
                            accountManager.notifySuccess(account, model);
                            return;
                        } catch (streamError) {
                            // Only retry on EmptyResponseError.
                            // NOTE(review): any other stream error propagates to the endpoint
                            // catch below, which moves on to the next endpoint even if events
                            // were already yielded to the consumer - confirm that a restarted
                            // stream after partial output is acceptable.
                            if (!isEmptyResponseError(streamError)) {
                                throw streamError;
                            }

                            // Check if we have retries left
                            if (emptyRetries >= MAX_EMPTY_RESPONSE_RETRIES) {
                                logger.error(`[CloudCode] Empty response after ${MAX_EMPTY_RESPONSE_RETRIES} retries`);
                                yield* emitEmptyResponseFallback(anthropicRequest.model);
                                return;
                            }

                            // Exponential backoff: 500ms, 1000ms, 2000ms
                            const backoffMs = 500 * Math.pow(2, emptyRetries);
                            logger.warn(`[CloudCode] Empty response, retry ${emptyRetries + 1}/${MAX_EMPTY_RESPONSE_RETRIES} after ${backoffMs}ms...`);
                            await sleep(backoffMs);

                            // Refetch the response
                            currentResponse = await postStream(url);

                            // Handle specific error codes on retry
                            if (!currentResponse.ok) {
                                const retryErrorText = await currentResponse.text();

                                // Rate limit error - mark account and throw to trigger account switch
                                if (currentResponse.status === 429) {
                                    const resetMs = parseResetTime(currentResponse, retryErrorText);
                                    // parseResetTime may yield nothing usable; fall back to the
                                    // default cooldown like the other markRateLimited call sites.
                                    accountManager.markRateLimited(account.email, resetMs || DEFAULT_COOLDOWN_MS, model);
                                    throw new Error(`429 RESOURCE_EXHAUSTED during retry: ${retryErrorText}`);
                                }

                                // Auth error - check for permanent failure
                                if (currentResponse.status === 401) {
                                    if (isPermanentAuthFailure(retryErrorText)) {
                                        logger.error(`[CloudCode] Permanent auth failure during retry for ${account.email}`);
                                        accountManager.markInvalid(account.email, 'Token revoked - re-authentication required');
                                        throw new Error(`AUTH_INVALID_PERMANENT: ${retryErrorText}`);
                                    }
                                    accountManager.clearTokenCache(account.email);
                                    accountManager.clearProjectCache(account.email);
                                    throw new Error(`401 AUTH_INVALID during retry: ${retryErrorText}`);
                                }

                                // For 5xx errors, try one more time after a short pause
                                if (currentResponse.status >= 500) {
                                    logger.warn(`[CloudCode] Retry got ${currentResponse.status}, will retry...`);
                                    await sleep(1000);
                                    currentResponse = await postStream(url);
                                    if (currentResponse.ok) {
                                        continue;
                                    }
                                }

                                throw new Error(`Empty response retry failed: ${currentResponse.status} - ${retryErrorText}`);
                            }
                        }
                    }

                } catch (endpointError) {
                    if (isRateLimitError(endpointError)) {
                        throw endpointError; // Re-throw to trigger account switch
                    }
                    if (isEmptyResponseError(endpointError)) {
                        throw endpointError;
                    }
                    // The account was just marked invalid; replaying the revoked token
                    // against the remaining endpoints cannot succeed, so leave the
                    // endpoint loop immediately and let the attempt loop handle it.
                    if (typeof endpointError?.message === 'string' &&
                        endpointError.message.startsWith('AUTH_INVALID_PERMANENT')) {
                        throw endpointError;
                    }
                    logger.warn(`[CloudCode] Stream error at ${endpoint}:`, endpointError.message);
                    lastError = endpointError;
                    endpointIndex++;
                }
            }

            // If all endpoints failed for this account
            if (lastError) {
                // NOTE(review): nothing in this function sets `is429`, `resetMs` or
                // `errorText` on an error; this branch only fires if a callee
                // (e.g. the SSE streamer) throws errors carrying them - verify.
                if (lastError.is429) {
                    logger.warn(`[CloudCode] All endpoints rate-limited for ${account.email}`);
                    accountManager.markRateLimited(account.email, lastError.resetMs, model);
                    throw new Error(`Rate limited: ${lastError.errorText}`);
                }
                throw lastError;
            }

        } catch (error) {
            if (isRateLimitError(error)) {
                // Rate limited - already marked, notify strategy and continue to next account
                accountManager.notifyRateLimit(account, model);
                logger.info(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
                continue;
            }
            if (isAuthError(error)) {
                // Auth invalid - already marked, continue to next account
                logger.warn(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
                continue;
            }
            // Handle 5xx errors
            if (error.message.includes('API error 5') || error.message.includes('500') || error.message.includes('503')) {
                accountManager.notifyFailure(account, model);

                // Gap 2: Check consecutive failures for extended cooldown
                const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
                if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
                    logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
                } else {
                    logger.warn(`[CloudCode] Account ${account.email} failed with 5xx stream error, trying next...`);
                }
                continue;
            }

            if (isNetworkError(error)) {
                accountManager.notifyFailure(account, model);

                // Gap 2: Check consecutive failures for extended cooldown
                const consecutiveFailures = accountManager.getHealthTracker()?.getConsecutiveFailures(account.email) || 0;
                if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
                    logger.warn(`[CloudCode] Account ${account.email} has ${consecutiveFailures} consecutive network failures, applying extended cooldown (${formatDuration(EXTENDED_COOLDOWN_MS)})`);
                    accountManager.markRateLimited(account.email, EXTENDED_COOLDOWN_MS, model);
                } else {
                    logger.warn(`[CloudCode] Network error for ${account.email} (stream), trying next account... (${error.message})`);
                }
                await sleep(1000);
                continue;
            }

            throw error;
        }
    }

    // All retries exhausted - try fallback model if enabled
    if (fallbackEnabled) {
        const fallbackModel = getFallbackModel(model);
        if (fallbackModel) {
            logger.warn(`[CloudCode] All retries exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
            const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
            yield* sendMessageStream(fallbackRequest, accountManager, false);
            return;
        }
    }

    throw new Error('Max retries exceeded');
}
|
|
447
|
+
|
|
448
|
+
/**
 * Produce a complete, synthetic Anthropic-format message for the case where
 * every retry ended in an empty response: one text block telling the user to
 * try again, wrapped in the usual start/stop events.
 *
 * @param {string} model - The model name echoed back in message_start
 * @yields {Object} Six SSE events: message_start, content_block_start,
 *   content_block_delta, content_block_stop, message_delta, message_stop
 */
function* emitEmptyResponseFallback(model) {
    // Message ID in the `msg_<32 hex chars>` shape, consistent with the Anthropic API
    const randomHex = crypto.randomBytes(16).toString('hex');
    const blockIndex = 0;

    const events = [
        {
            type: 'message_start',
            message: {
                id: `msg_${randomHex}`,
                type: 'message',
                role: 'assistant',
                content: [],
                model: model,
                stop_reason: null,
                stop_sequence: null,
                usage: { input_tokens: 0, output_tokens: 0 }
            }
        },
        {
            type: 'content_block_start',
            index: blockIndex,
            content_block: { type: 'text', text: '' }
        },
        {
            type: 'content_block_delta',
            index: blockIndex,
            delta: { type: 'text_delta', text: '[No response after retries - please try again]' }
        },
        { type: 'content_block_stop', index: blockIndex },
        {
            type: 'message_delta',
            delta: { stop_reason: 'end_turn', stop_sequence: null },
            usage: { output_tokens: 0 }
        },
        { type: 'message_stop' }
    ];

    yield* events;
}
|
package/src/config.js
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import os from 'os';
|
|
4
|
+
import { logger } from './utils/logger.js';
|
|
5
|
+
|
|
6
|
+
// Health-score tuning for the hybrid account-selection strategy
const DEFAULT_HEALTH_SCORE = {
    initial: 70,            // score a new account starts with
    successReward: 1,       // added on each successful request
    rateLimitPenalty: -10,  // added on a rate limit
    failurePenalty: -20,    // added on any other failure
    recoveryPerHour: 2,     // passive recovery rate
    minUsable: 50,          // accounts below this score are not selected
    maxScore: 100           // upper bound for the score
};

// Token-bucket tuning for the hybrid account-selection strategy
const DEFAULT_TOKEN_BUCKET = {
    maxTokens: 50,          // bucket capacity
    tokensPerMinute: 6,     // refill rate
    initialTokens: 50       // tokens available at start
};

// Default config: used as-is when no config file exists, and as the base
// that a user's config file is spread over.
const DEFAULT_CONFIG = {
    apiKey: '',
    webuiPassword: '',
    debug: false,
    logLevel: 'info',
    maxRetries: 5,
    retryBaseMs: 1000,
    retryMaxMs: 30000,
    persistTokenCache: false,
    defaultCooldownMs: 10000,       // 10 seconds
    maxWaitBeforeErrorMs: 120000,   // 2 minutes
    modelMapping: {},
    // Account selection strategy configuration
    accountSelection: {
        strategy: 'hybrid',         // 'sticky' | 'round-robin' | 'hybrid'
        healthScore: DEFAULT_HEALTH_SCORE,
        tokenBucket: DEFAULT_TOKEN_BUCKET
    }
};
|
|
39
|
+
|
|
40
|
+
// Config locations: <home>/.config/antigravity-proxy/config.json
const HOME_DIR = os.homedir();
const CONFIG_DIR = path.join(HOME_DIR, '.config', 'antigravity-proxy');
const CONFIG_FILE = path.join(CONFIG_DIR, 'config.json');

// Ensure config dir exists.
// With `recursive: true`, mkdirSync does nothing when the directory is already
// there, so no existsSync pre-check is needed. Failure stays non-fatal (the
// proxy can still run on defaults or a local config.json) but is logged
// instead of silently ignored, so a later failed save can be diagnosed.
try {
    fs.mkdirSync(CONFIG_DIR, { recursive: true });
} catch (err) {
    logger.warn(`[Config] Could not create config directory ${CONFIG_DIR}: ${err.message}`);
}
|
|
53
|
+
|
|
54
|
+
// Load config
let config = { ...DEFAULT_CONFIG };

/**
 * (Re)build the module-level `config` object.
 *
 * Precedence is environment > config file > defaults:
 *   1. The first existing file of CONFIG_FILE and ./config.json (resolved
 *      against the current working directory) is parsed and spread over
 *      DEFAULT_CONFIG. The merge is shallow: a nested object supplied by the
 *      user (e.g. `accountSelection`) replaces the default one wholesale.
 *   2. API_KEY, WEBUI_PASSWORD and DEBUG=true from the environment are then
 *      applied - also when the file could not be read or parsed, so a broken
 *      config file cannot silently drop credentials supplied via env vars.
 *
 * Errors while reading or parsing the file are logged, never thrown; in that
 * case `config` keeps its previous contents (plus the env overrides).
 */
function loadConfig() {
    try {
        // Home-directory config wins; ./config.json is only a fallback.
        const candidates = [CONFIG_FILE, path.resolve('config.json')];
        const configPath = candidates.find((candidate) => fs.existsSync(candidate));

        if (configPath) {
            const userConfig = JSON.parse(fs.readFileSync(configPath, 'utf8'));

            // Spreading an array or string would inject index keys into config.
            if (userConfig === null || typeof userConfig !== 'object' || Array.isArray(userConfig)) {
                throw new TypeError(`${configPath} must contain a JSON object`);
            }

            config = { ...DEFAULT_CONFIG, ...userConfig };
        }
    } catch (error) {
        logger.error('[Config] Error loading config:', error);
    }

    // Environment overrides
    if (process.env.API_KEY) config.apiKey = process.env.API_KEY;
    if (process.env.WEBUI_PASSWORD) config.webuiPassword = process.env.WEBUI_PASSWORD;
    if (process.env.DEBUG === 'true') config.debug = true;
}

// Initial load
loadConfig();
|
|
88
|
+
|
|
89
|
+
/**
 * Return a shallow copy of the current configuration.
 *
 * Top-level properties are copied, so adding or replacing keys on the result
 * does not touch the live config; nested objects are shared with it.
 * NOTE(review): despite the name, every key is copied as-is, including
 * `apiKey` and `webuiPassword` - confirm callers redact before exposing it.
 *
 * @returns {Object} Copy of the module-level config object
 */
export function getPublicConfig() {
    const snapshot = Object.assign({}, config);
    return snapshot;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Merge `updates` into the in-memory config and persist the result to
 * CONFIG_FILE as pretty-printed JSON.
 *
 * The in-memory config is updated first and stays updated even when the write
 * fails (the return value then reports the failure). The config directory is
 * (re)created before writing, so saving also works when it did not exist at
 * start-up or was removed while the process was running.
 *
 * @param {Object} updates - Top-level keys to overwrite (shallow merge)
 * @returns {boolean} True when the file was written, false on any error
 */
export function saveConfig(updates) {
    try {
        // Apply updates
        config = { ...config, ...updates };

        // Make sure the target directory exists (no-op if it already does)
        fs.mkdirSync(CONFIG_DIR, { recursive: true });

        // Save to disk
        fs.writeFileSync(CONFIG_FILE, JSON.stringify(config, null, 2), 'utf8');
        return true;
    } catch (error) {
        logger.error('[Config] Failed to save config:', error);
        return false;
    }
}
|
|
106
|
+
|
|
107
|
+
export { config };
|