antigravity-claude-proxy 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1018 @@
1
+ /**
2
+ * Cloud Code Client for Antigravity
3
+ *
4
+ * Communicates with Google's Cloud Code internal API using the
5
+ * v1internal:streamGenerateContent endpoint with proper request wrapping.
6
+ *
7
+ * Supports multi-account load balancing with automatic failover.
8
+ *
9
+ * Based on: https://github.com/NoeFabris/opencode-antigravity-auth
10
+ */
11
+
12
+ import crypto from 'crypto';
13
+ import {
14
+ ANTIGRAVITY_ENDPOINT_FALLBACKS,
15
+ ANTIGRAVITY_HEADERS,
16
+ AVAILABLE_MODELS,
17
+ MAX_RETRIES,
18
+ MAX_WAIT_BEFORE_ERROR_MS,
19
+ MIN_SIGNATURE_LENGTH
20
+ } from './constants.js';
21
+ import {
22
+ mapModelName,
23
+ convertAnthropicToGoogle,
24
+ convertGoogleToAnthropic
25
+ } from './format-converter.js';
26
+ import { formatDuration, sleep } from './utils/helpers.js';
27
+ import { isRateLimitError, isAuthError } from './errors.js';
28
+
29
/**
 * Legacy alias that classifies an error as a rate-limit failure.
 *
 * Delegates entirely to isRateLimitError; kept so existing call sites in this
 * module keep working.
 *
 * @deprecated Use isRateLimitError from errors.js instead
 * @param {*} error - The value to classify
 * @returns {*} The result of isRateLimitError for the same value
 */
function is429Error(error) {
    const rateLimited = isRateLimitError(error);
    return rateLimited;
}
36
+
37
/**
 * Legacy alias that classifies an error as an invalid-credentials failure.
 *
 * Delegates entirely to isAuthError; kept so existing call sites in this
 * module keep working.
 *
 * @deprecated Use isAuthError from errors.js instead
 * @param {*} error - The value to classify
 * @returns {*} The result of isAuthError for the same value
 */
function isAuthInvalidError(error) {
    const authInvalid = isAuthError(error);
    return authInvalid;
}
44
+
45
/**
 * Compute a session ID that stays the same for every turn of a conversation.
 *
 * The ID is the first 32 hex characters of the SHA-256 of the first user
 * message that carries text. String content is hashed as-is; array content is
 * reduced to its non-empty `text` blocks joined with newlines. User messages
 * that yield no text are skipped and the search continues.
 *
 * @param {Object} anthropicRequest - The Anthropic-format request
 * @returns {string} 32 hex characters, or a random UUID when no user message
 *   with text exists
 */
function deriveSessionId(anthropicRequest) {
    const conversation = anthropicRequest.messages || [];

    // Flatten one message's content into plain text ('' when there is none).
    const textOf = (body) => {
        if (typeof body === 'string') {
            return body;
        }
        if (!Array.isArray(body)) {
            return '';
        }
        const pieces = [];
        for (const block of body) {
            if (block.type === 'text' && block.text) {
                pieces.push(block.text);
            }
        }
        return pieces.join('\n');
    };

    for (const turn of conversation) {
        if (turn.role !== 'user') {
            continue;
        }
        const text = textOf(turn.content);
        if (!text) {
            continue;
        }
        const digest = crypto.createHash('sha256').update(text).digest('hex');
        return digest.slice(0, 32);
    }

    // No usable user text: fall back to a one-off identifier.
    return crypto.randomUUID();
}
82
+
83
/**
 * Work out how long to wait before a rate limit resets.
 *
 * Sources are tried in order and the first one that produces a value wins:
 *   1. `Retry-After` header (delay in seconds, or an HTTP date)
 *   2. `x-ratelimit-reset` header (Unix timestamp in seconds)
 *   3. `x-ratelimit-reset-after` header (delay in seconds)
 *   4. Patterns inside the error message / body text
 *
 * Headers are only consulted when the first argument exposes a
 * `headers.get()` function. The text that is scanned is the Error's message
 * when an Error is passed, otherwise `errorText`.
 *
 * @param {Response|Error} responseOrError - HTTP Response object or Error
 * @param {string} errorText - Optional error body text
 * @returns {number|null} Wait time in milliseconds, or null if none was found
 */
function parseResetTime(responseOrError, errorText = '') {
    let resetMs = null;

    // If it's a Response-like object, check headers first
    if (responseOrError && typeof responseOrError.headers?.get === 'function') {
        const headers = responseOrError.headers;

        // Standard Retry-After header (seconds or HTTP date)
        const retryAfter = headers.get('retry-after');
        if (retryAfter) {
            const seconds = parseInt(retryAfter, 10);
            if (!Number.isNaN(seconds)) {
                // Zero or negative delays carry no usable wait time; ignore them
                // (same rule as x-ratelimit-reset-after below) so a negative
                // duration can never be returned to the caller.
                if (seconds > 0) {
                    resetMs = seconds * 1000;
                    console.log(`[CloudCode] Retry-After header: ${seconds}s`);
                }
            } else {
                // Try parsing as HTTP date
                const date = new Date(retryAfter);
                if (!Number.isNaN(date.getTime())) {
                    resetMs = date.getTime() - Date.now();
                    if (resetMs > 0) {
                        console.log(`[CloudCode] Retry-After date: ${retryAfter}`);
                    } else {
                        // Date already passed
                        resetMs = null;
                    }
                }
            }
        }

        // x-ratelimit-reset (Unix timestamp in seconds)
        if (!resetMs) {
            const ratelimitReset = headers.get('x-ratelimit-reset');
            if (ratelimitReset) {
                const resetTimestamp = parseInt(ratelimitReset, 10) * 1000;
                resetMs = resetTimestamp - Date.now();
                // An unparsable value yields NaN, which fails this comparison
                // and is discarded together with timestamps in the past.
                if (resetMs > 0) {
                    console.log(`[CloudCode] x-ratelimit-reset: ${new Date(resetTimestamp).toISOString()}`);
                } else {
                    resetMs = null;
                }
            }
        }

        // x-ratelimit-reset-after (seconds)
        if (!resetMs) {
            const resetAfter = headers.get('x-ratelimit-reset-after');
            if (resetAfter) {
                const seconds = parseInt(resetAfter, 10);
                if (!Number.isNaN(seconds) && seconds > 0) {
                    resetMs = seconds * 1000;
                    console.log(`[CloudCode] x-ratelimit-reset-after: ${seconds}s`);
                }
            }
        }
    }

    // If no header found, try parsing from error message/body
    if (!resetMs) {
        const msg = (responseOrError instanceof Error ? responseOrError.message : errorText) || '';

        // "retry-after-ms" / "retryDelay" followed by a seconds value, possibly
        // fractional (e.g. "7739.23s"). Checked first so the "s" suffix is honoured.
        const preciseMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+([\d\.]+)(?:s\b|s")/i);
        if (preciseMatch) {
            resetMs = Math.ceil(parseFloat(preciseMatch[1]) * 1000);
            console.log(`[CloudCode] Parsed retry seconds from body (precise): ${resetMs}ms`);
        }

        if (!resetMs) {
            // Same keys with an integer in milliseconds (explicit "ms" suffix or no
            // suffix). The lookahead rejects an "s" suffix and floats (handled above).
            const msMatch = msg.match(/(?:retry[-_]?after[-_]?ms|retryDelay)[:\s"]+(\d+)(?:\s*ms)?(?![\w.])/i);
            if (msMatch) {
                resetMs = parseInt(msMatch[1], 10);
                console.log(`[CloudCode] Parsed retry-after-ms from body: ${resetMs}ms`);
            }
        }

        // Free-form seconds value like "retry after 60 seconds"
        if (!resetMs) {
            const secMatch = msg.match(/retry\s+(?:after\s+)?(\d+)\s*(?:sec|s\b)/i);
            if (secMatch) {
                resetMs = parseInt(secMatch[1], 10) * 1000;
                console.log(`[CloudCode] Parsed retry seconds from body: ${secMatch[1]}s`);
            }
        }

        // Duration like "1h23m45s" or "23m45s" or "45s"
        if (!resetMs) {
            const durationMatch = msg.match(/(\d+)h(\d+)m(\d+)s|(\d+)m(\d+)s|(\d+)s/i);
            if (durationMatch) {
                if (durationMatch[1]) {
                    const hours = parseInt(durationMatch[1], 10);
                    const minutes = parseInt(durationMatch[2], 10);
                    const seconds = parseInt(durationMatch[3], 10);
                    resetMs = (hours * 3600 + minutes * 60 + seconds) * 1000;
                } else if (durationMatch[4]) {
                    const minutes = parseInt(durationMatch[4], 10);
                    const seconds = parseInt(durationMatch[5], 10);
                    resetMs = (minutes * 60 + seconds) * 1000;
                } else if (durationMatch[6]) {
                    resetMs = parseInt(durationMatch[6], 10) * 1000;
                }
                if (resetMs) {
                    console.log(`[CloudCode] Parsed duration from body: ${formatDuration(resetMs)}`);
                }
            }
        }

        // ISO timestamp following the word "reset"
        if (!resetMs) {
            const isoMatch = msg.match(/reset[:\s"]+(\d{4}-\d{2}-\d{2}T[\d:.]+Z?)/i);
            if (isoMatch) {
                const resetTime = new Date(isoMatch[1]).getTime();
                if (!Number.isNaN(resetTime)) {
                    resetMs = resetTime - Date.now();
                    if (resetMs > 0) {
                        console.log(`[CloudCode] Parsed ISO reset time: ${isoMatch[1]}`);
                    } else {
                        resetMs = null;
                    }
                }
            }
        }
    }

    return resetMs;
}
217
+
218
/**
 * Wrap an Anthropic-format request in the envelope sent to the Cloud Code API.
 *
 * The converted Google-format request gets a `sessionId` from deriveSessionId
 * so that repeated turns of one conversation share the same ID. Each call
 * gets a fresh `requestId` of the form `agent-<uuid>`.
 *
 * @param {Object} anthropicRequest - The Anthropic-format request
 * @param {*} projectId - Value placed in the envelope's `project` field
 * @returns {Object} Envelope with project, model, request, userAgent, requestId
 */
function buildCloudCodeRequest(anthropicRequest, projectId) {
    const targetModel = mapModelName(anthropicRequest.model);

    const request = convertAnthropicToGoogle(anthropicRequest);
    request.sessionId = deriveSessionId(anthropicRequest);

    return {
        project: projectId,
        model: targetModel,
        request,
        userAgent: 'antigravity',
        requestId: `agent-${crypto.randomUUID()}`
    };
}
238
+
239
/**
 * Assemble the HTTP headers for a Cloud Code API call.
 *
 * Starts from the bearer token and JSON content type, then layers
 * ANTIGRAVITY_HEADERS on top. Models whose name contains "thinking"
 * (case-insensitive) also get the interleaved-thinking `anthropic-beta`
 * header. `Accept` is only set when it differs from `application/json`.
 *
 * @param {string} token - OAuth access token
 * @param {string} model - Model name
 * @param {string} [accept='application/json'] - Desired Accept value
 * @returns {Object} Header name/value map
 */
function buildHeaders(token, model, accept = 'application/json') {
    const headers = Object.assign(
        {
            'Authorization': `Bearer ${token}`,
            'Content-Type': 'application/json'
        },
        ANTIGRAVITY_HEADERS
    );

    if (model.toLowerCase().includes('thinking')) {
        headers['anthropic-beta'] = 'interleaved-thinking-2025-05-14';
    }

    if (accept !== 'application/json') {
        headers['Accept'] = accept;
    }

    return headers;
}
261
+
262
/**
 * Send a non-streaming request to Cloud Code with multi-account support.
 *
 * Models whose mapped name contains "thinking" are sent to the SSE endpoint
 * and the stream is accumulated into one response (the non-streaming endpoint
 * doesn't return thinking blocks); all other models use the plain JSON
 * endpoint.
 *
 * For each attempt an account is selected, then every endpoint in
 * ANTIGRAVITY_ENDPOINT_FALLBACKS is tried in order:
 *   - 401: the account's token and project caches are cleared and the next
 *     endpoint is tried (the token already fetched for this attempt is reused).
 *   - 429: the next endpoint is tried; if the attempt ends with a 429 recorded
 *     as the last error, the account is marked rate-limited and the next
 *     attempt picks another account.
 *   - any other non-OK status: recorded as the endpoint's error.
 *
 * @param {Object} anthropicRequest - The Anthropic-format request
 * @param {string} anthropicRequest.model - Model name to use
 * @param {Array} anthropicRequest.messages - Array of message objects
 * @param {number} [anthropicRequest.max_tokens] - Maximum tokens to generate
 * @param {Object} [anthropicRequest.thinking] - Thinking configuration
 * @param {import('./account-manager.js').default} accountManager - The account manager instance
 * @returns {Promise<Object>} Anthropic-format response object
 * @throws {Error} If max retries exceeded or no accounts available
 */
export async function sendMessage(anthropicRequest, accountManager) {
    const model = mapModelName(anthropicRequest.model);
    const isThinkingModel = model.toLowerCase().includes('thinking');

    // Retry loop with account failover.
    // Try at least as many times as there are accounts so every account gets a turn;
    // +1 so the "all accounts rate-limited" check at the top of the loop is reached.
    const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
        // Use sticky account selection for cache continuity
        const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount();
        let account = stickyAccount;

        // Handle waiting for sticky account
        if (!account && waitMs > 0) {
            console.log(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`);
            await sleep(waitMs);
            accountManager.clearExpiredLimits();
            account = accountManager.getCurrentStickyAccount();
        }

        // Handle all accounts rate-limited
        if (!account) {
            if (accountManager.isAllRateLimited()) {
                const allWaitMs = accountManager.getMinWaitTimeMs();
                const resetTime = new Date(Date.now() + allWaitMs).toISOString();

                // If the wait exceeds MAX_WAIT_BEFORE_ERROR_MS, fail immediately
                if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
                    throw new Error(
                        `RESOURCE_EXHAUSTED: Rate limited. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}`
                    );
                }

                // Wait for reset (applies to both single and multi-account modes)
                const accountCount = accountManager.getAccountCount();
                console.log(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(allWaitMs)}...`);
                await sleep(allWaitMs);
                accountManager.clearExpiredLimits();
                account = accountManager.pickNext();
            }

            if (!account) {
                throw new Error('No accounts available');
            }
        }

        try {
            // Get token and project for this account
            const token = await accountManager.getTokenForAccount(account);
            const project = await accountManager.getProjectForAccount(account, token);
            const payload = buildCloudCodeRequest(anthropicRequest, project);

            console.log(`[CloudCode] Sending request for model: ${model}`);

            // Try each endpoint
            let lastError = null;
            for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
                try {
                    const url = isThinkingModel
                        ? `${endpoint}/v1internal:streamGenerateContent?alt=sse`
                        : `${endpoint}/v1internal:generateContent`;

                    const response = await fetch(url, {
                        method: 'POST',
                        headers: buildHeaders(token, model, isThinkingModel ? 'text/event-stream' : 'application/json'),
                        body: JSON.stringify(payload)
                    });

                    if (!response.ok) {
                        const errorText = await response.text();
                        console.log(`[CloudCode] Error at ${endpoint}: ${response.status} - ${errorText}`);

                        if (response.status === 401) {
                            // Auth error - drop cached credentials so the next attempt fetches fresh ones
                            console.log('[CloudCode] Auth error, refreshing token...');
                            accountManager.clearTokenCache(account.email);
                            accountManager.clearProjectCache(account.email);
                            continue;
                        }

                        if (response.status === 429) {
                            // Rate limited on this endpoint - try next endpoint first
                            console.log(`[CloudCode] Rate limited at ${endpoint}, trying next endpoint...`);
                            const resetMs = parseResetTime(response, errorText);
                            // Keep minimum reset time across all 429 responses
                            if (!lastError?.is429 || (resetMs && (!lastError.resetMs || resetMs < lastError.resetMs))) {
                                lastError = { is429: true, response, errorText, resetMs };
                            }
                            continue;
                        }

                        // Every remaining non-OK status is a failure for this endpoint.
                        // (Previously statuses below 400 fell through and the error body
                        // was parsed as if it were a successful response.)
                        lastError = new Error(`API error ${response.status}: ${errorText}`);
                        continue;
                    }

                    // For thinking models, parse SSE and accumulate all parts
                    if (isThinkingModel) {
                        return await parseThinkingSSEResponse(response, anthropicRequest.model);
                    }

                    // Non-thinking models use regular JSON
                    const data = await response.json();
                    console.log('[CloudCode] Response received');
                    return convertGoogleToAnthropic(data, anthropicRequest.model);

                } catch (endpointError) {
                    if (is429Error(endpointError)) {
                        throw endpointError; // Re-throw to trigger account switch
                    }
                    console.log(`[CloudCode] Error at ${endpoint}:`, endpointError.message);
                    lastError = endpointError;
                }
            }

            // If all endpoints failed for this account
            if (lastError) {
                // A recorded 429 means the account is treated as rate-limited
                if (lastError.is429) {
                    console.log(`[CloudCode] All endpoints rate-limited for ${account.email}`);
                    accountManager.markRateLimited(account.email, lastError.resetMs);
                    throw new Error(`Rate limited: ${lastError.errorText}`);
                }
                throw lastError;
            }
            // No lastError here means every endpoint answered 401; the loop
            // continues so the next attempt can obtain fresh credentials.

        } catch (error) {
            if (is429Error(error)) {
                // Rate limited - already marked, continue to next account
                console.log(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
                continue;
            }
            if (isAuthInvalidError(error)) {
                // Auth invalid - continue to next account
                console.log(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
                continue;
            }
            // Non-rate-limit error: throw immediately
            throw error;
        }
    }

    throw new Error('Max retries exceeded');
}
422
+
423
/**
 * Read an SSE response to completion and fold it into a single response.
 *
 * Consecutive thinking parts are merged into one thought part (keeping the
 * most recent `thoughtSignature` seen for that run), consecutive text parts
 * are merged into one text part, and function-call parts are passed through
 * unchanged. The latest `usageMetadata` and `finishReason` win
 * (`finishReason` defaults to 'STOP'). The merged result is handed to
 * convertGoogleToAnthropic.
 *
 * Lines that do not start with `data:` are ignored; a `data:` payload that
 * fails to parse is logged and skipped.
 *
 * @param {Response} response - Response whose body is an SSE stream
 * @param {string} originalModel - Model name passed through to the converter
 * @returns {Promise<Object>} Result of convertGoogleToAnthropic
 */
async function parseThinkingSSEResponse(response, originalModel) {
    let accumulatedThinkingText = '';
    let accumulatedThinkingSignature = '';
    let accumulatedText = '';
    const finalParts = [];
    let usageMetadata = {};
    let finishReason = 'STOP';

    // Emit the pending thinking run (if any) as one thought part.
    const flushThinking = () => {
        if (accumulatedThinkingText) {
            finalParts.push({
                thought: true,
                text: accumulatedThinkingText,
                thoughtSignature: accumulatedThinkingSignature
            });
            accumulatedThinkingText = '';
            accumulatedThinkingSignature = '';
        }
    };

    // Emit the pending text run (if any) as one text part.
    const flushText = () => {
        if (accumulatedText) {
            finalParts.push({ text: accumulatedText });
            accumulatedText = '';
        }
    };

    // Fold one SSE line into the accumulators.
    const consumeLine = (line) => {
        if (!line.startsWith('data:')) return;
        const jsonText = line.slice(5).trim();
        if (!jsonText) return;

        try {
            const data = JSON.parse(jsonText);
            // Payload may be wrapped in a `response` envelope
            const innerResponse = data.response || data;

            if (innerResponse.usageMetadata) {
                usageMetadata = innerResponse.usageMetadata;
            }

            const candidates = innerResponse.candidates || [];
            const firstCandidate = candidates[0] || {};
            if (firstCandidate.finishReason) {
                finishReason = firstCandidate.finishReason;
            }

            const parts = firstCandidate.content?.parts || [];
            for (const part of parts) {
                if (part.thought === true) {
                    flushText();
                    accumulatedThinkingText += (part.text || '');
                    if (part.thoughtSignature) {
                        accumulatedThinkingSignature = part.thoughtSignature;
                    }
                } else if (part.functionCall) {
                    flushThinking();
                    flushText();
                    finalParts.push(part);
                } else if (part.text !== undefined) {
                    if (!part.text) continue;
                    flushThinking();
                    accumulatedText += part.text;
                }
            }
        } catch (e) {
            console.log('[CloudCode] SSE parse warning:', e.message, 'Raw:', jsonText.slice(0, 100));
        }
    };

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';

    while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        // The last element is an incomplete line; keep it for the next chunk
        buffer = lines.pop() || '';

        for (const line of lines) {
            consumeLine(line);
        }
    }

    // Flush the decoder and process a final line that had no trailing newline;
    // without this the last event of such a stream would be silently lost.
    buffer += decoder.decode();
    if (buffer) {
        consumeLine(buffer);
    }

    flushThinking();
    flushText();

    const accumulatedResponse = {
        candidates: [{ content: { parts: finalParts }, finishReason }],
        usageMetadata
    };

    const partTypes = finalParts.map(p => p.thought ? 'thought' : (p.functionCall ? 'functionCall' : 'text'));
    console.log('[CloudCode] Response received (SSE), part types:', partTypes);
    if (finalParts.some(p => p.thought)) {
        const thinkingPart = finalParts.find(p => p.thought);
        console.log('[CloudCode] Thinking signature length:', thinkingPart?.thoughtSignature?.length || 0);
    }

    return convertGoogleToAnthropic(accumulatedResponse, originalModel);
}
525
+
526
/**
 * Send a streaming request to Cloud Code with multi-account support.
 * Events are yielded in real time as they arrive from the server.
 *
 * For each attempt an account is selected, then every endpoint in
 * ANTIGRAVITY_ENDPOINT_FALLBACKS is tried in order:
 *   - 401: the account's token and project caches are cleared and the next
 *     endpoint is tried (the token already fetched for this attempt is reused).
 *   - 429: the next endpoint is tried; if the attempt ends with a 429 recorded
 *     as the last error, the account is marked rate-limited and the next
 *     attempt picks another account.
 *   - any other non-OK status: recorded as the endpoint's error.
 *
 * Failover only happens before the first event has been yielded. Once the
 * consumer has received events, an error is rethrown as-is: restarting on
 * another endpoint or account would emit a second `message_start` and corrupt
 * the stream the consumer is already reading.
 *
 * @param {Object} anthropicRequest - The Anthropic-format request
 * @param {string} anthropicRequest.model - Model name to use
 * @param {Array} anthropicRequest.messages - Array of message objects
 * @param {number} [anthropicRequest.max_tokens] - Maximum tokens to generate
 * @param {Object} [anthropicRequest.thinking] - Thinking configuration
 * @param {import('./account-manager.js').default} accountManager - The account manager instance
 * @yields {Object} Anthropic-format SSE events (message_start, content_block_start, content_block_delta, etc.)
 * @throws {Error} If max retries exceeded or no accounts available
 */
export async function* sendMessageStream(anthropicRequest, accountManager) {
    const model = mapModelName(anthropicRequest.model);

    // Retry loop with account failover.
    // Try at least as many times as there are accounts so every account gets a turn;
    // +1 so the "all accounts rate-limited" check at the top of the loop is reached.
    const maxAttempts = Math.max(MAX_RETRIES, accountManager.getAccountCount() + 1);

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
        // Use sticky account selection for cache continuity
        const { account: stickyAccount, waitMs } = accountManager.pickStickyAccount();
        let account = stickyAccount;

        // Handle waiting for sticky account
        if (!account && waitMs > 0) {
            console.log(`[CloudCode] Waiting ${formatDuration(waitMs)} for sticky account...`);
            await sleep(waitMs);
            accountManager.clearExpiredLimits();
            account = accountManager.getCurrentStickyAccount();
        }

        // Handle all accounts rate-limited
        if (!account) {
            if (accountManager.isAllRateLimited()) {
                const allWaitMs = accountManager.getMinWaitTimeMs();
                const resetTime = new Date(Date.now() + allWaitMs).toISOString();

                // If the wait exceeds MAX_WAIT_BEFORE_ERROR_MS, fail immediately
                if (allWaitMs > MAX_WAIT_BEFORE_ERROR_MS) {
                    throw new Error(
                        `RESOURCE_EXHAUSTED: Rate limited. Quota will reset after ${formatDuration(allWaitMs)}. Next available: ${resetTime}`
                    );
                }

                // Wait for reset (applies to both single and multi-account modes)
                const accountCount = accountManager.getAccountCount();
                console.log(`[CloudCode] All ${accountCount} account(s) rate-limited. Waiting ${formatDuration(allWaitMs)}...`);
                await sleep(allWaitMs);
                accountManager.clearExpiredLimits();
                account = accountManager.pickNext();
            }

            if (!account) {
                throw new Error('No accounts available');
            }
        }

        // Set once any event has reached the consumer; disables all failover.
        let hasYielded = false;

        try {
            // Get token and project for this account
            const token = await accountManager.getTokenForAccount(account);
            const project = await accountManager.getProjectForAccount(account, token);
            const payload = buildCloudCodeRequest(anthropicRequest, project);

            console.log(`[CloudCode] Starting stream for model: ${model}`);

            // Try each endpoint for streaming
            let lastError = null;
            for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
                try {
                    const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;

                    const response = await fetch(url, {
                        method: 'POST',
                        headers: buildHeaders(token, model, 'text/event-stream'),
                        body: JSON.stringify(payload)
                    });

                    if (!response.ok) {
                        const errorText = await response.text();
                        console.log(`[CloudCode] Stream error at ${endpoint}: ${response.status} - ${errorText}`);

                        if (response.status === 401) {
                            // Auth error - drop cached credentials so the next attempt fetches fresh ones
                            accountManager.clearTokenCache(account.email);
                            accountManager.clearProjectCache(account.email);
                            continue;
                        }

                        if (response.status === 429) {
                            // Rate limited on this endpoint - try next endpoint first
                            console.log(`[CloudCode] Stream rate limited at ${endpoint}, trying next endpoint...`);
                            const resetMs = parseResetTime(response, errorText);
                            // Keep minimum reset time across all 429 responses
                            if (!lastError?.is429 || (resetMs && (!lastError.resetMs || resetMs < lastError.resetMs))) {
                                lastError = { is429: true, response, errorText, resetMs };
                            }
                            continue;
                        }

                        lastError = new Error(`API error ${response.status}: ${errorText}`);
                        continue;
                    }

                    // Stream the response - yield events as they arrive
                    for await (const event of streamSSEResponse(response, anthropicRequest.model)) {
                        hasYielded = true;
                        yield event;
                    }

                    console.log('[CloudCode] Stream completed');
                    return;

                } catch (endpointError) {
                    if (hasYielded || is429Error(endpointError)) {
                        // Mid-stream failure must surface to the consumer;
                        // a 429 is re-thrown to trigger an account switch.
                        throw endpointError;
                    }
                    console.log(`[CloudCode] Stream error at ${endpoint}:`, endpointError.message);
                    lastError = endpointError;
                }
            }

            // If all endpoints failed for this account
            if (lastError) {
                // A recorded 429 means the account is treated as rate-limited
                if (lastError.is429) {
                    console.log(`[CloudCode] All endpoints rate-limited for ${account.email}`);
                    accountManager.markRateLimited(account.email, lastError.resetMs);
                    throw new Error(`Rate limited: ${lastError.errorText}`);
                }
                throw lastError;
            }
            // No lastError here means every endpoint answered 401; the loop
            // continues so the next attempt can obtain fresh credentials.

        } catch (error) {
            if (hasYielded) {
                // Never switch accounts after partial output has been delivered
                throw error;
            }
            if (is429Error(error)) {
                // Rate limited - already marked, continue to next account
                console.log(`[CloudCode] Account ${account.email} rate-limited, trying next...`);
                continue;
            }
            if (isAuthInvalidError(error)) {
                // Auth invalid - continue to next account
                console.log(`[CloudCode] Account ${account.email} has invalid credentials, trying next...`);
                continue;
            }
            // Non-rate-limit error: throw immediately
            throw error;
        }
    }

    throw new Error('Max retries exceeded');
}
676
+
677
/**
 * Translate a Cloud Code SSE stream into Anthropic-format streaming events.
 *
 * Event order produced: one `message_start`, then for each content block a
 * `content_block_start`, its deltas, and a `content_block_stop`, and finally
 * `message_delta` and `message_stop`. Block kinds are `thinking`, `text` and
 * `tool_use`. A thinking block's signature (only signatures of at least
 * MIN_SIGNATURE_LENGTH characters are kept) is emitted as a `signature_delta`
 * just before that block is closed.
 *
 * If the stream ends without any content parts, a single text block
 * containing "[No response received from API]" is emitted instead.
 *
 * Lines that do not start with `data:` are ignored; an event that fails to
 * parse or process is logged and skipped.
 *
 * @param {Response} response - Response whose body is an SSE stream
 * @param {string} originalModel - Model name reported in `message_start`
 * @yields {Object} Anthropic-format SSE events
 */
async function* streamSSEResponse(response, originalModel) {
    const messageId = `msg_${crypto.randomBytes(16).toString('hex')}`;
    let hasEmittedStart = false;
    let blockIndex = 0;
    let currentBlockType = null;
    let currentThinkingSignature = '';
    let inputTokens = 0;
    let outputTokens = 0;
    let cacheReadTokens = 0;
    let stopReason = 'end_turn';

    // Build the message_start event from the usage counters seen so far.
    // input_tokens = promptTokenCount - cachedContentTokenCount, because the
    // upstream prompt count includes cached tokens.
    const messageStartEvent = () => ({
        type: 'message_start',
        message: {
            id: messageId,
            type: 'message',
            role: 'assistant',
            content: [],
            model: originalModel,
            stop_reason: null,
            stop_sequence: null,
            usage: {
                input_tokens: inputTokens - cacheReadTokens,
                output_tokens: 0,
                cache_read_input_tokens: cacheReadTokens,
                cache_creation_input_tokens: 0
            }
        }
    });

    // Close the currently open block, emitting a pending thinking signature first.
    function* closeOpenBlock() {
        if (currentBlockType === null) return;
        if (currentBlockType === 'thinking' && currentThinkingSignature) {
            yield {
                type: 'content_block_delta',
                index: blockIndex,
                delta: { type: 'signature_delta', signature: currentThinkingSignature }
            };
            currentThinkingSignature = '';
        }
        yield { type: 'content_block_stop', index: blockIndex };
        blockIndex++;
    }

    // Convert one SSE line into zero or more Anthropic events.
    function* handleLine(line) {
        if (!line.startsWith('data:')) return;

        const jsonText = line.slice(5).trim();
        if (!jsonText) return;

        try {
            const data = JSON.parse(jsonText);
            // Payload may be wrapped in a `response` envelope
            const innerResponse = data.response || data;

            // Extract usage metadata (including cache tokens); missing or zero
            // counters keep the previously seen value.
            const usage = innerResponse.usageMetadata;
            if (usage) {
                inputTokens = usage.promptTokenCount || inputTokens;
                outputTokens = usage.candidatesTokenCount || outputTokens;
                cacheReadTokens = usage.cachedContentTokenCount || cacheReadTokens;
            }

            const candidates = innerResponse.candidates || [];
            const firstCandidate = candidates[0] || {};
            const content = firstCandidate.content || {};
            const parts = content.parts || [];

            // Emit message_start on the first event that carries parts
            if (!hasEmittedStart && parts.length > 0) {
                hasEmittedStart = true;
                yield messageStartEvent();
            }

            for (const part of parts) {
                if (part.thought === true) {
                    // Thinking block
                    const text = part.text || '';
                    const signature = part.thoughtSignature || '';

                    if (currentBlockType !== 'thinking') {
                        yield* closeOpenBlock();
                        currentBlockType = 'thinking';
                        currentThinkingSignature = '';
                        yield {
                            type: 'content_block_start',
                            index: blockIndex,
                            content_block: { type: 'thinking', thinking: '' }
                        };
                    }

                    if (signature && signature.length >= MIN_SIGNATURE_LENGTH) {
                        currentThinkingSignature = signature;
                    }

                    yield {
                        type: 'content_block_delta',
                        index: blockIndex,
                        delta: { type: 'thinking_delta', thinking: text }
                    };

                } else if (part.text !== undefined) {
                    // Skip empty text parts
                    if (!part.text) {
                        continue;
                    }

                    if (currentBlockType !== 'text') {
                        // Don't open a new text block for whitespace alone...
                        if (part.text.trim().length === 0) {
                            continue;
                        }
                        yield* closeOpenBlock();
                        currentBlockType = 'text';
                        yield {
                            type: 'content_block_start',
                            index: blockIndex,
                            content_block: { type: 'text', text: '' }
                        };
                    }

                    // ...but inside an open text block whitespace-only deltas
                    // (spaces, newlines between paragraphs) are real content and
                    // must be forwarded, otherwise adjacent text gets glued together.
                    yield {
                        type: 'content_block_delta',
                        index: blockIndex,
                        delta: { type: 'text_delta', text: part.text }
                    };

                } else if (part.functionCall) {
                    // Tool use: each call is its own block
                    yield* closeOpenBlock();
                    currentBlockType = 'tool_use';
                    stopReason = 'tool_use';

                    const toolId = part.functionCall.id || `toolu_${crypto.randomBytes(12).toString('hex')}`;

                    yield {
                        type: 'content_block_start',
                        index: blockIndex,
                        content_block: {
                            type: 'tool_use',
                            id: toolId,
                            name: part.functionCall.name,
                            input: {}
                        }
                    };

                    // Arguments arrive complete, so they are sent as a single JSON delta
                    yield {
                        type: 'content_block_delta',
                        index: blockIndex,
                        delta: {
                            type: 'input_json_delta',
                            partial_json: JSON.stringify(part.functionCall.args || {})
                        }
                    };
                }
            }

            // Check finish reason
            if (firstCandidate.finishReason === 'MAX_TOKENS') {
                stopReason = 'max_tokens';
            } else if (firstCandidate.finishReason === 'STOP' && stopReason !== 'tool_use') {
                // A plain STOP must not overwrite 'tool_use': the consumer
                // relies on that stop reason to know a tool call is pending.
                stopReason = 'end_turn';
            }

        } catch (parseError) {
            console.log('[CloudCode] SSE parse error:', parseError.message);
        }
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';

    while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        // The last element is an incomplete line; keep it for the next chunk
        buffer = lines.pop() || '';

        for (const line of lines) {
            yield* handleLine(line);
        }
    }

    // Flush the decoder and process a final line that had no trailing newline;
    // without this the last event of such a stream would be silently lost.
    buffer += decoder.decode();
    if (buffer) {
        yield* handleLine(buffer);
    }

    if (!hasEmittedStart) {
        // Handle no content received
        console.log('[CloudCode] WARNING: No content parts received, emitting empty message');
        yield messageStartEvent();

        yield {
            type: 'content_block_start',
            index: 0,
            content_block: { type: 'text', text: '' }
        };
        yield {
            type: 'content_block_delta',
            index: 0,
            delta: { type: 'text_delta', text: '[No response received from API]' }
        };
        yield { type: 'content_block_stop', index: 0 };
    } else {
        // Close any open block
        yield* closeOpenBlock();
    }

    // Emit message_delta and message_stop
    yield {
        type: 'message_delta',
        delta: { stop_reason: stopReason, stop_sequence: null },
        usage: {
            output_tokens: outputTokens,
            cache_read_input_tokens: cacheReadTokens,
            cache_creation_input_tokens: 0
        }
    };

    yield { type: 'message_stop' };
}
931
+
932
/**
 * Describe every entry of AVAILABLE_MODELS as a model-list response.
 *
 * Each entry keeps its `id` and `description`; `created` is the current Unix
 * time in seconds and `owned_by` is always 'anthropic'.
 *
 * @returns {{object: string, data: Array<{id: string, object: string, created: number, owned_by: string, description: string}>}} List of available models
 */
export function listModels() {
    const toListEntry = ({ id, description }) => {
        const created = Math.floor(Date.now() / 1000);
        return {
            id,
            object: 'model',
            created,
            owned_by: 'anthropic',
            description
        };
    };

    const data = AVAILABLE_MODELS.map((model) => toListEntry(model));
    return { object: 'list', data };
}
949
+
950
/**
 * Fetch available models with quota info from the Cloud Code API.
 *
 * Each endpoint in ANTIGRAVITY_ENDPOINT_FALLBACKS is tried in order; the
 * first OK response is parsed as JSON and returned. Non-OK responses and
 * thrown errors are logged and the next endpoint is tried.
 *
 * @param {string} token - OAuth access token
 * @returns {Promise<Object>} Raw response from fetchAvailableModels API
 * @throws {Error} When no endpoint succeeds; the most recent failure is
 *   attached as `cause`
 */
export async function fetchAvailableModels(token) {
    const headers = {
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
        ...ANTIGRAVITY_HEADERS
    };

    // Remembered so the final error can say why the last endpoint failed
    let lastFailure = null;

    for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
        try {
            const url = `${endpoint}/v1internal:fetchAvailableModels`;
            const response = await fetch(url, {
                method: 'POST',
                headers,
                body: JSON.stringify({})
            });

            if (!response.ok) {
                // Include the body in the log; the status alone rarely explains the failure
                const errorText = await response.text();
                console.log(`[CloudCode] fetchAvailableModels error at ${endpoint}: ${response.status} - ${errorText}`);
                lastFailure = new Error(`API error ${response.status}: ${errorText}`);
                continue;
            }

            return await response.json();
        } catch (error) {
            console.log(`[CloudCode] fetchAvailableModels failed at ${endpoint}:`, error.message);
            lastFailure = error;
        }
    }

    throw new Error(
        'Failed to fetch available models from all endpoints',
        lastFailure ? { cause: lastFailure } : undefined
    );
}
987
+
988
/**
 * Collect per-model quota information for an account.
 *
 * Calls fetchAvailableModels and keeps only models that carry `quotaInfo`.
 * Fields missing from `quotaInfo` are reported as null.
 *
 * @param {string} token - OAuth access token
 * @returns {Promise<Object>} Map of modelId -> { remainingFraction, resetTime };
 *   empty when the response has no `models`
 */
export async function getModelQuotas(token) {
    const data = await fetchAvailableModels(token);
    if (!data?.models) {
        return {};
    }

    const quotas = {};
    for (const modelId of Object.keys(data.models)) {
        const info = data.models[modelId].quotaInfo;
        if (!info) {
            continue;
        }
        const { remainingFraction, resetTime } = info;
        quotas[modelId] = {
            remainingFraction: remainingFraction ?? null,
            resetTime: resetTime ?? null
        };
    }

    return quotas;
}
1011
+
1012
// Default export: the module's public functions bundled in one object for
// callers that prefer `import client from ...` over named imports.
const cloudCodeClient = {
    sendMessage,
    sendMessageStream,
    listModels,
    fetchAvailableModels,
    getModelQuotas
};

export default cloudCodeClient;