antigravity-claude-proxy 1.2.4 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "antigravity-claude-proxy",
3
- "version": "1.2.4",
3
+ "version": "1.2.5",
4
4
  "description": "Proxy server to use Antigravity's Claude models with Claude Code CLI",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -25,7 +25,8 @@
25
25
  "test:streaming": "node tests/test-multiturn-thinking-tools-streaming.cjs",
26
26
  "test:interleaved": "node tests/test-interleaved-thinking.cjs",
27
27
  "test:images": "node tests/test-images.cjs",
28
- "test:caching": "node tests/test-caching-streaming.cjs"
28
+ "test:caching": "node tests/test-caching-streaming.cjs",
29
+ "test:crossmodel": "node tests/test-cross-model-thinking.cjs"
29
30
  },
30
31
  "keywords": [
31
32
  "claude",
@@ -18,6 +18,7 @@ import { logger } from '../utils/logger.js';
18
18
  import { parseResetTime } from './rate-limit-parser.js';
19
19
  import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
20
20
  import { parseThinkingSSEResponse } from './sse-parser.js';
21
+ import { getFallbackModel } from '../fallback-config.js';
21
22
 
22
23
  /**
23
24
  * Send a non-streaming request to Cloud Code with multi-account support
@@ -32,7 +33,7 @@ import { parseThinkingSSEResponse } from './sse-parser.js';
32
33
  * @returns {Promise<Object>} Anthropic-format response object
33
34
  * @throws {Error} If max retries exceeded or no accounts available
34
35
  */
35
- export async function sendMessage(anthropicRequest, accountManager) {
36
+ export async function sendMessage(anthropicRequest, accountManager, fallbackEnabled = false) {
36
37
  const model = anthropicRequest.model;
37
38
  const isThinking = isThinkingModel(model);
38
39
 
@@ -76,6 +77,16 @@ export async function sendMessage(anthropicRequest, accountManager) {
76
77
  }
77
78
 
78
79
  if (!account) {
80
+ // Check if fallback is enabled and available
81
+ if (fallbackEnabled) {
82
+ const fallbackModel = getFallbackModel(model);
83
+ if (fallbackModel) {
84
+ logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel}`);
85
+ // Retry with fallback model
86
+ const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
87
+ return await sendMessage(fallbackRequest, accountManager, false); // Disable fallback for recursive call
88
+ }
89
+ }
79
90
  throw new Error('No accounts available');
80
91
  }
81
92
  }
@@ -6,8 +6,8 @@
6
6
  */
7
7
 
8
8
  import crypto from 'crypto';
9
- import { MIN_SIGNATURE_LENGTH } from '../constants.js';
10
- import { cacheSignature } from '../format/signature-cache.js';
9
+ import { MIN_SIGNATURE_LENGTH, getModelFamily } from '../constants.js';
10
+ import { cacheSignature, cacheThinkingSignature } from '../format/signature-cache.js';
11
11
  import { logger } from '../utils/logger.js';
12
12
 
13
13
  /**
@@ -110,6 +110,9 @@ export async function* streamSSEResponse(response, originalModel) {
110
110
 
111
111
  if (signature && signature.length >= MIN_SIGNATURE_LENGTH) {
112
112
  currentThinkingSignature = signature;
113
+ // Cache thinking signature with model family for cross-model compatibility
114
+ const modelFamily = getModelFamily(originalModel);
115
+ cacheThinkingSignature(signature, modelFamily);
113
116
  }
114
117
 
115
118
  yield {
@@ -16,6 +16,7 @@ import { logger } from '../utils/logger.js';
16
16
  import { parseResetTime } from './rate-limit-parser.js';
17
17
  import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
18
18
  import { streamSSEResponse } from './sse-streamer.js';
19
+ import { getFallbackModel } from '../fallback-config.js';
19
20
 
20
21
 
21
22
  /**
@@ -31,7 +32,7 @@ import { streamSSEResponse } from './sse-streamer.js';
31
32
  * @yields {Object} Anthropic-format SSE events (message_start, content_block_start, content_block_delta, etc.)
32
33
  * @throws {Error} If max retries exceeded or no accounts available
33
34
  */
34
- export async function* sendMessageStream(anthropicRequest, accountManager) {
35
+ export async function* sendMessageStream(anthropicRequest, accountManager, fallbackEnabled = false) {
35
36
  const model = anthropicRequest.model;
36
37
 
37
38
  // Retry loop with account failover
@@ -74,6 +75,17 @@ export async function* sendMessageStream(anthropicRequest, accountManager) {
74
75
  }
75
76
 
76
77
  if (!account) {
78
+ // Check if fallback is enabled and available
79
+ if (fallbackEnabled) {
80
+ const fallbackModel = getFallbackModel(model);
81
+ if (fallbackModel) {
82
+ logger.warn(`[CloudCode] All accounts exhausted for ${model}. Attempting fallback to ${fallbackModel} (streaming)`);
83
+ // Retry with fallback model
84
+ const fallbackRequest = { ...anthropicRequest, model: fallbackModel };
85
+ yield* sendMessageStream(fallbackRequest, accountManager, false); // Disable fallback for recursive call
86
+ return;
87
+ }
88
+ }
77
89
  throw new Error('No accounts available');
78
90
  }
79
91
  }
package/src/constants.js CHANGED
@@ -144,6 +144,16 @@ export const OAUTH_CONFIG = {
144
144
  };
145
145
  export const OAUTH_REDIRECT_URI = `http://localhost:${OAUTH_CONFIG.callbackPort}/oauth-callback`;
146
146
 
147
+ // Model fallback mapping - maps primary model to fallback when quota exhausted
148
+ export const MODEL_FALLBACK_MAP = {
149
+ 'gemini-3-pro-high': 'claude-opus-4-5-thinking',
150
+ 'gemini-3-pro-low': 'claude-sonnet-4-5',
151
+ 'gemini-3-flash': 'claude-sonnet-4-5-thinking',
152
+ 'claude-opus-4-5-thinking': 'gemini-3-pro-high',
153
+ 'claude-sonnet-4-5-thinking': 'gemini-3-flash',
154
+ 'claude-sonnet-4-5': 'gemini-3-flash'
155
+ };
156
+
147
157
  export default {
148
158
  ANTIGRAVITY_ENDPOINT_FALLBACKS,
149
159
  ANTIGRAVITY_HEADERS,
@@ -165,5 +175,6 @@ export default {
165
175
  getModelFamily,
166
176
  isThinkingModel,
167
177
  OAUTH_CONFIG,
168
- OAUTH_REDIRECT_URI
178
+ OAUTH_REDIRECT_URI,
179
+ MODEL_FALLBACK_MAP
169
180
  };
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Model Fallback Configuration
3
+ *
4
+ * Defines fallback mappings for when a model's quota is exhausted across all accounts.
5
+ * Enables graceful degradation to alternative models with similar capabilities.
6
+ */
7
+
8
+ import { MODEL_FALLBACK_MAP } from './constants.js';
9
+
10
+ // Re-export for convenience
11
+ export { MODEL_FALLBACK_MAP };
12
+
13
/**
 * Get fallback model for a given model ID.
 *
 * Only the map's own entries are consulted. The model ID originates from the
 * incoming request, so names that exist on Object.prototype ("constructor",
 * "toString", "__proto__", ...) must not resolve to inherited values.
 *
 * @param {string} model - Primary model ID
 * @returns {string|null} Fallback model ID or null if no fallback exists
 */
export function getFallbackModel(model) {
  const isConfigured = Object.prototype.hasOwnProperty.call(MODEL_FALLBACK_MAP, model);
  if (!isConfigured) {
    return null;
  }
  // An empty/falsy mapping is treated as "no fallback", as before.
  return MODEL_FALLBACK_MAP[model] || null;
}
21
+
22
/**
 * Check if a model has a fallback configured.
 *
 * Uses an own-property check rather than the `in` operator, which would also
 * report keys inherited from Object.prototype (e.g. "toString") as configured.
 *
 * @param {string} model - Model ID to check
 * @returns {boolean} True if the fallback map has its own entry for the model
 */
export function hasFallback(model) {
  return Object.prototype.hasOwnProperty.call(MODEL_FALLBACK_MAP, model);
}
@@ -4,7 +4,7 @@
4
4
  */
5
5
 
6
6
  import { MIN_SIGNATURE_LENGTH, GEMINI_SKIP_SIGNATURE } from '../constants.js';
7
- import { getCachedSignature } from './signature-cache.js';
7
+ import { getCachedSignature, getCachedSignatureFamily } from './signature-cache.js';
8
8
  import { logger } from '../utils/logger.js';
9
9
 
10
10
  /**
@@ -155,16 +155,31 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo
155
155
  // Add any images from the tool result as separate parts
156
156
  parts.push(...imageParts);
157
157
  } else if (block.type === 'thinking') {
158
- // Handle thinking blocks - only those with valid signatures
158
+ // Handle thinking blocks with signature compatibility check
159
159
  if (block.signature && block.signature.length >= MIN_SIGNATURE_LENGTH) {
160
- // Convert to Gemini format with signature
160
+ const signatureFamily = getCachedSignatureFamily(block.signature);
161
+ const targetFamily = isClaudeModel ? 'claude' : isGeminiModel ? 'gemini' : null;
162
+
163
+ // Drop blocks with incompatible signatures for Gemini (cross-model switch)
164
+ if (isGeminiModel && signatureFamily && targetFamily && signatureFamily !== targetFamily) {
165
+ logger.debug(`[ContentConverter] Dropping incompatible ${signatureFamily} thinking for ${targetFamily} model`);
166
+ continue;
167
+ }
168
+
169
+ // Drop blocks with unknown signature origin for Gemini (cold cache - safe default)
170
+ if (isGeminiModel && !signatureFamily && targetFamily) {
171
+ logger.debug(`[ContentConverter] Dropping thinking with unknown signature origin`);
172
+ continue;
173
+ }
174
+
175
+ // Compatible - convert to Gemini format with signature
161
176
  parts.push({
162
177
  text: block.thinking,
163
178
  thought: true,
164
179
  thoughtSignature: block.signature
165
180
  });
166
181
  }
167
- // Unsigned thinking blocks are dropped upstream
182
+ // Unsigned thinking blocks are dropped (existing behavior)
168
183
  }
169
184
  }
170
185
 
@@ -15,6 +15,7 @@ import {
15
15
  removeTrailingThinkingBlocks,
16
16
  reorderAssistantContent,
17
17
  filterUnsignedThinkingBlocks,
18
+ hasGeminiHistory,
18
19
  needsThinkingRecovery,
19
20
  closeToolLoopForThinking
20
21
  } from './thinking-utils.js';
@@ -78,12 +79,19 @@ export function convertAnthropicToGoogle(anthropicRequest) {
78
79
  }
79
80
 
80
81
  // Apply thinking recovery for Gemini thinking models when needed
81
- // This handles corrupted tool loops where thinking blocks are stripped
82
- // Claude models handle this differently and don't need this recovery
82
+ // Gemini needs recovery for tool loops/interrupted tools (stripped thinking)
83
83
  let processedMessages = messages;
84
+
84
85
  if (isGeminiModel && isThinking && needsThinkingRecovery(messages)) {
85
86
  logger.debug('[RequestConverter] Applying thinking recovery for Gemini');
86
- processedMessages = closeToolLoopForThinking(messages);
87
+ processedMessages = closeToolLoopForThinking(messages, 'gemini');
88
+ }
89
+
90
+ // For Claude: apply recovery only for cross-model (Gemini→Claude) switch
91
+ // Detected by checking if history has Gemini-style tool_use with thoughtSignature
92
+ if (isClaudeModel && isThinking && hasGeminiHistory(messages) && needsThinkingRecovery(messages)) {
93
+ logger.debug('[RequestConverter] Applying thinking recovery for Claude (cross-model from Gemini)');
94
+ processedMessages = closeToolLoopForThinking(messages, 'claude');
87
95
  }
88
96
 
89
97
  // Convert messages to contents, then filter unsigned thinking blocks
@@ -106,8 +114,10 @@ export function convertAnthropicToGoogle(anthropicRequest) {
106
114
  // SAFETY: Google API requires at least one part per content message
107
115
  // This happens when all thinking blocks are filtered out (unsigned)
108
116
  if (parts.length === 0) {
117
+ // Use '.' instead of '' because claude models reject empty text parts.
118
+ // A single period is invisible in practice but satisfies the API requirement.
109
119
  logger.warn('[RequestConverter] WARNING: Empty parts array after filtering, adding placeholder');
110
- parts.push({ text: '' });
120
+ parts.push({ text: '.' });
111
121
  }
112
122
 
113
123
  const content = {
@@ -4,8 +4,8 @@
4
4
  */
5
5
 
6
6
  import crypto from 'crypto';
7
- import { MIN_SIGNATURE_LENGTH } from '../constants.js';
8
- import { cacheSignature } from './signature-cache.js';
7
+ import { MIN_SIGNATURE_LENGTH, getModelFamily } from '../constants.js';
8
+ import { cacheSignature, cacheThinkingSignature } from './signature-cache.js';
9
9
 
10
10
  /**
11
11
  * Convert Google Generative AI response to Anthropic Messages API format
@@ -33,6 +33,12 @@ export function convertGoogleToAnthropic(googleResponse, model) {
33
33
  if (part.thought === true) {
34
34
  const signature = part.thoughtSignature || '';
35
35
 
36
+ // Cache thinking signature with model family for cross-model compatibility
37
+ if (signature && signature.length >= MIN_SIGNATURE_LENGTH) {
38
+ const modelFamily = getModelFamily(model);
39
+ cacheThinkingSignature(signature, modelFamily);
40
+ }
41
+
36
42
  // Include thinking blocks in the response for Claude Code
37
43
  anthropicContent.push({
38
44
  type: 'thinking',
@@ -5,11 +5,15 @@
5
5
  * Gemini models require thoughtSignature on tool calls, but Claude Code
6
6
  * strips non-standard fields. This cache stores signatures by tool_use_id
7
7
  * so they can be restored in subsequent requests.
8
+ *
9
+ * Also caches thinking block signatures with model family for cross-model
10
+ * compatibility checking.
8
11
  */
9
12
 
10
- import { GEMINI_SIGNATURE_CACHE_TTL_MS } from '../constants.js';
13
+ import { GEMINI_SIGNATURE_CACHE_TTL_MS, MIN_SIGNATURE_LENGTH } from '../constants.js';
11
14
 
12
15
  const signatureCache = new Map();
16
+ const thinkingSignatureCache = new Map();
13
17
 
14
18
  /**
15
19
  * Store a signature for a tool_use_id
@@ -54,6 +58,11 @@ export function cleanupCache() {
54
58
  signatureCache.delete(key);
55
59
  }
56
60
  }
61
+ for (const [key, entry] of thinkingSignatureCache) {
62
+ if (now - entry.timestamp > GEMINI_SIGNATURE_CACHE_TTL_MS) {
63
+ thinkingSignatureCache.delete(key);
64
+ }
65
+ }
57
66
  }
58
67
 
59
68
  /**
@@ -63,3 +72,43 @@ export function cleanupCache() {
63
72
  export function getCacheSize() {
64
73
  return signatureCache.size;
65
74
  }
75
+
76
/**
 * Cache a thinking block signature with its model family.
 *
 * Signatures that are not strings, or are shorter than MIN_SIGNATURE_LENGTH,
 * are ignored. The string check keeps array-like values (which also expose
 * `length`) from being stored as keys.
 *
 * @param {string} signature - The thinking signature to cache
 * @param {string} modelFamily - The model family ('claude' or 'gemini')
 */
export function cacheThinkingSignature(signature, modelFamily) {
  if (typeof signature !== 'string' || signature.length < MIN_SIGNATURE_LENGTH) {
    return;
  }
  thinkingSignatureCache.set(signature, {
    modelFamily,
    // Recorded so the entry can later be compared against the cache TTL.
    timestamp: Date.now()
  });
}
88
+
89
/**
 * Look up which model family produced a thinking signature.
 *
 * Entries older than GEMINI_SIGNATURE_CACHE_TTL_MS are evicted on read and
 * reported as missing.
 *
 * @param {string} signature - The signature to look up
 * @returns {string|null} The cached model family, or null if not found/expired
 */
export function getCachedSignatureFamily(signature) {
  const cached = signature ? thinkingSignatureCache.get(signature) : undefined;
  if (!cached) {
    return null;
  }

  const ageMs = Date.now() - cached.timestamp;
  const isExpired = ageMs > GEMINI_SIGNATURE_CACHE_TTL_MS;
  if (isExpired) {
    // Expired entries are removed as soon as they are seen.
    thinkingSignatureCache.delete(signature);
    return null;
  }

  return cached.modelFamily;
}
107
+
108
/**
 * Report how many thinking signatures are currently cached (for debugging).
 * Counts stored entries as-is; no TTL check is done here.
 *
 * @returns {number} Number of entries in the thinking signature cache
 */
export function getThinkingCacheSize() {
  const { size } = thinkingSignatureCache;
  return size;
}
@@ -4,6 +4,7 @@
4
4
  */
5
5
 
6
6
  import { MIN_SIGNATURE_LENGTH } from '../constants.js';
7
+ import { getCachedSignatureFamily } from './signature-cache.js';
7
8
  import { logger } from '../utils/logger.js';
8
9
 
9
10
  /**
@@ -26,6 +27,21 @@ export function hasValidSignature(part) {
26
27
  return typeof signature === 'string' && signature.length >= MIN_SIGNATURE_LENGTH;
27
28
  }
28
29
 
30
/**
 * Check if conversation history contains Gemini-style messages.
 * Gemini puts thoughtSignature on tool_use blocks, Claude puts signature on thinking blocks.
 *
 * Malformed input is tolerated: a non-array `messages` value, or null entries
 * among the messages or content blocks, yields false instead of a TypeError.
 *
 * @param {Array<Object>} messages - Array of messages
 * @returns {boolean} True if any tool_use has thoughtSignature (Gemini pattern)
 */
export function hasGeminiHistory(messages) {
  if (!Array.isArray(messages)) {
    return false;
  }

  const isGeminiToolUse = (block) =>
    block != null &&
    block.type === 'tool_use' &&
    block.thoughtSignature !== undefined;

  return messages.some(
    (msg) => msg != null && Array.isArray(msg.content) && msg.content.some(isGeminiToolUse)
  );
}
44
+
29
45
  /**
30
46
  * Sanitize a thinking part by keeping only allowed fields
31
47
  */
@@ -386,40 +402,83 @@ export function analyzeConversationState(messages) {
386
402
 
387
403
  /**
388
404
  * Check if conversation needs thinking recovery.
389
- * Returns true when:
390
- * 1. We're in a tool loop but have no valid thinking blocks, OR
391
- * 2. We have an interrupted tool with no valid thinking blocks
405
+ *
406
+ * Recovery is only needed when:
407
+ * 1. We're in a tool loop or have an interrupted tool, AND
408
+ * 2. No valid thinking blocks exist in the current turn
409
+ *
410
+ * Cross-model signature compatibility is handled by stripInvalidThinkingBlocks
411
+ * during recovery (not here).
392
412
  *
393
413
  * @param {Array<Object>} messages - Array of messages
394
414
  * @returns {boolean} True if thinking recovery is needed
395
415
  */
396
416
  export function needsThinkingRecovery(messages) {
397
417
  const state = analyzeConversationState(messages);
398
- // Need recovery if (tool loop OR interrupted tool) AND no thinking
399
- return (state.inToolLoop || state.interruptedTool) && !state.turnHasThinking;
418
+
419
+ // Recovery is only needed in tool loops or interrupted tools
420
+ if (!state.inToolLoop && !state.interruptedTool) return false;
421
+
422
+ // Need recovery if no valid thinking blocks exist
423
+ return !state.turnHasThinking;
400
424
  }
401
425
 
402
426
  /**
403
- * Strip all thinking blocks from messages.
427
+ * Strip invalid or incompatible thinking blocks from messages.
404
428
  * Used before injecting synthetic messages for recovery.
429
+ * Keeps valid thinking blocks to preserve context from previous turns.
405
430
  *
406
431
  * @param {Array<Object>} messages - Array of messages
407
- * @returns {Array<Object>} Messages with all thinking blocks removed
432
+ * @param {string} targetFamily - Target model family ('claude' or 'gemini')
433
+ * @returns {Array<Object>} Messages with invalid thinking blocks removed
408
434
  */
409
- function stripAllThinkingBlocks(messages) {
410
- return messages.map(msg => {
435
+ function stripInvalidThinkingBlocks(messages, targetFamily = null) {
436
+ let strippedCount = 0;
437
+
438
+ const result = messages.map(msg => {
411
439
  const content = msg.content || msg.parts;
412
440
  if (!Array.isArray(content)) return msg;
413
441
 
414
- const filtered = content.filter(block => !isThinkingPart(block));
442
+ const filtered = content.filter(block => {
443
+ // Keep non-thinking blocks
444
+ if (!isThinkingPart(block)) return true;
415
445
 
446
+ // Check generic validity (has signature of sufficient length)
447
+ if (!hasValidSignature(block)) {
448
+ strippedCount++;
449
+ return false;
450
+ }
451
+
452
+ // Check family compatibility only for Gemini targets
453
+ // Claude can validate its own signatures, so we don't drop for Claude
454
+ if (targetFamily === 'gemini') {
455
+ const signature = block.thought === true ? block.thoughtSignature : block.signature;
456
+ const signatureFamily = getCachedSignatureFamily(signature);
457
+
458
+ // For Gemini: drop unknown or mismatched signatures
459
+ if (!signatureFamily || signatureFamily !== targetFamily) {
460
+ strippedCount++;
461
+ return false;
462
+ }
463
+ }
464
+
465
+ return true;
466
+ });
467
+
468
+ // Use '.' instead of '' because claude models reject empty text parts
416
469
  if (msg.content) {
417
- return { ...msg, content: filtered.length > 0 ? filtered : [{ type: 'text', text: '' }] };
470
+ return { ...msg, content: filtered.length > 0 ? filtered : [{ type: 'text', text: '.' }] };
418
471
  } else if (msg.parts) {
419
- return { ...msg, parts: filtered.length > 0 ? filtered : [{ text: '' }] };
472
+ return { ...msg, parts: filtered.length > 0 ? filtered : [{ text: '.' }] };
420
473
  }
421
474
  return msg;
422
475
  });
476
+
477
+ if (strippedCount > 0) {
478
+ logger.debug(`[ThinkingUtils] Stripped ${strippedCount} invalid/incompatible thinking block(s)`);
479
+ }
480
+
481
+ return result;
423
482
  }
424
483
 
425
484
  /**
@@ -432,16 +491,17 @@ function stripAllThinkingBlocks(messages) {
432
491
  * loop and allow the model to continue.
433
492
  *
434
493
  * @param {Array<Object>} messages - Array of messages
494
+ * @param {string} targetFamily - Target model family ('claude' or 'gemini')
435
495
  * @returns {Array<Object>} Modified messages with synthetic messages injected
436
496
  */
437
- export function closeToolLoopForThinking(messages) {
497
+ export function closeToolLoopForThinking(messages, targetFamily = null) {
438
498
  const state = analyzeConversationState(messages);
439
499
 
440
500
  // Handle neither tool loop nor interrupted tool
441
501
  if (!state.inToolLoop && !state.interruptedTool) return messages;
442
502
 
443
- // Strip all thinking blocks
444
- let modified = stripAllThinkingBlocks(messages);
503
+ // Strip only invalid/incompatible thinking blocks (keep valid ones)
504
+ let modified = stripInvalidThinkingBlocks(messages, targetFamily);
445
505
 
446
506
  if (state.interruptedTool) {
447
507
  // For interrupted tools: just strip thinking and add a synthetic assistant message
@@ -457,7 +517,7 @@ export function closeToolLoopForThinking(messages) {
457
517
  });
458
518
 
459
519
  logger.debug('[ThinkingUtils] Applied thinking recovery for interrupted tool');
460
- } else {
520
+ } else if (state.inToolLoop) {
461
521
  // For tool loops: add synthetic messages to close the loop
462
522
  const syntheticText = state.toolResultCount === 1
463
523
  ? '[Tool execution completed.]'
package/src/index.js CHANGED
@@ -12,6 +12,7 @@ import os from 'os';
12
12
  // Parse command line arguments
13
13
  const args = process.argv.slice(2);
14
14
  const isDebug = args.includes('--debug') || process.env.DEBUG === 'true';
15
+ const isFallbackEnabled = args.includes('--fallback') || process.env.FALLBACK === 'true';
15
16
 
16
17
  // Initialize logger
17
18
  logger.setDebug(isDebug);
@@ -20,6 +21,13 @@ if (isDebug) {
20
21
  logger.debug('Debug mode enabled');
21
22
  }
22
23
 
24
+ if (isFallbackEnabled) {
25
+ logger.info('Model fallback mode enabled');
26
+ }
27
+
28
+ // Export fallback flag for server to use
29
+ export const FALLBACK_ENABLED = isFallbackEnabled;
30
+
23
31
  const PORT = process.env.PORT || DEFAULT_PORT;
24
32
 
25
33
  // Home directory for account storage
@@ -40,14 +48,22 @@ app.listen(PORT, () => {
40
48
  if (!isDebug) {
41
49
  controlSection += '║ --debug Enable debug logging ║\n';
42
50
  }
51
+ if (!isFallbackEnabled) {
52
+ controlSection += '║ --fallback Enable model fallback on quota exhaust ║\n';
53
+ }
43
54
  controlSection += '║ Ctrl+C Stop server ║';
44
55
 
45
- // Build status section if debug mode is active
56
+ // Build status section if any modes are active
46
57
  let statusSection = '';
47
- if (isDebug) {
58
+ if (isDebug || isFallbackEnabled) {
48
59
  statusSection = '║ ║\n';
49
60
  statusSection += '║ Active Modes: ║\n';
50
- statusSection += '║ ✓ Debug mode enabled ║\n';
61
+ if (isDebug) {
62
+ statusSection += '║ ✓ Debug mode enabled ║\n';
63
+ }
64
+ if (isFallbackEnabled) {
65
+ statusSection += '║ ✓ Model fallback enabled ║\n';
66
+ }
51
67
  }
52
68
 
53
69
  logger.log(`
package/src/server.js CHANGED
@@ -13,6 +13,10 @@ import { AccountManager } from './account-manager/index.js';
13
13
  import { formatDuration } from './utils/helpers.js';
14
14
  import { logger } from './utils/logger.js';
15
15
 
16
+ // Parse fallback flag directly from command line args to avoid circular dependency
17
+ const args = process.argv.slice(2);
18
+ const FALLBACK_ENABLED = args.includes('--fallback') || process.env.FALLBACK === 'true';
19
+
16
20
  const app = express();
17
21
 
18
22
  // Initialize account manager (will be fully initialized on first request or startup)
@@ -595,7 +599,7 @@ app.post('/v1/messages', async (req, res) => {
595
599
 
596
600
  try {
597
601
  // Use the streaming generator with account manager
598
- for await (const event of sendMessageStream(request, accountManager)) {
602
+ for await (const event of sendMessageStream(request, accountManager, FALLBACK_ENABLED)) {
599
603
  res.write(`event: ${event.type}\ndata: ${JSON.stringify(event)}\n\n`);
600
604
  // Flush after each event for real-time streaming
601
605
  if (res.flush) res.flush();
@@ -616,7 +620,7 @@ app.post('/v1/messages', async (req, res) => {
616
620
 
617
621
  } else {
618
622
  // Handle non-streaming response
619
- const response = await sendMessage(request, accountManager);
623
+ const response = await sendMessage(request, accountManager, FALLBACK_ENABLED);
620
624
  res.json(response);
621
625
  }
622
626