antigravity-claude-proxy 2.0.5 → 2.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "antigravity-claude-proxy",
3
- "version": "2.0.5",
3
+ "version": "2.0.7",
4
4
  "description": "Proxy server to use Antigravity's Claude models with Claude Code CLI",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -34,8 +34,7 @@
34
34
  "test:crossmodel": "node tests/test-cross-model-thinking.cjs",
35
35
  "test:oauth": "node tests/test-oauth-no-browser.cjs",
36
36
  "test:emptyretry": "node tests/test-empty-response-retry.cjs",
37
- "test:sanitizer": "node tests/test-schema-sanitizer.cjs",
38
- "test:counttokens": "node tests/test-count-tokens.cjs"
37
+ "test:sanitizer": "node tests/test-schema-sanitizer.cjs"
39
38
  },
40
39
  "keywords": [
41
40
  "claude",
@@ -58,8 +57,6 @@
58
57
  "node": ">=18.0.0"
59
58
  },
60
59
  "dependencies": {
61
- "@anthropic-ai/tokenizer": "^0.0.4",
62
- "@lenml/tokenizer-gemini": "^3.7.2",
63
60
  "async-mutex": "^0.5.0",
64
61
  "better-sqlite3": "^12.5.0",
65
62
  "cors": "^2.8.5",
@@ -117,6 +117,31 @@ export async function getModelQuotas(token) {
117
117
  return quotas;
118
118
  }
119
119
 
120
+ /**
121
+ * Parse tier ID string to determine subscription level
122
+ * @param {string} tierId - The tier ID from the API
123
+ * @returns {'free' | 'pro' | 'ultra' | 'unknown'} The subscription tier
124
+ */
125
+ function parseTierId(tierId) {
126
+ if (!tierId) return 'unknown';
127
+ const lower = tierId.toLowerCase();
128
+
129
+ if (lower.includes('ultra')) {
130
+ return 'ultra';
131
+ }
132
+ if (lower === 'standard-tier') {
133
+ // standard-tier = "Gemini Code Assist" (paid, project-based)
134
+ return 'pro';
135
+ }
136
+ if (lower.includes('pro') || lower.includes('premium')) {
137
+ return 'pro';
138
+ }
139
+ if (lower === 'free-tier' || lower.includes('free')) {
140
+ return 'free';
141
+ }
142
+ return 'unknown';
143
+ }
144
+
120
145
  /**
121
146
  * Get subscription tier for an account
122
147
  * Calls loadCodeAssist API to discover project ID and subscription tier
@@ -162,22 +187,46 @@ export async function getSubscriptionTier(token) {
162
187
  projectId = data.cloudaicompanionProject.id;
163
188
  }
164
189
 
165
- // Extract subscription tier (priority: paidTier > currentTier)
166
- let tier = 'free';
167
- const tierId = data.paidTier?.id || data.currentTier?.id;
168
-
169
- if (tierId) {
170
- const lowerTier = tierId.toLowerCase();
171
- if (lowerTier.includes('ultra')) {
190
+ // Extract subscription tier
191
+ // Priority: paidTier > currentTier > allowedTiers
192
+ // - paidTier.id: "g1-pro-tier", "g1-ultra-tier" (Google One subscription)
193
+ // - currentTier.id: "standard-tier" (pro), "free-tier" (free)
194
+ // - allowedTiers: fallback when currentTier is missing
195
+ // Note: paidTier is sometimes missing from the response even for Pro accounts
196
+ let tier = 'unknown';
197
+ let tierId = null;
198
+
199
+ // 1. Check paidTier first (Google One AI subscription - most reliable)
200
+ if (data.paidTier?.id) {
201
+ tierId = data.paidTier.id;
202
+ const lower = tierId.toLowerCase();
203
+ if (lower.includes('ultra')) {
172
204
  tier = 'ultra';
173
- } else if (lowerTier.includes('pro')) {
205
+ } else if (lower.includes('pro')) {
174
206
  tier = 'pro';
175
- } else {
176
- tier = 'free';
177
207
  }
178
208
  }
179
209
 
180
- logger.debug(`[CloudCode] Subscription detected: ${tier}, Project: ${projectId}`);
210
+ // 2. Fall back to currentTier if paidTier didn't give us a tier
211
+ if (tier === 'unknown' && data.currentTier?.id) {
212
+ tierId = data.currentTier.id;
213
+ tier = parseTierId(tierId);
214
+ }
215
+
216
+ // 3. Fall back to allowedTiers (use the tier marked isDefault, or the first listed tier if none is marked)
217
+ if (tier === 'unknown' && Array.isArray(data.allowedTiers) && data.allowedTiers.length > 0) {
218
+ // First look for the default tier
219
+ let defaultTier = data.allowedTiers.find(t => t?.isDefault);
220
+ if (!defaultTier) {
221
+ defaultTier = data.allowedTiers[0];
222
+ }
223
+ if (defaultTier?.id) {
224
+ tierId = defaultTier.id;
225
+ tier = parseTierId(tierId);
226
+ }
227
+ }
228
+
229
+ logger.debug(`[CloudCode] Subscription detected: ${tier} (tierId: ${tierId}), Project: ${projectId}`);
181
230
 
182
231
  return { tier, projectId };
183
232
  } catch (error) {
@@ -35,6 +35,7 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo
35
35
  }
36
36
 
37
37
  const parts = [];
38
+ const deferredInlineData = []; // Collect inlineData to add at the end (Issue #91)
38
39
 
39
40
  for (const block of content) {
40
41
  if (!block) continue;
@@ -152,8 +153,9 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo
152
153
 
153
154
  parts.push({ functionResponse });
154
155
 
155
- // Add any images from the tool result as separate parts
156
- parts.push(...imageParts);
156
+ // Defer images from the tool result to end of parts array (Issue #91)
157
+ // This ensures all functionResponse parts are consecutive
158
+ deferredInlineData.push(...imageParts);
157
159
  } else if (block.type === 'thinking') {
158
160
  // Handle thinking blocks with signature compatibility check
159
161
  if (block.signature && block.signature.length >= MIN_SIGNATURE_LENGTH) {
@@ -183,5 +185,9 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo
183
185
  }
184
186
  }
185
187
 
188
+ // Add deferred inlineData at the end (Issue #91)
189
+ // This ensures functionResponse parts are consecutive, which Claude's API requires
190
+ parts.push(...deferredInlineData);
191
+
186
192
  return parts;
187
193
  }
@@ -16,6 +16,7 @@ import {
16
16
  reorderAssistantContent,
17
17
  filterUnsignedThinkingBlocks,
18
18
  hasGeminiHistory,
19
+ hasUnsignedThinkingBlocks,
19
20
  needsThinkingRecovery,
20
21
  closeToolLoopForThinking
21
22
  } from './thinking-utils.js';
@@ -87,16 +88,16 @@ export function convertAnthropicToGoogle(anthropicRequest) {
87
88
  processedMessages = closeToolLoopForThinking(messages, 'gemini');
88
89
  }
89
90
 
90
- // For Claude: apply recovery only for cross-model (Gemini→Claude) switch
91
- // Detected by checking if history has Gemini-style tool_use with thoughtSignature
92
- if (isClaudeModel && isThinking && hasGeminiHistory(messages) && needsThinkingRecovery(messages)) {
93
- logger.debug('[RequestConverter] Applying thinking recovery for Claude (cross-model from Gemini)');
91
+ // For Claude: apply recovery for cross-model (Gemini→Claude) or unsigned thinking blocks
92
+ // Unsigned thinking blocks occur when Claude Code strips signatures it doesn't understand
93
+ const needsClaudeRecovery = hasGeminiHistory(messages) || hasUnsignedThinkingBlocks(messages);
94
+ if (isClaudeModel && isThinking && needsClaudeRecovery && needsThinkingRecovery(messages)) {
95
+ logger.debug('[RequestConverter] Applying thinking recovery for Claude');
94
96
  processedMessages = closeToolLoopForThinking(messages, 'claude');
95
97
  }
96
98
 
97
99
  // Convert messages to contents, then filter unsigned thinking blocks
98
- for (let i = 0; i < processedMessages.length; i++) {
99
- const msg = processedMessages[i];
100
+ for (const msg of processedMessages) {
100
101
  let msgContent = msg.content;
101
102
 
102
103
  // For assistant messages, process thinking blocks and reorder content
@@ -112,3 +112,11 @@ export function getCachedSignatureFamily(signature) {
112
112
  export function getThinkingCacheSize() {
113
113
  return thinkingSignatureCache.size;
114
114
  }
115
+
116
+ /**
117
+ * Clear all entries from the thinking signature cache.
118
+ * Used for testing cold cache scenarios.
119
+ */
120
+ export function clearThinkingSignatureCache() {
121
+ thinkingSignatureCache.clear();
122
+ }
@@ -42,6 +42,22 @@ export function hasGeminiHistory(messages) {
42
42
  );
43
43
  }
44
44
 
45
+ /**
46
+ * Check if conversation has unsigned thinking blocks that will be dropped.
47
+ * These cause "Expected thinking but found text" errors.
48
+ * @param {Array<Object>} messages - Array of messages
49
+ * @returns {boolean} True if any assistant message has unsigned thinking blocks
50
+ */
51
+ export function hasUnsignedThinkingBlocks(messages) {
52
+ return messages.some(msg => {
53
+ if (msg.role !== 'assistant' && msg.role !== 'model') return false;
54
+ if (!Array.isArray(msg.content)) return false;
55
+ return msg.content.some(block =>
56
+ isThinkingPart(block) && !hasValidSignature(block)
57
+ );
58
+ });
59
+ }
60
+
45
61
  /**
46
62
  * Sanitize a thinking part by keeping only allowed fields
47
63
  */
package/src/server.js CHANGED
@@ -9,7 +9,6 @@ import cors from 'cors';
9
9
  import path from 'path';
10
10
  import { fileURLToPath } from 'url';
11
11
  import { sendMessage, sendMessageStream, listModels, getModelQuotas, getSubscriptionTier } from './cloudcode/index.js';
12
- import { createCountTokensHandler } from './cloudcode/count-tokens.js';
13
12
  import { mountWebUI } from './webui/index.js';
14
13
  import { config } from './config.js';
15
14
 
@@ -18,6 +17,7 @@ const __dirname = path.dirname(__filename);
18
17
  import { forceRefresh } from './auth/token-extractor.js';
19
18
  import { REQUEST_BODY_LIMIT } from './constants.js';
20
19
  import { AccountManager } from './account-manager/index.js';
20
+ import { clearThinkingSignatureCache } from './format/signature-cache.js';
21
21
  import { formatDuration } from './utils/helpers.js';
22
22
  import { logger } from './utils/logger.js';
23
23
  import usageStats from './modules/usage-stats.js';
@@ -161,6 +161,16 @@ app.use((req, res, next) => {
161
161
  next();
162
162
  });
163
163
 
164
+ /**
165
+ * Test endpoint - Clear thinking signature cache
166
+ * Used for testing cold cache scenarios in cross-model tests
167
+ */
168
+ app.post('/test/clear-signature-cache', (req, res) => {
169
+ clearThinkingSignatureCache();
170
+ logger.debug('[Test] Cleared thinking signature cache');
171
+ res.json({ success: true, message: 'Thinking signature cache cleared' });
172
+ });
173
+
164
174
  /**
165
175
  * Health check endpoint - Detailed status
166
176
  * Returns status of all accounts including rate limits and model quotas
@@ -601,16 +611,14 @@ app.get('/v1/models', async (req, res) => {
601
611
  * Count tokens endpoint - Anthropic Messages API compatible
602
612
  * Not implemented: always responds with HTTP 501 and a 'not_implemented' error body; no local tokenizers are used
603
613
  */
604
- app.post('/v1/messages/count_tokens', async (req, res) => {
605
- try {
606
- // Ensure account manager is initialized for API-based counting
607
- await ensureInitialized();
608
- } catch (error) {
609
- // If initialization fails, handler will fall back to local estimation
610
- logger.debug(`[TokenCounter] Account manager not initialized: ${error.message}`);
611
- }
612
-
613
- return createCountTokensHandler(accountManager)(req, res);
614
+ app.post('/v1/messages/count_tokens', (req, res) => {
615
+ res.status(501).json({
616
+ type: 'error',
617
+ error: {
618
+ type: 'not_implemented',
619
+ message: 'Token counting is not implemented. Use /v1/messages with max_tokens or configure your client to skip token counting.'
620
+ }
621
+ });
614
622
  });
615
623
 
616
624
  /**
@@ -1,302 +0,0 @@
1
- /**
2
- * Token Counter Implementation for antigravity-claude-proxy
3
- *
4
- * Implements Anthropic's /v1/messages/count_tokens endpoint
5
- * Uses official tokenizers for each model family:
6
- * - Claude: @anthropic-ai/tokenizer
7
- * - Gemini: @lenml/tokenizer-gemini
8
- *
9
- * @see https://platform.claude.com/docs/en/api/messages-count-tokens
10
- */
11
-
12
- import { countTokens as claudeCountTokens } from '@anthropic-ai/tokenizer';
13
- import { fromPreTrained as loadGeminiTokenizer } from '@lenml/tokenizer-gemini';
14
- import { logger } from '../utils/logger.js';
15
- import { getModelFamily } from '../constants.js';
16
-
17
- // Lazy-loaded Gemini tokenizer (138MB, loaded once on first use)
18
- let geminiTokenizer = null;
19
- let geminiTokenizerLoading = null;
20
-
21
- /**
22
- * Get or initialize the Gemini tokenizer
23
- * Uses singleton pattern with loading lock to prevent multiple loads
24
- *
25
- * @returns {Promise<Object>} Gemini tokenizer instance
26
- */
27
- async function getGeminiTokenizer() {
28
- if (geminiTokenizer) {
29
- return geminiTokenizer;
30
- }
31
-
32
- // Prevent multiple simultaneous loads
33
- if (geminiTokenizerLoading) {
34
- return geminiTokenizerLoading;
35
- }
36
-
37
- geminiTokenizerLoading = (async () => {
38
- try {
39
- logger.debug('[TokenCounter] Loading Gemini tokenizer...');
40
- geminiTokenizer = await loadGeminiTokenizer();
41
- logger.debug('[TokenCounter] Gemini tokenizer loaded successfully');
42
- return geminiTokenizer;
43
- } catch (error) {
44
- logger.warn(`[TokenCounter] Failed to load Gemini tokenizer: ${error.message}`);
45
- throw error;
46
- } finally {
47
- geminiTokenizerLoading = null;
48
- }
49
- })();
50
-
51
- return geminiTokenizerLoading;
52
- }
53
-
54
- /**
55
- * Count tokens for text using Claude tokenizer
56
- *
57
- * @param {string} text - Text to tokenize
58
- * @returns {number} Token count
59
- */
60
- function countClaudeTokens(text) {
61
- if (!text) return 0;
62
- try {
63
- return claudeCountTokens(text);
64
- } catch (error) {
65
- logger.debug(`[TokenCounter] Claude tokenizer error: ${error.message}`);
66
- return Math.ceil(text.length / 4);
67
- }
68
- }
69
-
70
- /**
71
- * Count tokens for text using Gemini tokenizer
72
- *
73
- * @param {Object} tokenizer - Gemini tokenizer instance
74
- * @param {string} text - Text to tokenize
75
- * @returns {number} Token count
76
- */
77
- function countGeminiTokens(tokenizer, text) {
78
- if (!text) return 0;
79
- try {
80
- const tokens = tokenizer.encode(text);
81
- // Remove BOS token if present (token id 2)
82
- return tokens[0] === 2 ? tokens.length - 1 : tokens.length;
83
- } catch (error) {
84
- logger.debug(`[TokenCounter] Gemini tokenizer error: ${error.message}`);
85
- return Math.ceil(text.length / 4);
86
- }
87
- }
88
-
89
- /**
90
- * Estimate tokens for text content using appropriate tokenizer
91
- *
92
- * @param {string} text - Text to tokenize
93
- * @param {string} model - Model name to determine tokenizer
94
- * @param {Object} geminiTok - Gemini tokenizer instance (optional)
95
- * @returns {number} Token count
96
- */
97
- function estimateTextTokens(text, model, geminiTok = null) {
98
- if (!text) return 0;
99
-
100
- const family = getModelFamily(model);
101
-
102
- if (family === 'claude') {
103
- return countClaudeTokens(text);
104
- } else if (family === 'gemini' && geminiTok) {
105
- return countGeminiTokens(geminiTok, text);
106
- }
107
-
108
- // Fallback for unknown models: rough estimate
109
- return Math.ceil(text.length / 4);
110
- }
111
-
112
- /**
113
- * Extract text from message content
114
- *
115
- * Note: This function only extracts text from 'text' type blocks.
116
- * Image blocks (type: 'image') and document blocks (type: 'document') are not tokenized
117
- * and will not contribute to the token count. This is intentional as binary content
118
- * requires different handling and Anthropic's actual token counting for images uses
119
- * a fixed estimate (~1600 tokens per image) that depends on image dimensions.
120
- *
121
- * @param {string|Array} content - Message content
122
- * @returns {string} Concatenated text
123
- */
124
- function extractText(content) {
125
- if (typeof content === 'string') {
126
- return content;
127
- }
128
-
129
- if (Array.isArray(content)) {
130
- return content
131
- .filter(block => block.type === 'text')
132
- .map(block => block.text)
133
- .join('\n');
134
- }
135
-
136
- return '';
137
- }
138
-
139
- /**
140
- * Count tokens locally using model-specific tokenizer
141
- *
142
- * @param {Object} request - Anthropic format request
143
- * @param {Object} geminiTok - Gemini tokenizer instance (optional)
144
- * @returns {number} Token count
145
- */
146
- function countTokensLocally(request, geminiTok = null) {
147
- const { messages = [], system, tools, model } = request;
148
- let totalTokens = 0;
149
-
150
- // Count system prompt tokens
151
- if (system) {
152
- if (typeof system === 'string') {
153
- totalTokens += estimateTextTokens(system, model, geminiTok);
154
- } else if (Array.isArray(system)) {
155
- for (const block of system) {
156
- if (block.type === 'text') {
157
- totalTokens += estimateTextTokens(block.text, model, geminiTok);
158
- }
159
- }
160
- }
161
- }
162
-
163
- // Count message tokens
164
- for (const message of messages) {
165
- // Add overhead for role and structure (~4 tokens per message)
166
- totalTokens += 4;
167
- totalTokens += estimateTextTokens(extractText(message.content), model, geminiTok);
168
-
169
- // Handle tool_use and tool_result blocks
170
- if (Array.isArray(message.content)) {
171
- for (const block of message.content) {
172
- if (block.type === 'tool_use') {
173
- totalTokens += estimateTextTokens(block.name, model, geminiTok);
174
- totalTokens += estimateTextTokens(JSON.stringify(block.input), model, geminiTok);
175
- } else if (block.type === 'tool_result') {
176
- if (typeof block.content === 'string') {
177
- totalTokens += estimateTextTokens(block.content, model, geminiTok);
178
- } else if (Array.isArray(block.content)) {
179
- totalTokens += estimateTextTokens(extractText(block.content), model, geminiTok);
180
- }
181
- } else if (block.type === 'thinking') {
182
- totalTokens += estimateTextTokens(block.thinking, model, geminiTok);
183
- }
184
- }
185
- }
186
- }
187
-
188
- // Count tool definitions
189
- if (tools && tools.length > 0) {
190
- for (const tool of tools) {
191
- totalTokens += estimateTextTokens(tool.name, model, geminiTok);
192
- totalTokens += estimateTextTokens(tool.description || '', model, geminiTok);
193
- totalTokens += estimateTextTokens(JSON.stringify(tool.input_schema || {}), model, geminiTok);
194
- }
195
- }
196
-
197
- return totalTokens;
198
- }
199
-
200
- /**
201
- * Count tokens in a message request
202
- * Implements Anthropic's /v1/messages/count_tokens endpoint
203
- * Uses local tokenization for all content types
204
- *
205
- * @param {Object} anthropicRequest - Anthropic format request with messages, model, system, tools
206
- * @param {Object} accountManager - Account manager instance (unused, kept for API compatibility)
207
- * @param {Object} options - Options (unused, kept for API compatibility)
208
- * @returns {Promise<Object>} Response with input_tokens count
209
- */
210
- export async function countTokens(anthropicRequest, accountManager = null, options = {}) {
211
- try {
212
- const family = getModelFamily(anthropicRequest.model);
213
- let geminiTok = null;
214
-
215
- // Load Gemini tokenizer if needed
216
- if (family === 'gemini') {
217
- try {
218
- geminiTok = await getGeminiTokenizer();
219
- } catch (error) {
220
- logger.warn(`[TokenCounter] Gemini tokenizer unavailable, using fallback`);
221
- }
222
- }
223
-
224
- const inputTokens = countTokensLocally(anthropicRequest, geminiTok);
225
- logger.debug(`[TokenCounter] Local count (${family}): ${inputTokens} tokens`);
226
-
227
- return {
228
- input_tokens: inputTokens
229
- };
230
-
231
- } catch (error) {
232
- logger.warn(`[TokenCounter] Error: ${error.message}, using character-based fallback`);
233
-
234
- // Ultimate fallback: character-based estimation
235
- const { messages = [], system } = anthropicRequest;
236
- let charCount = 0;
237
-
238
- if (system) {
239
- charCount += typeof system === 'string' ? system.length : JSON.stringify(system).length;
240
- }
241
-
242
- for (const message of messages) {
243
- charCount += JSON.stringify(message.content).length;
244
- }
245
-
246
- return {
247
- input_tokens: Math.ceil(charCount / 4)
248
- };
249
- }
250
- }
251
-
252
- /**
253
- * Express route handler for /v1/messages/count_tokens
254
- *
255
- * @param {Object} accountManager - Account manager instance
256
- * @returns {Function} Express middleware
257
- */
258
- export function createCountTokensHandler(accountManager) {
259
- return async (req, res) => {
260
- try {
261
- const { messages, model, system, tools, tool_choice, thinking } = req.body;
262
-
263
- // Validate required fields
264
- if (!messages || !Array.isArray(messages)) {
265
- return res.status(400).json({
266
- type: 'error',
267
- error: {
268
- type: 'invalid_request_error',
269
- message: 'messages is required and must be an array'
270
- }
271
- });
272
- }
273
-
274
- if (!model) {
275
- return res.status(400).json({
276
- type: 'error',
277
- error: {
278
- type: 'invalid_request_error',
279
- message: 'model is required'
280
- }
281
- });
282
- }
283
-
284
- const result = await countTokens(
285
- { messages, model, system, tools, tool_choice, thinking },
286
- accountManager
287
- );
288
-
289
- res.json(result);
290
-
291
- } catch (error) {
292
- logger.error(`[TokenCounter] Handler error: ${error.message}`);
293
- res.status(500).json({
294
- type: 'error',
295
- error: {
296
- type: 'api_error',
297
- message: error.message
298
- }
299
- });
300
- }
301
- };
302
- }