antigravity-claude-proxy 2.0.5 → 2.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -5
- package/src/cloudcode/model-api.js +60 -11
- package/src/format/content-converter.js +8 -2
- package/src/format/request-converter.js +7 -6
- package/src/format/signature-cache.js +8 -0
- package/src/format/thinking-utils.js +16 -0
- package/src/server.js +19 -11
- package/src/cloudcode/count-tokens.js +0 -302
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "antigravity-claude-proxy",
|
|
3
|
-
"version": "2.0.5",
|
|
3
|
+
"version": "2.0.7",
|
|
4
4
|
"description": "Proxy server to use Antigravity's Claude models with Claude Code CLI",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -34,8 +34,7 @@
|
|
|
34
34
|
"test:crossmodel": "node tests/test-cross-model-thinking.cjs",
|
|
35
35
|
"test:oauth": "node tests/test-oauth-no-browser.cjs",
|
|
36
36
|
"test:emptyretry": "node tests/test-empty-response-retry.cjs",
|
|
37
|
-
"test:sanitizer": "node tests/test-schema-sanitizer.cjs",
|
|
38
|
-
"test:counttokens": "node tests/test-count-tokens.cjs"
|
|
37
|
+
"test:sanitizer": "node tests/test-schema-sanitizer.cjs"
|
|
39
38
|
},
|
|
40
39
|
"keywords": [
|
|
41
40
|
"claude",
|
|
@@ -58,8 +57,6 @@
|
|
|
58
57
|
"node": ">=18.0.0"
|
|
59
58
|
},
|
|
60
59
|
"dependencies": {
|
|
61
|
-
"@anthropic-ai/tokenizer": "^0.0.4",
|
|
62
|
-
"@lenml/tokenizer-gemini": "^3.7.2",
|
|
63
60
|
"async-mutex": "^0.5.0",
|
|
64
61
|
"better-sqlite3": "^12.5.0",
|
|
65
62
|
"cors": "^2.8.5",
|
package/src/cloudcode/model-api.js
CHANGED
|
@@ -117,6 +117,31 @@ export async function getModelQuotas(token) {
|
|
|
117
117
|
return quotas;
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
+
/**
|
|
121
|
+
* Parse tier ID string to determine subscription level
|
|
122
|
+
* @param {string} tierId - The tier ID from the API
|
|
123
|
+
* @returns {'free' | 'pro' | 'ultra' | 'unknown'} The subscription tier
|
|
124
|
+
*/
|
|
125
|
+
function parseTierId(tierId) {
|
|
126
|
+
if (!tierId) return 'unknown';
|
|
127
|
+
const lower = tierId.toLowerCase();
|
|
128
|
+
|
|
129
|
+
if (lower.includes('ultra')) {
|
|
130
|
+
return 'ultra';
|
|
131
|
+
}
|
|
132
|
+
if (lower === 'standard-tier') {
|
|
133
|
+
// standard-tier = "Gemini Code Assist" (paid, project-based)
|
|
134
|
+
return 'pro';
|
|
135
|
+
}
|
|
136
|
+
if (lower.includes('pro') || lower.includes('premium')) {
|
|
137
|
+
return 'pro';
|
|
138
|
+
}
|
|
139
|
+
if (lower === 'free-tier' || lower.includes('free')) {
|
|
140
|
+
return 'free';
|
|
141
|
+
}
|
|
142
|
+
return 'unknown';
|
|
143
|
+
}
|
|
144
|
+
|
|
120
145
|
/**
|
|
121
146
|
* Get subscription tier for an account
|
|
122
147
|
* Calls loadCodeAssist API to discover project ID and subscription tier
|
|
@@ -162,22 +187,46 @@ export async function getSubscriptionTier(token) {
|
|
|
162
187
|
projectId = data.cloudaicompanionProject.id;
|
|
163
188
|
}
|
|
164
189
|
|
|
165
|
-
// Extract subscription tier
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
190
|
+
// Extract subscription tier
|
|
191
|
+
// Priority: paidTier > currentTier > allowedTiers
|
|
192
|
+
// - paidTier.id: "g1-pro-tier", "g1-ultra-tier" (Google One subscription)
|
|
193
|
+
// - currentTier.id: "standard-tier" (pro), "free-tier" (free)
|
|
194
|
+
// - allowedTiers: fallback when currentTier is missing
|
|
195
|
+
// Note: paidTier is sometimes missing from the response even for Pro accounts
|
|
196
|
+
let tier = 'unknown';
|
|
197
|
+
let tierId = null;
|
|
198
|
+
|
|
199
|
+
// 1. Check paidTier first (Google One AI subscription - most reliable)
|
|
200
|
+
if (data.paidTier?.id) {
|
|
201
|
+
tierId = data.paidTier.id;
|
|
202
|
+
const lower = tierId.toLowerCase();
|
|
203
|
+
if (lower.includes('ultra')) {
|
|
172
204
|
tier = 'ultra';
|
|
173
|
-
} else if (
|
|
205
|
+
} else if (lower.includes('pro')) {
|
|
174
206
|
tier = 'pro';
|
|
175
|
-
} else {
|
|
176
|
-
tier = 'free';
|
|
177
207
|
}
|
|
178
208
|
}
|
|
179
209
|
|
|
180
|
-
|
|
210
|
+
// 2. Fall back to currentTier if paidTier didn't give us a tier
|
|
211
|
+
if (tier === 'unknown' && data.currentTier?.id) {
|
|
212
|
+
tierId = data.currentTier.id;
|
|
213
|
+
tier = parseTierId(tierId);
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
// 3. Fall back to allowedTiers (find the default or first non-free tier)
|
|
217
|
+
if (tier === 'unknown' && Array.isArray(data.allowedTiers) && data.allowedTiers.length > 0) {
|
|
218
|
+
// First look for the default tier
|
|
219
|
+
let defaultTier = data.allowedTiers.find(t => t?.isDefault);
|
|
220
|
+
if (!defaultTier) {
|
|
221
|
+
defaultTier = data.allowedTiers[0];
|
|
222
|
+
}
|
|
223
|
+
if (defaultTier?.id) {
|
|
224
|
+
tierId = defaultTier.id;
|
|
225
|
+
tier = parseTierId(tierId);
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
logger.debug(`[CloudCode] Subscription detected: ${tier} (tierId: ${tierId}), Project: ${projectId}`);
|
|
181
230
|
|
|
182
231
|
return { tier, projectId };
|
|
183
232
|
} catch (error) {
|
package/src/format/content-converter.js
CHANGED
|
@@ -35,6 +35,7 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo
|
|
|
35
35
|
}
|
|
36
36
|
|
|
37
37
|
const parts = [];
|
|
38
|
+
const deferredInlineData = []; // Collect inlineData to add at the end (Issue #91)
|
|
38
39
|
|
|
39
40
|
for (const block of content) {
|
|
40
41
|
if (!block) continue;
|
|
@@ -152,8 +153,9 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo
|
|
|
152
153
|
|
|
153
154
|
parts.push({ functionResponse });
|
|
154
155
|
|
|
155
|
-
//
|
|
156
|
-
parts.push(...imageParts);
|
|
156
|
+
// Defer images from the tool result to end of parts array (Issue #91)
|
|
157
|
+
// This ensures all functionResponse parts are consecutive
|
|
158
|
+
deferredInlineData.push(...imageParts);
|
|
157
159
|
} else if (block.type === 'thinking') {
|
|
158
160
|
// Handle thinking blocks with signature compatibility check
|
|
159
161
|
if (block.signature && block.signature.length >= MIN_SIGNATURE_LENGTH) {
|
|
@@ -183,5 +185,9 @@ export function convertContentToParts(content, isClaudeModel = false, isGeminiMo
|
|
|
183
185
|
}
|
|
184
186
|
}
|
|
185
187
|
|
|
188
|
+
// Add deferred inlineData at the end (Issue #91)
|
|
189
|
+
// This ensures functionResponse parts are consecutive, which Claude's API requires
|
|
190
|
+
parts.push(...deferredInlineData);
|
|
191
|
+
|
|
186
192
|
return parts;
|
|
187
193
|
}
|
package/src/format/request-converter.js
CHANGED
|
@@ -16,6 +16,7 @@ import {
|
|
|
16
16
|
reorderAssistantContent,
|
|
17
17
|
filterUnsignedThinkingBlocks,
|
|
18
18
|
hasGeminiHistory,
|
|
19
|
+
hasUnsignedThinkingBlocks,
|
|
19
20
|
needsThinkingRecovery,
|
|
20
21
|
closeToolLoopForThinking
|
|
21
22
|
} from './thinking-utils.js';
|
|
@@ -87,16 +88,16 @@ export function convertAnthropicToGoogle(anthropicRequest) {
|
|
|
87
88
|
processedMessages = closeToolLoopForThinking(messages, 'gemini');
|
|
88
89
|
}
|
|
89
90
|
|
|
90
|
-
// For Claude: apply recovery
|
|
91
|
-
//
|
|
92
|
-
|
|
93
|
-
|
|
91
|
+
// For Claude: apply recovery for cross-model (Gemini→Claude) or unsigned thinking blocks
|
|
92
|
+
// Unsigned thinking blocks occur when Claude Code strips signatures it doesn't understand
|
|
93
|
+
const needsClaudeRecovery = hasGeminiHistory(messages) || hasUnsignedThinkingBlocks(messages);
|
|
94
|
+
if (isClaudeModel && isThinking && needsClaudeRecovery && needsThinkingRecovery(messages)) {
|
|
95
|
+
logger.debug('[RequestConverter] Applying thinking recovery for Claude');
|
|
94
96
|
processedMessages = closeToolLoopForThinking(messages, 'claude');
|
|
95
97
|
}
|
|
96
98
|
|
|
97
99
|
// Convert messages to contents, then filter unsigned thinking blocks
|
|
98
|
-
for (let i = 0; i < processedMessages.length; i++) {
|
|
99
|
-
const msg = processedMessages[i];
|
|
100
|
+
for (const msg of processedMessages) {
|
|
100
101
|
let msgContent = msg.content;
|
|
101
102
|
|
|
102
103
|
// For assistant messages, process thinking blocks and reorder content
|
package/src/format/signature-cache.js
CHANGED
|
@@ -112,3 +112,11 @@ export function getCachedSignatureFamily(signature) {
|
|
|
112
112
|
export function getThinkingCacheSize() {
|
|
113
113
|
return thinkingSignatureCache.size;
|
|
114
114
|
}
|
|
115
|
+
|
|
116
|
+
/**
|
|
117
|
+
* Clear all entries from the thinking signature cache.
|
|
118
|
+
* Used for testing cold cache scenarios.
|
|
119
|
+
*/
|
|
120
|
+
export function clearThinkingSignatureCache() {
|
|
121
|
+
thinkingSignatureCache.clear();
|
|
122
|
+
}
|
package/src/format/thinking-utils.js
CHANGED
|
@@ -42,6 +42,22 @@ export function hasGeminiHistory(messages) {
|
|
|
42
42
|
);
|
|
43
43
|
}
|
|
44
44
|
|
|
45
|
+
/**
|
|
46
|
+
* Check if conversation has unsigned thinking blocks that will be dropped.
|
|
47
|
+
* These cause "Expected thinking but found text" errors.
|
|
48
|
+
* @param {Array<Object>} messages - Array of messages
|
|
49
|
+
* @returns {boolean} True if any assistant message has unsigned thinking blocks
|
|
50
|
+
*/
|
|
51
|
+
export function hasUnsignedThinkingBlocks(messages) {
|
|
52
|
+
return messages.some(msg => {
|
|
53
|
+
if (msg.role !== 'assistant' && msg.role !== 'model') return false;
|
|
54
|
+
if (!Array.isArray(msg.content)) return false;
|
|
55
|
+
return msg.content.some(block =>
|
|
56
|
+
isThinkingPart(block) && !hasValidSignature(block)
|
|
57
|
+
);
|
|
58
|
+
});
|
|
59
|
+
}
|
|
60
|
+
|
|
45
61
|
/**
|
|
46
62
|
* Sanitize a thinking part by keeping only allowed fields
|
|
47
63
|
*/
|
package/src/server.js
CHANGED
|
@@ -9,7 +9,6 @@ import cors from 'cors';
|
|
|
9
9
|
import path from 'path';
|
|
10
10
|
import { fileURLToPath } from 'url';
|
|
11
11
|
import { sendMessage, sendMessageStream, listModels, getModelQuotas, getSubscriptionTier } from './cloudcode/index.js';
|
|
12
|
-
import { createCountTokensHandler } from './cloudcode/count-tokens.js';
|
|
13
12
|
import { mountWebUI } from './webui/index.js';
|
|
14
13
|
import { config } from './config.js';
|
|
15
14
|
|
|
@@ -18,6 +17,7 @@ const __dirname = path.dirname(__filename);
|
|
|
18
17
|
import { forceRefresh } from './auth/token-extractor.js';
|
|
19
18
|
import { REQUEST_BODY_LIMIT } from './constants.js';
|
|
20
19
|
import { AccountManager } from './account-manager/index.js';
|
|
20
|
+
import { clearThinkingSignatureCache } from './format/signature-cache.js';
|
|
21
21
|
import { formatDuration } from './utils/helpers.js';
|
|
22
22
|
import { logger } from './utils/logger.js';
|
|
23
23
|
import usageStats from './modules/usage-stats.js';
|
|
@@ -161,6 +161,16 @@ app.use((req, res, next) => {
|
|
|
161
161
|
next();
|
|
162
162
|
});
|
|
163
163
|
|
|
164
|
+
/**
|
|
165
|
+
* Test endpoint - Clear thinking signature cache
|
|
166
|
+
* Used for testing cold cache scenarios in cross-model tests
|
|
167
|
+
*/
|
|
168
|
+
app.post('/test/clear-signature-cache', (req, res) => {
|
|
169
|
+
clearThinkingSignatureCache();
|
|
170
|
+
logger.debug('[Test] Cleared thinking signature cache');
|
|
171
|
+
res.json({ success: true, message: 'Thinking signature cache cleared' });
|
|
172
|
+
});
|
|
173
|
+
|
|
164
174
|
/**
|
|
165
175
|
* Health check endpoint - Detailed status
|
|
166
176
|
* Returns status of all accounts including rate limits and model quotas
|
|
@@ -601,16 +611,14 @@ app.get('/v1/models', async (req, res) => {
|
|
|
601
611
|
* Count tokens endpoint - Anthropic Messages API compatible
|
|
602
612
|
* Uses local tokenization with official tokenizers (@anthropic-ai/tokenizer for Claude, @lenml/tokenizer-gemini for Gemini)
|
|
603
613
|
*/
|
|
604
|
-
app.post('/v1/messages/count_tokens',
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
}
|
|
612
|
-
|
|
613
|
-
return createCountTokensHandler(accountManager)(req, res);
|
|
614
|
+
app.post('/v1/messages/count_tokens', (req, res) => {
|
|
615
|
+
res.status(501).json({
|
|
616
|
+
type: 'error',
|
|
617
|
+
error: {
|
|
618
|
+
type: 'not_implemented',
|
|
619
|
+
message: 'Token counting is not implemented. Use /v1/messages with max_tokens or configure your client to skip token counting.'
|
|
620
|
+
}
|
|
621
|
+
});
|
|
614
622
|
});
|
|
615
623
|
|
|
616
624
|
/**
|
package/src/cloudcode/count-tokens.js
DELETED
|
@@ -1,302 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Token Counter Implementation for antigravity-claude-proxy
|
|
3
|
-
*
|
|
4
|
-
* Implements Anthropic's /v1/messages/count_tokens endpoint
|
|
5
|
-
* Uses official tokenizers for each model family:
|
|
6
|
-
* - Claude: @anthropic-ai/tokenizer
|
|
7
|
-
* - Gemini: @lenml/tokenizer-gemini
|
|
8
|
-
*
|
|
9
|
-
* @see https://platform.claude.com/docs/en/api/messages-count-tokens
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
import { countTokens as claudeCountTokens } from '@anthropic-ai/tokenizer';
|
|
13
|
-
import { fromPreTrained as loadGeminiTokenizer } from '@lenml/tokenizer-gemini';
|
|
14
|
-
import { logger } from '../utils/logger.js';
|
|
15
|
-
import { getModelFamily } from '../constants.js';
|
|
16
|
-
|
|
17
|
-
// Lazy-loaded Gemini tokenizer (138MB, loaded once on first use)
|
|
18
|
-
let geminiTokenizer = null;
|
|
19
|
-
let geminiTokenizerLoading = null;
|
|
20
|
-
|
|
21
|
-
/**
|
|
22
|
-
* Get or initialize the Gemini tokenizer
|
|
23
|
-
* Uses singleton pattern with loading lock to prevent multiple loads
|
|
24
|
-
*
|
|
25
|
-
* @returns {Promise<Object>} Gemini tokenizer instance
|
|
26
|
-
*/
|
|
27
|
-
async function getGeminiTokenizer() {
|
|
28
|
-
if (geminiTokenizer) {
|
|
29
|
-
return geminiTokenizer;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
// Prevent multiple simultaneous loads
|
|
33
|
-
if (geminiTokenizerLoading) {
|
|
34
|
-
return geminiTokenizerLoading;
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
geminiTokenizerLoading = (async () => {
|
|
38
|
-
try {
|
|
39
|
-
logger.debug('[TokenCounter] Loading Gemini tokenizer...');
|
|
40
|
-
geminiTokenizer = await loadGeminiTokenizer();
|
|
41
|
-
logger.debug('[TokenCounter] Gemini tokenizer loaded successfully');
|
|
42
|
-
return geminiTokenizer;
|
|
43
|
-
} catch (error) {
|
|
44
|
-
logger.warn(`[TokenCounter] Failed to load Gemini tokenizer: ${error.message}`);
|
|
45
|
-
throw error;
|
|
46
|
-
} finally {
|
|
47
|
-
geminiTokenizerLoading = null;
|
|
48
|
-
}
|
|
49
|
-
})();
|
|
50
|
-
|
|
51
|
-
return geminiTokenizerLoading;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
/**
|
|
55
|
-
* Count tokens for text using Claude tokenizer
|
|
56
|
-
*
|
|
57
|
-
* @param {string} text - Text to tokenize
|
|
58
|
-
* @returns {number} Token count
|
|
59
|
-
*/
|
|
60
|
-
function countClaudeTokens(text) {
|
|
61
|
-
if (!text) return 0;
|
|
62
|
-
try {
|
|
63
|
-
return claudeCountTokens(text);
|
|
64
|
-
} catch (error) {
|
|
65
|
-
logger.debug(`[TokenCounter] Claude tokenizer error: ${error.message}`);
|
|
66
|
-
return Math.ceil(text.length / 4);
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
/**
|
|
71
|
-
* Count tokens for text using Gemini tokenizer
|
|
72
|
-
*
|
|
73
|
-
* @param {Object} tokenizer - Gemini tokenizer instance
|
|
74
|
-
* @param {string} text - Text to tokenize
|
|
75
|
-
* @returns {number} Token count
|
|
76
|
-
*/
|
|
77
|
-
function countGeminiTokens(tokenizer, text) {
|
|
78
|
-
if (!text) return 0;
|
|
79
|
-
try {
|
|
80
|
-
const tokens = tokenizer.encode(text);
|
|
81
|
-
// Remove BOS token if present (token id 2)
|
|
82
|
-
return tokens[0] === 2 ? tokens.length - 1 : tokens.length;
|
|
83
|
-
} catch (error) {
|
|
84
|
-
logger.debug(`[TokenCounter] Gemini tokenizer error: ${error.message}`);
|
|
85
|
-
return Math.ceil(text.length / 4);
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
/**
|
|
90
|
-
* Estimate tokens for text content using appropriate tokenizer
|
|
91
|
-
*
|
|
92
|
-
* @param {string} text - Text to tokenize
|
|
93
|
-
* @param {string} model - Model name to determine tokenizer
|
|
94
|
-
* @param {Object} geminiTok - Gemini tokenizer instance (optional)
|
|
95
|
-
* @returns {number} Token count
|
|
96
|
-
*/
|
|
97
|
-
function estimateTextTokens(text, model, geminiTok = null) {
|
|
98
|
-
if (!text) return 0;
|
|
99
|
-
|
|
100
|
-
const family = getModelFamily(model);
|
|
101
|
-
|
|
102
|
-
if (family === 'claude') {
|
|
103
|
-
return countClaudeTokens(text);
|
|
104
|
-
} else if (family === 'gemini' && geminiTok) {
|
|
105
|
-
return countGeminiTokens(geminiTok, text);
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
// Fallback for unknown models: rough estimate
|
|
109
|
-
return Math.ceil(text.length / 4);
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
/**
|
|
113
|
-
* Extract text from message content
|
|
114
|
-
*
|
|
115
|
-
* Note: This function only extracts text from 'text' type blocks.
|
|
116
|
-
* Image blocks (type: 'image') and document blocks (type: 'document') are not tokenized
|
|
117
|
-
* and will not contribute to the token count. This is intentional as binary content
|
|
118
|
-
* requires different handling and Anthropic's actual token counting for images uses
|
|
119
|
-
* a fixed estimate (~1600 tokens per image) that depends on image dimensions.
|
|
120
|
-
*
|
|
121
|
-
* @param {string|Array} content - Message content
|
|
122
|
-
* @returns {string} Concatenated text
|
|
123
|
-
*/
|
|
124
|
-
function extractText(content) {
|
|
125
|
-
if (typeof content === 'string') {
|
|
126
|
-
return content;
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
if (Array.isArray(content)) {
|
|
130
|
-
return content
|
|
131
|
-
.filter(block => block.type === 'text')
|
|
132
|
-
.map(block => block.text)
|
|
133
|
-
.join('\n');
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
return '';
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
/**
|
|
140
|
-
* Count tokens locally using model-specific tokenizer
|
|
141
|
-
*
|
|
142
|
-
* @param {Object} request - Anthropic format request
|
|
143
|
-
* @param {Object} geminiTok - Gemini tokenizer instance (optional)
|
|
144
|
-
* @returns {number} Token count
|
|
145
|
-
*/
|
|
146
|
-
function countTokensLocally(request, geminiTok = null) {
|
|
147
|
-
const { messages = [], system, tools, model } = request;
|
|
148
|
-
let totalTokens = 0;
|
|
149
|
-
|
|
150
|
-
// Count system prompt tokens
|
|
151
|
-
if (system) {
|
|
152
|
-
if (typeof system === 'string') {
|
|
153
|
-
totalTokens += estimateTextTokens(system, model, geminiTok);
|
|
154
|
-
} else if (Array.isArray(system)) {
|
|
155
|
-
for (const block of system) {
|
|
156
|
-
if (block.type === 'text') {
|
|
157
|
-
totalTokens += estimateTextTokens(block.text, model, geminiTok);
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
// Count message tokens
|
|
164
|
-
for (const message of messages) {
|
|
165
|
-
// Add overhead for role and structure (~4 tokens per message)
|
|
166
|
-
totalTokens += 4;
|
|
167
|
-
totalTokens += estimateTextTokens(extractText(message.content), model, geminiTok);
|
|
168
|
-
|
|
169
|
-
// Handle tool_use and tool_result blocks
|
|
170
|
-
if (Array.isArray(message.content)) {
|
|
171
|
-
for (const block of message.content) {
|
|
172
|
-
if (block.type === 'tool_use') {
|
|
173
|
-
totalTokens += estimateTextTokens(block.name, model, geminiTok);
|
|
174
|
-
totalTokens += estimateTextTokens(JSON.stringify(block.input), model, geminiTok);
|
|
175
|
-
} else if (block.type === 'tool_result') {
|
|
176
|
-
if (typeof block.content === 'string') {
|
|
177
|
-
totalTokens += estimateTextTokens(block.content, model, geminiTok);
|
|
178
|
-
} else if (Array.isArray(block.content)) {
|
|
179
|
-
totalTokens += estimateTextTokens(extractText(block.content), model, geminiTok);
|
|
180
|
-
}
|
|
181
|
-
} else if (block.type === 'thinking') {
|
|
182
|
-
totalTokens += estimateTextTokens(block.thinking, model, geminiTok);
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
}
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
// Count tool definitions
|
|
189
|
-
if (tools && tools.length > 0) {
|
|
190
|
-
for (const tool of tools) {
|
|
191
|
-
totalTokens += estimateTextTokens(tool.name, model, geminiTok);
|
|
192
|
-
totalTokens += estimateTextTokens(tool.description || '', model, geminiTok);
|
|
193
|
-
totalTokens += estimateTextTokens(JSON.stringify(tool.input_schema || {}), model, geminiTok);
|
|
194
|
-
}
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
return totalTokens;
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
/**
|
|
201
|
-
* Count tokens in a message request
|
|
202
|
-
* Implements Anthropic's /v1/messages/count_tokens endpoint
|
|
203
|
-
* Uses local tokenization for all content types
|
|
204
|
-
*
|
|
205
|
-
* @param {Object} anthropicRequest - Anthropic format request with messages, model, system, tools
|
|
206
|
-
* @param {Object} accountManager - Account manager instance (unused, kept for API compatibility)
|
|
207
|
-
* @param {Object} options - Options (unused, kept for API compatibility)
|
|
208
|
-
* @returns {Promise<Object>} Response with input_tokens count
|
|
209
|
-
*/
|
|
210
|
-
export async function countTokens(anthropicRequest, accountManager = null, options = {}) {
|
|
211
|
-
try {
|
|
212
|
-
const family = getModelFamily(anthropicRequest.model);
|
|
213
|
-
let geminiTok = null;
|
|
214
|
-
|
|
215
|
-
// Load Gemini tokenizer if needed
|
|
216
|
-
if (family === 'gemini') {
|
|
217
|
-
try {
|
|
218
|
-
geminiTok = await getGeminiTokenizer();
|
|
219
|
-
} catch (error) {
|
|
220
|
-
logger.warn(`[TokenCounter] Gemini tokenizer unavailable, using fallback`);
|
|
221
|
-
}
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
const inputTokens = countTokensLocally(anthropicRequest, geminiTok);
|
|
225
|
-
logger.debug(`[TokenCounter] Local count (${family}): ${inputTokens} tokens`);
|
|
226
|
-
|
|
227
|
-
return {
|
|
228
|
-
input_tokens: inputTokens
|
|
229
|
-
};
|
|
230
|
-
|
|
231
|
-
} catch (error) {
|
|
232
|
-
logger.warn(`[TokenCounter] Error: ${error.message}, using character-based fallback`);
|
|
233
|
-
|
|
234
|
-
// Ultimate fallback: character-based estimation
|
|
235
|
-
const { messages = [], system } = anthropicRequest;
|
|
236
|
-
let charCount = 0;
|
|
237
|
-
|
|
238
|
-
if (system) {
|
|
239
|
-
charCount += typeof system === 'string' ? system.length : JSON.stringify(system).length;
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
for (const message of messages) {
|
|
243
|
-
charCount += JSON.stringify(message.content).length;
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
return {
|
|
247
|
-
input_tokens: Math.ceil(charCount / 4)
|
|
248
|
-
};
|
|
249
|
-
}
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
/**
|
|
253
|
-
* Express route handler for /v1/messages/count_tokens
|
|
254
|
-
*
|
|
255
|
-
* @param {Object} accountManager - Account manager instance
|
|
256
|
-
* @returns {Function} Express middleware
|
|
257
|
-
*/
|
|
258
|
-
export function createCountTokensHandler(accountManager) {
|
|
259
|
-
return async (req, res) => {
|
|
260
|
-
try {
|
|
261
|
-
const { messages, model, system, tools, tool_choice, thinking } = req.body;
|
|
262
|
-
|
|
263
|
-
// Validate required fields
|
|
264
|
-
if (!messages || !Array.isArray(messages)) {
|
|
265
|
-
return res.status(400).json({
|
|
266
|
-
type: 'error',
|
|
267
|
-
error: {
|
|
268
|
-
type: 'invalid_request_error',
|
|
269
|
-
message: 'messages is required and must be an array'
|
|
270
|
-
}
|
|
271
|
-
});
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
if (!model) {
|
|
275
|
-
return res.status(400).json({
|
|
276
|
-
type: 'error',
|
|
277
|
-
error: {
|
|
278
|
-
type: 'invalid_request_error',
|
|
279
|
-
message: 'model is required'
|
|
280
|
-
}
|
|
281
|
-
});
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
const result = await countTokens(
|
|
285
|
-
{ messages, model, system, tools, tool_choice, thinking },
|
|
286
|
-
accountManager
|
|
287
|
-
);
|
|
288
|
-
|
|
289
|
-
res.json(result);
|
|
290
|
-
|
|
291
|
-
} catch (error) {
|
|
292
|
-
logger.error(`[TokenCounter] Handler error: ${error.message}`);
|
|
293
|
-
res.status(500).json({
|
|
294
|
-
type: 'error',
|
|
295
|
-
error: {
|
|
296
|
-
type: 'api_error',
|
|
297
|
-
message: error.message
|
|
298
|
-
}
|
|
299
|
-
});
|
|
300
|
-
}
|
|
301
|
-
};
|
|
302
|
-
}
|