vertex-ai-proxy 1.0.3 → 1.1.0

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
package/dist/index.js CHANGED
@@ -1,3 +1,1026 @@
1
- export { createServer, startServer } from './server.js';
2
- export { DEFAULT_CONFIG, CLAUDE_MODELS, GEMINI_MODELS } from './types.js';
1
+ /**
2
+ * Vertex AI Proxy Server
3
+ *
4
+  * Provides OpenAI- and Anthropic-compatible API endpoints for Google Vertex AI models.
5
+ */
6
+ import express from 'express';
7
+ import { GoogleAuth } from 'google-auth-library';
8
+ import { VertexAI } from '@google-cloud/vertexai';
9
+ import * as fs from 'fs';
10
+ import * as path from 'path';
11
+ import * as os from 'os';
12
+ import * as yaml from 'js-yaml';
13
+ // ============================================================================
14
+ // Logging
15
+ // ============================================================================
16
+ const DATA_DIR = path.join(os.homedir(), '.vertex_proxy');
17
+ const LOG_FILE = path.join(DATA_DIR, 'proxy.log');
18
+ const STATS_FILE = path.join(DATA_DIR, 'stats.json');
19
+ let proxyStats = {
20
+ startTime: Date.now(),
21
+ requestCount: 0,
22
+ lastRequestTime: null,
23
+ port: 8001
24
+ };
25
+ function ensureDataDir() {
26
+ if (!fs.existsSync(DATA_DIR)) {
27
+ fs.mkdirSync(DATA_DIR, { recursive: true });
28
+ }
29
+ }
30
+ function log(message, level = 'INFO') {
31
+ ensureDataDir();
32
+ const timestamp = new Date().toISOString();
33
+ const logLine = `[${timestamp}] [${level}] ${message}\n`;
34
+ // Console output
35
+ console.log(logLine.trim());
36
+ // File output
37
+ try {
38
+ fs.appendFileSync(LOG_FILE, logLine);
39
+ // Rotate log if > 10MB
40
+ const stats = fs.statSync(LOG_FILE);
41
+ if (stats.size > 10 * 1024 * 1024) {
42
+ const backupPath = LOG_FILE + '.1';
43
+ if (fs.existsSync(backupPath))
44
+ fs.unlinkSync(backupPath);
45
+ fs.renameSync(LOG_FILE, backupPath);
46
+ }
47
+ }
48
+ catch (e) {
49
+ // Ignore file logging errors
50
+ }
51
+ }
52
+ function saveStats() {
53
+ ensureDataDir();
54
+ try {
55
+ fs.writeFileSync(STATS_FILE, JSON.stringify(proxyStats, null, 2));
56
+ }
57
+ catch (e) {
58
+ // Ignore stats save errors
59
+ }
60
+ }
61
+ function loadStats() {
62
+ try {
63
+ if (fs.existsSync(STATS_FILE)) {
64
+ return JSON.parse(fs.readFileSync(STATS_FILE, 'utf8'));
65
+ }
66
+ }
67
+ catch (e) { }
68
+ return null;
69
+ }
70
+ // ============================================================================
71
+ // Model Catalog
72
+ // ============================================================================
73
+ export const MODEL_CATALOG = {
74
+ // Claude Models
75
+ 'claude-opus-4-5@20251101': {
76
+ id: 'claude-opus-4-5@20251101',
77
+ name: 'Claude Opus 4.5',
78
+ provider: 'anthropic',
79
+ contextWindow: 200000,
80
+ maxTokens: 8192,
81
+ inputPrice: 15,
82
+ outputPrice: 75,
83
+ regions: ['us-east5', 'europe-west1'],
84
+ capabilities: ['text', 'vision', 'tools']
85
+ },
86
+ 'claude-sonnet-4-5@20250514': {
87
+ id: 'claude-sonnet-4-5@20250514',
88
+ name: 'Claude Sonnet 4.5',
89
+ provider: 'anthropic',
90
+ contextWindow: 200000,
91
+ maxTokens: 8192,
92
+ inputPrice: 3,
93
+ outputPrice: 15,
94
+ regions: ['us-east5', 'europe-west1'],
95
+ capabilities: ['text', 'vision', 'tools']
96
+ },
97
+ 'claude-haiku-4-5@20251001': {
98
+ id: 'claude-haiku-4-5@20251001',
99
+ name: 'Claude Haiku 4.5',
100
+ provider: 'anthropic',
101
+ contextWindow: 200000,
102
+ maxTokens: 8192,
103
+ inputPrice: 0.25,
104
+ outputPrice: 1.25,
105
+ regions: ['us-east5', 'europe-west1'],
106
+ capabilities: ['text', 'vision', 'tools']
107
+ },
108
+ // Gemini Models
109
+ 'gemini-3-pro': {
110
+ id: 'gemini-3-pro',
111
+ name: 'Gemini 3 Pro',
112
+ provider: 'google',
113
+ contextWindow: 1000000,
114
+ maxTokens: 8192,
115
+ inputPrice: 2.5,
116
+ outputPrice: 15,
117
+ regions: ['us-central1', 'europe-west4'],
118
+ capabilities: ['text', 'vision', 'audio', 'video']
119
+ },
120
+ 'gemini-2.5-pro': {
121
+ id: 'gemini-2.5-pro',
122
+ name: 'Gemini 2.5 Pro',
123
+ provider: 'google',
124
+ contextWindow: 1000000,
125
+ maxTokens: 8192,
126
+ inputPrice: 1.25,
127
+ outputPrice: 5,
128
+ regions: ['us-central1', 'europe-west4'],
129
+ capabilities: ['text', 'vision']
130
+ },
131
+ 'gemini-2.5-flash': {
132
+ id: 'gemini-2.5-flash',
133
+ name: 'Gemini 2.5 Flash',
134
+ provider: 'google',
135
+ contextWindow: 1000000,
136
+ maxTokens: 8192,
137
+ inputPrice: 0.15,
138
+ outputPrice: 0.60,
139
+ regions: ['us-central1', 'europe-west4'],
140
+ capabilities: ['text', 'vision']
141
+ },
142
+ 'gemini-2.5-flash-lite': {
143
+ id: 'gemini-2.5-flash-lite',
144
+ name: 'Gemini 2.5 Flash Lite',
145
+ provider: 'google',
146
+ contextWindow: 1000000,
147
+ maxTokens: 8192,
148
+ inputPrice: 0.075,
149
+ outputPrice: 0.30,
150
+ regions: ['us-central1', 'europe-west4'],
151
+ capabilities: ['text']
152
+ },
153
+ // Imagen Models
154
+ 'imagen-4.0-generate-001': {
155
+ id: 'imagen-4.0-generate-001',
156
+ name: 'Imagen 4 Generate',
157
+ provider: 'imagen',
158
+ contextWindow: 0,
159
+ maxTokens: 0,
160
+ inputPrice: 0.04,
161
+ outputPrice: 0,
162
+ regions: ['us-central1'],
163
+ capabilities: ['image-generation']
164
+ },
165
+ 'imagen-4.0-fast-generate-001': {
166
+ id: 'imagen-4.0-fast-generate-001',
167
+ name: 'Imagen 4 Fast',
168
+ provider: 'imagen',
169
+ contextWindow: 0,
170
+ maxTokens: 0,
171
+ inputPrice: 0.02,
172
+ outputPrice: 0,
173
+ regions: ['us-central1'],
174
+ capabilities: ['image-generation']
175
+ },
176
+ 'imagen-4.0-ultra-generate-001': {
177
+ id: 'imagen-4.0-ultra-generate-001',
178
+ name: 'Imagen 4 Ultra',
179
+ provider: 'imagen',
180
+ contextWindow: 0,
181
+ maxTokens: 0,
182
+ inputPrice: 0.08,
183
+ outputPrice: 0,
184
+ regions: ['us-central1'],
185
+ capabilities: ['image-generation']
186
+ }
187
+ };
188
+ // ============================================================================
189
+ // Dynamic Region Fallback
190
+ // ============================================================================
191
+ /**
192
+ * Get ordered fallback regions for a model.
193
+ * Priority: us-east5 -> us-central1 (global) -> europe-west1 -> other regions
194
+ */
195
+ function getRegionFallbackOrder(modelId) {
196
+ const modelSpec = MODEL_CATALOG[modelId];
197
+ if (!modelSpec) {
198
+ // Default fallback order if model not found
199
+ return ['us-east5', 'us-central1', 'europe-west1'];
200
+ }
201
+ const modelRegions = modelSpec.regions;
202
+ const priorityOrder = ['us-east5', 'us-central1', 'europe-west1'];
203
+ // Build ordered list: priority regions first (if available), then remaining
204
+ const ordered = [];
205
+ for (const region of priorityOrder) {
206
+ if (modelRegions.includes(region)) {
207
+ ordered.push(region);
208
+ }
209
+ }
210
+ // Add any remaining model regions not in priority list
211
+ for (const region of modelRegions) {
212
+ if (!ordered.includes(region)) {
213
+ ordered.push(region);
214
+ }
215
+ }
216
+ return ordered;
217
+ }
218
+ // ============================================================================
219
+ // Configuration
220
+ // ============================================================================
221
+ function loadConfig() {
222
+ const defaultConfig = {
223
+ project_id: process.env.GOOGLE_CLOUD_PROJECT || '',
224
+ default_region: process.env.VERTEX_PROXY_REGION || 'us-east5',
225
+ google_region: process.env.VERTEX_PROXY_GOOGLE_REGION || 'us-central1',
226
+ model_aliases: {
227
+ 'gpt-4': 'claude-opus-4-5@20251101',
228
+ 'gpt-4-turbo': 'claude-sonnet-4-5@20250514',
229
+ 'gpt-4o': 'claude-sonnet-4-5@20250514',
230
+ 'gpt-4o-mini': 'claude-haiku-4-5@20251001',
231
+ 'gpt-3.5-turbo': 'claude-haiku-4-5@20251001',
232
+ 'claude': 'claude-opus-4-5@20251101',
233
+ 'claude-latest': 'claude-opus-4-5@20251101',
234
+ 'opus': 'claude-opus-4-5@20251101',
235
+ 'sonnet': 'claude-sonnet-4-5@20250514',
236
+ 'haiku': 'claude-haiku-4-5@20251001'
237
+ },
238
+ fallback_chains: {},
239
+ auto_truncate: true,
240
+ reserve_output_tokens: 4096
241
+ };
242
+ // Try to load config file
243
+ const configPaths = [
244
+ process.env.VERTEX_PROXY_CONFIG,
245
+ path.join(os.homedir(), '.vertex-proxy', 'config.yaml'),
246
+ path.join(os.homedir(), '.vertex-proxy', 'config.yml'),
247
+ './config.yaml'
248
+ ].filter(Boolean);
249
+ for (const configPath of configPaths) {
250
+ try {
251
+ if (fs.existsSync(configPath)) {
252
+ const content = fs.readFileSync(configPath, 'utf8');
253
+ const fileConfig = yaml.load(content);
254
+ return { ...defaultConfig, ...fileConfig };
255
+ }
256
+ }
257
+ catch (e) {
258
+ log(`Warning: Could not load config from ${configPath}`, 'WARN');
259
+ }
260
+ }
261
+ return defaultConfig;
262
+ }
263
+ // ============================================================================
264
+ // Helper Functions
265
+ // ============================================================================
266
+ function resolveModel(modelInput, config) {
267
+ // Check aliases first
268
+ if (config.model_aliases[modelInput]) {
269
+ return config.model_aliases[modelInput];
270
+ }
271
+ // Check if it's a known model
272
+ if (MODEL_CATALOG[modelInput]) {
273
+ return modelInput;
274
+ }
275
+ // Try adding version suffix for claude models
276
+ if (modelInput.startsWith('claude-') && !modelInput.includes('@')) {
277
+ // Find matching model
278
+ for (const [id, spec] of Object.entries(MODEL_CATALOG)) {
279
+ if (id.startsWith(modelInput)) {
280
+ return id;
281
+ }
282
+ }
283
+ }
284
+ // Return as-is
285
+ return modelInput;
286
+ }
287
+ function getModelSpec(modelId) {
288
+ return MODEL_CATALOG[modelId];
289
+ }
290
+ function extractSystemMessage(messages) {
291
+ let system = null;
292
+ const filteredMessages = [];
293
+ for (const msg of messages) {
294
+ if (msg.role === 'system') {
295
+ // Combine multiple system messages
296
+ const content = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
297
+ system = system ? `${system}\n\n${content}` : content;
298
+ }
299
+ else {
300
+ filteredMessages.push(msg);
301
+ }
302
+ }
303
+ return { system, messages: filteredMessages };
304
+ }
305
+ function estimateTokens(text) {
306
+ // Rough estimate: ~4 chars per token
307
+ return Math.ceil(text.length / 4);
308
+ }
309
+ function truncateMessages(messages, maxTokens, reserveTokens) {
310
+ const targetTokens = maxTokens - reserveTokens;
311
+ let totalTokens = 0;
312
+ let truncated = false;
313
+ // Always keep the last few messages
314
+ const keepLast = 4;
315
+ const lastMessages = messages.slice(-keepLast);
316
+ const earlierMessages = messages.slice(0, -keepLast);
317
+ // Estimate tokens for last messages
318
+ for (const msg of lastMessages) {
319
+ const content = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
320
+ totalTokens += estimateTokens(content);
321
+ }
322
+ // Add earlier messages from the end until we hit the limit
323
+ const keptEarlier = [];
324
+ for (let i = earlierMessages.length - 1; i >= 0; i--) {
325
+ const msg = earlierMessages[i];
326
+ const content = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
327
+ const tokens = estimateTokens(content);
328
+ if (totalTokens + tokens > targetTokens) {
329
+ truncated = true;
330
+ break;
331
+ }
332
+ keptEarlier.unshift(msg);
333
+ totalTokens += tokens;
334
+ }
335
+ return {
336
+ messages: [...keptEarlier, ...lastMessages],
337
+ truncated
338
+ };
339
+ }
340
+ // ============================================================================
341
+ // API Handlers
342
+ // ============================================================================
343
+ async function handleChatCompletions(req, res, config) {
344
+ const { model: modelInput, messages, stream, max_tokens, temperature } = req.body;
345
+ // Resolve model alias
346
+ const modelId = resolveModel(modelInput, config);
347
+ const modelSpec = getModelSpec(modelId);
348
+ if (!modelSpec) {
349
+ log(`Unknown model: ${modelInput} -> ${modelId}`, 'WARN');
350
+ }
351
+ const provider = modelSpec?.provider || 'anthropic';
352
+ // Extract system message (OpenAI format -> Anthropic format)
353
+ const { system, messages: cleanMessages } = extractSystemMessage(messages);
354
+ // Auto-truncate if needed
355
+ let finalMessages = cleanMessages;
356
+ if (config.auto_truncate && modelSpec) {
357
+ const result = truncateMessages(cleanMessages, modelSpec.contextWindow, config.reserve_output_tokens);
358
+ finalMessages = result.messages;
359
+ if (result.truncated) {
360
+ log(`Dropped ${cleanMessages.length - finalMessages.length} earlier messages to fit the context window`);
361
+ }
362
+ }
363
+ log(`Chat: ${modelInput} -> ${modelId} (${provider}), stream=${stream}, messages=${finalMessages.length}`);
364
+ // Update stats
365
+ proxyStats.requestCount++;
366
+ proxyStats.lastRequestTime = Date.now();
367
+ saveStats();
368
+ try {
369
+ if (provider === 'anthropic') {
370
+ await handleAnthropicChatWithFallback(res, {
371
+ modelId,
372
+ system,
373
+ messages: finalMessages,
374
+ stream: stream ?? false,
375
+ maxTokens: max_tokens || modelSpec?.maxTokens || 4096,
376
+ temperature,
377
+ config
378
+ });
379
+ }
380
+ else if (provider === 'google') {
381
+ await handleGeminiChat(res, {
382
+ modelId,
383
+ system,
384
+ messages: finalMessages,
385
+ stream: stream ?? false,
386
+ maxTokens: max_tokens || modelSpec?.maxTokens || 4096,
387
+ temperature,
388
+ config
389
+ });
390
+ }
391
+ else {
392
+ res.status(400).json({ error: `Unsupported provider: ${provider}` });
393
+ }
394
+ }
395
+ catch (error) {
396
+ log(`Error: ${error.message}`, 'ERROR');
397
+ // Try fallback if configured
398
+ const fallbacks = config.fallback_chains[modelId];
399
+ if (fallbacks && fallbacks.length > 0) {
400
+ log(`Trying model fallback: ${fallbacks[0]}`);
401
+ req.body.model = fallbacks[0];
402
+ return handleChatCompletions(req, res, config);
403
+ }
404
+ res.status(500).json({
405
+ error: {
406
+ message: error.message,
407
+ type: 'proxy_error',
408
+ code: error.status || 500
409
+ }
410
+ });
411
+ }
412
+ }
413
+ async function handleAnthropicChatWithFallback(res, options) {
414
+ const { modelId, config } = options;
415
+ const regions = getRegionFallbackOrder(modelId);
416
+ let lastError = null;
417
+ for (const region of regions) {
418
+ try {
419
+ log(`Trying region: ${region} for model ${modelId}`);
420
+ await handleAnthropicChat(res, { ...options, region });
421
+ return; // Success, exit
422
+ }
423
+ catch (error) {
424
+ lastError = error;
425
+ log(`Region ${region} failed: ${error.message}`, 'WARN');
426
+ // Only retry on certain errors (capacity, unavailable, etc.)
427
+ const shouldRetry = error.status === 429 || // Rate limit
428
+ error.status === 503 || // Service unavailable
429
+ error.status === 500 || // Internal error
430
+ error.message?.includes('capacity') ||
431
+ error.message?.includes('overloaded') ||
432
+ error.message?.includes('unavailable');
433
+ if (!shouldRetry) {
434
+ throw error; // Don't retry on client errors (400, 401, etc.)
435
+ }
436
+ // Continue to next region
437
+ }
438
+ }
439
+ // All regions failed
440
+ throw lastError || new Error('All regions failed');
441
+ }
442
+ async function handleAnthropicChat(res, options) {
443
+ const { modelId, system, messages, stream, maxTokens, temperature, config, region } = options;
444
+ // Get access token via google-auth-library
445
+ const auth = new GoogleAuth({ scopes: 'https://www.googleapis.com/auth/cloud-platform' });
446
+ const client = await auth.getClient();
447
+ const tokenResponse = await client.getAccessToken();
448
+ const accessToken = tokenResponse.token;
449
+ const useRegion = region || config.default_region;
450
+ const projectId = config.project_id;
451
+ const url = `https://${useRegion}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${useRegion}/publishers/anthropic/models/${modelId}:${stream ? 'streamRawPredict' : 'rawPredict'}`;
452
+ // Convert messages to Anthropic format
453
+ const anthropicMessages = messages.map(msg => ({
454
+ role: msg.role,
455
+ content: msg.content
456
+ }));
457
+ const requestBody = {
458
+ anthropic_version: 'vertex-2023-10-16',
459
+ max_tokens: maxTokens,
460
+ messages: anthropicMessages
461
+ };
462
+ if (system) {
463
+ requestBody.system = system;
464
+ }
465
+ if (temperature !== undefined) {
466
+ requestBody.temperature = temperature;
467
+ }
468
+ if (stream) {
469
+ requestBody.stream = true;
470
+ res.setHeader('Content-Type', 'text/event-stream');
471
+ res.setHeader('Cache-Control', 'no-cache');
472
+ res.setHeader('Connection', 'keep-alive');
473
+ const response = await fetch(url, {
474
+ method: 'POST',
475
+ headers: {
476
+ 'Authorization': `Bearer ${accessToken}`,
477
+ 'Content-Type': 'application/json'
478
+ },
479
+ body: JSON.stringify(requestBody)
480
+ });
481
+ if (!response.ok) {
482
+ const errorText = await response.text();
483
+ throw { status: response.status, message: errorText };
484
+ }
485
+ const reader = response.body?.getReader();
486
+ const decoder = new TextDecoder();
487
+ let buffer = '';
488
+ if (reader) {
489
+ while (true) {
490
+ const { done, value } = await reader.read();
491
+ if (done)
492
+ break;
493
+ buffer += decoder.decode(value, { stream: true });
494
+ const lines = buffer.split('\n');
495
+ buffer = lines.pop() || '';
496
+ for (const line of lines) {
497
+ if (line.startsWith('data: ')) {
498
+ const data = line.slice(6).trim();
499
+ if (!data || data === '[DONE]')
500
+ continue;
501
+ try {
502
+ const event = JSON.parse(data);
503
+ if (event.type === 'content_block_delta' && event.delta?.type === 'text_delta') {
504
+ const chunk = {
505
+ id: `chatcmpl-${Date.now()}`,
506
+ object: 'chat.completion.chunk',
507
+ created: Math.floor(Date.now() / 1000),
508
+ model: modelId,
509
+ choices: [{
510
+ index: 0,
511
+ delta: { content: event.delta.text },
512
+ finish_reason: null
513
+ }]
514
+ };
515
+ res.write(`data: ${JSON.stringify(chunk)}\n\n`);
516
+ }
517
+ else if (event.type === 'message_stop') {
518
+ const chunk = {
519
+ id: `chatcmpl-${Date.now()}`,
520
+ object: 'chat.completion.chunk',
521
+ created: Math.floor(Date.now() / 1000),
522
+ model: modelId,
523
+ choices: [{
524
+ index: 0,
525
+ delta: {},
526
+ finish_reason: 'stop'
527
+ }]
528
+ };
529
+ res.write(`data: ${JSON.stringify(chunk)}\n\n`);
530
+ }
531
+ }
532
+ catch (e) {
533
+ // skip non-JSON lines
534
+ }
535
+ }
536
+ }
537
+ }
538
+ }
539
+ res.write('data: [DONE]\n\n');
540
+ res.end();
541
+ }
542
+ else {
543
+ // Non-streaming response
544
+ const response = await fetch(url, {
545
+ method: 'POST',
546
+ headers: {
547
+ 'Authorization': `Bearer ${accessToken}`,
548
+ 'Content-Type': 'application/json'
549
+ },
550
+ body: JSON.stringify(requestBody)
551
+ });
552
+ if (!response.ok) {
553
+ const errorText = await response.text();
554
+ throw { status: response.status, message: errorText };
555
+ }
556
+ const data = await response.json();
557
+ // Convert to OpenAI format
558
+ const content = (data.content || [])
559
+ .filter((block) => block.type === 'text')
560
+ .map((block) => block.text)
561
+ .join('');
562
+ res.json({
563
+ id: data.id || `chatcmpl-${Date.now()}`,
564
+ object: 'chat.completion',
565
+ created: Math.floor(Date.now() / 1000),
566
+ model: modelId,
567
+ choices: [{
568
+ index: 0,
569
+ message: {
570
+ role: 'assistant',
571
+ content
572
+ },
573
+ finish_reason: data.stop_reason === 'end_turn' ? 'stop' : data.stop_reason
574
+ }],
575
+ usage: {
576
+ prompt_tokens: data.usage?.input_tokens || 0,
577
+ completion_tokens: data.usage?.output_tokens || 0,
578
+ total_tokens: (data.usage?.input_tokens || 0) + (data.usage?.output_tokens || 0)
579
+ }
580
+ });
581
+ }
582
+ }
583
+ async function handleGeminiChat(res, options) {
584
+ const { modelId, system, messages, stream, maxTokens, temperature, config } = options;
585
+ const vertexAI = new VertexAI({
586
+ project: config.project_id,
587
+ location: config.google_region
588
+ });
589
+ const model = vertexAI.getGenerativeModel({
590
+ model: modelId,
591
+ generationConfig: {
592
+ maxOutputTokens: maxTokens,
593
+ temperature: temperature
594
+ },
595
+ systemInstruction: system || undefined
596
+ });
597
+ // Convert messages to Gemini format
598
+ const contents = messages.map(msg => ({
599
+ role: msg.role === 'assistant' ? 'model' : 'user',
600
+ parts: [{ text: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content) }]
601
+ }));
602
+ if (stream) {
603
+ res.setHeader('Content-Type', 'text/event-stream');
604
+ res.setHeader('Cache-Control', 'no-cache');
605
+ res.setHeader('Connection', 'keep-alive');
606
+ const result = await model.generateContentStream({ contents });
607
+ for await (const chunk of result.stream) {
608
+ const text = chunk.candidates?.[0]?.content?.parts?.[0]?.text || '';
609
+ if (text) {
610
+ const openaiChunk = {
611
+ id: `chatcmpl-${Date.now()}`,
612
+ object: 'chat.completion.chunk',
613
+ created: Math.floor(Date.now() / 1000),
614
+ model: modelId,
615
+ choices: [{
616
+ index: 0,
617
+ delta: { content: text },
618
+ finish_reason: null
619
+ }]
620
+ };
621
+ res.write(`data: ${JSON.stringify(openaiChunk)}\n\n`);
622
+ }
623
+ }
624
+ const finalChunk = {
625
+ id: `chatcmpl-${Date.now()}`,
626
+ object: 'chat.completion.chunk',
627
+ created: Math.floor(Date.now() / 1000),
628
+ model: modelId,
629
+ choices: [{
630
+ index: 0,
631
+ delta: {},
632
+ finish_reason: 'stop'
633
+ }]
634
+ };
635
+ res.write(`data: ${JSON.stringify(finalChunk)}\n\n`);
636
+ res.write('data: [DONE]\n\n');
637
+ res.end();
638
+ }
639
+ else {
640
+ const result = await model.generateContent({ contents });
641
+ const response = result.response;
642
+ const text = response.candidates?.[0]?.content?.parts?.[0]?.text || '';
643
+ res.json({
644
+ id: `chatcmpl-${Date.now()}`,
645
+ object: 'chat.completion',
646
+ created: Math.floor(Date.now() / 1000),
647
+ model: modelId,
648
+ choices: [{
649
+ index: 0,
650
+ message: {
651
+ role: 'assistant',
652
+ content: text
653
+ },
654
+ finish_reason: 'stop'
655
+ }],
656
+ usage: {
657
+ prompt_tokens: response.usageMetadata?.promptTokenCount || 0,
658
+ completion_tokens: response.usageMetadata?.candidatesTokenCount || 0,
659
+ total_tokens: response.usageMetadata?.totalTokenCount || 0
660
+ }
661
+ });
662
+ }
663
+ }
664
+ async function handleAnthropicMessages(req, res, config) {
665
+ const { model: modelInput, messages, system, stream, max_tokens, temperature } = req.body;
666
+ const modelId = resolveModel(modelInput, config);
667
+ const modelSpec = getModelSpec(modelId);
668
+ log(`Messages API: ${modelInput} -> ${modelId}, stream=${stream}`);
669
+ // Update stats
670
+ proxyStats.requestCount++;
671
+ proxyStats.lastRequestTime = Date.now();
672
+ saveStats();
673
+ const regions = getRegionFallbackOrder(modelId);
674
+ let lastError = null;
675
+ for (const region of regions) {
676
+ try {
677
+ log(`Trying region: ${region} for model ${modelId}`);
678
+ // Get access token via google-auth-library
679
+ const auth = new GoogleAuth({ scopes: 'https://www.googleapis.com/auth/cloud-platform' });
680
+ const client = await auth.getClient();
681
+ const tokenResponse = await client.getAccessToken();
682
+ const accessToken = tokenResponse.token;
683
+ const projectId = config.project_id;
684
+ const url = `https://${region}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${region}/publishers/anthropic/models/${modelId}:${stream ? 'streamRawPredict' : 'rawPredict'}`;
685
+ const requestBody = {
686
+ anthropic_version: 'vertex-2023-10-16',
687
+ max_tokens: max_tokens || modelSpec?.maxTokens || 4096,
688
+ messages: messages
689
+ };
690
+ if (system) {
691
+ requestBody.system = system;
692
+ }
693
+ if (temperature !== undefined) {
694
+ requestBody.temperature = temperature;
695
+ }
696
+ if (stream) {
697
+ requestBody.stream = true;
698
+ }
699
+ const response = await fetch(url, {
700
+ method: 'POST',
701
+ headers: {
702
+ 'Authorization': `Bearer ${accessToken}`,
703
+ 'Content-Type': 'application/json'
704
+ },
705
+ body: JSON.stringify(requestBody)
706
+ });
707
+ if (!response.ok) {
708
+ const errorText = await response.text();
709
+ const error = { status: response.status, message: errorText };
710
+ // Only retry on certain errors
711
+ const shouldRetry = response.status === 429 ||
712
+ response.status === 503 ||
713
+ response.status === 500 ||
714
+ errorText.includes('capacity') ||
715
+ errorText.includes('overloaded');
716
+ if (!shouldRetry) {
717
+ throw error;
718
+ }
719
+ lastError = error;
720
+ log(`Region ${region} failed: ${errorText}`, 'WARN');
721
+ continue;
722
+ }
723
+ if (stream) {
724
+ res.setHeader('Content-Type', 'text/event-stream');
725
+ res.setHeader('Cache-Control', 'no-cache');
726
+ res.setHeader('Connection', 'keep-alive');
727
+ const reader = response.body?.getReader();
728
+ const decoder = new TextDecoder();
729
+ let buffer = '';
730
+ if (reader) {
731
+ while (true) {
732
+ const { done, value } = await reader.read();
733
+ if (done)
734
+ break;
735
+ buffer += decoder.decode(value, { stream: true });
736
+ const lines = buffer.split('\n');
737
+ buffer = lines.pop() || '';
738
+ for (const line of lines) {
739
+ if (line.startsWith('event: ') || line.startsWith('data: ')) {
740
+ res.write(line + '\n');
741
+ }
742
+ else if (line === '') {
743
+ res.write('\n');
744
+ }
745
+ }
746
+ }
747
+ }
748
+ res.end();
749
+ }
750
+ else {
751
+ const data = await response.json();
752
+ res.json(data);
753
+ }
754
+ return; // Success
755
+ }
756
+ catch (error) {
757
+ lastError = error;
758
+ log(`Region ${region} failed: ${error.message}`, 'WARN');
759
+ // Check if we should retry
760
+ const shouldRetry = error.status === 429 ||
761
+ error.status === 503 ||
762
+ error.status === 500 ||
763
+ error.message?.includes('capacity') ||
764
+ error.message?.includes('overloaded');
765
+ if (!shouldRetry) {
766
+ throw error;
767
+ }
768
+ }
769
+ }
770
+ // All regions failed
771
+ log(`All regions failed for ${modelId}`, 'ERROR');
772
+ res.status(lastError?.status || 500).json({
773
+ error: {
774
+ type: 'api_error',
775
+ message: lastError?.message || 'All regions failed'
776
+ }
777
+ });
778
+ }
779
+ async function handleModels(req, res, config) {
780
+ const models = Object.entries(MODEL_CATALOG).map(([id, spec]) => ({
781
+ id,
782
+ object: 'model',
783
+ created: 1700000000,
784
+ owned_by: spec.provider === 'anthropic' ? 'anthropic' : 'google',
785
+ permission: [],
786
+ root: id,
787
+ parent: null,
788
+ // Extra info
789
+ _vertex_proxy: {
790
+ name: spec.name,
791
+ provider: spec.provider,
792
+ context_window: spec.contextWindow,
793
+ max_tokens: spec.maxTokens,
794
+ input_price_per_1m: spec.inputPrice,
795
+ output_price_per_1m: spec.outputPrice,
796
+ regions: spec.regions,
797
+ capabilities: spec.capabilities
798
+ }
799
+ }));
800
+ // Add aliases
801
+ for (const [alias, target] of Object.entries(config.model_aliases)) {
802
+ const targetSpec = MODEL_CATALOG[target];
803
+ if (targetSpec) {
804
+ models.push({
805
+ id: alias,
806
+ object: 'model',
807
+ created: 1700000000,
808
+ owned_by: 'vertex-proxy',
809
+ permission: [],
810
+ root: target,
811
+ parent: null,
812
+ _vertex_proxy: {
813
+ name: `${alias} → ${targetSpec.name}`,
814
+ provider: targetSpec.provider,
815
+ context_window: targetSpec.contextWindow,
816
+ max_tokens: targetSpec.maxTokens,
817
+ input_price_per_1m: targetSpec.inputPrice,
818
+ output_price_per_1m: targetSpec.outputPrice,
819
+ regions: targetSpec.regions,
820
+ capabilities: targetSpec.capabilities
821
+ }
822
+ });
823
+ }
824
+ }
825
+ res.json({
826
+ object: 'list',
827
+ data: models
828
+ });
829
+ }
830
+ // ============================================================================
831
+ // Image Generation Handler (Imagen)
832
+ // ============================================================================
833
+ async function handleImageGeneration(req, res, config) {
834
+ try {
835
+ const { model, prompt, n = 1, size = '1024x1024' } = req.body;
836
+ // Update stats
837
+ proxyStats.requestCount++;
838
+ proxyStats.lastRequestTime = Date.now();
839
+ saveStats();
840
+ // Resolve model alias
841
+ let resolvedModel = config.model_aliases[model] || model || 'imagen-4.0-generate-001';
842
+ const modelSpec = MODEL_CATALOG[resolvedModel];
843
+ if (!modelSpec || modelSpec.provider !== 'imagen') {
844
+ return res.status(400).json({
845
+ error: {
846
+ message: `Model ${resolvedModel} is not an image generation model`,
847
+ type: 'invalid_request_error'
848
+ }
849
+ });
850
+ }
851
+ if (!prompt) {
852
+ return res.status(400).json({
853
+ error: {
854
+ message: 'prompt is required',
855
+ type: 'invalid_request_error'
856
+ }
857
+ });
858
+ }
859
+ log(`Imagen: ${resolvedModel}, prompt="${prompt.substring(0, 50)}..."`);
860
+ // Parse size to get aspect ratio
861
+ const [width, height] = size.split('x').map(Number);
862
+ let aspectRatio = '1:1';
863
+ if (width > height)
864
+ aspectRatio = '16:9';
865
+ else if (height > width)
866
+ aspectRatio = '9:16';
867
+ // Build Vertex AI Imagen API request
868
+ const region = config.google_region || 'us-central1';
869
+ const endpoint = `https://${region}-aiplatform.googleapis.com/v1/projects/${config.project_id}/locations/${region}/publishers/google/models/${resolvedModel}:predict`;
870
+ // Get access token
872
+ const auth = new GoogleAuth({
873
+ scopes: ['https://www.googleapis.com/auth/cloud-platform']
874
+ });
875
+ const client = await auth.getClient();
876
+ const accessToken = await client.getAccessToken();
877
+ const imagenRequest = {
878
+ instances: [{ prompt }],
879
+ parameters: {
880
+ sampleCount: Math.min(n, 4), // Imagen supports 1-4
881
+ aspectRatio,
882
+ // Add safety settings if needed
883
+ safetySetting: 'block_medium_and_above'
884
+ }
885
+ };
886
+ const response = await fetch(endpoint, {
887
+ method: 'POST',
888
+ headers: {
889
+ 'Authorization': `Bearer ${accessToken.token}`,
890
+ 'Content-Type': 'application/json'
891
+ },
892
+ body: JSON.stringify(imagenRequest)
893
+ });
894
+ if (!response.ok) {
895
+ const errorText = await response.text();
896
+ log(`Imagen error: ${response.status} ${errorText}`, 'ERROR');
897
+ return res.status(response.status).json({
898
+ error: {
899
+ message: `Imagen API error: ${errorText}`,
900
+ type: 'api_error'
901
+ }
902
+ });
903
+ }
904
+ const result = await response.json();
905
+ // Convert Vertex AI response to OpenAI format
906
+ const images = (result.predictions || []).map((pred, index) => ({
907
+ b64_json: pred.bytesBase64Encoded,
908
+ revised_prompt: prompt
909
+ }));
910
+ res.json({
911
+ created: Math.floor(Date.now() / 1000),
912
+ data: images
913
+ });
914
+ }
915
+ catch (error) {
916
+ log(`Imagen error: ${error.message}`, 'ERROR');
917
+ res.status(500).json({
918
+ error: {
919
+ message: error.message,
920
+ type: 'api_error'
921
+ }
922
+ });
923
+ }
924
+ }
925
+ // ============================================================================
926
+ // Server Setup
927
+ // ============================================================================
928
+ export async function startProxy(daemonMode = false) {
929
+ const config = loadConfig();
930
+ if (!config.project_id) {
931
+ console.error('Error: GOOGLE_CLOUD_PROJECT is required');
932
+ process.exit(1);
933
+ }
934
+ const app = express();
935
+ const port = parseInt(process.env.VERTEX_PROXY_PORT || '8001');
936
+ // Initialize stats
937
+ proxyStats = {
938
+ startTime: Date.now(),
939
+ requestCount: 0,
940
+ lastRequestTime: null,
941
+ port
942
+ };
943
+ saveStats();
944
+ // Middleware
945
+ app.use(express.json({ limit: '50mb' }));
946
+ // Logging middleware
947
+ app.use((req, res, next) => {
948
+ const start = Date.now();
949
+ res.on('finish', () => {
950
+ const duration = Date.now() - start;
951
+ log(`${req.method} ${req.path} ${res.statusCode} ${duration}ms`);
952
+ });
953
+ next();
954
+ });
955
+ // Routes
956
+ app.get('/', (req, res) => {
957
+ res.json({
958
+ name: 'Vertex AI Proxy',
959
+ version: '1.1.0',
960
+ status: 'running',
961
+ project: config.project_id,
962
+ uptime: Math.floor((Date.now() - proxyStats.startTime) / 1000),
963
+ requestCount: proxyStats.requestCount,
964
+ regions: {
965
+ claude: config.default_region,
966
+ gemini: config.google_region,
967
+ imagen: config.google_region
968
+ },
969
+ endpoints: {
970
+ models: '/v1/models',
971
+ chat: '/v1/chat/completions',
972
+ messages: '/v1/messages',
973
+ images: '/v1/images/generations'
974
+ }
975
+ });
976
+ });
977
+ app.get('/health', (req, res) => {
978
+ res.json({
979
+ status: 'ok',
980
+ uptime: Math.floor((Date.now() - proxyStats.startTime) / 1000),
981
+ requestCount: proxyStats.requestCount
982
+ });
983
+ });
984
+ app.get('/v1/models', (req, res) => handleModels(req, res, config));
985
+ app.post('/v1/chat/completions', (req, res) => handleChatCompletions(req, res, config));
986
+ app.post('/v1/messages', (req, res) => handleAnthropicMessages(req, res, config));
987
+ app.post('/messages', (req, res) => handleAnthropicMessages(req, res, config));
988
+ // Image generation (Imagen)
989
+ app.post('/v1/images/generations', (req, res) => handleImageGeneration(req, res, config));
990
+ // Start server
991
+ const server = app.listen(port, () => {
992
+ const banner = `
993
+ ╔══════════════════════════════════════════════════════════╗
994
+ ║ Vertex AI Proxy v1.1.0 ║
995
+ ╠══════════════════════════════════════════════════════════╣
996
+ ║ Status: Running ║
997
+ ║ Port: ${port.toString().padEnd(45)}║
998
+ ║ Project: ${config.project_id.padEnd(45)}║
999
+ ║ Claude: ${config.default_region.padEnd(45)}║
1000
+ ║ Gemini: ${config.google_region.padEnd(45)}║
1001
+ ╠══════════════════════════════════════════════════════════╣
1002
+ ║ Endpoints: ║
1003
+ ║ GET /v1/models List models ║
1004
+ ║ POST /v1/chat/completions OpenAI chat format ║
1005
+ ║ POST /v1/messages Anthropic format ║
1006
+ ║ POST /v1/images/generations Image generation ║
1007
+ ╠══════════════════════════════════════════════════════════╣
1008
+ ║ Features: ║
1009
+ ║ • Dynamic region fallback (us-east5 → global → EU) ║
1010
+ ║ • Logs: ~/.vertex_proxy/proxy.log ║
1011
+ ╚══════════════════════════════════════════════════════════╝
1012
+ `;
1013
+ if (!daemonMode) {
1014
+ console.log(banner);
1015
+ }
1016
+ log(`Server started on port ${port}`);
1017
+ });
1018
+ return server;
1019
+ }
1020
+ // Export for daemon management
1021
+ export { proxyStats, loadStats, DATA_DIR, LOG_FILE };
1022
+ // Run if executed directly (this is an ES module, so `require` is not defined; compare the module URL instead)
1023
+ if (import.meta.url === `file://${process.argv[1]}`) {
1024
+ startProxy();
1025
+ }
3
1026
  //# sourceMappingURL=index.js.map
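
Example usage (illustrative only, not part of the published package): a minimal client sketch against the endpoints this version exposes. It assumes the proxy is already running locally on the default port 8001 with GOOGLE_CLOUD_PROJECT set, uses the 'sonnet' alias from the default configuration, and requires Node 18+ for the global fetch; the file name and prompt are placeholders.

// chat-example.mjs: hypothetical client script; base URL, model alias, and prompt are assumptions.
const BASE_URL = process.env.VERTEX_PROXY_URL || 'http://localhost:8001';

// List the models and aliases the proxy advertises via GET /v1/models.
const models = await fetch(`${BASE_URL}/v1/models`).then((r) => r.json());
console.log('available models:', models.data.map((m) => m.id).join(', '));

// Non-streaming chat completion via POST /v1/chat/completions (OpenAI format).
const response = await fetch(`${BASE_URL}/v1/chat/completions`, {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'sonnet',
    max_tokens: 256,
    messages: [
      { role: 'system', content: 'You are a concise assistant.' },
      { role: 'user', content: 'Say hello in one sentence.' }
    ]
  })
});

if (!response.ok) {
  throw new Error(`Proxy returned ${response.status}: ${await response.text()}`);
}
const completion = await response.json();
console.log(completion.choices[0].message.content);

The same route accepts stream: true for server-sent event output in OpenAI chunk format, and Anthropic-format clients can use POST /v1/messages instead.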