@relayplane/proxy 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +120 -221
- package/dist/__tests__/model-suggestions.test.d.ts +2 -0
- package/dist/__tests__/model-suggestions.test.d.ts.map +1 -0
- package/dist/__tests__/model-suggestions.test.js +67 -0
- package/dist/__tests__/model-suggestions.test.js.map +1 -0
- package/dist/__tests__/routing-aliases.test.d.ts +2 -0
- package/dist/__tests__/routing-aliases.test.d.ts.map +1 -0
- package/dist/__tests__/routing-aliases.test.js +81 -0
- package/dist/__tests__/routing-aliases.test.js.map +1 -0
- package/dist/cli.d.ts +36 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +304 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +80 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +208 -0
- package/dist/config.js.map +1 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +60 -0
- package/dist/index.js.map +1 -0
- package/dist/standalone-proxy.d.ts +101 -0
- package/dist/standalone-proxy.d.ts.map +1 -0
- package/dist/standalone-proxy.js +2524 -0
- package/dist/standalone-proxy.js.map +1 -0
- package/dist/swarm-client.d.ts +87 -0
- package/dist/swarm-client.d.ts.map +1 -0
- package/dist/swarm-client.js +205 -0
- package/dist/swarm-client.js.map +1 -0
- package/dist/telemetry.d.ts +127 -0
- package/dist/telemetry.d.ts.map +1 -0
- package/dist/telemetry.js +426 -0
- package/dist/telemetry.js.map +1 -0
- package/dist/utils/model-suggestions.d.ts +28 -0
- package/dist/utils/model-suggestions.d.ts.map +1 -0
- package/dist/utils/model-suggestions.js +50 -0
- package/dist/utils/model-suggestions.js.map +1 -0
- package/package.json +35 -29
|
@@ -0,0 +1,2524 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* RelayPlane L2/L3 Proxy Server
|
|
4
|
+
*
|
|
5
|
+
* An LLM Gateway proxy that intelligently routes requests
|
|
6
|
+
* to the optimal model using @relayplane/core.
|
|
7
|
+
*
|
|
8
|
+
* Supports:
|
|
9
|
+
* - OpenAI-compatible API (/v1/chat/completions)
|
|
10
|
+
* - Native Anthropic API (/v1/messages) for Claude Code integration
|
|
11
|
+
* - Streaming (SSE) for both OpenAI and Anthropic formats
|
|
12
|
+
* - Auth passthrough for Claude Code (OAuth/subscription billing)
|
|
13
|
+
* - Cross-provider routing (Anthropic, OpenAI, Google, xAI)
|
|
14
|
+
* - Tool/function calling with format conversion
|
|
15
|
+
*
|
|
16
|
+
* Authentication:
|
|
17
|
+
* - Anthropic: Passthrough incoming Authorization header OR ANTHROPIC_API_KEY env
|
|
18
|
+
* - Other providers: Require provider-specific API key env vars
|
|
19
|
+
*
|
|
20
|
+
* @packageDocumentation
|
|
21
|
+
*/
|
|
22
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
23
|
+
if (k2 === undefined) k2 = k;
|
|
24
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
25
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
26
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
27
|
+
}
|
|
28
|
+
Object.defineProperty(o, k2, desc);
|
|
29
|
+
}) : (function(o, m, k, k2) {
|
|
30
|
+
if (k2 === undefined) k2 = k;
|
|
31
|
+
o[k2] = m[k];
|
|
32
|
+
}));
|
|
33
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
34
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
35
|
+
}) : function(o, v) {
|
|
36
|
+
o["default"] = v;
|
|
37
|
+
});
|
|
38
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
39
|
+
var ownKeys = function(o) {
|
|
40
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
41
|
+
var ar = [];
|
|
42
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
43
|
+
return ar;
|
|
44
|
+
};
|
|
45
|
+
return ownKeys(o);
|
|
46
|
+
};
|
|
47
|
+
return function (mod) {
|
|
48
|
+
if (mod && mod.__esModule) return mod;
|
|
49
|
+
var result = {};
|
|
50
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
51
|
+
__setModuleDefault(result, mod);
|
|
52
|
+
return result;
|
|
53
|
+
};
|
|
54
|
+
})();
|
|
55
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
56
|
+
exports.SMART_ALIASES = exports.RELAYPLANE_ALIASES = exports.MODEL_MAPPING = exports.DEFAULT_ENDPOINTS = void 0;
|
|
57
|
+
exports.getAvailableModelNames = getAvailableModelNames;
|
|
58
|
+
exports.resolveModelAlias = resolveModelAlias;
|
|
59
|
+
exports.parseModelSuffix = parseModelSuffix;
|
|
60
|
+
exports.classifyComplexity = classifyComplexity;
|
|
61
|
+
exports.shouldEscalate = shouldEscalate;
|
|
62
|
+
exports.startProxy = startProxy;
|
|
63
|
+
const http = __importStar(require("node:http"));
|
|
64
|
+
const fs = __importStar(require("node:fs"));
|
|
65
|
+
const os = __importStar(require("node:os"));
|
|
66
|
+
const path = __importStar(require("node:path"));
|
|
67
|
+
const core_1 = require("@relayplane/core");
|
|
68
|
+
const model_suggestions_js_1 = require("./utils/model-suggestions.js");
|
|
69
|
+
/**
|
|
70
|
+
* Default provider endpoints
|
|
71
|
+
*/
|
|
72
|
+
exports.DEFAULT_ENDPOINTS = {
|
|
73
|
+
anthropic: {
|
|
74
|
+
baseUrl: 'https://api.anthropic.com/v1',
|
|
75
|
+
apiKeyEnv: 'ANTHROPIC_API_KEY',
|
|
76
|
+
},
|
|
77
|
+
openai: {
|
|
78
|
+
baseUrl: 'https://api.openai.com/v1',
|
|
79
|
+
apiKeyEnv: 'OPENAI_API_KEY',
|
|
80
|
+
},
|
|
81
|
+
google: {
|
|
82
|
+
baseUrl: 'https://generativelanguage.googleapis.com/v1beta',
|
|
83
|
+
apiKeyEnv: 'GEMINI_API_KEY',
|
|
84
|
+
},
|
|
85
|
+
xai: {
|
|
86
|
+
baseUrl: 'https://api.x.ai/v1',
|
|
87
|
+
apiKeyEnv: 'XAI_API_KEY',
|
|
88
|
+
},
|
|
89
|
+
moonshot: {
|
|
90
|
+
baseUrl: 'https://api.moonshot.cn/v1',
|
|
91
|
+
apiKeyEnv: 'MOONSHOT_API_KEY',
|
|
92
|
+
},
|
|
93
|
+
};
|
|
94
|
+
/**
|
|
95
|
+
* Model to provider/model mapping
|
|
96
|
+
*/
|
|
97
|
+
exports.MODEL_MAPPING = {
|
|
98
|
+
// Anthropic models (using correct API model IDs)
|
|
99
|
+
'claude-opus-4-5': { provider: 'anthropic', model: 'claude-opus-4-5-20250514' },
|
|
100
|
+
'claude-sonnet-4': { provider: 'anthropic', model: 'claude-sonnet-4-20250514' },
|
|
101
|
+
'claude-3-5-sonnet': { provider: 'anthropic', model: 'claude-3-5-sonnet-20241022' },
|
|
102
|
+
'claude-3-5-haiku': { provider: 'anthropic', model: 'claude-3-5-haiku-20241022' },
|
|
103
|
+
haiku: { provider: 'anthropic', model: 'claude-3-5-haiku-20241022' },
|
|
104
|
+
sonnet: { provider: 'anthropic', model: 'claude-sonnet-4-20250514' },
|
|
105
|
+
opus: { provider: 'anthropic', model: 'claude-opus-4-5-20250514' },
|
|
106
|
+
// OpenAI models
|
|
107
|
+
'gpt-4o': { provider: 'openai', model: 'gpt-4o' },
|
|
108
|
+
'gpt-4o-mini': { provider: 'openai', model: 'gpt-4o-mini' },
|
|
109
|
+
'gpt-4.1': { provider: 'openai', model: 'gpt-4.1' },
|
|
110
|
+
};
|
|
111
|
+
/**
|
|
112
|
+
* RelayPlane model aliases - resolve before routing
|
|
113
|
+
* These are user-friendly aliases that map to internal routing modes
|
|
114
|
+
*/
|
|
115
|
+
exports.RELAYPLANE_ALIASES = {
|
|
116
|
+
'relayplane:auto': 'rp:balanced',
|
|
117
|
+
'rp:auto': 'rp:balanced',
|
|
118
|
+
};
|
|
119
|
+
/**
|
|
120
|
+
* Smart routing aliases - map to specific provider/model combinations
|
|
121
|
+
* These provide semantic shortcuts for common use cases
|
|
122
|
+
*/
|
|
123
|
+
exports.SMART_ALIASES = {
|
|
124
|
+
// Best quality model (current flagship)
|
|
125
|
+
'rp:best': { provider: 'anthropic', model: 'claude-sonnet-4-20250514' },
|
|
126
|
+
// Fast/cheap model for simple tasks
|
|
127
|
+
'rp:fast': { provider: 'anthropic', model: 'claude-3-5-haiku-20241022' },
|
|
128
|
+
'rp:cheap': { provider: 'openai', model: 'gpt-4o-mini' },
|
|
129
|
+
// Balanced model for general use (good quality/cost tradeoff)
|
|
130
|
+
'rp:balanced': { provider: 'anthropic', model: 'claude-3-5-haiku-20241022' },
|
|
131
|
+
};
|
|
132
|
+
/**
|
|
133
|
+
* Get all available model names for error suggestions
|
|
134
|
+
*/
|
|
135
|
+
function getAvailableModelNames() {
|
|
136
|
+
return [
|
|
137
|
+
...Object.keys(exports.MODEL_MAPPING),
|
|
138
|
+
...Object.keys(exports.SMART_ALIASES),
|
|
139
|
+
...Object.keys(exports.RELAYPLANE_ALIASES),
|
|
140
|
+
// Add common model prefixes users might type
|
|
141
|
+
'relayplane:auto',
|
|
142
|
+
'relayplane:cost',
|
|
143
|
+
'relayplane:fast',
|
|
144
|
+
'relayplane:quality',
|
|
145
|
+
];
|
|
146
|
+
}
|
|
147
|
+
/**
|
|
148
|
+
* Resolve model aliases before routing
|
|
149
|
+
* Returns the resolved model name (may be same as input if no alias found)
|
|
150
|
+
*/
|
|
151
|
+
function resolveModelAlias(model) {
|
|
152
|
+
// Check RELAYPLANE_ALIASES first (e.g., relayplane:auto → rp:balanced)
|
|
153
|
+
if (exports.RELAYPLANE_ALIASES[model]) {
|
|
154
|
+
return exports.RELAYPLANE_ALIASES[model];
|
|
155
|
+
}
|
|
156
|
+
return model;
|
|
157
|
+
}
|
|
158
|
+
/**
|
|
159
|
+
* Default routing based on task type
|
|
160
|
+
* Uses Haiku 3.5 for cost optimization, upgrades based on learned rules
|
|
161
|
+
*/
|
|
162
|
+
const DEFAULT_ROUTING = {
|
|
163
|
+
code_generation: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
|
|
164
|
+
code_review: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
|
|
165
|
+
summarization: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
|
|
166
|
+
analysis: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
|
|
167
|
+
creative_writing: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
|
|
168
|
+
data_extraction: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
|
|
169
|
+
translation: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
|
|
170
|
+
question_answering: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
|
|
171
|
+
general: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
|
|
172
|
+
};
|
|
173
|
+
const UNCERTAINTY_PATTERNS = [
|
|
174
|
+
/i'?m not (entirely |completely |really )?sure/i,
|
|
175
|
+
/i don'?t (really |actually )?know/i,
|
|
176
|
+
/it'?s (difficult|hard|tough) to say/i,
|
|
177
|
+
/i can'?t (definitively|accurately|confidently)/i,
|
|
178
|
+
/i'?m (uncertain|unsure)/i,
|
|
179
|
+
/this is (just )?(a guess|speculation)/i,
|
|
180
|
+
];
|
|
181
|
+
const REFUSAL_PATTERNS = [
|
|
182
|
+
/i can'?t (help|assist) with that/i,
|
|
183
|
+
/i'?m (not able|unable) to/i,
|
|
184
|
+
/i (cannot|can't|won't) (provide|give|create)/i,
|
|
185
|
+
/as an ai/i,
|
|
186
|
+
];
|
|
187
|
+
class CooldownManager {
|
|
188
|
+
health = new Map();
|
|
189
|
+
config;
|
|
190
|
+
constructor(config) {
|
|
191
|
+
this.config = config;
|
|
192
|
+
}
|
|
193
|
+
updateConfig(config) {
|
|
194
|
+
this.config = config;
|
|
195
|
+
}
|
|
196
|
+
recordFailure(provider, error) {
|
|
197
|
+
const h = this.getOrCreateHealth(provider);
|
|
198
|
+
const now = Date.now();
|
|
199
|
+
h.failures = h.failures.filter((f) => now - f.timestamp < this.config.windowSeconds * 1000);
|
|
200
|
+
h.failures.push({ timestamp: now, error });
|
|
201
|
+
if (h.failures.length >= this.config.allowedFails) {
|
|
202
|
+
h.cooledUntil = now + this.config.cooldownSeconds * 1000;
|
|
203
|
+
console.log(`[RelayPlane] Provider ${provider} cooled down for ${this.config.cooldownSeconds}s`);
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
recordSuccess(provider) {
|
|
207
|
+
const h = this.health.get(provider);
|
|
208
|
+
if (h) {
|
|
209
|
+
h.failures = [];
|
|
210
|
+
h.cooledUntil = null;
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
isAvailable(provider) {
|
|
214
|
+
const h = this.health.get(provider);
|
|
215
|
+
if (!h?.cooledUntil)
|
|
216
|
+
return true;
|
|
217
|
+
if (Date.now() > h.cooledUntil) {
|
|
218
|
+
h.cooledUntil = null;
|
|
219
|
+
h.failures = [];
|
|
220
|
+
return true;
|
|
221
|
+
}
|
|
222
|
+
return false;
|
|
223
|
+
}
|
|
224
|
+
getOrCreateHealth(provider) {
|
|
225
|
+
if (!this.health.has(provider)) {
|
|
226
|
+
this.health.set(provider, { failures: [], cooledUntil: null });
|
|
227
|
+
}
|
|
228
|
+
return this.health.get(provider);
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
const globalStats = {
|
|
232
|
+
totalRequests: 0,
|
|
233
|
+
successfulRequests: 0,
|
|
234
|
+
failedRequests: 0,
|
|
235
|
+
totalLatencyMs: 0,
|
|
236
|
+
routingCounts: {},
|
|
237
|
+
modelCounts: {},
|
|
238
|
+
escalations: 0,
|
|
239
|
+
startedAt: Date.now(),
|
|
240
|
+
};
|
|
241
|
+
function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated) {
|
|
242
|
+
const timestamp = new Date().toISOString();
|
|
243
|
+
const status = success ? '✓' : '✗';
|
|
244
|
+
const escalateTag = escalated ? ' [ESCALATED]' : '';
|
|
245
|
+
console.log(`[RelayPlane] ${timestamp} ${status} ${originalModel} → ${provider}/${targetModel} (${mode}) ${latencyMs}ms${escalateTag}`);
|
|
246
|
+
// Update stats
|
|
247
|
+
globalStats.totalRequests++;
|
|
248
|
+
if (success) {
|
|
249
|
+
globalStats.successfulRequests++;
|
|
250
|
+
}
|
|
251
|
+
else {
|
|
252
|
+
globalStats.failedRequests++;
|
|
253
|
+
}
|
|
254
|
+
globalStats.totalLatencyMs += latencyMs;
|
|
255
|
+
globalStats.routingCounts[mode] = (globalStats.routingCounts[mode] || 0) + 1;
|
|
256
|
+
const modelKey = `${provider}/${targetModel}`;
|
|
257
|
+
globalStats.modelCounts[modelKey] = (globalStats.modelCounts[modelKey] || 0) + 1;
|
|
258
|
+
if (escalated) {
|
|
259
|
+
globalStats.escalations++;
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
const DEFAULT_PROXY_CONFIG = {
|
|
263
|
+
enabled: true,
|
|
264
|
+
modelOverrides: {},
|
|
265
|
+
routing: {
|
|
266
|
+
mode: 'cascade',
|
|
267
|
+
cascade: {
|
|
268
|
+
enabled: true,
|
|
269
|
+
models: [
|
|
270
|
+
'claude-3-5-haiku-20241022',
|
|
271
|
+
'claude-sonnet-4-20250514',
|
|
272
|
+
'claude-opus-4-5-20250514',
|
|
273
|
+
],
|
|
274
|
+
escalateOn: 'uncertainty',
|
|
275
|
+
maxEscalations: 1,
|
|
276
|
+
},
|
|
277
|
+
complexity: {
|
|
278
|
+
enabled: true,
|
|
279
|
+
simple: 'claude-3-5-haiku-20241022',
|
|
280
|
+
moderate: 'claude-sonnet-4-20250514',
|
|
281
|
+
complex: 'claude-opus-4-5-20250514',
|
|
282
|
+
},
|
|
283
|
+
},
|
|
284
|
+
reliability: {
|
|
285
|
+
cooldowns: {
|
|
286
|
+
enabled: true,
|
|
287
|
+
allowedFails: 3,
|
|
288
|
+
windowSeconds: 60,
|
|
289
|
+
cooldownSeconds: 120,
|
|
290
|
+
},
|
|
291
|
+
},
|
|
292
|
+
};
|
|
293
|
+
function getProxyConfigPath() {
|
|
294
|
+
const customPath = process.env['RELAYPLANE_CONFIG_PATH'];
|
|
295
|
+
if (customPath && customPath.trim())
|
|
296
|
+
return customPath;
|
|
297
|
+
return path.join(os.homedir(), '.relayplane', 'config.json');
|
|
298
|
+
}
|
|
299
|
+
function normalizeProxyConfig(config) {
|
|
300
|
+
const defaultRouting = DEFAULT_PROXY_CONFIG.routing;
|
|
301
|
+
const configRouting = (config?.routing ?? {});
|
|
302
|
+
const cascade = { ...defaultRouting.cascade, ...(configRouting.cascade ?? {}) };
|
|
303
|
+
const complexity = { ...defaultRouting.complexity, ...(configRouting.complexity ?? {}) };
|
|
304
|
+
const routing = {
|
|
305
|
+
...defaultRouting,
|
|
306
|
+
...configRouting,
|
|
307
|
+
cascade,
|
|
308
|
+
complexity,
|
|
309
|
+
};
|
|
310
|
+
const defaultReliability = DEFAULT_PROXY_CONFIG.reliability;
|
|
311
|
+
const configReliability = (config?.reliability ?? {});
|
|
312
|
+
const cooldowns = {
|
|
313
|
+
...defaultReliability.cooldowns,
|
|
314
|
+
...(configReliability.cooldowns ?? {}),
|
|
315
|
+
};
|
|
316
|
+
const reliability = {
|
|
317
|
+
...defaultReliability,
|
|
318
|
+
...configReliability,
|
|
319
|
+
cooldowns,
|
|
320
|
+
};
|
|
321
|
+
return {
|
|
322
|
+
...DEFAULT_PROXY_CONFIG,
|
|
323
|
+
...(config ?? {}),
|
|
324
|
+
modelOverrides: {
|
|
325
|
+
...(DEFAULT_PROXY_CONFIG.modelOverrides ?? {}),
|
|
326
|
+
...(config?.modelOverrides ?? {}),
|
|
327
|
+
},
|
|
328
|
+
routing,
|
|
329
|
+
reliability,
|
|
330
|
+
enabled: config?.enabled !== undefined ? !!config.enabled : DEFAULT_PROXY_CONFIG.enabled,
|
|
331
|
+
};
|
|
332
|
+
}
|
|
333
|
+
async function loadProxyConfig(configPath, log) {
|
|
334
|
+
try {
|
|
335
|
+
const raw = await fs.promises.readFile(configPath, 'utf8');
|
|
336
|
+
const parsed = JSON.parse(raw);
|
|
337
|
+
return normalizeProxyConfig(parsed);
|
|
338
|
+
}
|
|
339
|
+
catch (err) {
|
|
340
|
+
const error = err;
|
|
341
|
+
if (error.code !== 'ENOENT') {
|
|
342
|
+
log(`Failed to load config: ${error.message}`);
|
|
343
|
+
}
|
|
344
|
+
return normalizeProxyConfig(null);
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
async function saveProxyConfig(configPath, config) {
|
|
348
|
+
await fs.promises.mkdir(path.dirname(configPath), { recursive: true });
|
|
349
|
+
const payload = JSON.stringify(config, null, 2);
|
|
350
|
+
await fs.promises.writeFile(configPath, payload, 'utf8');
|
|
351
|
+
}
|
|
352
|
+
function isPlainObject(value) {
|
|
353
|
+
return !!value && typeof value === 'object' && !Array.isArray(value);
|
|
354
|
+
}
|
|
355
|
+
function deepMerge(base, patch) {
|
|
356
|
+
const result = { ...base };
|
|
357
|
+
for (const [key, value] of Object.entries(patch)) {
|
|
358
|
+
if (isPlainObject(value) && isPlainObject(result[key])) {
|
|
359
|
+
result[key] = deepMerge(result[key], value);
|
|
360
|
+
}
|
|
361
|
+
else {
|
|
362
|
+
result[key] = value;
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
return result;
|
|
366
|
+
}
|
|
367
|
+
function mergeProxyConfig(base, patch) {
|
|
368
|
+
// Deep merge without normalizing intermediate results
|
|
369
|
+
const merged = deepMerge(base, patch);
|
|
370
|
+
return normalizeProxyConfig(merged);
|
|
371
|
+
}
|
|
372
|
+
function getHeaderValue(req, headerName) {
|
|
373
|
+
const raw = req.headers[headerName.toLowerCase()];
|
|
374
|
+
if (Array.isArray(raw))
|
|
375
|
+
return raw[0];
|
|
376
|
+
return raw;
|
|
377
|
+
}
|
|
378
|
+
function parseHeaderBoolean(value) {
|
|
379
|
+
if (!value)
|
|
380
|
+
return false;
|
|
381
|
+
const normalized = value.trim().toLowerCase();
|
|
382
|
+
return normalized === '1' || normalized === 'true' || normalized === 'yes' || normalized === 'on';
|
|
383
|
+
}
|
|
384
|
+
function parseModelSuffix(model) {
|
|
385
|
+
const trimmed = model.trim();
|
|
386
|
+
if (/^relayplane:(auto|cost|fast|quality)$/.test(trimmed)) {
|
|
387
|
+
return { baseModel: trimmed, suffix: null };
|
|
388
|
+
}
|
|
389
|
+
const suffixes = ['cost', 'fast', 'quality'];
|
|
390
|
+
for (const suffix of suffixes) {
|
|
391
|
+
if (trimmed.endsWith(`:${suffix}`)) {
|
|
392
|
+
return {
|
|
393
|
+
baseModel: trimmed.slice(0, -(suffix.length + 1)),
|
|
394
|
+
suffix,
|
|
395
|
+
};
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
return { baseModel: trimmed, suffix: null };
|
|
399
|
+
}
|
|
400
|
+
/**
|
|
401
|
+
* Extract text content from messages for routing analysis
|
|
402
|
+
*/
|
|
403
|
+
function extractPromptText(messages) {
|
|
404
|
+
return messages
|
|
405
|
+
.map((msg) => {
|
|
406
|
+
if (typeof msg.content === 'string')
|
|
407
|
+
return msg.content;
|
|
408
|
+
if (Array.isArray(msg.content)) {
|
|
409
|
+
return msg.content
|
|
410
|
+
.map((c) => {
|
|
411
|
+
const part = c;
|
|
412
|
+
return part.type === 'text' ? (part.text ?? '') : '';
|
|
413
|
+
})
|
|
414
|
+
.join(' ');
|
|
415
|
+
}
|
|
416
|
+
return '';
|
|
417
|
+
})
|
|
418
|
+
.join('\n');
|
|
419
|
+
}
|
|
420
|
+
function extractMessageText(messages) {
|
|
421
|
+
return messages
|
|
422
|
+
.map((msg) => {
|
|
423
|
+
const content = msg.content;
|
|
424
|
+
if (typeof content === 'string')
|
|
425
|
+
return content;
|
|
426
|
+
if (Array.isArray(content)) {
|
|
427
|
+
return content
|
|
428
|
+
.map((c) => {
|
|
429
|
+
const part = c;
|
|
430
|
+
return part.type === 'text' ? (part.text ?? '') : '';
|
|
431
|
+
})
|
|
432
|
+
.join(' ');
|
|
433
|
+
}
|
|
434
|
+
return '';
|
|
435
|
+
})
|
|
436
|
+
.join(' ');
|
|
437
|
+
}
|
|
438
|
+
function classifyComplexity(messages) {
|
|
439
|
+
const text = extractMessageText(messages).toLowerCase();
|
|
440
|
+
const tokens = Math.ceil(text.length / 4);
|
|
441
|
+
let score = 0;
|
|
442
|
+
if (/```/.test(text) || /function |class |const |let |import /.test(text))
|
|
443
|
+
score += 2;
|
|
444
|
+
if (/analyze|compare|evaluate|assess|review|audit/.test(text))
|
|
445
|
+
score += 1;
|
|
446
|
+
if (/calculate|compute|solve|equation|prove|derive/.test(text))
|
|
447
|
+
score += 2;
|
|
448
|
+
if (/first.*then|step \d|1\).*2\)|phase \d/.test(text))
|
|
449
|
+
score += 1;
|
|
450
|
+
if (tokens > 2000)
|
|
451
|
+
score += 1;
|
|
452
|
+
if (tokens > 5000)
|
|
453
|
+
score += 1;
|
|
454
|
+
if (/write a (story|essay|article|report)|create a|design a|build a/.test(text))
|
|
455
|
+
score += 1;
|
|
456
|
+
if (score >= 4)
|
|
457
|
+
return 'complex';
|
|
458
|
+
if (score >= 2)
|
|
459
|
+
return 'moderate';
|
|
460
|
+
return 'simple';
|
|
461
|
+
}
|
|
462
|
+
function shouldEscalate(responseText, trigger) {
|
|
463
|
+
if (trigger === 'error')
|
|
464
|
+
return false;
|
|
465
|
+
const patterns = trigger === 'refusal' ? REFUSAL_PATTERNS : UNCERTAINTY_PATTERNS;
|
|
466
|
+
return patterns.some((p) => p.test(responseText));
|
|
467
|
+
}
|
|
468
|
+
/**
|
|
469
|
+
* Check if a model should use MAX token (hybrid auth)
|
|
470
|
+
*/
|
|
471
|
+
function shouldUseMaxToken(model, authConfig) {
|
|
472
|
+
if (!authConfig?.anthropicMaxToken || !authConfig?.useMaxForModels?.length) {
|
|
473
|
+
return false;
|
|
474
|
+
}
|
|
475
|
+
const modelLower = model.toLowerCase();
|
|
476
|
+
return authConfig.useMaxForModels.some(pattern => modelLower.includes(pattern.toLowerCase()));
|
|
477
|
+
}
|
|
478
|
+
/**
|
|
479
|
+
* Get the appropriate API key for a model (hybrid auth support)
|
|
480
|
+
*/
|
|
481
|
+
function getAuthForModel(model, authConfig, envApiKey) {
|
|
482
|
+
if (shouldUseMaxToken(model, authConfig)) {
|
|
483
|
+
return { apiKey: authConfig.anthropicMaxToken, isMax: true };
|
|
484
|
+
}
|
|
485
|
+
return { apiKey: envApiKey, isMax: false };
|
|
486
|
+
}
|
|
487
|
+
/**
|
|
488
|
+
* Build Anthropic headers with hybrid auth support
|
|
489
|
+
* MAX tokens (sk-ant-oat*) use Authorization: Bearer header
|
|
490
|
+
* API keys (sk-ant-api*) use x-api-key header
|
|
491
|
+
*/
|
|
492
|
+
function buildAnthropicHeadersWithAuth(ctx, apiKey, isMaxToken) {
|
|
493
|
+
const headers = {
|
|
494
|
+
'Content-Type': 'application/json',
|
|
495
|
+
'anthropic-version': ctx.versionHeader || '2023-06-01',
|
|
496
|
+
};
|
|
497
|
+
// Auth: prefer incoming auth for passthrough
|
|
498
|
+
if (ctx.authHeader) {
|
|
499
|
+
headers['Authorization'] = ctx.authHeader;
|
|
500
|
+
}
|
|
501
|
+
else if (ctx.apiKeyHeader) {
|
|
502
|
+
headers['x-api-key'] = ctx.apiKeyHeader;
|
|
503
|
+
}
|
|
504
|
+
else if (apiKey) {
|
|
505
|
+
// MAX tokens (OAuth) use Authorization: Bearer, API keys use x-api-key
|
|
506
|
+
if (isMaxToken || apiKey.startsWith('sk-ant-oat')) {
|
|
507
|
+
headers['Authorization'] = `Bearer ${apiKey}`;
|
|
508
|
+
}
|
|
509
|
+
else {
|
|
510
|
+
headers['x-api-key'] = apiKey;
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
// Pass through beta headers
|
|
514
|
+
if (ctx.betaHeaders) {
|
|
515
|
+
headers['anthropic-beta'] = ctx.betaHeaders;
|
|
516
|
+
}
|
|
517
|
+
return headers;
|
|
518
|
+
}
|
|
519
|
+
/**
|
|
520
|
+
* Build Anthropic headers with auth passthrough support
|
|
521
|
+
*
|
|
522
|
+
* Auth priority:
|
|
523
|
+
* 1. Incoming Authorization header (Bearer token from Claude Code OAuth)
|
|
524
|
+
* 2. Incoming x-api-key header
|
|
525
|
+
* 3. ANTHROPIC_API_KEY env var (or MAX token for Opus models)
|
|
526
|
+
*/
|
|
527
|
+
function buildAnthropicHeaders(ctx, envApiKey) {
|
|
528
|
+
const headers = {
|
|
529
|
+
'Content-Type': 'application/json',
|
|
530
|
+
'anthropic-version': ctx.versionHeader || '2023-06-01',
|
|
531
|
+
};
|
|
532
|
+
// Auth: prefer incoming auth for passthrough, fallback to env
|
|
533
|
+
if (ctx.authHeader) {
|
|
534
|
+
// Claude Code sends "Authorization: Bearer <token>" for OAuth
|
|
535
|
+
headers['Authorization'] = ctx.authHeader;
|
|
536
|
+
}
|
|
537
|
+
else if (ctx.apiKeyHeader) {
|
|
538
|
+
// Direct x-api-key header
|
|
539
|
+
headers['x-api-key'] = ctx.apiKeyHeader;
|
|
540
|
+
}
|
|
541
|
+
else if (envApiKey) {
|
|
542
|
+
// Fallback to env var
|
|
543
|
+
headers['x-api-key'] = envApiKey;
|
|
544
|
+
}
|
|
545
|
+
// Pass through beta headers (prompt caching, extended thinking, etc.)
|
|
546
|
+
if (ctx.betaHeaders) {
|
|
547
|
+
headers['anthropic-beta'] = ctx.betaHeaders;
|
|
548
|
+
}
|
|
549
|
+
return headers;
|
|
550
|
+
}
|
|
551
|
+
/**
|
|
552
|
+
* Forward non-streaming request to Anthropic API
|
|
553
|
+
*/
|
|
554
|
+
async function forwardToAnthropic(request, targetModel, ctx, envApiKey) {
|
|
555
|
+
const anthropicBody = buildAnthropicBody(request, targetModel, false);
|
|
556
|
+
const headers = buildAnthropicHeaders(ctx, envApiKey);
|
|
557
|
+
const response = await fetch('https://api.anthropic.com/v1/messages', {
|
|
558
|
+
method: 'POST',
|
|
559
|
+
headers,
|
|
560
|
+
body: JSON.stringify(anthropicBody),
|
|
561
|
+
});
|
|
562
|
+
return response;
|
|
563
|
+
}
|
|
564
|
+
/**
|
|
565
|
+
* Forward streaming request to Anthropic API
|
|
566
|
+
*/
|
|
567
|
+
async function forwardToAnthropicStream(request, targetModel, ctx, envApiKey) {
|
|
568
|
+
const anthropicBody = buildAnthropicBody(request, targetModel, true);
|
|
569
|
+
const headers = buildAnthropicHeaders(ctx, envApiKey);
|
|
570
|
+
const response = await fetch('https://api.anthropic.com/v1/messages', {
|
|
571
|
+
method: 'POST',
|
|
572
|
+
headers,
|
|
573
|
+
body: JSON.stringify(anthropicBody),
|
|
574
|
+
});
|
|
575
|
+
return response;
|
|
576
|
+
}
|
|
577
|
+
/**
|
|
578
|
+
* Forward native Anthropic /v1/messages request (passthrough with routing)
|
|
579
|
+
* Used for Claude Code direct integration
|
|
580
|
+
*/
|
|
581
|
+
async function forwardNativeAnthropicRequest(body, ctx, envApiKey, isMaxToken) {
|
|
582
|
+
const headers = buildAnthropicHeadersWithAuth(ctx, envApiKey, isMaxToken);
|
|
583
|
+
const response = await fetch('https://api.anthropic.com/v1/messages', {
|
|
584
|
+
method: 'POST',
|
|
585
|
+
headers,
|
|
586
|
+
body: JSON.stringify(body),
|
|
587
|
+
});
|
|
588
|
+
return response;
|
|
589
|
+
}
|
|
590
|
+
/**
|
|
591
|
+
* Convert OpenAI messages array to Anthropic format
|
|
592
|
+
* Handles: user, assistant, tool_calls, tool results
|
|
593
|
+
*/
|
|
594
|
+
function convertMessagesToAnthropic(messages) {
|
|
595
|
+
const result = [];
|
|
596
|
+
for (const msg of messages) {
|
|
597
|
+
const m = msg;
|
|
598
|
+
// Skip system messages (handled separately)
|
|
599
|
+
if (m.role === 'system')
|
|
600
|
+
continue;
|
|
601
|
+
// Tool result message → Anthropic user message with tool_result content
|
|
602
|
+
if (m.role === 'tool') {
|
|
603
|
+
result.push({
|
|
604
|
+
role: 'user',
|
|
605
|
+
content: [
|
|
606
|
+
{
|
|
607
|
+
type: 'tool_result',
|
|
608
|
+
tool_use_id: m.tool_call_id,
|
|
609
|
+
content: typeof m.content === 'string' ? m.content : JSON.stringify(m.content),
|
|
610
|
+
},
|
|
611
|
+
],
|
|
612
|
+
});
|
|
613
|
+
continue;
|
|
614
|
+
}
|
|
615
|
+
// Assistant message with tool_calls → Anthropic assistant with tool_use content
|
|
616
|
+
if (m.role === 'assistant' && m.tool_calls && m.tool_calls.length > 0) {
|
|
617
|
+
const content = [];
|
|
618
|
+
// Add text content if present
|
|
619
|
+
if (m.content && typeof m.content === 'string') {
|
|
620
|
+
content.push({ type: 'text', text: m.content });
|
|
621
|
+
}
|
|
622
|
+
// Add tool_use blocks
|
|
623
|
+
for (const tc of m.tool_calls) {
|
|
624
|
+
content.push({
|
|
625
|
+
type: 'tool_use',
|
|
626
|
+
id: tc.id,
|
|
627
|
+
name: tc.function.name,
|
|
628
|
+
input: JSON.parse(tc.function.arguments || '{}'),
|
|
629
|
+
});
|
|
630
|
+
}
|
|
631
|
+
result.push({ role: 'assistant', content });
|
|
632
|
+
continue;
|
|
633
|
+
}
|
|
634
|
+
// Regular user/assistant message
|
|
635
|
+
result.push({
|
|
636
|
+
role: m.role === 'assistant' ? 'assistant' : 'user',
|
|
637
|
+
content: m.content,
|
|
638
|
+
});
|
|
639
|
+
}
|
|
640
|
+
return result;
|
|
641
|
+
}
|
|
642
|
+
/**
|
|
643
|
+
* Build Anthropic request body from OpenAI format
|
|
644
|
+
*/
|
|
645
|
+
function buildAnthropicBody(request, targetModel, stream) {
|
|
646
|
+
// Convert OpenAI messages to Anthropic format
|
|
647
|
+
const anthropicMessages = convertMessagesToAnthropic(request.messages);
|
|
648
|
+
const systemMessage = request.messages.find((m) => m.role === 'system');
|
|
649
|
+
const anthropicBody = {
|
|
650
|
+
model: targetModel,
|
|
651
|
+
messages: anthropicMessages,
|
|
652
|
+
max_tokens: request.max_tokens ?? 4096,
|
|
653
|
+
stream,
|
|
654
|
+
};
|
|
655
|
+
if (systemMessage) {
|
|
656
|
+
anthropicBody['system'] = systemMessage.content;
|
|
657
|
+
}
|
|
658
|
+
if (request.temperature !== undefined) {
|
|
659
|
+
anthropicBody['temperature'] = request.temperature;
|
|
660
|
+
}
|
|
661
|
+
// Convert OpenAI tools format to Anthropic tools format
|
|
662
|
+
if (request.tools && Array.isArray(request.tools)) {
|
|
663
|
+
anthropicBody['tools'] = convertToolsToAnthropic(request.tools);
|
|
664
|
+
}
|
|
665
|
+
// Convert tool_choice
|
|
666
|
+
if (request.tool_choice) {
|
|
667
|
+
anthropicBody['tool_choice'] = convertToolChoiceToAnthropic(request.tool_choice);
|
|
668
|
+
}
|
|
669
|
+
return anthropicBody;
|
|
670
|
+
}
|
|
671
|
+
/**
|
|
672
|
+
* Convert OpenAI tools format to Anthropic format
|
|
673
|
+
* OpenAI: { type: "function", function: { name, description, parameters } }
|
|
674
|
+
* Anthropic: { name, description, input_schema }
|
|
675
|
+
*/
|
|
676
|
+
function convertToolsToAnthropic(tools) {
|
|
677
|
+
return tools.map((tool) => {
|
|
678
|
+
const t = tool;
|
|
679
|
+
if (t.type === 'function' && t.function) {
|
|
680
|
+
return {
|
|
681
|
+
name: t.function.name,
|
|
682
|
+
description: t.function.description,
|
|
683
|
+
input_schema: t.function.parameters || { type: 'object', properties: {} },
|
|
684
|
+
};
|
|
685
|
+
}
|
|
686
|
+
// Already in Anthropic format or unknown
|
|
687
|
+
return tool;
|
|
688
|
+
});
|
|
689
|
+
}
|
|
690
|
+
/**
|
|
691
|
+
* Convert OpenAI tool_choice to Anthropic format
|
|
692
|
+
*/
|
|
693
|
+
function convertToolChoiceToAnthropic(toolChoice) {
|
|
694
|
+
if (toolChoice === 'auto')
|
|
695
|
+
return { type: 'auto' };
|
|
696
|
+
if (toolChoice === 'none')
|
|
697
|
+
return { type: 'none' };
|
|
698
|
+
if (toolChoice === 'required')
|
|
699
|
+
return { type: 'any' };
|
|
700
|
+
// Specific tool: { type: "function", function: { name: "xxx" } }
|
|
701
|
+
const tc = toolChoice;
|
|
702
|
+
if (tc.type === 'function' && tc.function?.name) {
|
|
703
|
+
return { type: 'tool', name: tc.function.name };
|
|
704
|
+
}
|
|
705
|
+
return toolChoice;
|
|
706
|
+
}
|
|
707
|
+
/**
|
|
708
|
+
* Forward non-streaming request to OpenAI API
|
|
709
|
+
*/
|
|
710
|
+
async function forwardToOpenAI(request, targetModel, apiKey) {
|
|
711
|
+
const openaiBody = {
|
|
712
|
+
...request,
|
|
713
|
+
model: targetModel,
|
|
714
|
+
stream: false,
|
|
715
|
+
};
|
|
716
|
+
const response = await fetch('https://api.openai.com/v1/chat/completions', {
|
|
717
|
+
method: 'POST',
|
|
718
|
+
headers: {
|
|
719
|
+
'Content-Type': 'application/json',
|
|
720
|
+
Authorization: `Bearer ${apiKey}`,
|
|
721
|
+
},
|
|
722
|
+
body: JSON.stringify(openaiBody),
|
|
723
|
+
});
|
|
724
|
+
return response;
|
|
725
|
+
}
|
|
726
|
+
/**
|
|
727
|
+
* Forward streaming request to OpenAI API
|
|
728
|
+
*/
|
|
729
|
+
async function forwardToOpenAIStream(request, targetModel, apiKey) {
|
|
730
|
+
const openaiBody = {
|
|
731
|
+
...request,
|
|
732
|
+
model: targetModel,
|
|
733
|
+
stream: true,
|
|
734
|
+
};
|
|
735
|
+
const response = await fetch('https://api.openai.com/v1/chat/completions', {
|
|
736
|
+
method: 'POST',
|
|
737
|
+
headers: {
|
|
738
|
+
'Content-Type': 'application/json',
|
|
739
|
+
Authorization: `Bearer ${apiKey}`,
|
|
740
|
+
},
|
|
741
|
+
body: JSON.stringify(openaiBody),
|
|
742
|
+
});
|
|
743
|
+
return response;
|
|
744
|
+
}
|
|
745
|
+
/**
|
|
746
|
+
* Forward non-streaming request to xAI API (OpenAI-compatible)
|
|
747
|
+
*/
|
|
748
|
+
async function forwardToXAI(request, targetModel, apiKey) {
|
|
749
|
+
const xaiBody = {
|
|
750
|
+
...request,
|
|
751
|
+
model: targetModel,
|
|
752
|
+
stream: false,
|
|
753
|
+
};
|
|
754
|
+
const response = await fetch('https://api.x.ai/v1/chat/completions', {
|
|
755
|
+
method: 'POST',
|
|
756
|
+
headers: {
|
|
757
|
+
'Content-Type': 'application/json',
|
|
758
|
+
Authorization: `Bearer ${apiKey}`,
|
|
759
|
+
},
|
|
760
|
+
body: JSON.stringify(xaiBody),
|
|
761
|
+
});
|
|
762
|
+
return response;
|
|
763
|
+
}
|
|
764
|
+
/**
|
|
765
|
+
* Forward streaming request to xAI API (OpenAI-compatible)
|
|
766
|
+
*/
|
|
767
|
+
async function forwardToXAIStream(request, targetModel, apiKey) {
|
|
768
|
+
const xaiBody = {
|
|
769
|
+
...request,
|
|
770
|
+
model: targetModel,
|
|
771
|
+
stream: true,
|
|
772
|
+
};
|
|
773
|
+
const response = await fetch('https://api.x.ai/v1/chat/completions', {
|
|
774
|
+
method: 'POST',
|
|
775
|
+
headers: {
|
|
776
|
+
'Content-Type': 'application/json',
|
|
777
|
+
Authorization: `Bearer ${apiKey}`,
|
|
778
|
+
},
|
|
779
|
+
body: JSON.stringify(xaiBody),
|
|
780
|
+
});
|
|
781
|
+
return response;
|
|
782
|
+
}
|
|
783
|
+
/**
|
|
784
|
+
* Forward non-streaming request to Moonshot API (OpenAI-compatible)
|
|
785
|
+
*/
|
|
786
|
+
async function forwardToMoonshot(request, targetModel, apiKey) {
|
|
787
|
+
const moonshotBody = {
|
|
788
|
+
...request,
|
|
789
|
+
model: targetModel,
|
|
790
|
+
stream: false,
|
|
791
|
+
};
|
|
792
|
+
const response = await fetch('https://api.moonshot.cn/v1/chat/completions', {
|
|
793
|
+
method: 'POST',
|
|
794
|
+
headers: {
|
|
795
|
+
'Content-Type': 'application/json',
|
|
796
|
+
Authorization: `Bearer ${apiKey}`,
|
|
797
|
+
},
|
|
798
|
+
body: JSON.stringify(moonshotBody),
|
|
799
|
+
});
|
|
800
|
+
return response;
|
|
801
|
+
}
|
|
802
|
+
/**
|
|
803
|
+
* Forward streaming request to Moonshot API (OpenAI-compatible)
|
|
804
|
+
*/
|
|
805
|
+
async function forwardToMoonshotStream(request, targetModel, apiKey) {
|
|
806
|
+
const moonshotBody = {
|
|
807
|
+
...request,
|
|
808
|
+
model: targetModel,
|
|
809
|
+
stream: true,
|
|
810
|
+
};
|
|
811
|
+
const response = await fetch('https://api.moonshot.cn/v1/chat/completions', {
|
|
812
|
+
method: 'POST',
|
|
813
|
+
headers: {
|
|
814
|
+
'Content-Type': 'application/json',
|
|
815
|
+
Authorization: `Bearer ${apiKey}`,
|
|
816
|
+
},
|
|
817
|
+
body: JSON.stringify(moonshotBody),
|
|
818
|
+
});
|
|
819
|
+
return response;
|
|
820
|
+
}
|
|
821
|
+
/**
|
|
822
|
+
* Convert OpenAI messages to Gemini format
|
|
823
|
+
*/
|
|
824
|
+
function convertMessagesToGemini(messages) {
|
|
825
|
+
const geminiContents = [];
|
|
826
|
+
for (const msg of messages) {
|
|
827
|
+
// Skip system messages (handled separately via systemInstruction)
|
|
828
|
+
if (msg.role === 'system')
|
|
829
|
+
continue;
|
|
830
|
+
const role = msg.role === 'assistant' ? 'model' : 'user';
|
|
831
|
+
if (typeof msg.content === 'string') {
|
|
832
|
+
geminiContents.push({
|
|
833
|
+
role,
|
|
834
|
+
parts: [{ text: msg.content }],
|
|
835
|
+
});
|
|
836
|
+
}
|
|
837
|
+
else if (Array.isArray(msg.content)) {
|
|
838
|
+
// Handle multimodal content
|
|
839
|
+
const parts = msg.content.map((part) => {
|
|
840
|
+
const p = part;
|
|
841
|
+
if (p.type === 'text') {
|
|
842
|
+
return { text: p.text };
|
|
843
|
+
}
|
|
844
|
+
if (p.type === 'image_url' && p.image_url?.url) {
|
|
845
|
+
// Handle base64 images
|
|
846
|
+
const url = p.image_url.url;
|
|
847
|
+
if (url.startsWith('data:')) {
|
|
848
|
+
const match = url.match(/^data:([^;]+);base64,(.+)$/);
|
|
849
|
+
if (match) {
|
|
850
|
+
return {
|
|
851
|
+
inline_data: {
|
|
852
|
+
mime_type: match[1],
|
|
853
|
+
data: match[2],
|
|
854
|
+
},
|
|
855
|
+
};
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
// URL-based images not directly supported, return as text
|
|
859
|
+
return { text: `[Image: ${url}]` };
|
|
860
|
+
}
|
|
861
|
+
return { text: '' };
|
|
862
|
+
});
|
|
863
|
+
geminiContents.push({ role, parts });
|
|
864
|
+
}
|
|
865
|
+
}
|
|
866
|
+
return geminiContents;
|
|
867
|
+
}
|
|
868
|
+
/**
|
|
869
|
+
* Forward non-streaming request to Gemini API
|
|
870
|
+
*/
|
|
871
|
+
async function forwardToGemini(request, targetModel, apiKey) {
|
|
872
|
+
const systemMessage = request.messages.find((m) => m.role === 'system');
|
|
873
|
+
const geminiContents = convertMessagesToGemini(request.messages);
|
|
874
|
+
const geminiBody = {
|
|
875
|
+
contents: geminiContents,
|
|
876
|
+
generationConfig: {
|
|
877
|
+
maxOutputTokens: request.max_tokens ?? 4096,
|
|
878
|
+
},
|
|
879
|
+
};
|
|
880
|
+
if (request.temperature !== undefined) {
|
|
881
|
+
geminiBody['generationConfig']['temperature'] = request.temperature;
|
|
882
|
+
}
|
|
883
|
+
if (systemMessage && typeof systemMessage.content === 'string') {
|
|
884
|
+
geminiBody['systemInstruction'] = {
|
|
885
|
+
parts: [{ text: systemMessage.content }],
|
|
886
|
+
};
|
|
887
|
+
}
|
|
888
|
+
const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${targetModel}:generateContent?key=${apiKey}`, {
|
|
889
|
+
method: 'POST',
|
|
890
|
+
headers: {
|
|
891
|
+
'Content-Type': 'application/json',
|
|
892
|
+
},
|
|
893
|
+
body: JSON.stringify(geminiBody),
|
|
894
|
+
});
|
|
895
|
+
return response;
|
|
896
|
+
}
|
|
897
|
+
/**
|
|
898
|
+
* Forward streaming request to Gemini API
|
|
899
|
+
*/
|
|
900
|
+
async function forwardToGeminiStream(request, targetModel, apiKey) {
|
|
901
|
+
const systemMessage = request.messages.find((m) => m.role === 'system');
|
|
902
|
+
const geminiContents = convertMessagesToGemini(request.messages);
|
|
903
|
+
const geminiBody = {
|
|
904
|
+
contents: geminiContents,
|
|
905
|
+
generationConfig: {
|
|
906
|
+
maxOutputTokens: request.max_tokens ?? 4096,
|
|
907
|
+
},
|
|
908
|
+
};
|
|
909
|
+
if (request.temperature !== undefined) {
|
|
910
|
+
geminiBody['generationConfig']['temperature'] = request.temperature;
|
|
911
|
+
}
|
|
912
|
+
if (systemMessage && typeof systemMessage.content === 'string') {
|
|
913
|
+
geminiBody['systemInstruction'] = {
|
|
914
|
+
parts: [{ text: systemMessage.content }],
|
|
915
|
+
};
|
|
916
|
+
}
|
|
917
|
+
const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${targetModel}:streamGenerateContent?alt=sse&key=${apiKey}`, {
|
|
918
|
+
method: 'POST',
|
|
919
|
+
headers: {
|
|
920
|
+
'Content-Type': 'application/json',
|
|
921
|
+
},
|
|
922
|
+
body: JSON.stringify(geminiBody),
|
|
923
|
+
});
|
|
924
|
+
return response;
|
|
925
|
+
}
|
|
926
|
+
/**
|
|
927
|
+
* Convert Gemini response to OpenAI format
|
|
928
|
+
*/
|
|
929
|
+
function convertGeminiResponse(geminiData, model) {
|
|
930
|
+
const candidate = geminiData.candidates?.[0];
|
|
931
|
+
const text = candidate?.content?.parts?.map((p) => p.text ?? '').join('') ?? '';
|
|
932
|
+
let finishReason = 'stop';
|
|
933
|
+
if (candidate?.finishReason === 'MAX_TOKENS') {
|
|
934
|
+
finishReason = 'length';
|
|
935
|
+
}
|
|
936
|
+
else if (candidate?.finishReason === 'SAFETY') {
|
|
937
|
+
finishReason = 'content_filter';
|
|
938
|
+
}
|
|
939
|
+
return {
|
|
940
|
+
id: `chatcmpl-${Date.now()}`,
|
|
941
|
+
object: 'chat.completion',
|
|
942
|
+
created: Math.floor(Date.now() / 1000),
|
|
943
|
+
model,
|
|
944
|
+
choices: [
|
|
945
|
+
{
|
|
946
|
+
index: 0,
|
|
947
|
+
message: {
|
|
948
|
+
role: 'assistant',
|
|
949
|
+
content: text,
|
|
950
|
+
},
|
|
951
|
+
finish_reason: finishReason,
|
|
952
|
+
},
|
|
953
|
+
],
|
|
954
|
+
usage: {
|
|
955
|
+
prompt_tokens: geminiData.usageMetadata?.promptTokenCount ?? 0,
|
|
956
|
+
completion_tokens: geminiData.usageMetadata?.candidatesTokenCount ?? 0,
|
|
957
|
+
total_tokens: (geminiData.usageMetadata?.promptTokenCount ?? 0) +
|
|
958
|
+
(geminiData.usageMetadata?.candidatesTokenCount ?? 0),
|
|
959
|
+
},
|
|
960
|
+
};
|
|
961
|
+
}
|
|
962
|
+
/**
|
|
963
|
+
* Convert Gemini streaming event to OpenAI format
|
|
964
|
+
*/
|
|
965
|
+
function convertGeminiStreamEvent(eventData, messageId, model, isFirst) {
|
|
966
|
+
const candidate = eventData.candidates?.[0];
|
|
967
|
+
const text = candidate?.content?.parts?.map((p) => p.text ?? '').join('') ?? '';
|
|
968
|
+
const choice = {
|
|
969
|
+
index: 0,
|
|
970
|
+
delta: {},
|
|
971
|
+
finish_reason: null,
|
|
972
|
+
};
|
|
973
|
+
if (isFirst) {
|
|
974
|
+
choice['delta'] = { role: 'assistant', content: text };
|
|
975
|
+
}
|
|
976
|
+
else if (text) {
|
|
977
|
+
choice['delta'] = { content: text };
|
|
978
|
+
}
|
|
979
|
+
// Check for finish
|
|
980
|
+
if (candidate?.finishReason) {
|
|
981
|
+
let finishReason = 'stop';
|
|
982
|
+
if (candidate.finishReason === 'MAX_TOKENS') {
|
|
983
|
+
finishReason = 'length';
|
|
984
|
+
}
|
|
985
|
+
else if (candidate.finishReason === 'SAFETY') {
|
|
986
|
+
finishReason = 'content_filter';
|
|
987
|
+
}
|
|
988
|
+
choice['finish_reason'] = finishReason;
|
|
989
|
+
}
|
|
990
|
+
const chunk = {
|
|
991
|
+
id: messageId,
|
|
992
|
+
object: 'chat.completion.chunk',
|
|
993
|
+
created: Math.floor(Date.now() / 1000),
|
|
994
|
+
model,
|
|
995
|
+
choices: [choice],
|
|
996
|
+
};
|
|
997
|
+
return `data: ${JSON.stringify(chunk)}\n\n`;
|
|
998
|
+
}
|
|
999
|
+
/**
|
|
1000
|
+
* Parse Gemini SSE stream and convert to OpenAI format
|
|
1001
|
+
*/
|
|
1002
|
+
async function* convertGeminiStream(response, model) {
|
|
1003
|
+
const reader = response.body?.getReader();
|
|
1004
|
+
if (!reader) {
|
|
1005
|
+
throw new Error('No response body');
|
|
1006
|
+
}
|
|
1007
|
+
const decoder = new TextDecoder();
|
|
1008
|
+
let buffer = '';
|
|
1009
|
+
const messageId = `chatcmpl-${Date.now()}`;
|
|
1010
|
+
let isFirst = true;
|
|
1011
|
+
try {
|
|
1012
|
+
while (true) {
|
|
1013
|
+
const { done, value } = await reader.read();
|
|
1014
|
+
if (done)
|
|
1015
|
+
break;
|
|
1016
|
+
buffer += decoder.decode(value, { stream: true });
|
|
1017
|
+
// Process complete SSE events (Gemini uses "data: " prefix)
|
|
1018
|
+
const lines = buffer.split('\n');
|
|
1019
|
+
buffer = lines.pop() || '';
|
|
1020
|
+
for (const line of lines) {
|
|
1021
|
+
if (line.startsWith('data: ')) {
|
|
1022
|
+
const jsonStr = line.slice(6);
|
|
1023
|
+
if (jsonStr.trim() === '[DONE]') {
|
|
1024
|
+
yield 'data: [DONE]\n\n';
|
|
1025
|
+
continue;
|
|
1026
|
+
}
|
|
1027
|
+
try {
|
|
1028
|
+
const parsed = JSON.parse(jsonStr);
|
|
1029
|
+
const converted = convertGeminiStreamEvent(parsed, messageId, model, isFirst);
|
|
1030
|
+
if (converted) {
|
|
1031
|
+
yield converted;
|
|
1032
|
+
isFirst = false;
|
|
1033
|
+
}
|
|
1034
|
+
}
|
|
1035
|
+
catch {
|
|
1036
|
+
// Skip malformed JSON
|
|
1037
|
+
}
|
|
1038
|
+
}
|
|
1039
|
+
}
|
|
1040
|
+
}
|
|
1041
|
+
// Send [DONE] at the end
|
|
1042
|
+
yield 'data: [DONE]\n\n';
|
|
1043
|
+
}
|
|
1044
|
+
finally {
|
|
1045
|
+
reader.releaseLock();
|
|
1046
|
+
}
|
|
1047
|
+
}
|
|
1048
|
+
/**
|
|
1049
|
+
* Convert Anthropic response to OpenAI format
|
|
1050
|
+
* Handles both text and tool_use content blocks
|
|
1051
|
+
*/
|
|
1052
|
+
function convertAnthropicResponse(anthropicData) {
|
|
1053
|
+
const textBlocks = anthropicData.content?.filter((c) => c.type === 'text') ?? [];
|
|
1054
|
+
const toolBlocks = anthropicData.content?.filter((c) => c.type === 'tool_use') ?? [];
|
|
1055
|
+
const textContent = textBlocks.map((c) => c.text ?? '').join('');
|
|
1056
|
+
// Build message object
|
|
1057
|
+
const message = {
|
|
1058
|
+
role: 'assistant',
|
|
1059
|
+
content: textContent || null,
|
|
1060
|
+
};
|
|
1061
|
+
// Convert tool_use blocks to OpenAI tool_calls format
|
|
1062
|
+
if (toolBlocks.length > 0) {
|
|
1063
|
+
message['tool_calls'] = toolBlocks.map((block) => ({
|
|
1064
|
+
id: block.id || `call_${Date.now()}`,
|
|
1065
|
+
type: 'function',
|
|
1066
|
+
function: {
|
|
1067
|
+
name: block.name,
|
|
1068
|
+
arguments: typeof block.input === 'string' ? block.input : JSON.stringify(block.input ?? {}),
|
|
1069
|
+
},
|
|
1070
|
+
}));
|
|
1071
|
+
}
|
|
1072
|
+
// Determine finish_reason
|
|
1073
|
+
let finishReason = 'stop';
|
|
1074
|
+
if (anthropicData.stop_reason === 'tool_use') {
|
|
1075
|
+
finishReason = 'tool_calls';
|
|
1076
|
+
}
|
|
1077
|
+
else if (anthropicData.stop_reason === 'end_turn') {
|
|
1078
|
+
finishReason = 'stop';
|
|
1079
|
+
}
|
|
1080
|
+
else if (anthropicData.stop_reason) {
|
|
1081
|
+
finishReason = anthropicData.stop_reason;
|
|
1082
|
+
}
|
|
1083
|
+
return {
|
|
1084
|
+
id: anthropicData.id || `chatcmpl-${Date.now()}`,
|
|
1085
|
+
object: 'chat.completion',
|
|
1086
|
+
created: Math.floor(Date.now() / 1000),
|
|
1087
|
+
model: anthropicData.model,
|
|
1088
|
+
choices: [
|
|
1089
|
+
{
|
|
1090
|
+
index: 0,
|
|
1091
|
+
message,
|
|
1092
|
+
finish_reason: finishReason,
|
|
1093
|
+
},
|
|
1094
|
+
],
|
|
1095
|
+
usage: {
|
|
1096
|
+
prompt_tokens: anthropicData.usage?.input_tokens ?? 0,
|
|
1097
|
+
completion_tokens: anthropicData.usage?.output_tokens ?? 0,
|
|
1098
|
+
total_tokens: (anthropicData.usage?.input_tokens ?? 0) + (anthropicData.usage?.output_tokens ?? 0),
|
|
1099
|
+
},
|
|
1100
|
+
};
|
|
1101
|
+
}
|
|
1102
|
+
/**
|
|
1103
|
+
* Convert Anthropic streaming event to OpenAI streaming chunk format
|
|
1104
|
+
* Handles both text content and tool_use streaming
|
|
1105
|
+
*/
|
|
1106
|
+
function convertAnthropicStreamEvent(eventType, eventData, messageId, model, toolState) {
|
|
1107
|
+
const choice = { index: 0, delta: {}, finish_reason: null };
|
|
1108
|
+
const baseChunk = {
|
|
1109
|
+
id: messageId,
|
|
1110
|
+
object: 'chat.completion.chunk',
|
|
1111
|
+
created: Math.floor(Date.now() / 1000),
|
|
1112
|
+
model: model,
|
|
1113
|
+
choices: [choice],
|
|
1114
|
+
};
|
|
1115
|
+
switch (eventType) {
|
|
1116
|
+
case 'message_start': {
|
|
1117
|
+
// First chunk: include role
|
|
1118
|
+
const msg = eventData['message'];
|
|
1119
|
+
baseChunk.id = msg?.['id'] || messageId;
|
|
1120
|
+
choice.delta = { role: 'assistant', content: '' };
|
|
1121
|
+
return `data: ${JSON.stringify(baseChunk)}\n\n`;
|
|
1122
|
+
}
|
|
1123
|
+
case 'content_block_start': {
|
|
1124
|
+
// New content block starting - could be text or tool_use
|
|
1125
|
+
const contentBlock = eventData['content_block'];
|
|
1126
|
+
const blockIndex = eventData['index'];
|
|
1127
|
+
if (contentBlock?.['type'] === 'tool_use') {
|
|
1128
|
+
// Tool use starting - send first chunk with tool info
|
|
1129
|
+
const toolId = contentBlock['id'];
|
|
1130
|
+
const toolName = contentBlock['name'];
|
|
1131
|
+
toolState.tools.set(blockIndex ?? toolState.currentToolIndex, {
|
|
1132
|
+
id: toolId,
|
|
1133
|
+
name: toolName,
|
|
1134
|
+
arguments: '',
|
|
1135
|
+
});
|
|
1136
|
+
toolState.currentToolIndex = blockIndex ?? toolState.currentToolIndex;
|
|
1137
|
+
choice.delta = {
|
|
1138
|
+
tool_calls: [{
|
|
1139
|
+
index: blockIndex ?? 0,
|
|
1140
|
+
id: toolId,
|
|
1141
|
+
type: 'function',
|
|
1142
|
+
function: { name: toolName, arguments: '' },
|
|
1143
|
+
}],
|
|
1144
|
+
};
|
|
1145
|
+
return `data: ${JSON.stringify(baseChunk)}\n\n`;
|
|
1146
|
+
}
|
|
1147
|
+
return null;
|
|
1148
|
+
}
|
|
1149
|
+
case 'content_block_delta': {
|
|
1150
|
+
// Content chunk - text or tool arguments
|
|
1151
|
+
const delta = eventData['delta'];
|
|
1152
|
+
const blockIndex = eventData['index'];
|
|
1153
|
+
if (delta?.['type'] === 'text_delta') {
|
|
1154
|
+
choice.delta = { content: delta['text'] };
|
|
1155
|
+
return `data: ${JSON.stringify(baseChunk)}\n\n`;
|
|
1156
|
+
}
|
|
1157
|
+
if (delta?.['type'] === 'input_json_delta') {
|
|
1158
|
+
// Tool arguments streaming
|
|
1159
|
+
const partialJson = delta['partial_json'] || '';
|
|
1160
|
+
const tool = toolState.tools.get(blockIndex ?? toolState.currentToolIndex);
|
|
1161
|
+
if (tool) {
|
|
1162
|
+
tool.arguments += partialJson;
|
|
1163
|
+
}
|
|
1164
|
+
choice.delta = {
|
|
1165
|
+
tool_calls: [{
|
|
1166
|
+
index: blockIndex ?? 0,
|
|
1167
|
+
function: { arguments: partialJson },
|
|
1168
|
+
}],
|
|
1169
|
+
};
|
|
1170
|
+
return `data: ${JSON.stringify(baseChunk)}\n\n`;
|
|
1171
|
+
}
|
|
1172
|
+
return null;
|
|
1173
|
+
}
|
|
1174
|
+
case 'message_delta': {
|
|
1175
|
+
// Final chunk with stop reason
|
|
1176
|
+
const delta = eventData['delta'];
|
|
1177
|
+
const stopReason = delta?.['stop_reason'];
|
|
1178
|
+
if (stopReason === 'tool_use') {
|
|
1179
|
+
choice.finish_reason = 'tool_calls';
|
|
1180
|
+
}
|
|
1181
|
+
else if (stopReason === 'end_turn') {
|
|
1182
|
+
choice.finish_reason = 'stop';
|
|
1183
|
+
}
|
|
1184
|
+
else {
|
|
1185
|
+
choice.finish_reason = stopReason || 'stop';
|
|
1186
|
+
}
|
|
1187
|
+
choice.delta = {};
|
|
1188
|
+
return `data: ${JSON.stringify(baseChunk)}\n\n`;
|
|
1189
|
+
}
|
|
1190
|
+
case 'message_stop': {
|
|
1191
|
+
// Stream complete
|
|
1192
|
+
return 'data: [DONE]\n\n';
|
|
1193
|
+
}
|
|
1194
|
+
default:
|
|
1195
|
+
return null;
|
|
1196
|
+
}
|
|
1197
|
+
}
|
|
1198
|
+
/**
|
|
1199
|
+
* Parse SSE stream from Anthropic and convert to OpenAI format
|
|
1200
|
+
*/
|
|
1201
|
+
async function* convertAnthropicStream(response, model) {
|
|
1202
|
+
const reader = response.body?.getReader();
|
|
1203
|
+
if (!reader) {
|
|
1204
|
+
throw new Error('No response body');
|
|
1205
|
+
}
|
|
1206
|
+
const decoder = new TextDecoder();
|
|
1207
|
+
let buffer = '';
|
|
1208
|
+
let messageId = `chatcmpl-${Date.now()}`;
|
|
1209
|
+
// Tool state for tracking streaming tool calls
|
|
1210
|
+
const toolState = {
|
|
1211
|
+
currentToolIndex: 0,
|
|
1212
|
+
tools: new Map(),
|
|
1213
|
+
};
|
|
1214
|
+
try {
|
|
1215
|
+
while (true) {
|
|
1216
|
+
const { done, value } = await reader.read();
|
|
1217
|
+
if (done)
|
|
1218
|
+
break;
|
|
1219
|
+
buffer += decoder.decode(value, { stream: true });
|
|
1220
|
+
// Process complete SSE events
|
|
1221
|
+
const lines = buffer.split('\n');
|
|
1222
|
+
buffer = lines.pop() || ''; // Keep incomplete line in buffer
|
|
1223
|
+
let eventType = '';
|
|
1224
|
+
let eventData = '';
|
|
1225
|
+
for (const line of lines) {
|
|
1226
|
+
if (line.startsWith('event: ')) {
|
|
1227
|
+
eventType = line.slice(7).trim();
|
|
1228
|
+
}
|
|
1229
|
+
else if (line.startsWith('data: ')) {
|
|
1230
|
+
eventData = line.slice(6);
|
|
1231
|
+
}
|
|
1232
|
+
else if (line === '' && eventType && eventData) {
|
|
1233
|
+
// Complete event, process it
|
|
1234
|
+
try {
|
|
1235
|
+
const parsed = JSON.parse(eventData);
|
|
1236
|
+
const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
|
|
1237
|
+
if (converted) {
|
|
1238
|
+
yield converted;
|
|
1239
|
+
}
|
|
1240
|
+
}
|
|
1241
|
+
catch {
|
|
1242
|
+
// Skip malformed JSON
|
|
1243
|
+
}
|
|
1244
|
+
eventType = '';
|
|
1245
|
+
eventData = '';
|
|
1246
|
+
}
|
|
1247
|
+
}
|
|
1248
|
+
}
|
|
1249
|
+
}
|
|
1250
|
+
finally {
|
|
1251
|
+
reader.releaseLock();
|
|
1252
|
+
}
|
|
1253
|
+
}
|
|
1254
|
+
/**
|
|
1255
|
+
* Pipe OpenAI streaming response directly (already in correct format)
|
|
1256
|
+
*/
|
|
1257
|
+
async function* pipeOpenAIStream(response) {
|
|
1258
|
+
const reader = response.body?.getReader();
|
|
1259
|
+
if (!reader) {
|
|
1260
|
+
throw new Error('No response body');
|
|
1261
|
+
}
|
|
1262
|
+
const decoder = new TextDecoder();
|
|
1263
|
+
try {
|
|
1264
|
+
while (true) {
|
|
1265
|
+
const { done, value } = await reader.read();
|
|
1266
|
+
if (done)
|
|
1267
|
+
break;
|
|
1268
|
+
yield decoder.decode(value, { stream: true });
|
|
1269
|
+
}
|
|
1270
|
+
}
|
|
1271
|
+
finally {
|
|
1272
|
+
reader.releaseLock();
|
|
1273
|
+
}
|
|
1274
|
+
}
|
|
1275
|
+
/**
|
|
1276
|
+
* Parse preferred model string (format: "provider:model")
|
|
1277
|
+
*/
|
|
1278
|
+
function parsePreferredModel(preferredModel) {
|
|
1279
|
+
const [provider, model] = preferredModel.split(':');
|
|
1280
|
+
if (!provider || !model)
|
|
1281
|
+
return null;
|
|
1282
|
+
// Validate provider
|
|
1283
|
+
const validProviders = ['openai', 'anthropic', 'google', 'xai', 'moonshot', 'local'];
|
|
1284
|
+
if (!validProviders.includes(provider))
|
|
1285
|
+
return null;
|
|
1286
|
+
return { provider: provider, model };
|
|
1287
|
+
}
|
|
1288
|
+
/**
|
|
1289
|
+
* Resolve explicit model name to provider and model
|
|
1290
|
+
* Handles direct model names like "claude-3-5-sonnet-latest" or "gpt-4o"
|
|
1291
|
+
*/
|
|
1292
|
+
function resolveExplicitModel(modelName) {
|
|
1293
|
+
// Resolve aliases first (e.g., relayplane:auto → rp:balanced)
|
|
1294
|
+
const resolvedAlias = resolveModelAlias(modelName);
|
|
1295
|
+
// Check SMART_ALIASES (rp:best, rp:fast, etc.)
|
|
1296
|
+
if (exports.SMART_ALIASES[resolvedAlias]) {
|
|
1297
|
+
return exports.SMART_ALIASES[resolvedAlias];
|
|
1298
|
+
}
|
|
1299
|
+
// Check MODEL_MAPPING (aliases)
|
|
1300
|
+
if (exports.MODEL_MAPPING[resolvedAlias]) {
|
|
1301
|
+
return exports.MODEL_MAPPING[resolvedAlias];
|
|
1302
|
+
}
|
|
1303
|
+
// If alias was resolved but not in mappings, try original name
|
|
1304
|
+
if (resolvedAlias !== modelName && exports.MODEL_MAPPING[modelName]) {
|
|
1305
|
+
return exports.MODEL_MAPPING[modelName];
|
|
1306
|
+
}
|
|
1307
|
+
// Anthropic models (claude-*)
|
|
1308
|
+
if (modelName.startsWith('claude-')) {
|
|
1309
|
+
return { provider: 'anthropic', model: modelName };
|
|
1310
|
+
}
|
|
1311
|
+
// OpenAI models (gpt-*, o1-*, chatgpt-*, text-*, dall-e-*, whisper-*, tts-*)
|
|
1312
|
+
if (modelName.startsWith('gpt-') ||
|
|
1313
|
+
modelName.startsWith('o1-') ||
|
|
1314
|
+
modelName.startsWith('o3-') ||
|
|
1315
|
+
modelName.startsWith('chatgpt-') ||
|
|
1316
|
+
modelName.startsWith('text-') ||
|
|
1317
|
+
modelName.startsWith('dall-e') ||
|
|
1318
|
+
modelName.startsWith('whisper') ||
|
|
1319
|
+
modelName.startsWith('tts-')) {
|
|
1320
|
+
return { provider: 'openai', model: modelName };
|
|
1321
|
+
}
|
|
1322
|
+
// Google models (gemini-*, palm-*)
|
|
1323
|
+
if (modelName.startsWith('gemini-') || modelName.startsWith('palm-')) {
|
|
1324
|
+
return { provider: 'google', model: modelName };
|
|
1325
|
+
}
|
|
1326
|
+
// xAI models (grok-*)
|
|
1327
|
+
if (modelName.startsWith('grok-')) {
|
|
1328
|
+
return { provider: 'xai', model: modelName };
|
|
1329
|
+
}
|
|
1330
|
+
// Moonshot models (moonshot-*)
|
|
1331
|
+
if (modelName.startsWith('moonshot-')) {
|
|
1332
|
+
return { provider: 'moonshot', model: modelName };
|
|
1333
|
+
}
|
|
1334
|
+
// Provider-prefixed format: "anthropic/claude-3-5-sonnet-latest"
|
|
1335
|
+
if (modelName.includes('/')) {
|
|
1336
|
+
const [provider, model] = modelName.split('/');
|
|
1337
|
+
const validProviders = ['openai', 'anthropic', 'google', 'xai', 'moonshot', 'local'];
|
|
1338
|
+
if (provider && model && validProviders.includes(provider)) {
|
|
1339
|
+
return { provider: provider, model };
|
|
1340
|
+
}
|
|
1341
|
+
}
|
|
1342
|
+
return null;
|
|
1343
|
+
}
|
|
function resolveConfigModel(modelName) {
    return resolveExplicitModel(modelName) ?? parsePreferredModel(modelName);
}
function extractResponseText(responseData) {
    const openAiChoices = responseData['choices'];
    if (openAiChoices && openAiChoices.length > 0) {
        const first = openAiChoices[0];
        const content = first?.message?.content;
        return typeof content === 'string' ? content : '';
    }
    const anthropicContent = responseData['content'];
    if (anthropicContent) {
        return anthropicContent
            .filter((c) => c.type === 'text')
            .map((c) => c.text ?? '')
            .join('');
    }
    const geminiCandidates = responseData['candidates'];
    if (geminiCandidates) {
        const text = geminiCandidates[0]?.content?.parts?.map((p) => p.text ?? '').join('') ?? '';
        return text;
    }
    return '';
}
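// extractResponseText handles the three response shapes probed above:
//   OpenAI-style:    { choices: [{ message: { content: '...' } }] }
//   Anthropic-style: { content: [{ type: 'text', text: '...' }, ...] } (text blocks joined)
//   Gemini-style:    { candidates: [{ content: { parts: [{ text: '...' }] } }] }
// Anything else yields the empty string.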
class ProviderResponseError extends Error {
    status;
    payload;
    constructor(status, payload) {
        super(`Provider response error: ${status}`);
        this.status = status;
        this.payload = payload;
    }
}
class CooldownError extends Error {
    provider;
    constructor(provider) {
        super(`Provider ${provider} is in cooldown`);
        this.provider = provider;
    }
}
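// These two error types drive different fallback behavior in cascadeRequest
// below: a CooldownError skips the model and moves on, while a
// ProviderResponseError carries the upstream status/payload so callers can
// relay it to the client verbatim.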
/**
 * Extract request context (auth headers) from incoming HTTP request
 */
function extractRequestContext(req) {
    return {
        authHeader: req.headers['authorization'],
        betaHeaders: req.headers['anthropic-beta'],
        versionHeader: req.headers['anthropic-version'],
        apiKeyHeader: req.headers['x-api-key'],
    };
}
const MAX_BODY_SIZE = 10 * 1024 * 1024; // 10MB max request body
async function readRequestBody(req) {
    let body = '';
    let size = 0;
    for await (const chunk of req) {
        size += chunk.length;
        if (size > MAX_BODY_SIZE) {
            throw new Error('Request body too large (max 10MB)');
        }
        body += chunk;
    }
    return body;
}
async function readJsonBody(req) {
    const body = await readRequestBody(req);
    return JSON.parse(body);
}
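// readJsonBody throws on bodies over MAX_BODY_SIZE or on malformed JSON; the
// route handlers below catch both cases and answer with a 400 error payload.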
/**
 * Check if we have valid Anthropic auth (either passthrough or env)
 */
function hasAnthropicAuth(ctx, envApiKey) {
    return !!(ctx.authHeader || ctx.apiKeyHeader || envApiKey);
}
function resolveProviderApiKey(provider, ctx, envApiKey) {
    if (provider === 'anthropic') {
        if (!hasAnthropicAuth(ctx, envApiKey)) {
            return {
                error: {
                    status: 401,
                    payload: {
                        error: 'Missing Anthropic authentication. Provide Authorization header or set ANTHROPIC_API_KEY.',
                        hint: 'For Claude Code: auth is passed through automatically. For API: set ANTHROPIC_API_KEY env var.',
                    },
                },
            };
        }
        return { apiKey: envApiKey };
    }
    const apiKeyEnv = exports.DEFAULT_ENDPOINTS[provider]?.apiKeyEnv ?? `${provider.toUpperCase()}_API_KEY`;
    const apiKey = process.env[apiKeyEnv];
    if (!apiKey) {
        return {
            error: {
                status: 500,
                payload: {
                    error: `Missing ${apiKeyEnv} environment variable`,
                    hint: `Cross-provider routing requires API keys for each provider. Set ${apiKeyEnv} to enable ${provider} models.`,
                },
            },
        };
    }
    return { apiKey };
}
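// Example outcomes (illustrative):
//   resolveProviderApiKey('anthropic', ctx, undefined) with an incoming
//     Authorization header → { apiKey: undefined } (passthrough; the caller's
//     headers are reused when the request is forwarded)
//   resolveProviderApiKey('xai', ctx, ...) with no DEFAULT_ENDPOINTS entry
//     falls back to the XAI_API_KEY env var and returns the 500-shaped error
//     above when it is unset.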
function getCascadeModels(config) {
    return config.routing?.cascade?.models ?? [];
}
function getCascadeConfig(config) {
    const c = config.routing?.cascade;
    return {
        enabled: c?.enabled ?? true,
        models: c?.models ?? ['claude-3-5-haiku-20241022', 'claude-sonnet-4-20250514', 'claude-opus-4-5-20250514'],
        escalateOn: c?.escalateOn ?? 'uncertainty',
        maxEscalations: c?.maxEscalations ?? 1,
    };
}
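// A proxy config that exercises these fields might look like (sketch; field
// names as read by getCascadeConfig, model IDs are examples):
//   { "routing": { "mode": "cascade", "cascade": {
//       "enabled": true,
//       "models": ["claude-3-5-haiku-20241022", "claude-sonnet-4-20250514"],
//       "escalateOn": "uncertainty",
//       "maxEscalations": 1 } } }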
function getCooldownConfig(config) {
    const defaults = {
        enabled: true,
        allowedFails: 3,
        windowSeconds: 60,
        cooldownSeconds: 120,
    };
    return { ...defaults, ...config.reliability?.cooldowns };
}
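// With these defaults, a provider that exceeds 3 failures inside a 60-second
// window is presumably cooled down for 120 seconds; the exact counting
// semantics live in CooldownManager, and any field set under
// config.reliability.cooldowns overrides the default.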
function getCostModel(config) {
    return (config.routing?.complexity?.simple ||
        config.routing?.cascade?.models?.[0] ||
        'claude-3-5-haiku-20241022');
}
function getFastModel(config) {
    return (config.routing?.complexity?.simple ||
        config.routing?.cascade?.models?.[0] ||
        'claude-3-5-haiku-20241022');
}
function getQualityModel(config) {
    return (config.routing?.complexity?.complex ||
        config.routing?.cascade?.models?.[config.routing?.cascade?.models?.length ? config.routing.cascade.models.length - 1 : 0] ||
        process.env['RELAYPLANE_QUALITY_MODEL'] ||
        'claude-sonnet-4-20250514');
}
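// Precedence notes: getCostModel and getFastModel currently resolve identically
// (complexity.simple, then the first cascade model, then the haiku default),
// while getQualityModel prefers complexity.complex, then the last cascade
// model, then the RELAYPLANE_QUALITY_MODEL env var, then a sonnet default.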
async function cascadeRequest(config, makeRequest, log) {
    let escalations = 0;
    for (let i = 0; i < config.models.length; i++) {
        const model = config.models[i]; // Safe: i is always < length
        const isLastModel = i === config.models.length - 1;
        try {
            const { responseData, provider, model: resolvedModel } = await makeRequest(model);
            const text = extractResponseText(responseData);
            if (isLastModel || escalations >= config.maxEscalations) {
                return { responseData, provider, model: resolvedModel, escalations };
            }
            if (shouldEscalate(text, config.escalateOn)) {
                log(`[RelayPlane] Escalating from ${model} due to ${config.escalateOn}`);
                escalations++;
                continue;
            }
            return { responseData, provider, model: resolvedModel, escalations };
        }
        catch (err) {
            if (err instanceof CooldownError) {
                log(`[RelayPlane] Skipping ${model} due to cooldown`);
                continue;
            }
            if (config.escalateOn === 'error' && !isLastModel) {
                log(`[RelayPlane] Escalating from ${model} due to error`);
                escalations++;
                continue;
            }
            throw err;
        }
    }
    throw new Error('All cascade models exhausted');
}
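// Illustrative walk (hypothetical two-model cascade with escalateOn 'uncertainty'):
//   1. makeRequest(models[0]) succeeds but shouldEscalate(text) is true
//      → log, escalations becomes 1, continue.
//   2. models[1] is the last model → its response is returned regardless,
//      with escalations: 1 attached for telemetry.
// A CooldownError simply skips a model; other errors escalate only when
// escalateOn === 'error', otherwise they propagate to the caller.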
/**
 * Start the RelayPlane proxy server
 */
async function startProxy(config = {}) {
    const port = config.port ?? 3001;
    const host = config.host ?? '127.0.0.1';
    const verbose = config.verbose ?? false;
    const anthropicAuthMode = config.anthropicAuth ?? 'auto';
    const log = (msg) => {
        if (verbose)
            console.log(`[relayplane] ${msg}`);
    };
    const configPath = getProxyConfigPath();
    let proxyConfig = await loadProxyConfig(configPath, log);
    const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
    let configWatcher = null;
    let configReloadTimer = null;
    const reloadConfig = async () => {
        proxyConfig = await loadProxyConfig(configPath, log);
        cooldownManager.updateConfig(getCooldownConfig(proxyConfig));
        log(`Reloaded config from ${configPath}`);
    };
    const scheduleConfigReload = () => {
        if (configReloadTimer)
            clearTimeout(configReloadTimer);
        configReloadTimer = setTimeout(() => {
            reloadConfig().catch(() => { });
        }, 50);
    };
    const startConfigWatcher = () => {
        if (configWatcher)
            return;
        try {
            configWatcher = fs.watch(configPath, scheduleConfigReload);
        }
        catch (err) {
            const error = err;
            log(`Config watch error: ${error.message}`);
        }
    };
    startConfigWatcher();
    // Initialize RelayPlane
    const relay = new core_1.RelayPlane({ dbPath: config.dbPath });
    const server = http.createServer(async (req, res) => {
        // CORS headers
        res.setHeader('Access-Control-Allow-Origin', '*');
        res.setHeader('Access-Control-Allow-Methods', 'POST, GET, OPTIONS');
        res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization, x-api-key, anthropic-beta, anthropic-version, X-RelayPlane-Bypass, X-RelayPlane-Model');
        if (req.method === 'OPTIONS') {
            res.writeHead(204);
            res.end();
            return;
        }
        const url = req.url ?? '';
        const pathname = url.split('?')[0] ?? '';
        // === Control endpoints ===
        if (pathname.startsWith('/control/')) {
            if (req.method === 'POST' && pathname === '/control/enable') {
                proxyConfig = normalizeProxyConfig({ ...proxyConfig, enabled: true });
                await saveProxyConfig(configPath, proxyConfig);
                startConfigWatcher();
                res.writeHead(200, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({ enabled: true }));
                return;
            }
            if (req.method === 'POST' && pathname === '/control/disable') {
                proxyConfig = normalizeProxyConfig({ ...proxyConfig, enabled: false });
                await saveProxyConfig(configPath, proxyConfig);
                startConfigWatcher();
                res.writeHead(200, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({ enabled: false }));
                return;
            }
            if (req.method === 'GET' && pathname === '/control/status') {
                const enabled = proxyConfig.enabled !== false;
                res.writeHead(200, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({
                    enabled,
                    mode: proxyConfig.mode ?? (enabled ? 'enabled' : 'disabled'),
                    modelOverrides: proxyConfig.modelOverrides ?? {},
                }));
                return;
            }
            if (req.method === 'GET' && pathname === '/control/stats') {
                const uptimeMs = Date.now() - globalStats.startedAt;
                const avgLatencyMs = globalStats.totalRequests > 0
                    ? Math.round(globalStats.totalLatencyMs / globalStats.totalRequests)
                    : 0;
                res.writeHead(200, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({
                    uptimeMs,
                    uptimeFormatted: `${Math.floor(uptimeMs / 60000)}m ${Math.floor((uptimeMs % 60000) / 1000)}s`,
                    totalRequests: globalStats.totalRequests,
                    successfulRequests: globalStats.successfulRequests,
                    failedRequests: globalStats.failedRequests,
                    successRate: globalStats.totalRequests > 0
                        ? `${((globalStats.successfulRequests / globalStats.totalRequests) * 100).toFixed(1)}%`
                        : 'N/A',
                    avgLatencyMs,
                    escalations: globalStats.escalations,
                    routingCounts: globalStats.routingCounts,
                    modelCounts: globalStats.modelCounts,
                }));
                return;
            }
            if (req.method === 'POST' && pathname === '/control/config') {
                try {
                    const patch = await readJsonBody(req);
                    proxyConfig = mergeProxyConfig(proxyConfig, patch);
                    await saveProxyConfig(configPath, proxyConfig);
                    startConfigWatcher();
                    res.writeHead(200, { 'Content-Type': 'application/json' });
                    res.end(JSON.stringify({ ok: true, config: proxyConfig }));
                }
                catch {
                    res.writeHead(400, { 'Content-Type': 'application/json' });
                    res.end(JSON.stringify({ error: 'Invalid JSON' }));
                }
                return;
            }
        }
        // Extract auth context from incoming request
        const ctx = extractRequestContext(req);
        const anthropicEnvKey = process.env['ANTHROPIC_API_KEY'];
        const relayplaneBypass = parseHeaderBoolean(getHeaderValue(req, 'x-relayplane-bypass'));
        const headerModelOverride = getHeaderValue(req, 'x-relayplane-model');
        const relayplaneEnabled = proxyConfig.enabled !== false;
        const recordTelemetry = relayplaneEnabled && !relayplaneBypass;
        // Determine which Anthropic auth to use based on mode
        let useAnthropicEnvKey;
        if (anthropicAuthMode === 'env') {
            useAnthropicEnvKey = anthropicEnvKey;
        }
        else if (anthropicAuthMode === 'passthrough') {
            useAnthropicEnvKey = undefined; // Only use incoming auth
        }
        else {
            // 'auto': Use incoming auth if present, fallback to env
            useAnthropicEnvKey = (ctx.authHeader || ctx.apiKeyHeader) ? undefined : anthropicEnvKey;
        }
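        // Net effect of the anthropicAuth mode on the key sent upstream:
        //   'env'         → always the ANTHROPIC_API_KEY env var
        //   'passthrough' → never the env var; only the caller's own auth headers
        //   'auto'        → caller's headers when present, the env var otherwise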
        // === Native Anthropic /v1/messages endpoint (for Claude Code) ===
        if (req.method === 'POST' && (url.endsWith('/v1/messages') || url.includes('/v1/messages?'))) {
            log('Native Anthropic /v1/messages request');
            // Check auth
            if (!hasAnthropicAuth(ctx, useAnthropicEnvKey)) {
                res.writeHead(401, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({ error: 'Missing authentication. Provide Authorization header or set ANTHROPIC_API_KEY.' }));
                return;
            }
            // Read body
            let requestBody;
            try {
                requestBody = await readJsonBody(req);
            }
            catch {
                res.writeHead(400, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({ error: 'Invalid JSON' }));
                return;
            }
            const originalModel = requestBody['model'];
            let requestedModel = headerModelOverride ?? originalModel ?? '';
            if (headerModelOverride) {
                log(`Header model override: ${originalModel ?? 'unknown'} → ${headerModelOverride}`);
            }
            const parsedModel = parseModelSuffix(requestedModel);
            let routingSuffix = parsedModel.suffix;
            requestedModel = parsedModel.baseModel;
            if (relayplaneEnabled && !relayplaneBypass && requestedModel) {
                const override = proxyConfig.modelOverrides?.[requestedModel];
                if (override) {
                    log(`Model override: ${requestedModel} → ${override}`);
                    const overrideParsed = parseModelSuffix(override);
                    if (!routingSuffix && overrideParsed.suffix) {
                        routingSuffix = overrideParsed.suffix;
                    }
                    requestedModel = overrideParsed.baseModel;
                }
            }
            // Resolve aliases (e.g., relayplane:auto → rp:balanced)
            const resolvedModel = resolveModelAlias(requestedModel);
            if (resolvedModel !== requestedModel) {
                log(`Alias resolution: ${requestedModel} → ${resolvedModel}`);
                requestedModel = resolvedModel;
            }
            if (requestedModel && requestedModel !== originalModel) {
                requestBody['model'] = requestedModel;
            }
            let routingMode = 'auto';
            if (!relayplaneEnabled || relayplaneBypass) {
                routingMode = 'passthrough';
            }
            else if (routingSuffix) {
                routingMode = routingSuffix;
            }
            else if (requestedModel.startsWith('relayplane:')) {
                if (requestedModel.includes(':cost')) {
                    routingMode = 'cost';
                }
                else if (requestedModel.includes(':fast')) {
                    routingMode = 'fast';
                }
                else if (requestedModel.includes(':quality')) {
                    routingMode = 'quality';
                }
                // relayplane:auto stays as 'auto'
            }
            else if (requestedModel.startsWith('rp:')) {
                // Handle rp:* smart aliases - route through passthrough to use SMART_ALIASES
                if (requestedModel === 'rp:cost' || requestedModel === 'rp:cheap') {
                    routingMode = 'cost';
                }
                else if (requestedModel === 'rp:fast') {
                    routingMode = 'fast';
                }
                else if (requestedModel === 'rp:quality' || requestedModel === 'rp:best') {
                    routingMode = 'quality';
                }
                else {
                    // rp:balanced and others go through passthrough to resolve via SMART_ALIASES
                    routingMode = 'passthrough';
                }
            }
            else if (requestedModel === 'auto' || requestedModel === 'relayplane:auto') {
                routingMode = 'auto';
            }
            else if (requestedModel === 'cost') {
                routingMode = 'cost';
            }
            else if (requestedModel === 'fast') {
                routingMode = 'fast';
            }
            else if (requestedModel === 'quality') {
                routingMode = 'quality';
            }
            else {
                routingMode = 'passthrough';
            }
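            // Resulting routing modes for some example model strings (illustrative):
            //   'relayplane:cost' → 'cost'    'rp:fast' → 'fast'    'auto' → 'auto'
            //   'rp:balanced' → 'passthrough' (resolved later via SMART_ALIASES)
            //   'claude-sonnet-4-20250514' → 'passthrough' (forwarded as-is)
            // A suffix parsed by parseModelSuffix above takes precedence over all of these.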
            const isStreaming = requestBody['stream'] === true;
            const messages = Array.isArray(requestBody['messages'])
                ? requestBody['messages']
                : [];
            let promptText = '';
            let taskType = 'general';
            let confidence = 0;
            let complexity = 'simple';
            if (routingMode !== 'passthrough' || recordTelemetry) {
                promptText = extractMessageText(messages);
                taskType = (0, core_1.inferTaskType)(promptText);
                confidence = (0, core_1.getInferenceConfidence)(promptText, taskType);
                complexity = classifyComplexity(messages);
                log(`Inferred task: ${taskType} (confidence: ${confidence.toFixed(2)})`);
            }
            const cascadeConfig = getCascadeConfig(proxyConfig);
            let useCascade = routingMode === 'auto' &&
                proxyConfig.routing?.mode === 'cascade' &&
                cascadeConfig.enabled === true;
            let targetModel = '';
            let targetProvider = 'anthropic';
            // Full cascade cannot be used for streaming requests; fall back to complexity-based routing
            if (useCascade && isStreaming) {
                log('Using complexity-based routing for streaming request');
                useCascade = false; // Disable full cascade, use complexity routing instead
                let selectedModel = null;
                if (proxyConfig.routing?.complexity?.enabled) {
                    selectedModel = proxyConfig.routing?.complexity?.[complexity];
                }
                else {
                    selectedModel = getCascadeModels(proxyConfig)[0] || getCostModel(proxyConfig);
                }
                if (selectedModel) {
                    const resolved = resolveConfigModel(selectedModel);
                    if (resolved) {
                        targetProvider = resolved.provider;
                        targetModel = resolved.model;
                    }
                }
            }
            if (routingMode === 'passthrough') {
                const resolved = resolveExplicitModel(requestedModel);
                if (!resolved) {
                    res.writeHead(400, { 'Content-Type': 'application/json' });
                    res.end(JSON.stringify((0, model_suggestions_js_1.buildModelNotFoundError)(requestedModel, getAvailableModelNames())));
                    return;
                }
                if (resolved.provider !== 'anthropic') {
                    res.writeHead(400, { 'Content-Type': 'application/json' });
                    res.end(JSON.stringify({ error: 'Native /v1/messages only supports Anthropic models.' }));
                    return;
                }
                targetProvider = resolved.provider;
                targetModel = resolved.model;
            }
            else if (!useCascade) {
                let selectedModel = null;
                if (routingMode === 'cost') {
                    selectedModel = getCostModel(proxyConfig);
                }
                else if (routingMode === 'fast') {
                    selectedModel = getFastModel(proxyConfig);
                }
                else if (routingMode === 'quality') {
                    selectedModel = getQualityModel(proxyConfig);
                }
                else {
                    const rule = relay.routing.get(taskType);
                    const parsedRule = rule?.preferredModel ? parsePreferredModel(rule.preferredModel) : null;
                    if (parsedRule?.provider === 'anthropic') {
                        selectedModel = parsedRule.model;
                    }
                    else if (proxyConfig.routing?.complexity?.enabled) {
                        const complexityModel = proxyConfig.routing?.complexity?.[complexity];
                        selectedModel = complexityModel ?? null;
                    }
                    else {
                        selectedModel = DEFAULT_ROUTING[taskType].model;
                    }
                }
                if (!selectedModel) {
                    res.writeHead(500, { 'Content-Type': 'application/json' });
                    res.end(JSON.stringify({ error: 'Failed to resolve routing model' }));
                    return;
                }
                const resolved = resolveConfigModel(selectedModel);
                if (!resolved || resolved.provider !== 'anthropic') {
                    res.writeHead(500, { 'Content-Type': 'application/json' });
                    res.end(JSON.stringify({ error: 'Resolved model is not supported for /v1/messages' }));
                    return;
                }
                targetProvider = resolved.provider;
                targetModel = resolved.model;
            }
            if (proxyConfig.reliability?.cooldowns?.enabled &&
                !useCascade &&
                !cooldownManager.isAvailable(targetProvider)) {
                res.writeHead(503, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({ error: `Provider ${targetProvider} is temporarily cooled down` }));
                return;
            }
            const startTime = Date.now();
            try {
                if (useCascade && cascadeConfig) {
                    const cascadeResult = await cascadeRequest(cascadeConfig, async (modelName) => {
                        const resolved = resolveConfigModel(modelName);
                        if (!resolved) {
                            throw new Error(`Invalid cascade model: ${modelName}`);
                        }
                        if (resolved.provider !== 'anthropic') {
                            throw new Error(`Cascade model ${modelName} is not Anthropic-compatible`);
                        }
                        if (proxyConfig.reliability?.cooldowns?.enabled && !cooldownManager.isAvailable(resolved.provider)) {
                            throw new CooldownError(resolved.provider);
                        }
                        const attemptBody = { ...requestBody, model: resolved.model };
                        // Hybrid auth: use MAX token for Opus models, API key for others
                        const modelAuth = getAuthForModel(resolved.model, proxyConfig.auth, useAnthropicEnvKey);
                        if (modelAuth.isMax) {
                            log(`Using MAX token for ${resolved.model}`);
                        }
                        const providerResponse = await forwardNativeAnthropicRequest(attemptBody, ctx, modelAuth.apiKey, modelAuth.isMax);
                        const responseData = (await providerResponse.json());
                        if (!providerResponse.ok) {
                            if (proxyConfig.reliability?.cooldowns?.enabled) {
                                cooldownManager.recordFailure(resolved.provider, JSON.stringify(responseData));
                            }
                            throw new ProviderResponseError(providerResponse.status, responseData);
                        }
                        if (proxyConfig.reliability?.cooldowns?.enabled) {
                            cooldownManager.recordSuccess(resolved.provider);
                        }
                        return { responseData, provider: resolved.provider, model: resolved.model };
                    }, log);
                    res.writeHead(200, { 'Content-Type': 'application/json' });
                    res.end(JSON.stringify(cascadeResult.responseData));
                    targetProvider = cascadeResult.provider;
                    targetModel = cascadeResult.model;
                }
                else {
                    // Hybrid auth: use MAX token for Opus models, API key for others
                    const finalModel = targetModel || requestedModel;
                    const modelAuth = getAuthForModel(finalModel, proxyConfig.auth, useAnthropicEnvKey);
                    if (modelAuth.isMax) {
                        log(`Using MAX token for ${finalModel}`);
                    }
                    const providerResponse = await forwardNativeAnthropicRequest({ ...requestBody, model: finalModel }, ctx, modelAuth.apiKey, modelAuth.isMax);
                    if (!providerResponse.ok) {
                        const errorPayload = (await providerResponse.json());
                        if (proxyConfig.reliability?.cooldowns?.enabled) {
                            cooldownManager.recordFailure(targetProvider, JSON.stringify(errorPayload));
                        }
                        res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
                        res.end(JSON.stringify(errorPayload));
                        return;
                    }
                    if (proxyConfig.reliability?.cooldowns?.enabled) {
                        cooldownManager.recordSuccess(targetProvider);
                    }
                    if (isStreaming) {
                        res.writeHead(providerResponse.status, {
                            'Content-Type': 'text/event-stream',
                            'Cache-Control': 'no-cache',
                            'Connection': 'keep-alive',
                        });
                        const reader = providerResponse.body?.getReader();
                        if (reader) {
                            const decoder = new TextDecoder();
                            try {
                                while (true) {
                                    const { done, value } = await reader.read();
                                    if (done)
                                        break;
                                    res.write(decoder.decode(value, { stream: true }));
                                }
                            }
                            finally {
                                reader.releaseLock();
                            }
                        }
                        res.end();
                    }
                    else {
                        const responseData = await providerResponse.json();
                        res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
                        res.end(JSON.stringify(responseData));
                    }
                }
                const durationMs = Date.now() - startTime;
                logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, true, routingMode, useCascade && cascadeConfig ? undefined : false);
                if (recordTelemetry) {
                    relay
                        .run({
                        prompt: promptText.slice(0, 500),
                        taskType,
                        model: `${targetProvider}:${targetModel || requestedModel}`,
                    })
                        .catch(() => { });
                }
            }
            catch (err) {
                const durationMs = Date.now() - startTime;
                logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode);
                if (err instanceof ProviderResponseError) {
                    res.writeHead(err.status, { 'Content-Type': 'application/json' });
                    res.end(JSON.stringify(err.payload));
                    return;
                }
                const errorMsg = err instanceof Error ? err.message : String(err);
                res.writeHead(500, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
            }
            return;
        }
        // === Token counting endpoint ===
        if (req.method === 'POST' && url.includes('/v1/messages/count_tokens')) {
            log('Token count request');
            if (!hasAnthropicAuth(ctx, useAnthropicEnvKey)) {
                res.writeHead(401, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({ error: 'Missing authentication' }));
                return;
            }
            let body = '';
            for await (const chunk of req) {
                body += chunk;
            }
            try {
                const headers = buildAnthropicHeaders(ctx, useAnthropicEnvKey);
                const response = await fetch('https://api.anthropic.com/v1/messages/count_tokens', {
                    method: 'POST',
                    headers,
                    body,
                });
                const data = await response.json();
                res.writeHead(response.status, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify(data));
            }
            catch (err) {
                const errorMsg = err instanceof Error ? err.message : String(err);
                res.writeHead(500, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({ error: errorMsg }));
            }
            return;
        }
        // === Model list endpoint ===
        if (req.method === 'GET' && url.includes('/models')) {
            res.writeHead(200, { 'Content-Type': 'application/json' });
            res.end(JSON.stringify({
                object: 'list',
                data: [
                    { id: 'relayplane:auto', object: 'model', owned_by: 'relayplane' },
                    { id: 'relayplane:cost', object: 'model', owned_by: 'relayplane' },
                    { id: 'relayplane:fast', object: 'model', owned_by: 'relayplane' },
                    { id: 'relayplane:quality', object: 'model', owned_by: 'relayplane' },
                ],
            }));
            return;
        }
        // === OpenAI-compatible /v1/chat/completions endpoint ===
        if (req.method !== 'POST' || !url.includes('/chat/completions')) {
            res.writeHead(404, { 'Content-Type': 'application/json' });
            res.end(JSON.stringify({ error: 'Not found. Supported: POST /v1/messages, POST /v1/chat/completions, GET /v1/models' }));
            return;
        }
        // Parse request body
        let body = '';
        for await (const chunk of req) {
            body += chunk;
        }
        let request;
        try {
            request = JSON.parse(body);
        }
        catch {
            res.writeHead(400, { 'Content-Type': 'application/json' });
            res.end(JSON.stringify({ error: 'Invalid JSON' }));
            return;
        }
        const isStreaming = request.stream === true;
        const bypassRouting = !relayplaneEnabled || relayplaneBypass;
        // Extract routing mode from model name
        const originalRequestedModel = request.model;
        let requestedModel = headerModelOverride ?? originalRequestedModel;
        if (headerModelOverride) {
            log(`Header model override: ${originalRequestedModel} → ${headerModelOverride}`);
        }
        if (!requestedModel) {
            res.writeHead(400, { 'Content-Type': 'application/json' });
            res.end(JSON.stringify({ error: 'Missing model in request' }));
            return;
        }
        const parsedModel = parseModelSuffix(requestedModel);
        let routingSuffix = parsedModel.suffix;
        requestedModel = parsedModel.baseModel;
        if (!bypassRouting) {
            const override = proxyConfig.modelOverrides?.[requestedModel];
            if (override) {
                log(`Model override: ${requestedModel} → ${override}`);
                const overrideParsed = parseModelSuffix(override);
                if (!routingSuffix && overrideParsed.suffix) {
                    routingSuffix = overrideParsed.suffix;
                }
                requestedModel = overrideParsed.baseModel;
            }
        }
        // Resolve aliases (e.g., relayplane:auto → rp:balanced)
        const resolvedModel = resolveModelAlias(requestedModel);
        if (resolvedModel !== requestedModel) {
            log(`Alias resolution: ${requestedModel} → ${resolvedModel}`);
            requestedModel = resolvedModel;
        }
        let routingMode = 'auto';
        let targetModel = '';
        let targetProvider = 'anthropic';
        if (bypassRouting) {
            routingMode = 'passthrough';
        }
        else if (routingSuffix) {
            routingMode = routingSuffix;
        }
        else if (requestedModel.startsWith('relayplane:')) {
            if (requestedModel.includes(':cost')) {
                routingMode = 'cost';
            }
            else if (requestedModel.includes(':fast')) {
                routingMode = 'fast';
            }
            else if (requestedModel.includes(':quality')) {
                routingMode = 'quality';
            }
            // relayplane:auto stays as 'auto'
        }
        else if (requestedModel.startsWith('rp:')) {
            // Handle rp:* smart aliases - route through passthrough to use SMART_ALIASES
            if (requestedModel === 'rp:cost' || requestedModel === 'rp:cheap') {
                routingMode = 'cost';
            }
            else if (requestedModel === 'rp:fast') {
                routingMode = 'fast';
            }
            else if (requestedModel === 'rp:quality' || requestedModel === 'rp:best') {
                routingMode = 'quality';
            }
            else {
                // rp:balanced and others go through passthrough to resolve via SMART_ALIASES
                routingMode = 'passthrough';
            }
        }
        else if (requestedModel === 'auto' || requestedModel === 'relayplane:auto') {
            routingMode = 'auto';
        }
        else if (requestedModel === 'cost') {
            routingMode = 'cost';
        }
        else if (requestedModel === 'fast') {
            routingMode = 'fast';
        }
        else if (requestedModel === 'quality') {
            routingMode = 'quality';
        }
        else {
            routingMode = 'passthrough';
        }
        log(`Received request for model: ${requestedModel} (mode: ${routingMode}, stream: ${isStreaming})`);
        let promptText = '';
        let taskType = 'general';
        let confidence = 0;
        let complexity = 'simple';
        if (routingMode !== 'passthrough' || recordTelemetry) {
            promptText = extractPromptText(request.messages);
            taskType = (0, core_1.inferTaskType)(promptText);
            confidence = (0, core_1.getInferenceConfidence)(promptText, taskType);
            complexity = classifyComplexity(request.messages);
            log(`Inferred task: ${taskType} (confidence: ${confidence.toFixed(2)})`);
        }
        const cascadeConfig = getCascadeConfig(proxyConfig);
        let useCascade = routingMode === 'auto' &&
            proxyConfig.routing?.mode === 'cascade' &&
            cascadeConfig.enabled === true;
        if (useCascade && isStreaming) {
            log('Cascade disabled for streaming request; using first cascade model');
            useCascade = false;
            const fallbackModel = getCascadeModels(proxyConfig)[0] || getCostModel(proxyConfig);
            const resolvedFallback = resolveConfigModel(fallbackModel);
            if (resolvedFallback) {
                targetProvider = resolvedFallback.provider;
                targetModel = resolvedFallback.model;
            }
        }
        if (routingMode === 'passthrough') {
            const resolved = resolveExplicitModel(requestedModel);
            if (resolved) {
                targetProvider = resolved.provider;
                targetModel = resolved.model;
                log(`Pass-through mode: ${requestedModel} → ${targetProvider}/${targetModel}`);
            }
            else {
                res.writeHead(400, { 'Content-Type': 'application/json' });
                if (bypassRouting) {
                    const modelError = (0, model_suggestions_js_1.buildModelNotFoundError)(requestedModel, getAvailableModelNames());
                    res.end(JSON.stringify({
                        error: `RelayPlane disabled or bypassed. Use an explicit model instead of ${requestedModel}.`,
                        suggestions: modelError.suggestions,
                        hint: modelError.hint,
                    }));
                }
                else {
                    res.end(JSON.stringify((0, model_suggestions_js_1.buildModelNotFoundError)(requestedModel, getAvailableModelNames())));
                }
                return;
            }
        }
        else if (!useCascade) {
            let selectedModel = null;
            if (routingMode === 'cost') {
                selectedModel = getCostModel(proxyConfig);
            }
            else if (routingMode === 'fast') {
                selectedModel = getFastModel(proxyConfig);
            }
            else if (routingMode === 'quality') {
                selectedModel = getQualityModel(proxyConfig);
            }
            else {
                const rule = relay.routing.get(taskType);
                if (rule && rule.preferredModel) {
                    const parsedRule = parsePreferredModel(rule.preferredModel);
                    if (parsedRule) {
                        targetProvider = parsedRule.provider;
                        targetModel = parsedRule.model;
                        log(`Using learned rule: ${rule.preferredModel}`);
                    }
                }
                if (!targetModel) {
                    if (proxyConfig.routing?.complexity?.enabled) {
                        const complexityModel = proxyConfig.routing?.complexity?.[complexity];
                        selectedModel = complexityModel ?? null;
                    }
                    else {
                        selectedModel = DEFAULT_ROUTING[taskType].model;
                    }
                }
            }
            if (selectedModel) {
                const resolved = resolveConfigModel(selectedModel);
                if (resolved) {
                    targetProvider = resolved.provider;
                    targetModel = resolved.model;
                }
            }
            if (!targetModel) {
                const defaultRoute = DEFAULT_ROUTING[taskType];
                targetProvider = defaultRoute.provider;
                targetModel = defaultRoute.model;
            }
        }
        if (!useCascade) {
            log(`Routing to: ${targetProvider}/${targetModel}`);
        }
        else {
            log(`Cascade routing enabled with models: ${cascadeConfig?.models?.join(', ') ?? ''}`);
        }
        const cooldownsEnabled = proxyConfig.reliability?.cooldowns?.enabled === true;
        if (!useCascade && cooldownsEnabled && !cooldownManager.isAvailable(targetProvider)) {
            res.writeHead(503, { 'Content-Type': 'application/json' });
            res.end(JSON.stringify({ error: `Provider ${targetProvider} is temporarily cooled down` }));
            return;
        }
        let apiKey;
        if (!useCascade) {
            const apiKeyResult = resolveProviderApiKey(targetProvider, ctx, useAnthropicEnvKey);
            if (apiKeyResult.error) {
                res.writeHead(apiKeyResult.error.status, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify(apiKeyResult.error.payload));
                return;
            }
            apiKey = apiKeyResult.apiKey;
        }
        const startTime = Date.now();
        // Handle streaming vs non-streaming
        if (isStreaming) {
            await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled);
        }
        else {
            if (useCascade && cascadeConfig) {
                try {
                    const cascadeResult = await cascadeRequest(cascadeConfig, async (modelName) => {
                        const resolved = resolveConfigModel(modelName);
                        if (!resolved) {
                            throw new Error(`Invalid cascade model: ${modelName}`);
                        }
                        if (cooldownsEnabled && !cooldownManager.isAvailable(resolved.provider)) {
                            throw new CooldownError(resolved.provider);
                        }
                        const apiKeyResult = resolveProviderApiKey(resolved.provider, ctx, useAnthropicEnvKey);
                        if (apiKeyResult.error) {
                            throw new ProviderResponseError(apiKeyResult.error.status, apiKeyResult.error.payload);
                        }
                        const result = await executeNonStreamingProviderRequest(request, resolved.provider, resolved.model, apiKeyResult.apiKey, ctx);
                        if (!result.ok) {
                            if (cooldownsEnabled) {
                                cooldownManager.recordFailure(resolved.provider, JSON.stringify(result.responseData));
                            }
                            throw new ProviderResponseError(result.status, result.responseData);
                        }
                        if (cooldownsEnabled) {
                            cooldownManager.recordSuccess(resolved.provider);
                        }
                        return { responseData: result.responseData, provider: resolved.provider, model: resolved.model };
                    }, log);
                    const durationMs = Date.now() - startTime;
                    let responseData = cascadeResult.responseData;
                    if (recordTelemetry) {
                        try {
                            const runResult = await relay.run({
                                prompt: promptText.slice(0, 500),
                                taskType,
                                model: `${cascadeResult.provider}:${cascadeResult.model}`,
                            });
                            responseData['_relayplane'] = {
                                runId: runResult.runId,
                                routedTo: `${cascadeResult.provider}/${cascadeResult.model}`,
                                taskType,
                                confidence,
                                durationMs,
                                mode: 'cascade',
                                escalations: cascadeResult.escalations,
                            };
                            log(`Completed in ${durationMs}ms, runId: ${runResult.runId}`);
                        }
                        catch (err) {
                            log(`Failed to record run: ${err}`);
                        }
                    }
                    res.writeHead(200, { 'Content-Type': 'application/json' });
                    res.end(JSON.stringify(responseData));
                }
                catch (err) {
                    if (err instanceof ProviderResponseError) {
                        res.writeHead(err.status, { 'Content-Type': 'application/json' });
                        res.end(JSON.stringify(err.payload));
                        return;
                    }
                    const errorMsg = err instanceof Error ? err.message : String(err);
                    res.writeHead(500, { 'Content-Type': 'application/json' });
                    res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
                }
            }
            else {
                await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled);
            }
        }
    });
    return new Promise((resolve, reject) => {
        server.on('error', reject);
        server.listen(port, host, () => {
            console.log(`RelayPlane proxy listening on http://${host}:${port}`);
            console.log(`  Endpoints:`);
            console.log(`    POST /v1/messages              - Native Anthropic API (Claude Code)`);
            console.log(`    POST /v1/chat/completions      - OpenAI-compatible API`);
            console.log(`    POST /v1/messages/count_tokens - Token counting`);
            console.log(`    GET  /v1/models                - Model list`);
            console.log(`  Models: relayplane:auto, relayplane:cost, relayplane:fast, relayplane:quality`);
            console.log(`  Auth: Passthrough for Anthropic, env vars for other providers`);
            console.log(`  Streaming: ✅ Enabled`);
            resolve(server);
        });
    });
}
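// Minimal usage sketch (option names as destructured above):
//   const server = await startProxy({ port: 3001, host: '127.0.0.1', verbose: true });
//   // later: server.close();
// anthropicAuth may be 'auto' (default), 'env', or 'passthrough'; dbPath is
// handed to the RelayPlane core instance.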
/**
 * Execute a non-streaming provider request, converting Anthropic and Gemini
 * responses to the OpenAI shape and reporting ok/status for error relaying
 */
async function executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx) {
    let providerResponse;
    let responseData;
    switch (targetProvider) {
        case 'anthropic': {
            providerResponse = await forwardToAnthropic(request, targetModel, ctx, apiKey);
            const rawData = (await providerResponse.json());
            if (!providerResponse.ok) {
                return { responseData: rawData, ok: false, status: providerResponse.status };
            }
            responseData = convertAnthropicResponse(rawData);
            break;
        }
        case 'google': {
            providerResponse = await forwardToGemini(request, targetModel, apiKey);
            const rawData = (await providerResponse.json());
            if (!providerResponse.ok) {
                return { responseData: rawData, ok: false, status: providerResponse.status };
            }
            responseData = convertGeminiResponse(rawData, targetModel);
            break;
        }
        case 'xai': {
            providerResponse = await forwardToXAI(request, targetModel, apiKey);
            responseData = (await providerResponse.json());
            if (!providerResponse.ok) {
                return { responseData, ok: false, status: providerResponse.status };
            }
            break;
        }
        case 'moonshot': {
            providerResponse = await forwardToMoonshot(request, targetModel, apiKey);
            responseData = (await providerResponse.json());
            if (!providerResponse.ok) {
                return { responseData, ok: false, status: providerResponse.status };
            }
            break;
        }
        default: {
            providerResponse = await forwardToOpenAI(request, targetModel, apiKey);
            responseData = (await providerResponse.json());
            if (!providerResponse.ok) {
                return { responseData, ok: false, status: providerResponse.status };
            }
        }
    }
    return { responseData, ok: true, status: 200 };
}
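/**
 * Handle streaming request: forward to the chosen provider and relay SSE
 * chunks, converting Anthropic/Gemini streams to the OpenAI format
 */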
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled) {
    let providerResponse;
    try {
        switch (targetProvider) {
            case 'anthropic':
                // Use auth passthrough for Anthropic
                providerResponse = await forwardToAnthropicStream(request, targetModel, ctx, apiKey);
                break;
            case 'google':
                providerResponse = await forwardToGeminiStream(request, targetModel, apiKey);
                break;
            case 'xai':
                providerResponse = await forwardToXAIStream(request, targetModel, apiKey);
                break;
            case 'moonshot':
                providerResponse = await forwardToMoonshotStream(request, targetModel, apiKey);
                break;
            default:
                providerResponse = await forwardToOpenAIStream(request, targetModel, apiKey);
        }
        if (!providerResponse.ok) {
            const errorData = await providerResponse.json();
            if (cooldownsEnabled) {
                cooldownManager.recordFailure(targetProvider, JSON.stringify(errorData));
            }
            res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
            res.end(JSON.stringify(errorData));
            return;
        }
    }
    catch (err) {
        const errorMsg = err instanceof Error ? err.message : String(err);
        if (cooldownsEnabled) {
            cooldownManager.recordFailure(targetProvider, errorMsg);
        }
        res.writeHead(500, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
        return;
    }
    // Set SSE headers
    res.writeHead(200, {
        'Content-Type': 'text/event-stream',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
    });
    try {
        // Stream the response based on provider format
        switch (targetProvider) {
            case 'anthropic':
                // Convert Anthropic stream to OpenAI format
                for await (const chunk of convertAnthropicStream(providerResponse, targetModel)) {
                    res.write(chunk);
                }
                break;
            case 'google':
                // Convert Gemini stream to OpenAI format
                for await (const chunk of convertGeminiStream(providerResponse, targetModel)) {
                    res.write(chunk);
                }
                break;
            default:
                // xAI, Moonshot, OpenAI all use OpenAI-compatible streaming format
                for await (const chunk of pipeOpenAIStream(providerResponse)) {
                    res.write(chunk);
                }
        }
    }
    catch (err) {
        log(`Streaming error: ${err}`);
    }
    if (cooldownsEnabled) {
        cooldownManager.recordSuccess(targetProvider);
    }
    const durationMs = Date.now() - startTime;
    if (recordTelemetry) {
        // Record the run (non-blocking)
        relay
            .run({
            prompt: promptText.slice(0, 500),
            taskType,
            model: `${targetProvider}:${targetModel}`,
        })
            .then((runResult) => {
            log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
        })
            .catch((err) => {
            log(`Failed to record run: ${err}`);
        });
    }
    res.end();
}
/**
 * Handle non-streaming request
 */
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled) {
    let responseData;
    try {
        const result = await executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx);
        responseData = result.responseData;
        if (!result.ok) {
            if (cooldownsEnabled) {
                cooldownManager.recordFailure(targetProvider, JSON.stringify(responseData));
            }
            res.writeHead(result.status, { 'Content-Type': 'application/json' });
            res.end(JSON.stringify(responseData));
            return;
        }
    }
    catch (err) {
        const errorMsg = err instanceof Error ? err.message : String(err);
        if (cooldownsEnabled) {
            cooldownManager.recordFailure(targetProvider, errorMsg);
        }
        res.writeHead(500, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
        return;
    }
    if (cooldownsEnabled) {
        cooldownManager.recordSuccess(targetProvider);
    }
    const durationMs = Date.now() - startTime;
    if (recordTelemetry) {
        // Record the run in RelayPlane
        try {
            const runResult = await relay.run({
                prompt: promptText.slice(0, 500),
                taskType,
                model: `${targetProvider}:${targetModel}`,
            });
            // Add routing metadata to response
            responseData['_relayplane'] = {
                runId: runResult.runId,
                routedTo: `${targetProvider}/${targetModel}`,
                taskType,
                confidence,
                durationMs,
                mode: routingMode,
            };
            log(`Completed in ${durationMs}ms, runId: ${runResult.runId}`);
        }
        catch (err) {
            log(`Failed to record run: ${err}`);
        }
    }
    // Send response
    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(responseData));
}
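// Shape of the routing metadata attached above (values illustrative):
//   "_relayplane": { "runId": "...", "routedTo": "anthropic/claude-sonnet-4-20250514",
//                    "taskType": "general", "confidence": 0.82,
//                    "durationMs": 1430, "mode": "auto" }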
// Note: CLI entry point is in cli.ts
//# sourceMappingURL=standalone-proxy.js.map