@relayplane/proxy 1.1.0 → 1.1.2

This diff shows the changes between publicly available package versions as published to a supported public registry, and is provided for informational purposes only.
Files changed (38)
  1. package/README.md +120 -221
  2. package/dist/__tests__/model-suggestions.test.d.ts +2 -0
  3. package/dist/__tests__/model-suggestions.test.d.ts.map +1 -0
  4. package/dist/__tests__/model-suggestions.test.js +67 -0
  5. package/dist/__tests__/model-suggestions.test.js.map +1 -0
  6. package/dist/__tests__/routing-aliases.test.d.ts +2 -0
  7. package/dist/__tests__/routing-aliases.test.d.ts.map +1 -0
  8. package/dist/__tests__/routing-aliases.test.js +81 -0
  9. package/dist/__tests__/routing-aliases.test.js.map +1 -0
  10. package/dist/cli.d.ts +36 -0
  11. package/dist/cli.d.ts.map +1 -0
  12. package/dist/cli.js +304 -0
  13. package/dist/cli.js.map +1 -0
  14. package/dist/config.d.ts +80 -0
  15. package/dist/config.d.ts.map +1 -0
  16. package/dist/config.js +208 -0
  17. package/dist/config.js.map +1 -0
  18. package/dist/index.d.ts +27 -0
  19. package/dist/index.d.ts.map +1 -0
  20. package/dist/index.js +60 -0
  21. package/dist/index.js.map +1 -0
  22. package/dist/standalone-proxy.d.ts +101 -0
  23. package/dist/standalone-proxy.d.ts.map +1 -0
  24. package/dist/standalone-proxy.js +2524 -0
  25. package/dist/standalone-proxy.js.map +1 -0
  26. package/dist/swarm-client.d.ts +87 -0
  27. package/dist/swarm-client.d.ts.map +1 -0
  28. package/dist/swarm-client.js +205 -0
  29. package/dist/swarm-client.js.map +1 -0
  30. package/dist/telemetry.d.ts +127 -0
  31. package/dist/telemetry.d.ts.map +1 -0
  32. package/dist/telemetry.js +426 -0
  33. package/dist/telemetry.js.map +1 -0
  34. package/dist/utils/model-suggestions.d.ts +28 -0
  35. package/dist/utils/model-suggestions.d.ts.map +1 -0
  36. package/dist/utils/model-suggestions.js +50 -0
  37. package/dist/utils/model-suggestions.js.map +1 -0
  38. package/package.json +35 -29
package/dist/standalone-proxy.js
@@ -0,0 +1,2524 @@
+ "use strict";
+ /**
+ * RelayPlane L2/L3 Proxy Server
+ *
+ * An LLM Gateway proxy that intelligently routes requests
+ * to the optimal model using @relayplane/core.
+ *
+ * Supports:
+ * - OpenAI-compatible API (/v1/chat/completions)
+ * - Native Anthropic API (/v1/messages) for Claude Code integration
+ * - Streaming (SSE) for both OpenAI and Anthropic formats
+ * - Auth passthrough for Claude Code (OAuth/subscription billing)
+ * - Cross-provider routing (Anthropic, OpenAI, Google, xAI)
+ * - Tool/function calling with format conversion
+ *
+ * Authentication:
+ * - Anthropic: Passthrough incoming Authorization header OR ANTHROPIC_API_KEY env
+ * - Other providers: Require provider-specific API key env vars
+ *
+ * @packageDocumentation
+ */
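
As a usage sketch (not part of the package itself, and assuming ANTHROPIC_API_KEY is set and that startProxy resolves once the server is listening), a library consumer could start the gateway and send an OpenAI-format request through it:

const { startProxy } = require('@relayplane/proxy');

async function main() {
  // Start the gateway locally; port, host, and verbose are the options startProxy reads below.
  await startProxy({ port: 3001, host: '127.0.0.1', verbose: true });

  // 'rp:fast' is resolved by the alias tables defined below.
  const res = await fetch('http://127.0.0.1:3001/v1/chat/completions', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: 'rp:fast', messages: [{ role: 'user', content: 'Hello' }] }),
  });
  console.log((await res.json()).choices[0].message.content);
}

main();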
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+ if (k2 === undefined) k2 = k;
+ var desc = Object.getOwnPropertyDescriptor(m, k);
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+ desc = { enumerable: true, get: function() { return m[k]; } };
+ }
+ Object.defineProperty(o, k2, desc);
+ }) : (function(o, m, k, k2) {
+ if (k2 === undefined) k2 = k;
+ o[k2] = m[k];
+ }));
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
+ }) : function(o, v) {
+ o["default"] = v;
+ });
+ var __importStar = (this && this.__importStar) || (function () {
+ var ownKeys = function(o) {
+ ownKeys = Object.getOwnPropertyNames || function (o) {
+ var ar = [];
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+ return ar;
+ };
+ return ownKeys(o);
+ };
+ return function (mod) {
+ if (mod && mod.__esModule) return mod;
+ var result = {};
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+ __setModuleDefault(result, mod);
+ return result;
+ };
+ })();
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.SMART_ALIASES = exports.RELAYPLANE_ALIASES = exports.MODEL_MAPPING = exports.DEFAULT_ENDPOINTS = void 0;
+ exports.getAvailableModelNames = getAvailableModelNames;
+ exports.resolveModelAlias = resolveModelAlias;
+ exports.parseModelSuffix = parseModelSuffix;
+ exports.classifyComplexity = classifyComplexity;
+ exports.shouldEscalate = shouldEscalate;
+ exports.startProxy = startProxy;
+ const http = __importStar(require("node:http"));
+ const fs = __importStar(require("node:fs"));
+ const os = __importStar(require("node:os"));
+ const path = __importStar(require("node:path"));
+ const core_1 = require("@relayplane/core");
+ const model_suggestions_js_1 = require("./utils/model-suggestions.js");
+ /**
+ * Default provider endpoints
+ */
+ exports.DEFAULT_ENDPOINTS = {
+ anthropic: {
+ baseUrl: 'https://api.anthropic.com/v1',
+ apiKeyEnv: 'ANTHROPIC_API_KEY',
+ },
+ openai: {
+ baseUrl: 'https://api.openai.com/v1',
+ apiKeyEnv: 'OPENAI_API_KEY',
+ },
+ google: {
+ baseUrl: 'https://generativelanguage.googleapis.com/v1beta',
+ apiKeyEnv: 'GEMINI_API_KEY',
+ },
+ xai: {
+ baseUrl: 'https://api.x.ai/v1',
+ apiKeyEnv: 'XAI_API_KEY',
+ },
+ moonshot: {
+ baseUrl: 'https://api.moonshot.cn/v1',
+ apiKeyEnv: 'MOONSHOT_API_KEY',
+ },
+ };
+ /**
+ * Model to provider/model mapping
+ */
+ exports.MODEL_MAPPING = {
+ // Anthropic models (using correct API model IDs)
+ 'claude-opus-4-5': { provider: 'anthropic', model: 'claude-opus-4-5-20250514' },
+ 'claude-sonnet-4': { provider: 'anthropic', model: 'claude-sonnet-4-20250514' },
+ 'claude-3-5-sonnet': { provider: 'anthropic', model: 'claude-3-5-sonnet-20241022' },
+ 'claude-3-5-haiku': { provider: 'anthropic', model: 'claude-3-5-haiku-20241022' },
+ haiku: { provider: 'anthropic', model: 'claude-3-5-haiku-20241022' },
+ sonnet: { provider: 'anthropic', model: 'claude-sonnet-4-20250514' },
+ opus: { provider: 'anthropic', model: 'claude-opus-4-5-20250514' },
+ // OpenAI models
+ 'gpt-4o': { provider: 'openai', model: 'gpt-4o' },
+ 'gpt-4o-mini': { provider: 'openai', model: 'gpt-4o-mini' },
+ 'gpt-4.1': { provider: 'openai', model: 'gpt-4.1' },
+ };
+ /**
+ * RelayPlane model aliases - resolve before routing
+ * These are user-friendly aliases that map to internal routing modes
+ */
+ exports.RELAYPLANE_ALIASES = {
+ 'relayplane:auto': 'rp:balanced',
+ 'rp:auto': 'rp:balanced',
+ };
+ /**
+ * Smart routing aliases - map to specific provider/model combinations
+ * These provide semantic shortcuts for common use cases
+ */
+ exports.SMART_ALIASES = {
+ // Best quality model (current flagship)
+ 'rp:best': { provider: 'anthropic', model: 'claude-sonnet-4-20250514' },
+ // Fast/cheap model for simple tasks
+ 'rp:fast': { provider: 'anthropic', model: 'claude-3-5-haiku-20241022' },
+ 'rp:cheap': { provider: 'openai', model: 'gpt-4o-mini' },
+ // Balanced model for general use (good quality/cost tradeoff)
+ 'rp:balanced': { provider: 'anthropic', model: 'claude-3-5-haiku-20241022' },
+ };
+ /**
+ * Get all available model names for error suggestions
+ */
+ function getAvailableModelNames() {
+ return [
+ ...Object.keys(exports.MODEL_MAPPING),
+ ...Object.keys(exports.SMART_ALIASES),
+ ...Object.keys(exports.RELAYPLANE_ALIASES),
+ // Add common model prefixes users might type
+ 'relayplane:auto',
+ 'relayplane:cost',
+ 'relayplane:fast',
+ 'relayplane:quality',
+ ];
+ }
+ /**
+ * Resolve model aliases before routing
+ * Returns the resolved model name (may be same as input if no alias found)
+ */
+ function resolveModelAlias(model) {
+ // Check RELAYPLANE_ALIASES first (e.g., relayplane:auto → rp:balanced)
+ if (exports.RELAYPLANE_ALIASES[model]) {
+ return exports.RELAYPLANE_ALIASES[model];
+ }
+ return model;
+ }
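
For instance, given the tables above:

resolveModelAlias('relayplane:auto'); // → 'rp:balanced' (SMART_ALIASES then maps this to Haiku)
resolveModelAlias('rp:auto');         // → 'rp:balanced'
resolveModelAlias('gpt-4o');          // → 'gpt-4o' (no alias entry; returned unchanged)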
+ /**
+ * Default routing based on task type
+ * Uses Haiku 3.5 for cost optimization, upgrades based on learned rules
+ */
+ const DEFAULT_ROUTING = {
+ code_generation: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
+ code_review: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
+ summarization: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
+ analysis: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
+ creative_writing: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
+ data_extraction: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
+ translation: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
+ question_answering: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
+ general: { provider: 'anthropic', model: 'claude-3-5-haiku-latest' },
+ };
+ const UNCERTAINTY_PATTERNS = [
+ /i'?m not (entirely |completely |really )?sure/i,
+ /i don'?t (really |actually )?know/i,
+ /it'?s (difficult|hard|tough) to say/i,
+ /i can'?t (definitively|accurately|confidently)/i,
+ /i'?m (uncertain|unsure)/i,
+ /this is (just )?(a guess|speculation)/i,
+ ];
+ const REFUSAL_PATTERNS = [
+ /i can'?t (help|assist) with that/i,
+ /i'?m (not able|unable) to/i,
+ /i (cannot|can't|won't) (provide|give|create)/i,
+ /as an ai/i,
+ ];
+ class CooldownManager {
+ health = new Map();
+ config;
+ constructor(config) {
+ this.config = config;
+ }
+ updateConfig(config) {
+ this.config = config;
+ }
+ recordFailure(provider, error) {
+ const h = this.getOrCreateHealth(provider);
+ const now = Date.now();
+ h.failures = h.failures.filter((f) => now - f.timestamp < this.config.windowSeconds * 1000);
+ h.failures.push({ timestamp: now, error });
+ if (h.failures.length >= this.config.allowedFails) {
+ h.cooledUntil = now + this.config.cooldownSeconds * 1000;
+ console.log(`[RelayPlane] Provider ${provider} cooled down for ${this.config.cooldownSeconds}s`);
+ }
+ }
+ recordSuccess(provider) {
+ const h = this.health.get(provider);
+ if (h) {
+ h.failures = [];
+ h.cooledUntil = null;
+ }
+ }
+ isAvailable(provider) {
+ const h = this.health.get(provider);
+ if (!h?.cooledUntil)
+ return true;
+ if (Date.now() > h.cooledUntil) {
+ h.cooledUntil = null;
+ h.failures = [];
+ return true;
+ }
+ return false;
+ }
+ getOrCreateHealth(provider) {
+ if (!this.health.has(provider)) {
+ this.health.set(provider, { failures: [], cooledUntil: null });
+ }
+ return this.health.get(provider);
+ }
+ }
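
A sketch of the failure-to-cooldown flow, using the defaults defined further down (3 failures within 60 s trigger a 120 s cooldown):

const cd = new CooldownManager({ enabled: true, allowedFails: 3, windowSeconds: 60, cooldownSeconds: 120 });
cd.recordFailure('openai', 'HTTP 500');
cd.recordFailure('openai', 'HTTP 500');
cd.isAvailable('openai'); // true (two failures are still below allowedFails)
cd.recordFailure('openai', 'HTTP 500');
cd.isAvailable('openai'); // false for the next 120 s; recordSuccess('openai') would clear it early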
+ const globalStats = {
+ totalRequests: 0,
+ successfulRequests: 0,
+ failedRequests: 0,
+ totalLatencyMs: 0,
+ routingCounts: {},
+ modelCounts: {},
+ escalations: 0,
+ startedAt: Date.now(),
+ };
+ function logRequest(originalModel, targetModel, provider, latencyMs, success, mode, escalated) {
+ const timestamp = new Date().toISOString();
+ const status = success ? '✓' : '✗';
+ const escalateTag = escalated ? ' [ESCALATED]' : '';
+ console.log(`[RelayPlane] ${timestamp} ${status} ${originalModel} → ${provider}/${targetModel} (${mode}) ${latencyMs}ms${escalateTag}`);
+ // Update stats
+ globalStats.totalRequests++;
+ if (success) {
+ globalStats.successfulRequests++;
+ }
+ else {
+ globalStats.failedRequests++;
+ }
+ globalStats.totalLatencyMs += latencyMs;
+ globalStats.routingCounts[mode] = (globalStats.routingCounts[mode] || 0) + 1;
+ const modelKey = `${provider}/${targetModel}`;
+ globalStats.modelCounts[modelKey] = (globalStats.modelCounts[modelKey] || 0) + 1;
+ if (escalated) {
+ globalStats.escalations++;
+ }
+ }
+ const DEFAULT_PROXY_CONFIG = {
+ enabled: true,
+ modelOverrides: {},
+ routing: {
+ mode: 'cascade',
+ cascade: {
+ enabled: true,
+ models: [
+ 'claude-3-5-haiku-20241022',
+ 'claude-sonnet-4-20250514',
+ 'claude-opus-4-5-20250514',
+ ],
+ escalateOn: 'uncertainty',
+ maxEscalations: 1,
+ },
+ complexity: {
+ enabled: true,
+ simple: 'claude-3-5-haiku-20241022',
+ moderate: 'claude-sonnet-4-20250514',
+ complex: 'claude-opus-4-5-20250514',
+ },
+ },
+ reliability: {
+ cooldowns: {
+ enabled: true,
+ allowedFails: 3,
+ windowSeconds: 60,
+ cooldownSeconds: 120,
+ },
+ },
+ };
+ function getProxyConfigPath() {
+ const customPath = process.env['RELAYPLANE_CONFIG_PATH'];
+ if (customPath && customPath.trim())
+ return customPath;
+ return path.join(os.homedir(), '.relayplane', 'config.json');
+ }
+ function normalizeProxyConfig(config) {
+ const defaultRouting = DEFAULT_PROXY_CONFIG.routing;
+ const configRouting = (config?.routing ?? {});
+ const cascade = { ...defaultRouting.cascade, ...(configRouting.cascade ?? {}) };
+ const complexity = { ...defaultRouting.complexity, ...(configRouting.complexity ?? {}) };
+ const routing = {
+ ...defaultRouting,
+ ...configRouting,
+ cascade,
+ complexity,
+ };
+ const defaultReliability = DEFAULT_PROXY_CONFIG.reliability;
+ const configReliability = (config?.reliability ?? {});
+ const cooldowns = {
+ ...defaultReliability.cooldowns,
+ ...(configReliability.cooldowns ?? {}),
+ };
+ const reliability = {
+ ...defaultReliability,
+ ...configReliability,
+ cooldowns,
+ };
+ return {
+ ...DEFAULT_PROXY_CONFIG,
+ ...(config ?? {}),
+ modelOverrides: {
+ ...(DEFAULT_PROXY_CONFIG.modelOverrides ?? {}),
+ ...(config?.modelOverrides ?? {}),
+ },
+ routing,
+ reliability,
+ enabled: config?.enabled !== undefined ? !!config.enabled : DEFAULT_PROXY_CONFIG.enabled,
+ };
+ }
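
normalizeProxyConfig deep-fills a sparse user config from these defaults, so a config file only needs the fields it overrides. A sketch:

const cfg = normalizeProxyConfig({
  routing: { cascade: { escalateOn: 'error', maxEscalations: 2 } },
  reliability: { cooldowns: { allowedFails: 5 } },
});
// cfg.routing.cascade keeps the default three-model ladder while
// escalateOn/maxEscalations take the overridden values;
// cfg.reliability.cooldowns → { enabled: true, allowedFails: 5, windowSeconds: 60, cooldownSeconds: 120 }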
+ async function loadProxyConfig(configPath, log) {
+ try {
+ const raw = await fs.promises.readFile(configPath, 'utf8');
+ const parsed = JSON.parse(raw);
+ return normalizeProxyConfig(parsed);
+ }
+ catch (err) {
+ const error = err;
+ if (error.code !== 'ENOENT') {
+ log(`Failed to load config: ${error.message}`);
+ }
+ return normalizeProxyConfig(null);
+ }
+ }
+ async function saveProxyConfig(configPath, config) {
+ await fs.promises.mkdir(path.dirname(configPath), { recursive: true });
+ const payload = JSON.stringify(config, null, 2);
+ await fs.promises.writeFile(configPath, payload, 'utf8');
+ }
+ function isPlainObject(value) {
+ return !!value && typeof value === 'object' && !Array.isArray(value);
+ }
+ function deepMerge(base, patch) {
+ const result = { ...base };
+ for (const [key, value] of Object.entries(patch)) {
+ if (isPlainObject(value) && isPlainObject(result[key])) {
+ result[key] = deepMerge(result[key], value);
+ }
+ else {
+ result[key] = value;
+ }
+ }
+ return result;
+ }
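
For example:

deepMerge({ a: { x: 1, y: 2 } }, { a: { y: 9 }, b: 3 });
// → { a: { x: 1, y: 9 }, b: 3 } (nested plain objects merge; scalars and arrays are replaced)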
+ function mergeProxyConfig(base, patch) {
+ // Deep merge without normalizing intermediate results
+ const merged = deepMerge(base, patch);
+ return normalizeProxyConfig(merged);
+ }
+ function getHeaderValue(req, headerName) {
+ const raw = req.headers[headerName.toLowerCase()];
+ if (Array.isArray(raw))
+ return raw[0];
+ return raw;
+ }
+ function parseHeaderBoolean(value) {
+ if (!value)
+ return false;
+ const normalized = value.trim().toLowerCase();
+ return normalized === '1' || normalized === 'true' || normalized === 'yes' || normalized === 'on';
+ }
+ function parseModelSuffix(model) {
+ const trimmed = model.trim();
+ if (/^relayplane:(auto|cost|fast|quality)$/.test(trimmed)) {
+ return { baseModel: trimmed, suffix: null };
+ }
+ const suffixes = ['cost', 'fast', 'quality'];
+ for (const suffix of suffixes) {
+ if (trimmed.endsWith(`:${suffix}`)) {
+ return {
+ baseModel: trimmed.slice(0, -(suffix.length + 1)),
+ suffix,
+ };
+ }
+ }
+ return { baseModel: trimmed, suffix: null };
+ }
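
For example:

parseModelSuffix('claude-sonnet-4:quality'); // → { baseModel: 'claude-sonnet-4', suffix: 'quality' }
parseModelSuffix('relayplane:cost');         // → { baseModel: 'relayplane:cost', suffix: null } (reserved name, not split)
parseModelSuffix('gpt-4o');                  // → { baseModel: 'gpt-4o', suffix: null }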
+ /**
+ * Extract text content from messages for routing analysis
+ */
+ function extractPromptText(messages) {
+ return messages
+ .map((msg) => {
+ if (typeof msg.content === 'string')
+ return msg.content;
+ if (Array.isArray(msg.content)) {
+ return msg.content
+ .map((c) => {
+ const part = c;
+ return part.type === 'text' ? (part.text ?? '') : '';
+ })
+ .join(' ');
+ }
+ return '';
+ })
+ .join('\n');
+ }
+ function extractMessageText(messages) {
+ return messages
+ .map((msg) => {
+ const content = msg.content;
+ if (typeof content === 'string')
+ return content;
+ if (Array.isArray(content)) {
+ return content
+ .map((c) => {
+ const part = c;
+ return part.type === 'text' ? (part.text ?? '') : '';
+ })
+ .join(' ');
+ }
+ return '';
+ })
+ .join(' ');
+ }
+ function classifyComplexity(messages) {
+ const text = extractMessageText(messages).toLowerCase();
+ const tokens = Math.ceil(text.length / 4);
+ let score = 0;
+ if (/```/.test(text) || /function |class |const |let |import /.test(text))
+ score += 2;
+ if (/analyze|compare|evaluate|assess|review|audit/.test(text))
+ score += 1;
+ if (/calculate|compute|solve|equation|prove|derive/.test(text))
+ score += 2;
+ if (/first.*then|step \d|1\).*2\)|phase \d/.test(text))
+ score += 1;
+ if (tokens > 2000)
+ score += 1;
+ if (tokens > 5000)
+ score += 1;
+ if (/write a (story|essay|article|report)|create a|design a|build a/.test(text))
+ score += 1;
+ if (score >= 4)
+ return 'complex';
+ if (score >= 2)
+ return 'moderate';
+ return 'simple';
+ }
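
Two worked examples of the scoring above:

classifyComplexity([{ role: 'user', content: 'What time is it in Tokyo?' }]);
// no keyword or length rule fires → score 0 → 'simple'

classifyComplexity([{ role: 'user', content: 'First simplify the equation, then solve and prove it; analyze the result.' }]);
// math keywords +2, 'analyze' +1, 'first...then' +1 → score 4 → 'complex'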
+ function shouldEscalate(responseText, trigger) {
+ if (trigger === 'error')
+ return false;
+ const patterns = trigger === 'refusal' ? REFUSAL_PATTERNS : UNCERTAINTY_PATTERNS;
+ return patterns.some((p) => p.test(responseText));
+ }
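
For example:

shouldEscalate("I'm not sure this is correct.", 'uncertainty'); // true (matches an UNCERTAINTY_PATTERN)
shouldEscalate("I can't help with that.", 'refusal');           // true (matches a REFUSAL_PATTERN)
shouldEscalate('anything', 'error');                            // false (the 'error' trigger is handled in the catch path of cascadeRequest below)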
+ /**
+ * Check if a model should use MAX token (hybrid auth)
+ */
+ function shouldUseMaxToken(model, authConfig) {
+ if (!authConfig?.anthropicMaxToken || !authConfig?.useMaxForModels?.length) {
+ return false;
+ }
+ const modelLower = model.toLowerCase();
+ return authConfig.useMaxForModels.some(pattern => modelLower.includes(pattern.toLowerCase()));
+ }
+ /**
+ * Get the appropriate API key for a model (hybrid auth support)
+ */
+ function getAuthForModel(model, authConfig, envApiKey) {
+ if (shouldUseMaxToken(model, authConfig)) {
+ return { apiKey: authConfig.anthropicMaxToken, isMax: true };
+ }
+ return { apiKey: envApiKey, isMax: false };
+ }
+ /**
+ * Build Anthropic headers with hybrid auth support
+ * MAX tokens (sk-ant-oat*) use Authorization: Bearer header
+ * API keys (sk-ant-api*) use x-api-key header
+ */
+ function buildAnthropicHeadersWithAuth(ctx, apiKey, isMaxToken) {
+ const headers = {
+ 'Content-Type': 'application/json',
+ 'anthropic-version': ctx.versionHeader || '2023-06-01',
+ };
+ // Auth: prefer incoming auth for passthrough
+ if (ctx.authHeader) {
+ headers['Authorization'] = ctx.authHeader;
+ }
+ else if (ctx.apiKeyHeader) {
+ headers['x-api-key'] = ctx.apiKeyHeader;
+ }
+ else if (apiKey) {
+ // MAX tokens (OAuth) use Authorization: Bearer, API keys use x-api-key
+ if (isMaxToken || apiKey.startsWith('sk-ant-oat')) {
+ headers['Authorization'] = `Bearer ${apiKey}`;
+ }
+ else {
+ headers['x-api-key'] = apiKey;
+ }
+ }
+ // Pass through beta headers
+ if (ctx.betaHeaders) {
+ headers['anthropic-beta'] = ctx.betaHeaders;
+ }
+ return headers;
+ }
+ /**
+ * Build Anthropic headers with auth passthrough support
+ *
+ * Auth priority:
+ * 1. Incoming Authorization header (Bearer token from Claude Code OAuth)
+ * 2. Incoming x-api-key header
+ * 3. ANTHROPIC_API_KEY env var (or MAX token for Opus models)
+ */
+ function buildAnthropicHeaders(ctx, envApiKey) {
+ const headers = {
+ 'Content-Type': 'application/json',
+ 'anthropic-version': ctx.versionHeader || '2023-06-01',
+ };
+ // Auth: prefer incoming auth for passthrough, fallback to env
+ if (ctx.authHeader) {
+ // Claude Code sends "Authorization: Bearer <token>" for OAuth
+ headers['Authorization'] = ctx.authHeader;
+ }
+ else if (ctx.apiKeyHeader) {
+ // Direct x-api-key header
+ headers['x-api-key'] = ctx.apiKeyHeader;
+ }
+ else if (envApiKey) {
+ // Fallback to env var
+ headers['x-api-key'] = envApiKey;
+ }
+ // Pass through beta headers (prompt caching, extended thinking, etc.)
+ if (ctx.betaHeaders) {
+ headers['anthropic-beta'] = ctx.betaHeaders;
+ }
+ return headers;
+ }
+ /**
+ * Forward non-streaming request to Anthropic API
+ */
+ async function forwardToAnthropic(request, targetModel, ctx, envApiKey) {
+ const anthropicBody = buildAnthropicBody(request, targetModel, false);
+ const headers = buildAnthropicHeaders(ctx, envApiKey);
+ const response = await fetch('https://api.anthropic.com/v1/messages', {
+ method: 'POST',
+ headers,
+ body: JSON.stringify(anthropicBody),
+ });
+ return response;
+ }
+ /**
+ * Forward streaming request to Anthropic API
+ */
+ async function forwardToAnthropicStream(request, targetModel, ctx, envApiKey) {
+ const anthropicBody = buildAnthropicBody(request, targetModel, true);
+ const headers = buildAnthropicHeaders(ctx, envApiKey);
+ const response = await fetch('https://api.anthropic.com/v1/messages', {
+ method: 'POST',
+ headers,
+ body: JSON.stringify(anthropicBody),
+ });
+ return response;
+ }
+ /**
+ * Forward native Anthropic /v1/messages request (passthrough with routing)
+ * Used for Claude Code direct integration
+ */
+ async function forwardNativeAnthropicRequest(body, ctx, envApiKey, isMaxToken) {
+ const headers = buildAnthropicHeadersWithAuth(ctx, envApiKey, isMaxToken);
+ const response = await fetch('https://api.anthropic.com/v1/messages', {
+ method: 'POST',
+ headers,
+ body: JSON.stringify(body),
+ });
+ return response;
+ }
+ /**
+ * Convert OpenAI messages array to Anthropic format
+ * Handles: user, assistant, tool_calls, tool results
+ */
+ function convertMessagesToAnthropic(messages) {
+ const result = [];
+ for (const msg of messages) {
+ const m = msg;
+ // Skip system messages (handled separately)
+ if (m.role === 'system')
+ continue;
+ // Tool result message → Anthropic user message with tool_result content
+ if (m.role === 'tool') {
+ result.push({
+ role: 'user',
+ content: [
+ {
+ type: 'tool_result',
+ tool_use_id: m.tool_call_id,
+ content: typeof m.content === 'string' ? m.content : JSON.stringify(m.content),
+ },
+ ],
+ });
+ continue;
+ }
+ // Assistant message with tool_calls → Anthropic assistant with tool_use content
+ if (m.role === 'assistant' && m.tool_calls && m.tool_calls.length > 0) {
+ const content = [];
+ // Add text content if present
+ if (m.content && typeof m.content === 'string') {
+ content.push({ type: 'text', text: m.content });
+ }
+ // Add tool_use blocks
+ for (const tc of m.tool_calls) {
+ content.push({
+ type: 'tool_use',
+ id: tc.id,
+ name: tc.function.name,
+ input: JSON.parse(tc.function.arguments || '{}'),
+ });
+ }
+ result.push({ role: 'assistant', content });
+ continue;
+ }
+ // Regular user/assistant message
+ result.push({
+ role: m.role === 'assistant' ? 'assistant' : 'user',
+ content: m.content,
+ });
+ }
+ return result;
+ }
+ /**
+ * Build Anthropic request body from OpenAI format
+ */
+ function buildAnthropicBody(request, targetModel, stream) {
+ // Convert OpenAI messages to Anthropic format
+ const anthropicMessages = convertMessagesToAnthropic(request.messages);
+ const systemMessage = request.messages.find((m) => m.role === 'system');
+ const anthropicBody = {
+ model: targetModel,
+ messages: anthropicMessages,
+ max_tokens: request.max_tokens ?? 4096,
+ stream,
+ };
+ if (systemMessage) {
+ anthropicBody['system'] = systemMessage.content;
+ }
+ if (request.temperature !== undefined) {
+ anthropicBody['temperature'] = request.temperature;
+ }
+ // Convert OpenAI tools format to Anthropic tools format
+ if (request.tools && Array.isArray(request.tools)) {
+ anthropicBody['tools'] = convertToolsToAnthropic(request.tools);
+ }
+ // Convert tool_choice
+ if (request.tool_choice) {
+ anthropicBody['tool_choice'] = convertToolChoiceToAnthropic(request.tool_choice);
+ }
+ return anthropicBody;
+ }
+ /**
+ * Convert OpenAI tools format to Anthropic format
+ * OpenAI: { type: "function", function: { name, description, parameters } }
+ * Anthropic: { name, description, input_schema }
+ */
+ function convertToolsToAnthropic(tools) {
+ return tools.map((tool) => {
+ const t = tool;
+ if (t.type === 'function' && t.function) {
+ return {
+ name: t.function.name,
+ description: t.function.description,
+ input_schema: t.function.parameters || { type: 'object', properties: {} },
+ };
+ }
+ // Already in Anthropic format or unknown
+ return tool;
+ });
+ }
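
For example, an OpenAI tool definition converts like this:

convertToolsToAnthropic([{
  type: 'function',
  function: {
    name: 'get_weather',
    description: 'Look up current weather',
    parameters: { type: 'object', properties: { city: { type: 'string' } } },
  },
}]);
// → [{ name: 'get_weather', description: 'Look up current weather',
//      input_schema: { type: 'object', properties: { city: { type: 'string' } } } }]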
+ /**
+ * Convert OpenAI tool_choice to Anthropic format
+ */
+ function convertToolChoiceToAnthropic(toolChoice) {
+ if (toolChoice === 'auto')
+ return { type: 'auto' };
+ if (toolChoice === 'none')
+ return { type: 'none' };
+ if (toolChoice === 'required')
+ return { type: 'any' };
+ // Specific tool: { type: "function", function: { name: "xxx" } }
+ const tc = toolChoice;
+ if (tc.type === 'function' && tc.function?.name) {
+ return { type: 'tool', name: tc.function.name };
+ }
+ return toolChoice;
+ }
+ /**
+ * Forward non-streaming request to OpenAI API
+ */
+ async function forwardToOpenAI(request, targetModel, apiKey) {
+ const openaiBody = {
+ ...request,
+ model: targetModel,
+ stream: false,
+ };
+ const response = await fetch('https://api.openai.com/v1/chat/completions', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ Authorization: `Bearer ${apiKey}`,
+ },
+ body: JSON.stringify(openaiBody),
+ });
+ return response;
+ }
+ /**
+ * Forward streaming request to OpenAI API
+ */
+ async function forwardToOpenAIStream(request, targetModel, apiKey) {
+ const openaiBody = {
+ ...request,
+ model: targetModel,
+ stream: true,
+ };
+ const response = await fetch('https://api.openai.com/v1/chat/completions', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ Authorization: `Bearer ${apiKey}`,
+ },
+ body: JSON.stringify(openaiBody),
+ });
+ return response;
+ }
+ /**
+ * Forward non-streaming request to xAI API (OpenAI-compatible)
+ */
+ async function forwardToXAI(request, targetModel, apiKey) {
+ const xaiBody = {
+ ...request,
+ model: targetModel,
+ stream: false,
+ };
+ const response = await fetch('https://api.x.ai/v1/chat/completions', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ Authorization: `Bearer ${apiKey}`,
+ },
+ body: JSON.stringify(xaiBody),
+ });
+ return response;
+ }
+ /**
+ * Forward streaming request to xAI API (OpenAI-compatible)
+ */
+ async function forwardToXAIStream(request, targetModel, apiKey) {
+ const xaiBody = {
+ ...request,
+ model: targetModel,
+ stream: true,
+ };
+ const response = await fetch('https://api.x.ai/v1/chat/completions', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ Authorization: `Bearer ${apiKey}`,
+ },
+ body: JSON.stringify(xaiBody),
+ });
+ return response;
+ }
+ /**
+ * Forward non-streaming request to Moonshot API (OpenAI-compatible)
+ */
+ async function forwardToMoonshot(request, targetModel, apiKey) {
+ const moonshotBody = {
+ ...request,
+ model: targetModel,
+ stream: false,
+ };
+ const response = await fetch('https://api.moonshot.cn/v1/chat/completions', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ Authorization: `Bearer ${apiKey}`,
+ },
+ body: JSON.stringify(moonshotBody),
+ });
+ return response;
+ }
+ /**
+ * Forward streaming request to Moonshot API (OpenAI-compatible)
+ */
+ async function forwardToMoonshotStream(request, targetModel, apiKey) {
+ const moonshotBody = {
+ ...request,
+ model: targetModel,
+ stream: true,
+ };
+ const response = await fetch('https://api.moonshot.cn/v1/chat/completions', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ Authorization: `Bearer ${apiKey}`,
+ },
+ body: JSON.stringify(moonshotBody),
+ });
+ return response;
+ }
+ /**
+ * Convert OpenAI messages to Gemini format
+ */
+ function convertMessagesToGemini(messages) {
+ const geminiContents = [];
+ for (const msg of messages) {
+ // Skip system messages (handled separately via systemInstruction)
+ if (msg.role === 'system')
+ continue;
+ const role = msg.role === 'assistant' ? 'model' : 'user';
+ if (typeof msg.content === 'string') {
+ geminiContents.push({
+ role,
+ parts: [{ text: msg.content }],
+ });
+ }
+ else if (Array.isArray(msg.content)) {
+ // Handle multimodal content
+ const parts = msg.content.map((part) => {
+ const p = part;
+ if (p.type === 'text') {
+ return { text: p.text };
+ }
+ if (p.type === 'image_url' && p.image_url?.url) {
+ // Handle base64 images
+ const url = p.image_url.url;
+ if (url.startsWith('data:')) {
+ const match = url.match(/^data:([^;]+);base64,(.+)$/);
+ if (match) {
+ return {
+ inline_data: {
+ mime_type: match[1],
+ data: match[2],
+ },
+ };
+ }
+ }
+ // URL-based images not directly supported, return as text
+ return { text: `[Image: ${url}]` };
+ }
+ return { text: '' };
+ });
+ geminiContents.push({ role, parts });
+ }
+ }
+ return geminiContents;
+ }
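
For example:

convertMessagesToGemini([
  { role: 'system', content: 'Be terse.' },      // skipped here; forwarded separately as systemInstruction
  { role: 'user', content: 'Hi' },
  { role: 'assistant', content: 'Hello!' },
]);
// → [{ role: 'user', parts: [{ text: 'Hi' }] },
//    { role: 'model', parts: [{ text: 'Hello!' }] }]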
+ /**
+ * Forward non-streaming request to Gemini API
+ */
+ async function forwardToGemini(request, targetModel, apiKey) {
+ const systemMessage = request.messages.find((m) => m.role === 'system');
+ const geminiContents = convertMessagesToGemini(request.messages);
+ const geminiBody = {
+ contents: geminiContents,
+ generationConfig: {
+ maxOutputTokens: request.max_tokens ?? 4096,
+ },
+ };
+ if (request.temperature !== undefined) {
+ geminiBody['generationConfig']['temperature'] = request.temperature;
+ }
+ if (systemMessage && typeof systemMessage.content === 'string') {
+ geminiBody['systemInstruction'] = {
+ parts: [{ text: systemMessage.content }],
+ };
+ }
+ const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${targetModel}:generateContent?key=${apiKey}`, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify(geminiBody),
+ });
+ return response;
+ }
+ /**
+ * Forward streaming request to Gemini API
+ */
+ async function forwardToGeminiStream(request, targetModel, apiKey) {
+ const systemMessage = request.messages.find((m) => m.role === 'system');
+ const geminiContents = convertMessagesToGemini(request.messages);
+ const geminiBody = {
+ contents: geminiContents,
+ generationConfig: {
+ maxOutputTokens: request.max_tokens ?? 4096,
+ },
+ };
+ if (request.temperature !== undefined) {
+ geminiBody['generationConfig']['temperature'] = request.temperature;
+ }
+ if (systemMessage && typeof systemMessage.content === 'string') {
+ geminiBody['systemInstruction'] = {
+ parts: [{ text: systemMessage.content }],
+ };
+ }
+ const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${targetModel}:streamGenerateContent?alt=sse&key=${apiKey}`, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify(geminiBody),
+ });
+ return response;
+ }
+ /**
+ * Convert Gemini response to OpenAI format
+ */
+ function convertGeminiResponse(geminiData, model) {
+ const candidate = geminiData.candidates?.[0];
+ const text = candidate?.content?.parts?.map((p) => p.text ?? '').join('') ?? '';
+ let finishReason = 'stop';
+ if (candidate?.finishReason === 'MAX_TOKENS') {
+ finishReason = 'length';
+ }
+ else if (candidate?.finishReason === 'SAFETY') {
+ finishReason = 'content_filter';
+ }
+ return {
+ id: `chatcmpl-${Date.now()}`,
+ object: 'chat.completion',
+ created: Math.floor(Date.now() / 1000),
+ model,
+ choices: [
+ {
+ index: 0,
+ message: {
+ role: 'assistant',
+ content: text,
+ },
+ finish_reason: finishReason,
+ },
+ ],
+ usage: {
+ prompt_tokens: geminiData.usageMetadata?.promptTokenCount ?? 0,
+ completion_tokens: geminiData.usageMetadata?.candidatesTokenCount ?? 0,
+ total_tokens: (geminiData.usageMetadata?.promptTokenCount ?? 0) +
+ (geminiData.usageMetadata?.candidatesTokenCount ?? 0),
+ },
+ };
+ }
+ /**
+ * Convert Gemini streaming event to OpenAI format
+ */
+ function convertGeminiStreamEvent(eventData, messageId, model, isFirst) {
+ const candidate = eventData.candidates?.[0];
+ const text = candidate?.content?.parts?.map((p) => p.text ?? '').join('') ?? '';
+ const choice = {
+ index: 0,
+ delta: {},
+ finish_reason: null,
+ };
+ if (isFirst) {
+ choice['delta'] = { role: 'assistant', content: text };
+ }
+ else if (text) {
+ choice['delta'] = { content: text };
+ }
+ // Check for finish
+ if (candidate?.finishReason) {
+ let finishReason = 'stop';
+ if (candidate.finishReason === 'MAX_TOKENS') {
+ finishReason = 'length';
+ }
+ else if (candidate.finishReason === 'SAFETY') {
+ finishReason = 'content_filter';
+ }
+ choice['finish_reason'] = finishReason;
+ }
+ const chunk = {
+ id: messageId,
+ object: 'chat.completion.chunk',
+ created: Math.floor(Date.now() / 1000),
+ model,
+ choices: [choice],
+ };
+ return `data: ${JSON.stringify(chunk)}\n\n`;
+ }
+ /**
+ * Parse Gemini SSE stream and convert to OpenAI format
+ */
+ async function* convertGeminiStream(response, model) {
+ const reader = response.body?.getReader();
+ if (!reader) {
+ throw new Error('No response body');
+ }
+ const decoder = new TextDecoder();
+ let buffer = '';
+ const messageId = `chatcmpl-${Date.now()}`;
+ let isFirst = true;
+ try {
+ while (true) {
+ const { done, value } = await reader.read();
+ if (done)
+ break;
+ buffer += decoder.decode(value, { stream: true });
+ // Process complete SSE events (Gemini uses "data: " prefix)
+ const lines = buffer.split('\n');
+ buffer = lines.pop() || '';
+ for (const line of lines) {
+ if (line.startsWith('data: ')) {
+ const jsonStr = line.slice(6);
+ if (jsonStr.trim() === '[DONE]') {
+ yield 'data: [DONE]\n\n';
+ continue;
+ }
+ try {
+ const parsed = JSON.parse(jsonStr);
+ const converted = convertGeminiStreamEvent(parsed, messageId, model, isFirst);
+ if (converted) {
+ yield converted;
+ isFirst = false;
+ }
+ }
+ catch {
+ // Skip malformed JSON
+ }
+ }
+ }
+ }
+ // Send [DONE] at the end
+ yield 'data: [DONE]\n\n';
+ }
+ finally {
+ reader.releaseLock();
+ }
+ }
+ /**
+ * Convert Anthropic response to OpenAI format
+ * Handles both text and tool_use content blocks
+ */
+ function convertAnthropicResponse(anthropicData) {
+ const textBlocks = anthropicData.content?.filter((c) => c.type === 'text') ?? [];
+ const toolBlocks = anthropicData.content?.filter((c) => c.type === 'tool_use') ?? [];
+ const textContent = textBlocks.map((c) => c.text ?? '').join('');
+ // Build message object
+ const message = {
+ role: 'assistant',
+ content: textContent || null,
+ };
+ // Convert tool_use blocks to OpenAI tool_calls format
+ if (toolBlocks.length > 0) {
+ message['tool_calls'] = toolBlocks.map((block) => ({
+ id: block.id || `call_${Date.now()}`,
+ type: 'function',
+ function: {
+ name: block.name,
+ arguments: typeof block.input === 'string' ? block.input : JSON.stringify(block.input ?? {}),
+ },
+ }));
+ }
+ // Determine finish_reason
+ let finishReason = 'stop';
+ if (anthropicData.stop_reason === 'tool_use') {
+ finishReason = 'tool_calls';
+ }
+ else if (anthropicData.stop_reason === 'end_turn') {
+ finishReason = 'stop';
+ }
+ else if (anthropicData.stop_reason) {
+ finishReason = anthropicData.stop_reason;
+ }
+ return {
+ id: anthropicData.id || `chatcmpl-${Date.now()}`,
+ object: 'chat.completion',
+ created: Math.floor(Date.now() / 1000),
+ model: anthropicData.model,
+ choices: [
+ {
+ index: 0,
+ message,
+ finish_reason: finishReason,
+ },
+ ],
+ usage: {
+ prompt_tokens: anthropicData.usage?.input_tokens ?? 0,
+ completion_tokens: anthropicData.usage?.output_tokens ?? 0,
+ total_tokens: (anthropicData.usage?.input_tokens ?? 0) + (anthropicData.usage?.output_tokens ?? 0),
+ },
+ };
+ }
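
For example, a plain text reply maps over as (created timestamp elided):

convertAnthropicResponse({
  id: 'msg_01', model: 'claude-3-5-haiku-20241022',
  content: [{ type: 'text', text: 'Hello' }],
  stop_reason: 'end_turn',
  usage: { input_tokens: 10, output_tokens: 3 },
});
// → { id: 'msg_01', object: 'chat.completion', model: 'claude-3-5-haiku-20241022',
//     choices: [{ index: 0, message: { role: 'assistant', content: 'Hello' }, finish_reason: 'stop' }],
//     usage: { prompt_tokens: 10, completion_tokens: 3, total_tokens: 13 }, ... }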
+ /**
+ * Convert Anthropic streaming event to OpenAI streaming chunk format
+ * Handles both text content and tool_use streaming
+ */
+ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, toolState) {
+ const choice = { index: 0, delta: {}, finish_reason: null };
+ const baseChunk = {
+ id: messageId,
+ object: 'chat.completion.chunk',
+ created: Math.floor(Date.now() / 1000),
+ model: model,
+ choices: [choice],
+ };
+ switch (eventType) {
+ case 'message_start': {
+ // First chunk: include role
+ const msg = eventData['message'];
+ baseChunk.id = msg?.['id'] || messageId;
+ choice.delta = { role: 'assistant', content: '' };
+ return `data: ${JSON.stringify(baseChunk)}\n\n`;
+ }
+ case 'content_block_start': {
+ // New content block starting - could be text or tool_use
+ const contentBlock = eventData['content_block'];
+ const blockIndex = eventData['index'];
+ if (contentBlock?.['type'] === 'tool_use') {
+ // Tool use starting - send first chunk with tool info
+ const toolId = contentBlock['id'];
+ const toolName = contentBlock['name'];
+ toolState.tools.set(blockIndex ?? toolState.currentToolIndex, {
+ id: toolId,
+ name: toolName,
+ arguments: '',
+ });
+ toolState.currentToolIndex = blockIndex ?? toolState.currentToolIndex;
+ choice.delta = {
+ tool_calls: [{
+ index: blockIndex ?? 0,
+ id: toolId,
+ type: 'function',
+ function: { name: toolName, arguments: '' },
+ }],
+ };
+ return `data: ${JSON.stringify(baseChunk)}\n\n`;
+ }
+ return null;
+ }
+ case 'content_block_delta': {
+ // Content chunk - text or tool arguments
+ const delta = eventData['delta'];
+ const blockIndex = eventData['index'];
+ if (delta?.['type'] === 'text_delta') {
+ choice.delta = { content: delta['text'] };
+ return `data: ${JSON.stringify(baseChunk)}\n\n`;
+ }
+ if (delta?.['type'] === 'input_json_delta') {
+ // Tool arguments streaming
+ const partialJson = delta['partial_json'] || '';
+ const tool = toolState.tools.get(blockIndex ?? toolState.currentToolIndex);
+ if (tool) {
+ tool.arguments += partialJson;
+ }
+ choice.delta = {
+ tool_calls: [{
+ index: blockIndex ?? 0,
+ function: { arguments: partialJson },
+ }],
+ };
+ return `data: ${JSON.stringify(baseChunk)}\n\n`;
+ }
+ return null;
+ }
+ case 'message_delta': {
+ // Final chunk with stop reason
+ const delta = eventData['delta'];
+ const stopReason = delta?.['stop_reason'];
+ if (stopReason === 'tool_use') {
+ choice.finish_reason = 'tool_calls';
+ }
+ else if (stopReason === 'end_turn') {
+ choice.finish_reason = 'stop';
+ }
+ else {
+ choice.finish_reason = stopReason || 'stop';
+ }
+ choice.delta = {};
+ return `data: ${JSON.stringify(baseChunk)}\n\n`;
+ }
+ case 'message_stop': {
+ // Stream complete
+ return 'data: [DONE]\n\n';
+ }
+ default:
+ return null;
+ }
+ }
+ /**
+ * Parse SSE stream from Anthropic and convert to OpenAI format
+ */
+ async function* convertAnthropicStream(response, model) {
+ const reader = response.body?.getReader();
+ if (!reader) {
+ throw new Error('No response body');
+ }
+ const decoder = new TextDecoder();
+ let buffer = '';
+ let messageId = `chatcmpl-${Date.now()}`;
+ // Tool state for tracking streaming tool calls
+ const toolState = {
+ currentToolIndex: 0,
+ tools: new Map(),
+ };
+ try {
+ while (true) {
+ const { done, value } = await reader.read();
+ if (done)
+ break;
+ buffer += decoder.decode(value, { stream: true });
+ // Process complete SSE events
+ const lines = buffer.split('\n');
+ buffer = lines.pop() || ''; // Keep incomplete line in buffer
+ let eventType = '';
+ let eventData = '';
+ for (const line of lines) {
+ if (line.startsWith('event: ')) {
+ eventType = line.slice(7).trim();
+ }
+ else if (line.startsWith('data: ')) {
+ eventData = line.slice(6);
+ }
+ else if (line === '' && eventType && eventData) {
+ // Complete event, process it
+ try {
+ const parsed = JSON.parse(eventData);
+ const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
+ if (converted) {
+ yield converted;
+ }
+ }
+ catch {
+ // Skip malformed JSON
+ }
+ eventType = '';
+ eventData = '';
+ }
+ }
+ }
+ }
+ finally {
+ reader.releaseLock();
+ }
+ }
+ /**
+ * Pipe OpenAI streaming response directly (already in correct format)
+ */
+ async function* pipeOpenAIStream(response) {
+ const reader = response.body?.getReader();
+ if (!reader) {
+ throw new Error('No response body');
+ }
+ const decoder = new TextDecoder();
+ try {
+ while (true) {
+ const { done, value } = await reader.read();
+ if (done)
+ break;
+ yield decoder.decode(value, { stream: true });
+ }
+ }
+ finally {
+ reader.releaseLock();
+ }
+ }
+ /**
+ * Parse preferred model string (format: "provider:model")
+ */
+ function parsePreferredModel(preferredModel) {
+ const [provider, model] = preferredModel.split(':');
+ if (!provider || !model)
+ return null;
+ // Validate provider
+ const validProviders = ['openai', 'anthropic', 'google', 'xai', 'moonshot', 'local'];
+ if (!validProviders.includes(provider))
+ return null;
+ return { provider: provider, model };
+ }
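
For example:

parsePreferredModel('openai:gpt-4o');  // → { provider: 'openai', model: 'gpt-4o' }
parsePreferredModel('mystery:gpt-4o'); // → null (provider not in the allow-list)
parsePreferredModel('gpt-4o');         // → null (no provider prefix)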
+ /**
+ * Resolve explicit model name to provider and model
+ * Handles direct model names like "claude-3-5-sonnet-latest" or "gpt-4o"
+ */
+ function resolveExplicitModel(modelName) {
+ // Resolve aliases first (e.g., relayplane:auto → rp:balanced)
+ const resolvedAlias = resolveModelAlias(modelName);
+ // Check SMART_ALIASES (rp:best, rp:fast, etc.)
+ if (exports.SMART_ALIASES[resolvedAlias]) {
+ return exports.SMART_ALIASES[resolvedAlias];
+ }
+ // Check MODEL_MAPPING (aliases)
+ if (exports.MODEL_MAPPING[resolvedAlias]) {
+ return exports.MODEL_MAPPING[resolvedAlias];
+ }
+ // If alias was resolved but not in mappings, try original name
+ if (resolvedAlias !== modelName && exports.MODEL_MAPPING[modelName]) {
+ return exports.MODEL_MAPPING[modelName];
+ }
+ // Anthropic models (claude-*)
+ if (modelName.startsWith('claude-')) {
+ return { provider: 'anthropic', model: modelName };
+ }
+ // OpenAI models (gpt-*, o1-*, chatgpt-*, text-*, dall-e-*, whisper-*, tts-*)
+ if (modelName.startsWith('gpt-') ||
+ modelName.startsWith('o1-') ||
+ modelName.startsWith('o3-') ||
+ modelName.startsWith('chatgpt-') ||
+ modelName.startsWith('text-') ||
+ modelName.startsWith('dall-e') ||
+ modelName.startsWith('whisper') ||
+ modelName.startsWith('tts-')) {
+ return { provider: 'openai', model: modelName };
+ }
+ // Google models (gemini-*, palm-*)
+ if (modelName.startsWith('gemini-') || modelName.startsWith('palm-')) {
+ return { provider: 'google', model: modelName };
+ }
+ // xAI models (grok-*)
+ if (modelName.startsWith('grok-')) {
+ return { provider: 'xai', model: modelName };
+ }
+ // Moonshot models (moonshot-*)
+ if (modelName.startsWith('moonshot-')) {
+ return { provider: 'moonshot', model: modelName };
+ }
+ // Provider-prefixed format: "anthropic/claude-3-5-sonnet-latest"
+ if (modelName.includes('/')) {
+ const [provider, model] = modelName.split('/');
+ const validProviders = ['openai', 'anthropic', 'google', 'xai', 'moonshot', 'local'];
+ if (provider && model && validProviders.includes(provider)) {
+ return { provider: provider, model };
+ }
+ }
+ return null;
+ }
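
For example:

resolveExplicitModel('rp:fast'); // → { provider: 'anthropic', model: 'claude-3-5-haiku-20241022' } via SMART_ALIASES
resolveExplicitModel('grok-2');  // → { provider: 'xai', model: 'grok-2' } via prefix match
resolveExplicitModel('anthropic/claude-3-5-sonnet-latest');
// → { provider: 'anthropic', model: 'claude-3-5-sonnet-latest' } via the provider-prefixed form
resolveExplicitModel('llama-3'); // → null (unrecognized; the caller falls back or errors)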
+ function resolveConfigModel(modelName) {
+ return resolveExplicitModel(modelName) ?? parsePreferredModel(modelName);
+ }
+ function extractResponseText(responseData) {
+ const openAiChoices = responseData['choices'];
+ if (openAiChoices && openAiChoices.length > 0) {
+ const first = openAiChoices[0];
+ const content = first?.message?.content;
+ return typeof content === 'string' ? content : '';
+ }
+ const anthropicContent = responseData['content'];
+ if (anthropicContent) {
+ return anthropicContent
+ .filter((c) => c.type === 'text')
+ .map((c) => c.text ?? '')
+ .join('');
+ }
+ const geminiCandidates = responseData['candidates'];
+ if (geminiCandidates) {
+ const text = geminiCandidates[0]?.content?.parts?.map((p) => p.text ?? '').join('') ?? '';
+ return text;
+ }
+ return '';
+ }
+ class ProviderResponseError extends Error {
+ status;
+ payload;
+ constructor(status, payload) {
+ super(`Provider response error: ${status}`);
+ this.status = status;
+ this.payload = payload;
+ }
+ }
+ class CooldownError extends Error {
+ provider;
+ constructor(provider) {
+ super(`Provider ${provider} is in cooldown`);
+ this.provider = provider;
+ }
+ }
+ /**
+ * Extract request context (auth headers) from incoming HTTP request
+ */
+ function extractRequestContext(req) {
+ return {
+ authHeader: req.headers['authorization'],
+ betaHeaders: req.headers['anthropic-beta'],
+ versionHeader: req.headers['anthropic-version'],
+ apiKeyHeader: req.headers['x-api-key'],
+ };
+ }
+ const MAX_BODY_SIZE = 10 * 1024 * 1024; // 10MB max request body
+ async function readRequestBody(req) {
+ let body = '';
+ let size = 0;
+ for await (const chunk of req) {
+ size += chunk.length;
+ if (size > MAX_BODY_SIZE) {
+ throw new Error('Request body too large (max 10MB)');
+ }
+ body += chunk;
+ }
+ return body;
+ }
+ async function readJsonBody(req) {
+ const body = await readRequestBody(req);
+ return JSON.parse(body);
+ }
+ /**
+ * Check if we have valid Anthropic auth (either passthrough or env)
+ */
+ function hasAnthropicAuth(ctx, envApiKey) {
+ return !!(ctx.authHeader || ctx.apiKeyHeader || envApiKey);
+ }
+ function resolveProviderApiKey(provider, ctx, envApiKey) {
+ if (provider === 'anthropic') {
+ if (!hasAnthropicAuth(ctx, envApiKey)) {
+ return {
+ error: {
+ status: 401,
+ payload: {
+ error: 'Missing Anthropic authentication. Provide Authorization header or set ANTHROPIC_API_KEY.',
+ hint: 'For Claude Code: auth is passed through automatically. For API: set ANTHROPIC_API_KEY env var.',
+ },
+ },
+ };
+ }
+ return { apiKey: envApiKey };
+ }
+ const apiKeyEnv = exports.DEFAULT_ENDPOINTS[provider]?.apiKeyEnv ?? `${provider.toUpperCase()}_API_KEY`;
+ const apiKey = process.env[apiKeyEnv];
+ if (!apiKey) {
+ return {
+ error: {
+ status: 500,
+ payload: {
+ error: `Missing ${apiKeyEnv} environment variable`,
+ hint: `Cross-provider routing requires API keys for each provider. Set ${apiKeyEnv} to enable ${provider} models.`,
+ },
+ },
+ };
+ }
+ return { apiKey };
+ }
+ function getCascadeModels(config) {
+ return config.routing?.cascade?.models ?? [];
+ }
+ function getCascadeConfig(config) {
+ const c = config.routing?.cascade;
+ return {
+ enabled: c?.enabled ?? true,
+ models: c?.models ?? ['claude-3-5-haiku-20241022', 'claude-sonnet-4-20250514', 'claude-opus-4-5-20250514'],
+ escalateOn: c?.escalateOn ?? 'uncertainty',
+ maxEscalations: c?.maxEscalations ?? 1,
+ };
+ }
+ function getCooldownConfig(config) {
+ const defaults = {
+ enabled: true,
+ allowedFails: 3,
+ windowSeconds: 60,
+ cooldownSeconds: 120,
+ };
+ return { ...defaults, ...config.reliability?.cooldowns };
+ }
+ function getCostModel(config) {
+ return (config.routing?.complexity?.simple ||
+ config.routing?.cascade?.models?.[0] ||
+ 'claude-3-5-haiku-20241022');
+ }
+ function getFastModel(config) {
+ return (config.routing?.complexity?.simple ||
+ config.routing?.cascade?.models?.[0] ||
+ 'claude-3-5-haiku-20241022');
+ }
+ function getQualityModel(config) {
+ return (config.routing?.complexity?.complex ||
+ config.routing?.cascade?.models?.[config.routing?.cascade?.models?.length ? config.routing.cascade.models.length - 1 : 0] ||
+ process.env['RELAYPLANE_QUALITY_MODEL'] ||
+ 'claude-sonnet-4-20250514');
+ }
+ async function cascadeRequest(config, makeRequest, log) {
+ let escalations = 0;
+ for (let i = 0; i < config.models.length; i++) {
+ const model = config.models[i]; // Safe: i is always < length
+ const isLastModel = i === config.models.length - 1;
+ try {
+ const { responseData, provider, model: resolvedModel } = await makeRequest(model);
+ const text = extractResponseText(responseData);
+ if (isLastModel || escalations >= config.maxEscalations) {
+ return { responseData, provider, model: resolvedModel, escalations };
+ }
+ if (shouldEscalate(text, config.escalateOn)) {
+ log(`[RelayPlane] Escalating from ${model} due to ${config.escalateOn}`);
+ escalations++;
+ continue;
+ }
+ return { responseData, provider, model: resolvedModel, escalations };
+ }
+ catch (err) {
+ if (err instanceof CooldownError) {
+ log(`[RelayPlane] Skipping ${model} due to cooldown`);
+ continue;
+ }
+ if (config.escalateOn === 'error' && !isLastModel) {
+ log(`[RelayPlane] Escalating from ${model} due to error`);
+ escalations++;
+ continue;
+ }
+ throw err;
+ }
+ }
+ throw new Error('All cascade models exhausted');
+ }
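
A sketch of how the ladder behaves with the default escalateOn: 'uncertainty'; makeProviderCall is a hypothetical callback returning { responseData, provider, model } and throwing CooldownError when a provider is cooling down:

const result = await cascadeRequest(
  getCascadeConfig(proxyConfig),
  (model) => makeProviderCall(model), // hypothetical helper, not part of this module
  console.log,
);
// Tries Haiku first; if the reply matches an UNCERTAINTY_PATTERN and maxEscalations
// is not exhausted, retries on Sonnet, then Opus. Cooled-down models are skipped.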
1518
+ /**
1519
+ * Start the RelayPlane proxy server
1520
+ */
1521
+ async function startProxy(config = {}) {
1522
+ const port = config.port ?? 3001;
1523
+ const host = config.host ?? '127.0.0.1';
1524
+ const verbose = config.verbose ?? false;
1525
+ const anthropicAuthMode = config.anthropicAuth ?? 'auto';
1526
+ const log = (msg) => {
1527
+ if (verbose)
1528
+ console.log(`[relayplane] ${msg}`);
1529
+ };
1530
+ const configPath = getProxyConfigPath();
1531
+ let proxyConfig = await loadProxyConfig(configPath, log);
1532
+ const cooldownManager = new CooldownManager(getCooldownConfig(proxyConfig));
1533
+ let configWatcher = null;
1534
+ let configReloadTimer = null;
1535
+ const reloadConfig = async () => {
1536
+ proxyConfig = await loadProxyConfig(configPath, log);
1537
+ cooldownManager.updateConfig(getCooldownConfig(proxyConfig));
1538
+ log(`Reloaded config from ${configPath}`);
1539
+ };
1540
+ const scheduleConfigReload = () => {
1541
+ if (configReloadTimer)
1542
+ clearTimeout(configReloadTimer);
1543
+ configReloadTimer = setTimeout(() => {
1544
+ reloadConfig().catch(() => { });
1545
+ }, 50);
1546
+ };
1547
+ const startConfigWatcher = () => {
1548
+ if (configWatcher)
1549
+ return;
1550
+ try {
1551
+ configWatcher = fs.watch(configPath, scheduleConfigReload);
1552
+ }
1553
+ catch (err) {
1554
+ const error = err;
1555
+ log(`Config watch error: ${error.message}`);
1556
+ }
1557
+ };
1558
+ startConfigWatcher();
1559
+ // Initialize RelayPlane
1560
+ const relay = new core_1.RelayPlane({ dbPath: config.dbPath });
1561
+ const server = http.createServer(async (req, res) => {
1562
+ // CORS headers
1563
+ res.setHeader('Access-Control-Allow-Origin', '*');
1564
+ res.setHeader('Access-Control-Allow-Methods', 'POST, GET, OPTIONS');
1565
+ res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization, x-api-key, anthropic-beta, anthropic-version, X-RelayPlane-Bypass, X-RelayPlane-Model');
1566
+ if (req.method === 'OPTIONS') {
1567
+ res.writeHead(204);
1568
+ res.end();
1569
+ return;
1570
+ }
1571
+ const url = req.url ?? '';
1572
+ const pathname = url.split('?')[0] ?? '';
1573
+ // === Control endpoints ===
1574
+ if (pathname.startsWith('/control/')) {
1575
+ if (req.method === 'POST' && pathname === '/control/enable') {
1576
+ proxyConfig = normalizeProxyConfig({ ...proxyConfig, enabled: true });
1577
+ await saveProxyConfig(configPath, proxyConfig);
1578
+ startConfigWatcher();
1579
+ res.writeHead(200, { 'Content-Type': 'application/json' });
1580
+ res.end(JSON.stringify({ enabled: true }));
1581
+ return;
1582
+ }
1583
+ if (req.method === 'POST' && pathname === '/control/disable') {
1584
+ proxyConfig = normalizeProxyConfig({ ...proxyConfig, enabled: false });
1585
+ await saveProxyConfig(configPath, proxyConfig);
1586
+ startConfigWatcher();
1587
+ res.writeHead(200, { 'Content-Type': 'application/json' });
1588
+ res.end(JSON.stringify({ enabled: false }));
1589
+ return;
1590
+ }
1591
+ if (req.method === 'GET' && pathname === '/control/status') {
1592
+ const enabled = proxyConfig.enabled !== false;
1593
+ res.writeHead(200, { 'Content-Type': 'application/json' });
1594
+ res.end(JSON.stringify({
1595
+ enabled,
1596
+ mode: proxyConfig.mode ?? (enabled ? 'enabled' : 'disabled'),
1597
+ modelOverrides: proxyConfig.modelOverrides ?? {},
1598
+ }));
1599
+ return;
1600
+ }
1601
+ if (req.method === 'GET' && pathname === '/control/stats') {
1602
+ const uptimeMs = Date.now() - globalStats.startedAt;
1603
+ const avgLatencyMs = globalStats.totalRequests > 0
1604
+ ? Math.round(globalStats.totalLatencyMs / globalStats.totalRequests)
1605
+ : 0;
1606
+ res.writeHead(200, { 'Content-Type': 'application/json' });
1607
+ res.end(JSON.stringify({
1608
+ uptimeMs,
1609
+ uptimeFormatted: `${Math.floor(uptimeMs / 60000)}m ${Math.floor((uptimeMs % 60000) / 1000)}s`,
1610
+ totalRequests: globalStats.totalRequests,
1611
+ successfulRequests: globalStats.successfulRequests,
1612
+ failedRequests: globalStats.failedRequests,
1613
+ successRate: globalStats.totalRequests > 0
1614
+ ? `${((globalStats.successfulRequests / globalStats.totalRequests) * 100).toFixed(1)}%`
1615
+ : 'N/A',
1616
+ avgLatencyMs,
1617
+ escalations: globalStats.escalations,
1618
+ routingCounts: globalStats.routingCounts,
1619
+ modelCounts: globalStats.modelCounts,
1620
+ }));
1621
+ return;
1622
+ }
1623
+ if (req.method === 'POST' && pathname === '/control/config') {
1624
+ try {
1625
+ const patch = await readJsonBody(req);
1626
+ proxyConfig = mergeProxyConfig(proxyConfig, patch);
1627
+ await saveProxyConfig(configPath, proxyConfig);
1628
+ startConfigWatcher();
1629
+ res.writeHead(200, { 'Content-Type': 'application/json' });
1630
+ res.end(JSON.stringify({ ok: true, config: proxyConfig }));
1631
+ }
1632
+ catch {
1633
+ res.writeHead(400, { 'Content-Type': 'application/json' });
1634
+ res.end(JSON.stringify({ error: 'Invalid JSON' }));
1635
+ }
1636
+ return;
1637
+ }
1638
+ }
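+ // Example (hedged): exercising the control endpoints above with curl,
+ // assuming the default host/port:
+ //   curl -X POST http://127.0.0.1:3001/control/enable
+ //   curl http://127.0.0.1:3001/control/status
+ //   curl http://127.0.0.1:3001/control/stats
+ //   curl -X POST http://127.0.0.1:3001/control/config -d '{"modelOverrides":{}}'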
1639
+ // Extract auth context from incoming request
1640
+ const ctx = extractRequestContext(req);
1641
+ const anthropicEnvKey = process.env['ANTHROPIC_API_KEY'];
1642
+ const relayplaneBypass = parseHeaderBoolean(getHeaderValue(req, 'x-relayplane-bypass'));
1643
+ const headerModelOverride = getHeaderValue(req, 'x-relayplane-model');
1644
+ const relayplaneEnabled = proxyConfig.enabled !== false;
1645
+ const recordTelemetry = relayplaneEnabled && !relayplaneBypass;
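+ // Per-request controls (hedged example): both headers are parsed above and
+ // appear in the CORS allow list.
+ //   X-RelayPlane-Bypass: true        → skip routing and telemetry for this request
+ //   X-RelayPlane-Model: some-model   → override the model named in the body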
1646
+ // Determine which Anthropic auth to use based on mode
1647
+ let useAnthropicEnvKey;
1648
+ if (anthropicAuthMode === 'env') {
1649
+ useAnthropicEnvKey = anthropicEnvKey;
1650
+ }
1651
+ else if (anthropicAuthMode === 'passthrough') {
1652
+ useAnthropicEnvKey = undefined; // Only use incoming auth
1653
+ }
1654
+ else {
1655
+ // 'auto': Use incoming auth if present, fall back to env
1656
+ useAnthropicEnvKey = (ctx.authHeader || ctx.apiKeyHeader) ? undefined : anthropicEnvKey;
1657
+ }
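+ // Net effect: 'env' always sends ANTHROPIC_API_KEY; 'passthrough' forwards
+ // only the caller's Authorization/x-api-key; 'auto' prefers the caller's
+ // credentials and falls back to the env key when none are present.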
1658
+ // === Native Anthropic /v1/messages endpoint (for Claude Code) ===
1659
+ if (req.method === 'POST' && (url.endsWith('/v1/messages') || url.includes('/v1/messages?'))) {
1660
+ log('Native Anthropic /v1/messages request');
1661
+ // Check auth
1662
+ if (!hasAnthropicAuth(ctx, useAnthropicEnvKey)) {
1663
+ res.writeHead(401, { 'Content-Type': 'application/json' });
1664
+ res.end(JSON.stringify({ error: 'Missing authentication. Provide Authorization header or set ANTHROPIC_API_KEY.' }));
1665
+ return;
1666
+ }
1667
+ // Read body
1668
+ let requestBody;
1669
+ try {
1670
+ requestBody = await readJsonBody(req);
1671
+ }
1672
+ catch {
1673
+ res.writeHead(400, { 'Content-Type': 'application/json' });
1674
+ res.end(JSON.stringify({ error: 'Invalid JSON' }));
1675
+ return;
1676
+ }
1677
+ const originalModel = requestBody['model'];
1678
+ let requestedModel = headerModelOverride ?? originalModel ?? '';
1679
+ if (headerModelOverride) {
1680
+ log(`Header model override: ${originalModel ?? 'unknown'} → ${headerModelOverride}`);
1681
+ }
1682
+ const parsedModel = parseModelSuffix(requestedModel);
1683
+ let routingSuffix = parsedModel.suffix;
1684
+ requestedModel = parsedModel.baseModel;
1685
+ if (relayplaneEnabled && !relayplaneBypass && requestedModel) {
1686
+ const override = proxyConfig.modelOverrides?.[requestedModel];
1687
+ if (override) {
1688
+ log(`Model override: ${requestedModel} → ${override}`);
1689
+ const overrideParsed = parseModelSuffix(override);
1690
+ if (!routingSuffix && overrideParsed.suffix) {
1691
+ routingSuffix = overrideParsed.suffix;
1692
+ }
1693
+ requestedModel = overrideParsed.baseModel;
1694
+ }
1695
+ }
1696
+ // Resolve aliases (e.g., relayplane:auto → rp:balanced)
1697
+ const resolvedModel = resolveModelAlias(requestedModel);
1698
+ if (resolvedModel !== requestedModel) {
1699
+ log(`Alias resolution: ${requestedModel} → ${resolvedModel}`);
1700
+ requestedModel = resolvedModel;
1701
+ }
1702
+ if (requestedModel && requestedModel !== originalModel) {
1703
+ requestBody['model'] = requestedModel;
1704
+ }
1705
+ let routingMode = 'auto';
1706
+ if (!relayplaneEnabled || relayplaneBypass) {
1707
+ routingMode = 'passthrough';
1708
+ }
1709
+ else if (routingSuffix) {
1710
+ routingMode = routingSuffix;
1711
+ }
1712
+ else if (requestedModel.startsWith('relayplane:')) {
1713
+ if (requestedModel.includes(':cost')) {
1714
+ routingMode = 'cost';
1715
+ }
1716
+ else if (requestedModel.includes(':fast')) {
1717
+ routingMode = 'fast';
1718
+ }
1719
+ else if (requestedModel.includes(':quality')) {
1720
+ routingMode = 'quality';
1721
+ }
1722
+ // relayplane:auto stays as 'auto'
1723
+ }
1724
+ else if (requestedModel.startsWith('rp:')) {
1725
+ // Handle rp:* smart aliases; unmatched ones fall through to passthrough and resolve via SMART_ALIASES
1726
+ if (requestedModel === 'rp:cost' || requestedModel === 'rp:cheap') {
1727
+ routingMode = 'cost';
1728
+ }
1729
+ else if (requestedModel === 'rp:fast') {
1730
+ routingMode = 'fast';
1731
+ }
1732
+ else if (requestedModel === 'rp:quality' || requestedModel === 'rp:best') {
1733
+ routingMode = 'quality';
1734
+ }
1735
+ else {
1736
+ // rp:balanced and others go through passthrough to resolve via SMART_ALIASES
1737
+ routingMode = 'passthrough';
1738
+ }
1739
+ }
1740
+ else if (requestedModel === 'auto' || requestedModel === 'relayplane:auto') {
1741
+ routingMode = 'auto';
1742
+ }
1743
+ else if (requestedModel === 'cost') {
1744
+ routingMode = 'cost';
1745
+ }
1746
+ else if (requestedModel === 'fast') {
1747
+ routingMode = 'fast';
1748
+ }
1749
+ else if (requestedModel === 'quality') {
1750
+ routingMode = 'quality';
1751
+ }
1752
+ else {
1753
+ routingMode = 'passthrough';
1754
+ }
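+ // Mapping summary for the chain above:
+ //   relayplane:cost|fast|quality → cost | fast | quality
+ //   rp:cost, rp:cheap → cost; rp:fast → fast; rp:quality, rp:best → quality
+ //   auto, relayplane:auto → auto; everything else (incl. rp:balanced) → passthrough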
1755
+ const isStreaming = requestBody['stream'] === true;
1756
+ const messages = Array.isArray(requestBody['messages'])
1757
+ ? requestBody['messages']
1758
+ : [];
1759
+ let promptText = '';
1760
+ let taskType = 'general';
1761
+ let confidence = 0;
1762
+ let complexity = 'simple';
1763
+ if (routingMode !== 'passthrough' || recordTelemetry) {
1764
+ promptText = extractMessageText(messages);
1765
+ taskType = (0, core_1.inferTaskType)(promptText);
1766
+ confidence = (0, core_1.getInferenceConfidence)(promptText, taskType);
1767
+ complexity = classifyComplexity(messages);
1768
+ log(`Inferred task: ${taskType} (confidence: ${confidence.toFixed(2)})`);
1769
+ }
1770
+ const cascadeConfig = getCascadeConfig(proxyConfig);
1771
+ let useCascade = routingMode === 'auto' &&
1772
+ proxyConfig.routing?.mode === 'cascade' &&
1773
+ cascadeConfig.enabled === true;
1774
+ let targetModel = '';
1775
+ let targetProvider = 'anthropic';
1776
+ // Cascade cannot buffer a streaming response; fall back to one-shot complexity-based routing
1777
+ if (useCascade && isStreaming) {
1778
+ log('Using complexity-based routing for streaming request');
1779
+ useCascade = false; // Disable full cascade, use complexity routing instead
1780
+ let selectedModel = null;
1781
+ if (proxyConfig.routing?.complexity?.enabled) {
1782
+ selectedModel = proxyConfig.routing?.complexity?.[complexity];
1783
+ }
1784
+ else {
1785
+ selectedModel = getCascadeModels(proxyConfig)[0] || getCostModel(proxyConfig);
1786
+ }
1787
+ if (selectedModel) {
1788
+ const resolved = resolveConfigModel(selectedModel);
1789
+ if (resolved) {
1790
+ targetProvider = resolved.provider;
1791
+ targetModel = resolved.model;
1792
+ }
1793
+ }
1794
+ }
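+ // Hedged sketch of the complexity-routing config read above; 'simple' is the
+ // default bucket from classifyComplexity, and the other bucket names are
+ // assumptions:
+ //   routing: { complexity: { enabled: true, simple: "model-a", complex: "model-b" } }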
1795
+ if (routingMode === 'passthrough') {
1796
+ const resolved = resolveExplicitModel(requestedModel);
1797
+ if (!resolved) {
1798
+ res.writeHead(400, { 'Content-Type': 'application/json' });
1799
+ res.end(JSON.stringify((0, model_suggestions_js_1.buildModelNotFoundError)(requestedModel, getAvailableModelNames())));
1800
+ return;
1801
+ }
1802
+ if (resolved.provider !== 'anthropic') {
1803
+ res.writeHead(400, { 'Content-Type': 'application/json' });
1804
+ res.end(JSON.stringify({ error: 'Native /v1/messages only supports Anthropic models.' }));
1805
+ return;
1806
+ }
1807
+ targetProvider = resolved.provider;
1808
+ targetModel = resolved.model;
1809
+ }
1810
+ else if (!useCascade) {
1811
+ let selectedModel = null;
1812
+ if (routingMode === 'cost') {
1813
+ selectedModel = getCostModel(proxyConfig);
1814
+ }
1815
+ else if (routingMode === 'fast') {
1816
+ selectedModel = getFastModel(proxyConfig);
1817
+ }
1818
+ else if (routingMode === 'quality') {
1819
+ selectedModel = getQualityModel(proxyConfig);
1820
+ }
1821
+ else {
1822
+ const rule = relay.routing.get(taskType);
1823
+ const parsedRule = rule?.preferredModel ? parsePreferredModel(rule.preferredModel) : null;
1824
+ if (parsedRule?.provider === 'anthropic') {
1825
+ selectedModel = parsedRule.model;
1826
+ }
1827
+ else if (proxyConfig.routing?.complexity?.enabled) {
1828
+ const complexityModel = proxyConfig.routing?.complexity?.[complexity];
1829
+ selectedModel = complexityModel ?? null;
1830
+ }
1831
+ else {
1832
+ selectedModel = DEFAULT_ROUTING[taskType].model;
1833
+ }
1834
+ }
1835
+ if (!selectedModel) {
1836
+ res.writeHead(500, { 'Content-Type': 'application/json' });
1837
+ res.end(JSON.stringify({ error: 'Failed to resolve routing model' }));
1838
+ return;
1839
+ }
1840
+ const resolved = resolveConfigModel(selectedModel);
1841
+ if (!resolved || resolved.provider !== 'anthropic') {
1842
+ res.writeHead(500, { 'Content-Type': 'application/json' });
1843
+ res.end(JSON.stringify({ error: 'Resolved model is not supported for /v1/messages' }));
1844
+ return;
1845
+ }
1846
+ targetProvider = resolved.provider;
1847
+ targetModel = resolved.model;
1848
+ }
1849
+ if (proxyConfig.reliability?.cooldowns?.enabled &&
1850
+ !useCascade &&
1851
+ !cooldownManager.isAvailable(targetProvider)) {
1852
+ res.writeHead(503, { 'Content-Type': 'application/json' });
1853
+ res.end(JSON.stringify({ error: `Provider ${targetProvider} is temporarily cooled down` }));
1854
+ return;
1855
+ }
1856
+ const startTime = Date.now();
1857
+ try {
1858
+ if (useCascade && cascadeConfig) {
1859
+ const cascadeResult = await cascadeRequest(cascadeConfig, async (modelName) => {
1860
+ const resolved = resolveConfigModel(modelName);
1861
+ if (!resolved) {
1862
+ throw new Error(`Invalid cascade model: ${modelName}`);
1863
+ }
1864
+ if (resolved.provider !== 'anthropic') {
1865
+ throw new Error(`Cascade model ${modelName} is not Anthropic-compatible`);
1866
+ }
1867
+ if (proxyConfig.reliability?.cooldowns?.enabled && !cooldownManager.isAvailable(resolved.provider)) {
1868
+ throw new CooldownError(resolved.provider);
1869
+ }
1870
+ const attemptBody = { ...requestBody, model: resolved.model };
1871
+ // Hybrid auth: use MAX token for Opus models, API key for others
1872
+ const modelAuth = getAuthForModel(resolved.model, proxyConfig.auth, useAnthropicEnvKey);
1873
+ if (modelAuth.isMax) {
1874
+ log(`Using MAX token for ${resolved.model}`);
1875
+ }
1876
+ const providerResponse = await forwardNativeAnthropicRequest(attemptBody, ctx, modelAuth.apiKey, modelAuth.isMax);
1877
+ const responseData = (await providerResponse.json());
1878
+ if (!providerResponse.ok) {
1879
+ if (proxyConfig.reliability?.cooldowns?.enabled) {
1880
+ cooldownManager.recordFailure(resolved.provider, JSON.stringify(responseData));
1881
+ }
1882
+ throw new ProviderResponseError(providerResponse.status, responseData);
1883
+ }
1884
+ if (proxyConfig.reliability?.cooldowns?.enabled) {
1885
+ cooldownManager.recordSuccess(resolved.provider);
1886
+ }
1887
+ return { responseData, provider: resolved.provider, model: resolved.model };
1888
+ }, log);
1889
+ res.writeHead(200, { 'Content-Type': 'application/json' });
1890
+ res.end(JSON.stringify(cascadeResult.responseData));
1891
+ targetProvider = cascadeResult.provider;
1892
+ targetModel = cascadeResult.model;
1893
+ }
1894
+ else {
1895
+ // Hybrid auth: use MAX token for Opus models, API key for others
1896
+ const finalModel = targetModel || requestedModel;
1897
+ const modelAuth = getAuthForModel(finalModel, proxyConfig.auth, useAnthropicEnvKey);
1898
+ if (modelAuth.isMax) {
1899
+ log(`Using MAX token for ${finalModel}`);
1900
+ }
1901
+ const providerResponse = await forwardNativeAnthropicRequest({ ...requestBody, model: finalModel }, ctx, modelAuth.apiKey, modelAuth.isMax);
1902
+ if (!providerResponse.ok) {
1903
+ const errorPayload = (await providerResponse.json());
1904
+ if (proxyConfig.reliability?.cooldowns?.enabled) {
1905
+ cooldownManager.recordFailure(targetProvider, JSON.stringify(errorPayload));
1906
+ }
1907
+ res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
1908
+ res.end(JSON.stringify(errorPayload));
1909
+ return;
1910
+ }
1911
+ if (proxyConfig.reliability?.cooldowns?.enabled) {
1912
+ cooldownManager.recordSuccess(targetProvider);
1913
+ }
1914
+ if (isStreaming) {
1915
+ res.writeHead(providerResponse.status, {
1916
+ 'Content-Type': 'text/event-stream',
1917
+ 'Cache-Control': 'no-cache',
1918
+ 'Connection': 'keep-alive',
1919
+ });
1920
+ const reader = providerResponse.body?.getReader();
1921
+ if (reader) {
1922
+ const decoder = new TextDecoder();
1923
+ try {
1924
+ while (true) {
1925
+ const { done, value } = await reader.read();
1926
+ if (done)
1927
+ break;
1928
+ res.write(decoder.decode(value, { stream: true }));
1929
+ }
1930
+ }
1931
+ finally {
1932
+ reader.releaseLock();
1933
+ }
1934
+ }
1935
+ res.end();
1936
+ }
1937
+ else {
1938
+ const responseData = await providerResponse.json();
1939
+ res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
1940
+ res.end(JSON.stringify(responseData));
1941
+ }
1942
+ }
1943
+ const durationMs = Date.now() - startTime;
1944
+ logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, true, routingMode, useCascade && cascadeConfig ? undefined : false);
1945
+ if (recordTelemetry) {
1946
+ relay
1947
+ .run({
1948
+ prompt: promptText.slice(0, 500),
1949
+ taskType,
1950
+ model: `${targetProvider}:${targetModel || requestedModel}`,
1951
+ })
1952
+ .catch(() => { });
1953
+ }
1954
+ }
1955
+ catch (err) {
1956
+ const durationMs = Date.now() - startTime;
1957
+ logRequest(originalModel ?? 'unknown', targetModel || requestedModel, targetProvider, durationMs, false, routingMode);
1958
+ if (err instanceof ProviderResponseError) {
1959
+ res.writeHead(err.status, { 'Content-Type': 'application/json' });
1960
+ res.end(JSON.stringify(err.payload));
1961
+ return;
1962
+ }
1963
+ const errorMsg = err instanceof Error ? err.message : String(err);
1964
+ res.writeHead(500, { 'Content-Type': 'application/json' });
1965
+ res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
1966
+ }
1967
+ return;
1968
+ }
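+ // Example (hedged): calling the native endpoint through the proxy, assuming
+ // the default port and an ANTHROPIC_API_KEY visible to the proxy:
+ //   curl -X POST http://127.0.0.1:3001/v1/messages \
+ //     -H 'content-type: application/json' \
+ //     -d '{"model":"relayplane:auto","max_tokens":256,"messages":[{"role":"user","content":"hello"}]}'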
1969
+ // === Token counting endpoint ===
1970
+ if (req.method === 'POST' && url.includes('/v1/messages/count_tokens')) {
1971
+ log('Token count request');
1972
+ if (!hasAnthropicAuth(ctx, useAnthropicEnvKey)) {
1973
+ res.writeHead(401, { 'Content-Type': 'application/json' });
1974
+ res.end(JSON.stringify({ error: 'Missing authentication' }));
1975
+ return;
1976
+ }
1977
+ let body = '';
1978
+ for await (const chunk of req) {
1979
+ body += chunk;
1980
+ }
1981
+ try {
1982
+ const headers = buildAnthropicHeaders(ctx, useAnthropicEnvKey);
1983
+ const response = await fetch('https://api.anthropic.com/v1/messages/count_tokens', {
1984
+ method: 'POST',
1985
+ headers,
1986
+ body,
1987
+ });
1988
+ const data = await response.json();
1989
+ res.writeHead(response.status, { 'Content-Type': 'application/json' });
1990
+ res.end(JSON.stringify(data));
1991
+ }
1992
+ catch (err) {
1993
+ const errorMsg = err instanceof Error ? err.message : String(err);
1994
+ res.writeHead(500, { 'Content-Type': 'application/json' });
1995
+ res.end(JSON.stringify({ error: errorMsg }));
1996
+ }
1997
+ return;
1998
+ }
1999
+ // === Model list endpoint ===
2000
+ if (req.method === 'GET' && url.includes('/models')) {
2001
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2002
+ res.end(JSON.stringify({
2003
+ object: 'list',
2004
+ data: [
2005
+ { id: 'relayplane:auto', object: 'model', owned_by: 'relayplane' },
2006
+ { id: 'relayplane:cost', object: 'model', owned_by: 'relayplane' },
2007
+ { id: 'relayplane:fast', object: 'model', owned_by: 'relayplane' },
2008
+ { id: 'relayplane:quality', object: 'model', owned_by: 'relayplane' },
2009
+ ],
2010
+ }));
2011
+ return;
2012
+ }
2013
+ // === OpenAI-compatible /v1/chat/completions endpoint ===
2014
+ if (req.method !== 'POST' || !url.includes('/chat/completions')) {
2015
+ res.writeHead(404, { 'Content-Type': 'application/json' });
2016
+ res.end(JSON.stringify({ error: 'Not found. Supported: POST /v1/messages, POST /v1/messages/count_tokens, POST /v1/chat/completions, GET /v1/models' }));
2017
+ return;
2018
+ }
2019
+ // Parse request body
2020
+ let body = '';
2021
+ for await (const chunk of req) {
2022
+ body += chunk;
2023
+ }
2024
+ let request;
2025
+ try {
2026
+ request = JSON.parse(body);
2027
+ }
2028
+ catch {
2029
+ res.writeHead(400, { 'Content-Type': 'application/json' });
2030
+ res.end(JSON.stringify({ error: 'Invalid JSON' }));
2031
+ return;
2032
+ }
2033
+ const isStreaming = request.stream === true;
2034
+ const bypassRouting = !relayplaneEnabled || relayplaneBypass;
2035
+ // Extract routing mode from model name
2036
+ const originalRequestedModel = request.model;
2037
+ let requestedModel = headerModelOverride ?? originalRequestedModel;
2038
+ if (headerModelOverride) {
2039
+ log(`Header model override: ${originalRequestedModel} → ${headerModelOverride}`);
2040
+ }
2041
+ if (!requestedModel) {
2042
+ res.writeHead(400, { 'Content-Type': 'application/json' });
2043
+ res.end(JSON.stringify({ error: 'Missing model in request' }));
2044
+ return;
2045
+ }
2046
+ const parsedModel = parseModelSuffix(requestedModel);
2047
+ let routingSuffix = parsedModel.suffix;
2048
+ requestedModel = parsedModel.baseModel;
2049
+ if (!bypassRouting) {
2050
+ const override = proxyConfig.modelOverrides?.[requestedModel];
2051
+ if (override) {
2052
+ log(`Model override: ${requestedModel} → ${override}`);
2053
+ const overrideParsed = parseModelSuffix(override);
2054
+ if (!routingSuffix && overrideParsed.suffix) {
2055
+ routingSuffix = overrideParsed.suffix;
2056
+ }
2057
+ requestedModel = overrideParsed.baseModel;
2058
+ }
2059
+ }
2060
+ // Resolve aliases (e.g., relayplane:auto → rp:balanced)
2061
+ const resolvedModel = resolveModelAlias(requestedModel);
2062
+ if (resolvedModel !== requestedModel) {
2063
+ log(`Alias resolution: ${requestedModel} → ${resolvedModel}`);
2064
+ requestedModel = resolvedModel;
2065
+ }
2066
+ let routingMode = 'auto';
2067
+ let targetModel = '';
2068
+ let targetProvider = 'anthropic';
2069
+ if (bypassRouting) {
2070
+ routingMode = 'passthrough';
2071
+ }
2072
+ else if (routingSuffix) {
2073
+ routingMode = routingSuffix;
2074
+ }
2075
+ else if (requestedModel.startsWith('relayplane:')) {
2076
+ if (requestedModel.includes(':cost')) {
2077
+ routingMode = 'cost';
2078
+ }
2079
+ else if (requestedModel.includes(':fast')) {
2080
+ routingMode = 'fast';
2081
+ }
2082
+ else if (requestedModel.includes(':quality')) {
2083
+ routingMode = 'quality';
2084
+ }
2085
+ // relayplane:auto stays as 'auto'
2086
+ }
2087
+ else if (requestedModel.startsWith('rp:')) {
2088
+ // Handle rp:* smart aliases; unmatched ones fall through to passthrough and resolve via SMART_ALIASES
2089
+ if (requestedModel === 'rp:cost' || requestedModel === 'rp:cheap') {
2090
+ routingMode = 'cost';
2091
+ }
2092
+ else if (requestedModel === 'rp:fast') {
2093
+ routingMode = 'fast';
2094
+ }
2095
+ else if (requestedModel === 'rp:quality' || requestedModel === 'rp:best') {
2096
+ routingMode = 'quality';
2097
+ }
2098
+ else {
2099
+ // rp:balanced and others go through passthrough to resolve via SMART_ALIASES
2100
+ routingMode = 'passthrough';
2101
+ }
2102
+ }
2103
+ else if (requestedModel === 'auto' || requestedModel === 'relayplane:auto') {
2104
+ routingMode = 'auto';
2105
+ }
2106
+ else if (requestedModel === 'cost') {
2107
+ routingMode = 'cost';
2108
+ }
2109
+ else if (requestedModel === 'fast') {
2110
+ routingMode = 'fast';
2111
+ }
2112
+ else if (requestedModel === 'quality') {
2113
+ routingMode = 'quality';
2114
+ }
2115
+ else {
2116
+ routingMode = 'passthrough';
2117
+ }
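+ // (Same model-name → routing-mode mapping as in the /v1/messages handler above.)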
2118
+ log(`Received request for model: ${requestedModel} (mode: ${routingMode}, stream: ${isStreaming})`);
2119
+ let promptText = '';
2120
+ let taskType = 'general';
2121
+ let confidence = 0;
2122
+ let complexity = 'simple';
2123
+ if (routingMode !== 'passthrough' || recordTelemetry) {
2124
+ promptText = extractPromptText(request.messages);
2125
+ taskType = (0, core_1.inferTaskType)(promptText);
2126
+ confidence = (0, core_1.getInferenceConfidence)(promptText, taskType);
2127
+ complexity = classifyComplexity(request.messages);
2128
+ log(`Inferred task: ${taskType} (confidence: ${confidence.toFixed(2)})`);
2129
+ }
2130
+ const cascadeConfig = getCascadeConfig(proxyConfig);
2131
+ let useCascade = routingMode === 'auto' &&
2132
+ proxyConfig.routing?.mode === 'cascade' &&
2133
+ cascadeConfig.enabled === true;
2134
+ if (useCascade && isStreaming) {
2135
+ log('Cascade disabled for streaming request; using first cascade model');
2136
+ useCascade = false;
2137
+ const fallbackModel = getCascadeModels(proxyConfig)[0] || getCostModel(proxyConfig);
2138
+ const resolvedFallback = resolveConfigModel(fallbackModel);
2139
+ if (resolvedFallback) {
2140
+ targetProvider = resolvedFallback.provider;
2141
+ targetModel = resolvedFallback.model;
2142
+ }
2143
+ }
2144
+ if (routingMode === 'passthrough') {
2145
+ const resolved = resolveExplicitModel(requestedModel);
2146
+ if (resolved) {
2147
+ targetProvider = resolved.provider;
2148
+ targetModel = resolved.model;
2149
+ log(`Pass-through mode: ${requestedModel} → ${targetProvider}/${targetModel}`);
2150
+ }
2151
+ else {
2152
+ res.writeHead(400, { 'Content-Type': 'application/json' });
2153
+ if (bypassRouting) {
2154
+ const modelError = (0, model_suggestions_js_1.buildModelNotFoundError)(requestedModel, getAvailableModelNames());
2155
+ res.end(JSON.stringify({
2156
+ error: `RelayPlane disabled or bypassed. Use an explicit model instead of ${requestedModel}.`,
2157
+ suggestions: modelError.suggestions,
2158
+ hint: modelError.hint,
2159
+ }));
2160
+ }
2161
+ else {
2162
+ res.end(JSON.stringify((0, model_suggestions_js_1.buildModelNotFoundError)(requestedModel, getAvailableModelNames())));
2163
+ }
2164
+ return;
2165
+ }
2166
+ }
2167
+ else if (!useCascade) {
2168
+ let selectedModel = null;
2169
+ if (routingMode === 'cost') {
2170
+ selectedModel = getCostModel(proxyConfig);
2171
+ }
2172
+ else if (routingMode === 'fast') {
2173
+ selectedModel = getFastModel(proxyConfig);
2174
+ }
2175
+ else if (routingMode === 'quality') {
2176
+ selectedModel = getQualityModel(proxyConfig);
2177
+ }
2178
+ else {
2179
+ const rule = relay.routing.get(taskType);
2180
+ if (rule && rule.preferredModel) {
2181
+ const parsedRule = parsePreferredModel(rule.preferredModel);
2182
+ if (parsedRule) {
2183
+ targetProvider = parsedRule.provider;
2184
+ targetModel = parsedRule.model;
2185
+ log(`Using learned rule: ${rule.preferredModel}`);
2186
+ }
2187
+ }
2188
+ if (!targetModel) {
2189
+ if (proxyConfig.routing?.complexity?.enabled) {
2190
+ const complexityModel = proxyConfig.routing?.complexity?.[complexity];
2191
+ selectedModel = complexityModel ?? null;
2192
+ }
2193
+ else {
2194
+ selectedModel = DEFAULT_ROUTING[taskType].model;
2195
+ }
2196
+ }
2197
+ }
2198
+ if (selectedModel) {
2199
+ const resolved = resolveConfigModel(selectedModel);
2200
+ if (resolved) {
2201
+ targetProvider = resolved.provider;
2202
+ targetModel = resolved.model;
2203
+ }
2204
+ }
2205
+ if (!targetModel) {
2206
+ const defaultRoute = DEFAULT_ROUTING[taskType];
2207
+ targetProvider = defaultRoute.provider;
2208
+ targetModel = defaultRoute.model;
2209
+ }
2210
+ }
2211
+ if (!useCascade) {
2212
+ log(`Routing to: ${targetProvider}/${targetModel}`);
2213
+ }
2214
+ else {
2215
+ log(`Cascade routing enabled with models: ${cascadeConfig?.models?.join(', ') ?? ''}`);
2216
+ }
2217
+ const cooldownsEnabled = proxyConfig.reliability?.cooldowns?.enabled === true;
2218
+ if (!useCascade && cooldownsEnabled && !cooldownManager.isAvailable(targetProvider)) {
2219
+ res.writeHead(503, { 'Content-Type': 'application/json' });
2220
+ res.end(JSON.stringify({ error: `Provider ${targetProvider} is temporarily cooled down` }));
2221
+ return;
2222
+ }
2223
+ let apiKey;
2224
+ if (!useCascade) {
2225
+ const apiKeyResult = resolveProviderApiKey(targetProvider, ctx, useAnthropicEnvKey);
2226
+ if (apiKeyResult.error) {
2227
+ res.writeHead(apiKeyResult.error.status, { 'Content-Type': 'application/json' });
2228
+ res.end(JSON.stringify(apiKeyResult.error.payload));
2229
+ return;
2230
+ }
2231
+ apiKey = apiKeyResult.apiKey;
2232
+ }
2233
+ const startTime = Date.now();
2234
+ // Handle streaming vs non-streaming
2235
+ if (isStreaming) {
2236
+ await handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, useCascade ? 'cascade' : routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled);
2237
+ }
2238
+ else {
2239
+ if (useCascade && cascadeConfig) {
2240
+ try {
2241
+ const cascadeResult = await cascadeRequest(cascadeConfig, async (modelName) => {
2242
+ const resolved = resolveConfigModel(modelName);
2243
+ if (!resolved) {
2244
+ throw new Error(`Invalid cascade model: ${modelName}`);
2245
+ }
2246
+ if (cooldownsEnabled && !cooldownManager.isAvailable(resolved.provider)) {
2247
+ throw new CooldownError(resolved.provider);
2248
+ }
2249
+ const apiKeyResult = resolveProviderApiKey(resolved.provider, ctx, useAnthropicEnvKey);
2250
+ if (apiKeyResult.error) {
2251
+ throw new ProviderResponseError(apiKeyResult.error.status, apiKeyResult.error.payload);
2252
+ }
2253
+ const result = await executeNonStreamingProviderRequest(request, resolved.provider, resolved.model, apiKeyResult.apiKey, ctx);
2254
+ if (!result.ok) {
2255
+ if (cooldownsEnabled) {
2256
+ cooldownManager.recordFailure(resolved.provider, JSON.stringify(result.responseData));
2257
+ }
2258
+ throw new ProviderResponseError(result.status, result.responseData);
2259
+ }
2260
+ if (cooldownsEnabled) {
2261
+ cooldownManager.recordSuccess(resolved.provider);
2262
+ }
2263
+ return { responseData: result.responseData, provider: resolved.provider, model: resolved.model };
2264
+ }, log);
2265
+ const durationMs = Date.now() - startTime;
2266
+ let responseData = cascadeResult.responseData;
2267
+ if (recordTelemetry) {
2268
+ try {
2269
+ const runResult = await relay.run({
2270
+ prompt: promptText.slice(0, 500),
2271
+ taskType,
2272
+ model: `${cascadeResult.provider}:${cascadeResult.model}`,
2273
+ });
2274
+ responseData['_relayplane'] = {
2275
+ runId: runResult.runId,
2276
+ routedTo: `${cascadeResult.provider}/${cascadeResult.model}`,
2277
+ taskType,
2278
+ confidence,
2279
+ durationMs,
2280
+ mode: 'cascade',
2281
+ escalations: cascadeResult.escalations,
2282
+ };
2283
+ log(`Completed in ${durationMs}ms, runId: ${runResult.runId}`);
2284
+ }
2285
+ catch (err) {
2286
+ log(`Failed to record run: ${err}`);
2287
+ }
2288
+ }
2289
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2290
+ res.end(JSON.stringify(responseData));
2291
+ }
2292
+ catch (err) {
2293
+ if (err instanceof ProviderResponseError) {
2294
+ res.writeHead(err.status, { 'Content-Type': 'application/json' });
2295
+ res.end(JSON.stringify(err.payload));
2296
+ return;
2297
+ }
2298
+ const errorMsg = err instanceof Error ? err.message : String(err);
2299
+ res.writeHead(500, { 'Content-Type': 'application/json' });
2300
+ res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
2301
+ }
2302
+ }
2303
+ else {
2304
+ await handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled);
2305
+ }
2306
+ }
2307
+ });
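+ // Example (hedged): the OpenAI-compatible endpoint, assuming the default
+ // port; 'relayplane:cost' is one of the models advertised by GET /v1/models:
+ //   curl -X POST http://127.0.0.1:3001/v1/chat/completions \
+ //     -H 'content-type: application/json' \
+ //     -d '{"model":"relayplane:cost","messages":[{"role":"user","content":"hi"}]}'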
2308
+ return new Promise((resolve, reject) => {
2309
+ server.on('error', reject);
2310
+ server.listen(port, host, () => {
2311
+ console.log(`RelayPlane proxy listening on http://${host}:${port}`);
2312
+ console.log(` Endpoints:`);
2313
+ console.log(` POST /v1/messages - Native Anthropic API (Claude Code)`);
2314
+ console.log(` POST /v1/chat/completions - OpenAI-compatible API`);
2315
+ console.log(` POST /v1/messages/count_tokens - Token counting`);
2316
+ console.log(` GET /v1/models - Model list`);
2317
+ console.log(` Models: relayplane:auto, relayplane:cost, relayplane:fast, relayplane:quality`);
2318
+ console.log(` Auth: Passthrough for Anthropic, env vars for other providers`);
2319
+ console.log(` Streaming: ✅ Enabled`);
2320
+ resolve(server);
2321
+ });
2322
+ });
2323
+ }
2324
+ /**
2325
+ * Execute a non-streaming provider request and normalize the response to the OpenAI format
2326
+ */
2327
+ async function executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx) {
2328
+ let providerResponse;
2329
+ let responseData;
2330
+ switch (targetProvider) {
2331
+ case 'anthropic': {
2332
+ providerResponse = await forwardToAnthropic(request, targetModel, ctx, apiKey);
2333
+ const rawData = (await providerResponse.json());
2334
+ if (!providerResponse.ok) {
2335
+ return { responseData: rawData, ok: false, status: providerResponse.status };
2336
+ }
2337
+ responseData = convertAnthropicResponse(rawData);
2338
+ break;
2339
+ }
2340
+ case 'google': {
2341
+ providerResponse = await forwardToGemini(request, targetModel, apiKey);
2342
+ const rawData = (await providerResponse.json());
2343
+ if (!providerResponse.ok) {
2344
+ return { responseData: rawData, ok: false, status: providerResponse.status };
2345
+ }
2346
+ responseData = convertGeminiResponse(rawData, targetModel);
2347
+ break;
2348
+ }
2349
+ case 'xai': {
2350
+ providerResponse = await forwardToXAI(request, targetModel, apiKey);
2351
+ responseData = (await providerResponse.json());
2352
+ if (!providerResponse.ok) {
2353
+ return { responseData, ok: false, status: providerResponse.status };
2354
+ }
2355
+ break;
2356
+ }
2357
+ case 'moonshot': {
2358
+ providerResponse = await forwardToMoonshot(request, targetModel, apiKey);
2359
+ responseData = (await providerResponse.json());
2360
+ if (!providerResponse.ok) {
2361
+ return { responseData, ok: false, status: providerResponse.status };
2362
+ }
2363
+ break;
2364
+ }
2365
+ default: {
2366
+ providerResponse = await forwardToOpenAI(request, targetModel, apiKey);
2367
+ responseData = (await providerResponse.json());
2368
+ if (!providerResponse.ok) {
2369
+ return { responseData, ok: false, status: providerResponse.status };
2370
+ }
2371
+ }
2372
+ }
2373
+ return { responseData, ok: true, status: 200 };
2374
+ }
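+ /**
+ * Handle streaming request: forwards to the provider, then converts the
+ * provider's SSE stream to the OpenAI chunk format where needed.
+ */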
2375
+ async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled) {
2376
+ let providerResponse;
2377
+ try {
2378
+ switch (targetProvider) {
2379
+ case 'anthropic':
2380
+ // Use auth passthrough for Anthropic
2381
+ providerResponse = await forwardToAnthropicStream(request, targetModel, ctx, apiKey);
2382
+ break;
2383
+ case 'google':
2384
+ providerResponse = await forwardToGeminiStream(request, targetModel, apiKey);
2385
+ break;
2386
+ case 'xai':
2387
+ providerResponse = await forwardToXAIStream(request, targetModel, apiKey);
2388
+ break;
2389
+ case 'moonshot':
2390
+ providerResponse = await forwardToMoonshotStream(request, targetModel, apiKey);
2391
+ break;
2392
+ default:
2393
+ providerResponse = await forwardToOpenAIStream(request, targetModel, apiKey);
2394
+ }
2395
+ if (!providerResponse.ok) {
2396
+ const errorData = await providerResponse.json();
2397
+ if (cooldownsEnabled) {
2398
+ cooldownManager.recordFailure(targetProvider, JSON.stringify(errorData));
2399
+ }
2400
+ res.writeHead(providerResponse.status, { 'Content-Type': 'application/json' });
2401
+ res.end(JSON.stringify(errorData));
2402
+ return;
2403
+ }
2404
+ }
2405
+ catch (err) {
2406
+ const errorMsg = err instanceof Error ? err.message : String(err);
2407
+ if (cooldownsEnabled) {
2408
+ cooldownManager.recordFailure(targetProvider, errorMsg);
2409
+ }
2410
+ res.writeHead(500, { 'Content-Type': 'application/json' });
2411
+ res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
2412
+ return;
2413
+ }
2414
+ // Set SSE headers
2415
+ res.writeHead(200, {
2416
+ 'Content-Type': 'text/event-stream',
2417
+ 'Cache-Control': 'no-cache',
2418
+ 'Connection': 'keep-alive',
2419
+ });
2420
+ try {
2421
+ // Stream the response based on provider format
2422
+ switch (targetProvider) {
2423
+ case 'anthropic':
2424
+ // Convert Anthropic stream to OpenAI format
2425
+ for await (const chunk of convertAnthropicStream(providerResponse, targetModel)) {
2426
+ res.write(chunk);
2427
+ }
2428
+ break;
2429
+ case 'google':
2430
+ // Convert Gemini stream to OpenAI format
2431
+ for await (const chunk of convertGeminiStream(providerResponse, targetModel)) {
2432
+ res.write(chunk);
2433
+ }
2434
+ break;
2435
+ default:
2436
+ // xAI, Moonshot, OpenAI all use OpenAI-compatible streaming format
2437
+ for await (const chunk of pipeOpenAIStream(providerResponse)) {
2438
+ res.write(chunk);
2439
+ }
2440
+ }
2441
+ }
2442
+ catch (err) {
2443
+ log(`Streaming error: ${err}`);
2444
+ }
2445
+ if (cooldownsEnabled) {
2446
+ cooldownManager.recordSuccess(targetProvider);
2447
+ }
2448
+ const durationMs = Date.now() - startTime;
2449
+ if (recordTelemetry) {
2450
+ // Record the run (non-blocking)
2451
+ relay
2452
+ .run({
2453
+ prompt: promptText.slice(0, 500),
2454
+ taskType,
2455
+ model: `${targetProvider}:${targetModel}`,
2456
+ })
2457
+ .then((runResult) => {
2458
+ log(`Completed streaming in ${durationMs}ms, runId: ${runResult.runId}`);
2459
+ })
2460
+ .catch((err) => {
2461
+ log(`Failed to record run: ${err}`);
2462
+ });
2463
+ }
2464
+ res.end();
2465
+ }
2466
+ /**
2467
+ * Handle non-streaming request
2468
+ */
2469
+ async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, ctx, relay, promptText, taskType, confidence, routingMode, recordTelemetry, startTime, log, cooldownManager, cooldownsEnabled) {
2470
+ let responseData;
2471
+ try {
2472
+ const result = await executeNonStreamingProviderRequest(request, targetProvider, targetModel, apiKey, ctx);
2473
+ responseData = result.responseData;
2474
+ if (!result.ok) {
2475
+ if (cooldownsEnabled) {
2476
+ cooldownManager.recordFailure(targetProvider, JSON.stringify(responseData));
2477
+ }
2478
+ res.writeHead(result.status, { 'Content-Type': 'application/json' });
2479
+ res.end(JSON.stringify(responseData));
2480
+ return;
2481
+ }
2482
+ }
2483
+ catch (err) {
2484
+ const errorMsg = err instanceof Error ? err.message : String(err);
2485
+ if (cooldownsEnabled) {
2486
+ cooldownManager.recordFailure(targetProvider, errorMsg);
2487
+ }
2488
+ res.writeHead(500, { 'Content-Type': 'application/json' });
2489
+ res.end(JSON.stringify({ error: `Provider error: ${errorMsg}` }));
2490
+ return;
2491
+ }
2492
+ if (cooldownsEnabled) {
2493
+ cooldownManager.recordSuccess(targetProvider);
2494
+ }
2495
+ const durationMs = Date.now() - startTime;
2496
+ if (recordTelemetry) {
2497
+ // Record the run in RelayPlane
2498
+ try {
2499
+ const runResult = await relay.run({
2500
+ prompt: promptText.slice(0, 500),
2501
+ taskType,
2502
+ model: `${targetProvider}:${targetModel}`,
2503
+ });
2504
+ // Add routing metadata to response
2505
+ responseData['_relayplane'] = {
2506
+ runId: runResult.runId,
2507
+ routedTo: `${targetProvider}/${targetModel}`,
2508
+ taskType,
2509
+ confidence,
2510
+ durationMs,
2511
+ mode: routingMode,
2512
+ };
2513
+ log(`Completed in ${durationMs}ms, runId: ${runResult.runId}`);
2514
+ }
2515
+ catch (err) {
2516
+ log(`Failed to record run: ${err}`);
2517
+ }
2518
+ }
2519
+ // Send response
2520
+ res.writeHead(200, { 'Content-Type': 'application/json' });
2521
+ res.end(JSON.stringify(responseData));
2522
+ }
2523
+ // Note: CLI entry point is in cli.ts
2524
+ //# sourceMappingURL=standalone-proxy.js.map