@agentgazer/proxy 0.3.5 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/proxy-server.js
CHANGED
|
@@ -35,27 +35,32 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.startProxy = startProxy;
|
|
37
37
|
const http = __importStar(require("node:http"));
|
|
38
|
+
const node_string_decoder_1 = require("node:string_decoder");
|
|
38
39
|
const shared_1 = require("@agentgazer/shared");
|
|
39
40
|
const server_1 = require("@agentgazer/server");
|
|
40
41
|
const modelOverrideCache = {};
|
|
41
42
|
const MODEL_OVERRIDE_CACHE_TTL_MS = 30_000; // 30 seconds
|
|
42
43
|
/**
 * Resolve the model / provider override configured for an agent.
 *
 * Results (including "no rule") are memoised in `modelOverrideCache` under the
 * key `${agentId}:${provider}` for `MODEL_OVERRIDE_CACHE_TTL_MS` milliseconds
 * so that the rule store is not queried on every proxied request.
 *
 * @param {object|null|undefined} db - Handle passed through to `getModelRule`; when falsy no lookup is attempted.
 * @param {string} agentId - Agent whose rule is requested.
 * @param {string} provider - Provider the request was addressed to.
 * @returns {{model: (string|null), targetProvider: (string|null)}} Always an
 *   object; both fields are `null` when no override applies.
 */
function getModelOverride(db, agentId, provider) {
    // Callers read `.model` / `.targetProvider` unconditionally, so the
    // "nothing configured" case is an object with null fields, never undefined.
    const noOverride = { model: null, targetProvider: null };
    if (!db) {
        return noOverride;
    }
    const cacheKey = `${agentId}:${provider}`;
    const cached = modelOverrideCache[cacheKey];
    if (cached && cached.expiresAt > Date.now()) {
        return cached.result;
    }
    // Cache miss or expired entry: fetch the rule from the DB.
    const rule = (0, server_1.getModelRule)(db, agentId, provider);
    const result = {
        model: rule?.model_override ?? null,
        targetProvider: rule?.target_provider ?? null,
    };
    // Cache the result (negative results too) until the TTL elapses.
    modelOverrideCache[cacheKey] = {
        result,
        expiresAt: Date.now() + MODEL_OVERRIDE_CACHE_TTL_MS,
    };
    return result;
}
|
|
60
65
|
const log = (0, shared_1.createLogger)("proxy");
|
|
61
66
|
const event_buffer_js_1 = require("./event-buffer.js");
|
|
@@ -110,7 +115,11 @@ function normalizeRequestBody(provider, body, log) {
|
|
|
110
115
|
let modified = false;
|
|
111
116
|
const changes = [];
|
|
112
117
|
// Fields that only OpenAI supports (top-level)
|
|
113
|
-
const openaiOnlyFields = ["store", "metadata", "parallel_tool_calls"
|
|
118
|
+
const openaiOnlyFields = ["store", "metadata", "parallel_tool_calls"];
|
|
119
|
+
// Providers that support stream_options for usage tracking
|
|
120
|
+
const streamOptionsProviders = new Set(["openai", "deepseek", "moonshot", "zhipu", "minimax", "yi", "baichuan"]);
|
|
121
|
+
// Providers that don't support stream_options at all
|
|
122
|
+
const noStreamOptionsProviders = new Set(["anthropic", "google", "mistral", "cohere"]);
|
|
114
123
|
// max_completion_tokens -> max_tokens conversion for non-OpenAI providers
|
|
115
124
|
if (provider !== "openai" && "max_completion_tokens" in result) {
|
|
116
125
|
if (!("max_tokens" in result)) {
|
|
@@ -164,6 +173,29 @@ function normalizeRequestBody(provider, body, log) {
|
|
|
164
173
|
}
|
|
165
174
|
}
|
|
166
175
|
}
|
|
176
|
+
// Handle stream_options for usage tracking
|
|
177
|
+
if (result.stream === true) {
|
|
178
|
+
if (streamOptionsProviders.has(provider)) {
|
|
179
|
+
// Add stream_options.include_usage for providers that support it
|
|
180
|
+
const existingStreamOptions = result.stream_options;
|
|
181
|
+
if (!existingStreamOptions?.include_usage) {
|
|
182
|
+
result.stream_options = {
|
|
183
|
+
...existingStreamOptions,
|
|
184
|
+
include_usage: true,
|
|
185
|
+
};
|
|
186
|
+
changes.push("+stream_options.include_usage");
|
|
187
|
+
modified = true;
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
else if (noStreamOptionsProviders.has(provider)) {
|
|
191
|
+
// Remove stream_options for providers that don't support it
|
|
192
|
+
if ("stream_options" in result) {
|
|
193
|
+
delete result.stream_options;
|
|
194
|
+
changes.push("-stream_options");
|
|
195
|
+
modified = true;
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
}
|
|
167
199
|
// Provider-specific handling
|
|
168
200
|
switch (provider) {
|
|
169
201
|
case "mistral":
|
|
@@ -180,7 +212,7 @@ function normalizeRequestBody(provider, body, log) {
|
|
|
180
212
|
case "cohere":
|
|
181
213
|
// Cohere uses different field names and doesn't support some OpenAI fields
|
|
182
214
|
// See: https://docs.cohere.com/reference/chat
|
|
183
|
-
const cohereUnsupported = ["top_logprobs", "n", "user"
|
|
215
|
+
const cohereUnsupported = ["top_logprobs", "n", "user"];
|
|
184
216
|
for (const field of cohereUnsupported) {
|
|
185
217
|
if (field in result) {
|
|
186
218
|
delete result[field];
|
|
@@ -238,12 +270,20 @@ function parseAnthropicSSE(dataLines, statusCode) {
|
|
|
238
270
|
let model = null;
|
|
239
271
|
let tokensIn = null;
|
|
240
272
|
let tokensOut = null;
|
|
273
|
+
let cacheCreationTokens = null;
|
|
274
|
+
let cacheReadTokens = null;
|
|
241
275
|
for (const line of dataLines) {
|
|
242
276
|
try {
|
|
243
277
|
const data = JSON.parse(line);
|
|
244
278
|
if (data.type === "message_start" && data.message) {
|
|
245
279
|
model = data.message.model ?? null;
|
|
246
|
-
|
|
280
|
+
const usage = data.message.usage;
|
|
281
|
+
if (usage) {
|
|
282
|
+
tokensIn = usage.input_tokens ?? null;
|
|
283
|
+
// Anthropic prompt caching: include cache tokens in input count
|
|
284
|
+
cacheCreationTokens = usage.cache_creation_input_tokens ?? null;
|
|
285
|
+
cacheReadTokens = usage.cache_read_input_tokens ?? null;
|
|
286
|
+
}
|
|
247
287
|
}
|
|
248
288
|
if (data.type === "message_delta" && data.usage) {
|
|
249
289
|
tokensOut = data.usage.output_tokens ?? null;
|
|
@@ -253,10 +293,22 @@ function parseAnthropicSSE(dataLines, statusCode) {
|
|
|
253
293
|
continue;
|
|
254
294
|
}
|
|
255
295
|
}
|
|
256
|
-
|
|
296
|
+
// Total input includes regular + cache tokens
|
|
297
|
+
// Note: cache_creation is charged at 1.25x, cache_read at 0.1x
|
|
298
|
+
// For now we count all as regular input tokens for simplicity
|
|
299
|
+
let totalInputTokens = tokensIn;
|
|
300
|
+
if (totalInputTokens != null) {
|
|
301
|
+
if (cacheCreationTokens != null) {
|
|
302
|
+
totalInputTokens += cacheCreationTokens;
|
|
303
|
+
}
|
|
304
|
+
if (cacheReadTokens != null) {
|
|
305
|
+
totalInputTokens += cacheReadTokens;
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
const tokensTotal = totalInputTokens != null && tokensOut != null ? totalInputTokens + tokensOut : null;
|
|
257
309
|
return {
|
|
258
310
|
model,
|
|
259
|
-
tokensIn,
|
|
311
|
+
tokensIn: totalInputTokens,
|
|
260
312
|
tokensOut,
|
|
261
313
|
tokensTotal,
|
|
262
314
|
statusCode,
|
|
@@ -292,6 +344,77 @@ function parseGoogleSSE(dataLines, statusCode) {
|
|
|
292
344
|
errorMessage: null,
|
|
293
345
|
};
|
|
294
346
|
}
|
|
347
|
+
/**
 * Split a text into the source strings of its top-level JSON objects.
 *
 * Walks the text once, tracking brace depth and skipping over the contents of
 * JSON string literals (including backslash escapes), so braces inside strings
 * and arbitrarily deep nesting are handled. Anything outside a balanced
 * top-level `{...}` (array brackets, separating commas, a truncated trailing
 * object) is ignored.
 *
 * @param {string} text - Raw response text.
 * @returns {string[]} Candidate object literals, in order of appearance.
 */
function extractTopLevelJsonObjects(text) {
    const objects = [];
    let depth = 0;
    let start = -1;
    let inString = false;
    let escaped = false;
    for (let i = 0; i < text.length; i++) {
        const ch = text[i];
        if (inString) {
            if (escaped) {
                escaped = false;
            }
            else if (ch === "\\") {
                escaped = true;
            }
            else if (ch === "\"") {
                inString = false;
            }
            continue;
        }
        if (ch === "\"") {
            // Only string literals inside an object matter for brace counting.
            if (depth > 0) {
                inString = true;
            }
        }
        else if (ch === "{") {
            if (depth === 0) {
                start = i;
            }
            depth += 1;
        }
        else if (ch === "}" && depth > 0) {
            depth -= 1;
            if (depth === 0) {
                objects.push(text.slice(start, i + 1));
                start = -1;
            }
        }
    }
    return objects;
}
/**
 * Parse Google's streaming response format (JSON array chunks, not standard SSE).
 * Google Gemini API returns streaming data as: [{"candidates":...}, {"candidates":...}]
 * Each chunk may or may not have usageMetadata - typically only the last chunk has it.
 *
 * Strategy:
 *  1. If the text starts with "[", try to parse it as one complete JSON array.
 *  2. If that yielded no token counts (e.g. the array is truncated or the text
 *     is not an array), fall back to extracting each balanced top-level object
 *     and parsing them individually; unparseable fragments are skipped.
 * When several chunks carry `modelVersion` / `usageMetadata`, the last one wins.
 *
 * @param {string} rawText - Accumulated response body text.
 * @param {number} statusCode - HTTP status of the provider response, echoed back.
 * @returns {{model: (string|null), tokensIn: (number|null), tokensOut: (number|null),
 *   tokensTotal: (number|null), statusCode: number, errorMessage: null}}
 */
function parseGoogleStreamingResponse(rawText, statusCode) {
    let model = null;
    let tokensIn = null;
    let tokensOut = null;
    let tokensTotal = null;
    // Fold one decoded chunk into the running result.
    const absorbChunk = (chunk) => {
        if (chunk === null || typeof chunk !== "object") {
            return;
        }
        if (chunk.modelVersion) {
            model = chunk.modelVersion;
        }
        if (chunk.usageMetadata) {
            tokensIn = chunk.usageMetadata.promptTokenCount ?? null;
            tokensOut = chunk.usageMetadata.candidatesTokenCount ?? null;
            tokensTotal = chunk.usageMetadata.totalTokenCount ?? null;
        }
    };
    const text = typeof rawText === "string" ? rawText : String(rawText ?? "");
    // First, try parsing as a complete JSON array
    const trimmed = text.trim();
    if (trimmed.startsWith("[")) {
        try {
            const data = JSON.parse(trimmed);
            if (Array.isArray(data)) {
                for (const chunk of data) {
                    absorbChunk(chunk);
                }
            }
        }
        catch {
            // Not a valid JSON array (e.g. truncated) - fall through to per-object parsing
        }
    }
    // If we didn't find usage data, try extracting individual JSON objects.
    // A depth-tracking scan is used rather than a regex because real chunks
    // nest several levels deep (candidates -> content -> parts).
    if (tokensIn === null && tokensOut === null) {
        for (const objStr of extractTopLevelJsonObjects(text)) {
            try {
                absorbChunk(JSON.parse(objStr));
            }
            catch {
                // Best effort: skip fragments that are not valid JSON
                continue;
            }
        }
    }
    return {
        model,
        tokensIn,
        tokensOut,
        tokensTotal,
        statusCode,
        errorMessage: null,
    };
}
|
|
295
418
|
function parseCohereSSE(dataLines, statusCode) {
|
|
296
419
|
let tokensIn = null;
|
|
297
420
|
let tokensOut = null;
|
|
@@ -330,6 +453,18 @@ function parseSSEResponse(provider, sseText, statusCode) {
|
|
|
330
453
|
dataLines.push(line.slice(6));
|
|
331
454
|
}
|
|
332
455
|
}
|
|
456
|
+
// Google Gemini API uses a different streaming format (JSON array, not SSE)
|
|
457
|
+
// Handle it specially even if no "data: " lines found
|
|
458
|
+
if (provider === "google") {
|
|
459
|
+
if (dataLines.length > 0) {
|
|
460
|
+
// Google with SSE format (when ?alt=sse is used)
|
|
461
|
+
return parseGoogleSSE(dataLines, statusCode);
|
|
462
|
+
}
|
|
463
|
+
else {
|
|
464
|
+
// Google's default JSON array streaming format
|
|
465
|
+
return parseGoogleStreamingResponse(sseText, statusCode);
|
|
466
|
+
}
|
|
467
|
+
}
|
|
333
468
|
if (dataLines.length === 0)
|
|
334
469
|
return null;
|
|
335
470
|
switch (provider) {
|
|
@@ -343,8 +478,6 @@ function parseSSEResponse(provider, sseText, statusCode) {
|
|
|
343
478
|
return parseOpenAISSE(dataLines, statusCode);
|
|
344
479
|
case "anthropic":
|
|
345
480
|
return parseAnthropicSSE(dataLines, statusCode);
|
|
346
|
-
case "google":
|
|
347
|
-
return parseGoogleSSE(dataLines, statusCode);
|
|
348
481
|
case "cohere":
|
|
349
482
|
return parseCohereSSE(dataLines, statusCode);
|
|
350
483
|
default:
|
|
@@ -802,9 +935,15 @@ function startProxy(options) {
|
|
|
802
935
|
log.warn(`No parseable SSE data for provider: ${provider} — skipping event`);
|
|
803
936
|
return;
|
|
804
937
|
}
|
|
938
|
+
// Use requestedModel as fallback when provider doesn't return model in response
|
|
939
|
+
// (Google Gemini API typically doesn't include model in streaming response)
|
|
940
|
+
const effectiveModel = parsed.model ?? requestedModel;
|
|
805
941
|
let costUsd = null;
|
|
806
|
-
if (
|
|
807
|
-
costUsd = (0, shared_1.calculateCost)(
|
|
942
|
+
if (effectiveModel && parsed.tokensIn != null && parsed.tokensOut != null) {
|
|
943
|
+
costUsd = (0, shared_1.calculateCost)(effectiveModel, parsed.tokensIn, parsed.tokensOut, {
|
|
944
|
+
cacheCreation: parsed.cacheCreationTokens ?? undefined,
|
|
945
|
+
cacheRead: parsed.cacheReadTokens ?? undefined,
|
|
946
|
+
}, provider);
|
|
808
947
|
}
|
|
809
948
|
// Record response for loop detection
|
|
810
949
|
loop_detector_js_1.loopDetector.recordResponse(effectiveAgentId, sseText);
|
|
@@ -812,7 +951,7 @@ function startProxy(options) {
|
|
|
812
951
|
agent_id: effectiveAgentId,
|
|
813
952
|
event_type: "llm_call",
|
|
814
953
|
provider,
|
|
815
|
-
model:
|
|
954
|
+
model: effectiveModel,
|
|
816
955
|
requested_model: requestedModel,
|
|
817
956
|
tokens_in: parsed.tokensIn,
|
|
818
957
|
tokens_out: parsed.tokensOut,
|
|
@@ -845,10 +984,16 @@ function startProxy(options) {
|
|
|
845
984
|
log.warn(`No parser result for provider: ${provider}`);
|
|
846
985
|
return;
|
|
847
986
|
}
|
|
987
|
+
// Use requestedModel as fallback when provider doesn't return model in response
|
|
988
|
+
// (Google Gemini API typically doesn't include model in response)
|
|
989
|
+
const effectiveModel = parsed.model ?? requestedModel;
|
|
848
990
|
// Calculate cost if we have the necessary token data
|
|
849
991
|
let costUsd = null;
|
|
850
|
-
if (
|
|
851
|
-
costUsd = (0, shared_1.calculateCost)(
|
|
992
|
+
if (effectiveModel && parsed.tokensIn != null && parsed.tokensOut != null) {
|
|
993
|
+
costUsd = (0, shared_1.calculateCost)(effectiveModel, parsed.tokensIn, parsed.tokensOut, {
|
|
994
|
+
cacheCreation: parsed.cacheCreationTokens ?? undefined,
|
|
995
|
+
cacheRead: parsed.cacheReadTokens ?? undefined,
|
|
996
|
+
}, provider);
|
|
852
997
|
}
|
|
853
998
|
// Record response for loop detection
|
|
854
999
|
loop_detector_js_1.loopDetector.recordResponse(effectiveAgentId, responseBody.toString("utf-8"));
|
|
@@ -856,7 +1001,7 @@ function startProxy(options) {
|
|
|
856
1001
|
agent_id: effectiveAgentId,
|
|
857
1002
|
event_type: "llm_call",
|
|
858
1003
|
provider,
|
|
859
|
-
model:
|
|
1004
|
+
model: effectiveModel,
|
|
860
1005
|
requested_model: requestedModel,
|
|
861
1006
|
tokens_in: parsed.tokensIn,
|
|
862
1007
|
tokens_out: parsed.tokensOut,
|
|
@@ -985,21 +1130,75 @@ function startProxy(options) {
|
|
|
985
1130
|
// Model override and request normalization
|
|
986
1131
|
let requestedModel = null;
|
|
987
1132
|
let modifiedRequestBody = requestBody;
|
|
1133
|
+
let crossProviderOverride = null;
|
|
1134
|
+
let effectiveProvider = provider; // May change if cross-provider override
|
|
1135
|
+
let isStreaming = false;
|
|
988
1136
|
try {
|
|
989
1137
|
let bodyJson = JSON.parse(requestBody.toString("utf-8"));
|
|
990
1138
|
let bodyModified = false;
|
|
991
|
-
|
|
1139
|
+
isStreaming = bodyJson.stream === true;
|
|
1140
|
+
// Extract model from request body if present
|
|
992
1141
|
if (bodyJson.model) {
|
|
993
1142
|
requestedModel = bodyJson.model;
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
1143
|
+
}
|
|
1144
|
+
// Always check for model override rules (even if request has no model)
|
|
1145
|
+
// This handles providers like Google where model is in URL, not body
|
|
1146
|
+
const override = getModelOverride(db, effectiveAgentId, provider);
|
|
1147
|
+
// Apply model override if configured
|
|
1148
|
+
if (override.model) {
|
|
1149
|
+
if (requestedModel) {
|
|
1150
|
+
log.info(`[PROXY] Model override: ${requestedModel} → ${override.model}`);
|
|
1151
|
+
}
|
|
1152
|
+
else {
|
|
1153
|
+
log.info(`[PROXY] Model override (no model in request): → ${override.model}`);
|
|
1154
|
+
}
|
|
1155
|
+
bodyJson.model = override.model;
|
|
1156
|
+
bodyModified = true;
|
|
1157
|
+
}
|
|
1158
|
+
// Handle cross-provider override
|
|
1159
|
+
if (override.targetProvider && override.targetProvider !== provider && shared_1.KNOWN_PROVIDER_NAMES.includes(override.targetProvider)) {
|
|
1160
|
+
const targetProv = override.targetProvider;
|
|
1161
|
+
log.info(`[PROXY] Cross-provider override: ${provider} → ${targetProv}`);
|
|
1162
|
+
// Check if we have an API key for the target provider
|
|
1163
|
+
if (!providerKeys[targetProv]) {
|
|
1164
|
+
log.error(`[PROXY] No API key configured for target provider: ${targetProv}`);
|
|
1165
|
+
sendJson(res, 400, { error: `Cross-provider override failed: no API key for ${targetProv}` });
|
|
1166
|
+
return;
|
|
1167
|
+
}
|
|
1168
|
+
crossProviderOverride = { targetProvider: targetProv, originalProvider: provider };
|
|
1169
|
+
effectiveProvider = targetProv;
|
|
1170
|
+
// Transform request format if needed
|
|
1171
|
+
if (provider !== "anthropic" && targetProv === "anthropic") {
|
|
1172
|
+
// OpenAI-compatible or Google → Anthropic
|
|
1173
|
+
// For Google, we need to ensure the request has required fields
|
|
1174
|
+
if (provider === "google") {
|
|
1175
|
+
// Google native format → need to convert to OpenAI first, then to Anthropic
|
|
1176
|
+
// For now, assume the request is already in a compatible format or
|
|
1177
|
+
// the client is using OpenAI-compatible format through the Google endpoint
|
|
1178
|
+
log.info(`[PROXY] Converting Google request → Anthropic`);
|
|
1179
|
+
}
|
|
1180
|
+
const anthropicRequest = (0, shared_1.openaiToAnthropic)(bodyJson);
|
|
1181
|
+
bodyJson = anthropicRequest;
|
|
1182
|
+
bodyModified = true;
|
|
1183
|
+
log.info(`[PROXY] Transformed request: ${provider} → Anthropic`);
|
|
1184
|
+
}
|
|
1185
|
+
else if (provider === "anthropic" && targetProv !== "anthropic") {
|
|
1186
|
+
// Anthropic → OpenAI-compatible
|
|
1187
|
+
const openaiRequest = (0, shared_1.anthropicToOpenaiRequest)(bodyJson);
|
|
1188
|
+
bodyJson = openaiRequest;
|
|
998
1189
|
bodyModified = true;
|
|
1190
|
+
log.info(`[PROXY] Transformed request: Anthropic → OpenAI`);
|
|
1191
|
+
}
|
|
1192
|
+
// Other cases (OpenAI-compatible → OpenAI-compatible) don't need transformation
|
|
1193
|
+
// Update target URL for cross-provider
|
|
1194
|
+
const newEndpoint = (0, shared_1.getProviderChatEndpoint)(targetProv);
|
|
1195
|
+
if (newEndpoint) {
|
|
1196
|
+
targetUrl = newEndpoint;
|
|
1197
|
+
log.info(`[PROXY] Redirecting to: ${targetUrl}`);
|
|
999
1198
|
}
|
|
1000
1199
|
}
|
|
1001
1200
|
// Normalize request body for provider compatibility
|
|
1002
|
-
const normalized = normalizeRequestBody(
|
|
1201
|
+
const normalized = normalizeRequestBody(effectiveProvider, bodyJson, log);
|
|
1003
1202
|
if (normalized.modified) {
|
|
1004
1203
|
bodyJson = normalized.body;
|
|
1005
1204
|
bodyModified = true;
|
|
@@ -1050,23 +1249,43 @@ function startProxy(options) {
|
|
|
1050
1249
|
forwardHeaders[key] = Array.isArray(value) ? value.join(", ") : value;
|
|
1051
1250
|
}
|
|
1052
1251
|
}
|
|
1053
|
-
//
|
|
1054
|
-
|
|
1252
|
+
// For cross-provider override, remove ALL auth-related headers from the original request
|
|
1253
|
+
// This prevents the original provider's API key from being forwarded to the target provider
|
|
1254
|
+
if (crossProviderOverride) {
|
|
1255
|
+
const authHeaders = ["authorization", "x-api-key", "api-key", "x-goog-api-key"];
|
|
1256
|
+
for (const key of Object.keys(forwardHeaders)) {
|
|
1257
|
+
if (authHeaders.includes(key.toLowerCase())) {
|
|
1258
|
+
delete forwardHeaders[key];
|
|
1259
|
+
log.info(`[PROXY] Removed ${key} header for cross-provider override`);
|
|
1260
|
+
}
|
|
1261
|
+
}
|
|
1262
|
+
}
|
|
1263
|
+
// Inject API key (use effective provider for cross-provider override)
|
|
1264
|
+
const providerKey = providerKeys[effectiveProvider];
|
|
1265
|
+
if (crossProviderOverride) {
|
|
1266
|
+
// Debug: show available provider keys for troubleshooting
|
|
1267
|
+
const availableProviders = Object.keys(providerKeys);
|
|
1268
|
+
log.info(`[PROXY] Cross-provider: looking for "${effectiveProvider}" key, available: [${availableProviders.join(", ")}]`);
|
|
1269
|
+
// Show key lengths for debugging
|
|
1270
|
+
const keyLengths = availableProviders.map(p => `${p}:${providerKeys[p]?.length ?? 0}`).join(", ");
|
|
1271
|
+
log.info(`[PROXY] Key lengths: ${keyLengths}`);
|
|
1272
|
+
}
|
|
1055
1273
|
if (providerKey) {
|
|
1056
|
-
const authHeader = (0, shared_1.getProviderAuthHeader)(
|
|
1274
|
+
const authHeader = (0, shared_1.getProviderAuthHeader)(effectiveProvider, providerKey, useNativeApi && !crossProviderOverride);
|
|
1057
1275
|
if (authHeader) {
|
|
1058
1276
|
const existingAuthKey = Object.keys(forwardHeaders).find(k => k.toLowerCase() === authHeader.name.toLowerCase());
|
|
1059
1277
|
if (existingAuthKey)
|
|
1060
1278
|
delete forwardHeaders[existingAuthKey];
|
|
1061
1279
|
forwardHeaders[authHeader.name] = authHeader.value;
|
|
1062
|
-
|
|
1280
|
+
const maskedKey = providerKey.length > 12 ? `${providerKey.slice(0, 8)}...${providerKey.slice(-4)}` : "****";
|
|
1281
|
+
log.info(`[PROXY] Injected ${authHeader.name}=${maskedKey} (len=${providerKey.length}) for ${effectiveProvider}${crossProviderOverride ? " (cross-provider)" : ""}${useNativeApi ? " (native API)" : ""}`);
|
|
1063
1282
|
}
|
|
1064
1283
|
}
|
|
1065
1284
|
else {
|
|
1066
|
-
log.warn(`[PROXY] No API key configured for provider: ${
|
|
1285
|
+
log.warn(`[PROXY] No API key configured for provider: ${effectiveProvider}`);
|
|
1067
1286
|
}
|
|
1068
1287
|
// Add provider-specific required headers
|
|
1069
|
-
if (
|
|
1288
|
+
if (effectiveProvider === "anthropic") {
|
|
1070
1289
|
// Anthropic requires anthropic-version header
|
|
1071
1290
|
if (!forwardHeaders["anthropic-version"]) {
|
|
1072
1291
|
forwardHeaders["anthropic-version"] = "2023-06-01";
|
|
@@ -1113,30 +1332,191 @@ function startProxy(options) {
|
|
|
1113
1332
|
const contentType = providerResponse.headers.get("content-type") ?? "";
|
|
1114
1333
|
const isSSE = contentType.includes("text/event-stream");
|
|
1115
1334
|
if (isSSE && providerResponse.body) {
|
|
1116
|
-
//
|
|
1335
|
+
// Determine stream conversion direction BEFORE setting headers
|
|
1336
|
+
// Case 1: OpenAI-compatible client → Anthropic target (convert Anthropic SSE → OpenAI SSE)
|
|
1337
|
+
const needsAnthropicToOpenai = crossProviderOverride && effectiveProvider === "anthropic" && crossProviderOverride.originalProvider !== "anthropic";
|
|
1338
|
+
// Case 2: Anthropic client → OpenAI-compatible target (convert OpenAI SSE → Anthropic SSE)
|
|
1339
|
+
const needsOpenaiToAnthropic = crossProviderOverride && crossProviderOverride.originalProvider === "anthropic" && effectiveProvider !== "anthropic";
|
|
1340
|
+
// Streaming response - build headers carefully
|
|
1117
1341
|
const responseHeaders = {};
|
|
1342
|
+
// Headers to skip when forwarding SSE response
|
|
1343
|
+
const skipHeaders = new Set([
|
|
1344
|
+
"content-encoding", // fetch() auto-decompresses, so this would be wrong
|
|
1345
|
+
"content-length", // SSE is streamed, no fixed length
|
|
1346
|
+
"transfer-encoding", // Let Node.js handle this
|
|
1347
|
+
"connection", // Let Node.js handle this
|
|
1348
|
+
]);
|
|
1349
|
+
// Headers to skip when doing cross-provider override (provider-specific headers)
|
|
1350
|
+
const providerSpecificHeaders = new Set([
|
|
1351
|
+
"x-request-id",
|
|
1352
|
+
"openai-processing-ms",
|
|
1353
|
+
"openai-organization",
|
|
1354
|
+
"openai-version",
|
|
1355
|
+
"x-ratelimit-limit-requests",
|
|
1356
|
+
"x-ratelimit-limit-tokens",
|
|
1357
|
+
"x-ratelimit-remaining-requests",
|
|
1358
|
+
"x-ratelimit-remaining-tokens",
|
|
1359
|
+
"x-ratelimit-reset-requests",
|
|
1360
|
+
"x-ratelimit-reset-tokens",
|
|
1361
|
+
]);
|
|
1118
1362
|
providerResponse.headers.forEach((value, key) => {
|
|
1363
|
+
const lowerKey = key.toLowerCase();
|
|
1364
|
+
if (skipHeaders.has(lowerKey))
|
|
1365
|
+
return;
|
|
1366
|
+
if (crossProviderOverride && providerSpecificHeaders.has(lowerKey))
|
|
1367
|
+
return;
|
|
1119
1368
|
responseHeaders[key] = value;
|
|
1120
1369
|
});
|
|
1370
|
+
// Ensure correct headers for SSE
|
|
1371
|
+
responseHeaders["Content-Type"] = "text/event-stream; charset=utf-8";
|
|
1372
|
+
responseHeaders["Cache-Control"] = "no-cache";
|
|
1373
|
+
responseHeaders["Connection"] = "keep-alive";
|
|
1374
|
+
responseHeaders["X-Accel-Buffering"] = "no"; // Disable nginx buffering if behind nginx
|
|
1121
1375
|
res.writeHead(providerResponse.status, responseHeaders);
|
|
1376
|
+
res.flushHeaders(); // Ensure headers are sent immediately
|
|
1122
1377
|
const chunks = [];
|
|
1123
1378
|
let accumulatedSize = 0;
|
|
1124
1379
|
const reader = providerResponse.body.getReader();
|
|
1380
|
+
let streamState = null;
|
|
1381
|
+
let reverseStreamState = null;
|
|
1382
|
+
let lineBuffer = "";
|
|
1383
|
+
// Use StringDecoder to handle multi-byte UTF-8 characters split across chunks
|
|
1384
|
+
const utf8Decoder = new node_string_decoder_1.StringDecoder("utf8");
|
|
1385
|
+
if (needsAnthropicToOpenai) {
|
|
1386
|
+
streamState = (0, shared_1.createStreamingConverterState)();
|
|
1387
|
+
log.info(`[PROXY] Converting Anthropic SSE stream → OpenAI format`);
|
|
1388
|
+
}
|
|
1389
|
+
else if (needsOpenaiToAnthropic) {
|
|
1390
|
+
reverseStreamState = (0, shared_1.createOpenAIToAnthropicStreamState)();
|
|
1391
|
+
log.info(`[PROXY] Converting OpenAI SSE stream → Anthropic format`);
|
|
1392
|
+
}
|
|
1125
1393
|
try {
|
|
1126
1394
|
for (;;) {
|
|
1127
1395
|
const { done, value } = await reader.read();
|
|
1128
1396
|
if (done)
|
|
1129
1397
|
break;
|
|
1130
1398
|
const buf = Buffer.from(value);
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1399
|
+
if (needsAnthropicToOpenai && streamState) {
|
|
1400
|
+
// Transform Anthropic SSE to OpenAI SSE
|
|
1401
|
+
// Use StringDecoder to properly handle multi-byte UTF-8 characters
|
|
1402
|
+
lineBuffer += utf8Decoder.write(buf);
|
|
1403
|
+
const lines = lineBuffer.split("\n");
|
|
1404
|
+
lineBuffer = lines.pop() ?? ""; // Keep incomplete line for next chunk
|
|
1405
|
+
for (const line of lines) {
|
|
1406
|
+
if (line.startsWith("data: ")) {
|
|
1407
|
+
const data = line.slice(6).trim();
|
|
1408
|
+
if (data && data !== "[DONE]") {
|
|
1409
|
+
try {
|
|
1410
|
+
const event = JSON.parse(data);
|
|
1411
|
+
const openaiChunks = (0, shared_1.anthropicSseToOpenaiChunks)(event, streamState, requestedModel ?? undefined);
|
|
1412
|
+
for (const chunk of openaiChunks) {
|
|
1413
|
+
const sseData = (0, shared_1.formatOpenAISSELine)(chunk);
|
|
1414
|
+
res.write(sseData);
|
|
1415
|
+
accumulatedSize += sseData.length;
|
|
1416
|
+
}
|
|
1417
|
+
}
|
|
1418
|
+
catch (e) {
|
|
1419
|
+
log.debug(`[PROXY] Failed to parse Anthropic SSE event: ${data}`);
|
|
1420
|
+
}
|
|
1421
|
+
}
|
|
1422
|
+
}
|
|
1423
|
+
}
|
|
1424
|
+
// Store original for metrics
|
|
1425
|
+
if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
|
|
1426
|
+
chunks.push(buf);
|
|
1427
|
+
}
|
|
1428
|
+
}
|
|
1429
|
+
else if (needsOpenaiToAnthropic && reverseStreamState) {
|
|
1430
|
+
// Transform OpenAI SSE to Anthropic SSE
|
|
1431
|
+
// Use StringDecoder to properly handle multi-byte UTF-8 characters
|
|
1432
|
+
lineBuffer += utf8Decoder.write(buf);
|
|
1433
|
+
const lines = lineBuffer.split("\n");
|
|
1434
|
+
lineBuffer = lines.pop() ?? ""; // Keep incomplete line for next chunk
|
|
1435
|
+
for (const line of lines) {
|
|
1436
|
+
if (line.startsWith("data: ")) {
|
|
1437
|
+
const data = line.slice(6).trim();
|
|
1438
|
+
if (data && data !== "[DONE]") {
|
|
1439
|
+
try {
|
|
1440
|
+
const chunk = JSON.parse(data);
|
|
1441
|
+
const anthropicLines = (0, shared_1.openaiChunkToAnthropicSse)(chunk, reverseStreamState, requestedModel ?? undefined);
|
|
1442
|
+
for (const sseLine of anthropicLines) {
|
|
1443
|
+
log.info(`[PROXY] Anthropic SSE: ${sseLine.slice(0, 150).replace(/\n/g, "\\n")}`);
|
|
1444
|
+
res.write(sseLine);
|
|
1445
|
+
accumulatedSize += sseLine.length;
|
|
1446
|
+
}
|
|
1447
|
+
}
|
|
1448
|
+
catch (e) {
|
|
1449
|
+
log.debug(`[PROXY] Failed to parse OpenAI SSE chunk: ${data}`);
|
|
1450
|
+
}
|
|
1451
|
+
}
|
|
1452
|
+
}
|
|
1453
|
+
}
|
|
1454
|
+
// Store original for metrics
|
|
1455
|
+
if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
|
|
1456
|
+
chunks.push(buf);
|
|
1457
|
+
}
|
|
1458
|
+
}
|
|
1459
|
+
else {
|
|
1460
|
+
// No conversion needed, pass through
|
|
1461
|
+
res.write(buf);
|
|
1462
|
+
accumulatedSize += buf.length;
|
|
1463
|
+
if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
|
|
1464
|
+
chunks.push(buf);
|
|
1465
|
+
}
|
|
1466
|
+
}
|
|
1467
|
+
}
|
|
1468
|
+
// Handle any remaining data in lineBuffer for OpenAI → Anthropic conversion
|
|
1469
|
+
if (needsOpenaiToAnthropic && reverseStreamState && lineBuffer.trim()) {
|
|
1470
|
+
if (lineBuffer.startsWith("data: ")) {
|
|
1471
|
+
const data = lineBuffer.slice(6).trim();
|
|
1472
|
+
if (data && data !== "[DONE]") {
|
|
1473
|
+
try {
|
|
1474
|
+
const chunk = JSON.parse(data);
|
|
1475
|
+
const anthropicLines = (0, shared_1.openaiChunkToAnthropicSse)(chunk, reverseStreamState, requestedModel ?? undefined);
|
|
1476
|
+
for (const sseLine of anthropicLines) {
|
|
1477
|
+
log.info(`[PROXY] Anthropic SSE (final): ${sseLine.slice(0, 150).replace(/\n/g, "\\n")}`);
|
|
1478
|
+
res.write(sseLine);
|
|
1479
|
+
}
|
|
1480
|
+
}
|
|
1481
|
+
catch (e) {
|
|
1482
|
+
log.debug(`[PROXY] Failed to parse final OpenAI SSE chunk: ${data}`);
|
|
1483
|
+
}
|
|
1484
|
+
}
|
|
1485
|
+
}
|
|
1486
|
+
}
|
|
1487
|
+
// Send done markers for converted streams
|
|
1488
|
+
if (needsAnthropicToOpenai) {
|
|
1489
|
+
res.write((0, shared_1.formatOpenAISSEDone)());
|
|
1490
|
+
}
|
|
1491
|
+
// For OpenAI → Anthropic conversion, ensure proper stream finalization
|
|
1492
|
+
// This handles cases where the OpenAI stream ended without a finish_reason chunk
|
|
1493
|
+
if (needsOpenaiToAnthropic && reverseStreamState) {
|
|
1494
|
+
if (!(0, shared_1.isOpenAIToAnthropicStreamFinalized)(reverseStreamState)) {
|
|
1495
|
+
log.info(`[PROXY] OpenAI stream ended without proper finalization, sending closing events`);
|
|
1496
|
+
const finalLines = (0, shared_1.finalizeOpenAIToAnthropicStream)(reverseStreamState);
|
|
1497
|
+
for (const sseLine of finalLines) {
|
|
1498
|
+
log.info(`[PROXY] Anthropic SSE (finalize): ${sseLine.slice(0, 150).replace(/\n/g, "\\n")}`);
|
|
1499
|
+
res.write(sseLine);
|
|
1500
|
+
}
|
|
1135
1501
|
}
|
|
1136
1502
|
}
|
|
1137
1503
|
}
|
|
1138
1504
|
catch (error) {
|
|
1139
1505
|
log.error("Stream read error", { err: error instanceof Error ? error.message : String(error) });
|
|
1506
|
+
// Even on error, try to finalize the Anthropic stream
|
|
1507
|
+
if (needsOpenaiToAnthropic && reverseStreamState && reverseStreamState.sentMessageStart) {
|
|
1508
|
+
try {
|
|
1509
|
+
if (!(0, shared_1.isOpenAIToAnthropicStreamFinalized)(reverseStreamState)) {
|
|
1510
|
+
const finalLines = (0, shared_1.finalizeOpenAIToAnthropicStream)(reverseStreamState);
|
|
1511
|
+
for (const sseLine of finalLines) {
|
|
1512
|
+
res.write(sseLine);
|
|
1513
|
+
}
|
|
1514
|
+
}
|
|
1515
|
+
}
|
|
1516
|
+
catch {
|
|
1517
|
+
// Ignore errors during error recovery
|
|
1518
|
+
}
|
|
1519
|
+
}
|
|
1140
1520
|
}
|
|
1141
1521
|
finally {
|
|
1142
1522
|
res.end();
|
|
@@ -1144,7 +1524,8 @@ function startProxy(options) {
|
|
|
1144
1524
|
const latencyMs = Date.now() - requestStart;
|
|
1145
1525
|
const fullBody = Buffer.concat(chunks);
|
|
1146
1526
|
try {
|
|
1147
|
-
|
|
1527
|
+
// Use effective provider for metrics extraction
|
|
1528
|
+
extractStreamingMetrics(effectiveProvider, providerResponse.status, fullBody, latencyMs, effectiveAgentId, requestedModel);
|
|
1148
1529
|
}
|
|
1149
1530
|
catch (error) {
|
|
1150
1531
|
log.error("Streaming metric extraction error", { err: error instanceof Error ? error.message : String(error) });
|
|
@@ -1162,26 +1543,70 @@ function startProxy(options) {
|
|
|
1162
1543
|
return;
|
|
1163
1544
|
}
|
|
1164
1545
|
const latencyMs = Date.now() - requestStart;
|
|
1546
|
+
// Transform response if cross-provider override
|
|
1547
|
+
let finalResponseBody = responseBodyBuffer;
|
|
1548
|
+
let responseConverted = false;
|
|
1549
|
+
if (crossProviderOverride && providerResponse.status < 400) {
|
|
1550
|
+
// Case 1: OpenAI-compatible client → Anthropic target
|
|
1551
|
+
// Need to convert Anthropic response → OpenAI format
|
|
1552
|
+
if (effectiveProvider === "anthropic" && crossProviderOverride.originalProvider !== "anthropic") {
|
|
1553
|
+
try {
|
|
1554
|
+
const anthropicResponse = JSON.parse(responseBodyBuffer.toString("utf-8"));
|
|
1555
|
+
const openaiResponse = (0, shared_1.anthropicToOpenai)(anthropicResponse, requestedModel ?? undefined);
|
|
1556
|
+
finalResponseBody = Buffer.from(JSON.stringify(openaiResponse), "utf-8");
|
|
1557
|
+
responseConverted = true;
|
|
1558
|
+
log.info(`[PROXY] Converted Anthropic response → OpenAI format`);
|
|
1559
|
+
}
|
|
1560
|
+
catch (e) {
|
|
1561
|
+
log.error(`[PROXY] Failed to convert Anthropic response: ${e instanceof Error ? e.message : String(e)}`);
|
|
1562
|
+
}
|
|
1563
|
+
}
|
|
1564
|
+
// Case 2: Anthropic client → OpenAI-compatible target
|
|
1565
|
+
// Need to convert OpenAI response → Anthropic format
|
|
1566
|
+
else if (crossProviderOverride.originalProvider === "anthropic" && effectiveProvider !== "anthropic") {
|
|
1567
|
+
try {
|
|
1568
|
+
const openaiResponse = JSON.parse(responseBodyBuffer.toString("utf-8"));
|
|
1569
|
+
const anthropicResponse = (0, shared_1.openaiToAnthropicResponse)(openaiResponse, requestedModel ?? undefined);
|
|
1570
|
+
finalResponseBody = Buffer.from(JSON.stringify(anthropicResponse), "utf-8");
|
|
1571
|
+
responseConverted = true;
|
|
1572
|
+
log.info(`[PROXY] Converted OpenAI response → Anthropic format`);
|
|
1573
|
+
}
|
|
1574
|
+
catch (e) {
|
|
1575
|
+
log.error(`[PROXY] Failed to convert OpenAI response: ${e instanceof Error ? e.message : String(e)}`);
|
|
1576
|
+
}
|
|
1577
|
+
}
|
|
1578
|
+
}
|
|
1165
1579
|
const responseHeaders = {};
|
|
1166
1580
|
providerResponse.headers.forEach((value, key) => {
|
|
1167
1581
|
if (key.toLowerCase() === "transfer-encoding")
|
|
1168
1582
|
return;
|
|
1583
|
+
// Update content-length if we transformed the body
|
|
1584
|
+
if (key.toLowerCase() === "content-length" && responseConverted) {
|
|
1585
|
+
responseHeaders[key] = String(finalResponseBody.length);
|
|
1586
|
+
return;
|
|
1587
|
+
}
|
|
1169
1588
|
responseHeaders[key] = value;
|
|
1170
1589
|
});
|
|
1171
|
-
//
|
|
1590
|
+
// Log error responses (INFO level for cross-provider, DEBUG for others)
|
|
1172
1591
|
if (providerResponse.status >= 400) {
|
|
1173
1592
|
try {
|
|
1174
1593
|
const errorBody = responseBodyBuffer.toString("utf-8").slice(0, 2000);
|
|
1175
|
-
|
|
1594
|
+
if (crossProviderOverride) {
|
|
1595
|
+
log.info(`[PROXY] Cross-provider error (${providerResponse.status}): ${errorBody}${responseBodyBuffer.length > 2000 ? "... (truncated)" : ""}`);
|
|
1596
|
+
}
|
|
1597
|
+
else {
|
|
1598
|
+
log.debug(`[PROXY] Error response body: ${errorBody}${responseBodyBuffer.length > 2000 ? "... (truncated)" : ""}`);
|
|
1599
|
+
}
|
|
1176
1600
|
}
|
|
1177
1601
|
catch {
|
|
1178
1602
|
log.debug(`[PROXY] Error response body: (binary, ${responseBodyBuffer.length} bytes)`);
|
|
1179
1603
|
}
|
|
1180
1604
|
}
|
|
1181
1605
|
res.writeHead(providerResponse.status, responseHeaders);
|
|
1182
|
-
res.end(
|
|
1606
|
+
res.end(finalResponseBody);
|
|
1183
1607
|
try {
|
|
1184
|
-
|
|
1608
|
+
// Use effective provider for metrics, but pass original response for parsing
|
|
1609
|
+
extractAndQueueMetrics(effectiveProvider, providerResponse.status, responseBodyBuffer, latencyMs, effectiveAgentId, requestedModel);
|
|
1185
1610
|
}
|
|
1186
1611
|
catch (error) {
|
|
1187
1612
|
log.error("Metric extraction error", { err: error instanceof Error ? error.message : String(error) });
|