@agentgazer/proxy 0.3.4 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/proxy-server.js
CHANGED
|
@@ -35,27 +35,32 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.startProxy = startProxy;
|
|
37
37
|
const http = __importStar(require("node:http"));
|
|
38
|
+
const node_string_decoder_1 = require("node:string_decoder");
|
|
38
39
|
const shared_1 = require("@agentgazer/shared");
|
|
39
40
|
const server_1 = require("@agentgazer/server");
|
|
40
41
|
const modelOverrideCache = {};
|
|
41
42
|
const MODEL_OVERRIDE_CACHE_TTL_MS = 30_000; // 30 seconds
|
|
42
43
|
/**
 * Resolve the model-override rule configured for an agent/provider pair.
 *
 * Results (including "no rule") are memoised in the module-level
 * `modelOverrideCache` for `MODEL_OVERRIDE_CACHE_TTL_MS` so the database is
 * not queried on every proxied request.
 *
 * @param {object|null|undefined} db - Database handle; when falsy no lookup is made.
 * @param {string} agentId - Agent the request is attributed to.
 * @param {string} provider - Provider name the request was addressed to.
 * @returns {{ model: (string|null), targetProvider: (string|null) }}
 *   The override to apply; both fields are `null` when nothing is configured.
 */
function getModelOverride(db, agentId, provider) {
    // Without a database there is nothing to look up (and nothing to cache).
    if (!db) {
        return { model: null, targetProvider: null };
    }
    const key = `${agentId}:${provider}`;
    const entry = modelOverrideCache[key];
    // Serve from cache while the entry is still within its TTL.
    if (entry && entry.expiresAt > Date.now()) {
        return entry.result;
    }
    // Cache miss or stale entry: read the rule from the database.
    const dbRule = (0, server_1.getModelRule)(db, agentId, provider);
    const override = {
        model: dbRule?.model_override ?? null,
        targetProvider: dbRule?.target_provider ?? null,
    };
    // Expiry is stamped after the lookup so the TTL starts from the fresh read.
    modelOverrideCache[key] = {
        result: override,
        expiresAt: Date.now() + MODEL_OVERRIDE_CACHE_TTL_MS,
    };
    return override;
}
|
|
60
65
|
const log = (0, shared_1.createLogger)("proxy");
|
|
61
66
|
const event_buffer_js_1 = require("./event-buffer.js");
|
|
@@ -110,7 +115,11 @@ function normalizeRequestBody(provider, body, log) {
|
|
|
110
115
|
let modified = false;
|
|
111
116
|
const changes = [];
|
|
112
117
|
// Fields that only OpenAI supports (top-level)
|
|
113
|
-
const openaiOnlyFields = ["store", "metadata", "parallel_tool_calls"
|
|
118
|
+
const openaiOnlyFields = ["store", "metadata", "parallel_tool_calls"];
|
|
119
|
+
// Providers that support stream_options for usage tracking
|
|
120
|
+
const streamOptionsProviders = new Set(["openai", "deepseek", "moonshot", "zhipu", "minimax", "yi", "baichuan"]);
|
|
121
|
+
// Providers that don't support stream_options at all
|
|
122
|
+
const noStreamOptionsProviders = new Set(["anthropic", "google", "mistral", "cohere"]);
|
|
114
123
|
// max_completion_tokens -> max_tokens conversion for non-OpenAI providers
|
|
115
124
|
if (provider !== "openai" && "max_completion_tokens" in result) {
|
|
116
125
|
if (!("max_tokens" in result)) {
|
|
@@ -164,6 +173,29 @@ function normalizeRequestBody(provider, body, log) {
|
|
|
164
173
|
}
|
|
165
174
|
}
|
|
166
175
|
}
|
|
176
|
+
// Handle stream_options for usage tracking
|
|
177
|
+
if (result.stream === true) {
|
|
178
|
+
if (streamOptionsProviders.has(provider)) {
|
|
179
|
+
// Add stream_options.include_usage for providers that support it
|
|
180
|
+
const existingStreamOptions = result.stream_options;
|
|
181
|
+
if (!existingStreamOptions?.include_usage) {
|
|
182
|
+
result.stream_options = {
|
|
183
|
+
...existingStreamOptions,
|
|
184
|
+
include_usage: true,
|
|
185
|
+
};
|
|
186
|
+
changes.push("+stream_options.include_usage");
|
|
187
|
+
modified = true;
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
else if (noStreamOptionsProviders.has(provider)) {
|
|
191
|
+
// Remove stream_options for providers that don't support it
|
|
192
|
+
if ("stream_options" in result) {
|
|
193
|
+
delete result.stream_options;
|
|
194
|
+
changes.push("-stream_options");
|
|
195
|
+
modified = true;
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
}
|
|
167
199
|
// Provider-specific handling
|
|
168
200
|
switch (provider) {
|
|
169
201
|
case "mistral":
|
|
@@ -180,7 +212,7 @@ function normalizeRequestBody(provider, body, log) {
|
|
|
180
212
|
case "cohere":
|
|
181
213
|
// Cohere uses different field names and doesn't support some OpenAI fields
|
|
182
214
|
// See: https://docs.cohere.com/reference/chat
|
|
183
|
-
const cohereUnsupported = ["top_logprobs", "n", "user"
|
|
215
|
+
const cohereUnsupported = ["top_logprobs", "n", "user"];
|
|
184
216
|
for (const field of cohereUnsupported) {
|
|
185
217
|
if (field in result) {
|
|
186
218
|
delete result[field];
|
|
@@ -238,12 +270,20 @@ function parseAnthropicSSE(dataLines, statusCode) {
|
|
|
238
270
|
let model = null;
|
|
239
271
|
let tokensIn = null;
|
|
240
272
|
let tokensOut = null;
|
|
273
|
+
let cacheCreationTokens = null;
|
|
274
|
+
let cacheReadTokens = null;
|
|
241
275
|
for (const line of dataLines) {
|
|
242
276
|
try {
|
|
243
277
|
const data = JSON.parse(line);
|
|
244
278
|
if (data.type === "message_start" && data.message) {
|
|
245
279
|
model = data.message.model ?? null;
|
|
246
|
-
|
|
280
|
+
const usage = data.message.usage;
|
|
281
|
+
if (usage) {
|
|
282
|
+
tokensIn = usage.input_tokens ?? null;
|
|
283
|
+
// Anthropic prompt caching: include cache tokens in input count
|
|
284
|
+
cacheCreationTokens = usage.cache_creation_input_tokens ?? null;
|
|
285
|
+
cacheReadTokens = usage.cache_read_input_tokens ?? null;
|
|
286
|
+
}
|
|
247
287
|
}
|
|
248
288
|
if (data.type === "message_delta" && data.usage) {
|
|
249
289
|
tokensOut = data.usage.output_tokens ?? null;
|
|
@@ -253,10 +293,22 @@ function parseAnthropicSSE(dataLines, statusCode) {
|
|
|
253
293
|
continue;
|
|
254
294
|
}
|
|
255
295
|
}
|
|
256
|
-
|
|
296
|
+
// Total input includes regular + cache tokens
|
|
297
|
+
// Note: cache_creation is charged at 1.25x, cache_read at 0.1x
|
|
298
|
+
// For now we count all as regular input tokens for simplicity
|
|
299
|
+
let totalInputTokens = tokensIn;
|
|
300
|
+
if (totalInputTokens != null) {
|
|
301
|
+
if (cacheCreationTokens != null) {
|
|
302
|
+
totalInputTokens += cacheCreationTokens;
|
|
303
|
+
}
|
|
304
|
+
if (cacheReadTokens != null) {
|
|
305
|
+
totalInputTokens += cacheReadTokens;
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
const tokensTotal = totalInputTokens != null && tokensOut != null ? totalInputTokens + tokensOut : null;
|
|
257
309
|
return {
|
|
258
310
|
model,
|
|
259
|
-
tokensIn,
|
|
311
|
+
tokensIn: totalInputTokens,
|
|
260
312
|
tokensOut,
|
|
261
313
|
tokensTotal,
|
|
262
314
|
statusCode,
|
|
@@ -292,6 +344,77 @@ function parseGoogleSSE(dataLines, statusCode) {
|
|
|
292
344
|
errorMessage: null,
|
|
293
345
|
};
|
|
294
346
|
}
|
|
347
|
+
/**
 * Parse Google's streaming response format (JSON array chunks, not standard SSE).
 * Google Gemini API returns streaming data as: [{"candidates":...}, {"candidates":...}]
 * Each chunk may or may not have usageMetadata - typically only the last chunk has it.
 *
 * Strategy:
 *   1. Try to parse the whole body as one JSON array.
 *   2. If that yields no usage (e.g. the body was truncated or is not a
 *      well-formed array), scan for every complete top-level JSON object and
 *      parse those individually.
 *
 * When several chunks carry `modelVersion` / `usageMetadata`, the last one wins.
 *
 * @param {string} rawText - Raw response body as received from the provider.
 * @param {number} statusCode - HTTP status of the provider response (passed through).
 * @returns {{ model: (string|null), tokensIn: (number|null), tokensOut: (number|null),
 *             tokensTotal: (number|null), statusCode: number, errorMessage: null }}
 */
function parseGoogleStreamingResponse(rawText, statusCode) {
    const text = typeof rawText === "string" ? rawText : String(rawText ?? "");
    let model = null;
    let tokensIn = null;
    let tokensOut = null;
    let tokensTotal = null;
    // Fold one decoded chunk into the running result; non-object entries are ignored.
    const absorbChunk = (chunk) => {
        if (chunk === null || typeof chunk !== "object") {
            return;
        }
        if (chunk.modelVersion) {
            model = chunk.modelVersion;
        }
        if (chunk.usageMetadata) {
            tokensIn = chunk.usageMetadata.promptTokenCount ?? null;
            tokensOut = chunk.usageMetadata.candidatesTokenCount ?? null;
            tokensTotal = chunk.usageMetadata.totalTokenCount ?? null;
        }
    };
    // First, try parsing as a complete JSON array
    try {
        const trimmed = text.trim();
        if (trimmed.startsWith("[")) {
            const data = JSON.parse(trimmed);
            if (Array.isArray(data)) {
                for (const chunk of data) {
                    absorbChunk(chunk);
                }
            }
        }
    }
    catch {
        // Not a valid JSON array (commonly: truncated stream) - fall through
        // to per-object extraction below.
    }
    // If we didn't find usage data, parse each complete top-level object on its own.
    // A regex cannot do this: real chunks nest several levels deep
    // (candidates[].content.parts[]) and text parts may contain braces.
    if (tokensIn === null && tokensOut === null) {
        for (const objectText of extractTopLevelJsonObjects(text)) {
            try {
                absorbChunk(JSON.parse(objectText));
            }
            catch {
                // Balanced braces but not valid JSON - skip this candidate.
                continue;
            }
        }
    }
    return {
        model,
        tokensIn,
        tokensOut,
        tokensTotal,
        statusCode,
        errorMessage: null,
    };
}
/**
 * Return the source text of every complete, brace-balanced top-level `{...}`
 * span in `text`. Braces inside JSON string literals (including escaped
 * quotes) are ignored; an unterminated trailing object is dropped.
 *
 * @param {string} text - Text that contains zero or more JSON objects.
 * @returns {string[]} Substrings, in order of appearance, each a candidate JSON object.
 */
function extractTopLevelJsonObjects(text) {
    const objects = [];
    let depth = 0;
    let start = -1;
    let inString = false;
    let escaped = false;
    for (let i = 0; i < text.length; i++) {
        const ch = text[i];
        if (inString) {
            if (escaped) {
                escaped = false;
            }
            else if (ch === "\\") {
                escaped = true;
            }
            else if (ch === '"') {
                inString = false;
            }
            continue;
        }
        if (ch === "{") {
            if (depth === 0) {
                start = i;
            }
            depth++;
        }
        else if (depth > 0) {
            // Strings only matter inside an object; stray quotes between
            // objects must not swallow the following chunks.
            if (ch === '"') {
                inString = true;
            }
            else if (ch === "}") {
                depth--;
                if (depth === 0) {
                    objects.push(text.slice(start, i + 1));
                    start = -1;
                }
            }
        }
    }
    return objects;
}
|
|
295
418
|
function parseCohereSSE(dataLines, statusCode) {
|
|
296
419
|
let tokensIn = null;
|
|
297
420
|
let tokensOut = null;
|
|
@@ -330,6 +453,18 @@ function parseSSEResponse(provider, sseText, statusCode) {
|
|
|
330
453
|
dataLines.push(line.slice(6));
|
|
331
454
|
}
|
|
332
455
|
}
|
|
456
|
+
// Google Gemini API uses a different streaming format (JSON array, not SSE)
|
|
457
|
+
// Handle it specially even if no "data: " lines found
|
|
458
|
+
if (provider === "google") {
|
|
459
|
+
if (dataLines.length > 0) {
|
|
460
|
+
// Google with SSE format (when ?alt=sse is used)
|
|
461
|
+
return parseGoogleSSE(dataLines, statusCode);
|
|
462
|
+
}
|
|
463
|
+
else {
|
|
464
|
+
// Google's default JSON array streaming format
|
|
465
|
+
return parseGoogleStreamingResponse(sseText, statusCode);
|
|
466
|
+
}
|
|
467
|
+
}
|
|
333
468
|
if (dataLines.length === 0)
|
|
334
469
|
return null;
|
|
335
470
|
switch (provider) {
|
|
@@ -343,8 +478,6 @@ function parseSSEResponse(provider, sseText, statusCode) {
|
|
|
343
478
|
return parseOpenAISSE(dataLines, statusCode);
|
|
344
479
|
case "anthropic":
|
|
345
480
|
return parseAnthropicSSE(dataLines, statusCode);
|
|
346
|
-
case "google":
|
|
347
|
-
return parseGoogleSSE(dataLines, statusCode);
|
|
348
481
|
case "cohere":
|
|
349
482
|
return parseCohereSSE(dataLines, statusCode);
|
|
350
483
|
default:
|
|
@@ -802,9 +935,12 @@ function startProxy(options) {
|
|
|
802
935
|
log.warn(`No parseable SSE data for provider: ${provider} — skipping event`);
|
|
803
936
|
return;
|
|
804
937
|
}
|
|
938
|
+
// Use requestedModel as fallback when provider doesn't return model in response
|
|
939
|
+
// (Google Gemini API typically doesn't include model in streaming response)
|
|
940
|
+
const effectiveModel = parsed.model ?? requestedModel;
|
|
805
941
|
let costUsd = null;
|
|
806
|
-
if (
|
|
807
|
-
costUsd = (0, shared_1.calculateCost)(
|
|
942
|
+
if (effectiveModel && parsed.tokensIn != null && parsed.tokensOut != null) {
|
|
943
|
+
costUsd = (0, shared_1.calculateCost)(effectiveModel, parsed.tokensIn, parsed.tokensOut);
|
|
808
944
|
}
|
|
809
945
|
// Record response for loop detection
|
|
810
946
|
loop_detector_js_1.loopDetector.recordResponse(effectiveAgentId, sseText);
|
|
@@ -812,7 +948,7 @@ function startProxy(options) {
|
|
|
812
948
|
agent_id: effectiveAgentId,
|
|
813
949
|
event_type: "llm_call",
|
|
814
950
|
provider,
|
|
815
|
-
model:
|
|
951
|
+
model: effectiveModel,
|
|
816
952
|
requested_model: requestedModel,
|
|
817
953
|
tokens_in: parsed.tokensIn,
|
|
818
954
|
tokens_out: parsed.tokensOut,
|
|
@@ -845,10 +981,13 @@ function startProxy(options) {
|
|
|
845
981
|
log.warn(`No parser result for provider: ${provider}`);
|
|
846
982
|
return;
|
|
847
983
|
}
|
|
984
|
+
// Use requestedModel as fallback when provider doesn't return model in response
|
|
985
|
+
// (Google Gemini API typically doesn't include model in response)
|
|
986
|
+
const effectiveModel = parsed.model ?? requestedModel;
|
|
848
987
|
// Calculate cost if we have the necessary token data
|
|
849
988
|
let costUsd = null;
|
|
850
|
-
if (
|
|
851
|
-
costUsd = (0, shared_1.calculateCost)(
|
|
989
|
+
if (effectiveModel && parsed.tokensIn != null && parsed.tokensOut != null) {
|
|
990
|
+
costUsd = (0, shared_1.calculateCost)(effectiveModel, parsed.tokensIn, parsed.tokensOut);
|
|
852
991
|
}
|
|
853
992
|
// Record response for loop detection
|
|
854
993
|
loop_detector_js_1.loopDetector.recordResponse(effectiveAgentId, responseBody.toString("utf-8"));
|
|
@@ -856,7 +995,7 @@ function startProxy(options) {
|
|
|
856
995
|
agent_id: effectiveAgentId,
|
|
857
996
|
event_type: "llm_call",
|
|
858
997
|
provider,
|
|
859
|
-
model:
|
|
998
|
+
model: effectiveModel,
|
|
860
999
|
requested_model: requestedModel,
|
|
861
1000
|
tokens_in: parsed.tokensIn,
|
|
862
1001
|
tokens_out: parsed.tokensOut,
|
|
@@ -985,21 +1124,75 @@ function startProxy(options) {
|
|
|
985
1124
|
// Model override and request normalization
|
|
986
1125
|
let requestedModel = null;
|
|
987
1126
|
let modifiedRequestBody = requestBody;
|
|
1127
|
+
let crossProviderOverride = null;
|
|
1128
|
+
let effectiveProvider = provider; // May change if cross-provider override
|
|
1129
|
+
let isStreaming = false;
|
|
988
1130
|
try {
|
|
989
1131
|
let bodyJson = JSON.parse(requestBody.toString("utf-8"));
|
|
990
1132
|
let bodyModified = false;
|
|
991
|
-
|
|
1133
|
+
isStreaming = bodyJson.stream === true;
|
|
1134
|
+
// Extract model from request body if present
|
|
992
1135
|
if (bodyJson.model) {
|
|
993
1136
|
requestedModel = bodyJson.model;
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
1137
|
+
}
|
|
1138
|
+
// Always check for model override rules (even if request has no model)
|
|
1139
|
+
// This handles providers like Google where model is in URL, not body
|
|
1140
|
+
const override = getModelOverride(db, effectiveAgentId, provider);
|
|
1141
|
+
// Apply model override if configured
|
|
1142
|
+
if (override.model) {
|
|
1143
|
+
if (requestedModel) {
|
|
1144
|
+
log.info(`[PROXY] Model override: ${requestedModel} → ${override.model}`);
|
|
1145
|
+
}
|
|
1146
|
+
else {
|
|
1147
|
+
log.info(`[PROXY] Model override (no model in request): → ${override.model}`);
|
|
1148
|
+
}
|
|
1149
|
+
bodyJson.model = override.model;
|
|
1150
|
+
bodyModified = true;
|
|
1151
|
+
}
|
|
1152
|
+
// Handle cross-provider override
|
|
1153
|
+
if (override.targetProvider && override.targetProvider !== provider && shared_1.KNOWN_PROVIDER_NAMES.includes(override.targetProvider)) {
|
|
1154
|
+
const targetProv = override.targetProvider;
|
|
1155
|
+
log.info(`[PROXY] Cross-provider override: ${provider} → ${targetProv}`);
|
|
1156
|
+
// Check if we have an API key for the target provider
|
|
1157
|
+
if (!providerKeys[targetProv]) {
|
|
1158
|
+
log.error(`[PROXY] No API key configured for target provider: ${targetProv}`);
|
|
1159
|
+
sendJson(res, 400, { error: `Cross-provider override failed: no API key for ${targetProv}` });
|
|
1160
|
+
return;
|
|
1161
|
+
}
|
|
1162
|
+
crossProviderOverride = { targetProvider: targetProv, originalProvider: provider };
|
|
1163
|
+
effectiveProvider = targetProv;
|
|
1164
|
+
// Transform request format if needed
|
|
1165
|
+
if (provider !== "anthropic" && targetProv === "anthropic") {
|
|
1166
|
+
// OpenAI-compatible or Google → Anthropic
|
|
1167
|
+
// For Google, we need to ensure the request has required fields
|
|
1168
|
+
if (provider === "google") {
|
|
1169
|
+
// Google native format → need to convert to OpenAI first, then to Anthropic
|
|
1170
|
+
// For now, assume the request is already in a compatible format or
|
|
1171
|
+
// the client is using OpenAI-compatible format through the Google endpoint
|
|
1172
|
+
log.info(`[PROXY] Converting Google request → Anthropic`);
|
|
1173
|
+
}
|
|
1174
|
+
const anthropicRequest = (0, shared_1.openaiToAnthropic)(bodyJson);
|
|
1175
|
+
bodyJson = anthropicRequest;
|
|
1176
|
+
bodyModified = true;
|
|
1177
|
+
log.info(`[PROXY] Transformed request: ${provider} → Anthropic`);
|
|
1178
|
+
}
|
|
1179
|
+
else if (provider === "anthropic" && targetProv !== "anthropic") {
|
|
1180
|
+
// Anthropic → OpenAI-compatible
|
|
1181
|
+
const openaiRequest = (0, shared_1.anthropicToOpenaiRequest)(bodyJson);
|
|
1182
|
+
bodyJson = openaiRequest;
|
|
998
1183
|
bodyModified = true;
|
|
1184
|
+
log.info(`[PROXY] Transformed request: Anthropic → OpenAI`);
|
|
1185
|
+
}
|
|
1186
|
+
// Other cases (OpenAI-compatible → OpenAI-compatible) don't need transformation
|
|
1187
|
+
// Update target URL for cross-provider
|
|
1188
|
+
const newEndpoint = (0, shared_1.getProviderChatEndpoint)(targetProv);
|
|
1189
|
+
if (newEndpoint) {
|
|
1190
|
+
targetUrl = newEndpoint;
|
|
1191
|
+
log.info(`[PROXY] Redirecting to: ${targetUrl}`);
|
|
999
1192
|
}
|
|
1000
1193
|
}
|
|
1001
1194
|
// Normalize request body for provider compatibility
|
|
1002
|
-
const normalized = normalizeRequestBody(
|
|
1195
|
+
const normalized = normalizeRequestBody(effectiveProvider, bodyJson, log);
|
|
1003
1196
|
if (normalized.modified) {
|
|
1004
1197
|
bodyJson = normalized.body;
|
|
1005
1198
|
bodyModified = true;
|
|
@@ -1050,23 +1243,43 @@ function startProxy(options) {
|
|
|
1050
1243
|
forwardHeaders[key] = Array.isArray(value) ? value.join(", ") : value;
|
|
1051
1244
|
}
|
|
1052
1245
|
}
|
|
1053
|
-
//
|
|
1054
|
-
|
|
1246
|
+
// For cross-provider override, remove ALL auth-related headers from the original request
|
|
1247
|
+
// This prevents the original provider's API key from being forwarded to the target provider
|
|
1248
|
+
if (crossProviderOverride) {
|
|
1249
|
+
const authHeaders = ["authorization", "x-api-key", "api-key", "x-goog-api-key"];
|
|
1250
|
+
for (const key of Object.keys(forwardHeaders)) {
|
|
1251
|
+
if (authHeaders.includes(key.toLowerCase())) {
|
|
1252
|
+
delete forwardHeaders[key];
|
|
1253
|
+
log.info(`[PROXY] Removed ${key} header for cross-provider override`);
|
|
1254
|
+
}
|
|
1255
|
+
}
|
|
1256
|
+
}
|
|
1257
|
+
// Inject API key (use effective provider for cross-provider override)
|
|
1258
|
+
const providerKey = providerKeys[effectiveProvider];
|
|
1259
|
+
if (crossProviderOverride) {
|
|
1260
|
+
// Debug: show available provider keys for troubleshooting
|
|
1261
|
+
const availableProviders = Object.keys(providerKeys);
|
|
1262
|
+
log.info(`[PROXY] Cross-provider: looking for "${effectiveProvider}" key, available: [${availableProviders.join(", ")}]`);
|
|
1263
|
+
// Show key lengths for debugging
|
|
1264
|
+
const keyLengths = availableProviders.map(p => `${p}:${providerKeys[p]?.length ?? 0}`).join(", ");
|
|
1265
|
+
log.info(`[PROXY] Key lengths: ${keyLengths}`);
|
|
1266
|
+
}
|
|
1055
1267
|
if (providerKey) {
|
|
1056
|
-
const authHeader = (0, shared_1.getProviderAuthHeader)(
|
|
1268
|
+
const authHeader = (0, shared_1.getProviderAuthHeader)(effectiveProvider, providerKey, useNativeApi && !crossProviderOverride);
|
|
1057
1269
|
if (authHeader) {
|
|
1058
1270
|
const existingAuthKey = Object.keys(forwardHeaders).find(k => k.toLowerCase() === authHeader.name.toLowerCase());
|
|
1059
1271
|
if (existingAuthKey)
|
|
1060
1272
|
delete forwardHeaders[existingAuthKey];
|
|
1061
1273
|
forwardHeaders[authHeader.name] = authHeader.value;
|
|
1062
|
-
|
|
1274
|
+
const maskedKey = providerKey.length > 12 ? `${providerKey.slice(0, 8)}...${providerKey.slice(-4)}` : "****";
|
|
1275
|
+
log.info(`[PROXY] Injected ${authHeader.name}=${maskedKey} (len=${providerKey.length}) for ${effectiveProvider}${crossProviderOverride ? " (cross-provider)" : ""}${useNativeApi ? " (native API)" : ""}`);
|
|
1063
1276
|
}
|
|
1064
1277
|
}
|
|
1065
1278
|
else {
|
|
1066
|
-
log.warn(`[PROXY] No API key configured for provider: ${
|
|
1279
|
+
log.warn(`[PROXY] No API key configured for provider: ${effectiveProvider}`);
|
|
1067
1280
|
}
|
|
1068
1281
|
// Add provider-specific required headers
|
|
1069
|
-
if (
|
|
1282
|
+
if (effectiveProvider === "anthropic") {
|
|
1070
1283
|
// Anthropic requires anthropic-version header
|
|
1071
1284
|
if (!forwardHeaders["anthropic-version"]) {
|
|
1072
1285
|
forwardHeaders["anthropic-version"] = "2023-06-01";
|
|
@@ -1113,30 +1326,191 @@ function startProxy(options) {
|
|
|
1113
1326
|
const contentType = providerResponse.headers.get("content-type") ?? "";
|
|
1114
1327
|
const isSSE = contentType.includes("text/event-stream");
|
|
1115
1328
|
if (isSSE && providerResponse.body) {
|
|
1116
|
-
//
|
|
1329
|
+
// Determine stream conversion direction BEFORE setting headers
|
|
1330
|
+
// Case 1: OpenAI-compatible client → Anthropic target (convert Anthropic SSE → OpenAI SSE)
|
|
1331
|
+
const needsAnthropicToOpenai = crossProviderOverride && effectiveProvider === "anthropic" && crossProviderOverride.originalProvider !== "anthropic";
|
|
1332
|
+
// Case 2: Anthropic client → OpenAI-compatible target (convert OpenAI SSE → Anthropic SSE)
|
|
1333
|
+
const needsOpenaiToAnthropic = crossProviderOverride && crossProviderOverride.originalProvider === "anthropic" && effectiveProvider !== "anthropic";
|
|
1334
|
+
// Streaming response - build headers carefully
|
|
1117
1335
|
const responseHeaders = {};
|
|
1336
|
+
// Headers to skip when forwarding SSE response
|
|
1337
|
+
const skipHeaders = new Set([
|
|
1338
|
+
"content-encoding", // fetch() auto-decompresses, so this would be wrong
|
|
1339
|
+
"content-length", // SSE is streamed, no fixed length
|
|
1340
|
+
"transfer-encoding", // Let Node.js handle this
|
|
1341
|
+
"connection", // Let Node.js handle this
|
|
1342
|
+
]);
|
|
1343
|
+
// Headers to skip when doing cross-provider override (provider-specific headers)
|
|
1344
|
+
const providerSpecificHeaders = new Set([
|
|
1345
|
+
"x-request-id",
|
|
1346
|
+
"openai-processing-ms",
|
|
1347
|
+
"openai-organization",
|
|
1348
|
+
"openai-version",
|
|
1349
|
+
"x-ratelimit-limit-requests",
|
|
1350
|
+
"x-ratelimit-limit-tokens",
|
|
1351
|
+
"x-ratelimit-remaining-requests",
|
|
1352
|
+
"x-ratelimit-remaining-tokens",
|
|
1353
|
+
"x-ratelimit-reset-requests",
|
|
1354
|
+
"x-ratelimit-reset-tokens",
|
|
1355
|
+
]);
|
|
1118
1356
|
providerResponse.headers.forEach((value, key) => {
|
|
1357
|
+
const lowerKey = key.toLowerCase();
|
|
1358
|
+
if (skipHeaders.has(lowerKey))
|
|
1359
|
+
return;
|
|
1360
|
+
if (crossProviderOverride && providerSpecificHeaders.has(lowerKey))
|
|
1361
|
+
return;
|
|
1119
1362
|
responseHeaders[key] = value;
|
|
1120
1363
|
});
|
|
1364
|
+
// Ensure correct headers for SSE
|
|
1365
|
+
responseHeaders["Content-Type"] = "text/event-stream; charset=utf-8";
|
|
1366
|
+
responseHeaders["Cache-Control"] = "no-cache";
|
|
1367
|
+
responseHeaders["Connection"] = "keep-alive";
|
|
1368
|
+
responseHeaders["X-Accel-Buffering"] = "no"; // Disable nginx buffering if behind nginx
|
|
1121
1369
|
res.writeHead(providerResponse.status, responseHeaders);
|
|
1370
|
+
res.flushHeaders(); // Ensure headers are sent immediately
|
|
1122
1371
|
const chunks = [];
|
|
1123
1372
|
let accumulatedSize = 0;
|
|
1124
1373
|
const reader = providerResponse.body.getReader();
|
|
1374
|
+
let streamState = null;
|
|
1375
|
+
let reverseStreamState = null;
|
|
1376
|
+
let lineBuffer = "";
|
|
1377
|
+
// Use StringDecoder to handle multi-byte UTF-8 characters split across chunks
|
|
1378
|
+
const utf8Decoder = new node_string_decoder_1.StringDecoder("utf8");
|
|
1379
|
+
if (needsAnthropicToOpenai) {
|
|
1380
|
+
streamState = (0, shared_1.createStreamingConverterState)();
|
|
1381
|
+
log.info(`[PROXY] Converting Anthropic SSE stream → OpenAI format`);
|
|
1382
|
+
}
|
|
1383
|
+
else if (needsOpenaiToAnthropic) {
|
|
1384
|
+
reverseStreamState = (0, shared_1.createOpenAIToAnthropicStreamState)();
|
|
1385
|
+
log.info(`[PROXY] Converting OpenAI SSE stream → Anthropic format`);
|
|
1386
|
+
}
|
|
1125
1387
|
try {
|
|
1126
1388
|
for (;;) {
|
|
1127
1389
|
const { done, value } = await reader.read();
|
|
1128
1390
|
if (done)
|
|
1129
1391
|
break;
|
|
1130
1392
|
const buf = Buffer.from(value);
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1393
|
+
if (needsAnthropicToOpenai && streamState) {
|
|
1394
|
+
// Transform Anthropic SSE to OpenAI SSE
|
|
1395
|
+
// Use StringDecoder to properly handle multi-byte UTF-8 characters
|
|
1396
|
+
lineBuffer += utf8Decoder.write(buf);
|
|
1397
|
+
const lines = lineBuffer.split("\n");
|
|
1398
|
+
lineBuffer = lines.pop() ?? ""; // Keep incomplete line for next chunk
|
|
1399
|
+
for (const line of lines) {
|
|
1400
|
+
if (line.startsWith("data: ")) {
|
|
1401
|
+
const data = line.slice(6).trim();
|
|
1402
|
+
if (data && data !== "[DONE]") {
|
|
1403
|
+
try {
|
|
1404
|
+
const event = JSON.parse(data);
|
|
1405
|
+
const openaiChunks = (0, shared_1.anthropicSseToOpenaiChunks)(event, streamState, requestedModel ?? undefined);
|
|
1406
|
+
for (const chunk of openaiChunks) {
|
|
1407
|
+
const sseData = (0, shared_1.formatOpenAISSELine)(chunk);
|
|
1408
|
+
res.write(sseData);
|
|
1409
|
+
accumulatedSize += sseData.length;
|
|
1410
|
+
}
|
|
1411
|
+
}
|
|
1412
|
+
catch (e) {
|
|
1413
|
+
log.debug(`[PROXY] Failed to parse Anthropic SSE event: ${data}`);
|
|
1414
|
+
}
|
|
1415
|
+
}
|
|
1416
|
+
}
|
|
1417
|
+
}
|
|
1418
|
+
// Store original for metrics
|
|
1419
|
+
if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
|
|
1420
|
+
chunks.push(buf);
|
|
1421
|
+
}
|
|
1422
|
+
}
|
|
1423
|
+
else if (needsOpenaiToAnthropic && reverseStreamState) {
|
|
1424
|
+
// Transform OpenAI SSE to Anthropic SSE
|
|
1425
|
+
// Use StringDecoder to properly handle multi-byte UTF-8 characters
|
|
1426
|
+
lineBuffer += utf8Decoder.write(buf);
|
|
1427
|
+
const lines = lineBuffer.split("\n");
|
|
1428
|
+
lineBuffer = lines.pop() ?? ""; // Keep incomplete line for next chunk
|
|
1429
|
+
for (const line of lines) {
|
|
1430
|
+
if (line.startsWith("data: ")) {
|
|
1431
|
+
const data = line.slice(6).trim();
|
|
1432
|
+
if (data && data !== "[DONE]") {
|
|
1433
|
+
try {
|
|
1434
|
+
const chunk = JSON.parse(data);
|
|
1435
|
+
const anthropicLines = (0, shared_1.openaiChunkToAnthropicSse)(chunk, reverseStreamState, requestedModel ?? undefined);
|
|
1436
|
+
for (const sseLine of anthropicLines) {
|
|
1437
|
+
log.info(`[PROXY] Anthropic SSE: ${sseLine.slice(0, 150).replace(/\n/g, "\\n")}`);
|
|
1438
|
+
res.write(sseLine);
|
|
1439
|
+
accumulatedSize += sseLine.length;
|
|
1440
|
+
}
|
|
1441
|
+
}
|
|
1442
|
+
catch (e) {
|
|
1443
|
+
log.debug(`[PROXY] Failed to parse OpenAI SSE chunk: ${data}`);
|
|
1444
|
+
}
|
|
1445
|
+
}
|
|
1446
|
+
}
|
|
1447
|
+
}
|
|
1448
|
+
// Store original for metrics
|
|
1449
|
+
if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
|
|
1450
|
+
chunks.push(buf);
|
|
1451
|
+
}
|
|
1452
|
+
}
|
|
1453
|
+
else {
|
|
1454
|
+
// No conversion needed, pass through
|
|
1455
|
+
res.write(buf);
|
|
1456
|
+
accumulatedSize += buf.length;
|
|
1457
|
+
if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
|
|
1458
|
+
chunks.push(buf);
|
|
1459
|
+
}
|
|
1460
|
+
}
|
|
1461
|
+
}
|
|
1462
|
+
// Handle any remaining data in lineBuffer for OpenAI → Anthropic conversion
|
|
1463
|
+
if (needsOpenaiToAnthropic && reverseStreamState && lineBuffer.trim()) {
|
|
1464
|
+
if (lineBuffer.startsWith("data: ")) {
|
|
1465
|
+
const data = lineBuffer.slice(6).trim();
|
|
1466
|
+
if (data && data !== "[DONE]") {
|
|
1467
|
+
try {
|
|
1468
|
+
const chunk = JSON.parse(data);
|
|
1469
|
+
const anthropicLines = (0, shared_1.openaiChunkToAnthropicSse)(chunk, reverseStreamState, requestedModel ?? undefined);
|
|
1470
|
+
for (const sseLine of anthropicLines) {
|
|
1471
|
+
log.info(`[PROXY] Anthropic SSE (final): ${sseLine.slice(0, 150).replace(/\n/g, "\\n")}`);
|
|
1472
|
+
res.write(sseLine);
|
|
1473
|
+
}
|
|
1474
|
+
}
|
|
1475
|
+
catch (e) {
|
|
1476
|
+
log.debug(`[PROXY] Failed to parse final OpenAI SSE chunk: ${data}`);
|
|
1477
|
+
}
|
|
1478
|
+
}
|
|
1479
|
+
}
|
|
1480
|
+
}
|
|
1481
|
+
// Send done markers for converted streams
|
|
1482
|
+
if (needsAnthropicToOpenai) {
|
|
1483
|
+
res.write((0, shared_1.formatOpenAISSEDone)());
|
|
1484
|
+
}
|
|
1485
|
+
// For OpenAI → Anthropic conversion, ensure proper stream finalization
|
|
1486
|
+
// This handles cases where the OpenAI stream ended without a finish_reason chunk
|
|
1487
|
+
if (needsOpenaiToAnthropic && reverseStreamState) {
|
|
1488
|
+
if (!(0, shared_1.isOpenAIToAnthropicStreamFinalized)(reverseStreamState)) {
|
|
1489
|
+
log.info(`[PROXY] OpenAI stream ended without proper finalization, sending closing events`);
|
|
1490
|
+
const finalLines = (0, shared_1.finalizeOpenAIToAnthropicStream)(reverseStreamState);
|
|
1491
|
+
for (const sseLine of finalLines) {
|
|
1492
|
+
log.info(`[PROXY] Anthropic SSE (finalize): ${sseLine.slice(0, 150).replace(/\n/g, "\\n")}`);
|
|
1493
|
+
res.write(sseLine);
|
|
1494
|
+
}
|
|
1135
1495
|
}
|
|
1136
1496
|
}
|
|
1137
1497
|
}
|
|
1138
1498
|
catch (error) {
|
|
1139
1499
|
log.error("Stream read error", { err: error instanceof Error ? error.message : String(error) });
|
|
1500
|
+
// Even on error, try to finalize the Anthropic stream
|
|
1501
|
+
if (needsOpenaiToAnthropic && reverseStreamState && reverseStreamState.sentMessageStart) {
|
|
1502
|
+
try {
|
|
1503
|
+
if (!(0, shared_1.isOpenAIToAnthropicStreamFinalized)(reverseStreamState)) {
|
|
1504
|
+
const finalLines = (0, shared_1.finalizeOpenAIToAnthropicStream)(reverseStreamState);
|
|
1505
|
+
for (const sseLine of finalLines) {
|
|
1506
|
+
res.write(sseLine);
|
|
1507
|
+
}
|
|
1508
|
+
}
|
|
1509
|
+
}
|
|
1510
|
+
catch {
|
|
1511
|
+
// Ignore errors during error recovery
|
|
1512
|
+
}
|
|
1513
|
+
}
|
|
1140
1514
|
}
|
|
1141
1515
|
finally {
|
|
1142
1516
|
res.end();
|
|
@@ -1144,7 +1518,8 @@ function startProxy(options) {
|
|
|
1144
1518
|
const latencyMs = Date.now() - requestStart;
|
|
1145
1519
|
const fullBody = Buffer.concat(chunks);
|
|
1146
1520
|
try {
|
|
1147
|
-
|
|
1521
|
+
// Use effective provider for metrics extraction
|
|
1522
|
+
extractStreamingMetrics(effectiveProvider, providerResponse.status, fullBody, latencyMs, effectiveAgentId, requestedModel);
|
|
1148
1523
|
}
|
|
1149
1524
|
catch (error) {
|
|
1150
1525
|
log.error("Streaming metric extraction error", { err: error instanceof Error ? error.message : String(error) });
|
|
@@ -1162,26 +1537,70 @@ function startProxy(options) {
|
|
|
1162
1537
|
return;
|
|
1163
1538
|
}
|
|
1164
1539
|
const latencyMs = Date.now() - requestStart;
|
|
1540
|
+
// Transform response if cross-provider override
|
|
1541
|
+
let finalResponseBody = responseBodyBuffer;
|
|
1542
|
+
let responseConverted = false;
|
|
1543
|
+
if (crossProviderOverride && providerResponse.status < 400) {
|
|
1544
|
+
// Case 1: OpenAI-compatible client → Anthropic target
|
|
1545
|
+
// Need to convert Anthropic response → OpenAI format
|
|
1546
|
+
if (effectiveProvider === "anthropic" && crossProviderOverride.originalProvider !== "anthropic") {
|
|
1547
|
+
try {
|
|
1548
|
+
const anthropicResponse = JSON.parse(responseBodyBuffer.toString("utf-8"));
|
|
1549
|
+
const openaiResponse = (0, shared_1.anthropicToOpenai)(anthropicResponse, requestedModel ?? undefined);
|
|
1550
|
+
finalResponseBody = Buffer.from(JSON.stringify(openaiResponse), "utf-8");
|
|
1551
|
+
responseConverted = true;
|
|
1552
|
+
log.info(`[PROXY] Converted Anthropic response → OpenAI format`);
|
|
1553
|
+
}
|
|
1554
|
+
catch (e) {
|
|
1555
|
+
log.error(`[PROXY] Failed to convert Anthropic response: ${e instanceof Error ? e.message : String(e)}`);
|
|
1556
|
+
}
|
|
1557
|
+
}
|
|
1558
|
+
// Case 2: Anthropic client → OpenAI-compatible target
|
|
1559
|
+
// Need to convert OpenAI response → Anthropic format
|
|
1560
|
+
else if (crossProviderOverride.originalProvider === "anthropic" && effectiveProvider !== "anthropic") {
|
|
1561
|
+
try {
|
|
1562
|
+
const openaiResponse = JSON.parse(responseBodyBuffer.toString("utf-8"));
|
|
1563
|
+
const anthropicResponse = (0, shared_1.openaiToAnthropicResponse)(openaiResponse, requestedModel ?? undefined);
|
|
1564
|
+
finalResponseBody = Buffer.from(JSON.stringify(anthropicResponse), "utf-8");
|
|
1565
|
+
responseConverted = true;
|
|
1566
|
+
log.info(`[PROXY] Converted OpenAI response → Anthropic format`);
|
|
1567
|
+
}
|
|
1568
|
+
catch (e) {
|
|
1569
|
+
log.error(`[PROXY] Failed to convert OpenAI response: ${e instanceof Error ? e.message : String(e)}`);
|
|
1570
|
+
}
|
|
1571
|
+
}
|
|
1572
|
+
}
|
|
1165
1573
|
const responseHeaders = {};
|
|
1166
1574
|
providerResponse.headers.forEach((value, key) => {
|
|
1167
1575
|
if (key.toLowerCase() === "transfer-encoding")
|
|
1168
1576
|
return;
|
|
1577
|
+
// Update content-length if we transformed the body
|
|
1578
|
+
if (key.toLowerCase() === "content-length" && responseConverted) {
|
|
1579
|
+
responseHeaders[key] = String(finalResponseBody.length);
|
|
1580
|
+
return;
|
|
1581
|
+
}
|
|
1169
1582
|
responseHeaders[key] = value;
|
|
1170
1583
|
});
|
|
1171
|
-
//
|
|
1584
|
+
// Log error responses (INFO level for cross-provider, DEBUG for others)
|
|
1172
1585
|
if (providerResponse.status >= 400) {
|
|
1173
1586
|
try {
|
|
1174
1587
|
const errorBody = responseBodyBuffer.toString("utf-8").slice(0, 2000);
|
|
1175
|
-
|
|
1588
|
+
if (crossProviderOverride) {
|
|
1589
|
+
log.info(`[PROXY] Cross-provider error (${providerResponse.status}): ${errorBody}${responseBodyBuffer.length > 2000 ? "... (truncated)" : ""}`);
|
|
1590
|
+
}
|
|
1591
|
+
else {
|
|
1592
|
+
log.debug(`[PROXY] Error response body: ${errorBody}${responseBodyBuffer.length > 2000 ? "... (truncated)" : ""}`);
|
|
1593
|
+
}
|
|
1176
1594
|
}
|
|
1177
1595
|
catch {
|
|
1178
1596
|
log.debug(`[PROXY] Error response body: (binary, ${responseBodyBuffer.length} bytes)`);
|
|
1179
1597
|
}
|
|
1180
1598
|
}
|
|
1181
1599
|
res.writeHead(providerResponse.status, responseHeaders);
|
|
1182
|
-
res.end(
|
|
1600
|
+
res.end(finalResponseBody);
|
|
1183
1601
|
try {
|
|
1184
|
-
|
|
1602
|
+
// Use effective provider for metrics, but pass original response for parsing
|
|
1603
|
+
extractAndQueueMetrics(effectiveProvider, providerResponse.status, responseBodyBuffer, latencyMs, effectiveAgentId, requestedModel);
|
|
1185
1604
|
}
|
|
1186
1605
|
catch (error) {
|
|
1187
1606
|
log.error("Metric extraction error", { err: error instanceof Error ? error.message : String(error) });
|