@agentgazer/proxy 0.3.5 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,27 +35,32 @@ var __importStar = (this && this.__importStar) || (function () {
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.startProxy = startProxy;
  const http = __importStar(require("node:http"));
+ const node_string_decoder_1 = require("node:string_decoder");
  const shared_1 = require("@agentgazer/shared");
  const server_1 = require("@agentgazer/server");
  const modelOverrideCache = {};
  const MODEL_OVERRIDE_CACHE_TTL_MS = 30_000; // 30 seconds
  function getModelOverride(db, agentId, provider) {
+ const noOverride = { model: null, targetProvider: null };
  if (!db)
- return null;
+ return noOverride;
  const cacheKey = `${agentId}:${provider}`;
  const cached = modelOverrideCache[cacheKey];
  if (cached && cached.expiresAt > Date.now()) {
- return cached.model_override;
+ return cached.result;
  }
  // Fetch from DB
  const rule = (0, server_1.getModelRule)(db, agentId, provider);
- const modelOverride = rule?.model_override ?? null;
+ const result = {
+ model: rule?.model_override ?? null,
+ targetProvider: rule?.target_provider ?? null,
+ };
  // Cache the result
  modelOverrideCache[cacheKey] = {
- model_override: modelOverride,
+ result,
  expiresAt: Date.now() + MODEL_OVERRIDE_CACHE_TTL_MS,
  };
- return modelOverride;
+ return result;
  }
  const log = (0, shared_1.createLogger)("proxy");
  const event_buffer_js_1 = require("./event-buffer.js");
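In the hunk above, getModelOverride now returns a { model, targetProvider } pair instead of a bare string, and the 30-second cache stores that pair, so a looked-up "no rule" result is itself cacheable. A minimal sketch of the new flow (lookupRule is a hypothetical stand-in for server_1.getModelRule; everything else mirrors the diff):

    const cache = {};
    const TTL_MS = 30_000; // same 30-second TTL as MODEL_OVERRIDE_CACHE_TTL_MS
    function getOverride(db, agentId, provider) {
        if (!db) return { model: null, targetProvider: null };
        const key = `${agentId}:${provider}`;
        const hit = cache[key];
        if (hit && hit.expiresAt > Date.now()) return hit.result; // serve the cached pair
        const rule = lookupRule(db, agentId, provider); // hypothetical stand-in
        const result = {
            model: rule?.model_override ?? null,
            targetProvider: rule?.target_provider ?? null, // new in 0.3.7
        };
        cache[key] = { result, expiresAt: Date.now() + TTL_MS };
        return result;
    }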
@@ -110,7 +115,11 @@ function normalizeRequestBody(provider, body, log) {
  let modified = false;
  const changes = [];
  // Fields that only OpenAI supports (top-level)
- const openaiOnlyFields = ["store", "metadata", "parallel_tool_calls", "stream_options"];
+ const openaiOnlyFields = ["store", "metadata", "parallel_tool_calls"];
+ // Providers that support stream_options for usage tracking
+ const streamOptionsProviders = new Set(["openai", "deepseek", "moonshot", "zhipu", "minimax", "yi", "baichuan"]);
+ // Providers that don't support stream_options at all
+ const noStreamOptionsProviders = new Set(["anthropic", "google", "mistral", "cohere"]);
  // max_completion_tokens -> max_tokens conversion for non-OpenAI providers
  if (provider !== "openai" && "max_completion_tokens" in result) {
  if (!("max_tokens" in result)) {
@@ -164,6 +173,29 @@ function normalizeRequestBody(provider, body, log) {
  }
  }
  }
+ // Handle stream_options for usage tracking
+ if (result.stream === true) {
+ if (streamOptionsProviders.has(provider)) {
+ // Add stream_options.include_usage for providers that support it
+ const existingStreamOptions = result.stream_options;
+ if (!existingStreamOptions?.include_usage) {
+ result.stream_options = {
+ ...existingStreamOptions,
+ include_usage: true,
+ };
+ changes.push("+stream_options.include_usage");
+ modified = true;
+ }
+ }
+ else if (noStreamOptionsProviders.has(provider)) {
+ // Remove stream_options for providers that don't support it
+ if ("stream_options" in result) {
+ delete result.stream_options;
+ changes.push("-stream_options");
+ modified = true;
+ }
+ }
+ }
  // Provider-specific handling
  switch (provider) {
  case "mistral":
@@ -180,7 +212,7 @@ function normalizeRequestBody(provider, body, log) {
  case "cohere":
  // Cohere uses different field names and doesn't support some OpenAI fields
  // See: https://docs.cohere.com/reference/chat
- const cohereUnsupported = ["top_logprobs", "n", "user", "stream_options"];
+ const cohereUnsupported = ["top_logprobs", "n", "user"];
  for (const field of cohereUnsupported) {
  if (field in result) {
  delete result[field];
@@ -238,12 +270,20 @@ function parseAnthropicSSE(dataLines, statusCode) {
  let model = null;
  let tokensIn = null;
  let tokensOut = null;
+ let cacheCreationTokens = null;
+ let cacheReadTokens = null;
  for (const line of dataLines) {
  try {
  const data = JSON.parse(line);
  if (data.type === "message_start" && data.message) {
  model = data.message.model ?? null;
- tokensIn = data.message.usage?.input_tokens ?? null;
+ const usage = data.message.usage;
+ if (usage) {
+ tokensIn = usage.input_tokens ?? null;
+ // Anthropic prompt caching: include cache tokens in input count
+ cacheCreationTokens = usage.cache_creation_input_tokens ?? null;
+ cacheReadTokens = usage.cache_read_input_tokens ?? null;
+ }
  }
  if (data.type === "message_delta" && data.usage) {
  tokensOut = data.usage.output_tokens ?? null;
@@ -253,10 +293,22 @@ function parseAnthropicSSE(dataLines, statusCode) {
  continue;
  }
  }
- const tokensTotal = tokensIn != null && tokensOut != null ? tokensIn + tokensOut : null;
+ // Total input includes regular + cache tokens
+ // Note: cache_creation is charged at 1.25x, cache_read at 0.1x
+ // For now we count all as regular input tokens for simplicity
+ let totalInputTokens = tokensIn;
+ if (totalInputTokens != null) {
+ if (cacheCreationTokens != null) {
+ totalInputTokens += cacheCreationTokens;
+ }
+ if (cacheReadTokens != null) {
+ totalInputTokens += cacheReadTokens;
+ }
+ }
+ const tokensTotal = totalInputTokens != null && tokensOut != null ? totalInputTokens + tokensOut : null;
  return {
  model,
- tokensIn,
+ tokensIn: totalInputTokens,
  tokensOut,
  tokensTotal,
  statusCode,
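A worked example of the new input accounting, with invented counts:

    // Hypothetical Anthropic stream:
    //   message_start usage: { input_tokens: 100, cache_creation_input_tokens: 2000,
    //                          cache_read_input_tokens: 500 }
    //   message_delta usage: { output_tokens: 300 }
    // 0.3.5 reported tokensIn = 100, tokensTotal = 100 + 300 = 400.
    // 0.3.7 reports  tokensIn = 100 + 2000 + 500 = 2600, tokensTotal = 2600 + 300 = 2900.
    // Per the inline note, this is an approximation for counting purposes: cache
    // writes bill at about 1.25x the base input rate and cache reads at about 0.1x,
    // which is why the raw cache counts are also passed to calculateCost below.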
@@ -292,6 +344,77 @@ function parseGoogleSSE(dataLines, statusCode) {
  errorMessage: null,
  };
  }
+ /**
+ * Parse Google's streaming response format (JSON array chunks, not standard SSE).
+ * Google Gemini API returns streaming data as: [{"candidates":...}, {"candidates":...}]
+ * Each chunk may or may not have usageMetadata - typically only the last chunk has it.
+ */
+ function parseGoogleStreamingResponse(rawText, statusCode) {
+ let model = null;
+ let tokensIn = null;
+ let tokensOut = null;
+ let tokensTotal = null;
+ // Try to extract JSON objects from the response
+ // Google streaming format: [{"candidates":...},\n{"candidates":...}]
+ // We need to handle the array brackets and commas between objects
+ // First, try parsing as a complete JSON array
+ try {
+ const cleanedText = rawText.trim();
+ if (cleanedText.startsWith("[")) {
+ const data = JSON.parse(cleanedText);
+ if (Array.isArray(data)) {
+ for (const chunk of data) {
+ if (chunk.modelVersion)
+ model = chunk.modelVersion;
+ if (chunk.usageMetadata) {
+ tokensIn = chunk.usageMetadata.promptTokenCount ?? null;
+ tokensOut = chunk.usageMetadata.candidatesTokenCount ?? null;
+ tokensTotal = chunk.usageMetadata.totalTokenCount ?? null;
+ }
+ }
+ }
+ }
+ }
+ catch {
+ // Not a valid JSON array, try line-by-line parsing
+ }
+ // If we didn't find data, try extracting individual JSON objects
+ if (tokensIn === null && tokensOut === null) {
+ // Remove array brackets and split by object boundaries
+ const cleanedText = rawText
+ .replace(/^\s*\[\s*/, "") // Remove leading [
+ .replace(/\s*\]\s*$/, "") // Remove trailing ]
+ .replace(/^\s*,\s*/gm, "") // Remove leading commas on each line
+ .trim();
+ // Try to find and parse JSON objects
+ const objectMatches = cleanedText.match(/\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}/g);
+ if (objectMatches) {
+ for (const objStr of objectMatches) {
+ try {
+ const data = JSON.parse(objStr);
+ if (data.modelVersion)
+ model = data.modelVersion;
+ if (data.usageMetadata) {
+ tokensIn = data.usageMetadata.promptTokenCount ?? null;
+ tokensOut = data.usageMetadata.candidatesTokenCount ?? null;
+ tokensTotal = data.usageMetadata.totalTokenCount ?? null;
+ }
+ }
+ catch {
+ continue;
+ }
+ }
+ }
+ }
+ return {
+ model,
+ tokensIn,
+ tokensOut,
+ tokensTotal,
+ statusCode,
+ errorMessage: null,
+ };
+ }
  function parseCohereSSE(dataLines, statusCode) {
  let tokensIn = null;
  let tokensOut = null;
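For reference, an abridged sketch of the non-SSE body this parser targets (all values invented; modelVersion and usageMetadata are the field names the code actually reads):

    // [{"candidates":[...],"modelVersion":"gemini-1.5-flash"},
    //  {"candidates":[...],"modelVersion":"gemini-1.5-flash",
    //   "usageMetadata":{"promptTokenCount":12,"candidatesTokenCount":34,"totalTokenCount":46}}]
    // parseGoogleStreamingResponse(body, 200) would yield:
    //   { model: "gemini-1.5-flash", tokensIn: 12, tokensOut: 34, tokensTotal: 46,
    //     statusCode: 200, errorMessage: null }
    // The regex fallback covers truncated bodies where the closing "]" never arrives.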
@@ -330,6 +453,18 @@ function parseSSEResponse(provider, sseText, statusCode) {
  dataLines.push(line.slice(6));
  }
  }
+ // Google Gemini API uses a different streaming format (JSON array, not SSE)
+ // Handle it specially even if no "data: " lines found
+ if (provider === "google") {
+ if (dataLines.length > 0) {
+ // Google with SSE format (when ?alt=sse is used)
+ return parseGoogleSSE(dataLines, statusCode);
+ }
+ else {
+ // Google's default JSON array streaming format
+ return parseGoogleStreamingResponse(sseText, statusCode);
+ }
+ }
  if (dataLines.length === 0)
  return null;
  switch (provider) {
@@ -343,8 +478,6 @@ function parseSSEResponse(provider, sseText, statusCode) {
  return parseOpenAISSE(dataLines, statusCode);
  case "anthropic":
  return parseAnthropicSSE(dataLines, statusCode);
- case "google":
- return parseGoogleSSE(dataLines, statusCode);
  case "cohere":
  return parseCohereSSE(dataLines, statusCode);
  default:
@@ -802,9 +935,15 @@ function startProxy(options) {
  log.warn(`No parseable SSE data for provider: ${provider} — skipping event`);
  return;
  }
+ // Use requestedModel as fallback when provider doesn't return model in response
+ // (Google Gemini API typically doesn't include model in streaming response)
+ const effectiveModel = parsed.model ?? requestedModel;
  let costUsd = null;
- if (parsed.model && parsed.tokensIn != null && parsed.tokensOut != null) {
- costUsd = (0, shared_1.calculateCost)(parsed.model, parsed.tokensIn, parsed.tokensOut);
+ if (effectiveModel && parsed.tokensIn != null && parsed.tokensOut != null) {
+ costUsd = (0, shared_1.calculateCost)(effectiveModel, parsed.tokensIn, parsed.tokensOut, {
+ cacheCreation: parsed.cacheCreationTokens ?? undefined,
+ cacheRead: parsed.cacheReadTokens ?? undefined,
+ }, provider);
  }
  // Record response for loop detection
  loop_detector_js_1.loopDetector.recordResponse(effectiveAgentId, sseText);
@@ -812,7 +951,7 @@ function startProxy(options) {
  agent_id: effectiveAgentId,
  event_type: "llm_call",
  provider,
- model: parsed.model,
+ model: effectiveModel,
  requested_model: requestedModel,
  tokens_in: parsed.tokensIn,
  tokens_out: parsed.tokensOut,
@@ -845,10 +984,16 @@ function startProxy(options) {
  log.warn(`No parser result for provider: ${provider}`);
  return;
  }
+ // Use requestedModel as fallback when provider doesn't return model in response
+ // (Google Gemini API typically doesn't include model in response)
+ const effectiveModel = parsed.model ?? requestedModel;
  // Calculate cost if we have the necessary token data
  let costUsd = null;
- if (parsed.model && parsed.tokensIn != null && parsed.tokensOut != null) {
- costUsd = (0, shared_1.calculateCost)(parsed.model, parsed.tokensIn, parsed.tokensOut);
+ if (effectiveModel && parsed.tokensIn != null && parsed.tokensOut != null) {
+ costUsd = (0, shared_1.calculateCost)(effectiveModel, parsed.tokensIn, parsed.tokensOut, {
+ cacheCreation: parsed.cacheCreationTokens ?? undefined,
+ cacheRead: parsed.cacheReadTokens ?? undefined,
+ }, provider);
  }
  // Record response for loop detection
  loop_detector_js_1.loopDetector.recordResponse(effectiveAgentId, responseBody.toString("utf-8"));
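Both call sites above now hand cache-token counts and the provider name to calculateCost. The shape below is inferred from these call sites alone; the real signature lives in @agentgazer/shared and is not part of this diff:

    const { calculateCost } = require("@agentgazer/shared");
    // Inferred: calculateCost(model, tokensIn, tokensOut, cacheTokens?, provider?)
    const costUsd = calculateCost("claude-3-5-sonnet", 2600, 300, {
        cacheCreation: 2000, // billed above the base input rate
        cacheRead: 500,      // billed below the base input rate
    }, "anthropic");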
@@ -856,7 +1001,7 @@ function startProxy(options) {
  agent_id: effectiveAgentId,
  event_type: "llm_call",
  provider,
- model: parsed.model,
+ model: effectiveModel,
  requested_model: requestedModel,
  tokens_in: parsed.tokensIn,
  tokens_out: parsed.tokensOut,
@@ -985,21 +1130,75 @@ function startProxy(options) {
  // Model override and request normalization
  let requestedModel = null;
  let modifiedRequestBody = requestBody;
+ let crossProviderOverride = null;
+ let effectiveProvider = provider; // May change if cross-provider override
+ let isStreaming = false;
  try {
  let bodyJson = JSON.parse(requestBody.toString("utf-8"));
  let bodyModified = false;
- // Extract and optionally override model
+ isStreaming = bodyJson.stream === true;
+ // Extract model from request body if present
  if (bodyJson.model) {
  requestedModel = bodyJson.model;
- const modelOverride = getModelOverride(db, effectiveAgentId, provider);
- if (modelOverride) {
- log.info(`[PROXY] Model override: ${requestedModel} ${modelOverride}`);
- bodyJson.model = modelOverride;
+ }
+ // Always check for model override rules (even if request has no model)
+ // This handles providers like Google where model is in URL, not body
+ const override = getModelOverride(db, effectiveAgentId, provider);
+ // Apply model override if configured
+ if (override.model) {
+ if (requestedModel) {
+ log.info(`[PROXY] Model override: ${requestedModel} → ${override.model}`);
+ }
+ else {
+ log.info(`[PROXY] Model override (no model in request): → ${override.model}`);
+ }
+ bodyJson.model = override.model;
+ bodyModified = true;
+ }
+ // Handle cross-provider override
+ if (override.targetProvider && override.targetProvider !== provider && shared_1.KNOWN_PROVIDER_NAMES.includes(override.targetProvider)) {
+ const targetProv = override.targetProvider;
+ log.info(`[PROXY] Cross-provider override: ${provider} → ${targetProv}`);
+ // Check if we have an API key for the target provider
+ if (!providerKeys[targetProv]) {
+ log.error(`[PROXY] No API key configured for target provider: ${targetProv}`);
+ sendJson(res, 400, { error: `Cross-provider override failed: no API key for ${targetProv}` });
+ return;
+ }
+ crossProviderOverride = { targetProvider: targetProv, originalProvider: provider };
+ effectiveProvider = targetProv;
+ // Transform request format if needed
+ if (provider !== "anthropic" && targetProv === "anthropic") {
+ // OpenAI-compatible or Google → Anthropic
+ // For Google, we need to ensure the request has required fields
+ if (provider === "google") {
+ // Google native format → need to convert to OpenAI first, then to Anthropic
+ // For now, assume the request is already in a compatible format or
+ // the client is using OpenAI-compatible format through the Google endpoint
+ log.info(`[PROXY] Converting Google request → Anthropic`);
+ }
+ const anthropicRequest = (0, shared_1.openaiToAnthropic)(bodyJson);
+ bodyJson = anthropicRequest;
+ bodyModified = true;
+ log.info(`[PROXY] Transformed request: ${provider} → Anthropic`);
+ }
+ else if (provider === "anthropic" && targetProv !== "anthropic") {
+ // Anthropic → OpenAI-compatible
+ const openaiRequest = (0, shared_1.anthropicToOpenaiRequest)(bodyJson);
+ bodyJson = openaiRequest;
  bodyModified = true;
+ log.info(`[PROXY] Transformed request: Anthropic → OpenAI`);
+ }
+ // Other cases (OpenAI-compatible → OpenAI-compatible) don't need transformation
+ // Update target URL for cross-provider
+ const newEndpoint = (0, shared_1.getProviderChatEndpoint)(targetProv);
+ if (newEndpoint) {
+ targetUrl = newEndpoint;
+ log.info(`[PROXY] Redirecting to: ${targetUrl}`);
  }
  }
  // Normalize request body for provider compatibility
- const normalized = normalizeRequestBody(provider, bodyJson, log);
+ const normalized = normalizeRequestBody(effectiveProvider, bodyJson, log);
  if (normalized.modified) {
  bodyJson = normalized.body;
  bodyModified = true;
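Read end to end, a single rule row can now repoint an agent at a different provider. An illustrative rule and its effect (the row is hypothetical; the field names match the rule?.model_override and rule?.target_provider reads above):

    // Rule for agent "agent-1" on requests addressed to provider "openai":
    //   { model_override: "claude-sonnet-example", target_provider: "anthropic" }
    // On a matching request the proxy:
    //   1. rewrites bodyJson.model to the override model;
    //   2. sets effectiveProvider to "anthropic", converts the body with
    //      openaiToAnthropic(), and repoints targetUrl via getProviderChatEndpoint();
    //   3. fails fast with HTTP 400 if providerKeys["anthropic"] is missing.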
@@ -1050,23 +1249,43 @@ function startProxy(options) {
  forwardHeaders[key] = Array.isArray(value) ? value.join(", ") : value;
  }
  }
- // Inject API key
- const providerKey = providerKeys[provider];
+ // For cross-provider override, remove ALL auth-related headers from the original request
+ // This prevents the original provider's API key from being forwarded to the target provider
+ if (crossProviderOverride) {
+ const authHeaders = ["authorization", "x-api-key", "api-key", "x-goog-api-key"];
+ for (const key of Object.keys(forwardHeaders)) {
+ if (authHeaders.includes(key.toLowerCase())) {
+ delete forwardHeaders[key];
+ log.info(`[PROXY] Removed ${key} header for cross-provider override`);
+ }
+ }
+ }
+ // Inject API key (use effective provider for cross-provider override)
+ const providerKey = providerKeys[effectiveProvider];
+ if (crossProviderOverride) {
+ // Debug: show available provider keys for troubleshooting
+ const availableProviders = Object.keys(providerKeys);
+ log.info(`[PROXY] Cross-provider: looking for "${effectiveProvider}" key, available: [${availableProviders.join(", ")}]`);
+ // Show key lengths for debugging
+ const keyLengths = availableProviders.map(p => `${p}:${providerKeys[p]?.length ?? 0}`).join(", ");
+ log.info(`[PROXY] Key lengths: ${keyLengths}`);
+ }
  if (providerKey) {
- const authHeader = (0, shared_1.getProviderAuthHeader)(provider, providerKey, useNativeApi);
+ const authHeader = (0, shared_1.getProviderAuthHeader)(effectiveProvider, providerKey, useNativeApi && !crossProviderOverride);
  if (authHeader) {
  const existingAuthKey = Object.keys(forwardHeaders).find(k => k.toLowerCase() === authHeader.name.toLowerCase());
  if (existingAuthKey)
  delete forwardHeaders[existingAuthKey];
  forwardHeaders[authHeader.name] = authHeader.value;
- log.info(`[PROXY] Injected ${authHeader.name} header for ${provider}${useNativeApi ? " (native API)" : ""}`);
+ const maskedKey = providerKey.length > 12 ? `${providerKey.slice(0, 8)}...${providerKey.slice(-4)}` : "****";
+ log.info(`[PROXY] Injected ${authHeader.name}=${maskedKey} (len=${providerKey.length}) for ${effectiveProvider}${crossProviderOverride ? " (cross-provider)" : ""}${useNativeApi ? " (native API)" : ""}`);
  }
  }
  else {
- log.warn(`[PROXY] No API key configured for provider: ${provider}`);
+ log.warn(`[PROXY] No API key configured for provider: ${effectiveProvider}`);
  }
  // Add provider-specific required headers
- if (provider === "anthropic") {
+ if (effectiveProvider === "anthropic") {
  // Anthropic requires anthropic-version header
  if (!forwardHeaders["anthropic-version"]) {
  forwardHeaders["anthropic-version"] = "2023-06-01";
@@ -1113,30 +1332,191 @@ function startProxy(options) {
  const contentType = providerResponse.headers.get("content-type") ?? "";
  const isSSE = contentType.includes("text/event-stream");
  if (isSSE && providerResponse.body) {
- // Streaming response
+ // Determine stream conversion direction BEFORE setting headers
+ // Case 1: OpenAI-compatible client → Anthropic target (convert Anthropic SSE → OpenAI SSE)
+ const needsAnthropicToOpenai = crossProviderOverride && effectiveProvider === "anthropic" && crossProviderOverride.originalProvider !== "anthropic";
+ // Case 2: Anthropic client → OpenAI-compatible target (convert OpenAI SSE → Anthropic SSE)
+ const needsOpenaiToAnthropic = crossProviderOverride && crossProviderOverride.originalProvider === "anthropic" && effectiveProvider !== "anthropic";
+ // Streaming response - build headers carefully
  const responseHeaders = {};
+ // Headers to skip when forwarding SSE response
+ const skipHeaders = new Set([
+ "content-encoding", // fetch() auto-decompresses, so this would be wrong
+ "content-length", // SSE is streamed, no fixed length
+ "transfer-encoding", // Let Node.js handle this
+ "connection", // Let Node.js handle this
+ ]);
+ // Headers to skip when doing cross-provider override (provider-specific headers)
+ const providerSpecificHeaders = new Set([
+ "x-request-id",
+ "openai-processing-ms",
+ "openai-organization",
+ "openai-version",
+ "x-ratelimit-limit-requests",
+ "x-ratelimit-limit-tokens",
+ "x-ratelimit-remaining-requests",
+ "x-ratelimit-remaining-tokens",
+ "x-ratelimit-reset-requests",
+ "x-ratelimit-reset-tokens",
+ ]);
  providerResponse.headers.forEach((value, key) => {
+ const lowerKey = key.toLowerCase();
+ if (skipHeaders.has(lowerKey))
+ return;
+ if (crossProviderOverride && providerSpecificHeaders.has(lowerKey))
+ return;
  responseHeaders[key] = value;
  });
+ // Ensure correct headers for SSE
+ responseHeaders["Content-Type"] = "text/event-stream; charset=utf-8";
+ responseHeaders["Cache-Control"] = "no-cache";
+ responseHeaders["Connection"] = "keep-alive";
+ responseHeaders["X-Accel-Buffering"] = "no"; // Disable nginx buffering if behind nginx
  res.writeHead(providerResponse.status, responseHeaders);
+ res.flushHeaders(); // Ensure headers are sent immediately
  const chunks = [];
  let accumulatedSize = 0;
  const reader = providerResponse.body.getReader();
+ let streamState = null;
+ let reverseStreamState = null;
+ let lineBuffer = "";
+ // Use StringDecoder to handle multi-byte UTF-8 characters split across chunks
+ const utf8Decoder = new node_string_decoder_1.StringDecoder("utf8");
+ if (needsAnthropicToOpenai) {
+ streamState = (0, shared_1.createStreamingConverterState)();
+ log.info(`[PROXY] Converting Anthropic SSE stream → OpenAI format`);
+ }
+ else if (needsOpenaiToAnthropic) {
+ reverseStreamState = (0, shared_1.createOpenAIToAnthropicStreamState)();
+ log.info(`[PROXY] Converting OpenAI SSE stream → Anthropic format`);
+ }
  try {
  for (;;) {
  const { done, value } = await reader.read();
  if (done)
  break;
  const buf = Buffer.from(value);
- res.write(buf);
- accumulatedSize += buf.length;
- if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
- chunks.push(buf);
+ if (needsAnthropicToOpenai && streamState) {
+ // Transform Anthropic SSE to OpenAI SSE
+ // Use StringDecoder to properly handle multi-byte UTF-8 characters
+ lineBuffer += utf8Decoder.write(buf);
+ const lines = lineBuffer.split("\n");
+ lineBuffer = lines.pop() ?? ""; // Keep incomplete line for next chunk
+ for (const line of lines) {
+ if (line.startsWith("data: ")) {
+ const data = line.slice(6).trim();
+ if (data && data !== "[DONE]") {
+ try {
+ const event = JSON.parse(data);
+ const openaiChunks = (0, shared_1.anthropicSseToOpenaiChunks)(event, streamState, requestedModel ?? undefined);
+ for (const chunk of openaiChunks) {
+ const sseData = (0, shared_1.formatOpenAISSELine)(chunk);
+ res.write(sseData);
+ accumulatedSize += sseData.length;
+ }
+ }
+ catch (e) {
+ log.debug(`[PROXY] Failed to parse Anthropic SSE event: ${data}`);
+ }
+ }
+ }
+ }
+ // Store original for metrics
+ if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
+ chunks.push(buf);
+ }
+ }
+ else if (needsOpenaiToAnthropic && reverseStreamState) {
+ // Transform OpenAI SSE to Anthropic SSE
+ // Use StringDecoder to properly handle multi-byte UTF-8 characters
+ lineBuffer += utf8Decoder.write(buf);
+ const lines = lineBuffer.split("\n");
+ lineBuffer = lines.pop() ?? ""; // Keep incomplete line for next chunk
+ for (const line of lines) {
+ if (line.startsWith("data: ")) {
+ const data = line.slice(6).trim();
+ if (data && data !== "[DONE]") {
+ try {
+ const chunk = JSON.parse(data);
+ const anthropicLines = (0, shared_1.openaiChunkToAnthropicSse)(chunk, reverseStreamState, requestedModel ?? undefined);
+ for (const sseLine of anthropicLines) {
+ log.info(`[PROXY] Anthropic SSE: ${sseLine.slice(0, 150).replace(/\n/g, "\\n")}`);
+ res.write(sseLine);
+ accumulatedSize += sseLine.length;
+ }
+ }
+ catch (e) {
+ log.debug(`[PROXY] Failed to parse OpenAI SSE chunk: ${data}`);
+ }
+ }
+ }
+ }
+ // Store original for metrics
+ if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
+ chunks.push(buf);
+ }
+ }
+ else {
+ // No conversion needed, pass through
+ res.write(buf);
+ accumulatedSize += buf.length;
+ if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
+ chunks.push(buf);
+ }
+ }
+ }
+ // Handle any remaining data in lineBuffer for OpenAI → Anthropic conversion
+ if (needsOpenaiToAnthropic && reverseStreamState && lineBuffer.trim()) {
+ if (lineBuffer.startsWith("data: ")) {
+ const data = lineBuffer.slice(6).trim();
+ if (data && data !== "[DONE]") {
+ try {
+ const chunk = JSON.parse(data);
+ const anthropicLines = (0, shared_1.openaiChunkToAnthropicSse)(chunk, reverseStreamState, requestedModel ?? undefined);
+ for (const sseLine of anthropicLines) {
+ log.info(`[PROXY] Anthropic SSE (final): ${sseLine.slice(0, 150).replace(/\n/g, "\\n")}`);
+ res.write(sseLine);
+ }
+ }
+ catch (e) {
+ log.debug(`[PROXY] Failed to parse final OpenAI SSE chunk: ${data}`);
+ }
+ }
+ }
+ }
+ // Send done markers for converted streams
+ if (needsAnthropicToOpenai) {
+ res.write((0, shared_1.formatOpenAISSEDone)());
+ }
+ // For OpenAI → Anthropic conversion, ensure proper stream finalization
+ // This handles cases where the OpenAI stream ended without a finish_reason chunk
+ if (needsOpenaiToAnthropic && reverseStreamState) {
+ if (!(0, shared_1.isOpenAIToAnthropicStreamFinalized)(reverseStreamState)) {
+ log.info(`[PROXY] OpenAI stream ended without proper finalization, sending closing events`);
+ const finalLines = (0, shared_1.finalizeOpenAIToAnthropicStream)(reverseStreamState);
+ for (const sseLine of finalLines) {
+ log.info(`[PROXY] Anthropic SSE (finalize): ${sseLine.slice(0, 150).replace(/\n/g, "\\n")}`);
+ res.write(sseLine);
+ }
  }
  }
  }
  catch (error) {
  log.error("Stream read error", { err: error instanceof Error ? error.message : String(error) });
+ // Even on error, try to finalize the Anthropic stream
+ if (needsOpenaiToAnthropic && reverseStreamState && reverseStreamState.sentMessageStart) {
+ try {
+ if (!(0, shared_1.isOpenAIToAnthropicStreamFinalized)(reverseStreamState)) {
+ const finalLines = (0, shared_1.finalizeOpenAIToAnthropicStream)(reverseStreamState);
+ for (const sseLine of finalLines) {
+ res.write(sseLine);
+ }
+ }
+ }
+ catch {
+ // Ignore errors during error recovery
+ }
+ }
  }
  finally {
  res.end();
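The chunk loop above leans on two details: StringDecoder withholds the bytes of a partially received UTF-8 code point, and the last element of split("\n") is carried over so a line cut across network chunks is reassembled before parsing. A self-contained illustration (chunk bytes contrived to split a two-byte character):

    const { StringDecoder } = require("node:string_decoder");
    const decoder = new StringDecoder("utf8");
    let lineBuffer = "";
    // "é" is 0xC3 0xA9 in UTF-8; here its two bytes arrive in different chunks
    const arrivals = [Buffer.from([0x64, 0x61, 0x74, 0x61, 0x3a, 0x20, 0xc3]), Buffer.from([0xa9, 0x0a])];
    for (const buf of arrivals) {
        lineBuffer += decoder.write(buf); // partial code point is held back, never mangled
        const lines = lineBuffer.split("\n");
        lineBuffer = lines.pop() ?? ""; // incomplete line waits for the next chunk
        for (const line of lines) console.log(JSON.stringify(line)); // prints "data: é" once
    }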
@@ -1144,7 +1524,8 @@ function startProxy(options) {
  const latencyMs = Date.now() - requestStart;
  const fullBody = Buffer.concat(chunks);
  try {
- extractStreamingMetrics(provider, providerResponse.status, fullBody, latencyMs, effectiveAgentId, requestedModel);
+ // Use effective provider for metrics extraction
+ extractStreamingMetrics(effectiveProvider, providerResponse.status, fullBody, latencyMs, effectiveAgentId, requestedModel);
  }
  catch (error) {
  log.error("Streaming metric extraction error", { err: error instanceof Error ? error.message : String(error) });
@@ -1162,26 +1543,70 @@ function startProxy(options) {
  return;
  }
  const latencyMs = Date.now() - requestStart;
+ // Transform response if cross-provider override
+ let finalResponseBody = responseBodyBuffer;
+ let responseConverted = false;
+ if (crossProviderOverride && providerResponse.status < 400) {
+ // Case 1: OpenAI-compatible client → Anthropic target
+ // Need to convert Anthropic response → OpenAI format
+ if (effectiveProvider === "anthropic" && crossProviderOverride.originalProvider !== "anthropic") {
+ try {
+ const anthropicResponse = JSON.parse(responseBodyBuffer.toString("utf-8"));
+ const openaiResponse = (0, shared_1.anthropicToOpenai)(anthropicResponse, requestedModel ?? undefined);
+ finalResponseBody = Buffer.from(JSON.stringify(openaiResponse), "utf-8");
+ responseConverted = true;
+ log.info(`[PROXY] Converted Anthropic response → OpenAI format`);
+ }
+ catch (e) {
+ log.error(`[PROXY] Failed to convert Anthropic response: ${e instanceof Error ? e.message : String(e)}`);
+ }
+ }
+ // Case 2: Anthropic client → OpenAI-compatible target
+ // Need to convert OpenAI response → Anthropic format
+ else if (crossProviderOverride.originalProvider === "anthropic" && effectiveProvider !== "anthropic") {
+ try {
+ const openaiResponse = JSON.parse(responseBodyBuffer.toString("utf-8"));
+ const anthropicResponse = (0, shared_1.openaiToAnthropicResponse)(openaiResponse, requestedModel ?? undefined);
+ finalResponseBody = Buffer.from(JSON.stringify(anthropicResponse), "utf-8");
+ responseConverted = true;
+ log.info(`[PROXY] Converted OpenAI response → Anthropic format`);
+ }
+ catch (e) {
+ log.error(`[PROXY] Failed to convert OpenAI response: ${e instanceof Error ? e.message : String(e)}`);
+ }
+ }
+ }
  const responseHeaders = {};
  providerResponse.headers.forEach((value, key) => {
  if (key.toLowerCase() === "transfer-encoding")
  return;
+ // Update content-length if we transformed the body
+ if (key.toLowerCase() === "content-length" && responseConverted) {
+ responseHeaders[key] = String(finalResponseBody.length);
+ return;
+ }
  responseHeaders[key] = value;
  });
- // Debug log error responses
+ // Log error responses (INFO level for cross-provider, DEBUG for others)
  if (providerResponse.status >= 400) {
  try {
  const errorBody = responseBodyBuffer.toString("utf-8").slice(0, 2000);
- log.debug(`[PROXY] Error response body: ${errorBody}${responseBodyBuffer.length > 2000 ? "... (truncated)" : ""}`);
+ if (crossProviderOverride) {
+ log.info(`[PROXY] Cross-provider error (${providerResponse.status}): ${errorBody}${responseBodyBuffer.length > 2000 ? "... (truncated)" : ""}`);
+ }
+ else {
+ log.debug(`[PROXY] Error response body: ${errorBody}${responseBodyBuffer.length > 2000 ? "... (truncated)" : ""}`);
+ }
  }
  catch {
  log.debug(`[PROXY] Error response body: (binary, ${responseBodyBuffer.length} bytes)`);
  }
  }
  res.writeHead(providerResponse.status, responseHeaders);
- res.end(responseBodyBuffer);
+ res.end(finalResponseBody);
  try {
- extractAndQueueMetrics(provider, providerResponse.status, responseBodyBuffer, latencyMs, effectiveAgentId, requestedModel);
+ // Use effective provider for metrics, but pass original response for parsing
+ extractAndQueueMetrics(effectiveProvider, providerResponse.status, responseBodyBuffer, latencyMs, effectiveAgentId, requestedModel);
  }
  catch (error) {
  log.error("Metric extraction error", { err: error instanceof Error ? error.message : String(error) });