@agentgazer/proxy 0.3.4 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,27 +35,32 @@ var __importStar = (this && this.__importStar) || (function () {
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
36
  exports.startProxy = startProxy;
37
37
  const http = __importStar(require("node:http"));
38
+ const node_string_decoder_1 = require("node:string_decoder");
38
39
  const shared_1 = require("@agentgazer/shared");
39
40
  const server_1 = require("@agentgazer/server");
40
41
  const modelOverrideCache = {};
41
42
  const MODEL_OVERRIDE_CACHE_TTL_MS = 30_000; // 30 seconds
42
43
  function getModelOverride(db, agentId, provider) {
44
+ const noOverride = { model: null, targetProvider: null };
43
45
  if (!db)
44
- return null;
46
+ return noOverride;
45
47
  const cacheKey = `${agentId}:${provider}`;
46
48
  const cached = modelOverrideCache[cacheKey];
47
49
  if (cached && cached.expiresAt > Date.now()) {
48
- return cached.model_override;
50
+ return cached.result;
49
51
  }
50
52
  // Fetch from DB
51
53
  const rule = (0, server_1.getModelRule)(db, agentId, provider);
52
- const modelOverride = rule?.model_override ?? null;
54
+ const result = {
55
+ model: rule?.model_override ?? null,
56
+ targetProvider: rule?.target_provider ?? null,
57
+ };
53
58
  // Cache the result
54
59
  modelOverrideCache[cacheKey] = {
55
- model_override: modelOverride,
60
+ result,
56
61
  expiresAt: Date.now() + MODEL_OVERRIDE_CACHE_TTL_MS,
57
62
  };
58
- return modelOverride;
63
+ return result;
59
64
  }
60
65
  const log = (0, shared_1.createLogger)("proxy");
61
66
  const event_buffer_js_1 = require("./event-buffer.js");
@@ -110,7 +115,11 @@ function normalizeRequestBody(provider, body, log) {
110
115
  let modified = false;
111
116
  const changes = [];
112
117
  // Fields that only OpenAI supports (top-level)
113
- const openaiOnlyFields = ["store", "metadata", "parallel_tool_calls", "stream_options"];
118
+ const openaiOnlyFields = ["store", "metadata", "parallel_tool_calls"];
119
+ // Providers that support stream_options for usage tracking
120
+ const streamOptionsProviders = new Set(["openai", "deepseek", "moonshot", "zhipu", "minimax", "yi", "baichuan"]);
121
+ // Providers that don't support stream_options at all
122
+ const noStreamOptionsProviders = new Set(["anthropic", "google", "mistral", "cohere"]);
114
123
  // max_completion_tokens -> max_tokens conversion for non-OpenAI providers
115
124
  if (provider !== "openai" && "max_completion_tokens" in result) {
116
125
  if (!("max_tokens" in result)) {
@@ -164,6 +173,29 @@ function normalizeRequestBody(provider, body, log) {
164
173
  }
165
174
  }
166
175
  }
176
+ // Handle stream_options for usage tracking
177
+ if (result.stream === true) {
178
+ if (streamOptionsProviders.has(provider)) {
179
+ // Add stream_options.include_usage for providers that support it
180
+ const existingStreamOptions = result.stream_options;
181
+ if (!existingStreamOptions?.include_usage) {
182
+ result.stream_options = {
183
+ ...existingStreamOptions,
184
+ include_usage: true,
185
+ };
186
+ changes.push("+stream_options.include_usage");
187
+ modified = true;
188
+ }
189
+ }
190
+ else if (noStreamOptionsProviders.has(provider)) {
191
+ // Remove stream_options for providers that don't support it
192
+ if ("stream_options" in result) {
193
+ delete result.stream_options;
194
+ changes.push("-stream_options");
195
+ modified = true;
196
+ }
197
+ }
198
+ }
167
199
  // Provider-specific handling
168
200
  switch (provider) {
169
201
  case "mistral":
@@ -180,7 +212,7 @@ function normalizeRequestBody(provider, body, log) {
180
212
  case "cohere":
181
213
  // Cohere uses different field names and doesn't support some OpenAI fields
182
214
  // See: https://docs.cohere.com/reference/chat
183
- const cohereUnsupported = ["top_logprobs", "n", "user", "stream_options"];
215
+ const cohereUnsupported = ["top_logprobs", "n", "user"];
184
216
  for (const field of cohereUnsupported) {
185
217
  if (field in result) {
186
218
  delete result[field];
@@ -238,12 +270,20 @@ function parseAnthropicSSE(dataLines, statusCode) {
238
270
  let model = null;
239
271
  let tokensIn = null;
240
272
  let tokensOut = null;
273
+ let cacheCreationTokens = null;
274
+ let cacheReadTokens = null;
241
275
  for (const line of dataLines) {
242
276
  try {
243
277
  const data = JSON.parse(line);
244
278
  if (data.type === "message_start" && data.message) {
245
279
  model = data.message.model ?? null;
246
- tokensIn = data.message.usage?.input_tokens ?? null;
280
+ const usage = data.message.usage;
281
+ if (usage) {
282
+ tokensIn = usage.input_tokens ?? null;
283
+ // Anthropic prompt caching: include cache tokens in input count
284
+ cacheCreationTokens = usage.cache_creation_input_tokens ?? null;
285
+ cacheReadTokens = usage.cache_read_input_tokens ?? null;
286
+ }
247
287
  }
248
288
  if (data.type === "message_delta" && data.usage) {
249
289
  tokensOut = data.usage.output_tokens ?? null;
@@ -253,10 +293,22 @@ function parseAnthropicSSE(dataLines, statusCode) {
253
293
  continue;
254
294
  }
255
295
  }
256
- const tokensTotal = tokensIn != null && tokensOut != null ? tokensIn + tokensOut : null;
296
+ // Total input includes regular + cache tokens
297
+ // Note: cache_creation is charged at 1.25x, cache_read at 0.1x
298
+ // For now we count all as regular input tokens for simplicity
299
+ let totalInputTokens = tokensIn;
300
+ if (totalInputTokens != null) {
301
+ if (cacheCreationTokens != null) {
302
+ totalInputTokens += cacheCreationTokens;
303
+ }
304
+ if (cacheReadTokens != null) {
305
+ totalInputTokens += cacheReadTokens;
306
+ }
307
+ }
308
+ const tokensTotal = totalInputTokens != null && tokensOut != null ? totalInputTokens + tokensOut : null;
257
309
  return {
258
310
  model,
259
- tokensIn,
311
+ tokensIn: totalInputTokens,
260
312
  tokensOut,
261
313
  tokensTotal,
262
314
  statusCode,
@@ -292,6 +344,77 @@ function parseGoogleSSE(dataLines, statusCode) {
292
344
  errorMessage: null,
293
345
  };
294
346
  }
347
/**
 * Split a text blob into the source text of each top-level JSON object it contains.
 *
 * The scan is brace-balanced and string-aware: braces that appear inside JSON
 * string values (including after escaped quotes) do not affect nesting depth.
 * Anything outside a top-level `{...}` (array brackets, commas, whitespace) is
 * ignored, and an object that is still open when the text ends is dropped.
 *
 * @param {string} text - Raw text, possibly a truncated JSON array.
 * @returns {string[]} Source text of every complete top-level object, in order.
 */
function extractTopLevelJsonObjects(text) {
    const objects = [];
    let depth = 0;
    let start = -1;
    let inString = false;
    let escaped = false;
    for (let i = 0; i < text.length; i += 1) {
        const ch = text[i];
        if (inString) {
            if (escaped) {
                escaped = false;
            }
            else if (ch === "\\") {
                escaped = true;
            }
            else if (ch === '"') {
                inString = false;
            }
            continue;
        }
        if (ch === "{") {
            if (depth === 0) {
                start = i;
            }
            depth += 1;
        }
        else if (depth === 0) {
            // Between objects: skip separators without tracking strings.
            continue;
        }
        else if (ch === '"') {
            inString = true;
        }
        else if (ch === "}") {
            depth -= 1;
            if (depth === 0) {
                objects.push(text.slice(start, i + 1));
            }
        }
    }
    return objects;
}
/**
 * Parse Google's streaming response format (JSON array chunks, not standard SSE).
 * Google Gemini API returns streaming data as: [{"candidates":...}, {"candidates":...}]
 * Each chunk may or may not have usageMetadata - typically only the last chunk has it.
 *
 * Strategy:
 *   1. Parse the whole text as a JSON array and read every chunk.
 *   2. If that yields no token counts (for example the buffer was truncated so
 *      the array is not valid JSON), recover each complete top-level object
 *      individually and read those instead.
 *
 * When several chunks carry `usageMetadata`, the last one wins.
 *
 * @param {string} rawText - Accumulated response body text.
 * @param {number} statusCode - HTTP status of the provider response, echoed back.
 * @returns {{ model: (string|null), tokensIn: (number|null), tokensOut: (number|null),
 *             tokensTotal: (number|null), statusCode: number, errorMessage: null }}
 */
function parseGoogleStreamingResponse(rawText, statusCode) {
    let model = null;
    let tokensIn = null;
    let tokensOut = null;
    let tokensTotal = null;
    const text = typeof rawText === "string" ? rawText : "";
    // Fold one decoded chunk into the running result; non-object chunks are ignored.
    const readChunk = (chunk) => {
        if (chunk === null || typeof chunk !== "object") {
            return;
        }
        if (chunk.modelVersion) {
            model = chunk.modelVersion;
        }
        const usage = chunk.usageMetadata;
        if (usage) {
            tokensIn = usage.promptTokenCount ?? null;
            tokensOut = usage.candidatesTokenCount ?? null;
            tokensTotal = usage.totalTokenCount ?? null;
        }
    };
    // First, try parsing as a complete JSON array
    try {
        const trimmed = text.trim();
        if (trimmed.startsWith("[")) {
            const data = JSON.parse(trimmed);
            if (Array.isArray(data)) {
                for (const chunk of data) {
                    readChunk(chunk);
                }
            }
        }
    }
    catch {
        // Not a valid JSON array (e.g. truncated); fall through to per-object recovery.
    }
    // If we didn't find usage data, recover complete top-level objects one by one.
    // A balanced scan is required here: real chunks nest several levels deep
    // (candidates -> content -> parts), which a fixed-depth regex cannot match.
    if (tokensIn === null && tokensOut === null) {
        for (const objStr of extractTopLevelJsonObjects(text)) {
            try {
                readChunk(JSON.parse(objStr));
            }
            catch {
                // Balanced but not valid JSON: best-effort parsing, skip this span.
                continue;
            }
        }
    }
    return {
        model,
        tokensIn,
        tokensOut,
        tokensTotal,
        statusCode,
        errorMessage: null,
    };
}
295
418
  function parseCohereSSE(dataLines, statusCode) {
296
419
  let tokensIn = null;
297
420
  let tokensOut = null;
@@ -330,6 +453,18 @@ function parseSSEResponse(provider, sseText, statusCode) {
330
453
  dataLines.push(line.slice(6));
331
454
  }
332
455
  }
456
+ // Google Gemini API uses a different streaming format (JSON array, not SSE)
457
+ // Handle it specially even if no "data: " lines found
458
+ if (provider === "google") {
459
+ if (dataLines.length > 0) {
460
+ // Google with SSE format (when ?alt=sse is used)
461
+ return parseGoogleSSE(dataLines, statusCode);
462
+ }
463
+ else {
464
+ // Google's default JSON array streaming format
465
+ return parseGoogleStreamingResponse(sseText, statusCode);
466
+ }
467
+ }
333
468
  if (dataLines.length === 0)
334
469
  return null;
335
470
  switch (provider) {
@@ -343,8 +478,6 @@ function parseSSEResponse(provider, sseText, statusCode) {
343
478
  return parseOpenAISSE(dataLines, statusCode);
344
479
  case "anthropic":
345
480
  return parseAnthropicSSE(dataLines, statusCode);
346
- case "google":
347
- return parseGoogleSSE(dataLines, statusCode);
348
481
  case "cohere":
349
482
  return parseCohereSSE(dataLines, statusCode);
350
483
  default:
@@ -802,9 +935,12 @@ function startProxy(options) {
802
935
  log.warn(`No parseable SSE data for provider: ${provider} — skipping event`);
803
936
  return;
804
937
  }
938
+ // Use requestedModel as fallback when provider doesn't return model in response
939
+ // (Google Gemini API typically doesn't include model in streaming response)
940
+ const effectiveModel = parsed.model ?? requestedModel;
805
941
  let costUsd = null;
806
- if (parsed.model && parsed.tokensIn != null && parsed.tokensOut != null) {
807
- costUsd = (0, shared_1.calculateCost)(parsed.model, parsed.tokensIn, parsed.tokensOut);
942
+ if (effectiveModel && parsed.tokensIn != null && parsed.tokensOut != null) {
943
+ costUsd = (0, shared_1.calculateCost)(effectiveModel, parsed.tokensIn, parsed.tokensOut);
808
944
  }
809
945
  // Record response for loop detection
810
946
  loop_detector_js_1.loopDetector.recordResponse(effectiveAgentId, sseText);
@@ -812,7 +948,7 @@ function startProxy(options) {
812
948
  agent_id: effectiveAgentId,
813
949
  event_type: "llm_call",
814
950
  provider,
815
- model: parsed.model,
951
+ model: effectiveModel,
816
952
  requested_model: requestedModel,
817
953
  tokens_in: parsed.tokensIn,
818
954
  tokens_out: parsed.tokensOut,
@@ -845,10 +981,13 @@ function startProxy(options) {
845
981
  log.warn(`No parser result for provider: ${provider}`);
846
982
  return;
847
983
  }
984
+ // Use requestedModel as fallback when provider doesn't return model in response
985
+ // (Google Gemini API typically doesn't include model in response)
986
+ const effectiveModel = parsed.model ?? requestedModel;
848
987
  // Calculate cost if we have the necessary token data
849
988
  let costUsd = null;
850
- if (parsed.model && parsed.tokensIn != null && parsed.tokensOut != null) {
851
- costUsd = (0, shared_1.calculateCost)(parsed.model, parsed.tokensIn, parsed.tokensOut);
989
+ if (effectiveModel && parsed.tokensIn != null && parsed.tokensOut != null) {
990
+ costUsd = (0, shared_1.calculateCost)(effectiveModel, parsed.tokensIn, parsed.tokensOut);
852
991
  }
853
992
  // Record response for loop detection
854
993
  loop_detector_js_1.loopDetector.recordResponse(effectiveAgentId, responseBody.toString("utf-8"));
@@ -856,7 +995,7 @@ function startProxy(options) {
856
995
  agent_id: effectiveAgentId,
857
996
  event_type: "llm_call",
858
997
  provider,
859
- model: parsed.model,
998
+ model: effectiveModel,
860
999
  requested_model: requestedModel,
861
1000
  tokens_in: parsed.tokensIn,
862
1001
  tokens_out: parsed.tokensOut,
@@ -985,21 +1124,75 @@ function startProxy(options) {
985
1124
  // Model override and request normalization
986
1125
  let requestedModel = null;
987
1126
  let modifiedRequestBody = requestBody;
1127
+ let crossProviderOverride = null;
1128
+ let effectiveProvider = provider; // May change if cross-provider override
1129
+ let isStreaming = false;
988
1130
  try {
989
1131
  let bodyJson = JSON.parse(requestBody.toString("utf-8"));
990
1132
  let bodyModified = false;
991
- // Extract and optionally override model
1133
+ isStreaming = bodyJson.stream === true;
1134
+ // Extract model from request body if present
992
1135
  if (bodyJson.model) {
993
1136
  requestedModel = bodyJson.model;
994
- const modelOverride = getModelOverride(db, effectiveAgentId, provider);
995
- if (modelOverride) {
996
- log.info(`[PROXY] Model override: ${requestedModel} ${modelOverride}`);
997
- bodyJson.model = modelOverride;
1137
+ }
1138
+ // Always check for model override rules (even if request has no model)
1139
+ // This handles providers like Google where model is in URL, not body
1140
+ const override = getModelOverride(db, effectiveAgentId, provider);
1141
+ // Apply model override if configured
1142
+ if (override.model) {
1143
+ if (requestedModel) {
1144
+ log.info(`[PROXY] Model override: ${requestedModel} → ${override.model}`);
1145
+ }
1146
+ else {
1147
+ log.info(`[PROXY] Model override (no model in request): → ${override.model}`);
1148
+ }
1149
+ bodyJson.model = override.model;
1150
+ bodyModified = true;
1151
+ }
1152
+ // Handle cross-provider override
1153
+ if (override.targetProvider && override.targetProvider !== provider && shared_1.KNOWN_PROVIDER_NAMES.includes(override.targetProvider)) {
1154
+ const targetProv = override.targetProvider;
1155
+ log.info(`[PROXY] Cross-provider override: ${provider} → ${targetProv}`);
1156
+ // Check if we have an API key for the target provider
1157
+ if (!providerKeys[targetProv]) {
1158
+ log.error(`[PROXY] No API key configured for target provider: ${targetProv}`);
1159
+ sendJson(res, 400, { error: `Cross-provider override failed: no API key for ${targetProv}` });
1160
+ return;
1161
+ }
1162
+ crossProviderOverride = { targetProvider: targetProv, originalProvider: provider };
1163
+ effectiveProvider = targetProv;
1164
+ // Transform request format if needed
1165
+ if (provider !== "anthropic" && targetProv === "anthropic") {
1166
+ // OpenAI-compatible or Google → Anthropic
1167
+ // For Google, we need to ensure the request has required fields
1168
+ if (provider === "google") {
1169
+ // Google native format → need to convert to OpenAI first, then to Anthropic
1170
+ // For now, assume the request is already in a compatible format or
1171
+ // the client is using OpenAI-compatible format through the Google endpoint
1172
+ log.info(`[PROXY] Converting Google request → Anthropic`);
1173
+ }
1174
+ const anthropicRequest = (0, shared_1.openaiToAnthropic)(bodyJson);
1175
+ bodyJson = anthropicRequest;
1176
+ bodyModified = true;
1177
+ log.info(`[PROXY] Transformed request: ${provider} → Anthropic`);
1178
+ }
1179
+ else if (provider === "anthropic" && targetProv !== "anthropic") {
1180
+ // Anthropic → OpenAI-compatible
1181
+ const openaiRequest = (0, shared_1.anthropicToOpenaiRequest)(bodyJson);
1182
+ bodyJson = openaiRequest;
998
1183
  bodyModified = true;
1184
+ log.info(`[PROXY] Transformed request: Anthropic → OpenAI`);
1185
+ }
1186
+ // Other cases (OpenAI-compatible → OpenAI-compatible) don't need transformation
1187
+ // Update target URL for cross-provider
1188
+ const newEndpoint = (0, shared_1.getProviderChatEndpoint)(targetProv);
1189
+ if (newEndpoint) {
1190
+ targetUrl = newEndpoint;
1191
+ log.info(`[PROXY] Redirecting to: ${targetUrl}`);
999
1192
  }
1000
1193
  }
1001
1194
  // Normalize request body for provider compatibility
1002
- const normalized = normalizeRequestBody(provider, bodyJson, log);
1195
+ const normalized = normalizeRequestBody(effectiveProvider, bodyJson, log);
1003
1196
  if (normalized.modified) {
1004
1197
  bodyJson = normalized.body;
1005
1198
  bodyModified = true;
@@ -1050,23 +1243,43 @@ function startProxy(options) {
1050
1243
  forwardHeaders[key] = Array.isArray(value) ? value.join(", ") : value;
1051
1244
  }
1052
1245
  }
1053
- // Inject API key
1054
- const providerKey = providerKeys[provider];
1246
+ // For cross-provider override, remove ALL auth-related headers from the original request
1247
+ // This prevents the original provider's API key from being forwarded to the target provider
1248
+ if (crossProviderOverride) {
1249
+ const authHeaders = ["authorization", "x-api-key", "api-key", "x-goog-api-key"];
1250
+ for (const key of Object.keys(forwardHeaders)) {
1251
+ if (authHeaders.includes(key.toLowerCase())) {
1252
+ delete forwardHeaders[key];
1253
+ log.info(`[PROXY] Removed ${key} header for cross-provider override`);
1254
+ }
1255
+ }
1256
+ }
1257
+ // Inject API key (use effective provider for cross-provider override)
1258
+ const providerKey = providerKeys[effectiveProvider];
1259
+ if (crossProviderOverride) {
1260
+ // Debug: show available provider keys for troubleshooting
1261
+ const availableProviders = Object.keys(providerKeys);
1262
+ log.info(`[PROXY] Cross-provider: looking for "${effectiveProvider}" key, available: [${availableProviders.join(", ")}]`);
1263
+ // Show key lengths for debugging
1264
+ const keyLengths = availableProviders.map(p => `${p}:${providerKeys[p]?.length ?? 0}`).join(", ");
1265
+ log.info(`[PROXY] Key lengths: ${keyLengths}`);
1266
+ }
1055
1267
  if (providerKey) {
1056
- const authHeader = (0, shared_1.getProviderAuthHeader)(provider, providerKey, useNativeApi);
1268
+ const authHeader = (0, shared_1.getProviderAuthHeader)(effectiveProvider, providerKey, useNativeApi && !crossProviderOverride);
1057
1269
  if (authHeader) {
1058
1270
  const existingAuthKey = Object.keys(forwardHeaders).find(k => k.toLowerCase() === authHeader.name.toLowerCase());
1059
1271
  if (existingAuthKey)
1060
1272
  delete forwardHeaders[existingAuthKey];
1061
1273
  forwardHeaders[authHeader.name] = authHeader.value;
1062
- log.info(`[PROXY] Injected ${authHeader.name} header for ${provider}${useNativeApi ? " (native API)" : ""}`);
1274
+ const maskedKey = providerKey.length > 12 ? `${providerKey.slice(0, 8)}...${providerKey.slice(-4)}` : "****";
1275
+ log.info(`[PROXY] Injected ${authHeader.name}=${maskedKey} (len=${providerKey.length}) for ${effectiveProvider}${crossProviderOverride ? " (cross-provider)" : ""}${useNativeApi ? " (native API)" : ""}`);
1063
1276
  }
1064
1277
  }
1065
1278
  else {
1066
- log.warn(`[PROXY] No API key configured for provider: ${provider}`);
1279
+ log.warn(`[PROXY] No API key configured for provider: ${effectiveProvider}`);
1067
1280
  }
1068
1281
  // Add provider-specific required headers
1069
- if (provider === "anthropic") {
1282
+ if (effectiveProvider === "anthropic") {
1070
1283
  // Anthropic requires anthropic-version header
1071
1284
  if (!forwardHeaders["anthropic-version"]) {
1072
1285
  forwardHeaders["anthropic-version"] = "2023-06-01";
@@ -1113,30 +1326,191 @@ function startProxy(options) {
1113
1326
  const contentType = providerResponse.headers.get("content-type") ?? "";
1114
1327
  const isSSE = contentType.includes("text/event-stream");
1115
1328
  if (isSSE && providerResponse.body) {
1116
- // Streaming response
1329
+ // Determine stream conversion direction BEFORE setting headers
1330
+ // Case 1: OpenAI-compatible client → Anthropic target (convert Anthropic SSE → OpenAI SSE)
1331
+ const needsAnthropicToOpenai = crossProviderOverride && effectiveProvider === "anthropic" && crossProviderOverride.originalProvider !== "anthropic";
1332
+ // Case 2: Anthropic client → OpenAI-compatible target (convert OpenAI SSE → Anthropic SSE)
1333
+ const needsOpenaiToAnthropic = crossProviderOverride && crossProviderOverride.originalProvider === "anthropic" && effectiveProvider !== "anthropic";
1334
+ // Streaming response - build headers carefully
1117
1335
  const responseHeaders = {};
1336
+ // Headers to skip when forwarding SSE response
1337
+ const skipHeaders = new Set([
1338
+ "content-encoding", // fetch() auto-decompresses, so this would be wrong
1339
+ "content-length", // SSE is streamed, no fixed length
1340
+ "transfer-encoding", // Let Node.js handle this
1341
+ "connection", // Let Node.js handle this
1342
+ ]);
1343
+ // Headers to skip when doing cross-provider override (provider-specific headers)
1344
+ const providerSpecificHeaders = new Set([
1345
+ "x-request-id",
1346
+ "openai-processing-ms",
1347
+ "openai-organization",
1348
+ "openai-version",
1349
+ "x-ratelimit-limit-requests",
1350
+ "x-ratelimit-limit-tokens",
1351
+ "x-ratelimit-remaining-requests",
1352
+ "x-ratelimit-remaining-tokens",
1353
+ "x-ratelimit-reset-requests",
1354
+ "x-ratelimit-reset-tokens",
1355
+ ]);
1118
1356
  providerResponse.headers.forEach((value, key) => {
1357
+ const lowerKey = key.toLowerCase();
1358
+ if (skipHeaders.has(lowerKey))
1359
+ return;
1360
+ if (crossProviderOverride && providerSpecificHeaders.has(lowerKey))
1361
+ return;
1119
1362
  responseHeaders[key] = value;
1120
1363
  });
1364
+ // Ensure correct headers for SSE
1365
+ responseHeaders["Content-Type"] = "text/event-stream; charset=utf-8";
1366
+ responseHeaders["Cache-Control"] = "no-cache";
1367
+ responseHeaders["Connection"] = "keep-alive";
1368
+ responseHeaders["X-Accel-Buffering"] = "no"; // Disable nginx buffering if behind nginx
1121
1369
  res.writeHead(providerResponse.status, responseHeaders);
1370
+ res.flushHeaders(); // Ensure headers are sent immediately
1122
1371
  const chunks = [];
1123
1372
  let accumulatedSize = 0;
1124
1373
  const reader = providerResponse.body.getReader();
1374
+ let streamState = null;
1375
+ let reverseStreamState = null;
1376
+ let lineBuffer = "";
1377
+ // Use StringDecoder to handle multi-byte UTF-8 characters split across chunks
1378
+ const utf8Decoder = new node_string_decoder_1.StringDecoder("utf8");
1379
+ if (needsAnthropicToOpenai) {
1380
+ streamState = (0, shared_1.createStreamingConverterState)();
1381
+ log.info(`[PROXY] Converting Anthropic SSE stream → OpenAI format`);
1382
+ }
1383
+ else if (needsOpenaiToAnthropic) {
1384
+ reverseStreamState = (0, shared_1.createOpenAIToAnthropicStreamState)();
1385
+ log.info(`[PROXY] Converting OpenAI SSE stream → Anthropic format`);
1386
+ }
1125
1387
  try {
1126
1388
  for (;;) {
1127
1389
  const { done, value } = await reader.read();
1128
1390
  if (done)
1129
1391
  break;
1130
1392
  const buf = Buffer.from(value);
1131
- res.write(buf);
1132
- accumulatedSize += buf.length;
1133
- if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
1134
- chunks.push(buf);
1393
+ if (needsAnthropicToOpenai && streamState) {
1394
+ // Transform Anthropic SSE to OpenAI SSE
1395
+ // Use StringDecoder to properly handle multi-byte UTF-8 characters
1396
+ lineBuffer += utf8Decoder.write(buf);
1397
+ const lines = lineBuffer.split("\n");
1398
+ lineBuffer = lines.pop() ?? ""; // Keep incomplete line for next chunk
1399
+ for (const line of lines) {
1400
+ if (line.startsWith("data: ")) {
1401
+ const data = line.slice(6).trim();
1402
+ if (data && data !== "[DONE]") {
1403
+ try {
1404
+ const event = JSON.parse(data);
1405
+ const openaiChunks = (0, shared_1.anthropicSseToOpenaiChunks)(event, streamState, requestedModel ?? undefined);
1406
+ for (const chunk of openaiChunks) {
1407
+ const sseData = (0, shared_1.formatOpenAISSELine)(chunk);
1408
+ res.write(sseData);
1409
+ accumulatedSize += sseData.length;
1410
+ }
1411
+ }
1412
+ catch (e) {
1413
+ log.debug(`[PROXY] Failed to parse Anthropic SSE event: ${data}`);
1414
+ }
1415
+ }
1416
+ }
1417
+ }
1418
+ // Store original for metrics
1419
+ if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
1420
+ chunks.push(buf);
1421
+ }
1422
+ }
1423
+ else if (needsOpenaiToAnthropic && reverseStreamState) {
1424
+ // Transform OpenAI SSE to Anthropic SSE
1425
+ // Use StringDecoder to properly handle multi-byte UTF-8 characters
1426
+ lineBuffer += utf8Decoder.write(buf);
1427
+ const lines = lineBuffer.split("\n");
1428
+ lineBuffer = lines.pop() ?? ""; // Keep incomplete line for next chunk
1429
+ for (const line of lines) {
1430
+ if (line.startsWith("data: ")) {
1431
+ const data = line.slice(6).trim();
1432
+ if (data && data !== "[DONE]") {
1433
+ try {
1434
+ const chunk = JSON.parse(data);
1435
+ const anthropicLines = (0, shared_1.openaiChunkToAnthropicSse)(chunk, reverseStreamState, requestedModel ?? undefined);
1436
+ for (const sseLine of anthropicLines) {
1437
+ log.info(`[PROXY] Anthropic SSE: ${sseLine.slice(0, 150).replace(/\n/g, "\\n")}`);
1438
+ res.write(sseLine);
1439
+ accumulatedSize += sseLine.length;
1440
+ }
1441
+ }
1442
+ catch (e) {
1443
+ log.debug(`[PROXY] Failed to parse OpenAI SSE chunk: ${data}`);
1444
+ }
1445
+ }
1446
+ }
1447
+ }
1448
+ // Store original for metrics
1449
+ if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
1450
+ chunks.push(buf);
1451
+ }
1452
+ }
1453
+ else {
1454
+ // No conversion needed, pass through
1455
+ res.write(buf);
1456
+ accumulatedSize += buf.length;
1457
+ if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
1458
+ chunks.push(buf);
1459
+ }
1460
+ }
1461
+ }
1462
+ // Handle any remaining data in lineBuffer for OpenAI → Anthropic conversion
1463
+ if (needsOpenaiToAnthropic && reverseStreamState && lineBuffer.trim()) {
1464
+ if (lineBuffer.startsWith("data: ")) {
1465
+ const data = lineBuffer.slice(6).trim();
1466
+ if (data && data !== "[DONE]") {
1467
+ try {
1468
+ const chunk = JSON.parse(data);
1469
+ const anthropicLines = (0, shared_1.openaiChunkToAnthropicSse)(chunk, reverseStreamState, requestedModel ?? undefined);
1470
+ for (const sseLine of anthropicLines) {
1471
+ log.info(`[PROXY] Anthropic SSE (final): ${sseLine.slice(0, 150).replace(/\n/g, "\\n")}`);
1472
+ res.write(sseLine);
1473
+ }
1474
+ }
1475
+ catch (e) {
1476
+ log.debug(`[PROXY] Failed to parse final OpenAI SSE chunk: ${data}`);
1477
+ }
1478
+ }
1479
+ }
1480
+ }
1481
+ // Send done markers for converted streams
1482
+ if (needsAnthropicToOpenai) {
1483
+ res.write((0, shared_1.formatOpenAISSEDone)());
1484
+ }
1485
+ // For OpenAI → Anthropic conversion, ensure proper stream finalization
1486
+ // This handles cases where the OpenAI stream ended without a finish_reason chunk
1487
+ if (needsOpenaiToAnthropic && reverseStreamState) {
1488
+ if (!(0, shared_1.isOpenAIToAnthropicStreamFinalized)(reverseStreamState)) {
1489
+ log.info(`[PROXY] OpenAI stream ended without proper finalization, sending closing events`);
1490
+ const finalLines = (0, shared_1.finalizeOpenAIToAnthropicStream)(reverseStreamState);
1491
+ for (const sseLine of finalLines) {
1492
+ log.info(`[PROXY] Anthropic SSE (finalize): ${sseLine.slice(0, 150).replace(/\n/g, "\\n")}`);
1493
+ res.write(sseLine);
1494
+ }
1135
1495
  }
1136
1496
  }
1137
1497
  }
1138
1498
  catch (error) {
1139
1499
  log.error("Stream read error", { err: error instanceof Error ? error.message : String(error) });
1500
+ // Even on error, try to finalize the Anthropic stream
1501
+ if (needsOpenaiToAnthropic && reverseStreamState && reverseStreamState.sentMessageStart) {
1502
+ try {
1503
+ if (!(0, shared_1.isOpenAIToAnthropicStreamFinalized)(reverseStreamState)) {
1504
+ const finalLines = (0, shared_1.finalizeOpenAIToAnthropicStream)(reverseStreamState);
1505
+ for (const sseLine of finalLines) {
1506
+ res.write(sseLine);
1507
+ }
1508
+ }
1509
+ }
1510
+ catch {
1511
+ // Ignore errors during error recovery
1512
+ }
1513
+ }
1140
1514
  }
1141
1515
  finally {
1142
1516
  res.end();
@@ -1144,7 +1518,8 @@ function startProxy(options) {
1144
1518
  const latencyMs = Date.now() - requestStart;
1145
1519
  const fullBody = Buffer.concat(chunks);
1146
1520
  try {
1147
- extractStreamingMetrics(provider, providerResponse.status, fullBody, latencyMs, effectiveAgentId, requestedModel);
1521
+ // Use effective provider for metrics extraction
1522
+ extractStreamingMetrics(effectiveProvider, providerResponse.status, fullBody, latencyMs, effectiveAgentId, requestedModel);
1148
1523
  }
1149
1524
  catch (error) {
1150
1525
  log.error("Streaming metric extraction error", { err: error instanceof Error ? error.message : String(error) });
@@ -1162,26 +1537,70 @@ function startProxy(options) {
1162
1537
  return;
1163
1538
  }
1164
1539
  const latencyMs = Date.now() - requestStart;
1540
+ // Transform response if cross-provider override
1541
+ let finalResponseBody = responseBodyBuffer;
1542
+ let responseConverted = false;
1543
+ if (crossProviderOverride && providerResponse.status < 400) {
1544
+ // Case 1: OpenAI-compatible client → Anthropic target
1545
+ // Need to convert Anthropic response → OpenAI format
1546
+ if (effectiveProvider === "anthropic" && crossProviderOverride.originalProvider !== "anthropic") {
1547
+ try {
1548
+ const anthropicResponse = JSON.parse(responseBodyBuffer.toString("utf-8"));
1549
+ const openaiResponse = (0, shared_1.anthropicToOpenai)(anthropicResponse, requestedModel ?? undefined);
1550
+ finalResponseBody = Buffer.from(JSON.stringify(openaiResponse), "utf-8");
1551
+ responseConverted = true;
1552
+ log.info(`[PROXY] Converted Anthropic response → OpenAI format`);
1553
+ }
1554
+ catch (e) {
1555
+ log.error(`[PROXY] Failed to convert Anthropic response: ${e instanceof Error ? e.message : String(e)}`);
1556
+ }
1557
+ }
1558
+ // Case 2: Anthropic client → OpenAI-compatible target
1559
+ // Need to convert OpenAI response → Anthropic format
1560
+ else if (crossProviderOverride.originalProvider === "anthropic" && effectiveProvider !== "anthropic") {
1561
+ try {
1562
+ const openaiResponse = JSON.parse(responseBodyBuffer.toString("utf-8"));
1563
+ const anthropicResponse = (0, shared_1.openaiToAnthropicResponse)(openaiResponse, requestedModel ?? undefined);
1564
+ finalResponseBody = Buffer.from(JSON.stringify(anthropicResponse), "utf-8");
1565
+ responseConverted = true;
1566
+ log.info(`[PROXY] Converted OpenAI response → Anthropic format`);
1567
+ }
1568
+ catch (e) {
1569
+ log.error(`[PROXY] Failed to convert OpenAI response: ${e instanceof Error ? e.message : String(e)}`);
1570
+ }
1571
+ }
1572
+ }
1165
1573
  const responseHeaders = {};
1166
1574
  providerResponse.headers.forEach((value, key) => {
1167
1575
  if (key.toLowerCase() === "transfer-encoding")
1168
1576
  return;
1577
+ // Update content-length if we transformed the body
1578
+ if (key.toLowerCase() === "content-length" && responseConverted) {
1579
+ responseHeaders[key] = String(finalResponseBody.length);
1580
+ return;
1581
+ }
1169
1582
  responseHeaders[key] = value;
1170
1583
  });
1171
- // Debug log error responses
1584
+ // Log error responses (INFO level for cross-provider, DEBUG for others)
1172
1585
  if (providerResponse.status >= 400) {
1173
1586
  try {
1174
1587
  const errorBody = responseBodyBuffer.toString("utf-8").slice(0, 2000);
1175
- log.debug(`[PROXY] Error response body: ${errorBody}${responseBodyBuffer.length > 2000 ? "... (truncated)" : ""}`);
1588
+ if (crossProviderOverride) {
1589
+ log.info(`[PROXY] Cross-provider error (${providerResponse.status}): ${errorBody}${responseBodyBuffer.length > 2000 ? "... (truncated)" : ""}`);
1590
+ }
1591
+ else {
1592
+ log.debug(`[PROXY] Error response body: ${errorBody}${responseBodyBuffer.length > 2000 ? "... (truncated)" : ""}`);
1593
+ }
1176
1594
  }
1177
1595
  catch {
1178
1596
  log.debug(`[PROXY] Error response body: (binary, ${responseBodyBuffer.length} bytes)`);
1179
1597
  }
1180
1598
  }
1181
1599
  res.writeHead(providerResponse.status, responseHeaders);
1182
- res.end(responseBodyBuffer);
1600
+ res.end(finalResponseBody);
1183
1601
  try {
1184
- extractAndQueueMetrics(provider, providerResponse.status, responseBodyBuffer, latencyMs, effectiveAgentId, requestedModel);
1602
+ // Use effective provider for metrics, but pass original response for parsing
1603
+ extractAndQueueMetrics(effectiveProvider, providerResponse.status, responseBodyBuffer, latencyMs, effectiveAgentId, requestedModel);
1185
1604
  }
1186
1605
  catch (error) {
1187
1606
  log.error("Metric extraction error", { err: error instanceof Error ? error.message : String(error) });