@zhouzhengchang/token-party 0.0.13 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,18 @@
1
1
  import { streamSSE } from "hono/streaming";
2
2
  import { getModelId, getModelPricing } from "../types/config.js";
3
- import { getConfig } from "../config.js";
4
3
  import { nanoid } from "nanoid";
5
4
  import { writeLog, headersToRecord } from "../store/log-writer.js";
6
5
  import { recordRequest } from "../metrics/collector.js";
7
6
  import { extractTags } from "../tags/registry.js";
8
7
  import { createGunzip, createInflate, createBrotliDecompress, createZstdDecompress } from "node:zlib";
9
8
  import { Readable, Transform } from "node:stream";
10
- import { request as httpsRequest } from "node:https";
11
- import { request as httpRequest } from "node:http";
9
+ import { Agent as HttpsAgent, request as httpsRequest } from "node:https";
10
+ import { Agent as HttpAgent, request as httpRequest } from "node:http";
11
+ // Shared keepAlive agents for connection pooling.
12
+ // Without these, every outgoing request opens a new TCP connection, causing
13
+ // TIME_WAIT accumulation and ephemeral port exhaustion under sustained load.
14
+ const httpAgent = new HttpAgent({ keepAlive: true, maxFreeSockets: 20, keepAliveMsecs: 30_000 });
15
+ const httpsAgent = new HttpsAgent({ keepAlive: true, maxFreeSockets: 20, keepAliveMsecs: 30_000 });
12
16
  const roundRobinCounters = new Map();
13
17
  function selectApiKey(provider) {
14
18
  const keys = Array.isArray(provider.apiKey) ? provider.apiKey : [provider.apiKey];
@@ -24,37 +28,18 @@ function maskApiKey(key) {
24
28
  return "****";
25
29
  return key.slice(0, 4) + "****" + key.slice(-4);
26
30
  }
27
- export async function forwardRequest(c, provider, targetPath, transformedBody, entryProtocol, pricing, _routeTrace) {
31
+ function isRetryableStatus(status) {
32
+ return status === 429 || status >= 500;
33
+ }
34
+ export async function forwardRequest(c, candidateProviders, targetPath, transformedBody, entryProtocol, _routeTrace) {
28
35
  const routeTrace = _routeTrace ?? [];
29
36
  const requestId = nanoid();
30
37
  const startTime = Date.now();
31
38
  const body = transformedBody ?? (await c.req.json());
32
39
  const isStreaming = body?.stream === true;
33
40
  const model = body?.model ?? "unknown";
34
- const entry = entryProtocol ?? provider.type;
35
- const needsStreamConversion = isStreaming && entry !== provider.type;
41
+ const entry = entryProtocol ?? candidateProviders[0].type;
36
42
  const isResponsesApi = !!body?.input && !body?.messages;
37
- // Request usage in streaming for OpenAI chat completions (not responses API)
38
- if (isStreaming && provider.type === "openai" && !isResponsesApi && !body.stream_options) {
39
- body.stream_options = { include_usage: true };
40
- }
41
- const targetUrl = `${provider.baseUrl}${targetPath}`;
42
- const { key: selectedKey, index: apiKeyIndex } = selectApiKey(provider);
43
- const upstreamHeaders = {};
44
- const skipHeaders = new Set(["host", "connection", "content-length"]);
45
- c.req.raw.headers.forEach((value, key) => {
46
- if (!skipHeaders.has(key.toLowerCase())) {
47
- upstreamHeaders[key] = value;
48
- }
49
- });
50
- if (provider.type === "openai") {
51
- upstreamHeaders["authorization"] = `Bearer ${selectedKey}`;
52
- }
53
- else if (provider.type === "anthropic") {
54
- delete upstreamHeaders["authorization"];
55
- upstreamHeaders["x-api-key"] = selectedKey;
56
- upstreamHeaders["anthropic-version"] ??= "2023-06-01";
57
- }
58
43
  const reqHeaders = {};
59
44
  c.req.raw.headers.forEach((value, key) => {
60
45
  reqHeaders[key] = value;
@@ -62,148 +47,251 @@ export async function forwardRequest(c, provider, targetPath, transformedBody, e
62
47
  const extractedTags = extractTags({ headers: c.req.raw.headers, path: c.req.path, body, model });
63
48
  const agent = extractedTags.agent ?? "";
64
49
  const customTags = extractedTags.tags ?? "";
50
+ c.set("recorded", true);
51
+ const token = c.get("authToken");
65
52
  const logFile = writeLog(requestId, {
66
53
  type: "request",
67
54
  timestamp: startTime,
68
- headers: { ...reqHeaders, "x-target-url": targetUrl, "x-entry-protocol": entry, "x-provider-type": provider.type, "x-api-key-index": String(apiKeyIndex), "x-api-key-used": maskApiKey(selectedKey) },
55
+ headers: reqHeaders,
69
56
  body,
70
57
  });
71
- c.set("recorded", true);
72
- const token = c.get("authToken");
58
+ // Loop through candidate providers (ordered by priority then price)
59
+ for (let i = 0; i < candidateProviders.length; i++) {
60
+ const provider = candidateProviders[i];
61
+ const providerPricing = getModelPricing(provider.models.find((m) => getModelId(m) === model));
62
+ const { key: selectedKey, index: apiKeyIndex } = selectApiKey(provider);
63
+ const targetUrl = `${provider.baseUrl}${targetPath}`;
64
+ const upstreamHeaders = {};
65
+ const skipHeaders = new Set(["host", "connection", "content-length"]);
66
+ c.req.raw.headers.forEach((value, key) => {
67
+ if (!skipHeaders.has(key.toLowerCase())) {
68
+ upstreamHeaders[key] = value;
69
+ }
70
+ });
71
+ if (provider.type === "openai") {
72
+ upstreamHeaders["authorization"] = `Bearer ${selectedKey}`;
73
+ }
74
+ else if (provider.type === "anthropic") {
75
+ delete upstreamHeaders["authorization"];
76
+ upstreamHeaders["x-api-key"] = selectedKey;
77
+ upstreamHeaders["anthropic-version"] ??= "2023-06-01";
78
+ }
79
+ // Request usage in streaming for OpenAI chat completions (not responses API)
80
+ const attemptBody = isStreaming && provider.type === "openai" && !isResponsesApi && !body.stream_options
81
+ ? { ...body, stream_options: { include_usage: true } }
82
+ : body;
83
+ const attemptResult = await attemptProvider({
84
+ provider,
85
+ targetUrl,
86
+ upstreamHeaders,
87
+ attemptBody,
88
+ isStreaming,
89
+ needsStreamConversion: isStreaming && entry !== provider.type,
90
+ entry,
91
+ c,
92
+ requestId,
93
+ startTime,
94
+ token,
95
+ logFile,
96
+ apiKeyIndex,
97
+ providerPricing,
98
+ agent,
99
+ customTags,
100
+ routeTrace,
101
+ model,
102
+ });
103
+ if (attemptResult.kind === "done") {
104
+ return attemptResult.response;
105
+ }
106
+ // Retryable error - log this attempt and try next candidate
107
+ const latencyMs = Date.now() - startTime;
108
+ const reason = attemptResult.status === 429 ? "rate_limited"
109
+ : attemptResult.error ? "network_error"
110
+ : `http_${attemptResult.status}`;
111
+ routeTrace.push({ provider: provider.id, status: attemptResult.status, latencyMs, reason });
112
+ recordRequest({
113
+ id: requestId,
114
+ tokenId: token.key,
115
+ providerId: provider.id,
116
+ model,
117
+ inputTokens: 0,
118
+ outputTokens: 0,
119
+ cacheReadTokens: 0,
120
+ cacheWriteTokens: 0,
121
+ latencyMs,
122
+ status: attemptResult.status,
123
+ logFile,
124
+ error: attemptResult.error,
125
+ apiKeyIndex,
126
+ pricing: providerPricing,
127
+ currency: provider.currency,
128
+ agent,
129
+ customTags,
130
+ routeTrace,
131
+ });
132
+ if (i < candidateProviders.length - 1) {
133
+ console.log(`[tokenparty] Falling back from ${provider.id} to ${candidateProviders[i + 1].id} for model ${model} (${reason})`);
134
+ }
135
+ }
136
+ // All candidates exhausted - return last retryable error as 502
137
+ return c.json({ error: "All provider candidates failed" }, 502);
138
+ }
139
+ async function attemptProvider(params) {
140
+ const { provider, targetUrl, upstreamHeaders, attemptBody, isStreaming, needsStreamConversion, entry, c, requestId, startTime, token, logFile, apiKeyIndex, providerPricing, agent, customTags, routeTrace, model, } = params;
73
141
  try {
74
- // Same protocol streaming: use http.request for raw passthrough (no auto-decompression)
142
+ // Same protocol streaming: use http.request for raw passthrough
75
143
  if (isStreaming && !needsStreamConversion) {
144
+ const streamResult = await rawStreamPassthrough({
145
+ c, targetUrl, upstreamHeaders, body: attemptBody, requestId, provider,
146
+ model, token, startTime, logFile, apiKeyIndex, pricing: providerPricing,
147
+ agent, customTags, routeTrace,
148
+ });
149
+ if (streamResult.kind === "retryable") {
150
+ return streamResult;
151
+ }
76
152
  routeTrace.push({ provider: provider.id, status: 200, latencyMs: 0 });
77
- return await rawStreamPassthrough(c, targetUrl, upstreamHeaders, body, requestId, provider, model, token, startTime, logFile, apiKeyIndex, pricing, agent, customTags, routeTrace);
153
+ return { kind: "done", response: streamResult.response };
78
154
  }
155
+ // Fetch path: non-streaming + cross-protocol streaming
79
156
  const response = await fetch(targetUrl, {
80
157
  method: "POST",
81
158
  headers: upstreamHeaders,
82
- body: JSON.stringify(body),
159
+ body: JSON.stringify(attemptBody),
83
160
  });
161
+ // Check if retryable BEFORE reading/piping body
162
+ if (isRetryableStatus(response.status)) {
163
+ // Drain response to free connection
164
+ await response.body?.cancel();
165
+ return { kind: "retryable", status: response.status };
166
+ }
84
167
  const respHeaders = headersToRecord(response.headers);
85
168
  const latencyMs = Date.now() - startTime;
86
169
  if (isStreaming && response.ok) {
87
- // Protocol conversion: decompress, parse, convert, re-emit
170
+ // Protocol conversion streaming
88
171
  c.header("Content-Type", "text/event-stream");
89
172
  c.header("Cache-Control", "no-cache");
90
173
  c.header("Connection", "keep-alive");
91
- return streamSSE(c, async (s) => {
92
- const reader = decompressResponse(response).getReader();
93
- const decoder = new TextDecoder();
94
- let buffer = "";
95
- let fullContent = "";
96
- let rawEvents = [];
97
- let usage;
98
- let chunkId = `chatcmpl-${requestId}`;
99
- const o2aConverter = new OpenaiToAnthropicStreamConverter();
100
- try {
101
- while (true) {
102
- const { done, value } = await reader.read();
103
- if (done)
104
- break;
105
- buffer += decoder.decode(value, { stream: true });
106
- const lines = buffer.split("\n");
107
- buffer = lines.pop() ?? "";
108
- for (const line of lines) {
109
- if (!line.startsWith("data: "))
110
- continue;
111
- const data = line.slice(6).trim();
112
- if (data === "[DONE]") {
113
- await s.writeSSE({ data: "[DONE]" });
114
- continue;
115
- }
116
- try {
117
- const parsed = JSON.parse(data);
118
- rawEvents.push(parsed);
119
- if (provider.type === "anthropic" && entry === "openai") {
120
- const converted = convertAnthropicChunkToOpenai(parsed, model, chunkId);
121
- if (converted) {
122
- if (converted.content)
123
- fullContent += converted.content;
124
- await s.writeSSE({ data: JSON.stringify(converted.chunk) });
125
- }
126
- if (parsed.type === "message_start" && parsed.message?.usage) {
127
- usage = { ...(usage ?? { input_tokens: 0, output_tokens: 0 }), input_tokens: parsed.message.usage.input_tokens ?? 0, cache_read_tokens: parsed.message.usage.cache_read_input_tokens ?? 0, cache_write_tokens: parsed.message.usage.cache_creation_input_tokens ?? 0 };
128
- }
129
- if (parsed.type === "message_delta" && parsed.usage) {
130
- usage = { ...(usage ?? { input_tokens: 0, output_tokens: 0 }), output_tokens: parsed.usage.output_tokens ?? 0 };
131
- }
174
+ return {
175
+ kind: "done",
176
+ response: streamSSE(c, async (s) => {
177
+ const reader = decompressResponse(response).getReader();
178
+ const decoder = new TextDecoder();
179
+ let buffer = "";
180
+ let fullContent = "";
181
+ let rawEvents = [];
182
+ let usage;
183
+ let chunkId = `chatcmpl-${requestId}`;
184
+ const o2aConverter = new OpenaiToAnthropicStreamConverter();
185
+ try {
186
+ while (true) {
187
+ const { done, value } = await reader.read();
188
+ if (done)
189
+ break;
190
+ buffer += decoder.decode(value, { stream: true });
191
+ const lines = buffer.split("\n");
192
+ buffer = lines.pop() ?? "";
193
+ for (const line of lines) {
194
+ if (!line.startsWith("data: "))
195
+ continue;
196
+ const data = line.slice(6).trim();
197
+ if (data === "[DONE]") {
198
+ await s.writeSSE({ data: "[DONE]" });
199
+ continue;
132
200
  }
133
- else if (provider.type === "openai" && entry === "anthropic") {
134
- const converted = o2aConverter.convert(parsed, model);
135
- if (converted) {
136
- for (const event of converted.events) {
137
- await s.writeSSE({ event: event.type, data: JSON.stringify(event.data) });
201
+ try {
202
+ const parsed = JSON.parse(data);
203
+ rawEvents.push(parsed);
204
+ if (provider.type === "anthropic" && entry === "openai") {
205
+ const converted = convertAnthropicChunkToOpenai(parsed, model, chunkId);
206
+ if (converted) {
207
+ if (converted.content)
208
+ fullContent += converted.content;
209
+ await s.writeSSE({ data: JSON.stringify(converted.chunk) });
210
+ }
211
+ if (parsed.type === "message_start" && parsed.message?.usage) {
212
+ usage = { ...(usage ?? { input_tokens: 0, output_tokens: 0 }), input_tokens: parsed.message.usage.input_tokens ?? 0, cache_read_tokens: parsed.message.usage.cache_read_input_tokens ?? 0, cache_write_tokens: parsed.message.usage.cache_creation_input_tokens ?? 0 };
213
+ }
214
+ if (parsed.type === "message_delta" && parsed.usage) {
215
+ usage = { ...(usage ?? { input_tokens: 0, output_tokens: 0 }), output_tokens: parsed.usage.output_tokens ?? 0 };
138
216
  }
139
- if (converted.content)
140
- fullContent += converted.content;
141
217
  }
142
- if (parsed.usage) {
143
- usage = { input_tokens: parsed.usage.prompt_tokens ?? 0, output_tokens: parsed.usage.completion_tokens ?? 0, cache_read_tokens: parsed.usage.prompt_tokens_details?.cached_tokens ?? 0, cache_write_tokens: 0 };
218
+ else if (provider.type === "openai" && entry === "anthropic") {
219
+ const converted = o2aConverter.convert(parsed, model);
220
+ if (converted) {
221
+ for (const event of converted.events) {
222
+ await s.writeSSE({ event: event.type, data: JSON.stringify(event.data) });
223
+ }
224
+ if (converted.content)
225
+ fullContent += converted.content;
226
+ }
227
+ if (parsed.usage) {
228
+ usage = { input_tokens: parsed.usage.prompt_tokens ?? 0, output_tokens: parsed.usage.completion_tokens ?? 0, cache_read_tokens: parsed.usage.prompt_tokens_details?.cached_tokens ?? 0, cache_write_tokens: 0 };
229
+ }
144
230
  }
145
231
  }
232
+ catch { }
146
233
  }
147
- catch { }
148
234
  }
149
235
  }
150
- }
151
- finally {
152
- if (!usage) {
153
- for (let i = rawEvents.length - 1; i >= 0; i--) {
154
- const evt = rawEvents[i];
155
- if (evt.type === "response.completed" && evt.response?.usage) {
156
- usage = {
157
- input_tokens: evt.response.usage.input_tokens ?? 0,
158
- output_tokens: evt.response.usage.output_tokens ?? 0,
159
- cache_read_tokens: evt.response.usage.cache_read_input_tokens ?? 0,
160
- cache_write_tokens: evt.response.usage.cache_creation_input_tokens ?? 0,
161
- };
162
- break;
163
- }
164
- if (evt.usage && typeof evt.usage === "object" && (evt.usage.prompt_tokens || evt.usage.completion_tokens || evt.usage.input_tokens || evt.usage.output_tokens || evt.usage.total_tokens)) {
165
- usage = {
166
- input_tokens: evt.usage.prompt_tokens ?? evt.usage.input_tokens ?? 0,
167
- output_tokens: evt.usage.completion_tokens ?? evt.usage.output_tokens ?? 0,
168
- cache_read_tokens: evt.usage.prompt_tokens_details?.cached_tokens ?? evt.usage.cache_read_input_tokens ?? 0,
169
- cache_write_tokens: evt.usage.cache_creation_input_tokens ?? 0,
170
- };
171
- break;
236
+ finally {
237
+ if (!usage) {
238
+ for (let i = rawEvents.length - 1; i >= 0; i--) {
239
+ const evt = rawEvents[i];
240
+ if (evt.type === "response.completed" && evt.response?.usage) {
241
+ usage = {
242
+ input_tokens: evt.response.usage.input_tokens ?? 0,
243
+ output_tokens: evt.response.usage.output_tokens ?? 0,
244
+ cache_read_tokens: evt.response.usage.cache_read_input_tokens ?? 0,
245
+ cache_write_tokens: evt.response.usage.cache_creation_input_tokens ?? 0,
246
+ };
247
+ break;
248
+ }
249
+ if (evt.usage && typeof evt.usage === "object" && (evt.usage.prompt_tokens || evt.usage.completion_tokens || evt.usage.input_tokens || evt.usage.output_tokens || evt.usage.total_tokens)) {
250
+ usage = {
251
+ input_tokens: evt.usage.prompt_tokens ?? evt.usage.input_tokens ?? 0,
252
+ output_tokens: evt.usage.completion_tokens ?? evt.usage.output_tokens ?? 0,
253
+ cache_read_tokens: evt.usage.prompt_tokens_details?.cached_tokens ?? evt.usage.cache_read_input_tokens ?? 0,
254
+ cache_write_tokens: evt.usage.cache_creation_input_tokens ?? 0,
255
+ };
256
+ break;
257
+ }
172
258
  }
173
259
  }
260
+ writeLog(requestId, {
261
+ type: "response",
262
+ timestamp: Date.now(),
263
+ headers: respHeaders,
264
+ streaming: true,
265
+ streamContent: fullContent,
266
+ body: rawEvents,
267
+ usage,
268
+ status: response.status,
269
+ });
270
+ routeTrace.push({ provider: provider.id, status: response.status, latencyMs: Date.now() - startTime });
271
+ recordRequest({
272
+ id: requestId,
273
+ tokenId: token.key,
274
+ providerId: provider.id,
275
+ model,
276
+ inputTokens: usage?.input_tokens ?? 0,
277
+ outputTokens: usage?.output_tokens ?? 0,
278
+ cacheReadTokens: usage?.cache_read_tokens ?? 0,
279
+ cacheWriteTokens: usage?.cache_write_tokens ?? 0,
280
+ latencyMs: Date.now() - startTime,
281
+ status: response.status,
282
+ logFile,
283
+ apiKeyIndex,
284
+ pricing: providerPricing,
285
+ currency: provider.currency,
286
+ agent,
287
+ customTags,
288
+ routeTrace,
289
+ });
174
290
  }
175
- writeLog(requestId, {
176
- type: "response",
177
- timestamp: Date.now(),
178
- headers: respHeaders,
179
- streaming: true,
180
- streamContent: fullContent,
181
- body: rawEvents,
182
- usage,
183
- });
184
- routeTrace.push({ provider: provider.id, status: response.status, latencyMs: Date.now() - startTime });
185
- recordRequest({
186
- id: requestId,
187
- tokenId: token.key,
188
- providerId: provider.id,
189
- model,
190
- inputTokens: usage?.input_tokens ?? 0,
191
- outputTokens: usage?.output_tokens ?? 0,
192
- cacheReadTokens: usage?.cache_read_tokens ?? 0,
193
- cacheWriteTokens: usage?.cache_write_tokens ?? 0,
194
- latencyMs: Date.now() - startTime,
195
- status: response.status,
196
- logFile,
197
- apiKeyIndex,
198
- pricing,
199
- currency: provider.currency,
200
- agent,
201
- customTags,
202
- routeTrace,
203
- });
204
- }
205
- });
291
+ }),
292
+ };
206
293
  }
294
+ // Non-streaming response
207
295
  const responseBody = await decompressJson(response);
208
296
  const usage = extractUsage(responseBody, provider.type);
209
297
  writeLog(requestId, {
@@ -212,33 +300,8 @@ export async function forwardRequest(c, provider, targetPath, transformedBody, e
212
300
  headers: respHeaders,
213
301
  body: responseBody,
214
302
  usage,
303
+ status: response.status,
215
304
  });
216
- if ((response.status === 429 || response.status >= 500) && provider.fallback) {
217
- const reason = response.status === 429 ? "rate_limited" : `http_${response.status}`;
218
- routeTrace.push({ provider: provider.id, status: response.status, latencyMs, reason });
219
- recordRequest({
220
- id: requestId,
221
- tokenId: token.key,
222
- providerId: provider.id,
223
- model,
224
- inputTokens: usage?.input_tokens ?? 0,
225
- outputTokens: usage?.output_tokens ?? 0,
226
- cacheReadTokens: usage?.cache_read_tokens ?? 0,
227
- cacheWriteTokens: usage?.cache_write_tokens ?? 0,
228
- latencyMs,
229
- status: response.status,
230
- logFile,
231
- apiKeyIndex,
232
- pricing,
233
- currency: provider.currency,
234
- agent,
235
- customTags,
236
- routeTrace,
237
- });
238
- const fallbackResult = tryFallback(c, provider, model, targetPath, body, entryProtocol, routeTrace);
239
- if (fallbackResult)
240
- return fallbackResult;
241
- }
242
305
  routeTrace.push({ provider: provider.id, status: response.status, latencyMs });
243
306
  recordRequest({
244
307
  id: requestId,
@@ -253,13 +316,13 @@ export async function forwardRequest(c, provider, targetPath, transformedBody, e
253
316
  status: response.status,
254
317
  logFile,
255
318
  apiKeyIndex,
256
- pricing,
319
+ pricing: providerPricing,
257
320
  currency: provider.currency,
258
321
  agent,
259
322
  customTags,
260
323
  routeTrace,
261
324
  });
262
- return c.json(responseBody, response.status);
325
+ return { kind: "done", response: c.json(responseBody, response.status) };
263
326
  }
264
327
  catch (error) {
265
328
  const latencyMs = Date.now() - startTime;
@@ -268,83 +331,32 @@ export async function forwardRequest(c, provider, targetPath, transformedBody, e
268
331
  timestamp: Date.now(),
269
332
  error: error.message,
270
333
  });
271
- routeTrace.push({ provider: provider.id, status: null, latencyMs, reason: "network_error" });
272
- if (provider.fallback) {
273
- recordRequest({
274
- id: requestId,
275
- tokenId: token.key,
276
- providerId: provider.id,
277
- model,
278
- inputTokens: 0,
279
- outputTokens: 0,
280
- latencyMs,
281
- status: 502,
282
- logFile,
283
- error: error.message,
284
- apiKeyIndex,
285
- pricing,
286
- currency: provider.currency,
287
- agent,
288
- customTags,
289
- routeTrace,
290
- });
291
- const fallbackResult = tryFallback(c, provider, model, targetPath, body, entryProtocol, routeTrace);
292
- if (fallbackResult)
293
- return fallbackResult;
294
- }
295
- recordRequest({
296
- id: requestId,
297
- tokenId: token.key,
298
- providerId: provider.id,
299
- model,
300
- inputTokens: 0,
301
- outputTokens: 0,
302
- latencyMs,
303
- status: 502,
304
- logFile,
305
- error: error.message,
306
- apiKeyIndex,
307
- pricing,
308
- currency: provider.currency,
309
- agent,
310
- customTags,
311
- routeTrace,
312
- });
313
- return c.json({ error: "Upstream request failed", detail: error.message }, 502);
334
+ return { kind: "retryable", status: 502, error: error.message };
314
335
  }
315
336
  }
316
- function tryFallback(c, provider, model, targetPath, body, entryProtocol, routeTrace) {
317
- if (!provider.fallback)
318
- return null;
319
- const config = getConfig();
320
- const fallbackProvider = config.providers.find((p) => p.id === provider.fallback && p.enabled);
321
- if (!fallbackProvider)
322
- return null;
323
- const modelConfig = fallbackProvider.models.find((m) => getModelId(m) === model);
324
- if (!modelConfig)
325
- return null;
326
- const fallbackPricing = getModelPricing(modelConfig);
327
- let fallbackPath = targetPath;
328
- if (fallbackProvider.type !== provider.type) {
329
- if (fallbackProvider.type === "anthropic")
330
- fallbackPath = "/v1/messages";
331
- else
332
- fallbackPath = "/chat/completions";
333
- }
334
- console.log(`[tokenparty] Falling back from ${provider.id} to ${fallbackProvider.id} for model ${model}`);
335
- return forwardRequest(c, fallbackProvider, fallbackPath, body, entryProtocol, fallbackPricing, routeTrace);
336
- }
337
- function rawStreamPassthrough(c, targetUrl, upstreamHeaders, body, requestId, provider, model, token, startTime, logFile, apiKeyIndex, pricing, agent, customTags, routeTrace) {
337
+ function rawStreamPassthrough(params) {
338
+ const { targetUrl, upstreamHeaders, body, requestId, provider, model, token, startTime, logFile, apiKeyIndex, pricing, agent, customTags, routeTrace, } = params;
338
339
  const url = new URL(targetUrl);
339
340
  const reqFn = url.protocol === "https:" ? httpsRequest : httpRequest;
340
- return new Promise((resolve, reject) => {
341
- const req = reqFn(url, { method: "POST", headers: { ...upstreamHeaders, "content-type": "application/json" } }, (res) => {
341
+ return new Promise((resolve) => {
342
+ const keepAliveAgent = url.protocol === "https:" ? httpsAgent : httpAgent;
343
+ const req = reqFn(url, {
344
+ method: "POST",
345
+ headers: { ...upstreamHeaders, "content-type": "application/json" },
346
+ agent: keepAliveAgent,
347
+ }, (res) => {
342
348
  const respHeaders = {};
343
349
  for (const [key, val] of Object.entries(res.headers)) {
344
350
  if (val)
345
351
  respHeaders[key] = Array.isArray(val) ? val.join(", ") : val;
346
352
  }
347
353
  const status = res.statusCode ?? 502;
354
+ // Check if retryable BEFORE piping - destroy stream and return retryable
355
+ if (isRetryableStatus(status)) {
356
+ res.destroy();
357
+ resolve({ kind: "retryable", status });
358
+ return;
359
+ }
348
360
  // Passthrough all upstream headers, skip hop-by-hop
349
361
  const passthroughHeaders = new Headers();
350
362
  const hopByHop = new Set(["connection", "keep-alive", "transfer-encoding", "te", "trailer", "upgrade"]);
@@ -361,20 +373,21 @@ function rawStreamPassthrough(c, targetUrl, upstreamHeaders, body, requestId, pr
361
373
  callback(null, chunk);
362
374
  },
363
375
  flush(callback) {
364
- // Async parse for logging after stream ends
365
- asyncParseBufferForLog(rawChunks, res.headers["content-encoding"], requestId, respHeaders, provider, model, token, startTime, logFile, apiKeyIndex, pricing, agent, customTags, routeTrace);
376
+ asyncParseBufferForLog(rawChunks, res.headers["content-encoding"], requestId, respHeaders, provider, model, token, startTime, logFile, apiKeyIndex, pricing, agent, customTags, routeTrace, status);
366
377
  callback();
367
378
  },
368
379
  });
369
380
  const stream = Readable.toWeb(res.pipe(passthrough));
370
- resolve(new Response(stream, { status, headers: passthroughHeaders }));
381
+ resolve({ kind: "done", response: new Response(stream, { status, headers: passthroughHeaders }) });
382
+ });
383
+ req.on("error", (error) => {
384
+ resolve({ kind: "retryable", status: 502, error: error.message });
371
385
  });
372
- req.on("error", reject);
373
386
  req.write(JSON.stringify(body));
374
387
  req.end();
375
388
  });
376
389
  }
377
- function asyncParseBufferForLog(rawChunks, encoding, requestId, respHeaders, provider, model, token, startTime, logFile, apiKeyIndex, pricing, agent, customTags, routeTrace) {
390
+ function asyncParseBufferForLog(rawChunks, encoding, requestId, respHeaders, provider, model, token, startTime, logFile, apiKeyIndex, pricing, agent, customTags, routeTrace, upstreamStatus) {
378
391
  (async () => {
379
392
  let text;
380
393
  const combined = Buffer.concat(rawChunks);
@@ -392,55 +405,74 @@ function asyncParseBufferForLog(rawChunks, encoding, requestId, respHeaders, pro
392
405
  else {
393
406
  text = combined.toString("utf-8");
394
407
  }
408
+ const contentType = respHeaders["content-type"] ?? "";
409
+ const isSse = contentType.includes("text/event-stream");
410
+ const recordedStatus = upstreamStatus ?? 200;
395
411
  let fullContent = "";
396
412
  let rawEvents = [];
397
413
  let usage;
398
- for (const line of text.split("\n")) {
399
- if (!line.startsWith("data: "))
400
- continue;
401
- const data = line.slice(6).trim();
402
- if (data === "[DONE]")
403
- continue;
404
- try {
405
- const parsed = JSON.parse(data);
406
- rawEvents.push(parsed);
407
- if (provider.type === "anthropic" && parsed.type === "content_block_delta") {
408
- if (parsed.delta?.text)
409
- fullContent += parsed.delta.text;
410
- if (parsed.delta?.thinking)
411
- fullContent += parsed.delta.thinking;
412
- }
413
- else if (provider.type === "openai" && parsed.choices?.[0]?.delta?.content) {
414
- fullContent += parsed.choices[0].delta.content;
414
+ let responseBody;
415
+ if (isSse) {
416
+ for (const line of text.split("\n")) {
417
+ if (!line.startsWith("data: "))
418
+ continue;
419
+ const data = line.slice(6).trim();
420
+ if (data === "[DONE]")
421
+ continue;
422
+ try {
423
+ const parsed = JSON.parse(data);
424
+ rawEvents.push(parsed);
425
+ if (provider.type === "anthropic" && parsed.type === "content_block_delta") {
426
+ if (parsed.delta?.text)
427
+ fullContent += parsed.delta.text;
428
+ if (parsed.delta?.thinking)
429
+ fullContent += parsed.delta.thinking;
430
+ }
431
+ else if (provider.type === "openai" && parsed.choices?.[0]?.delta?.content) {
432
+ fullContent += parsed.choices[0].delta.content;
433
+ }
434
+ else if (parsed.type === "response.output_text.delta" && parsed.delta) {
435
+ fullContent += parsed.delta;
436
+ }
437
+ usage = extractUsageFromChunk(parsed, provider.type) ?? usage;
415
438
  }
416
- else if (parsed.type === "response.output_text.delta" && parsed.delta) {
417
- fullContent += parsed.delta;
439
+ catch { }
440
+ }
441
+ if (!usage) {
442
+ for (let i = rawEvents.length - 1; i >= 0; i--) {
443
+ const evt = rawEvents[i];
444
+ if (evt.type === "response.completed" && evt.response?.usage) {
445
+ usage = { input_tokens: evt.response.usage.input_tokens ?? 0, output_tokens: evt.response.usage.output_tokens ?? 0, cache_read_tokens: evt.response.usage.cache_read_input_tokens ?? 0, cache_write_tokens: evt.response.usage.cache_creation_input_tokens ?? 0 };
446
+ break;
447
+ }
448
+ if (evt.usage && typeof evt.usage === "object" && (evt.usage.prompt_tokens || evt.usage.completion_tokens || evt.usage.input_tokens || evt.usage.output_tokens || evt.usage.total_tokens)) {
449
+ usage = { input_tokens: evt.usage.prompt_tokens ?? evt.usage.input_tokens ?? 0, output_tokens: evt.usage.completion_tokens ?? evt.usage.output_tokens ?? 0, cache_read_tokens: evt.usage.prompt_tokens_details?.cached_tokens ?? evt.usage.cache_read_input_tokens ?? 0, cache_write_tokens: evt.usage.cache_creation_input_tokens ?? 0 };
450
+ break;
451
+ }
418
452
  }
419
- usage = extractUsageFromChunk(parsed, provider.type) ?? usage;
420
453
  }
421
- catch { }
454
+ responseBody = rawEvents;
422
455
  }
423
- if (!usage) {
424
- for (let i = rawEvents.length - 1; i >= 0; i--) {
425
- const evt = rawEvents[i];
426
- if (evt.type === "response.completed" && evt.response?.usage) {
427
- usage = { input_tokens: evt.response.usage.input_tokens ?? 0, output_tokens: evt.response.usage.output_tokens ?? 0, cache_read_tokens: evt.response.usage.cache_read_input_tokens ?? 0, cache_write_tokens: evt.response.usage.cache_creation_input_tokens ?? 0 };
428
- break;
429
- }
430
- if (evt.usage && typeof evt.usage === "object" && (evt.usage.prompt_tokens || evt.usage.completion_tokens || evt.usage.input_tokens || evt.usage.output_tokens || evt.usage.total_tokens)) {
431
- usage = { input_tokens: evt.usage.prompt_tokens ?? evt.usage.input_tokens ?? 0, output_tokens: evt.usage.completion_tokens ?? evt.usage.output_tokens ?? 0, cache_read_tokens: evt.usage.prompt_tokens_details?.cached_tokens ?? evt.usage.cache_read_input_tokens ?? 0, cache_write_tokens: evt.usage.cache_creation_input_tokens ?? 0 };
432
- break;
433
- }
456
+ else {
457
+ // Upstream returned a non-SSE body (e.g. JSON error) despite stream:true request.
458
+ // Record the raw decoded text faithfully.
459
+ try {
460
+ responseBody = JSON.parse(text);
461
+ usage = extractUsage(responseBody, provider.type);
462
+ }
463
+ catch {
464
+ responseBody = text;
434
465
  }
435
466
  }
436
467
  writeLog(requestId, {
437
468
  type: "response",
438
469
  timestamp: Date.now(),
439
470
  headers: respHeaders,
440
- streaming: true,
441
- streamContent: fullContent,
442
- body: rawEvents,
471
+ streaming: isSse,
472
+ streamContent: isSse ? fullContent : undefined,
473
+ body: responseBody,
443
474
  usage,
475
+ status: recordedStatus,
444
476
  });
445
477
  recordRequest({
446
478
  id: requestId,
@@ -452,7 +484,7 @@ function asyncParseBufferForLog(rawChunks, encoding, requestId, respHeaders, pro
452
484
  cacheReadTokens: usage?.cache_read_tokens ?? 0,
453
485
  cacheWriteTokens: usage?.cache_write_tokens ?? 0,
454
486
  latencyMs: Date.now() - startTime,
455
- status: 200,
487
+ status: recordedStatus,
456
488
  logFile,
457
489
  apiKeyIndex,
458
490
  pricing,