@zhouzhengchang/token-party 0.0.13 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/assets/index-BwaQlA7_.css +1 -0
- package/dashboard/assets/index-i0xid2VF.js +132 -0
- package/dashboard/index.html +2 -2
- package/dist/proxy/forwarder.d.ts +1 -6
- package/dist/proxy/forwarder.d.ts.map +1 -1
- package/dist/proxy/forwarder.js +306 -274
- package/dist/proxy/forwarder.js.map +1 -1
- package/dist/proxy/router.d.ts +2 -1
- package/dist/proxy/router.d.ts.map +1 -1
- package/dist/proxy/router.js +23 -7
- package/dist/proxy/router.js.map +1 -1
- package/dist/routes/anthropic.d.ts.map +1 -1
- package/dist/routes/anthropic.js +13 -8
- package/dist/routes/anthropic.js.map +1 -1
- package/dist/routes/api.js +49 -0
- package/dist/routes/api.js.map +1 -1
- package/dist/routes/openai.js +9 -6
- package/dist/routes/openai.js.map +1 -1
- package/dist/store/log-writer.d.ts +1 -0
- package/dist/store/log-writer.d.ts.map +1 -1
- package/dist/store/log-writer.js.map +1 -1
- package/dist/tags/extractors/agent-detector.d.ts.map +1 -1
- package/dist/tags/extractors/agent-detector.js +23 -3
- package/dist/tags/extractors/agent-detector.js.map +1 -1
- package/dist/types/config.d.ts +16 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/config.js +12 -0
- package/dist/types/config.js.map +1 -1
- package/package.json +1 -1
- package/dashboard/assets/index-DnAvo_XU.js +0 -132
- package/dashboard/assets/index-WKqSdT1v.css +0 -1
package/dist/proxy/forwarder.js
CHANGED
|
@@ -1,14 +1,18 @@
|
|
|
1
1
|
import { streamSSE } from "hono/streaming";
|
|
2
2
|
import { getModelId, getModelPricing } from "../types/config.js";
|
|
3
|
-
import { getConfig } from "../config.js";
|
|
4
3
|
import { nanoid } from "nanoid";
|
|
5
4
|
import { writeLog, headersToRecord } from "../store/log-writer.js";
|
|
6
5
|
import { recordRequest } from "../metrics/collector.js";
|
|
7
6
|
import { extractTags } from "../tags/registry.js";
|
|
8
7
|
import { createGunzip, createInflate, createBrotliDecompress, createZstdDecompress } from "node:zlib";
|
|
9
8
|
import { Readable, Transform } from "node:stream";
|
|
10
|
-
import { request as httpsRequest } from "node:https";
|
|
11
|
-
import { request as httpRequest } from "node:http";
|
|
9
|
+
import { Agent as HttpsAgent, request as httpsRequest } from "node:https";
|
|
10
|
+
import { Agent as HttpAgent, request as httpRequest } from "node:http";
|
|
11
|
+
// Shared keepAlive agents for connection pooling.
|
|
12
|
+
// Without these, every outgoing request opens a new TCP connection, causing
|
|
13
|
+
// TIME_WAIT accumulation and ephemeral port exhaustion under sustained load.
|
|
14
|
+
const httpAgent = new HttpAgent({ keepAlive: true, maxFreeSockets: 20, keepAliveMsecs: 30_000 });
|
|
15
|
+
const httpsAgent = new HttpsAgent({ keepAlive: true, maxFreeSockets: 20, keepAliveMsecs: 30_000 });
|
|
12
16
|
const roundRobinCounters = new Map();
|
|
13
17
|
function selectApiKey(provider) {
|
|
14
18
|
const keys = Array.isArray(provider.apiKey) ? provider.apiKey : [provider.apiKey];
|
|
@@ -24,37 +28,18 @@ function maskApiKey(key) {
|
|
|
24
28
|
return "****";
|
|
25
29
|
return key.slice(0, 4) + "****" + key.slice(-4);
|
|
26
30
|
}
|
|
27
|
-
|
|
31
|
+
function isRetryableStatus(status) {
|
|
32
|
+
return status === 429 || status >= 500;
|
|
33
|
+
}
|
|
34
|
+
export async function forwardRequest(c, candidateProviders, targetPath, transformedBody, entryProtocol, _routeTrace) {
|
|
28
35
|
const routeTrace = _routeTrace ?? [];
|
|
29
36
|
const requestId = nanoid();
|
|
30
37
|
const startTime = Date.now();
|
|
31
38
|
const body = transformedBody ?? (await c.req.json());
|
|
32
39
|
const isStreaming = body?.stream === true;
|
|
33
40
|
const model = body?.model ?? "unknown";
|
|
34
|
-
const entry = entryProtocol ??
|
|
35
|
-
const needsStreamConversion = isStreaming && entry !== provider.type;
|
|
41
|
+
const entry = entryProtocol ?? candidateProviders[0].type;
|
|
36
42
|
const isResponsesApi = !!body?.input && !body?.messages;
|
|
37
|
-
// Request usage in streaming for OpenAI chat completions (not responses API)
|
|
38
|
-
if (isStreaming && provider.type === "openai" && !isResponsesApi && !body.stream_options) {
|
|
39
|
-
body.stream_options = { include_usage: true };
|
|
40
|
-
}
|
|
41
|
-
const targetUrl = `${provider.baseUrl}${targetPath}`;
|
|
42
|
-
const { key: selectedKey, index: apiKeyIndex } = selectApiKey(provider);
|
|
43
|
-
const upstreamHeaders = {};
|
|
44
|
-
const skipHeaders = new Set(["host", "connection", "content-length"]);
|
|
45
|
-
c.req.raw.headers.forEach((value, key) => {
|
|
46
|
-
if (!skipHeaders.has(key.toLowerCase())) {
|
|
47
|
-
upstreamHeaders[key] = value;
|
|
48
|
-
}
|
|
49
|
-
});
|
|
50
|
-
if (provider.type === "openai") {
|
|
51
|
-
upstreamHeaders["authorization"] = `Bearer ${selectedKey}`;
|
|
52
|
-
}
|
|
53
|
-
else if (provider.type === "anthropic") {
|
|
54
|
-
delete upstreamHeaders["authorization"];
|
|
55
|
-
upstreamHeaders["x-api-key"] = selectedKey;
|
|
56
|
-
upstreamHeaders["anthropic-version"] ??= "2023-06-01";
|
|
57
|
-
}
|
|
58
43
|
const reqHeaders = {};
|
|
59
44
|
c.req.raw.headers.forEach((value, key) => {
|
|
60
45
|
reqHeaders[key] = value;
|
|
@@ -62,148 +47,251 @@ export async function forwardRequest(c, provider, targetPath, transformedBody, e
|
|
|
62
47
|
const extractedTags = extractTags({ headers: c.req.raw.headers, path: c.req.path, body, model });
|
|
63
48
|
const agent = extractedTags.agent ?? "";
|
|
64
49
|
const customTags = extractedTags.tags ?? "";
|
|
50
|
+
c.set("recorded", true);
|
|
51
|
+
const token = c.get("authToken");
|
|
65
52
|
const logFile = writeLog(requestId, {
|
|
66
53
|
type: "request",
|
|
67
54
|
timestamp: startTime,
|
|
68
|
-
headers:
|
|
55
|
+
headers: reqHeaders,
|
|
69
56
|
body,
|
|
70
57
|
});
|
|
71
|
-
|
|
72
|
-
|
|
58
|
+
// Loop through candidate providers (ordered by priority then price)
|
|
59
|
+
for (let i = 0; i < candidateProviders.length; i++) {
|
|
60
|
+
const provider = candidateProviders[i];
|
|
61
|
+
const providerPricing = getModelPricing(provider.models.find((m) => getModelId(m) === model));
|
|
62
|
+
const { key: selectedKey, index: apiKeyIndex } = selectApiKey(provider);
|
|
63
|
+
const targetUrl = `${provider.baseUrl}${targetPath}`;
|
|
64
|
+
const upstreamHeaders = {};
|
|
65
|
+
const skipHeaders = new Set(["host", "connection", "content-length"]);
|
|
66
|
+
c.req.raw.headers.forEach((value, key) => {
|
|
67
|
+
if (!skipHeaders.has(key.toLowerCase())) {
|
|
68
|
+
upstreamHeaders[key] = value;
|
|
69
|
+
}
|
|
70
|
+
});
|
|
71
|
+
if (provider.type === "openai") {
|
|
72
|
+
upstreamHeaders["authorization"] = `Bearer ${selectedKey}`;
|
|
73
|
+
}
|
|
74
|
+
else if (provider.type === "anthropic") {
|
|
75
|
+
delete upstreamHeaders["authorization"];
|
|
76
|
+
upstreamHeaders["x-api-key"] = selectedKey;
|
|
77
|
+
upstreamHeaders["anthropic-version"] ??= "2023-06-01";
|
|
78
|
+
}
|
|
79
|
+
// Request usage in streaming for OpenAI chat completions (not responses API)
|
|
80
|
+
const attemptBody = isStreaming && provider.type === "openai" && !isResponsesApi && !body.stream_options
|
|
81
|
+
? { ...body, stream_options: { include_usage: true } }
|
|
82
|
+
: body;
|
|
83
|
+
const attemptResult = await attemptProvider({
|
|
84
|
+
provider,
|
|
85
|
+
targetUrl,
|
|
86
|
+
upstreamHeaders,
|
|
87
|
+
attemptBody,
|
|
88
|
+
isStreaming,
|
|
89
|
+
needsStreamConversion: isStreaming && entry !== provider.type,
|
|
90
|
+
entry,
|
|
91
|
+
c,
|
|
92
|
+
requestId,
|
|
93
|
+
startTime,
|
|
94
|
+
token,
|
|
95
|
+
logFile,
|
|
96
|
+
apiKeyIndex,
|
|
97
|
+
providerPricing,
|
|
98
|
+
agent,
|
|
99
|
+
customTags,
|
|
100
|
+
routeTrace,
|
|
101
|
+
model,
|
|
102
|
+
});
|
|
103
|
+
if (attemptResult.kind === "done") {
|
|
104
|
+
return attemptResult.response;
|
|
105
|
+
}
|
|
106
|
+
// Retryable error - log this attempt and try next candidate
|
|
107
|
+
const latencyMs = Date.now() - startTime;
|
|
108
|
+
const reason = attemptResult.status === 429 ? "rate_limited"
|
|
109
|
+
: attemptResult.error ? "network_error"
|
|
110
|
+
: `http_${attemptResult.status}`;
|
|
111
|
+
routeTrace.push({ provider: provider.id, status: attemptResult.status, latencyMs, reason });
|
|
112
|
+
recordRequest({
|
|
113
|
+
id: requestId,
|
|
114
|
+
tokenId: token.key,
|
|
115
|
+
providerId: provider.id,
|
|
116
|
+
model,
|
|
117
|
+
inputTokens: 0,
|
|
118
|
+
outputTokens: 0,
|
|
119
|
+
cacheReadTokens: 0,
|
|
120
|
+
cacheWriteTokens: 0,
|
|
121
|
+
latencyMs,
|
|
122
|
+
status: attemptResult.status,
|
|
123
|
+
logFile,
|
|
124
|
+
error: attemptResult.error,
|
|
125
|
+
apiKeyIndex,
|
|
126
|
+
pricing: providerPricing,
|
|
127
|
+
currency: provider.currency,
|
|
128
|
+
agent,
|
|
129
|
+
customTags,
|
|
130
|
+
routeTrace,
|
|
131
|
+
});
|
|
132
|
+
if (i < candidateProviders.length - 1) {
|
|
133
|
+
console.log(`[tokenparty] Falling back from ${provider.id} to ${candidateProviders[i + 1].id} for model ${model} (${reason})`);
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
// All candidates exhausted - return last retryable error as 502
|
|
137
|
+
return c.json({ error: "All provider candidates failed" }, 502);
|
|
138
|
+
}
|
|
139
|
+
async function attemptProvider(params) {
|
|
140
|
+
const { provider, targetUrl, upstreamHeaders, attemptBody, isStreaming, needsStreamConversion, entry, c, requestId, startTime, token, logFile, apiKeyIndex, providerPricing, agent, customTags, routeTrace, model, } = params;
|
|
73
141
|
try {
|
|
74
|
-
// Same protocol streaming: use http.request for raw passthrough
|
|
142
|
+
// Same protocol streaming: use http.request for raw passthrough
|
|
75
143
|
if (isStreaming && !needsStreamConversion) {
|
|
144
|
+
const streamResult = await rawStreamPassthrough({
|
|
145
|
+
c, targetUrl, upstreamHeaders, body: attemptBody, requestId, provider,
|
|
146
|
+
model, token, startTime, logFile, apiKeyIndex, pricing: providerPricing,
|
|
147
|
+
agent, customTags, routeTrace,
|
|
148
|
+
});
|
|
149
|
+
if (streamResult.kind === "retryable") {
|
|
150
|
+
return streamResult;
|
|
151
|
+
}
|
|
76
152
|
routeTrace.push({ provider: provider.id, status: 200, latencyMs: 0 });
|
|
77
|
-
return
|
|
153
|
+
return { kind: "done", response: streamResult.response };
|
|
78
154
|
}
|
|
155
|
+
// Fetch path: non-streaming + cross-protocol streaming
|
|
79
156
|
const response = await fetch(targetUrl, {
|
|
80
157
|
method: "POST",
|
|
81
158
|
headers: upstreamHeaders,
|
|
82
|
-
body: JSON.stringify(
|
|
159
|
+
body: JSON.stringify(attemptBody),
|
|
83
160
|
});
|
|
161
|
+
// Check if retryable BEFORE reading/piping body
|
|
162
|
+
if (isRetryableStatus(response.status)) {
|
|
163
|
+
// Drain response to free connection
|
|
164
|
+
await response.body?.cancel();
|
|
165
|
+
return { kind: "retryable", status: response.status };
|
|
166
|
+
}
|
|
84
167
|
const respHeaders = headersToRecord(response.headers);
|
|
85
168
|
const latencyMs = Date.now() - startTime;
|
|
86
169
|
if (isStreaming && response.ok) {
|
|
87
|
-
// Protocol conversion
|
|
170
|
+
// Protocol conversion streaming
|
|
88
171
|
c.header("Content-Type", "text/event-stream");
|
|
89
172
|
c.header("Cache-Control", "no-cache");
|
|
90
173
|
c.header("Connection", "keep-alive");
|
|
91
|
-
return
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
const parsed = JSON.parse(data);
|
|
118
|
-
rawEvents.push(parsed);
|
|
119
|
-
if (provider.type === "anthropic" && entry === "openai") {
|
|
120
|
-
const converted = convertAnthropicChunkToOpenai(parsed, model, chunkId);
|
|
121
|
-
if (converted) {
|
|
122
|
-
if (converted.content)
|
|
123
|
-
fullContent += converted.content;
|
|
124
|
-
await s.writeSSE({ data: JSON.stringify(converted.chunk) });
|
|
125
|
-
}
|
|
126
|
-
if (parsed.type === "message_start" && parsed.message?.usage) {
|
|
127
|
-
usage = { ...(usage ?? { input_tokens: 0, output_tokens: 0 }), input_tokens: parsed.message.usage.input_tokens ?? 0, cache_read_tokens: parsed.message.usage.cache_read_input_tokens ?? 0, cache_write_tokens: parsed.message.usage.cache_creation_input_tokens ?? 0 };
|
|
128
|
-
}
|
|
129
|
-
if (parsed.type === "message_delta" && parsed.usage) {
|
|
130
|
-
usage = { ...(usage ?? { input_tokens: 0, output_tokens: 0 }), output_tokens: parsed.usage.output_tokens ?? 0 };
|
|
131
|
-
}
|
|
174
|
+
return {
|
|
175
|
+
kind: "done",
|
|
176
|
+
response: streamSSE(c, async (s) => {
|
|
177
|
+
const reader = decompressResponse(response).getReader();
|
|
178
|
+
const decoder = new TextDecoder();
|
|
179
|
+
let buffer = "";
|
|
180
|
+
let fullContent = "";
|
|
181
|
+
let rawEvents = [];
|
|
182
|
+
let usage;
|
|
183
|
+
let chunkId = `chatcmpl-${requestId}`;
|
|
184
|
+
const o2aConverter = new OpenaiToAnthropicStreamConverter();
|
|
185
|
+
try {
|
|
186
|
+
while (true) {
|
|
187
|
+
const { done, value } = await reader.read();
|
|
188
|
+
if (done)
|
|
189
|
+
break;
|
|
190
|
+
buffer += decoder.decode(value, { stream: true });
|
|
191
|
+
const lines = buffer.split("\n");
|
|
192
|
+
buffer = lines.pop() ?? "";
|
|
193
|
+
for (const line of lines) {
|
|
194
|
+
if (!line.startsWith("data: "))
|
|
195
|
+
continue;
|
|
196
|
+
const data = line.slice(6).trim();
|
|
197
|
+
if (data === "[DONE]") {
|
|
198
|
+
await s.writeSSE({ data: "[DONE]" });
|
|
199
|
+
continue;
|
|
132
200
|
}
|
|
133
|
-
|
|
134
|
-
const
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
201
|
+
try {
|
|
202
|
+
const parsed = JSON.parse(data);
|
|
203
|
+
rawEvents.push(parsed);
|
|
204
|
+
if (provider.type === "anthropic" && entry === "openai") {
|
|
205
|
+
const converted = convertAnthropicChunkToOpenai(parsed, model, chunkId);
|
|
206
|
+
if (converted) {
|
|
207
|
+
if (converted.content)
|
|
208
|
+
fullContent += converted.content;
|
|
209
|
+
await s.writeSSE({ data: JSON.stringify(converted.chunk) });
|
|
210
|
+
}
|
|
211
|
+
if (parsed.type === "message_start" && parsed.message?.usage) {
|
|
212
|
+
usage = { ...(usage ?? { input_tokens: 0, output_tokens: 0 }), input_tokens: parsed.message.usage.input_tokens ?? 0, cache_read_tokens: parsed.message.usage.cache_read_input_tokens ?? 0, cache_write_tokens: parsed.message.usage.cache_creation_input_tokens ?? 0 };
|
|
213
|
+
}
|
|
214
|
+
if (parsed.type === "message_delta" && parsed.usage) {
|
|
215
|
+
usage = { ...(usage ?? { input_tokens: 0, output_tokens: 0 }), output_tokens: parsed.usage.output_tokens ?? 0 };
|
|
138
216
|
}
|
|
139
|
-
if (converted.content)
|
|
140
|
-
fullContent += converted.content;
|
|
141
217
|
}
|
|
142
|
-
if (
|
|
143
|
-
|
|
218
|
+
else if (provider.type === "openai" && entry === "anthropic") {
|
|
219
|
+
const converted = o2aConverter.convert(parsed, model);
|
|
220
|
+
if (converted) {
|
|
221
|
+
for (const event of converted.events) {
|
|
222
|
+
await s.writeSSE({ event: event.type, data: JSON.stringify(event.data) });
|
|
223
|
+
}
|
|
224
|
+
if (converted.content)
|
|
225
|
+
fullContent += converted.content;
|
|
226
|
+
}
|
|
227
|
+
if (parsed.usage) {
|
|
228
|
+
usage = { input_tokens: parsed.usage.prompt_tokens ?? 0, output_tokens: parsed.usage.completion_tokens ?? 0, cache_read_tokens: parsed.usage.prompt_tokens_details?.cached_tokens ?? 0, cache_write_tokens: 0 };
|
|
229
|
+
}
|
|
144
230
|
}
|
|
145
231
|
}
|
|
232
|
+
catch { }
|
|
146
233
|
}
|
|
147
|
-
catch { }
|
|
148
234
|
}
|
|
149
235
|
}
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
236
|
+
finally {
|
|
237
|
+
if (!usage) {
|
|
238
|
+
for (let i = rawEvents.length - 1; i >= 0; i--) {
|
|
239
|
+
const evt = rawEvents[i];
|
|
240
|
+
if (evt.type === "response.completed" && evt.response?.usage) {
|
|
241
|
+
usage = {
|
|
242
|
+
input_tokens: evt.response.usage.input_tokens ?? 0,
|
|
243
|
+
output_tokens: evt.response.usage.output_tokens ?? 0,
|
|
244
|
+
cache_read_tokens: evt.response.usage.cache_read_input_tokens ?? 0,
|
|
245
|
+
cache_write_tokens: evt.response.usage.cache_creation_input_tokens ?? 0,
|
|
246
|
+
};
|
|
247
|
+
break;
|
|
248
|
+
}
|
|
249
|
+
if (evt.usage && typeof evt.usage === "object" && (evt.usage.prompt_tokens || evt.usage.completion_tokens || evt.usage.input_tokens || evt.usage.output_tokens || evt.usage.total_tokens)) {
|
|
250
|
+
usage = {
|
|
251
|
+
input_tokens: evt.usage.prompt_tokens ?? evt.usage.input_tokens ?? 0,
|
|
252
|
+
output_tokens: evt.usage.completion_tokens ?? evt.usage.output_tokens ?? 0,
|
|
253
|
+
cache_read_tokens: evt.usage.prompt_tokens_details?.cached_tokens ?? evt.usage.cache_read_input_tokens ?? 0,
|
|
254
|
+
cache_write_tokens: evt.usage.cache_creation_input_tokens ?? 0,
|
|
255
|
+
};
|
|
256
|
+
break;
|
|
257
|
+
}
|
|
172
258
|
}
|
|
173
259
|
}
|
|
260
|
+
writeLog(requestId, {
|
|
261
|
+
type: "response",
|
|
262
|
+
timestamp: Date.now(),
|
|
263
|
+
headers: respHeaders,
|
|
264
|
+
streaming: true,
|
|
265
|
+
streamContent: fullContent,
|
|
266
|
+
body: rawEvents,
|
|
267
|
+
usage,
|
|
268
|
+
status: response.status,
|
|
269
|
+
});
|
|
270
|
+
routeTrace.push({ provider: provider.id, status: response.status, latencyMs: Date.now() - startTime });
|
|
271
|
+
recordRequest({
|
|
272
|
+
id: requestId,
|
|
273
|
+
tokenId: token.key,
|
|
274
|
+
providerId: provider.id,
|
|
275
|
+
model,
|
|
276
|
+
inputTokens: usage?.input_tokens ?? 0,
|
|
277
|
+
outputTokens: usage?.output_tokens ?? 0,
|
|
278
|
+
cacheReadTokens: usage?.cache_read_tokens ?? 0,
|
|
279
|
+
cacheWriteTokens: usage?.cache_write_tokens ?? 0,
|
|
280
|
+
latencyMs: Date.now() - startTime,
|
|
281
|
+
status: response.status,
|
|
282
|
+
logFile,
|
|
283
|
+
apiKeyIndex,
|
|
284
|
+
pricing: providerPricing,
|
|
285
|
+
currency: provider.currency,
|
|
286
|
+
agent,
|
|
287
|
+
customTags,
|
|
288
|
+
routeTrace,
|
|
289
|
+
});
|
|
174
290
|
}
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
timestamp: Date.now(),
|
|
178
|
-
headers: respHeaders,
|
|
179
|
-
streaming: true,
|
|
180
|
-
streamContent: fullContent,
|
|
181
|
-
body: rawEvents,
|
|
182
|
-
usage,
|
|
183
|
-
});
|
|
184
|
-
routeTrace.push({ provider: provider.id, status: response.status, latencyMs: Date.now() - startTime });
|
|
185
|
-
recordRequest({
|
|
186
|
-
id: requestId,
|
|
187
|
-
tokenId: token.key,
|
|
188
|
-
providerId: provider.id,
|
|
189
|
-
model,
|
|
190
|
-
inputTokens: usage?.input_tokens ?? 0,
|
|
191
|
-
outputTokens: usage?.output_tokens ?? 0,
|
|
192
|
-
cacheReadTokens: usage?.cache_read_tokens ?? 0,
|
|
193
|
-
cacheWriteTokens: usage?.cache_write_tokens ?? 0,
|
|
194
|
-
latencyMs: Date.now() - startTime,
|
|
195
|
-
status: response.status,
|
|
196
|
-
logFile,
|
|
197
|
-
apiKeyIndex,
|
|
198
|
-
pricing,
|
|
199
|
-
currency: provider.currency,
|
|
200
|
-
agent,
|
|
201
|
-
customTags,
|
|
202
|
-
routeTrace,
|
|
203
|
-
});
|
|
204
|
-
}
|
|
205
|
-
});
|
|
291
|
+
}),
|
|
292
|
+
};
|
|
206
293
|
}
|
|
294
|
+
// Non-streaming response
|
|
207
295
|
const responseBody = await decompressJson(response);
|
|
208
296
|
const usage = extractUsage(responseBody, provider.type);
|
|
209
297
|
writeLog(requestId, {
|
|
@@ -212,33 +300,8 @@ export async function forwardRequest(c, provider, targetPath, transformedBody, e
|
|
|
212
300
|
headers: respHeaders,
|
|
213
301
|
body: responseBody,
|
|
214
302
|
usage,
|
|
303
|
+
status: response.status,
|
|
215
304
|
});
|
|
216
|
-
if ((response.status === 429 || response.status >= 500) && provider.fallback) {
|
|
217
|
-
const reason = response.status === 429 ? "rate_limited" : `http_${response.status}`;
|
|
218
|
-
routeTrace.push({ provider: provider.id, status: response.status, latencyMs, reason });
|
|
219
|
-
recordRequest({
|
|
220
|
-
id: requestId,
|
|
221
|
-
tokenId: token.key,
|
|
222
|
-
providerId: provider.id,
|
|
223
|
-
model,
|
|
224
|
-
inputTokens: usage?.input_tokens ?? 0,
|
|
225
|
-
outputTokens: usage?.output_tokens ?? 0,
|
|
226
|
-
cacheReadTokens: usage?.cache_read_tokens ?? 0,
|
|
227
|
-
cacheWriteTokens: usage?.cache_write_tokens ?? 0,
|
|
228
|
-
latencyMs,
|
|
229
|
-
status: response.status,
|
|
230
|
-
logFile,
|
|
231
|
-
apiKeyIndex,
|
|
232
|
-
pricing,
|
|
233
|
-
currency: provider.currency,
|
|
234
|
-
agent,
|
|
235
|
-
customTags,
|
|
236
|
-
routeTrace,
|
|
237
|
-
});
|
|
238
|
-
const fallbackResult = tryFallback(c, provider, model, targetPath, body, entryProtocol, routeTrace);
|
|
239
|
-
if (fallbackResult)
|
|
240
|
-
return fallbackResult;
|
|
241
|
-
}
|
|
242
305
|
routeTrace.push({ provider: provider.id, status: response.status, latencyMs });
|
|
243
306
|
recordRequest({
|
|
244
307
|
id: requestId,
|
|
@@ -253,13 +316,13 @@ export async function forwardRequest(c, provider, targetPath, transformedBody, e
|
|
|
253
316
|
status: response.status,
|
|
254
317
|
logFile,
|
|
255
318
|
apiKeyIndex,
|
|
256
|
-
pricing,
|
|
319
|
+
pricing: providerPricing,
|
|
257
320
|
currency: provider.currency,
|
|
258
321
|
agent,
|
|
259
322
|
customTags,
|
|
260
323
|
routeTrace,
|
|
261
324
|
});
|
|
262
|
-
return c.json(responseBody, response.status);
|
|
325
|
+
return { kind: "done", response: c.json(responseBody, response.status) };
|
|
263
326
|
}
|
|
264
327
|
catch (error) {
|
|
265
328
|
const latencyMs = Date.now() - startTime;
|
|
@@ -268,83 +331,32 @@ export async function forwardRequest(c, provider, targetPath, transformedBody, e
|
|
|
268
331
|
timestamp: Date.now(),
|
|
269
332
|
error: error.message,
|
|
270
333
|
});
|
|
271
|
-
|
|
272
|
-
if (provider.fallback) {
|
|
273
|
-
recordRequest({
|
|
274
|
-
id: requestId,
|
|
275
|
-
tokenId: token.key,
|
|
276
|
-
providerId: provider.id,
|
|
277
|
-
model,
|
|
278
|
-
inputTokens: 0,
|
|
279
|
-
outputTokens: 0,
|
|
280
|
-
latencyMs,
|
|
281
|
-
status: 502,
|
|
282
|
-
logFile,
|
|
283
|
-
error: error.message,
|
|
284
|
-
apiKeyIndex,
|
|
285
|
-
pricing,
|
|
286
|
-
currency: provider.currency,
|
|
287
|
-
agent,
|
|
288
|
-
customTags,
|
|
289
|
-
routeTrace,
|
|
290
|
-
});
|
|
291
|
-
const fallbackResult = tryFallback(c, provider, model, targetPath, body, entryProtocol, routeTrace);
|
|
292
|
-
if (fallbackResult)
|
|
293
|
-
return fallbackResult;
|
|
294
|
-
}
|
|
295
|
-
recordRequest({
|
|
296
|
-
id: requestId,
|
|
297
|
-
tokenId: token.key,
|
|
298
|
-
providerId: provider.id,
|
|
299
|
-
model,
|
|
300
|
-
inputTokens: 0,
|
|
301
|
-
outputTokens: 0,
|
|
302
|
-
latencyMs,
|
|
303
|
-
status: 502,
|
|
304
|
-
logFile,
|
|
305
|
-
error: error.message,
|
|
306
|
-
apiKeyIndex,
|
|
307
|
-
pricing,
|
|
308
|
-
currency: provider.currency,
|
|
309
|
-
agent,
|
|
310
|
-
customTags,
|
|
311
|
-
routeTrace,
|
|
312
|
-
});
|
|
313
|
-
return c.json({ error: "Upstream request failed", detail: error.message }, 502);
|
|
334
|
+
return { kind: "retryable", status: 502, error: error.message };
|
|
314
335
|
}
|
|
315
336
|
}
|
|
316
|
-
function
|
|
317
|
-
|
|
318
|
-
return null;
|
|
319
|
-
const config = getConfig();
|
|
320
|
-
const fallbackProvider = config.providers.find((p) => p.id === provider.fallback && p.enabled);
|
|
321
|
-
if (!fallbackProvider)
|
|
322
|
-
return null;
|
|
323
|
-
const modelConfig = fallbackProvider.models.find((m) => getModelId(m) === model);
|
|
324
|
-
if (!modelConfig)
|
|
325
|
-
return null;
|
|
326
|
-
const fallbackPricing = getModelPricing(modelConfig);
|
|
327
|
-
let fallbackPath = targetPath;
|
|
328
|
-
if (fallbackProvider.type !== provider.type) {
|
|
329
|
-
if (fallbackProvider.type === "anthropic")
|
|
330
|
-
fallbackPath = "/v1/messages";
|
|
331
|
-
else
|
|
332
|
-
fallbackPath = "/chat/completions";
|
|
333
|
-
}
|
|
334
|
-
console.log(`[tokenparty] Falling back from ${provider.id} to ${fallbackProvider.id} for model ${model}`);
|
|
335
|
-
return forwardRequest(c, fallbackProvider, fallbackPath, body, entryProtocol, fallbackPricing, routeTrace);
|
|
336
|
-
}
|
|
337
|
-
function rawStreamPassthrough(c, targetUrl, upstreamHeaders, body, requestId, provider, model, token, startTime, logFile, apiKeyIndex, pricing, agent, customTags, routeTrace) {
|
|
337
|
+
function rawStreamPassthrough(params) {
|
|
338
|
+
const { targetUrl, upstreamHeaders, body, requestId, provider, model, token, startTime, logFile, apiKeyIndex, pricing, agent, customTags, routeTrace, } = params;
|
|
338
339
|
const url = new URL(targetUrl);
|
|
339
340
|
const reqFn = url.protocol === "https:" ? httpsRequest : httpRequest;
|
|
340
|
-
return new Promise((resolve
|
|
341
|
-
const
|
|
341
|
+
return new Promise((resolve) => {
|
|
342
|
+
const keepAliveAgent = url.protocol === "https:" ? httpsAgent : httpAgent;
|
|
343
|
+
const req = reqFn(url, {
|
|
344
|
+
method: "POST",
|
|
345
|
+
headers: { ...upstreamHeaders, "content-type": "application/json" },
|
|
346
|
+
agent: keepAliveAgent,
|
|
347
|
+
}, (res) => {
|
|
342
348
|
const respHeaders = {};
|
|
343
349
|
for (const [key, val] of Object.entries(res.headers)) {
|
|
344
350
|
if (val)
|
|
345
351
|
respHeaders[key] = Array.isArray(val) ? val.join(", ") : val;
|
|
346
352
|
}
|
|
347
353
|
const status = res.statusCode ?? 502;
|
|
354
|
+
// Check if retryable BEFORE piping - destroy stream and return retryable
|
|
355
|
+
if (isRetryableStatus(status)) {
|
|
356
|
+
res.destroy();
|
|
357
|
+
resolve({ kind: "retryable", status });
|
|
358
|
+
return;
|
|
359
|
+
}
|
|
348
360
|
// Passthrough all upstream headers, skip hop-by-hop
|
|
349
361
|
const passthroughHeaders = new Headers();
|
|
350
362
|
const hopByHop = new Set(["connection", "keep-alive", "transfer-encoding", "te", "trailer", "upgrade"]);
|
|
@@ -361,20 +373,21 @@ function rawStreamPassthrough(c, targetUrl, upstreamHeaders, body, requestId, pr
|
|
|
361
373
|
callback(null, chunk);
|
|
362
374
|
},
|
|
363
375
|
flush(callback) {
|
|
364
|
-
|
|
365
|
-
asyncParseBufferForLog(rawChunks, res.headers["content-encoding"], requestId, respHeaders, provider, model, token, startTime, logFile, apiKeyIndex, pricing, agent, customTags, routeTrace);
|
|
376
|
+
asyncParseBufferForLog(rawChunks, res.headers["content-encoding"], requestId, respHeaders, provider, model, token, startTime, logFile, apiKeyIndex, pricing, agent, customTags, routeTrace, status);
|
|
366
377
|
callback();
|
|
367
378
|
},
|
|
368
379
|
});
|
|
369
380
|
const stream = Readable.toWeb(res.pipe(passthrough));
|
|
370
|
-
resolve(new Response(stream, { status, headers: passthroughHeaders }));
|
|
381
|
+
resolve({ kind: "done", response: new Response(stream, { status, headers: passthroughHeaders }) });
|
|
382
|
+
});
|
|
383
|
+
req.on("error", (error) => {
|
|
384
|
+
resolve({ kind: "retryable", status: 502, error: error.message });
|
|
371
385
|
});
|
|
372
|
-
req.on("error", reject);
|
|
373
386
|
req.write(JSON.stringify(body));
|
|
374
387
|
req.end();
|
|
375
388
|
});
|
|
376
389
|
}
|
|
377
|
-
function asyncParseBufferForLog(rawChunks, encoding, requestId, respHeaders, provider, model, token, startTime, logFile, apiKeyIndex, pricing, agent, customTags, routeTrace) {
|
|
390
|
+
function asyncParseBufferForLog(rawChunks, encoding, requestId, respHeaders, provider, model, token, startTime, logFile, apiKeyIndex, pricing, agent, customTags, routeTrace, upstreamStatus) {
|
|
378
391
|
(async () => {
|
|
379
392
|
let text;
|
|
380
393
|
const combined = Buffer.concat(rawChunks);
|
|
@@ -392,55 +405,74 @@ function asyncParseBufferForLog(rawChunks, encoding, requestId, respHeaders, pro
|
|
|
392
405
|
else {
|
|
393
406
|
text = combined.toString("utf-8");
|
|
394
407
|
}
|
|
408
|
+
const contentType = respHeaders["content-type"] ?? "";
|
|
409
|
+
const isSse = contentType.includes("text/event-stream");
|
|
410
|
+
const recordedStatus = upstreamStatus ?? 200;
|
|
395
411
|
let fullContent = "";
|
|
396
412
|
let rawEvents = [];
|
|
397
413
|
let usage;
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
414
|
+
let responseBody;
|
|
415
|
+
if (isSse) {
|
|
416
|
+
for (const line of text.split("\n")) {
|
|
417
|
+
if (!line.startsWith("data: "))
|
|
418
|
+
continue;
|
|
419
|
+
const data = line.slice(6).trim();
|
|
420
|
+
if (data === "[DONE]")
|
|
421
|
+
continue;
|
|
422
|
+
try {
|
|
423
|
+
const parsed = JSON.parse(data);
|
|
424
|
+
rawEvents.push(parsed);
|
|
425
|
+
if (provider.type === "anthropic" && parsed.type === "content_block_delta") {
|
|
426
|
+
if (parsed.delta?.text)
|
|
427
|
+
fullContent += parsed.delta.text;
|
|
428
|
+
if (parsed.delta?.thinking)
|
|
429
|
+
fullContent += parsed.delta.thinking;
|
|
430
|
+
}
|
|
431
|
+
else if (provider.type === "openai" && parsed.choices?.[0]?.delta?.content) {
|
|
432
|
+
fullContent += parsed.choices[0].delta.content;
|
|
433
|
+
}
|
|
434
|
+
else if (parsed.type === "response.output_text.delta" && parsed.delta) {
|
|
435
|
+
fullContent += parsed.delta;
|
|
436
|
+
}
|
|
437
|
+
usage = extractUsageFromChunk(parsed, provider.type) ?? usage;
|
|
415
438
|
}
|
|
416
|
-
|
|
417
|
-
|
|
439
|
+
catch { }
|
|
440
|
+
}
|
|
441
|
+
if (!usage) {
|
|
442
|
+
for (let i = rawEvents.length - 1; i >= 0; i--) {
|
|
443
|
+
const evt = rawEvents[i];
|
|
444
|
+
if (evt.type === "response.completed" && evt.response?.usage) {
|
|
445
|
+
usage = { input_tokens: evt.response.usage.input_tokens ?? 0, output_tokens: evt.response.usage.output_tokens ?? 0, cache_read_tokens: evt.response.usage.cache_read_input_tokens ?? 0, cache_write_tokens: evt.response.usage.cache_creation_input_tokens ?? 0 };
|
|
446
|
+
break;
|
|
447
|
+
}
|
|
448
|
+
if (evt.usage && typeof evt.usage === "object" && (evt.usage.prompt_tokens || evt.usage.completion_tokens || evt.usage.input_tokens || evt.usage.output_tokens || evt.usage.total_tokens)) {
|
|
449
|
+
usage = { input_tokens: evt.usage.prompt_tokens ?? evt.usage.input_tokens ?? 0, output_tokens: evt.usage.completion_tokens ?? evt.usage.output_tokens ?? 0, cache_read_tokens: evt.usage.prompt_tokens_details?.cached_tokens ?? evt.usage.cache_read_input_tokens ?? 0, cache_write_tokens: evt.usage.cache_creation_input_tokens ?? 0 };
|
|
450
|
+
break;
|
|
451
|
+
}
|
|
418
452
|
}
|
|
419
|
-
usage = extractUsageFromChunk(parsed, provider.type) ?? usage;
|
|
420
453
|
}
|
|
421
|
-
|
|
454
|
+
responseBody = rawEvents;
|
|
422
455
|
}
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
break;
|
|
433
|
-
}
|
|
456
|
+
else {
|
|
457
|
+
// Upstream returned a non-SSE body (e.g. JSON error) despite stream:true request.
|
|
458
|
+
// Record the raw decoded text faithfully.
|
|
459
|
+
try {
|
|
460
|
+
responseBody = JSON.parse(text);
|
|
461
|
+
usage = extractUsage(responseBody, provider.type);
|
|
462
|
+
}
|
|
463
|
+
catch {
|
|
464
|
+
responseBody = text;
|
|
434
465
|
}
|
|
435
466
|
}
|
|
436
467
|
writeLog(requestId, {
|
|
437
468
|
type: "response",
|
|
438
469
|
timestamp: Date.now(),
|
|
439
470
|
headers: respHeaders,
|
|
440
|
-
streaming:
|
|
441
|
-
streamContent: fullContent,
|
|
442
|
-
body:
|
|
471
|
+
streaming: isSse,
|
|
472
|
+
streamContent: isSse ? fullContent : undefined,
|
|
473
|
+
body: responseBody,
|
|
443
474
|
usage,
|
|
475
|
+
status: recordedStatus,
|
|
444
476
|
});
|
|
445
477
|
recordRequest({
|
|
446
478
|
id: requestId,
|
|
@@ -452,7 +484,7 @@ function asyncParseBufferForLog(rawChunks, encoding, requestId, respHeaders, pro
|
|
|
452
484
|
cacheReadTokens: usage?.cache_read_tokens ?? 0,
|
|
453
485
|
cacheWriteTokens: usage?.cache_write_tokens ?? 0,
|
|
454
486
|
latencyMs: Date.now() - startTime,
|
|
455
|
-
status:
|
|
487
|
+
status: recordedStatus,
|
|
456
488
|
logFile,
|
|
457
489
|
apiKeyIndex,
|
|
458
490
|
pricing,
|