openclaw-autoproxy 1.0.2 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +66 -159
- package/README.zh-CN.md +127 -0
- package/dist/gateway/anthropic-compat.js +841 -0
- package/dist/gateway/config.js +16 -0
- package/dist/gateway/model-load-metrics.js +114 -0
- package/dist/gateway/proxy.js +324 -19
- package/dist/gateway/server-http.js +12 -2
- package/dist/gateway/server.impl.js +1 -1
- package/package.json +2 -1
- package/src/gateway/anthropic-compat.ts +1085 -0
- package/src/gateway/config.ts +29 -0
- package/src/gateway/model-load-metrics.ts +166 -0
- package/src/gateway/proxy.ts +443 -25
- package/src/gateway/server-http.ts +16 -2
- package/src/gateway/server.impl.ts +1 -1
- package/openclaw-autoproxy-1.0.1.tgz +0 -0
package/dist/gateway/config.js
CHANGED
|
@@ -12,6 +12,16 @@ function parseCsvList(value) {
|
|
|
12
12
|
.map((item) => item.trim())
|
|
13
13
|
.filter(Boolean);
|
|
14
14
|
}
|
|
15
|
+
/**
 * Parse a strictly positive decimal integer, e.g. from an environment variable.
 *
 * The whole (trimmed) value must consist of decimal digits with an optional
 * leading "+". Anything else yields `fallback`.
 *
 * @param {string|number|null|undefined} value - Raw value to parse.
 * @param {number} fallback - Returned when `value` is missing, malformed,
 *   not a safe integer, or not greater than zero.
 * @returns {number} The parsed integer, or `fallback`.
 */
function parsePositiveInteger(value, fallback) {
    if (!value) {
        return fallback;
    }
    const text = String(value).trim();
    // Number.parseInt stops at the first non-digit, so "60_000" would silently
    // become 60 and "1e3" would become 1. Require the entire string to be digits
    // so a malformed setting falls back instead of being misread.
    if (!/^\+?\d+$/.test(text)) {
        return fallback;
    }
    const parsed = Number(text);
    if (!Number.isSafeInteger(parsed) || parsed <= 0) {
        return fallback;
    }
    return parsed;
}
|
|
15
25
|
function parseRetryCodes(value) {
|
|
16
26
|
const defaults = new Set([412, 429, 500, 502, 503, 504]);
|
|
17
27
|
if (!value) {
|
|
@@ -248,6 +258,9 @@ function loadRouteFileConfig() {
|
|
|
248
258
|
const host = process.env.HOST ?? "0.0.0.0";
|
|
249
259
|
const port = Number.parseInt(process.env.PORT ?? "8787", 10);
|
|
250
260
|
const timeoutMs = Number.parseInt(process.env.REQUEST_TIMEOUT_MS ?? "60000", 10);
|
|
261
|
+
const upstreamMaxConnections = parsePositiveInteger(process.env.UPSTREAM_MAX_CONNECTIONS, 200);
|
|
262
|
+
const upstreamKeepAliveTimeoutMs = parsePositiveInteger(process.env.UPSTREAM_KEEPALIVE_TIMEOUT_MS, 60_000);
|
|
263
|
+
const upstreamKeepAliveMaxTimeoutMs = parsePositiveInteger(process.env.UPSTREAM_KEEPALIVE_MAX_TIMEOUT_MS, 300_000);
|
|
251
264
|
const upstreamBaseUrl = (process.env.UPSTREAM_BASE_URL ?? "https://api.openai.com").replace(/\/+$/, "");
|
|
252
265
|
const routeFileConfig = loadRouteFileConfig();
|
|
253
266
|
if (!Number.isInteger(port) || port < 1 || port > 65535) {
|
|
@@ -262,6 +275,9 @@ export const config = {
|
|
|
262
275
|
timeoutMs,
|
|
263
276
|
upstreamBaseUrl,
|
|
264
277
|
upstreamApiKey: process.env.UPSTREAM_API_KEY ?? "",
|
|
278
|
+
upstreamMaxConnections,
|
|
279
|
+
upstreamKeepAliveTimeoutMs,
|
|
280
|
+
upstreamKeepAliveMaxTimeoutMs,
|
|
265
281
|
retryStatusCodes: routeFileConfig.retryStatusCodes ?? parseRetryCodes(process.env.RETRY_STATUS_CODES),
|
|
266
282
|
globalFallbackModels: parseCsvList(process.env.GLOBAL_FALLBACK_MODELS),
|
|
267
283
|
modelFallbackMap: parseModelFallbackMap(process.env.MODEL_FALLBACK_MAP),
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/** Default sample window: 12 hours, expressed in milliseconds. */
const DEFAULT_WINDOW_MS = 12 * 60 * 60 * 1000;
/** Cap on the number of samples kept for any single model. */
const DEFAULT_MAX_SAMPLES_PER_MODEL = 5_000;
/** Per-model sample lists: model -> array of `{ at, loadMs }` in recording order. */
const modelSamples = new Map();
|
|
4
|
+
/**
 * Pick the value at quantile `q` from an ascending-sorted array.
 *
 * `q` is clamped to [0, 1] and mapped to the index `floor((length - 1) * q)`,
 * so no interpolation between neighbours is performed.
 *
 * @param {number[]} values - Values sorted in ascending order.
 * @param {number} q - Requested quantile.
 * @returns {number} The selected value; 0 for an empty array.
 */
function quantileFromSorted(values, q) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    const fraction = Math.min(Math.max(q, 0), 1);
    const position = Math.floor(fraction * (count - 1));
    const picked = values[position];
    if (picked !== undefined && picked !== null) {
        return picked;
    }
    // Unusable index (e.g. q was NaN): fall back to the last element.
    return values[count - 1] ?? 0;
}
|
|
12
|
+
/**
 * Round a number to two decimal places.
 *
 * @param {number} value - Number to round.
 * @returns {number} `value` rounded to the nearest hundredth.
 */
function roundMs(value) {
    const hundredths = Math.round(value * 100);
    return hundredths / 100;
}
|
|
15
|
+
/**
 * Drop the leading samples whose `at` timestamp is older than `cutoffAt`.
 *
 * Scanning stops at the first sample that is missing or not older than the
 * cutoff, so only a leading run is removed. The input array is never mutated.
 *
 * @param {Array<{at: number, loadMs: number}>} samples - Samples to filter.
 * @param {number} cutoffAt - Timestamp (ms); samples with `at < cutoffAt` are expired.
 * @returns {Array<{at: number, loadMs: number}>} The same array when nothing
 *   was dropped, otherwise a new array holding the remaining samples.
 */
function pruneModelSamples(samples, cutoffAt) {
    const firstKept = samples.findIndex((entry) => !entry || !(entry.at < cutoffAt));
    if (samples.length === 0 || firstKept === 0) {
        return samples;
    }
    // -1 means every sample is expired.
    return samples.slice(firstKept === -1 ? samples.length : firstKept);
}
|
|
25
|
+
/**
 * Remove expired samples from every model tracked in `modelSamples`.
 *
 * Models left without any sample are deleted from the map; models that lost
 * some samples get their entry replaced with the pruned array.
 *
 * @param {number} cutoffAt - Timestamp (ms); samples older than this are dropped.
 * @returns {void}
 */
function pruneExpiredSamples(cutoffAt) {
    for (const [modelKey, history] of modelSamples) {
        const remaining = pruneModelSamples(history, cutoffAt);
        if (remaining.length === 0) {
            modelSamples.delete(modelKey);
        }
        else if (remaining !== history) {
            modelSamples.set(modelKey, remaining);
        }
    }
}
|
|
37
|
+
/**
 * Record one load-time sample for a model.
 *
 * Calls with a falsy `model`, or with a `loadMs` that is not a finite number
 * greater than zero, are ignored. After appending, the model's list is capped
 * at DEFAULT_MAX_SAMPLES_PER_MODEL (oldest entries removed first) and samples
 * older than DEFAULT_WINDOW_MS are pruned for all models.
 *
 * @param {string} model - Model identifier used as the map key.
 * @param {number} loadMs - Measured duration in milliseconds.
 * @returns {void}
 */
export function recordModelLoadSample(model, loadMs) {
    const hasUsableDuration = Number.isFinite(loadMs) && loadMs > 0;
    if (!model || !hasUsableDuration) {
        return;
    }
    const recordedAt = Date.now();
    const history = modelSamples.get(model) ?? [];
    history.push({ at: recordedAt, loadMs });
    const overflow = history.length - DEFAULT_MAX_SAMPLES_PER_MODEL;
    if (overflow > 0) {
        history.splice(0, overflow);
    }
    modelSamples.set(model, history);
    pruneExpiredSamples(recordedAt - DEFAULT_WINDOW_MS);
}
|
|
58
|
+
/**
 * Build load-time statistics for one model.
 *
 * @param {string} model - Model identifier, copied into the summary.
 * @param {Array<{at: number, loadMs: number}>} samples - Samples for the model;
 *   the last element supplies `lastSeenAt`.
 * @returns {null | {model: string, sampleCount: number, avgLoadMs: number,
 *   p50LoadMs: number, p95LoadMs: number, minLoadMs: number, maxLoadMs: number,
 *   lastSeenAt: string}} `null` when there are no samples; otherwise the summary,
 *   with durations rounded via `roundMs` and `lastSeenAt` as an ISO-8601 string.
 */
function summarizeModel(model, samples) {
    const sampleCount = samples.length;
    if (sampleCount === 0) {
        return null;
    }
    const sortedLoads = samples.map((entry) => entry.loadMs);
    sortedLoads.sort((left, right) => left - right);
    let sum = 0;
    for (const load of sortedLoads) {
        sum += load;
    }
    const mean = sum / sortedLoads.length;
    const smallest = sortedLoads[0] ?? 0;
    const largest = sortedLoads[sortedLoads.length - 1] ?? 0;
    const lastSeenMs = samples[sampleCount - 1]?.at ?? Date.now();
    return {
        model,
        sampleCount,
        avgLoadMs: roundMs(mean),
        p50LoadMs: roundMs(quantileFromSorted(sortedLoads, 0.5)),
        p95LoadMs: roundMs(quantileFromSorted(sortedLoads, 0.95)),
        minLoadMs: roundMs(smallest),
        maxLoadMs: roundMs(largest),
        lastSeenAt: new Date(lastSeenMs).toISOString(),
    };
}
|
|
79
|
+
/**
 * Rank models by recorded load time over a recent window.
 *
 * Models are ordered by ascending average load time, then ascending p95, then
 * descending sample count. Models without samples in the window are omitted.
 *
 * @param {number} [windowMs=DEFAULT_WINDOW_MS] - Look-back window in
 *   milliseconds. Non-finite or non-positive values fall back to the default.
 * @returns {{windowHours: number, rankedModels: Array<object>}} The window in
 *   hours (rounded to two decimals) and the per-model summaries, each with a
 *   1-based `rank`.
 */
export function getModelLoadRankingHealth(windowMs = DEFAULT_WINDOW_MS) {
    const normalizedWindowMs = Number.isFinite(windowMs) && windowMs > 0 ? windowMs : DEFAULT_WINDOW_MS;
    const now = Date.now();
    // Stored samples are only ever discarded using the default retention
    // window. Pruning the store with the caller's cutoff would make a
    // narrow-window query permanently delete samples that a later, wider
    // query (or the default one) still needs.
    pruneExpiredSamples(now - DEFAULT_WINDOW_MS);
    const cutoffAt = now - normalizedWindowMs;
    const summaries = [];
    for (const [model, samples] of modelSamples) {
        // Per-request view only; the result is not written back to the store.
        const inWindow = pruneModelSamples(samples, cutoffAt);
        const summary = summarizeModel(model, inWindow);
        if (summary) {
            summaries.push(summary);
        }
    }
    summaries.sort((a, b) => {
        if (a.avgLoadMs !== b.avgLoadMs) {
            return a.avgLoadMs - b.avgLoadMs;
        }
        if (a.p95LoadMs !== b.p95LoadMs) {
            return a.p95LoadMs - b.p95LoadMs;
        }
        return b.sampleCount - a.sampleCount;
    });
    return {
        windowHours: roundMs(normalizedWindowMs / (60 * 60 * 1000)),
        rankedModels: summaries.map((entry, index) => ({
            rank: index + 1,
            ...entry,
        })),
    };
}
|
package/dist/gateway/proxy.js
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import { PassThrough, Readable } from "node:stream";
|
|
2
|
+
import { Agent } from "undici";
|
|
3
|
+
import { createAnthropicMessagesEventStreamTransformer, maybeTransformAnthropicMessagesRequest, transformOpenAiChatCompletionToAnthropicMessage, transformUpstreamErrorToAnthropicError, } from "./anthropic-compat.js";
|
|
2
4
|
import { config } from "./config.js";
|
|
5
|
+
import { recordModelLoadSample } from "./model-load-metrics.js";
|
|
3
6
|
const HOP_BY_HOP_HEADERS = new Set([
|
|
4
7
|
"connection",
|
|
5
8
|
"keep-alive",
|
|
@@ -13,8 +16,37 @@ const HOP_BY_HOP_HEADERS = new Set([
|
|
|
13
16
|
const MAX_REQUEST_BODY_BYTES = 50 * 1024 * 1024;
|
|
14
17
|
const AUTO_MODEL = "auto";
|
|
15
18
|
let autoModelCursor = 0;
|
|
19
|
+
// Shared undici Agent used as the dispatcher for upstream requests. Connection
// limit and keep-alive timeouts are taken from the gateway config.
const upstreamAgent = new Agent({
    connections: config.upstreamMaxConnections,
    pipelining: 1,
    keepAliveTimeout: config.upstreamKeepAliveTimeoutMs,
    keepAliveMaxTimeout: config.upstreamKeepAliveMaxTimeoutMs,
});
// Alias of the global fetch; call sites pass `dispatcher: upstreamAgent` in its options.
const fetchWithDispatcher = fetch;
|
|
26
|
+
/**
 * Render one value for a `key=value` gateway log line.
 *
 * @param {unknown} value - Value to render.
 * @returns {string} "-" for null, undefined, or the empty string; otherwise the
 *   string form of the value, JSON-quoted when it contains whitespace or a
 *   double quote.
 */
function formatGatewayLogValue(value) {
    const isBlank = value === null || value === undefined || value === "";
    if (isBlank) {
        return "-";
    }
    const text = String(value);
    if (/\s|"/.test(text)) {
        return JSON.stringify(text);
    }
    return text;
}
|
|
33
|
+
/**
 * Assemble a single-line gateway log entry of the form
 * `[gateway] protocol=<p> event=<e> key=value ...`.
 *
 * @param {unknown} protocol - Value for the `protocol` field.
 * @param {unknown} event - Value for the `event` field.
 * @param {Record<string, unknown>} fields - Extra fields, appended in the
 *   object's own enumeration order.
 * @returns {string} The space-separated log line.
 */
function buildGatewayLogLine(protocol, event, fields) {
    const header = [
        "[gateway]",
        `protocol=${formatGatewayLogValue(protocol)}`,
        `event=${formatGatewayLogValue(event)}`,
    ];
    const extras = Object.entries(fields).map(([name, raw]) => `${name}=${formatGatewayLogValue(raw)}`);
    return header.concat(extras).join(" ");
}
|
|
16
44
|
/**
 * Log a "routed" event describing which model and route served a request.
 *
 * @param {{protocol: unknown, requestedModel: unknown, usedModel: unknown,
 *   routeName: unknown}} params - Values copied into the log line.
 * @returns {void}
 */
function logProxyModelRoute(params) {
    const { protocol, requestedModel, usedModel, routeName } = params;
    const line = buildGatewayLogLine(protocol, "routed", {
        requested_model: requestedModel,
        used_model: usedModel,
        route: routeName,
    });
    console.log(line);
}
|
|
19
51
|
function resolveRouteNameForModel(modelId) {
|
|
20
52
|
if (modelId && config.modelRouteMap[modelId]) {
|
|
@@ -23,7 +55,27 @@ function resolveRouteNameForModel(modelId) {
|
|
|
23
55
|
return config.modelRouteMap["*"]?.routeName ?? null;
|
|
24
56
|
}
|
|
25
57
|
/**
 * Log a "switch" event describing a move from one model/route to another.
 *
 * @param {{protocol: unknown, triggerStatus: unknown, fromModel: unknown,
 *   fromRoute: unknown, toModel: unknown, toRoute: unknown}} params - Values
 *   copied into the log line.
 * @returns {void}
 */
function logProxyModelSwitch(params) {
    const { protocol, triggerStatus, fromModel, fromRoute, toModel, toRoute } = params;
    const line = buildGatewayLogLine(protocol, "switch", {
        trigger_status: triggerStatus,
        from_model: fromModel,
        from_route: fromRoute,
        to_model: toModel,
        to_route: toRoute,
    });
    console.log(line);
}
|
|
66
|
+
/**
 * Classify a request path as belonging to the Anthropic or OpenAI protocol.
 *
 * @param {string} requestPath - Request path, optionally with a query string.
 * @returns {"anthropic" | "openai"} "anthropic" when the pathname is
 *   `/anthropic`, lies under `/anthropic/`, or satisfies `isAnthropicApiPath`;
 *   otherwise "openai".
 */
function resolveGatewayProtocolFromPath(requestPath) {
    const { pathname } = parsePathnameAndSearch(requestPath);
    const underAnthropicPrefix = pathname === "/anthropic" || pathname.startsWith("/anthropic/");
    if (underAnthropicPrefix || isAnthropicApiPath(pathname)) {
        return "anthropic";
    }
    return "openai";
}
|
|
75
|
+
/**
 * Determine the gateway protocol for an incoming request from its raw URL.
 *
 * @param {{url?: string}} request - Incoming request; a missing `url` is treated as "/".
 * @returns {"anthropic" | "openai"} Protocol label for the request.
 */
function resolveGatewayProtocol(request) {
    let target = request.url ?? "/";
    if (!target.startsWith("/")) {
        // Make sure the path handed to the classifier is rooted.
        target = `/${target}`;
    }
    return resolveGatewayProtocolFromPath(target);
}
|
|
28
80
|
function sendJson(response, statusCode, payload) {
|
|
29
81
|
if (response.writableEnded) {
|
|
@@ -39,11 +91,25 @@ function normalizeRequestPath(request) {
|
|
|
39
91
|
const rawUrl = request.url ?? "/";
|
|
40
92
|
try {
|
|
41
93
|
const parsed = new URL(rawUrl, "http://localhost");
|
|
42
|
-
return `${parsed.pathname}${parsed.search}
|
|
94
|
+
return normalizeGatewayRequestPath(`${parsed.pathname}${parsed.search}`);
|
|
43
95
|
}
|
|
44
96
|
catch {
|
|
45
|
-
|
|
97
|
+
const normalizedRawUrl = rawUrl.startsWith("/") ? rawUrl : `/${rawUrl}`;
|
|
98
|
+
return normalizeGatewayRequestPath(normalizedRawUrl);
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
/**
 * Map `/anthropic`-prefixed request paths onto their `/v1` equivalents.
 *
 * - `/anthropic` becomes `/v1`
 * - `/anthropic/v1` and `/anthropic/v1/...` drop the `/anthropic` prefix
 * - any other `/anthropic/...` path has the prefix replaced with `/v1`
 * - all other paths are returned as parsed
 *
 * The query string is preserved in every case.
 *
 * @param {string} requestPath - Request path, optionally with a query string.
 * @returns {string} Normalized pathname followed by the original search string.
 */
function normalizeGatewayRequestPath(requestPath) {
    const { pathname, search } = parsePathnameAndSearch(requestPath);
    const prefixLength = "/anthropic".length;
    let rewritten = pathname;
    if (pathname === "/anthropic") {
        rewritten = "/v1";
    }
    else if (pathname === "/anthropic/v1" || pathname.startsWith("/anthropic/v1/")) {
        rewritten = pathname.slice(prefixLength);
    }
    else if (pathname.startsWith("/anthropic/")) {
        rewritten = `/v1${pathname.slice(prefixLength)}`;
    }
    return `${rewritten}${search}`;
}
|
|
48
114
|
function rotateCandidates(candidates, startIndex) {
|
|
49
115
|
if (candidates.length <= 1) {
|
|
@@ -71,31 +137,102 @@ function buildModelCandidates(requestedModel) {
|
|
|
71
137
|
// Non-auto requests are pinned to the exact model specified by client.
|
|
72
138
|
return [requestedModel];
|
|
73
139
|
}
|
|
74
|
-
function
|
|
140
|
+
/**
 * Split a request path into its pathname and query-string parts.
 *
 * The path is parsed with `URL` against a dummy base. If that throws, the
 * string is split manually at the first "?".
 *
 * @param {string} requestPath - Request path, optionally with a query string.
 * @returns {{pathname: string, search: string}} `search` includes the leading
 *   "?" when a query is present and is "" otherwise.
 */
function parsePathnameAndSearch(requestPath) {
    let parsedUrl = null;
    try {
        parsedUrl = new URL(requestPath, "http://localhost");
    }
    catch {
        parsedUrl = null;
    }
    if (parsedUrl) {
        return {
            pathname: parsedUrl.pathname,
            search: parsedUrl.search,
        };
    }
    // Manual fallback: everything before the first "?" is the pathname.
    const queryStart = requestPath.indexOf("?");
    if (queryStart === -1) {
        return {
            pathname: requestPath || "/",
            search: "",
        };
    }
    return {
        pathname: requestPath.slice(0, queryStart) || "/",
        search: requestPath.slice(queryStart),
    };
}
|
|
156
|
+
/**
 * Check whether a pathname is one of the paths this gateway treats as
 * Anthropic API endpoints: `/v1/messages` (and anything beneath it),
 * `/v1/models`, or `/v1/complete`.
 *
 * @param {string} pathname - URL pathname without the query string.
 * @returns {boolean} True when the pathname matches one of those endpoints.
 */
function isAnthropicApiPath(pathname) {
    switch (pathname) {
        case "/v1/messages":
        case "/v1/models":
        case "/v1/complete":
            return true;
        default:
            return pathname.startsWith("/v1/messages/");
    }
}
|
|
162
|
+
/**
 * For a route URL pinned to `.../v1/chat/completions`, build the matching URL
 * for an Anthropic API request by swapping that suffix for the request's
 * pathname and query string.
 *
 * @param {string} routeUrl - Absolute route URL from the route configuration.
 * @param {string} requestPath - Incoming request path, optionally with a query.
 * @returns {string | null} The rewritten URL, or `null` when the request is not
 *   an Anthropic API path, `routeUrl` cannot be parsed, or its path (ignoring
 *   trailing slashes) does not end in `/v1/chat/completions`.
 */
function rewriteFixedChatCompletionsRouteUrlForAnthropic(routeUrl, requestPath) {
    const target = parsePathnameAndSearch(requestPath);
    if (!isAnthropicApiPath(target.pathname)) {
        return null;
    }
    let rewritten;
    try {
        rewritten = new URL(routeUrl);
    }
    catch {
        return null;
    }
    const chatSuffix = "/v1/chat/completions";
    const trimmedRoutePath = rewritten.pathname.replace(/\/+$/, "");
    if (!trimmedRoutePath.endsWith(chatSuffix)) {
        return null;
    }
    // Keep whatever precedes the fixed suffix, e.g. a provider-specific base path.
    const basePath = trimmedRoutePath.slice(0, trimmedRoutePath.length - chatSuffix.length);
    rewritten.pathname = `${basePath}${target.pathname}`.replace(/\/{2,}/g, "/");
    rewritten.search = target.search;
    return rewritten.toString();
}
|
|
184
|
+
/**
 * Compute the upstream URL for a request given the route chosen for it.
 *
 * - No route: the request path is appended to `config.upstreamBaseUrl`.
 * - Route with a fixed URL (`isBaseUrl` falsy): the route URL is used, except
 *   that a `/v1/chat/completions` URL is rewritten for Anthropic API paths.
 * - Route with a base URL: the request path is appended to it, without
 *   repeating `/v1` when the base already ends in `/v1`.
 *
 * @param {string} requestPath - Normalized request path, optionally with a query.
 * @param {{url: string, isBaseUrl?: boolean} | null | undefined} selectedRoute -
 *   Route chosen for the request, if any.
 * @returns {string} The upstream URL to call.
 */
function buildRoutedUpstreamUrl(requestPath, selectedRoute) {
    if (!selectedRoute) {
        return `${config.upstreamBaseUrl}${requestPath}`;
    }
    if (selectedRoute.isBaseUrl) {
        const base = selectedRoute.url.replace(/\/+$/, "");
        const repeatsVersion = base.endsWith("/v1") && requestPath.startsWith("/v1");
        const tail = repeatsVersion ? requestPath.slice(3) : requestPath;
        return `${base}${tail}`;
    }
    // Backward-compatible Anthropic support when route URL is fixed to /v1/chat/completions.
    const anthropicCompatUrl = rewriteFixedChatCompletionsRouteUrlForAnthropic(selectedRoute.url, requestPath);
    return anthropicCompatUrl || selectedRoute.url;
}
|
|
88
|
-
function resolveUpstreamTarget(
|
|
202
|
+
/**
 * Select the route for a model and compute the upstream URL for the request.
 *
 * The model's own entry in `config.modelRouteMap` wins; otherwise the "*"
 * wildcard entry is used; otherwise no route is selected.
 *
 * @param {string} requestPath - Normalized request path, optionally with a query.
 * @param {string | null | undefined} modelId - Model being attempted, if any.
 * @returns {{upstreamUrl: string, selectedRoute: object | null}} Target URL and
 *   the route it was derived from (`null` when no route matched).
 */
function resolveUpstreamTarget(requestPath, modelId) {
    const routes = config.modelRouteMap;
    const specificRoute = modelId ? routes[modelId] ?? null : null;
    const selectedRoute = specificRoute ?? routes["*"] ?? null;
    const upstreamUrl = buildRoutedUpstreamUrl(requestPath, selectedRoute);
    return { upstreamUrl, selectedRoute };
}
|
|
211
|
+
/**
 * Log an "upstream_error" event for an upstream response.
 *
 * The body is read from a clone of the response, so the original body stays
 * unread. Whitespace runs are collapsed and the text is limited to 2000
 * characters. The detail is "-" when the body is empty and "<unavailable>"
 * when reading it fails.
 *
 * @param {{protocol: unknown, response: Response, requestPath: unknown,
 *   routeName: unknown, modelId: unknown, upstreamUrl: unknown}} params -
 *   Response plus the context fields copied into the log line.
 * @returns {Promise<void>}
 */
async function logUpstreamErrorResponse(params) {
    const readDetail = async () => {
        const body = await params.response.clone().text();
        const collapsed = body.replace(/\s+/g, " ").trim();
        return collapsed ? collapsed.slice(0, 2000) : "-";
    };
    let detail;
    try {
        detail = await readDetail();
    }
    catch {
        detail = "<unavailable>";
    }
    const line = buildGatewayLogLine(params.protocol, "upstream_error", {
        status: params.response.status,
        path: params.requestPath,
        route: params.routeName,
        model: params.modelId,
        upstream: params.upstreamUrl,
        detail,
    });
    console.error(line);
}
|
|
97
232
|
function buildUpstreamHeaders(reqHeaders, bodyLength, selectedRoute) {
|
|
98
233
|
const headers = new Headers();
|
|
234
|
+
const selectedAuthHeader = selectedRoute?.authHeader || "authorization";
|
|
235
|
+
const conflictingAuthHeaders = ["authorization", "x-api-key", "api-key"];
|
|
99
236
|
for (const [key, value] of Object.entries(reqHeaders)) {
|
|
100
237
|
if (value === undefined) {
|
|
101
238
|
continue;
|
|
@@ -106,13 +243,20 @@ function buildUpstreamHeaders(reqHeaders, bodyLength, selectedRoute) {
|
|
|
106
243
|
}
|
|
107
244
|
headers.set(key, Array.isArray(value) ? value.join(",") : String(value));
|
|
108
245
|
}
|
|
246
|
+
if (selectedRoute?.apiKey) {
|
|
247
|
+
for (const headerName of conflictingAuthHeaders) {
|
|
248
|
+
if (headerName !== selectedAuthHeader) {
|
|
249
|
+
headers.delete(headerName);
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
}
|
|
109
253
|
if (selectedRoute?.headers) {
|
|
110
254
|
for (const [key, value] of Object.entries(selectedRoute.headers)) {
|
|
111
255
|
headers.set(key, value);
|
|
112
256
|
}
|
|
113
257
|
}
|
|
114
258
|
if (selectedRoute?.apiKey) {
|
|
115
|
-
const authHeader =
|
|
259
|
+
const authHeader = selectedAuthHeader;
|
|
116
260
|
const authPrefix = selectedRoute.authPrefix ?? "Bearer ";
|
|
117
261
|
if (!headers.has(authHeader)) {
|
|
118
262
|
headers.set(authHeader, `${authPrefix}${selectedRoute.apiKey}`);
|
|
@@ -224,12 +368,63 @@ async function fetchWithTimeout(url, options, timeoutMs) {
|
|
|
224
368
|
const controller = new AbortController();
|
|
225
369
|
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
|
226
370
|
try {
|
|
227
|
-
return await
|
|
371
|
+
return await fetchWithDispatcher(url, {
|
|
372
|
+
...options,
|
|
373
|
+
signal: controller.signal,
|
|
374
|
+
dispatcher: upstreamAgent,
|
|
375
|
+
});
|
|
228
376
|
}
|
|
229
377
|
finally {
|
|
230
378
|
clearTimeout(timeoutId);
|
|
231
379
|
}
|
|
232
380
|
}
|
|
381
|
+
/**
 * Create an AbortSignal that fires when the client goes away.
 *
 * The signal aborts when the request emits "aborted", or when the response
 * emits "close" while `response.writableEnded` is still false.
 *
 * @param {import("node:events").EventEmitter} request - Incoming request.
 * @param {import("node:events").EventEmitter & {writableEnded: boolean}} response -
 *   Outgoing response.
 * @returns {AbortSignal} Signal that aborts at most once.
 */
function createClientAbortSignal(request, response) {
    const abortController = new AbortController();
    const { signal } = abortController;
    const abortOnce = () => {
        if (!signal.aborted) {
            abortController.abort();
        }
    };
    const onResponseClose = () => {
        // A close after the response finished is a normal completion, not a disconnect.
        if (!response.writableEnded) {
            abortOnce();
        }
    };
    request.once("aborted", abortOnce);
    response.once("close", onResponseClose);
    return signal;
}
|
|
399
|
+
/**
 * Fetch through the shared upstream dispatcher, aborting on timeout or when
 * the client signal aborts.
 *
 * Without a client signal this defers to `fetchWithTimeout`. The timer and the
 * client-abort listener are both removed once the fetch call settles.
 *
 * @param {string} url - Upstream URL.
 * @param {object} options - Fetch options; `signal` and `dispatcher` are overridden.
 * @param {number} timeoutMs - Milliseconds before the request is aborted.
 * @param {AbortSignal | null | undefined} clientSignal - Signal tied to the client.
 * @returns {Promise<Response>} The upstream response.
 */
async function fetchWithTimeoutAndClientSignal(url, options, timeoutMs, clientSignal) {
    if (!clientSignal) {
        return fetchWithTimeout(url, options, timeoutMs);
    }
    const abortController = new AbortController();
    const timer = setTimeout(() => abortController.abort(), timeoutMs);
    const cancelUpstream = () => {
        if (!abortController.signal.aborted) {
            abortController.abort();
        }
    };
    if (!clientSignal.aborted) {
        clientSignal.addEventListener("abort", cancelUpstream, { once: true });
    }
    else {
        // Client already gone: abort before the request is even issued.
        cancelUpstream();
    }
    try {
        const fetchOptions = {
            ...options,
            signal: abortController.signal,
            dispatcher: upstreamAgent,
        };
        return await fetchWithDispatcher(url, fetchOptions);
    }
    finally {
        clearTimeout(timer);
        clientSignal.removeEventListener("abort", cancelUpstream);
    }
}
|
|
233
428
|
async function disposeBody(response) {
|
|
234
429
|
if (!response.body) {
|
|
235
430
|
return;
|
|
@@ -295,6 +490,9 @@ async function readRequestBody(request) {
|
|
|
295
490
|
export async function proxyRequest(request, response) {
|
|
296
491
|
const method = (request.method ?? "GET").toUpperCase();
|
|
297
492
|
const supportsBody = method !== "GET" && method !== "HEAD";
|
|
493
|
+
const clientSignal = createClientAbortSignal(request, response);
|
|
494
|
+
const normalizedRequestPath = normalizeRequestPath(request);
|
|
495
|
+
const requestProtocol = resolveGatewayProtocol(request);
|
|
298
496
|
let incomingBody = Buffer.alloc(0);
|
|
299
497
|
if (supportsBody) {
|
|
300
498
|
try {
|
|
@@ -344,23 +542,62 @@ export async function proxyRequest(request, response) {
|
|
|
344
542
|
let switchNotice = null;
|
|
345
543
|
for (let attemptIndex = 0; attemptIndex < modelCandidates.length; attemptIndex += 1) {
|
|
346
544
|
const modelId = modelCandidates[attemptIndex];
|
|
347
|
-
let
|
|
348
|
-
|
|
349
|
-
|
|
545
|
+
let requestPath = normalizedRequestPath;
|
|
546
|
+
let responseFormat = null;
|
|
547
|
+
let requestJsonPayload = null;
|
|
548
|
+
if (supportsBody && parsedJsonBody) {
|
|
549
|
+
requestJsonPayload = {
|
|
350
550
|
...parsedJsonBody,
|
|
351
|
-
model: modelId,
|
|
352
|
-
}
|
|
551
|
+
...(modelId ? { model: modelId } : {}),
|
|
552
|
+
};
|
|
353
553
|
}
|
|
354
|
-
|
|
554
|
+
let { upstreamUrl, selectedRoute } = resolveUpstreamTarget(requestPath, modelId);
|
|
355
555
|
lastAttemptRouteName = selectedRoute?.routeName ?? null;
|
|
556
|
+
if (requestJsonPayload) {
|
|
557
|
+
const compatRequest = maybeTransformAnthropicMessagesRequest({
|
|
558
|
+
requestPath,
|
|
559
|
+
upstreamUrl,
|
|
560
|
+
body: requestJsonPayload,
|
|
561
|
+
});
|
|
562
|
+
if (compatRequest.error) {
|
|
563
|
+
console.error(buildGatewayLogLine(requestProtocol, "compat_error", {
|
|
564
|
+
path: requestPath,
|
|
565
|
+
route: selectedRoute?.routeName ?? null,
|
|
566
|
+
model: modelId,
|
|
567
|
+
detail: compatRequest.error,
|
|
568
|
+
}));
|
|
569
|
+
sendJson(response, 400, {
|
|
570
|
+
error: {
|
|
571
|
+
message: compatRequest.error,
|
|
572
|
+
},
|
|
573
|
+
});
|
|
574
|
+
return;
|
|
575
|
+
}
|
|
576
|
+
requestPath = compatRequest.requestPath;
|
|
577
|
+
requestJsonPayload = compatRequest.body;
|
|
578
|
+
responseFormat = compatRequest.responseFormat;
|
|
579
|
+
if (responseFormat) {
|
|
580
|
+
upstreamUrl = buildRoutedUpstreamUrl(requestPath, selectedRoute);
|
|
581
|
+
}
|
|
582
|
+
}
|
|
583
|
+
let bodyBuffer = supportsBody && incomingBody.length > 0 ? incomingBody : undefined;
|
|
584
|
+
if (supportsBody && requestJsonPayload) {
|
|
585
|
+
bodyBuffer = Buffer.from(JSON.stringify(requestJsonPayload), "utf8");
|
|
586
|
+
}
|
|
356
587
|
const requestBody = bodyBuffer ? new Uint8Array(bodyBuffer) : undefined;
|
|
357
588
|
const headers = buildUpstreamHeaders(request.headers, bodyBuffer ? bodyBuffer.length : undefined, selectedRoute);
|
|
358
589
|
try {
|
|
359
|
-
const
|
|
590
|
+
const attemptStartedAt = Date.now();
|
|
591
|
+
const upstreamResponse = await fetchWithTimeoutAndClientSignal(upstreamUrl, {
|
|
360
592
|
method,
|
|
361
593
|
headers,
|
|
362
594
|
body: requestBody,
|
|
363
|
-
}, config.timeoutMs);
|
|
595
|
+
}, config.timeoutMs, clientSignal);
|
|
596
|
+
const headerLoadMs = Date.now() - attemptStartedAt;
|
|
597
|
+
const modelForMetric = modelId ?? requestedModel;
|
|
598
|
+
if (upstreamResponse.ok) {
|
|
599
|
+
recordModelLoadSample(modelForMetric, headerLoadMs);
|
|
600
|
+
}
|
|
364
601
|
const contentType = (upstreamResponse.headers.get("content-type") ?? "").toLowerCase();
|
|
365
602
|
const isEventStream = contentType.includes("text/event-stream");
|
|
366
603
|
const isJsonResponse = contentType.includes("application/json");
|
|
@@ -378,6 +615,7 @@ export async function proxyRequest(request, response) {
|
|
|
378
615
|
const triggerStatus = retryTriggerStatus ?? upstreamResponse.status;
|
|
379
616
|
const nextRouteName = resolveRouteNameForModel(nextModel);
|
|
380
617
|
logProxyModelSwitch({
|
|
618
|
+
protocol: requestProtocol,
|
|
381
619
|
triggerStatus,
|
|
382
620
|
fromModel: modelId,
|
|
383
621
|
toModel: nextModel,
|
|
@@ -396,6 +634,16 @@ export async function proxyRequest(request, response) {
|
|
|
396
634
|
await disposeBody(upstreamResponse);
|
|
397
635
|
continue;
|
|
398
636
|
}
|
|
637
|
+
if (!upstreamResponse.ok) {
|
|
638
|
+
await logUpstreamErrorResponse({
|
|
639
|
+
protocol: requestProtocol,
|
|
640
|
+
requestPath,
|
|
641
|
+
upstreamUrl,
|
|
642
|
+
routeName: selectedRoute?.routeName ?? null,
|
|
643
|
+
modelId,
|
|
644
|
+
response: upstreamResponse,
|
|
645
|
+
});
|
|
646
|
+
}
|
|
399
647
|
const attemptCount = attemptIndex + 1;
|
|
400
648
|
const effectiveSwitchNotice = switchNotice;
|
|
401
649
|
copyResponseHeaders(upstreamResponse, response);
|
|
@@ -407,6 +655,7 @@ export async function proxyRequest(request, response) {
|
|
|
407
655
|
response.setHeader("x-gateway-switched", "1");
|
|
408
656
|
}
|
|
409
657
|
logProxyModelRoute({
|
|
658
|
+
protocol: requestProtocol,
|
|
410
659
|
requestedModel,
|
|
411
660
|
usedModel: modelId,
|
|
412
661
|
routeName: selectedRoute?.routeName ?? null,
|
|
@@ -416,6 +665,61 @@ export async function proxyRequest(request, response) {
|
|
|
416
665
|
response.end();
|
|
417
666
|
return;
|
|
418
667
|
}
|
|
668
|
+
if (responseFormat === "anthropic-messages" && isEventStream) {
|
|
669
|
+
const nodeStream = Readable.fromWeb(upstreamResponse.body);
|
|
670
|
+
const anthropicStream = nodeStream.pipe(createAnthropicMessagesEventStreamTransformer(modelId));
|
|
671
|
+
response.removeHeader("content-length");
|
|
672
|
+
response.setHeader("content-type", "text/event-stream; charset=utf-8");
|
|
673
|
+
if (effectiveSwitchNotice) {
|
|
674
|
+
createSsePrefixedStream(anthropicStream, effectiveSwitchNotice).pipe(response);
|
|
675
|
+
return;
|
|
676
|
+
}
|
|
677
|
+
anthropicStream.on("error", () => {
|
|
678
|
+
if (!response.writableEnded) {
|
|
679
|
+
response.destroy();
|
|
680
|
+
}
|
|
681
|
+
});
|
|
682
|
+
anthropicStream.pipe(response);
|
|
683
|
+
return;
|
|
684
|
+
}
|
|
685
|
+
if (responseFormat === "anthropic-messages" && isJsonResponse && !isEventStream) {
|
|
686
|
+
const rawText = await upstreamResponse.text();
|
|
687
|
+
response.removeHeader("content-length");
|
|
688
|
+
response.setHeader("content-type", "application/json; charset=utf-8");
|
|
689
|
+
try {
|
|
690
|
+
const parsed = JSON.parse(rawText);
|
|
691
|
+
if (!upstreamResponse.ok) {
|
|
692
|
+
response.end(JSON.stringify(transformUpstreamErrorToAnthropicError(parsed, upstreamResponse.status)));
|
|
693
|
+
return;
|
|
694
|
+
}
|
|
695
|
+
const transformed = transformOpenAiChatCompletionToAnthropicMessage(parsed, modelId);
|
|
696
|
+
if (transformed.value) {
|
|
697
|
+
response.end(JSON.stringify(transformed.value));
|
|
698
|
+
return;
|
|
699
|
+
}
|
|
700
|
+
console.error(buildGatewayLogLine(requestProtocol, "compat_error", {
|
|
701
|
+
path: requestPath,
|
|
702
|
+
route: selectedRoute?.routeName ?? null,
|
|
703
|
+
model: modelId,
|
|
704
|
+
detail: transformed.error ?? "Unknown transform error",
|
|
705
|
+
}));
|
|
706
|
+
sendJson(response, 502, {
|
|
707
|
+
error: {
|
|
708
|
+
message: "Gateway failed to translate the OpenAI-compatible response to Anthropic format.",
|
|
709
|
+
detail: transformed.error ?? "Unknown transform error",
|
|
710
|
+
},
|
|
711
|
+
});
|
|
712
|
+
return;
|
|
713
|
+
}
|
|
714
|
+
catch {
|
|
715
|
+
if (!upstreamResponse.ok) {
|
|
716
|
+
response.end(JSON.stringify(transformUpstreamErrorToAnthropicError({
|
|
717
|
+
message: rawText,
|
|
718
|
+
}, upstreamResponse.status)));
|
|
719
|
+
return;
|
|
720
|
+
}
|
|
721
|
+
}
|
|
722
|
+
}
|
|
419
723
|
if (effectiveSwitchNotice && isJsonResponse && !isEventStream) {
|
|
420
724
|
const rawText = await upstreamResponse.text();
|
|
421
725
|
response.removeHeader("content-length");
|
|
@@ -461,6 +765,7 @@ export async function proxyRequest(request, response) {
|
|
|
461
765
|
const errorStatusCode = timeoutLike ? 504 : 502;
|
|
462
766
|
const lastTriedModel = modelCandidates[modelCandidates.length - 1] ?? null;
|
|
463
767
|
logProxyModelRoute({
|
|
768
|
+
protocol: requestProtocol,
|
|
464
769
|
requestedModel,
|
|
465
770
|
usedModel: lastTriedModel,
|
|
466
771
|
routeName: lastAttemptRouteName,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { createServer } from "node:http";
|
|
2
2
|
import { config } from "./config.js";
|
|
3
|
+
import { getModelLoadRankingHealth } from "./model-load-metrics.js";
|
|
3
4
|
import { proxyRequest } from "./proxy.js";
|
|
4
5
|
function sendJson(response, statusCode, payload) {
|
|
5
6
|
if (response.writableEnded) {
|
|
@@ -20,24 +21,33 @@ function resolvePathname(request) {
|
|
|
20
21
|
return rawUrl.startsWith("/") ? rawUrl : `/${rawUrl}`;
|
|
21
22
|
}
|
|
22
23
|
}
|
|
24
|
+
/**
 * Check whether a pathname is handled by the proxy: `/v1`, `/anthropic`, or
 * anything beneath either of them.
 *
 * @param {string} pathname - URL pathname without the query string.
 * @returns {boolean} True when the request should be proxied.
 */
function isGatewayApiPath(pathname) {
    const isV1Path = pathname === "/v1" || pathname.startsWith("/v1/");
    if (isV1Path) {
        return true;
    }
    return pathname === "/anthropic" || pathname.startsWith("/anthropic/");
}
|
|
23
30
|
/**
 * Top-level HTTP request handler for the gateway.
 *
 * - GET/HEAD `/health` answers 200 with status, retry codes, route count, and
 *   the model load ranking over the last 12 hours.
 * - Paths accepted by `isGatewayApiPath` are handed to `proxyRequest`.
 * - Everything else answers 404 with a JSON error.
 *
 * @param {import("node:http").IncomingMessage} request - Incoming request.
 * @param {import("node:http").ServerResponse} response - Response to write to.
 * @returns {Promise<void>}
 */
async function handleRequest(request, response) {
    const method = (request.method ?? "GET").toUpperCase();
    const pathname = resolvePathname(request);
    const isHealthProbe = (method === "GET" || method === "HEAD") && pathname === "/health";
    if (isHealthProbe) {
        const { windowHours, rankedModels } = getModelLoadRankingHealth(12 * 60 * 60 * 1000);
        sendJson(response, 200, {
            status: "ok",
            retryStatusCodes: Array.from(config.retryStatusCodes),
            enabledRouteCount: Object.keys(config.modelRouteMap).length,
            modelLoadWindowHours: windowHours,
            modelLoadRanking: rankedModels,
        });
        return;
    }
    if (!isGatewayApiPath(pathname)) {
        sendJson(response, 404, {
            error: {
                message: "Route not found. Use /v1/*, /anthropic/*, or /health.",
            },
        });
        return;
    }
    await proxyRequest(request, response);
}
|
|
@@ -21,7 +21,7 @@ export async function startGatewayServer(port = config.port, opts = {}) {
|
|
|
21
21
|
});
|
|
22
22
|
const address = server.address();
|
|
23
23
|
const resolvedPort = typeof address === "object" && address ? address.port : port;
|
|
24
|
-
console.log(`Gateway listening on http://${host}:${resolvedPort}
|
|
24
|
+
console.log(`Gateway listening on http://${host}:${resolvedPort}`);
|
|
25
25
|
return {
|
|
26
26
|
close: async () => {
|
|
27
27
|
await new Promise((resolve, reject) => {
|