openclaw-autoproxy 1.0.2 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,16 @@ function parseCsvList(value) {
12
12
  .map((item) => item.trim())
13
13
  .filter(Boolean);
14
14
  }
15
/**
 * Parses a strictly positive base-10 integer, e.g. from an environment variable.
 *
 * The whole (trimmed) value must consist of digits. A bare `Number.parseInt`
 * would silently truncate inputs such as "60s", "1.5" or "1e3" to 60, 1 and 1,
 * which is especially dangerous for millisecond settings; those inputs now
 * yield `fallback` instead.
 *
 * @param {string | number | null | undefined} value - Raw value to parse.
 * @param {number} fallback - Returned when `value` is missing or not a positive integer.
 * @returns {number} The parsed positive integer, or `fallback`.
 */
function parsePositiveInteger(value, fallback) {
  if (!value) {
    return fallback;
  }
  const text = String(value).trim();
  // Digits only (an explicit leading "+" is tolerated, as parseInt did).
  if (!/^\+?\d+$/.test(text)) {
    return fallback;
  }
  const parsed = Number(text);
  // Reject zero and values too large to be represented exactly.
  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
    return fallback;
  }
  return parsed;
}
15
25
  function parseRetryCodes(value) {
16
26
  const defaults = new Set([412, 429, 500, 502, 503, 504]);
17
27
  if (!value) {
@@ -248,6 +258,9 @@ function loadRouteFileConfig() {
248
258
  const host = process.env.HOST ?? "0.0.0.0";
249
259
  const port = Number.parseInt(process.env.PORT ?? "8787", 10);
250
260
  const timeoutMs = Number.parseInt(process.env.REQUEST_TIMEOUT_MS ?? "60000", 10);
261
+ const upstreamMaxConnections = parsePositiveInteger(process.env.UPSTREAM_MAX_CONNECTIONS, 200);
262
+ const upstreamKeepAliveTimeoutMs = parsePositiveInteger(process.env.UPSTREAM_KEEPALIVE_TIMEOUT_MS, 60_000);
263
+ const upstreamKeepAliveMaxTimeoutMs = parsePositiveInteger(process.env.UPSTREAM_KEEPALIVE_MAX_TIMEOUT_MS, 300_000);
251
264
  const upstreamBaseUrl = (process.env.UPSTREAM_BASE_URL ?? "https://api.openai.com").replace(/\/+$/, "");
252
265
  const routeFileConfig = loadRouteFileConfig();
253
266
  if (!Number.isInteger(port) || port < 1 || port > 65535) {
@@ -262,6 +275,9 @@ export const config = {
262
275
  timeoutMs,
263
276
  upstreamBaseUrl,
264
277
  upstreamApiKey: process.env.UPSTREAM_API_KEY ?? "",
278
+ upstreamMaxConnections,
279
+ upstreamKeepAliveTimeoutMs,
280
+ upstreamKeepAliveMaxTimeoutMs,
265
281
  retryStatusCodes: routeFileConfig.retryStatusCodes ?? parseRetryCodes(process.env.RETRY_STATUS_CODES),
266
282
  globalFallbackModels: parseCsvList(process.env.GLOBAL_FALLBACK_MODELS),
267
283
  modelFallbackMap: parseModelFallbackMap(process.env.MODEL_FALLBACK_MAP),
@@ -0,0 +1,114 @@
1
+ const DEFAULT_WINDOW_MS = 12 * 60 * 60 * 1000; // 12 hours in ms; default window for pruning and for the ranking query
2
+ const DEFAULT_MAX_SAMPLES_PER_MODEL = 5000; // per-model cap; recordModelLoadSample drops the oldest samples beyond it
3
+ const modelSamples = new Map(); // model id -> array of { at, loadMs } samples, appended in arrival order
4
/**
 * Picks the q-quantile from an ascending array by flooring the fractional index.
 *
 * @param {number[]} values - Values sorted in ascending order.
 * @param {number} q - Requested quantile; clamped into [0, 1].
 * @returns {number} The selected element, or 0 for an empty array.
 */
function quantileFromSorted(values, q) {
  const count = values.length;
  if (count === 0) {
    return 0;
  }
  const lastIndex = count - 1;
  const fraction = Math.max(0, Math.min(1, q));
  const position = Math.floor(lastIndex * fraction);
  // Fall back to the last element if the computed position is not a valid index.
  return values[position] ?? values[lastIndex] ?? 0;
}
12
/**
 * Rounds a number to two decimal places.
 *
 * @param {number} value - Value to round.
 * @returns {number} `value` rounded to the nearest hundredth.
 */
function roundMs(value) {
  const hundredths = Math.round(value * 100);
  return hundredths / 100;
}
15
/**
 * Drops the leading run of samples whose `at` timestamp is older than `cutoffAt`.
 *
 * Scanning stops at the first entry that is falsy or not older than the cutoff.
 *
 * @param {Array<{ at: number, loadMs: number }>} samples - Samples for one model.
 * @param {number} cutoffAt - Timestamp (ms); samples with `at < cutoffAt` are expired.
 * @returns {Array} The same array instance when nothing was dropped, otherwise a new array.
 */
function pruneModelSamples(samples, cutoffAt) {
  const firstKept = samples.findIndex((sample) => !(sample && sample.at < cutoffAt));
  // -1 means every sample is expired, so the kept range starts past the end.
  const startIndex = firstKept === -1 ? samples.length : firstKept;
  // Returning the identical instance lets callers detect "unchanged" by reference.
  return startIndex > 0 ? samples.slice(startIndex) : samples;
}
25
/**
 * Removes expired samples from every model in `modelSamples`, deleting models
 * that end up with no samples at all.
 *
 * @param {number} cutoffAt - Timestamp (ms); samples older than this are dropped.
 */
function pruneExpiredSamples(cutoffAt) {
  modelSamples.forEach((samples, model) => {
    const remaining = pruneModelSamples(samples, cutoffAt);
    if (remaining.length === 0) {
      modelSamples.delete(model);
    } else if (remaining !== samples) {
      // Only write back when pruning actually produced a new array.
      modelSamples.set(model, remaining);
    }
  });
}
37
/**
 * Records one load-latency observation for a model.
 *
 * Calls without a model, or with a duration that is not a finite positive
 * number, are ignored. After recording, the model's list is capped at
 * DEFAULT_MAX_SAMPLES_PER_MODEL (oldest dropped first) and samples older than
 * DEFAULT_WINDOW_MS are pruned for all models.
 *
 * @param {string} model - Model identifier the sample belongs to.
 * @param {number} loadMs - Observed duration in milliseconds.
 */
export function recordModelLoadSample(model, loadMs) {
  const hasUsableDuration = Number.isFinite(loadMs) && loadMs > 0;
  if (!model || !hasUsableDuration) {
    return;
  }
  const recordedAt = Date.now();
  const bucket = modelSamples.get(model) ?? [];
  bucket.push({ at: recordedAt, loadMs });
  const overflow = bucket.length - DEFAULT_MAX_SAMPLES_PER_MODEL;
  if (overflow > 0) {
    // Evict from the front: the oldest samples sit at the start of the list.
    bucket.splice(0, overflow);
  }
  modelSamples.set(model, bucket);
  pruneExpiredSamples(recordedAt - DEFAULT_WINDOW_MS);
}
58
/**
 * Computes aggregate load statistics for one model.
 *
 * @param {string} model - Model identifier.
 * @param {Array<{ at: number, loadMs: number }>} samples - Samples to summarize.
 * @returns {object | null} Summary with count, avg/p50/p95/min/max (rounded to
 *   two decimals) and the ISO timestamp of the last sample; null when empty.
 */
function summarizeModel(model, samples) {
  const sampleCount = samples.length;
  if (sampleCount === 0) {
    return null;
  }
  const sortedLoads = samples.map(({ loadMs }) => loadMs);
  sortedLoads.sort((left, right) => left - right);
  let sum = 0;
  for (const load of sortedLoads) {
    sum += load;
  }
  const mean = sum / sortedLoads.length;
  const smallest = sortedLoads[0] ?? 0;
  const largest = sortedLoads[sortedLoads.length - 1] ?? 0;
  // The last array entry is taken as the most recent sample.
  const lastSampleAt = samples[sampleCount - 1]?.at ?? Date.now();
  return {
    model,
    sampleCount,
    avgLoadMs: roundMs(mean),
    p50LoadMs: roundMs(quantileFromSorted(sortedLoads, 0.5)),
    p95LoadMs: roundMs(quantileFromSorted(sortedLoads, 0.95)),
    minLoadMs: roundMs(smallest),
    maxLoadMs: roundMs(largest),
    lastSeenAt: new Date(lastSampleAt).toISOString(),
  };
}
79
/**
 * Ranks models by observed load latency within a time window.
 *
 * Models are ordered by ascending average latency, then ascending p95, then
 * descending sample count.
 *
 * Querying is non-destructive with respect to the retention window: stored
 * samples are only pruned once they are older than DEFAULT_WINDOW_MS (or older
 * than the requested window, if that is longer). A query with a shorter window
 * filters a per-call view instead of deleting samples that a later, wider
 * query would still need.
 *
 * @param {number} [windowMs=DEFAULT_WINDOW_MS] - Window length in ms; values
 *   that are not finite and positive fall back to DEFAULT_WINDOW_MS.
 * @returns {{ windowHours: number, rankedModels: object[] }} The effective
 *   window in hours and the per-model summaries with a 1-based `rank`.
 */
export function getModelLoadRankingHealth(windowMs = DEFAULT_WINDOW_MS) {
  const normalizedWindowMs = Number.isFinite(windowMs) && windowMs > 0 ? windowMs : DEFAULT_WINDOW_MS;
  const now = Date.now();
  const cutoffAt = now - normalizedWindowMs;
  // Never prune storage more aggressively than the retention window.
  const retentionCutoffAt = Math.min(cutoffAt, now - DEFAULT_WINDOW_MS);
  pruneExpiredSamples(retentionCutoffAt);
  const summaries = [];
  for (const [model, samples] of modelSamples.entries()) {
    // pruneModelSamples returns a copy (or the untouched array); storage is not modified here.
    const inWindow = pruneModelSamples(samples, cutoffAt);
    const summary = summarizeModel(model, inWindow);
    if (summary) {
      summaries.push(summary);
    }
  }
  summaries.sort((a, b) => {
    if (a.avgLoadMs !== b.avgLoadMs) {
      return a.avgLoadMs - b.avgLoadMs;
    }
    if (a.p95LoadMs !== b.p95LoadMs) {
      return a.p95LoadMs - b.p95LoadMs;
    }
    return b.sampleCount - a.sampleCount;
  });
  return {
    windowHours: roundMs(normalizedWindowMs / (60 * 60 * 1000)),
    rankedModels: summaries.map((entry, index) => ({
      rank: index + 1,
      ...entry,
    })),
  };
}
@@ -1,5 +1,8 @@
1
1
  import { PassThrough, Readable } from "node:stream";
2
+ import { Agent } from "undici";
3
+ import { createAnthropicMessagesEventStreamTransformer, maybeTransformAnthropicMessagesRequest, transformOpenAiChatCompletionToAnthropicMessage, transformUpstreamErrorToAnthropicError, } from "./anthropic-compat.js";
2
4
  import { config } from "./config.js";
5
+ import { recordModelLoadSample } from "./model-load-metrics.js";
3
6
  const HOP_BY_HOP_HEADERS = new Set([
4
7
  "connection",
5
8
  "keep-alive",
@@ -13,8 +16,37 @@ const HOP_BY_HOP_HEADERS = new Set([
13
16
  const MAX_REQUEST_BODY_BYTES = 50 * 1024 * 1024;
14
17
  const AUTO_MODEL = "auto";
15
18
  let autoModelCursor = 0;
19
+ const upstreamAgent = new Agent({ // shared undici dispatcher handed to the upstream fetch helpers in this module
20
+ connections: config.upstreamMaxConnections, // from UPSTREAM_MAX_CONNECTIONS in config
21
+ pipelining: 1,
22
+ keepAliveTimeout: config.upstreamKeepAliveTimeoutMs, // from UPSTREAM_KEEPALIVE_TIMEOUT_MS in config
23
+ keepAliveMaxTimeout: config.upstreamKeepAliveMaxTimeoutMs, // from UPSTREAM_KEEPALIVE_MAX_TIMEOUT_MS in config
24
+ });
25
+ const fetchWithDispatcher = fetch; // alias of the global fetch; callers pass the extra `dispatcher` option through it
26
/**
 * Renders a single value for a key=value log line.
 *
 * @param {unknown} value - Value to render.
 * @returns {string} "-" for null, undefined or the empty string; a JSON-quoted
 *   string when the text contains whitespace or a double quote; otherwise the
 *   plain string form.
 */
function formatGatewayLogValue(value) {
  if (value == null || value === "") {
    return "-";
  }
  const text = String(value);
  const needsQuoting = /\s|"/.test(text);
  if (needsQuoting) {
    return JSON.stringify(text);
  }
  return text;
}
33
/**
 * Builds one space-separated gateway log line: the "[gateway]" tag, the
 * protocol and event, then every entry of `fields` as key=value.
 *
 * @param {unknown} protocol - Protocol label for the request.
 * @param {unknown} event - Event name.
 * @param {Record<string, unknown>} fields - Additional fields, emitted in insertion order.
 * @returns {string} The formatted log line.
 */
function buildGatewayLogLine(protocol, event, fields) {
  const header = [
    "[gateway]",
    `protocol=${formatGatewayLogValue(protocol)}`,
    `event=${formatGatewayLogValue(event)}`,
  ];
  const fieldParts = Object.entries(fields).map(
    ([key, value]) => `${key}=${formatGatewayLogValue(value)}`,
  );
  return header.concat(fieldParts).join(" ");
}
16
44
  function logProxyModelRoute(params) {
17
- console.log(`[gateway] requested_model=${params.requestedModel ?? "-"} used_model=${params.usedModel ?? "-"} route=${params.routeName ?? "-"}`);
45
+ console.log(buildGatewayLogLine(params.protocol, "routed", {
46
+ requested_model: params.requestedModel,
47
+ used_model: params.usedModel,
48
+ route: params.routeName,
49
+ }));
18
50
  }
19
51
  function resolveRouteNameForModel(modelId) {
20
52
  if (modelId && config.modelRouteMap[modelId]) {
@@ -23,7 +55,27 @@ function resolveRouteNameForModel(modelId) {
23
55
  return config.modelRouteMap["*"]?.routeName ?? null;
24
56
  }
25
57
  function logProxyModelSwitch(params) {
26
- console.log(`[gateway] switch trigger_status=${params.triggerStatus} from_model=${params.fromModel ?? "-"} from_route=${params.fromRoute ?? "-"} to_model=${params.toModel ?? "-"} to_route=${params.toRoute ?? "-"}`);
58
+ console.log(buildGatewayLogLine(params.protocol, "switch", {
59
+ trigger_status: params.triggerStatus,
60
+ from_model: params.fromModel,
61
+ from_route: params.fromRoute,
62
+ to_model: params.toModel,
63
+ to_route: params.toRoute,
64
+ }));
65
+ }
66
/**
 * Classifies a request path as "anthropic" or "openai".
 *
 * A path is "anthropic" when it is under the /anthropic prefix or when
 * isAnthropicApiPath accepts it; everything else is "openai".
 *
 * @param {string} requestPath - Request path, optionally with a query string.
 * @returns {"anthropic" | "openai"} The protocol label.
 */
function resolveGatewayProtocolFromPath(requestPath) {
  const { pathname } = parsePathnameAndSearch(requestPath);
  const isUnderAnthropicPrefix = pathname === "/anthropic" || pathname.startsWith("/anthropic/");
  return isUnderAnthropicPrefix || isAnthropicApiPath(pathname) ? "anthropic" : "openai";
}
75
/**
 * Determines the protocol label for an incoming request from its raw URL.
 *
 * @param {{ url?: string }} request - Incoming request; a missing URL is treated as "/".
 * @returns {"anthropic" | "openai"} The protocol label.
 */
function resolveGatewayProtocol(request) {
  const rawUrl = request.url ?? "/";
  // Ensure a leading slash before classifying the path.
  const pathLike = rawUrl.startsWith("/") ? rawUrl : `/${rawUrl}`;
  return resolveGatewayProtocolFromPath(pathLike);
}
28
80
  function sendJson(response, statusCode, payload) {
29
81
  if (response.writableEnded) {
@@ -39,11 +91,25 @@ function normalizeRequestPath(request) {
39
91
  const rawUrl = request.url ?? "/";
40
92
  try {
41
93
  const parsed = new URL(rawUrl, "http://localhost");
42
- return `${parsed.pathname}${parsed.search}`;
94
+ return normalizeGatewayRequestPath(`${parsed.pathname}${parsed.search}`);
43
95
  }
44
96
  catch {
45
- return rawUrl.startsWith("/") ? rawUrl : `/${rawUrl}`;
97
+ const normalizedRawUrl = rawUrl.startsWith("/") ? rawUrl : `/${rawUrl}`;
98
+ return normalizeGatewayRequestPath(normalizedRawUrl);
99
+ }
100
+ }
101
/**
 * Maps paths under the /anthropic prefix onto the /v1 path space.
 *
 * - "/anthropic"            -> "/v1"
 * - "/anthropic/v1[/...]"   -> "/v1[/...]" (prefix stripped)
 * - "/anthropic/<other>"    -> "/v1/<other>"
 * - anything else           -> unchanged
 * The query string is always preserved.
 *
 * @param {string} requestPath - Request path, optionally with a query string.
 * @returns {string} The normalized path plus query string.
 */
function normalizeGatewayRequestPath(requestPath) {
  const { pathname, search } = parsePathnameAndSearch(requestPath);
  const prefixLength = "/anthropic".length;
  let rewrittenPath = pathname;
  if (pathname === "/anthropic") {
    rewrittenPath = "/v1";
  } else if (pathname === "/anthropic/v1" || pathname.startsWith("/anthropic/v1/")) {
    rewrittenPath = pathname.slice(prefixLength);
  } else if (pathname.startsWith("/anthropic/")) {
    rewrittenPath = `/v1${pathname.slice(prefixLength)}`;
  }
  return `${rewrittenPath}${search}`;
}
48
114
  function rotateCandidates(candidates, startIndex) {
49
115
  if (candidates.length <= 1) {
@@ -71,31 +137,102 @@ function buildModelCandidates(requestedModel) {
71
137
  // Non-auto requests are pinned to the exact model specified by client.
72
138
  return [requestedModel];
73
139
  }
74
- function buildRoutedUpstreamUrl(request, selectedRoute) {
140
/**
 * Splits a request target into its pathname and query string.
 *
 * Targets that start with "/" are treated strictly as paths. Resolving such a
 * target against a base URL would interpret a leading "//" as a
 * scheme-relative reference ("//a/b" -> host "a", pathname "/b") and silently
 * drop the first path segment, so the target is appended to a fixed origin
 * instead. Other inputs (absolute URLs, relative references) are still
 * resolved against "http://localhost".
 *
 * @param {string} requestPath - Request path or URL, optionally with a query string.
 * @returns {{ pathname: string, search: string }} The pathname and the query
 *   string (including the leading "?", or "" when absent).
 */
function parsePathnameAndSearch(requestPath) {
  const candidate = requestPath.startsWith("/") ? `http://localhost${requestPath}` : requestPath;
  try {
    const parsed = new URL(candidate, "http://localhost");
    return {
      pathname: parsed.pathname,
      search: parsed.search,
    };
  }
  catch {
    // Unparseable input: fall back to a plain split on the first "?".
    const [pathnamePart, ...searchParts] = requestPath.split("?");
    return {
      pathname: pathnamePart || "/",
      search: searchParts.length > 0 ? `?${searchParts.join("?")}` : "",
    };
  }
}
156
/**
 * Tells whether a pathname is one of the Anthropic-style endpoints recognized
 * here: /v1/messages (and anything beneath it), /v1/models, or /v1/complete.
 *
 * @param {string} pathname - Pathname without query string.
 * @returns {boolean} True when the pathname matches one of those endpoints.
 */
function isAnthropicApiPath(pathname) {
  const exactMatches = ["/v1/messages", "/v1/models", "/v1/complete"];
  if (exactMatches.includes(pathname)) {
    return true;
  }
  return pathname.startsWith("/v1/messages/");
}
162
/**
 * Re-targets a route URL that is pinned to ".../v1/chat/completions" so that it
 * points at the Anthropic-style path of the incoming request instead.
 *
 * @param {string} routeUrl - Configured route URL.
 * @param {string} requestPath - Incoming request path, optionally with a query string.
 * @returns {string | null} The rewritten URL, or null when the request is not
 *   an Anthropic API path, the route URL cannot be parsed, or the route path
 *   does not end in "/v1/chat/completions".
 */
function rewriteFixedChatCompletionsRouteUrlForAnthropic(routeUrl, requestPath) {
  const incoming = parsePathnameAndSearch(requestPath);
  if (!isAnthropicApiPath(incoming.pathname)) {
    return null;
  }
  let target;
  try {
    target = new URL(routeUrl);
  }
  catch {
    return null;
  }
  const chatSuffix = "/v1/chat/completions";
  // Ignore trailing slashes on the configured route path before matching the suffix.
  const trimmedRoutePath = target.pathname.replace(/\/+$/, "");
  if (!trimmedRoutePath.endsWith(chatSuffix)) {
    return null;
  }
  const prefixPath = trimmedRoutePath.slice(0, trimmedRoutePath.length - chatSuffix.length);
  // Collapse duplicate slashes produced by joining the prefix and the request path.
  target.pathname = `${prefixPath}${incoming.pathname}`.replace(/\/{2,}/g, "/");
  target.search = incoming.search;
  return target.toString();
}
184
/**
 * Builds the upstream URL for a request path and an optional route.
 *
 * - No route: the default upstream base URL plus the request path.
 * - Base-URL route: the route URL (trailing slashes removed) plus the request
 *   path, without duplicating a "/v1" segment shared by both.
 * - Fixed-URL route: the route URL itself, unless it can be re-targeted for an
 *   Anthropic-style request.
 *
 * @param {string} requestPath - Normalized request path, optionally with a query string.
 * @param {{ url: string, isBaseUrl?: boolean } | null} selectedRoute - Route to use, if any.
 * @returns {string} The upstream URL.
 */
function buildRoutedUpstreamUrl(requestPath, selectedRoute) {
  if (!selectedRoute) {
    return `${config.upstreamBaseUrl}${requestPath}`;
  }
  if (selectedRoute.isBaseUrl) {
    const base = selectedRoute.url.replace(/\/+$/, "");
    const sharesV1Segment = base.endsWith("/v1") && requestPath.startsWith("/v1");
    const suffix = sharesV1Segment ? requestPath.slice(3) : requestPath;
    return `${base}${suffix}`;
  }
  // Backward-compatible Anthropic support when route URL is fixed to /v1/chat/completions.
  const anthropicUrl = rewriteFixedChatCompletionsRouteUrlForAnthropic(selectedRoute.url, requestPath);
  return anthropicUrl || selectedRoute.url;
}
88
- function resolveUpstreamTarget(request, modelId) {
202
/**
 * Selects the route for a model and computes the upstream URL for it.
 *
 * The model's own entry in `config.modelRouteMap` wins; otherwise the "*"
 * wildcard entry is used; otherwise no route is selected.
 *
 * @param {string} requestPath - Normalized request path, optionally with a query string.
 * @param {string | null | undefined} modelId - Model identifier, if known.
 * @returns {{ upstreamUrl: string, selectedRoute: object | null }} Target URL and chosen route.
 */
function resolveUpstreamTarget(requestPath, modelId) {
  const routes = config.modelRouteMap;
  let selectedRoute = null;
  if (modelId) {
    selectedRoute = routes[modelId] ?? null;
  }
  if (selectedRoute === null) {
    selectedRoute = routes["*"] ?? null;
  }
  const upstreamUrl = buildRoutedUpstreamUrl(requestPath, selectedRoute);
  return { upstreamUrl, selectedRoute };
}
211
/**
 * Logs an "upstream_error" line including a whitespace-collapsed excerpt
 * (at most 2000 characters) of the upstream response body.
 *
 * The body is read from a clone so the original response stays consumable.
 * If the body cannot be read the detail is "<unavailable>"; if it is empty
 * the detail is "-".
 *
 * @param {{ response: Response, protocol: unknown, requestPath: unknown, routeName: unknown, modelId: unknown, upstreamUrl: unknown }} params
 * @returns {Promise<void>}
 */
async function logUpstreamErrorResponse(params) {
  const readDetail = async () => {
    try {
      const bodyText = await params.response.clone().text();
      const collapsed = bodyText.replace(/\s+/g, " ").trim();
      return collapsed ? collapsed.slice(0, 2000) : "-";
    }
    catch {
      return "<unavailable>";
    }
  };
  const detail = await readDetail();
  const line = buildGatewayLogLine(params.protocol, "upstream_error", {
    status: params.response.status,
    path: params.requestPath,
    route: params.routeName,
    model: params.modelId,
    upstream: params.upstreamUrl,
    detail,
  });
  console.error(line);
}
97
232
  function buildUpstreamHeaders(reqHeaders, bodyLength, selectedRoute) {
98
233
  const headers = new Headers();
234
+ const selectedAuthHeader = selectedRoute?.authHeader || "authorization";
235
+ const conflictingAuthHeaders = ["authorization", "x-api-key", "api-key"];
99
236
  for (const [key, value] of Object.entries(reqHeaders)) {
100
237
  if (value === undefined) {
101
238
  continue;
@@ -106,13 +243,20 @@ function buildUpstreamHeaders(reqHeaders, bodyLength, selectedRoute) {
106
243
  }
107
244
  headers.set(key, Array.isArray(value) ? value.join(",") : String(value));
108
245
  }
246
+ if (selectedRoute?.apiKey) {
247
+ for (const headerName of conflictingAuthHeaders) {
248
+ if (headerName !== selectedAuthHeader) {
249
+ headers.delete(headerName);
250
+ }
251
+ }
252
+ }
109
253
  if (selectedRoute?.headers) {
110
254
  for (const [key, value] of Object.entries(selectedRoute.headers)) {
111
255
  headers.set(key, value);
112
256
  }
113
257
  }
114
258
  if (selectedRoute?.apiKey) {
115
- const authHeader = selectedRoute.authHeader || "authorization";
259
+ const authHeader = selectedAuthHeader;
116
260
  const authPrefix = selectedRoute.authPrefix ?? "Bearer ";
117
261
  if (!headers.has(authHeader)) {
118
262
  headers.set(authHeader, `${authPrefix}${selectedRoute.apiKey}`);
@@ -224,12 +368,63 @@ async function fetchWithTimeout(url, options, timeoutMs) {
224
368
  const controller = new AbortController();
225
369
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
226
370
  try {
227
- return await fetch(url, { ...options, signal: controller.signal });
371
+ return await fetchWithDispatcher(url, {
372
+ ...options,
373
+ signal: controller.signal,
374
+ dispatcher: upstreamAgent,
375
+ });
228
376
  }
229
377
  finally {
230
378
  clearTimeout(timeoutId);
231
379
  }
232
380
  }
381
/**
 * Creates an AbortSignal that fires when the client goes away.
 *
 * The signal aborts on the request's "aborted" event, or when the response
 * emits "close" before it has finished writing.
 *
 * @param {import("node:events").EventEmitter} request - Incoming request.
 * @param {import("node:events").EventEmitter & { writableEnded: boolean }} response - Outgoing response.
 * @returns {AbortSignal} Signal that aborts at most once.
 */
function createClientAbortSignal(request, response) {
  const controller = new AbortController();
  let hasFired = false;
  const fireOnce = () => {
    if (!hasFired) {
      hasFired = true;
      controller.abort();
    }
  };
  request.once("aborted", fireOnce);
  response.once("close", () => {
    // A close after the response was fully written is a normal completion.
    if (response.writableEnded) {
      return;
    }
    fireOnce();
  });
  return controller.signal;
}
399
/**
 * Fetches `url` through the shared upstream dispatcher, aborting when either
 * the timeout elapses or the client signal fires before the fetch settles.
 *
 * Without a client signal this defers to fetchWithTimeout.
 *
 * NOTE(review): the timer and the client-abort listener are both released as
 * soon as the fetch promise settles, so a client disconnect while the response
 * body is still being streamed does not appear to abort the upstream request
 * from here — confirm whether that is handled elsewhere.
 *
 * @param {string} url - Upstream URL.
 * @param {object} options - Fetch options; `signal` and `dispatcher` are overridden.
 * @param {number} timeoutMs - Milliseconds before the request is aborted.
 * @param {AbortSignal} [clientSignal] - Signal that fires when the client goes away.
 * @returns {Promise<Response>} The upstream response.
 */
async function fetchWithTimeoutAndClientSignal(url, options, timeoutMs, clientSignal) {
  if (!clientSignal) {
    return fetchWithTimeout(url, options, timeoutMs);
  }
  const upstreamController = new AbortController();
  const timer = setTimeout(() => upstreamController.abort(), timeoutMs);
  const abortUpstream = () => {
    if (upstreamController.signal.aborted) {
      return;
    }
    upstreamController.abort();
  };
  if (clientSignal.aborted) {
    // The client is already gone: abort immediately instead of subscribing.
    abortUpstream();
  }
  else {
    clientSignal.addEventListener("abort", abortUpstream, { once: true });
  }
  try {
    const fetchOptions = {
      ...options,
      signal: upstreamController.signal,
      dispatcher: upstreamAgent,
    };
    return await fetchWithDispatcher(url, fetchOptions);
  }
  finally {
    clearTimeout(timer);
    clientSignal.removeEventListener("abort", abortUpstream);
  }
}
233
428
  async function disposeBody(response) {
234
429
  if (!response.body) {
235
430
  return;
@@ -295,6 +490,9 @@ async function readRequestBody(request) {
295
490
  export async function proxyRequest(request, response) {
296
491
  const method = (request.method ?? "GET").toUpperCase();
297
492
  const supportsBody = method !== "GET" && method !== "HEAD";
493
+ const clientSignal = createClientAbortSignal(request, response);
494
+ const normalizedRequestPath = normalizeRequestPath(request);
495
+ const requestProtocol = resolveGatewayProtocol(request);
298
496
  let incomingBody = Buffer.alloc(0);
299
497
  if (supportsBody) {
300
498
  try {
@@ -344,23 +542,62 @@ export async function proxyRequest(request, response) {
344
542
  let switchNotice = null;
345
543
  for (let attemptIndex = 0; attemptIndex < modelCandidates.length; attemptIndex += 1) {
346
544
  const modelId = modelCandidates[attemptIndex];
347
- let bodyBuffer = supportsBody && incomingBody.length > 0 ? incomingBody : undefined;
348
- if (supportsBody && parsedJsonBody && modelId) {
349
- bodyBuffer = Buffer.from(JSON.stringify({
545
+ let requestPath = normalizedRequestPath;
546
+ let responseFormat = null;
547
+ let requestJsonPayload = null;
548
+ if (supportsBody && parsedJsonBody) {
549
+ requestJsonPayload = {
350
550
  ...parsedJsonBody,
351
- model: modelId,
352
- }), "utf8");
551
+ ...(modelId ? { model: modelId } : {}),
552
+ };
353
553
  }
354
- const { upstreamUrl, selectedRoute } = resolveUpstreamTarget(request, modelId);
554
+ let { upstreamUrl, selectedRoute } = resolveUpstreamTarget(requestPath, modelId);
355
555
  lastAttemptRouteName = selectedRoute?.routeName ?? null;
556
+ if (requestJsonPayload) {
557
+ const compatRequest = maybeTransformAnthropicMessagesRequest({
558
+ requestPath,
559
+ upstreamUrl,
560
+ body: requestJsonPayload,
561
+ });
562
+ if (compatRequest.error) {
563
+ console.error(buildGatewayLogLine(requestProtocol, "compat_error", {
564
+ path: requestPath,
565
+ route: selectedRoute?.routeName ?? null,
566
+ model: modelId,
567
+ detail: compatRequest.error,
568
+ }));
569
+ sendJson(response, 400, {
570
+ error: {
571
+ message: compatRequest.error,
572
+ },
573
+ });
574
+ return;
575
+ }
576
+ requestPath = compatRequest.requestPath;
577
+ requestJsonPayload = compatRequest.body;
578
+ responseFormat = compatRequest.responseFormat;
579
+ if (responseFormat) {
580
+ upstreamUrl = buildRoutedUpstreamUrl(requestPath, selectedRoute);
581
+ }
582
+ }
583
+ let bodyBuffer = supportsBody && incomingBody.length > 0 ? incomingBody : undefined;
584
+ if (supportsBody && requestJsonPayload) {
585
+ bodyBuffer = Buffer.from(JSON.stringify(requestJsonPayload), "utf8");
586
+ }
356
587
  const requestBody = bodyBuffer ? new Uint8Array(bodyBuffer) : undefined;
357
588
  const headers = buildUpstreamHeaders(request.headers, bodyBuffer ? bodyBuffer.length : undefined, selectedRoute);
358
589
  try {
359
- const upstreamResponse = await fetchWithTimeout(upstreamUrl, {
590
+ const attemptStartedAt = Date.now();
591
+ const upstreamResponse = await fetchWithTimeoutAndClientSignal(upstreamUrl, {
360
592
  method,
361
593
  headers,
362
594
  body: requestBody,
363
- }, config.timeoutMs);
595
+ }, config.timeoutMs, clientSignal);
596
+ const headerLoadMs = Date.now() - attemptStartedAt;
597
+ const modelForMetric = modelId ?? requestedModel;
598
+ if (upstreamResponse.ok) {
599
+ recordModelLoadSample(modelForMetric, headerLoadMs);
600
+ }
364
601
  const contentType = (upstreamResponse.headers.get("content-type") ?? "").toLowerCase();
365
602
  const isEventStream = contentType.includes("text/event-stream");
366
603
  const isJsonResponse = contentType.includes("application/json");
@@ -378,6 +615,7 @@ export async function proxyRequest(request, response) {
378
615
  const triggerStatus = retryTriggerStatus ?? upstreamResponse.status;
379
616
  const nextRouteName = resolveRouteNameForModel(nextModel);
380
617
  logProxyModelSwitch({
618
+ protocol: requestProtocol,
381
619
  triggerStatus,
382
620
  fromModel: modelId,
383
621
  toModel: nextModel,
@@ -396,6 +634,16 @@ export async function proxyRequest(request, response) {
396
634
  await disposeBody(upstreamResponse);
397
635
  continue;
398
636
  }
637
+ if (!upstreamResponse.ok) {
638
+ await logUpstreamErrorResponse({
639
+ protocol: requestProtocol,
640
+ requestPath,
641
+ upstreamUrl,
642
+ routeName: selectedRoute?.routeName ?? null,
643
+ modelId,
644
+ response: upstreamResponse,
645
+ });
646
+ }
399
647
  const attemptCount = attemptIndex + 1;
400
648
  const effectiveSwitchNotice = switchNotice;
401
649
  copyResponseHeaders(upstreamResponse, response);
@@ -407,6 +655,7 @@ export async function proxyRequest(request, response) {
407
655
  response.setHeader("x-gateway-switched", "1");
408
656
  }
409
657
  logProxyModelRoute({
658
+ protocol: requestProtocol,
410
659
  requestedModel,
411
660
  usedModel: modelId,
412
661
  routeName: selectedRoute?.routeName ?? null,
@@ -416,6 +665,61 @@ export async function proxyRequest(request, response) {
416
665
  response.end();
417
666
  return;
418
667
  }
668
+ if (responseFormat === "anthropic-messages" && isEventStream) {
669
+ const nodeStream = Readable.fromWeb(upstreamResponse.body);
670
+ const anthropicStream = nodeStream.pipe(createAnthropicMessagesEventStreamTransformer(modelId));
671
+ response.removeHeader("content-length");
672
+ response.setHeader("content-type", "text/event-stream; charset=utf-8");
673
+ if (effectiveSwitchNotice) {
674
+ createSsePrefixedStream(anthropicStream, effectiveSwitchNotice).pipe(response);
675
+ return;
676
+ }
677
+ anthropicStream.on("error", () => {
678
+ if (!response.writableEnded) {
679
+ response.destroy();
680
+ }
681
+ });
682
+ anthropicStream.pipe(response);
683
+ return;
684
+ }
685
+ if (responseFormat === "anthropic-messages" && isJsonResponse && !isEventStream) {
686
+ const rawText = await upstreamResponse.text();
687
+ response.removeHeader("content-length");
688
+ response.setHeader("content-type", "application/json; charset=utf-8");
689
+ try {
690
+ const parsed = JSON.parse(rawText);
691
+ if (!upstreamResponse.ok) {
692
+ response.end(JSON.stringify(transformUpstreamErrorToAnthropicError(parsed, upstreamResponse.status)));
693
+ return;
694
+ }
695
+ const transformed = transformOpenAiChatCompletionToAnthropicMessage(parsed, modelId);
696
+ if (transformed.value) {
697
+ response.end(JSON.stringify(transformed.value));
698
+ return;
699
+ }
700
+ console.error(buildGatewayLogLine(requestProtocol, "compat_error", {
701
+ path: requestPath,
702
+ route: selectedRoute?.routeName ?? null,
703
+ model: modelId,
704
+ detail: transformed.error ?? "Unknown transform error",
705
+ }));
706
+ sendJson(response, 502, {
707
+ error: {
708
+ message: "Gateway failed to translate the OpenAI-compatible response to Anthropic format.",
709
+ detail: transformed.error ?? "Unknown transform error",
710
+ },
711
+ });
712
+ return;
713
+ }
714
+ catch {
715
+ if (!upstreamResponse.ok) {
716
+ response.end(JSON.stringify(transformUpstreamErrorToAnthropicError({
717
+ message: rawText,
718
+ }, upstreamResponse.status)));
719
+ return;
720
+ }
721
+ }
722
+ }
419
723
  if (effectiveSwitchNotice && isJsonResponse && !isEventStream) {
420
724
  const rawText = await upstreamResponse.text();
421
725
  response.removeHeader("content-length");
@@ -461,6 +765,7 @@ export async function proxyRequest(request, response) {
461
765
  const errorStatusCode = timeoutLike ? 504 : 502;
462
766
  const lastTriedModel = modelCandidates[modelCandidates.length - 1] ?? null;
463
767
  logProxyModelRoute({
768
+ protocol: requestProtocol,
464
769
  requestedModel,
465
770
  usedModel: lastTriedModel,
466
771
  routeName: lastAttemptRouteName,
@@ -1,5 +1,6 @@
1
1
  import { createServer } from "node:http";
2
2
  import { config } from "./config.js";
3
+ import { getModelLoadRankingHealth } from "./model-load-metrics.js";
3
4
  import { proxyRequest } from "./proxy.js";
4
5
  function sendJson(response, statusCode, payload) {
5
6
  if (response.writableEnded) {
@@ -20,24 +21,33 @@ function resolvePathname(request) {
20
21
  return rawUrl.startsWith("/") ? rawUrl : `/${rawUrl}`;
21
22
  }
22
23
  }
24
/**
 * Tells whether a pathname belongs to the proxied API surface, i.e. is "/v1",
 * "/anthropic", or anything beneath either of them.
 *
 * @param {string} pathname - Pathname of the incoming request.
 * @returns {boolean} True when the request should be proxied.
 */
function isGatewayApiPath(pathname) {
  for (const root of ["/v1", "/anthropic"]) {
    if (pathname === root || pathname.startsWith(`${root}/`)) {
      return true;
    }
  }
  return false;
}
23
30
  async function handleRequest(request, response) {
24
31
  const method = (request.method ?? "GET").toUpperCase();
25
32
  const pathname = resolvePathname(request);
26
33
  if ((method === "GET" || method === "HEAD") && pathname === "/health") {
34
+ const modelLoadHealth = getModelLoadRankingHealth(12 * 60 * 60 * 1000);
27
35
  sendJson(response, 200, {
28
36
  status: "ok",
29
37
  retryStatusCodes: Array.from(config.retryStatusCodes),
30
38
  enabledRouteCount: Object.keys(config.modelRouteMap).length,
39
+ modelLoadWindowHours: modelLoadHealth.windowHours,
40
+ modelLoadRanking: modelLoadHealth.rankedModels,
31
41
  });
32
42
  return;
33
43
  }
34
- if (pathname === "/v1" || pathname.startsWith("/v1/")) {
44
+ if (isGatewayApiPath(pathname)) {
35
45
  await proxyRequest(request, response);
36
46
  return;
37
47
  }
38
48
  sendJson(response, 404, {
39
49
  error: {
40
- message: "Route not found. Use /v1/* or /health.",
50
+ message: "Route not found. Use /v1/*, /anthropic/*, or /health.",
41
51
  },
42
52
  });
43
53
  }
@@ -21,7 +21,7 @@ export async function startGatewayServer(port = config.port, opts = {}) {
21
21
  });
22
22
  const address = server.address();
23
23
  const resolvedPort = typeof address === "object" && address ? address.port : port;
24
- console.log(`Gateway listening on http://${host}:${resolvedPort} -> ${config.upstreamBaseUrl}`);
24
+ console.log(`Gateway listening on http://${host}:${resolvedPort}`);
25
25
  return {
26
26
  close: async () => {
27
27
  await new Promise((resolve, reject) => {