openclaw-autoproxy 1.0.5 → 1.0.7

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -61,6 +61,7 @@ routes:
61
61
  - Start: `openclaw-autoproxy start`
62
62
  - Dev (watch): `openclaw-autoproxy dev`
63
63
  - Help: `openclaw-autoproxy help`
64
+ - Publish: `npm publish --registry=https://registry.npmjs.org --access public`
64
65
 
65
66
  Quick run (installed):
66
67
 
package/README.zh-CN.md CHANGED
@@ -59,6 +59,7 @@ routes:
59
59
  - 启动:`openclaw-autoproxy start`
60
60
  - 开发(热重载):`openclaw-autoproxy dev`
61
61
  - 帮助:`openclaw-autoproxy help`
62
+ - 发布:`npm publish --registry=https://registry.npmjs.org --access public`
62
63
 
63
64
  快速示例(安装并立即启动):
64
65
 
@@ -1,14 +1,7 @@
1
1
  const DEFAULT_WINDOW_MS = 12 * 60 * 60 * 1000;
2
2
  const DEFAULT_MAX_SAMPLES_PER_MODEL = 5000;
3
+ export const DEFAULT_MODEL_HEALTH_WINDOW_MS = DEFAULT_WINDOW_MS;
3
4
  const modelSamples = new Map();
4
- function quantileFromSorted(values, q) {
5
- if (values.length === 0) {
6
- return 0;
7
- }
8
- const clampedQ = Math.max(0, Math.min(1, q));
9
- const index = Math.floor((values.length - 1) * clampedQ);
10
- return values[index] ?? values[values.length - 1] ?? 0;
11
- }
12
5
  function roundMs(value) {
13
6
  return Math.round(value * 100) / 100;
14
7
  }
@@ -34,17 +27,19 @@ function pruneExpiredSamples(cutoffAt) {
34
27
  }
35
28
  }
36
29
  }
37
- export function recordModelLoadSample(model, loadMs) {
30
+ export function recordModelRequestSample(model, params) {
38
31
  if (!model) {
39
32
  return;
40
33
  }
41
- if (!Number.isFinite(loadMs) || loadMs <= 0) {
34
+ if (!Number.isFinite(params.responseMs) || params.responseMs < 0) {
42
35
  return;
43
36
  }
44
37
  const now = Date.now();
45
38
  const sample = {
46
39
  at: now,
47
- loadMs,
40
+ ok: params.ok,
41
+ responseMs: params.responseMs,
42
+ statusCode: params.statusCode ?? null,
48
43
  };
49
44
  const existing = modelSamples.get(model) ?? [];
50
45
  existing.push(sample);
@@ -55,31 +50,37 @@ export function recordModelLoadSample(model, loadMs) {
55
50
  const cutoffAt = now - DEFAULT_WINDOW_MS;
56
51
  pruneExpiredSamples(cutoffAt);
57
52
  }
53
+ export function recordModelLoadSample(model, loadMs) {
54
+ recordModelRequestSample(model, {
55
+ ok: true,
56
+ responseMs: loadMs,
57
+ statusCode: 200,
58
+ });
59
+ }
58
60
  function summarizeModel(model, samples) {
59
61
  if (samples.length === 0) {
60
62
  return null;
61
63
  }
62
- const loadValues = samples.map((sample) => sample.loadMs).sort((a, b) => a - b);
63
- const total = loadValues.reduce((acc, value) => acc + value, 0);
64
- const avgLoadMs = total / loadValues.length;
65
- const minLoadMs = loadValues[0] ?? 0;
66
- const maxLoadMs = loadValues[loadValues.length - 1] ?? 0;
67
- const latestAt = samples[samples.length - 1]?.at ?? Date.now();
64
+ const accessCount = samples.length;
65
+ const successCount = samples.reduce((count, sample) => count + (sample.ok ? 1 : 0), 0);
66
+ const totalResponseMs = samples.reduce((total, sample) => total + sample.responseMs, 0);
67
+ const lastSample = samples[samples.length - 1] ?? null;
68
+ const avgResponseMs = totalResponseMs / accessCount;
69
+ const successRatePct = accessCount > 0 ? (successCount / accessCount) * 100 : 0;
68
70
  return {
69
71
  model,
70
- sampleCount: samples.length,
71
- avgLoadMs: roundMs(avgLoadMs),
72
- p50LoadMs: roundMs(quantileFromSorted(loadValues, 0.5)),
73
- p95LoadMs: roundMs(quantileFromSorted(loadValues, 0.95)),
74
- minLoadMs: roundMs(minLoadMs),
75
- maxLoadMs: roundMs(maxLoadMs),
76
- lastSeenAt: new Date(latestAt).toISOString(),
72
+ accessCount,
73
+ avgResponseMs: roundMs(avgResponseMs),
74
+ lastResponseMs: roundMs(lastSample?.responseMs ?? 0),
75
+ lastSeenAt: new Date(lastSample?.at ?? Date.now()).toISOString(),
76
+ lastStatusCode: lastSample?.statusCode ?? null,
77
+ successCount,
78
+ successRatePct: roundMs(successRatePct),
77
79
  };
78
80
  }
79
- export function getModelLoadRankingHealth(windowMs = DEFAULT_WINDOW_MS) {
81
+ export function getModelHealthWindow(windowMs = DEFAULT_WINDOW_MS) {
80
82
  const normalizedWindowMs = Number.isFinite(windowMs) && windowMs > 0 ? windowMs : DEFAULT_WINDOW_MS;
81
- const now = Date.now();
82
- const cutoffAt = now - normalizedWindowMs;
83
+ const cutoffAt = Date.now() - normalizedWindowMs;
83
84
  pruneExpiredSamples(cutoffAt);
84
85
  const summaries = [];
85
86
  for (const [model, samples] of modelSamples.entries()) {
@@ -96,19 +97,34 @@ export function getModelLoadRankingHealth(windowMs = DEFAULT_WINDOW_MS) {
96
97
  }
97
98
  }
98
99
  summaries.sort((a, b) => {
99
- if (a.avgLoadMs !== b.avgLoadMs) {
100
- return a.avgLoadMs - b.avgLoadMs;
100
+ const aIsHealthy = a.lastStatusCode === 200 ? 1 : 0;
101
+ const bIsHealthy = b.lastStatusCode === 200 ? 1 : 0;
102
+ if (aIsHealthy !== bIsHealthy) {
103
+ return bIsHealthy - aIsHealthy;
104
+ }
105
+ if (a.avgResponseMs !== b.avgResponseMs) {
106
+ return a.avgResponseMs - b.avgResponseMs;
101
107
  }
102
- if (a.p95LoadMs !== b.p95LoadMs) {
103
- return a.p95LoadMs - b.p95LoadMs;
108
+ if (a.accessCount !== b.accessCount) {
109
+ return b.accessCount - a.accessCount;
104
110
  }
105
- return b.sampleCount - a.sampleCount;
111
+ if (a.successRatePct !== b.successRatePct) {
112
+ return b.successRatePct - a.successRatePct;
113
+ }
114
+ return a.model.localeCompare(b.model);
106
115
  });
107
116
  return {
108
117
  windowHours: roundMs(normalizedWindowMs / (60 * 60 * 1000)),
109
- rankedModels: summaries.map((entry, index) => ({
118
+ models: summaries.map((entry, index) => ({
110
119
  rank: index + 1,
111
120
  ...entry,
112
121
  })),
113
122
  };
114
123
  }
124
+ export function getModelLoadRankingHealth(windowMs = DEFAULT_WINDOW_MS) {
125
+ const health = getModelHealthWindow(windowMs);
126
+ return {
127
+ windowHours: health.windowHours,
128
+ rankedModels: health.models,
129
+ };
130
+ }
@@ -2,7 +2,7 @@ import { PassThrough, Readable } from "node:stream";
2
2
  import { Agent } from "undici";
3
3
  import { createAnthropicMessagesEventStreamTransformer, maybeTransformAnthropicMessagesRequest, transformOpenAiChatCompletionToAnthropicMessage, transformUpstreamErrorToAnthropicError, } from "./anthropic-compat.js";
4
4
  import { config } from "./config.js";
5
- import { recordModelLoadSample } from "./model-load-metrics.js";
5
+ import { recordModelRequestSample } from "./model-load-metrics.js";
6
6
  const HOP_BY_HOP_HEADERS = new Set([
7
7
  "connection",
8
8
  "keep-alive",
@@ -586,8 +586,8 @@ export async function proxyRequest(request, response) {
586
586
  }
587
587
  const requestBody = bodyBuffer ? new Uint8Array(bodyBuffer) : undefined;
588
588
  const headers = buildUpstreamHeaders(request.headers, bodyBuffer ? bodyBuffer.length : undefined, selectedRoute);
589
+ const attemptStartedAt = Date.now();
589
590
  try {
590
- const attemptStartedAt = Date.now();
591
591
  const upstreamResponse = await fetchWithTimeoutAndClientSignal(upstreamUrl, {
592
592
  method,
593
593
  headers,
@@ -595,9 +595,11 @@ export async function proxyRequest(request, response) {
595
595
  }, config.timeoutMs, clientSignal);
596
596
  const headerLoadMs = Date.now() - attemptStartedAt;
597
597
  const modelForMetric = modelId ?? requestedModel;
598
- if (upstreamResponse.ok) {
599
- recordModelLoadSample(modelForMetric, headerLoadMs);
600
- }
598
+ recordModelRequestSample(modelForMetric, {
599
+ ok: upstreamResponse.ok,
600
+ responseMs: headerLoadMs,
601
+ statusCode: upstreamResponse.status,
602
+ });
601
603
  const contentType = (upstreamResponse.headers.get("content-type") ?? "").toLowerCase();
602
604
  const isEventStream = contentType.includes("text/event-stream");
603
605
  const isJsonResponse = contentType.includes("application/json");
@@ -753,6 +755,11 @@ export async function proxyRequest(request, response) {
753
755
  }
754
756
  catch (error) {
755
757
  lastError = error;
758
+ recordModelRequestSample(modelId ?? requestedModel, {
759
+ ok: false,
760
+ responseMs: Date.now() - attemptStartedAt,
761
+ statusCode: null,
762
+ });
756
763
  if (attemptIndex < modelCandidates.length - 1) {
757
764
  continue;
758
765
  }
@@ -1,6 +1,6 @@
1
1
  import { createServer } from "node:http";
2
2
  import { config } from "./config.js";
3
- import { getModelLoadRankingHealth } from "./model-load-metrics.js";
3
+ import { DEFAULT_MODEL_HEALTH_WINDOW_MS, getModelHealthWindow, } from "./model-load-metrics.js";
4
4
  import { proxyRequest } from "./proxy.js";
5
5
  function sendJson(response, statusCode, payload) {
6
6
  if (response.writableEnded) {
@@ -12,15 +12,79 @@ function sendJson(response, statusCode, payload) {
12
12
  response.setHeader("content-length", Buffer.byteLength(body));
13
13
  response.end(body);
14
14
  }
15
- function resolvePathname(request) {
15
+ function sendText(response, statusCode, body) {
16
+ if (response.writableEnded) {
17
+ return;
18
+ }
19
+ response.statusCode = statusCode;
20
+ response.setHeader("content-type", "text/plain; charset=utf-8");
21
+ response.setHeader("content-length", Buffer.byteLength(body));
22
+ response.end(body);
23
+ }
24
+ function resolveRequestUrl(request) {
16
25
  const rawUrl = request.url ?? "/";
17
26
  try {
18
- return new URL(rawUrl, "http://localhost").pathname;
27
+ return new URL(rawUrl, "http://localhost");
19
28
  }
20
29
  catch {
21
- return rawUrl.startsWith("/") ? rawUrl : `/${rawUrl}`;
30
+ const normalized = rawUrl.startsWith("/") ? rawUrl : `/${rawUrl}`;
31
+ return new URL(normalized, "http://localhost");
22
32
  }
23
33
  }
34
+ function resolvePathname(request) {
35
+ return resolveRequestUrl(request).pathname;
36
+ }
37
+ function formatTableNumber(value) {
38
+ if (!Number.isFinite(value)) {
39
+ return "-";
40
+ }
41
+ if (Number.isInteger(value)) {
42
+ return String(value);
43
+ }
44
+ return value.toFixed(2).replace(/\.00$/, "").replace(/(\.\d)0$/, "$1");
45
+ }
46
+ function formatTableDurationMs(value) {
47
+ if (!Number.isFinite(value)) {
48
+ return "-";
49
+ }
50
+ return String(Math.round(value));
51
+ }
52
+ function padTableCell(value, width, align) {
53
+ return align === "right" ? value.padStart(width, " ") : value.padEnd(width, " ");
54
+ }
55
+ function buildModelHealthTable(windowHours, models) {
56
+ const columns = [
57
+ { header: "Model", align: "left", value: (row) => row.model },
58
+ {
59
+ header: "Code",
60
+ align: "right",
61
+ value: (row) => row.lastStatusCode === null ? "-" : String(row.lastStatusCode),
62
+ },
63
+ { header: "Avg(ms)", align: "right", value: (row) => formatTableDurationMs(row.avgResponseMs) },
64
+ { header: "Last(ms)", align: "right", value: (row) => formatTableDurationMs(row.lastResponseMs) },
65
+ { header: "Count", align: "right", value: (row) => String(row.accessCount) },
66
+ { header: "OK%", align: "right", value: (row) => `${formatTableNumber(row.successRatePct)}%` },
67
+ ];
68
+ const widths = columns.map((column) => {
69
+ const rowWidths = models.map((row) => column.value(row).length);
70
+ return Math.max(column.header.length, ...rowWidths, 1);
71
+ });
72
+ const header = columns
73
+ .map((column, index) => padTableCell(column.header, widths[index] ?? column.header.length, column.align))
74
+ .join(" | ");
75
+ const divider = widths.map((width) => "-".repeat(width)).join("-+-");
76
+ const rows = models.map((row) => columns
77
+ .map((column, index) => padTableCell(column.value(row), widths[index] ?? 0, column.align))
78
+ .join(" | "));
79
+ return [
80
+ `Gateway Health (last ${formatTableNumber(windowHours)}h)`,
81
+ `Status: ok`,
82
+ "",
83
+ header,
84
+ divider,
85
+ ...(rows.length > 0 ? rows : ["No model traffic recorded in the last 12 hours."]),
86
+ ].join("\n");
87
+ }
24
88
  function isGatewayApiPath(pathname) {
25
89
  return (pathname === "/v1" ||
26
90
  pathname.startsWith("/v1/") ||
@@ -29,15 +93,24 @@ function isGatewayApiPath(pathname) {
29
93
  }
30
94
  async function handleRequest(request, response) {
31
95
  const method = (request.method ?? "GET").toUpperCase();
32
- const pathname = resolvePathname(request);
96
+ const requestUrl = resolveRequestUrl(request);
97
+ const pathname = requestUrl.pathname;
33
98
  if ((method === "GET" || method === "HEAD") && pathname === "/health") {
34
- const modelLoadHealth = getModelLoadRankingHealth(12 * 60 * 60 * 1000);
99
+ const modelHealth = getModelHealthWindow(DEFAULT_MODEL_HEALTH_WINDOW_MS);
100
+ const tableOutput = buildModelHealthTable(modelHealth.windowHours, modelHealth.models);
101
+ if (requestUrl.searchParams.get("format")?.toLowerCase() !== "json") {
102
+ sendText(response, 200, tableOutput);
103
+ return;
104
+ }
35
105
  sendJson(response, 200, {
36
106
  status: "ok",
37
107
  retryStatusCodes: Array.from(config.retryStatusCodes),
38
108
  enabledRouteCount: Object.keys(config.modelRouteMap).length,
39
- modelLoadWindowHours: modelLoadHealth.windowHours,
40
- modelLoadRanking: modelLoadHealth.rankedModels,
109
+ modelHealthWindowHours: modelHealth.windowHours,
110
+ modelHealth: modelHealth.models,
111
+ modelHealthTable: tableOutput,
112
+ modelLoadWindowHours: modelHealth.windowHours,
113
+ modelLoadRanking: modelHealth.models,
41
114
  });
42
115
  return;
43
116
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "openclaw-autoproxy",
3
- "version": "1.0.5",
3
+ "version": "1.0.7",
4
4
  "description": "Local model-switching proxy gateway with OpenAI-compatible APIs",
5
5
  "type": "module",
6
6
  "main": "dist/gateway/server.js",
@@ -1,39 +1,33 @@
1
1
  const DEFAULT_WINDOW_MS = 12 * 60 * 60 * 1000;
2
2
  const DEFAULT_MAX_SAMPLES_PER_MODEL = 5000;
3
3
 
4
- interface ModelLoadSample {
4
+ export const DEFAULT_MODEL_HEALTH_WINDOW_MS = DEFAULT_WINDOW_MS;
5
+
6
+ interface ModelRequestSample {
5
7
  at: number;
6
- loadMs: number;
8
+ ok: boolean;
9
+ responseMs: number;
10
+ statusCode: number | null;
7
11
  }
8
12
 
9
- interface ModelLoadSummary {
13
+ export interface ModelHealthSummary {
10
14
  model: string;
11
- sampleCount: number;
12
- avgLoadMs: number;
13
- p50LoadMs: number;
14
- p95LoadMs: number;
15
- minLoadMs: number;
16
- maxLoadMs: number;
15
+ accessCount: number;
16
+ avgResponseMs: number;
17
+ lastResponseMs: number;
17
18
  lastSeenAt: string;
19
+ lastStatusCode: number | null;
20
+ successCount: number;
21
+ successRatePct: number;
18
22
  }
19
23
 
20
- const modelSamples = new Map<string, ModelLoadSample[]>();
21
-
22
- function quantileFromSorted(values: number[], q: number): number {
23
- if (values.length === 0) {
24
- return 0;
25
- }
26
-
27
- const clampedQ = Math.max(0, Math.min(1, q));
28
- const index = Math.floor((values.length - 1) * clampedQ);
29
- return values[index] ?? values[values.length - 1] ?? 0;
30
- }
24
+ const modelSamples = new Map<string, ModelRequestSample[]>();
31
25
 
32
26
  function roundMs(value: number): number {
33
27
  return Math.round(value * 100) / 100;
34
28
  }
35
29
 
36
- function pruneModelSamples(samples: ModelLoadSample[], cutoffAt: number): ModelLoadSample[] {
30
+ function pruneModelSamples(samples: ModelRequestSample[], cutoffAt: number): ModelRequestSample[] {
37
31
  let startIndex = 0;
38
32
 
39
33
  while (startIndex < samples.length && samples[startIndex] && samples[startIndex].at < cutoffAt) {
@@ -62,19 +56,28 @@ function pruneExpiredSamples(cutoffAt: number): void {
62
56
  }
63
57
  }
64
58
 
65
- export function recordModelLoadSample(model: string | null, loadMs: number): void {
59
+ export function recordModelRequestSample(
60
+ model: string | null,
61
+ params: {
62
+ ok: boolean;
63
+ responseMs: number;
64
+ statusCode?: number | null;
65
+ },
66
+ ): void {
66
67
  if (!model) {
67
68
  return;
68
69
  }
69
70
 
70
- if (!Number.isFinite(loadMs) || loadMs <= 0) {
71
+ if (!Number.isFinite(params.responseMs) || params.responseMs < 0) {
71
72
  return;
72
73
  }
73
74
 
74
75
  const now = Date.now();
75
- const sample: ModelLoadSample = {
76
+ const sample: ModelRequestSample = {
76
77
  at: now,
77
- loadMs,
78
+ ok: params.ok,
79
+ responseMs: params.responseMs,
80
+ statusCode: params.statusCode ?? null,
78
81
  };
79
82
 
80
83
  const existing = modelSamples.get(model) ?? [];
@@ -90,41 +93,48 @@ export function recordModelLoadSample(model: string | null, loadMs: number): voi
90
93
  pruneExpiredSamples(cutoffAt);
91
94
  }
92
95
 
93
- function summarizeModel(model: string, samples: ModelLoadSample[]): ModelLoadSummary | null {
96
+ export function recordModelLoadSample(model: string | null, loadMs: number): void {
97
+ recordModelRequestSample(model, {
98
+ ok: true,
99
+ responseMs: loadMs,
100
+ statusCode: 200,
101
+ });
102
+ }
103
+
104
+ function summarizeModel(model: string, samples: ModelRequestSample[]): ModelHealthSummary | null {
94
105
  if (samples.length === 0) {
95
106
  return null;
96
107
  }
97
108
 
98
- const loadValues = samples.map((sample) => sample.loadMs).sort((a, b) => a - b);
99
- const total = loadValues.reduce((acc, value) => acc + value, 0);
100
- const avgLoadMs = total / loadValues.length;
101
- const minLoadMs = loadValues[0] ?? 0;
102
- const maxLoadMs = loadValues[loadValues.length - 1] ?? 0;
103
- const latestAt = samples[samples.length - 1]?.at ?? Date.now();
109
+ const accessCount = samples.length;
110
+ const successCount = samples.reduce((count, sample) => count + (sample.ok ? 1 : 0), 0);
111
+ const totalResponseMs = samples.reduce((total, sample) => total + sample.responseMs, 0);
112
+ const lastSample = samples[samples.length - 1] ?? null;
113
+ const avgResponseMs = totalResponseMs / accessCount;
114
+ const successRatePct = accessCount > 0 ? (successCount / accessCount) * 100 : 0;
104
115
 
105
116
  return {
106
117
  model,
107
- sampleCount: samples.length,
108
- avgLoadMs: roundMs(avgLoadMs),
109
- p50LoadMs: roundMs(quantileFromSorted(loadValues, 0.5)),
110
- p95LoadMs: roundMs(quantileFromSorted(loadValues, 0.95)),
111
- minLoadMs: roundMs(minLoadMs),
112
- maxLoadMs: roundMs(maxLoadMs),
113
- lastSeenAt: new Date(latestAt).toISOString(),
118
+ accessCount,
119
+ avgResponseMs: roundMs(avgResponseMs),
120
+ lastResponseMs: roundMs(lastSample?.responseMs ?? 0),
121
+ lastSeenAt: new Date(lastSample?.at ?? Date.now()).toISOString(),
122
+ lastStatusCode: lastSample?.statusCode ?? null,
123
+ successCount,
124
+ successRatePct: roundMs(successRatePct),
114
125
  };
115
126
  }
116
127
 
117
- export function getModelLoadRankingHealth(windowMs = DEFAULT_WINDOW_MS): {
128
+ export function getModelHealthWindow(windowMs = DEFAULT_WINDOW_MS): {
118
129
  windowHours: number;
119
- rankedModels: Array<ModelLoadSummary & { rank: number }>;
130
+ models: Array<ModelHealthSummary & { rank: number }>;
120
131
  } {
121
132
  const normalizedWindowMs = Number.isFinite(windowMs) && windowMs > 0 ? windowMs : DEFAULT_WINDOW_MS;
122
- const now = Date.now();
123
- const cutoffAt = now - normalizedWindowMs;
133
+ const cutoffAt = Date.now() - normalizedWindowMs;
124
134
 
125
135
  pruneExpiredSamples(cutoffAt);
126
136
 
127
- const summaries: ModelLoadSummary[] = [];
137
+ const summaries: ModelHealthSummary[] = [];
128
138
 
129
139
  for (const [model, samples] of modelSamples.entries()) {
130
140
  const filtered = pruneModelSamples(samples, cutoffAt);
@@ -145,22 +155,45 @@ export function getModelLoadRankingHealth(windowMs = DEFAULT_WINDOW_MS): {
145
155
  }
146
156
 
147
157
  summaries.sort((a, b) => {
148
- if (a.avgLoadMs !== b.avgLoadMs) {
149
- return a.avgLoadMs - b.avgLoadMs;
158
+ const aIsHealthy = a.lastStatusCode === 200 ? 1 : 0;
159
+ const bIsHealthy = b.lastStatusCode === 200 ? 1 : 0;
160
+
161
+ if (aIsHealthy !== bIsHealthy) {
162
+ return bIsHealthy - aIsHealthy;
150
163
  }
151
164
 
152
- if (a.p95LoadMs !== b.p95LoadMs) {
153
- return a.p95LoadMs - b.p95LoadMs;
165
+ if (a.avgResponseMs !== b.avgResponseMs) {
166
+ return a.avgResponseMs - b.avgResponseMs;
154
167
  }
155
168
 
156
- return b.sampleCount - a.sampleCount;
169
+ if (a.accessCount !== b.accessCount) {
170
+ return b.accessCount - a.accessCount;
171
+ }
172
+
173
+ if (a.successRatePct !== b.successRatePct) {
174
+ return b.successRatePct - a.successRatePct;
175
+ }
176
+
177
+ return a.model.localeCompare(b.model);
157
178
  });
158
179
 
159
180
  return {
160
181
  windowHours: roundMs(normalizedWindowMs / (60 * 60 * 1000)),
161
- rankedModels: summaries.map((entry, index) => ({
182
+ models: summaries.map((entry, index) => ({
162
183
  rank: index + 1,
163
184
  ...entry,
164
185
  })),
165
186
  };
166
187
  }
188
+
189
+ export function getModelLoadRankingHealth(windowMs = DEFAULT_WINDOW_MS): {
190
+ windowHours: number;
191
+ rankedModels: Array<ModelHealthSummary & { rank: number }>;
192
+ } {
193
+ const health = getModelHealthWindow(windowMs);
194
+
195
+ return {
196
+ windowHours: health.windowHours,
197
+ rankedModels: health.models,
198
+ };
199
+ }
@@ -8,7 +8,7 @@ import {
8
8
  transformUpstreamErrorToAnthropicError,
9
9
  } from "./anthropic-compat.js";
10
10
  import { config, type ModelRouteConfig } from "./config.js";
11
- import { recordModelLoadSample } from "./model-load-metrics.js";
11
+ import { recordModelRequestSample } from "./model-load-metrics.js";
12
12
 
13
13
  const HOP_BY_HOP_HEADERS = new Set([
14
14
  "connection",
@@ -814,8 +814,9 @@ export async function proxyRequest(request: IncomingMessage, response: ServerRes
814
814
  selectedRoute,
815
815
  );
816
816
 
817
+ const attemptStartedAt = Date.now();
818
+
817
819
  try {
818
- const attemptStartedAt = Date.now();
819
820
  const upstreamResponse = await fetchWithTimeoutAndClientSignal(
820
821
  upstreamUrl,
821
822
  {
@@ -829,9 +830,11 @@ export async function proxyRequest(request: IncomingMessage, response: ServerRes
829
830
  const headerLoadMs = Date.now() - attemptStartedAt;
830
831
  const modelForMetric = modelId ?? requestedModel;
831
832
 
832
- if (upstreamResponse.ok) {
833
- recordModelLoadSample(modelForMetric, headerLoadMs);
834
- }
833
+ recordModelRequestSample(modelForMetric, {
834
+ ok: upstreamResponse.ok,
835
+ responseMs: headerLoadMs,
836
+ statusCode: upstreamResponse.status,
837
+ });
835
838
 
836
839
  const contentType = (upstreamResponse.headers.get("content-type") ?? "").toLowerCase();
837
840
  const isEventStream = contentType.includes("text/event-stream");
@@ -1029,6 +1032,12 @@ export async function proxyRequest(request: IncomingMessage, response: ServerRes
1029
1032
  } catch (error) {
1030
1033
  lastError = error;
1031
1034
 
1035
+ recordModelRequestSample(modelId ?? requestedModel, {
1036
+ ok: false,
1037
+ responseMs: Date.now() - attemptStartedAt,
1038
+ statusCode: null,
1039
+ });
1040
+
1032
1041
  if (attemptIndex < modelCandidates.length - 1) {
1033
1042
  continue;
1034
1043
  }
@@ -1,6 +1,10 @@
1
1
  import { createServer, type IncomingMessage, type Server, type ServerResponse } from "node:http";
2
2
  import { config } from "./config.js";
3
- import { getModelLoadRankingHealth } from "./model-load-metrics.js";
3
+ import {
4
+ DEFAULT_MODEL_HEALTH_WINDOW_MS,
5
+ getModelHealthWindow,
6
+ type ModelHealthSummary,
7
+ } from "./model-load-metrics.js";
4
8
  import { proxyRequest } from "./proxy.js";
5
9
 
6
10
  function sendJson(response: ServerResponse, statusCode: number, payload: unknown): void {
@@ -15,16 +19,94 @@ function sendJson(response: ServerResponse, statusCode: number, payload: unknown
15
19
  response.end(body);
16
20
  }
17
21
 
18
- function resolvePathname(request: IncomingMessage): string {
22
+ function sendText(response: ServerResponse, statusCode: number, body: string): void {
23
+ if (response.writableEnded) {
24
+ return;
25
+ }
26
+
27
+ response.statusCode = statusCode;
28
+ response.setHeader("content-type", "text/plain; charset=utf-8");
29
+ response.setHeader("content-length", Buffer.byteLength(body));
30
+ response.end(body);
31
+ }
32
+
33
+ function resolveRequestUrl(request: IncomingMessage): URL {
19
34
  const rawUrl = request.url ?? "/";
20
35
 
21
36
  try {
22
- return new URL(rawUrl, "http://localhost").pathname;
37
+ return new URL(rawUrl, "http://localhost");
23
38
  } catch {
24
- return rawUrl.startsWith("/") ? rawUrl : `/${rawUrl}`;
39
+ const normalized = rawUrl.startsWith("/") ? rawUrl : `/${rawUrl}`;
40
+ return new URL(normalized, "http://localhost");
25
41
  }
26
42
  }
27
43
 
44
+ function resolvePathname(request: IncomingMessage): string {
45
+ return resolveRequestUrl(request).pathname;
46
+ }
47
+
48
+ function formatTableNumber(value: number): string {
49
+ if (!Number.isFinite(value)) {
50
+ return "-";
51
+ }
52
+
53
+ if (Number.isInteger(value)) {
54
+ return String(value);
55
+ }
56
+
57
+ return value.toFixed(2).replace(/\.00$/, "").replace(/(\.\d)0$/, "$1");
58
+ }
59
+
60
+ function formatTableDurationMs(value: number): string {
61
+ if (!Number.isFinite(value)) {
62
+ return "-";
63
+ }
64
+
65
+ return String(Math.round(value));
66
+ }
67
+
68
+ function padTableCell(value: string, width: number, align: "left" | "right"): string {
69
+ return align === "right" ? value.padStart(width, " ") : value.padEnd(width, " ");
70
+ }
71
+
72
+ function buildModelHealthTable(windowHours: number, models: Array<ModelHealthSummary & { rank: number }>): string {
73
+ const columns = [
74
+ { header: "Model", align: "left" as const, value: (row: ModelHealthSummary & { rank: number }) => row.model },
75
+ {
76
+ header: "Code",
77
+ align: "right" as const,
78
+ value: (row: ModelHealthSummary & { rank: number }) =>
79
+ row.lastStatusCode === null ? "-" : String(row.lastStatusCode),
80
+ },
81
+ { header: "Avg(ms)", align: "right" as const, value: (row: ModelHealthSummary & { rank: number }) => formatTableDurationMs(row.avgResponseMs) },
82
+ { header: "Last(ms)", align: "right" as const, value: (row: ModelHealthSummary & { rank: number }) => formatTableDurationMs(row.lastResponseMs) },
83
+ { header: "Count", align: "right" as const, value: (row: ModelHealthSummary & { rank: number }) => String(row.accessCount) },
84
+ { header: "OK%", align: "right" as const, value: (row: ModelHealthSummary & { rank: number }) => `${formatTableNumber(row.successRatePct)}%` },
85
+ ];
86
+
87
+ const widths = columns.map((column) => {
88
+ const rowWidths = models.map((row) => column.value(row).length);
89
+ return Math.max(column.header.length, ...rowWidths, 1);
90
+ });
91
+
92
+ const header = columns
93
+ .map((column, index) => padTableCell(column.header, widths[index] ?? column.header.length, column.align))
94
+ .join(" | ");
95
+ const divider = widths.map((width) => "-".repeat(width)).join("-+-");
96
+ const rows = models.map((row) => columns
97
+ .map((column, index) => padTableCell(column.value(row), widths[index] ?? 0, column.align))
98
+ .join(" | "));
99
+
100
+ return [
101
+ `Gateway Health (last ${formatTableNumber(windowHours)}h)`,
102
+ `Status: ok`,
103
+ "",
104
+ header,
105
+ divider,
106
+ ...(rows.length > 0 ? rows : ["No model traffic recorded in the last 12 hours."]),
107
+ ].join("\n");
108
+ }
109
+
28
110
  function isGatewayApiPath(pathname: string): boolean {
29
111
  return (
30
112
  pathname === "/v1" ||
@@ -36,17 +118,27 @@ function isGatewayApiPath(pathname: string): boolean {
36
118
 
37
119
  async function handleRequest(request: IncomingMessage, response: ServerResponse): Promise<void> {
38
120
  const method = (request.method ?? "GET").toUpperCase();
39
- const pathname = resolvePathname(request);
121
+ const requestUrl = resolveRequestUrl(request);
122
+ const pathname = requestUrl.pathname;
40
123
 
41
124
  if ((method === "GET" || method === "HEAD") && pathname === "/health") {
42
- const modelLoadHealth = getModelLoadRankingHealth(12 * 60 * 60 * 1000);
125
+ const modelHealth = getModelHealthWindow(DEFAULT_MODEL_HEALTH_WINDOW_MS);
126
+ const tableOutput = buildModelHealthTable(modelHealth.windowHours, modelHealth.models);
127
+
128
+ if (requestUrl.searchParams.get("format")?.toLowerCase() !== "json") {
129
+ sendText(response, 200, tableOutput);
130
+ return;
131
+ }
43
132
 
44
133
  sendJson(response, 200, {
45
134
  status: "ok",
46
135
  retryStatusCodes: Array.from(config.retryStatusCodes),
47
136
  enabledRouteCount: Object.keys(config.modelRouteMap).length,
48
- modelLoadWindowHours: modelLoadHealth.windowHours,
49
- modelLoadRanking: modelLoadHealth.rankedModels,
137
+ modelHealthWindowHours: modelHealth.windowHours,
138
+ modelHealth: modelHealth.models,
139
+ modelHealthTable: tableOutput,
140
+ modelLoadWindowHours: modelHealth.windowHours,
141
+ modelLoadRanking: modelHealth.models,
50
142
  });
51
143
  return;
52
144
  }