openclaw-autoproxy 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/README.zh-CN.md +1 -0
- package/dist/gateway/model-load-metrics.js +49 -33
- package/dist/gateway/proxy.js +12 -5
- package/dist/gateway/server-http.js +81 -8
- package/package.json +1 -1
- package/src/gateway/model-load-metrics.ts +83 -50
- package/src/gateway/proxy.ts +14 -5
- package/src/gateway/server-http.ts +100 -8
package/README.md
CHANGED
package/README.zh-CN.md
CHANGED
|
@@ -1,14 +1,7 @@
|
|
|
1
1
|
const DEFAULT_WINDOW_MS = 12 * 60 * 60 * 1000;
|
|
2
2
|
const DEFAULT_MAX_SAMPLES_PER_MODEL = 5000;
|
|
3
|
+
export const DEFAULT_MODEL_HEALTH_WINDOW_MS = DEFAULT_WINDOW_MS;
|
|
3
4
|
const modelSamples = new Map();
|
|
4
|
-
function quantileFromSorted(values, q) {
|
|
5
|
-
if (values.length === 0) {
|
|
6
|
-
return 0;
|
|
7
|
-
}
|
|
8
|
-
const clampedQ = Math.max(0, Math.min(1, q));
|
|
9
|
-
const index = Math.floor((values.length - 1) * clampedQ);
|
|
10
|
-
return values[index] ?? values[values.length - 1] ?? 0;
|
|
11
|
-
}
|
|
12
5
|
function roundMs(value) {
|
|
13
6
|
return Math.round(value * 100) / 100;
|
|
14
7
|
}
|
|
@@ -34,17 +27,19 @@ function pruneExpiredSamples(cutoffAt) {
|
|
|
34
27
|
}
|
|
35
28
|
}
|
|
36
29
|
}
|
|
37
|
-
export function
|
|
30
|
+
export function recordModelRequestSample(model, params) {
|
|
38
31
|
if (!model) {
|
|
39
32
|
return;
|
|
40
33
|
}
|
|
41
|
-
if (!Number.isFinite(
|
|
34
|
+
if (!Number.isFinite(params.responseMs) || params.responseMs < 0) {
|
|
42
35
|
return;
|
|
43
36
|
}
|
|
44
37
|
const now = Date.now();
|
|
45
38
|
const sample = {
|
|
46
39
|
at: now,
|
|
47
|
-
|
|
40
|
+
ok: params.ok,
|
|
41
|
+
responseMs: params.responseMs,
|
|
42
|
+
statusCode: params.statusCode ?? null,
|
|
48
43
|
};
|
|
49
44
|
const existing = modelSamples.get(model) ?? [];
|
|
50
45
|
existing.push(sample);
|
|
@@ -55,31 +50,37 @@ export function recordModelLoadSample(model, loadMs) {
|
|
|
55
50
|
const cutoffAt = now - DEFAULT_WINDOW_MS;
|
|
56
51
|
pruneExpiredSamples(cutoffAt);
|
|
57
52
|
}
|
|
53
|
+
export function recordModelLoadSample(model, loadMs) {
|
|
54
|
+
recordModelRequestSample(model, {
|
|
55
|
+
ok: true,
|
|
56
|
+
responseMs: loadMs,
|
|
57
|
+
statusCode: 200,
|
|
58
|
+
});
|
|
59
|
+
}
|
|
58
60
|
function summarizeModel(model, samples) {
|
|
59
61
|
if (samples.length === 0) {
|
|
60
62
|
return null;
|
|
61
63
|
}
|
|
62
|
-
const
|
|
63
|
-
const
|
|
64
|
-
const
|
|
65
|
-
const
|
|
66
|
-
const
|
|
67
|
-
const
|
|
64
|
+
const accessCount = samples.length;
|
|
65
|
+
const successCount = samples.reduce((count, sample) => count + (sample.ok ? 1 : 0), 0);
|
|
66
|
+
const totalResponseMs = samples.reduce((total, sample) => total + sample.responseMs, 0);
|
|
67
|
+
const lastSample = samples[samples.length - 1] ?? null;
|
|
68
|
+
const avgResponseMs = totalResponseMs / accessCount;
|
|
69
|
+
const successRatePct = accessCount > 0 ? (successCount / accessCount) * 100 : 0;
|
|
68
70
|
return {
|
|
69
71
|
model,
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
72
|
+
accessCount,
|
|
73
|
+
avgResponseMs: roundMs(avgResponseMs),
|
|
74
|
+
lastResponseMs: roundMs(lastSample?.responseMs ?? 0),
|
|
75
|
+
lastSeenAt: new Date(lastSample?.at ?? Date.now()).toISOString(),
|
|
76
|
+
lastStatusCode: lastSample?.statusCode ?? null,
|
|
77
|
+
successCount,
|
|
78
|
+
successRatePct: roundMs(successRatePct),
|
|
77
79
|
};
|
|
78
80
|
}
|
|
79
|
-
export function
|
|
81
|
+
export function getModelHealthWindow(windowMs = DEFAULT_WINDOW_MS) {
|
|
80
82
|
const normalizedWindowMs = Number.isFinite(windowMs) && windowMs > 0 ? windowMs : DEFAULT_WINDOW_MS;
|
|
81
|
-
const
|
|
82
|
-
const cutoffAt = now - normalizedWindowMs;
|
|
83
|
+
const cutoffAt = Date.now() - normalizedWindowMs;
|
|
83
84
|
pruneExpiredSamples(cutoffAt);
|
|
84
85
|
const summaries = [];
|
|
85
86
|
for (const [model, samples] of modelSamples.entries()) {
|
|
@@ -96,19 +97,34 @@ export function getModelLoadRankingHealth(windowMs = DEFAULT_WINDOW_MS) {
|
|
|
96
97
|
}
|
|
97
98
|
}
|
|
98
99
|
summaries.sort((a, b) => {
|
|
99
|
-
|
|
100
|
-
|
|
100
|
+
const aIsHealthy = a.lastStatusCode === 200 ? 1 : 0;
|
|
101
|
+
const bIsHealthy = b.lastStatusCode === 200 ? 1 : 0;
|
|
102
|
+
if (aIsHealthy !== bIsHealthy) {
|
|
103
|
+
return bIsHealthy - aIsHealthy;
|
|
104
|
+
}
|
|
105
|
+
if (a.avgResponseMs !== b.avgResponseMs) {
|
|
106
|
+
return a.avgResponseMs - b.avgResponseMs;
|
|
101
107
|
}
|
|
102
|
-
if (a.
|
|
103
|
-
return
|
|
108
|
+
if (a.accessCount !== b.accessCount) {
|
|
109
|
+
return b.accessCount - a.accessCount;
|
|
104
110
|
}
|
|
105
|
-
|
|
111
|
+
if (a.successRatePct !== b.successRatePct) {
|
|
112
|
+
return b.successRatePct - a.successRatePct;
|
|
113
|
+
}
|
|
114
|
+
return a.model.localeCompare(b.model);
|
|
106
115
|
});
|
|
107
116
|
return {
|
|
108
117
|
windowHours: roundMs(normalizedWindowMs / (60 * 60 * 1000)),
|
|
109
|
-
|
|
118
|
+
models: summaries.map((entry, index) => ({
|
|
110
119
|
rank: index + 1,
|
|
111
120
|
...entry,
|
|
112
121
|
})),
|
|
113
122
|
};
|
|
114
123
|
}
|
|
124
|
+
export function getModelLoadRankingHealth(windowMs = DEFAULT_WINDOW_MS) {
|
|
125
|
+
const health = getModelHealthWindow(windowMs);
|
|
126
|
+
return {
|
|
127
|
+
windowHours: health.windowHours,
|
|
128
|
+
rankedModels: health.models,
|
|
129
|
+
};
|
|
130
|
+
}
|
package/dist/gateway/proxy.js
CHANGED
|
@@ -2,7 +2,7 @@ import { PassThrough, Readable } from "node:stream";
|
|
|
2
2
|
import { Agent } from "undici";
|
|
3
3
|
import { createAnthropicMessagesEventStreamTransformer, maybeTransformAnthropicMessagesRequest, transformOpenAiChatCompletionToAnthropicMessage, transformUpstreamErrorToAnthropicError, } from "./anthropic-compat.js";
|
|
4
4
|
import { config } from "./config.js";
|
|
5
|
-
import {
|
|
5
|
+
import { recordModelRequestSample } from "./model-load-metrics.js";
|
|
6
6
|
const HOP_BY_HOP_HEADERS = new Set([
|
|
7
7
|
"connection",
|
|
8
8
|
"keep-alive",
|
|
@@ -586,8 +586,8 @@ export async function proxyRequest(request, response) {
|
|
|
586
586
|
}
|
|
587
587
|
const requestBody = bodyBuffer ? new Uint8Array(bodyBuffer) : undefined;
|
|
588
588
|
const headers = buildUpstreamHeaders(request.headers, bodyBuffer ? bodyBuffer.length : undefined, selectedRoute);
|
|
589
|
+
const attemptStartedAt = Date.now();
|
|
589
590
|
try {
|
|
590
|
-
const attemptStartedAt = Date.now();
|
|
591
591
|
const upstreamResponse = await fetchWithTimeoutAndClientSignal(upstreamUrl, {
|
|
592
592
|
method,
|
|
593
593
|
headers,
|
|
@@ -595,9 +595,11 @@ export async function proxyRequest(request, response) {
|
|
|
595
595
|
}, config.timeoutMs, clientSignal);
|
|
596
596
|
const headerLoadMs = Date.now() - attemptStartedAt;
|
|
597
597
|
const modelForMetric = modelId ?? requestedModel;
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
598
|
+
recordModelRequestSample(modelForMetric, {
|
|
599
|
+
ok: upstreamResponse.ok,
|
|
600
|
+
responseMs: headerLoadMs,
|
|
601
|
+
statusCode: upstreamResponse.status,
|
|
602
|
+
});
|
|
601
603
|
const contentType = (upstreamResponse.headers.get("content-type") ?? "").toLowerCase();
|
|
602
604
|
const isEventStream = contentType.includes("text/event-stream");
|
|
603
605
|
const isJsonResponse = contentType.includes("application/json");
|
|
@@ -753,6 +755,11 @@ export async function proxyRequest(request, response) {
|
|
|
753
755
|
}
|
|
754
756
|
catch (error) {
|
|
755
757
|
lastError = error;
|
|
758
|
+
recordModelRequestSample(modelId ?? requestedModel, {
|
|
759
|
+
ok: false,
|
|
760
|
+
responseMs: Date.now() - attemptStartedAt,
|
|
761
|
+
statusCode: null,
|
|
762
|
+
});
|
|
756
763
|
if (attemptIndex < modelCandidates.length - 1) {
|
|
757
764
|
continue;
|
|
758
765
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { createServer } from "node:http";
|
|
2
2
|
import { config } from "./config.js";
|
|
3
|
-
import {
|
|
3
|
+
import { DEFAULT_MODEL_HEALTH_WINDOW_MS, getModelHealthWindow, } from "./model-load-metrics.js";
|
|
4
4
|
import { proxyRequest } from "./proxy.js";
|
|
5
5
|
function sendJson(response, statusCode, payload) {
|
|
6
6
|
if (response.writableEnded) {
|
|
@@ -12,15 +12,79 @@ function sendJson(response, statusCode, payload) {
|
|
|
12
12
|
response.setHeader("content-length", Buffer.byteLength(body));
|
|
13
13
|
response.end(body);
|
|
14
14
|
}
|
|
15
|
-
function
|
|
15
|
+
function sendText(response, statusCode, body) {
|
|
16
|
+
if (response.writableEnded) {
|
|
17
|
+
return;
|
|
18
|
+
}
|
|
19
|
+
response.statusCode = statusCode;
|
|
20
|
+
response.setHeader("content-type", "text/plain; charset=utf-8");
|
|
21
|
+
response.setHeader("content-length", Buffer.byteLength(body));
|
|
22
|
+
response.end(body);
|
|
23
|
+
}
|
|
24
|
+
function resolveRequestUrl(request) {
|
|
16
25
|
const rawUrl = request.url ?? "/";
|
|
17
26
|
try {
|
|
18
|
-
return new URL(rawUrl, "http://localhost")
|
|
27
|
+
return new URL(rawUrl, "http://localhost");
|
|
19
28
|
}
|
|
20
29
|
catch {
|
|
21
|
-
|
|
30
|
+
const normalized = rawUrl.startsWith("/") ? rawUrl : `/${rawUrl}`;
|
|
31
|
+
return new URL(normalized, "http://localhost");
|
|
22
32
|
}
|
|
23
33
|
}
|
|
34
|
+
function resolvePathname(request) {
|
|
35
|
+
return resolveRequestUrl(request).pathname;
|
|
36
|
+
}
|
|
37
|
+
function formatTableNumber(value) {
|
|
38
|
+
if (!Number.isFinite(value)) {
|
|
39
|
+
return "-";
|
|
40
|
+
}
|
|
41
|
+
if (Number.isInteger(value)) {
|
|
42
|
+
return String(value);
|
|
43
|
+
}
|
|
44
|
+
return value.toFixed(2).replace(/\.00$/, "").replace(/(\.\d)0$/, "$1");
|
|
45
|
+
}
|
|
46
|
+
function formatTableDurationMs(value) {
|
|
47
|
+
if (!Number.isFinite(value)) {
|
|
48
|
+
return "-";
|
|
49
|
+
}
|
|
50
|
+
return String(Math.round(value));
|
|
51
|
+
}
|
|
52
|
+
function padTableCell(value, width, align) {
|
|
53
|
+
return align === "right" ? value.padStart(width, " ") : value.padEnd(width, " ");
|
|
54
|
+
}
|
|
55
|
+
function buildModelHealthTable(windowHours, models) {
|
|
56
|
+
const columns = [
|
|
57
|
+
{ header: "Model", align: "left", value: (row) => row.model },
|
|
58
|
+
{
|
|
59
|
+
header: "Code",
|
|
60
|
+
align: "right",
|
|
61
|
+
value: (row) => row.lastStatusCode === null ? "-" : String(row.lastStatusCode),
|
|
62
|
+
},
|
|
63
|
+
{ header: "Avg(ms)", align: "right", value: (row) => formatTableDurationMs(row.avgResponseMs) },
|
|
64
|
+
{ header: "Last(ms)", align: "right", value: (row) => formatTableDurationMs(row.lastResponseMs) },
|
|
65
|
+
{ header: "Count", align: "right", value: (row) => String(row.accessCount) },
|
|
66
|
+
{ header: "OK%", align: "right", value: (row) => `${formatTableNumber(row.successRatePct)}%` },
|
|
67
|
+
];
|
|
68
|
+
const widths = columns.map((column) => {
|
|
69
|
+
const rowWidths = models.map((row) => column.value(row).length);
|
|
70
|
+
return Math.max(column.header.length, ...rowWidths, 1);
|
|
71
|
+
});
|
|
72
|
+
const header = columns
|
|
73
|
+
.map((column, index) => padTableCell(column.header, widths[index] ?? column.header.length, column.align))
|
|
74
|
+
.join(" | ");
|
|
75
|
+
const divider = widths.map((width) => "-".repeat(width)).join("-+-");
|
|
76
|
+
const rows = models.map((row) => columns
|
|
77
|
+
.map((column, index) => padTableCell(column.value(row), widths[index] ?? 0, column.align))
|
|
78
|
+
.join(" | "));
|
|
79
|
+
return [
|
|
80
|
+
`Gateway Health (last ${formatTableNumber(windowHours)}h)`,
|
|
81
|
+
`Status: ok`,
|
|
82
|
+
"",
|
|
83
|
+
header,
|
|
84
|
+
divider,
|
|
85
|
+
...(rows.length > 0 ? rows : ["No model traffic recorded in the last 12 hours."]),
|
|
86
|
+
].join("\n");
|
|
87
|
+
}
|
|
24
88
|
function isGatewayApiPath(pathname) {
|
|
25
89
|
return (pathname === "/v1" ||
|
|
26
90
|
pathname.startsWith("/v1/") ||
|
|
@@ -29,15 +93,24 @@ function isGatewayApiPath(pathname) {
|
|
|
29
93
|
}
|
|
30
94
|
async function handleRequest(request, response) {
|
|
31
95
|
const method = (request.method ?? "GET").toUpperCase();
|
|
32
|
-
const
|
|
96
|
+
const requestUrl = resolveRequestUrl(request);
|
|
97
|
+
const pathname = requestUrl.pathname;
|
|
33
98
|
if ((method === "GET" || method === "HEAD") && pathname === "/health") {
|
|
34
|
-
const
|
|
99
|
+
const modelHealth = getModelHealthWindow(DEFAULT_MODEL_HEALTH_WINDOW_MS);
|
|
100
|
+
const tableOutput = buildModelHealthTable(modelHealth.windowHours, modelHealth.models);
|
|
101
|
+
if (requestUrl.searchParams.get("format")?.toLowerCase() !== "json") {
|
|
102
|
+
sendText(response, 200, tableOutput);
|
|
103
|
+
return;
|
|
104
|
+
}
|
|
35
105
|
sendJson(response, 200, {
|
|
36
106
|
status: "ok",
|
|
37
107
|
retryStatusCodes: Array.from(config.retryStatusCodes),
|
|
38
108
|
enabledRouteCount: Object.keys(config.modelRouteMap).length,
|
|
39
|
-
|
|
40
|
-
|
|
109
|
+
modelHealthWindowHours: modelHealth.windowHours,
|
|
110
|
+
modelHealth: modelHealth.models,
|
|
111
|
+
modelHealthTable: tableOutput,
|
|
112
|
+
modelLoadWindowHours: modelHealth.windowHours,
|
|
113
|
+
modelLoadRanking: modelHealth.models,
|
|
41
114
|
});
|
|
42
115
|
return;
|
|
43
116
|
}
|
package/package.json
CHANGED
|
@@ -1,39 +1,33 @@
|
|
|
1
1
|
const DEFAULT_WINDOW_MS = 12 * 60 * 60 * 1000;
|
|
2
2
|
const DEFAULT_MAX_SAMPLES_PER_MODEL = 5000;
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
export const DEFAULT_MODEL_HEALTH_WINDOW_MS = DEFAULT_WINDOW_MS;
|
|
5
|
+
|
|
6
|
+
interface ModelRequestSample {
|
|
5
7
|
at: number;
|
|
6
|
-
|
|
8
|
+
ok: boolean;
|
|
9
|
+
responseMs: number;
|
|
10
|
+
statusCode: number | null;
|
|
7
11
|
}
|
|
8
12
|
|
|
9
|
-
interface
|
|
13
|
+
export interface ModelHealthSummary {
|
|
10
14
|
model: string;
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
p95LoadMs: number;
|
|
15
|
-
minLoadMs: number;
|
|
16
|
-
maxLoadMs: number;
|
|
15
|
+
accessCount: number;
|
|
16
|
+
avgResponseMs: number;
|
|
17
|
+
lastResponseMs: number;
|
|
17
18
|
lastSeenAt: string;
|
|
19
|
+
lastStatusCode: number | null;
|
|
20
|
+
successCount: number;
|
|
21
|
+
successRatePct: number;
|
|
18
22
|
}
|
|
19
23
|
|
|
20
|
-
const modelSamples = new Map<string,
|
|
21
|
-
|
|
22
|
-
function quantileFromSorted(values: number[], q: number): number {
|
|
23
|
-
if (values.length === 0) {
|
|
24
|
-
return 0;
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
const clampedQ = Math.max(0, Math.min(1, q));
|
|
28
|
-
const index = Math.floor((values.length - 1) * clampedQ);
|
|
29
|
-
return values[index] ?? values[values.length - 1] ?? 0;
|
|
30
|
-
}
|
|
24
|
+
const modelSamples = new Map<string, ModelRequestSample[]>();
|
|
31
25
|
|
|
32
26
|
function roundMs(value: number): number {
|
|
33
27
|
return Math.round(value * 100) / 100;
|
|
34
28
|
}
|
|
35
29
|
|
|
36
|
-
function pruneModelSamples(samples:
|
|
30
|
+
function pruneModelSamples(samples: ModelRequestSample[], cutoffAt: number): ModelRequestSample[] {
|
|
37
31
|
let startIndex = 0;
|
|
38
32
|
|
|
39
33
|
while (startIndex < samples.length && samples[startIndex] && samples[startIndex].at < cutoffAt) {
|
|
@@ -62,19 +56,28 @@ function pruneExpiredSamples(cutoffAt: number): void {
|
|
|
62
56
|
}
|
|
63
57
|
}
|
|
64
58
|
|
|
65
|
-
export function
|
|
59
|
+
export function recordModelRequestSample(
|
|
60
|
+
model: string | null,
|
|
61
|
+
params: {
|
|
62
|
+
ok: boolean;
|
|
63
|
+
responseMs: number;
|
|
64
|
+
statusCode?: number | null;
|
|
65
|
+
},
|
|
66
|
+
): void {
|
|
66
67
|
if (!model) {
|
|
67
68
|
return;
|
|
68
69
|
}
|
|
69
70
|
|
|
70
|
-
if (!Number.isFinite(
|
|
71
|
+
if (!Number.isFinite(params.responseMs) || params.responseMs < 0) {
|
|
71
72
|
return;
|
|
72
73
|
}
|
|
73
74
|
|
|
74
75
|
const now = Date.now();
|
|
75
|
-
const sample:
|
|
76
|
+
const sample: ModelRequestSample = {
|
|
76
77
|
at: now,
|
|
77
|
-
|
|
78
|
+
ok: params.ok,
|
|
79
|
+
responseMs: params.responseMs,
|
|
80
|
+
statusCode: params.statusCode ?? null,
|
|
78
81
|
};
|
|
79
82
|
|
|
80
83
|
const existing = modelSamples.get(model) ?? [];
|
|
@@ -90,41 +93,48 @@ export function recordModelLoadSample(model: string | null, loadMs: number): voi
|
|
|
90
93
|
pruneExpiredSamples(cutoffAt);
|
|
91
94
|
}
|
|
92
95
|
|
|
93
|
-
function
|
|
96
|
+
export function recordModelLoadSample(model: string | null, loadMs: number): void {
|
|
97
|
+
recordModelRequestSample(model, {
|
|
98
|
+
ok: true,
|
|
99
|
+
responseMs: loadMs,
|
|
100
|
+
statusCode: 200,
|
|
101
|
+
});
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
function summarizeModel(model: string, samples: ModelRequestSample[]): ModelHealthSummary | null {
|
|
94
105
|
if (samples.length === 0) {
|
|
95
106
|
return null;
|
|
96
107
|
}
|
|
97
108
|
|
|
98
|
-
const
|
|
99
|
-
const
|
|
100
|
-
const
|
|
101
|
-
const
|
|
102
|
-
const
|
|
103
|
-
const
|
|
109
|
+
const accessCount = samples.length;
|
|
110
|
+
const successCount = samples.reduce((count, sample) => count + (sample.ok ? 1 : 0), 0);
|
|
111
|
+
const totalResponseMs = samples.reduce((total, sample) => total + sample.responseMs, 0);
|
|
112
|
+
const lastSample = samples[samples.length - 1] ?? null;
|
|
113
|
+
const avgResponseMs = totalResponseMs / accessCount;
|
|
114
|
+
const successRatePct = accessCount > 0 ? (successCount / accessCount) * 100 : 0;
|
|
104
115
|
|
|
105
116
|
return {
|
|
106
117
|
model,
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
118
|
+
accessCount,
|
|
119
|
+
avgResponseMs: roundMs(avgResponseMs),
|
|
120
|
+
lastResponseMs: roundMs(lastSample?.responseMs ?? 0),
|
|
121
|
+
lastSeenAt: new Date(lastSample?.at ?? Date.now()).toISOString(),
|
|
122
|
+
lastStatusCode: lastSample?.statusCode ?? null,
|
|
123
|
+
successCount,
|
|
124
|
+
successRatePct: roundMs(successRatePct),
|
|
114
125
|
};
|
|
115
126
|
}
|
|
116
127
|
|
|
117
|
-
export function
|
|
128
|
+
export function getModelHealthWindow(windowMs = DEFAULT_WINDOW_MS): {
|
|
118
129
|
windowHours: number;
|
|
119
|
-
|
|
130
|
+
models: Array<ModelHealthSummary & { rank: number }>;
|
|
120
131
|
} {
|
|
121
132
|
const normalizedWindowMs = Number.isFinite(windowMs) && windowMs > 0 ? windowMs : DEFAULT_WINDOW_MS;
|
|
122
|
-
const
|
|
123
|
-
const cutoffAt = now - normalizedWindowMs;
|
|
133
|
+
const cutoffAt = Date.now() - normalizedWindowMs;
|
|
124
134
|
|
|
125
135
|
pruneExpiredSamples(cutoffAt);
|
|
126
136
|
|
|
127
|
-
const summaries:
|
|
137
|
+
const summaries: ModelHealthSummary[] = [];
|
|
128
138
|
|
|
129
139
|
for (const [model, samples] of modelSamples.entries()) {
|
|
130
140
|
const filtered = pruneModelSamples(samples, cutoffAt);
|
|
@@ -145,22 +155,45 @@ export function getModelLoadRankingHealth(windowMs = DEFAULT_WINDOW_MS): {
|
|
|
145
155
|
}
|
|
146
156
|
|
|
147
157
|
summaries.sort((a, b) => {
|
|
148
|
-
|
|
149
|
-
|
|
158
|
+
const aIsHealthy = a.lastStatusCode === 200 ? 1 : 0;
|
|
159
|
+
const bIsHealthy = b.lastStatusCode === 200 ? 1 : 0;
|
|
160
|
+
|
|
161
|
+
if (aIsHealthy !== bIsHealthy) {
|
|
162
|
+
return bIsHealthy - aIsHealthy;
|
|
150
163
|
}
|
|
151
164
|
|
|
152
|
-
if (a.
|
|
153
|
-
return a.
|
|
165
|
+
if (a.avgResponseMs !== b.avgResponseMs) {
|
|
166
|
+
return a.avgResponseMs - b.avgResponseMs;
|
|
154
167
|
}
|
|
155
168
|
|
|
156
|
-
|
|
169
|
+
if (a.accessCount !== b.accessCount) {
|
|
170
|
+
return b.accessCount - a.accessCount;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
if (a.successRatePct !== b.successRatePct) {
|
|
174
|
+
return b.successRatePct - a.successRatePct;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
return a.model.localeCompare(b.model);
|
|
157
178
|
});
|
|
158
179
|
|
|
159
180
|
return {
|
|
160
181
|
windowHours: roundMs(normalizedWindowMs / (60 * 60 * 1000)),
|
|
161
|
-
|
|
182
|
+
models: summaries.map((entry, index) => ({
|
|
162
183
|
rank: index + 1,
|
|
163
184
|
...entry,
|
|
164
185
|
})),
|
|
165
186
|
};
|
|
166
187
|
}
|
|
188
|
+
|
|
189
|
+
export function getModelLoadRankingHealth(windowMs = DEFAULT_WINDOW_MS): {
|
|
190
|
+
windowHours: number;
|
|
191
|
+
rankedModels: Array<ModelHealthSummary & { rank: number }>;
|
|
192
|
+
} {
|
|
193
|
+
const health = getModelHealthWindow(windowMs);
|
|
194
|
+
|
|
195
|
+
return {
|
|
196
|
+
windowHours: health.windowHours,
|
|
197
|
+
rankedModels: health.models,
|
|
198
|
+
};
|
|
199
|
+
}
|
package/src/gateway/proxy.ts
CHANGED
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
transformUpstreamErrorToAnthropicError,
|
|
9
9
|
} from "./anthropic-compat.js";
|
|
10
10
|
import { config, type ModelRouteConfig } from "./config.js";
|
|
11
|
-
import {
|
|
11
|
+
import { recordModelRequestSample } from "./model-load-metrics.js";
|
|
12
12
|
|
|
13
13
|
const HOP_BY_HOP_HEADERS = new Set([
|
|
14
14
|
"connection",
|
|
@@ -814,8 +814,9 @@ export async function proxyRequest(request: IncomingMessage, response: ServerRes
|
|
|
814
814
|
selectedRoute,
|
|
815
815
|
);
|
|
816
816
|
|
|
817
|
+
const attemptStartedAt = Date.now();
|
|
818
|
+
|
|
817
819
|
try {
|
|
818
|
-
const attemptStartedAt = Date.now();
|
|
819
820
|
const upstreamResponse = await fetchWithTimeoutAndClientSignal(
|
|
820
821
|
upstreamUrl,
|
|
821
822
|
{
|
|
@@ -829,9 +830,11 @@ export async function proxyRequest(request: IncomingMessage, response: ServerRes
|
|
|
829
830
|
const headerLoadMs = Date.now() - attemptStartedAt;
|
|
830
831
|
const modelForMetric = modelId ?? requestedModel;
|
|
831
832
|
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
833
|
+
recordModelRequestSample(modelForMetric, {
|
|
834
|
+
ok: upstreamResponse.ok,
|
|
835
|
+
responseMs: headerLoadMs,
|
|
836
|
+
statusCode: upstreamResponse.status,
|
|
837
|
+
});
|
|
835
838
|
|
|
836
839
|
const contentType = (upstreamResponse.headers.get("content-type") ?? "").toLowerCase();
|
|
837
840
|
const isEventStream = contentType.includes("text/event-stream");
|
|
@@ -1029,6 +1032,12 @@ export async function proxyRequest(request: IncomingMessage, response: ServerRes
|
|
|
1029
1032
|
} catch (error) {
|
|
1030
1033
|
lastError = error;
|
|
1031
1034
|
|
|
1035
|
+
recordModelRequestSample(modelId ?? requestedModel, {
|
|
1036
|
+
ok: false,
|
|
1037
|
+
responseMs: Date.now() - attemptStartedAt,
|
|
1038
|
+
statusCode: null,
|
|
1039
|
+
});
|
|
1040
|
+
|
|
1032
1041
|
if (attemptIndex < modelCandidates.length - 1) {
|
|
1033
1042
|
continue;
|
|
1034
1043
|
}
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
import { createServer, type IncomingMessage, type Server, type ServerResponse } from "node:http";
|
|
2
2
|
import { config } from "./config.js";
|
|
3
|
-
import {
|
|
3
|
+
import {
|
|
4
|
+
DEFAULT_MODEL_HEALTH_WINDOW_MS,
|
|
5
|
+
getModelHealthWindow,
|
|
6
|
+
type ModelHealthSummary,
|
|
7
|
+
} from "./model-load-metrics.js";
|
|
4
8
|
import { proxyRequest } from "./proxy.js";
|
|
5
9
|
|
|
6
10
|
function sendJson(response: ServerResponse, statusCode: number, payload: unknown): void {
|
|
@@ -15,16 +19,94 @@ function sendJson(response: ServerResponse, statusCode: number, payload: unknown
|
|
|
15
19
|
response.end(body);
|
|
16
20
|
}
|
|
17
21
|
|
|
18
|
-
function
|
|
22
|
+
function sendText(response: ServerResponse, statusCode: number, body: string): void {
|
|
23
|
+
if (response.writableEnded) {
|
|
24
|
+
return;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
response.statusCode = statusCode;
|
|
28
|
+
response.setHeader("content-type", "text/plain; charset=utf-8");
|
|
29
|
+
response.setHeader("content-length", Buffer.byteLength(body));
|
|
30
|
+
response.end(body);
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
function resolveRequestUrl(request: IncomingMessage): URL {
|
|
19
34
|
const rawUrl = request.url ?? "/";
|
|
20
35
|
|
|
21
36
|
try {
|
|
22
|
-
return new URL(rawUrl, "http://localhost")
|
|
37
|
+
return new URL(rawUrl, "http://localhost");
|
|
23
38
|
} catch {
|
|
24
|
-
|
|
39
|
+
const normalized = rawUrl.startsWith("/") ? rawUrl : `/${rawUrl}`;
|
|
40
|
+
return new URL(normalized, "http://localhost");
|
|
25
41
|
}
|
|
26
42
|
}
|
|
27
43
|
|
|
44
|
+
function resolvePathname(request: IncomingMessage): string {
|
|
45
|
+
return resolveRequestUrl(request).pathname;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
function formatTableNumber(value: number): string {
|
|
49
|
+
if (!Number.isFinite(value)) {
|
|
50
|
+
return "-";
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
if (Number.isInteger(value)) {
|
|
54
|
+
return String(value);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
return value.toFixed(2).replace(/\.00$/, "").replace(/(\.\d)0$/, "$1");
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
function formatTableDurationMs(value: number): string {
|
|
61
|
+
if (!Number.isFinite(value)) {
|
|
62
|
+
return "-";
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
return String(Math.round(value));
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
function padTableCell(value: string, width: number, align: "left" | "right"): string {
|
|
69
|
+
return align === "right" ? value.padStart(width, " ") : value.padEnd(width, " ");
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
function buildModelHealthTable(windowHours: number, models: Array<ModelHealthSummary & { rank: number }>): string {
|
|
73
|
+
const columns = [
|
|
74
|
+
{ header: "Model", align: "left" as const, value: (row: ModelHealthSummary & { rank: number }) => row.model },
|
|
75
|
+
{
|
|
76
|
+
header: "Code",
|
|
77
|
+
align: "right" as const,
|
|
78
|
+
value: (row: ModelHealthSummary & { rank: number }) =>
|
|
79
|
+
row.lastStatusCode === null ? "-" : String(row.lastStatusCode),
|
|
80
|
+
},
|
|
81
|
+
{ header: "Avg(ms)", align: "right" as const, value: (row: ModelHealthSummary & { rank: number }) => formatTableDurationMs(row.avgResponseMs) },
|
|
82
|
+
{ header: "Last(ms)", align: "right" as const, value: (row: ModelHealthSummary & { rank: number }) => formatTableDurationMs(row.lastResponseMs) },
|
|
83
|
+
{ header: "Count", align: "right" as const, value: (row: ModelHealthSummary & { rank: number }) => String(row.accessCount) },
|
|
84
|
+
{ header: "OK%", align: "right" as const, value: (row: ModelHealthSummary & { rank: number }) => `${formatTableNumber(row.successRatePct)}%` },
|
|
85
|
+
];
|
|
86
|
+
|
|
87
|
+
const widths = columns.map((column) => {
|
|
88
|
+
const rowWidths = models.map((row) => column.value(row).length);
|
|
89
|
+
return Math.max(column.header.length, ...rowWidths, 1);
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
const header = columns
|
|
93
|
+
.map((column, index) => padTableCell(column.header, widths[index] ?? column.header.length, column.align))
|
|
94
|
+
.join(" | ");
|
|
95
|
+
const divider = widths.map((width) => "-".repeat(width)).join("-+-");
|
|
96
|
+
const rows = models.map((row) => columns
|
|
97
|
+
.map((column, index) => padTableCell(column.value(row), widths[index] ?? 0, column.align))
|
|
98
|
+
.join(" | "));
|
|
99
|
+
|
|
100
|
+
return [
|
|
101
|
+
`Gateway Health (last ${formatTableNumber(windowHours)}h)`,
|
|
102
|
+
`Status: ok`,
|
|
103
|
+
"",
|
|
104
|
+
header,
|
|
105
|
+
divider,
|
|
106
|
+
...(rows.length > 0 ? rows : ["No model traffic recorded in the last 12 hours."]),
|
|
107
|
+
].join("\n");
|
|
108
|
+
}
|
|
109
|
+
|
|
28
110
|
function isGatewayApiPath(pathname: string): boolean {
|
|
29
111
|
return (
|
|
30
112
|
pathname === "/v1" ||
|
|
@@ -36,17 +118,27 @@ function isGatewayApiPath(pathname: string): boolean {
|
|
|
36
118
|
|
|
37
119
|
async function handleRequest(request: IncomingMessage, response: ServerResponse): Promise<void> {
|
|
38
120
|
const method = (request.method ?? "GET").toUpperCase();
|
|
39
|
-
const
|
|
121
|
+
const requestUrl = resolveRequestUrl(request);
|
|
122
|
+
const pathname = requestUrl.pathname;
|
|
40
123
|
|
|
41
124
|
if ((method === "GET" || method === "HEAD") && pathname === "/health") {
|
|
42
|
-
const
|
|
125
|
+
const modelHealth = getModelHealthWindow(DEFAULT_MODEL_HEALTH_WINDOW_MS);
|
|
126
|
+
const tableOutput = buildModelHealthTable(modelHealth.windowHours, modelHealth.models);
|
|
127
|
+
|
|
128
|
+
if (requestUrl.searchParams.get("format")?.toLowerCase() !== "json") {
|
|
129
|
+
sendText(response, 200, tableOutput);
|
|
130
|
+
return;
|
|
131
|
+
}
|
|
43
132
|
|
|
44
133
|
sendJson(response, 200, {
|
|
45
134
|
status: "ok",
|
|
46
135
|
retryStatusCodes: Array.from(config.retryStatusCodes),
|
|
47
136
|
enabledRouteCount: Object.keys(config.modelRouteMap).length,
|
|
48
|
-
|
|
49
|
-
|
|
137
|
+
modelHealthWindowHours: modelHealth.windowHours,
|
|
138
|
+
modelHealth: modelHealth.models,
|
|
139
|
+
modelHealthTable: tableOutput,
|
|
140
|
+
modelLoadWindowHours: modelHealth.windowHours,
|
|
141
|
+
modelLoadRanking: modelHealth.models,
|
|
50
142
|
});
|
|
51
143
|
return;
|
|
52
144
|
}
|