@archimonde12/llm-proxy 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -0
- package/README.md +272 -0
- package/dist/adapters/base.js +2 -0
- package/dist/adapters/deepseek.js +78 -0
- package/dist/adapters/index.js +20 -0
- package/dist/adapters/ollama.js +182 -0
- package/dist/adapters/openaiCompatible.js +50 -0
- package/dist/admin/auth.js +37 -0
- package/dist/admin/configStore.js +80 -0
- package/dist/admin/envStore.js +149 -0
- package/dist/admin/routes.js +360 -0
- package/dist/cli/bin.js +10 -0
- package/dist/cli/commands/config.js +31 -0
- package/dist/cli/commands/doctor.js +107 -0
- package/dist/cli/commands/init.js +68 -0
- package/dist/cli/commands/start.js +38 -0
- package/dist/cli/commands/status.js +23 -0
- package/dist/cli/index.js +22 -0
- package/dist/config/defaultModelsFile.js +16 -0
- package/dist/config/load.js +221 -0
- package/dist/config/mergeHeaders.js +33 -0
- package/dist/config/paths.js +45 -0
- package/dist/config/schema.js +59 -0
- package/dist/config.js +25 -0
- package/dist/http.js +69 -0
- package/dist/index.js +30 -0
- package/dist/observability/metrics.js +102 -0
- package/dist/observability/modelMessageDebugStore.js +69 -0
- package/dist/observability/modelRequestStore.js +52 -0
- package/dist/observability/requestId.js +21 -0
- package/dist/observability/requestRecorder.js +48 -0
- package/dist/observability/summary.js +56 -0
- package/dist/observability/tokenUsage.js +46 -0
- package/dist/server.js +442 -0
- package/dist/startupLog.js +114 -0
- package/dist/types.js +2 -0
- package/dist/upstreamProbe.js +53 -0
- package/dist/version.js +19 -0
- package/package.json +73 -0
- package/ui/dist/assets/index-CDUAKry5.css +1 -0
- package/ui/dist/assets/index-Dq3YzAqp.js +13 -0
- package/ui/dist/index.html +16 -0
package/dist/http.js
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.joinUrl = joinUrl;
|
|
4
|
+
exports.postJson = postJson;
|
|
5
|
+
exports.postJsonStream = postJsonStream;
|
|
6
|
+
/**
 * Join `base` (origin and optional path prefix) with `path`.
 * `new URL("/v1/...", base)` drops any path on `base` because a leading `/` is
 * origin-absolute; this helper preserves prefixes like `/api`.
 */
function joinUrl(base, path) {
    const prefix = base.replace(/\/+$/, "");
    const suffix = path.replace(/^\/+/, "");
    return [prefix, suffix].join("/");
}
|
|
16
|
+
/**
 * POST `body` as JSON to `url` and parse the JSON response.
 * Aborts via AbortController after `opts.timeoutMs` (default 30 minutes).
 * Returns `{ status, headers, json }`; network/abort errors propagate.
 */
async function postJson(url, body, opts) {
    const abort = new AbortController();
    const limitMs = opts?.timeoutMs ?? 1800_000;
    const watchdog = setTimeout(() => abort.abort(), limitMs);
    try {
        const headers = {
            "content-type": "application/json",
            ...(opts?.headers ?? {}),
        };
        const res = await fetch(url, {
            method: "POST",
            headers,
            body: JSON.stringify(body),
            signal: abort.signal,
        });
        const json = await res.json();
        return { status: res.status, headers: res.headers, json };
    } finally {
        // Always disarm the timeout so the process can exit promptly.
        clearTimeout(watchdog);
    }
}
|
|
37
|
+
// Like postJson, but returns the raw streaming body. Timeout applies only to the
// initial request (until headers are received), not the entire stream duration.
async function postJsonStream(url, body, opts) {
    const controller = new AbortController();
    const timeoutMs = opts?.timeoutMs ?? 1800_000;
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
        const res = await fetch(url, {
            method: "POST",
            headers: {
                "content-type": "application/json",
                ...(opts?.headers ?? {}),
            },
            body: JSON.stringify(body),
            signal: controller.signal,
        });
        if (res.status !== 200) {
            // Diagnostic log for failed upstream calls. Drop the bulky
            // `messages`/`tools` payload fields, and redact every
            // credential-bearing header (not just `authorization`, which the
            // previous version left `x-api-key`/`api-key`/`cookie` exposed).
            const { messages, tools, ...rest } = body;
            const SENSITIVE = /^(authorization|proxy-authorization|x-api-key|api-key|cookie)$/i;
            const safeHeaders = opts?.headers && typeof opts.headers === "object"
                ? Object.fromEntries(Object.entries(opts.headers).map(([k, v]) => [
                    k,
                    SENSITIVE.test(k) ? "<redacted>" : v,
                ]))
                : undefined;
            console.log("postJsonStream", JSON.stringify(rest), JSON.stringify(safeHeaders));
            // Log a concise summary rather than the whole Response object: the
            // raw object serializes poorly and may expose upstream headers.
            console.log("postJsonStream res", JSON.stringify({ status: res.status, statusText: res.statusText }));
        }
        return { status: res.status, headers: res.headers, body: res.body };
    }
    finally {
        clearTimeout(timer);
    }
}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
require("dotenv/config");
|
|
4
|
+
const server_1 = require("./server");
|
|
5
|
+
const load_1 = require("./config/load");
|
|
6
|
+
const startupLog_1 = require("./startupLog");
|
|
7
|
+
/**
 * Entry point: resolve host/port from the environment, load the models file,
 * log startup diagnostics, then build and start the Fastify server.
 *
 * Env: PORT (default 8787; empty string treated as unset), HOST (default
 * "0.0.0.0"), MODELS_PATH (optional override for the models file location).
 * @throws {Error} when PORT is set to something that is not a valid TCP port.
 */
async function main() {
    // Validate PORT up front: Number("garbage") is NaN and would otherwise
    // fail later inside app.listen() with a much less helpful error.
    const rawPort = process.env.PORT;
    const port = rawPort === undefined || rawPort === "" ? 8787 : Number(rawPort);
    if (!Number.isInteger(port) || port < 0 || port > 65535) {
        throw new Error(`Invalid PORT ${JSON.stringify(rawPort)}; expected an integer between 0 and 65535`);
    }
    const host = process.env.HOST ?? "0.0.0.0";
    const loaded = await (0, load_1.loadModelsFile)({
        envPath: process.env.MODELS_PATH,
    });
    (0, startupLog_1.logStartupPreamble)({
        nodeVersion: process.version,
        nodeEnv: process.env.NODE_ENV ?? "development",
        host,
        port,
        modelsPath: loaded.source.path,
        modelsSourceKind: loaded.source.kind,
        createdDefaultModelsFile: loaded.createdDefaultFile,
    });
    const app = await (0, server_1.buildServer)({ bindHost: host, initial: loaded });
    const address = await app.listen({ port, host });
    (0, startupLog_1.logListenBanner)({ address: String(address), port });
}
|
|
26
|
+
// Top-level runner: report any startup failure and exit non-zero so process
// supervisors (systemd, Docker, pm2, ...) can detect the crash and restart.
main().catch((err) => {
    // eslint-disable-next-line no-console
    console.error(err);
    process.exit(1);
});
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.createMetrics = createMetrics;
|
|
7
|
+
exports.registerMetrics = registerMetrics;
|
|
8
|
+
const prom_client_1 = __importDefault(require("prom-client"));
|
|
9
|
+
/**
 * Metric label for a request: the matched Fastify route pattern when one is
 * available, otherwise the raw URL path with any query string stripped.
 */
function routeLabel(req) {
    const pattern = req.routeOptions?.url;
    if (typeof pattern === "string" && pattern !== "") {
        return pattern;
    }
    const [path] = req.url.split("?");
    return path ?? "<unknown>";
}
|
|
16
|
+
/**
 * Build the Prometheus registry and all proxy metrics: HTTP request counter
 * and duration histogram, upstream-error counter, and best-effort token
 * counters, plus an `observeTokens` helper that records token usage.
 */
function createMetrics() {
    const client = prom_client_1.default;
    const registry = new client.Registry();
    client.collectDefaultMetrics({ register: registry });
    const HTTP_LABELS = ["method", "route", "status_code"];
    const MODEL_LABELS = ["model_id", "adapter"];
    // All token/error counters share the same registry and label set.
    const modelCounter = (name, help) => new client.Counter({
        name,
        help,
        registers: [registry],
        labelNames: MODEL_LABELS,
    });
    const httpRequestsTotal = new client.Counter({
        name: "llm_proxy_http_requests_total",
        help: "HTTP requests completed",
        registers: [registry],
        labelNames: HTTP_LABELS,
    });
    const httpRequestDurationSeconds = new client.Histogram({
        name: "llm_proxy_http_request_duration_seconds",
        help: "HTTP request duration in seconds",
        registers: [registry],
        labelNames: HTTP_LABELS,
        buckets: [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10],
    });
    const upstreamErrorsTotal = modelCounter("llm_proxy_upstream_errors_total", "Upstream errors by adapter/model");
    const tokensInTotal = modelCounter("llm_proxy_tokens_in_total", "Total input/prompt tokens (best-effort)");
    const tokensOutTotal = modelCounter("llm_proxy_tokens_out_total", "Total output/completion tokens (best-effort)");
    const tokensTotal = modelCounter("llm_proxy_tokens_total", "Total tokens (best-effort)");
    // Record whichever token counts are present and finite; missing or bogus
    // values are skipped rather than recorded as zero.
    const observeTokens = (args) => {
        const labels = { model_id: args.modelId, adapter: args.adapter };
        const bump = (counter, value) => {
            if (typeof value === "number" && Number.isFinite(value)) {
                counter.inc(labels, value);
            }
        };
        bump(tokensInTotal, args.tokensIn);
        bump(tokensOutTotal, args.tokensOut);
        bump(tokensTotal, args.tokensTotal);
    };
    return {
        registry,
        httpRequestsTotal,
        httpRequestDurationSeconds,
        upstreamErrorsTotal,
        tokensInTotal,
        tokensOutTotal,
        tokensTotal,
        observeTokens,
    };
}
|
|
79
|
+
/**
 * Wire HTTP metrics into a Fastify app: a per-request counter plus a duration
 * histogram (via onRequest/onResponse hooks), and a GET /metrics endpoint
 * exposing the registry in Prometheus text format.
 */
async function registerMetrics(app, metrics) {
    app.addHook("onRequest", async (req) => {
        // High-resolution start timestamp, stashed on the request object.
        req.__metricsStartAt = process.hrtime.bigint();
    });
    app.addHook("onResponse", async (req, reply) => {
        const startedAt = req.__metricsStartAt;
        const labels = {
            method: req.method,
            route: routeLabel(req),
            status_code: String(reply.statusCode),
        };
        metrics.httpRequestsTotal.inc(labels, 1);
        // Only observe a duration when the onRequest hook actually ran.
        if (startedAt) {
            const elapsedSeconds = Number(process.hrtime.bigint() - startedAt) / 1e9;
            metrics.httpRequestDurationSeconds.observe(labels, elapsedSeconds);
        }
    });
    app.get("/metrics", async (_req, reply) => {
        reply.header("content-type", metrics.registry.contentType);
        return await metrics.registry.metrics();
    });
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ModelMessageDebugStore = void 0;
|
|
4
|
+
/**
 * Fixed-capacity ring buffer. Once full, each new item overwrites the oldest.
 */
class Ring {
    capacity;
    buf;
    count = 0; // monotonically increasing write counter; index = count % capacity
    constructor(capacity) {
        this.capacity = capacity;
        if (capacity < 1)
            throw new Error("capacity must be >= 1");
        this.buf = new Array(capacity);
    }
    push(item) {
        const i = this.count % this.capacity;
        this.buf[i] = item;
        this.count++;
    }
    /** Newest first */
    list(limit) {
        const cap = this.capacity;
        const n = Math.min(limit, Math.min(this.count, cap));
        const out = [];
        for (let k = 0; k < n; k++) {
            const idx = (this.count - 1 - k + cap) % cap;
            const e = this.buf[idx];
            if (e)
                out.push(e);
        }
        return out;
    }
}
/**
 * Keeps the most recent debug message events per model in bounded memory
 * (one Ring of `perModelCapacity` entries per model id).
 */
class ModelMessageDebugStore {
    perModelCapacity;
    byModel = new Map();
    constructor(perModelCapacity) {
        this.perModelCapacity = perModelCapacity;
        if (perModelCapacity < 1)
            throw new Error("perModelCapacity must be >= 1");
    }
    /** Append one event to the ring for `ev.modelId` (creating it on demand). */
    record(ev) {
        let ring = this.byModel.get(ev.modelId);
        if (!ring) {
            ring = new Ring(this.perModelCapacity);
            this.byModel.set(ev.modelId, ring);
        }
        ring.push(ev);
    }
    /**
     * Newest first; when `roles` is given, return up to `limit` events whose
     * role is in that list.
     */
    getRecent(modelId, limit, roles) {
        const ring = this.byModel.get(modelId);
        if (!ring)
            return [];
        // Fix: scan the entire ring. The previous `limit * 3` window could miss
        // matching entries still held in the buffer whenever many non-matching
        // roles arrived more recently; the ring is bounded, so a full scan is
        // still O(perModelCapacity).
        const items = ring.list(this.perModelCapacity);
        if (!roles || roles.length === 0)
            return items.slice(0, limit);
        const allowed = new Set(roles);
        const out = [];
        for (const it of items) {
            if (!allowed.has(it.role))
                continue;
            out.push(it);
            if (out.length >= limit)
                break;
        }
        return out;
    }
}
|
|
69
|
+
exports.ModelMessageDebugStore = ModelMessageDebugStore;
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ModelRequestStore = void 0;
|
|
4
|
+
exports.modelRequestHistoryCapacityFromEnv = modelRequestHistoryCapacityFromEnv;
|
|
5
|
+
/**
 * Bounded in-memory history of model requests with O(1) lookup by request id.
 * Once `capacity` entries exist, each new record evicts the oldest one.
 */
class ModelRequestStore {
    capacity;
    buf;
    count = 0;
    byId = new Map();
    constructor(capacity) {
        this.capacity = capacity;
        if (capacity < 1)
            throw new Error("capacity must be >= 1");
        this.buf = new Array(capacity);
    }
    record(entry) {
        const slot = this.count % this.capacity;
        const evicted = this.buf[slot];
        // Keep the id index in sync with the ring: drop the evicted entry's id.
        if (evicted)
            this.byId.delete(evicted.requestId);
        this.buf[slot] = entry;
        this.byId.set(entry.requestId, entry);
        this.count += 1;
    }
    /** Newest first */
    getRecent(limit) {
        const size = Math.min(this.count, this.capacity);
        const wanted = Math.min(limit, size);
        const result = [];
        for (let offset = 0; offset < wanted; offset++) {
            const idx = (this.count - 1 - offset + this.capacity) % this.capacity;
            const entry = this.buf[idx];
            if (entry)
                result.push(entry);
        }
        return result;
    }
    getById(requestId) {
        return this.byId.get(requestId) ?? null;
    }
}
|
|
42
|
+
exports.ModelRequestStore = ModelRequestStore;
|
|
43
|
+
;
|
|
44
|
+
/**
 * Capacity for the model-request history ring, read from the
 * MODEL_REQUEST_HISTORY_MAX env var. Falls back to 5000 when unset or
 * invalid; hard-capped at 50_000 to bound memory.
 */
function modelRequestHistoryCapacityFromEnv() {
    const DEFAULT = 5000;
    const MAX = 50_000;
    const raw = process.env.MODEL_REQUEST_HISTORY_MAX?.trim();
    if (!raw)
        return DEFAULT;
    const parsed = Number(raw);
    const valid = Number.isFinite(parsed) && parsed >= 1;
    return valid ? Math.min(Math.floor(parsed), MAX) : DEFAULT;
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.genRequestId = genRequestId;
|
|
7
|
+
exports.registerRequestId = registerRequestId;
|
|
8
|
+
const node_crypto_1 = __importDefault(require("node:crypto"));
|
|
9
|
+
/**
 * Request id for an incoming request: honor an inbound `x-request-id` header
 * (trimmed) when present, otherwise mint a fresh UUID.
 *
 * Node lowercases incoming header names, so a single lowercase lookup covers
 * every spelling — the previous `X-Request-Id` and `.toLowerCase()` lookups
 * were dead code. Repeated headers arrive as an array; use the first value.
 */
function genRequestId(req) {
    const raw = req.headers["x-request-id"];
    const hdr = Array.isArray(raw) ? raw[0] : raw;
    if (typeof hdr === "string" && hdr.trim())
        return hdr.trim();
    return node_crypto_1.default.randomUUID();
}
|
|
17
|
+
/**
 * Echo Fastify's request id back to the caller on every response via the
 * `x-request-id` response header.
 */
async function registerRequestId(app) {
    app.addHook("onRequest", async (request, reply) => {
        reply.header("x-request-id", request.id);
    });
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.RequestRecorder = void 0;
|
|
4
|
+
exports.requestHistoryCapacityFromEnv = requestHistoryCapacityFromEnv;
|
|
5
|
+
/**
 * Fixed-capacity ring buffer of request log entries. Once full, each new
 * record overwrites the oldest entry.
 */
class RequestRecorder {
    capacity;
    buf;
    count = 0; // monotonically increasing write counter; index = count % capacity
    constructor(capacity) {
        this.capacity = capacity;
        if (capacity < 1)
            throw new Error("capacity must be >= 1");
        this.buf = new Array(capacity);
    }
    record(entry) {
        // The read side derives indices from `count` alone, so no separate
        // head pointer is needed (the old `head` field was written on every
        // record but never read — dead state, removed).
        this.buf[this.count % this.capacity] = entry;
        this.count++;
    }
    /** Newest first */
    getRecent(limit) {
        const cap = this.capacity;
        const n = Math.min(limit, Math.min(this.count, cap));
        const out = [];
        for (let k = 0; k < n; k++) {
            const idx = (this.count - 1 - k + cap) % cap;
            const e = this.buf[idx];
            if (e)
                out.push(e);
        }
        return out;
    }
}
|
|
39
|
+
exports.RequestRecorder = RequestRecorder;
|
|
40
|
+
/**
 * Capacity for the request history ring, read from the REQUEST_HISTORY_MAX
 * env var. Falls back to 200 when unset or invalid; hard-capped at 10_000.
 */
function requestHistoryCapacityFromEnv() {
    const DEFAULT = 200;
    const MAX = 10_000;
    const raw = process.env.REQUEST_HISTORY_MAX?.trim();
    if (!raw)
        return DEFAULT;
    const parsed = Number(raw);
    const valid = Number.isFinite(parsed) && parsed >= 1;
    return valid ? Math.min(Math.floor(parsed), MAX) : DEFAULT;
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.buildMetricsSummary = buildMetricsSummary;
|
|
4
|
+
/**
 * Small JSON snapshot from existing prom-client metrics (no extra counters).
 */
async function buildMetricsSummary(metrics) {
    const summary = {
        uptimeSeconds: process.uptime(),
    };
    // HTTP totals, bucketed by status-code class (1xx / 4xx / 5xx).
    const http = await metrics.httpRequestsTotal.get();
    let requests = 0;
    let informational = 0;
    let clientErrors = 0;
    let serverErrors = 0;
    for (const sample of http.values) {
        requests += sample.value;
        const sc = sample.labels?.status_code;
        if (typeof sc !== "string")
            continue;
        const code = Number(sc);
        if (!Number.isFinite(code))
            continue;
        if (code >= 500) {
            serverErrors += sample.value;
        } else if (code >= 400) {
            clientErrors += sample.value;
        } else if (code < 200) {
            informational += sample.value;
        }
    }
    summary.httpRequestsTotal = requests;
    summary.httpErrors4xx = clientErrors;
    summary.httpErrors5xx = serverErrors;
    summary.httpErrors1xx = informational;
    // Upstream error total across all adapter/model label sets.
    const upstream = await metrics.upstreamErrorsTotal.get();
    summary.upstreamErrorsTotal = upstream.values.reduce((acc, v) => acc + v.value, 0);
    // Duration histogram: fold the _count and _sum series to derive the mean.
    const hist = await metrics.httpRequestDurationSeconds.get();
    let count = 0;
    let sum = 0;
    for (const sample of hist.values) {
        const mn = sample.metricName;
        if (mn?.endsWith("_count"))
            count += sample.value;
        if (mn === "llm_proxy_http_request_duration_seconds_sum")
            sum += sample.value;
    }
    summary.requestDurationSeconds = {
        count,
        sum,
        meanSeconds: count > 0 ? sum / count : undefined,
    };
    return summary;
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.extractUsageFromChatCompletionResponse = extractUsageFromChatCompletionResponse;
|
|
4
|
+
exports.extractUsageFromSseChunk = extractUsageFromSseChunk;
|
|
5
|
+
/** Return `v` when it is a finite number, otherwise undefined. */
function num(v) {
    return typeof v === "number" && Number.isFinite(v) ? v : undefined;
}
/**
 * Shared extraction for OpenAI-style `usage` payloads (non-streaming body or
 * SSE chunk — both carry the same shape). Returns null when the adapter does
 * not use this shape, the payload is not an object, or no finite token
 * counts are present. Best-effort only; adapters may not expose usage
 * consistently.
 */
function usageFromOpenAiShape(adapter, payload) {
    if (!payload || typeof payload !== "object")
        return null;
    if (adapter !== "openai_compatible" && adapter !== "deepseek")
        return null;
    const usage = payload.usage;
    if (!usage || typeof usage !== "object")
        return null;
    const tokensIn = num(usage.prompt_tokens);
    const tokensOut = num(usage.completion_tokens);
    const tokensTotal = num(usage.total_tokens);
    if (tokensIn === undefined && tokensOut === undefined && tokensTotal === undefined)
        return null;
    return { tokensIn, tokensOut, tokensTotal };
}
// The two public extractors were byte-for-byte duplicates; both now delegate
// to the shared helper so the logic is defined exactly once.
function extractUsageFromChatCompletionResponse(adapter, body) {
    return usageFromOpenAiShape(adapter, body);
}
function extractUsageFromSseChunk(adapter, chunk) {
    return usageFromOpenAiShape(adapter, chunk);
}
|