@archimonde12/llm-proxy 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -0
- package/README.md +272 -0
- package/dist/adapters/base.js +2 -0
- package/dist/adapters/deepseek.js +78 -0
- package/dist/adapters/index.js +20 -0
- package/dist/adapters/ollama.js +182 -0
- package/dist/adapters/openaiCompatible.js +50 -0
- package/dist/admin/auth.js +37 -0
- package/dist/admin/configStore.js +80 -0
- package/dist/admin/envStore.js +149 -0
- package/dist/admin/routes.js +360 -0
- package/dist/cli/bin.js +10 -0
- package/dist/cli/commands/config.js +31 -0
- package/dist/cli/commands/doctor.js +107 -0
- package/dist/cli/commands/init.js +68 -0
- package/dist/cli/commands/start.js +38 -0
- package/dist/cli/commands/status.js +23 -0
- package/dist/cli/index.js +22 -0
- package/dist/config/defaultModelsFile.js +16 -0
- package/dist/config/load.js +221 -0
- package/dist/config/mergeHeaders.js +33 -0
- package/dist/config/paths.js +45 -0
- package/dist/config/schema.js +59 -0
- package/dist/config.js +25 -0
- package/dist/http.js +69 -0
- package/dist/index.js +30 -0
- package/dist/observability/metrics.js +102 -0
- package/dist/observability/modelMessageDebugStore.js +69 -0
- package/dist/observability/modelRequestStore.js +52 -0
- package/dist/observability/requestId.js +21 -0
- package/dist/observability/requestRecorder.js +48 -0
- package/dist/observability/summary.js +56 -0
- package/dist/observability/tokenUsage.js +46 -0
- package/dist/server.js +442 -0
- package/dist/startupLog.js +114 -0
- package/dist/types.js +2 -0
- package/dist/upstreamProbe.js +53 -0
- package/dist/version.js +19 -0
- package/package.json +73 -0
- package/ui/dist/assets/index-CDUAKry5.css +1 -0
- package/ui/dist/assets/index-Dq3YzAqp.js +13 -0
- package/ui/dist/index.html +16 -0
package/dist/server.js
ADDED
|
@@ -0,0 +1,442 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.buildServer = buildServer;
|
|
7
|
+
const fastify_1 = __importDefault(require("fastify"));
|
|
8
|
+
const node_fs_1 = __importDefault(require("node:fs"));
|
|
9
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
10
|
+
const static_1 = __importDefault(require("@fastify/static"));
|
|
11
|
+
const load_1 = require("./config/load");
|
|
12
|
+
const config_1 = require("./config");
|
|
13
|
+
const adapters_1 = require("./adapters");
|
|
14
|
+
const metrics_1 = require("./observability/metrics");
|
|
15
|
+
const requestId_1 = require("./observability/requestId");
|
|
16
|
+
const tokenUsage_1 = require("./observability/tokenUsage");
|
|
17
|
+
const auth_1 = require("./admin/auth");
|
|
18
|
+
const routes_1 = require("./admin/routes");
|
|
19
|
+
const upstreamProbe_1 = require("./upstreamProbe");
|
|
20
|
+
const requestRecorder_1 = require("./observability/requestRecorder");
|
|
21
|
+
const modelRequestStore_1 = require("./observability/modelRequestStore");
|
|
22
|
+
const modelMessageDebugStore_1 = require("./observability/modelMessageDebugStore");
|
|
23
|
+
const version_1 = require("./version");
|
|
24
|
+
/**
 * Decide whether a message at `level` should be emitted given LOG_LEVEL.
 * With no LOG_LEVEL set, everything is logged; an unrecognized value
 * behaves like "info".
 */
function shouldLog(level) {
    const raw = (process.env.LOG_LEVEL ?? "").toLowerCase();
    if (raw === "") {
        return true;
    }
    // Numeric severities; higher means more important.
    const severity = {
        debug: 10,
        info: 20,
        warn: 30,
        error: 40,
    };
    const threshold = Object.hasOwn(severity, raw) ? severity[raw] : severity.info;
    return severity[level] >= threshold;
}
|
|
39
|
+
/**
 * Report whether the stream a given level writes to is a TTY:
 * warn/error go to stderr, everything else to stdout.
 */
function logStreamIsTTY(level) {
    const usesStderr = level === "warn" || level === "error";
    const stream = usesStderr ? process.stderr : process.stdout;
    return stream.isTTY;
}
|
|
44
|
+
/**
 * Render one log line. On a non-TTY stream the output is plain
 * `message [meta-json]`; on a TTY a colored level badge is prefixed and
 * the meta JSON is dimmed.
 */
function formatLogLine(level, message, meta) {
    if (!logStreamIsTTY(level)) {
        return meta ? `${message} ${JSON.stringify(meta)}` : message;
    }
    const badges = {
        debug: "\x1b[94m\x1b[2m[debug]\x1b[0m",
        info: "\x1b[96m[info]\x1b[0m",
        warn: "\x1b[33m\x1b[1m[warn]\x1b[0m",
        error: "\x1b[31m\x1b[1m[error]\x1b[0m",
    };
    const prefix = `${badges[level]} ${message}`;
    if (!meta) {
        return prefix;
    }
    // meta: dim but readable (avoid dark gray)
    return `${prefix} \x1b[2m\x1b[37m${JSON.stringify(meta)}\x1b[0m`;
}
|
|
60
|
+
/**
 * Emit a formatted log line through the console method matching `level`,
 * honoring the LOG_LEVEL filter implemented by `shouldLog`.
 */
function log(level, message, meta) {
    if (!shouldLog(level)) {
        return;
    }
    let sink;
    switch (level) {
        case "error":
            sink = console.error;
            break;
        case "warn":
            sink = console.warn;
            break;
        default:
            sink = console.log;
    }
    // eslint-disable-next-line no-console
    sink(formatLogLine(level, message, meta));
}
|
|
71
|
+
/**
 * Copy a WHATWG ReadableStream to a Node writable (via `res.write`),
 * chunk by chunk, always releasing the reader lock when the stream
 * ends or the loop throws.
 */
async function pipeWebStreamToNode(stream, res) {
    const reader = stream.getReader();
    try {
        for (;;) {
            const chunk = await reader.read();
            if (chunk.done) {
                return;
            }
            if (chunk.value) {
                res.write(Buffer.from(chunk.value));
            }
        }
    }
    finally {
        try {
            reader.releaseLock();
        }
        catch {
            // lock may already be released; nothing to do
        }
    }
}
|
|
91
|
+
/**
 * Pipe an SSE ReadableStream to a Node response while sniffing each
 * `data:` event for token-usage payloads (reported through `onUsage`).
 * Bytes are forwarded verbatim; a bounded text buffer — trimmed to the
 * last 256 KiB once it exceeds 1 MiB — is scanned for complete
 * blank-line-delimited event blocks.
 */
async function pipeSseWebStreamToNodeWithUsage(adapter, stream, res, onUsage) {
    const reader = stream.getReader();
    const decoder = new TextDecoder();
    let pending = "";
    // Scan one SSE event block for `data:` lines carrying usage JSON.
    const scanEventBlock = (block) => {
        for (const rawLine of block.split("\n")) {
            const line = rawLine.trimEnd();
            if (!line.startsWith("data:"))
                continue;
            const payload = line.slice("data:".length).trimStart();
            if (!payload || payload === "[DONE]")
                continue;
            try {
                const parsed = JSON.parse(payload);
                const usage = (0, tokenUsage_1.extractUsageFromSseChunk)(adapter, parsed);
                if (usage)
                    onUsage(usage);
            }
            catch {
                // ignore non-JSON chunks
            }
        }
    };
    try {
        for (;;) {
            const { value, done } = await reader.read();
            if (done)
                break;
            if (!value)
                continue;
            // Forward raw bytes first so the client is never delayed by parsing.
            res.write(Buffer.from(value));
            pending += decoder.decode(value, { stream: true });
            if (pending.length > 1024 * 1024)
                pending = pending.slice(-256 * 1024);
            let sep = pending.indexOf("\n\n");
            while (sep >= 0) {
                const block = pending.slice(0, sep);
                pending = pending.slice(sep + 2);
                if (block)
                    scanEventBlock(block);
                sep = pending.indexOf("\n\n");
            }
        }
    }
    finally {
        try {
            reader.releaseLock();
        }
        catch {
            // reader lock already released
        }
    }
}
|
|
144
|
+
/**
 * Build (but do not start) the Fastify app for the proxy.
 *
 * Config can be supplied two ways:
 *   - `opts.initial`: a pre-loaded models file plus its source descriptor, or
 *   - `opts.modelsPath`: a path to load the models file from.
 * Throws if neither is provided. `opts.bindHost` controls the
 * localhost-only guard for "/" and "/ui*" routes.
 *
 * Returns the configured Fastify instance; the caller is expected to
 * call `listen` on it.
 */
async function buildServer(opts) {
    // Fastify's built-in logger is only enabled when LOG_LEVEL is set;
    // otherwise the lightweight log() helpers in this module handle logging.
    const app = (0, fastify_1.default)({
        logger: process.env.LOG_LEVEL ? true : false,
        genReqId: requestId_1.genRequestId,
    });
    // Mutable server state shared with the admin routes (config hot-reload).
    let state;
    if (opts.initial) {
        // Caller already loaded the models file (e.g. during CLI startup).
        state = {
            modelsFile: opts.initial.modelsFile,
            activeConfigPath: node_path_1.default.resolve(opts.initial.source.path),
            bootstrapSource: opts.initial.source,
            configGeneration: 1,
        };
    }
    else if (opts.modelsPath) {
        // Load the models file ourselves from the explicit path.
        const abs = node_path_1.default.resolve(opts.modelsPath);
        const mf = await (0, load_1.loadModelsFileFromPath)(abs);
        state = {
            modelsFile: mf,
            activeConfigPath: abs,
            bootstrapSource: { kind: "cli_flag", path: abs },
            configGeneration: 1,
        };
    }
    else {
        throw new Error("buildServer: provide initial or modelsPath");
    }
    // Observability: metrics registry plus in-memory ring buffers for recent
    // requests, per-model requests, and captured chat messages (deep debug).
    const metrics = (0, metrics_1.createMetrics)();
    const recorder = new requestRecorder_1.RequestRecorder((0, requestRecorder_1.requestHistoryCapacityFromEnv)());
    const modelRequests = new modelRequestStore_1.ModelRequestStore((0, modelRequestStore_1.modelRequestHistoryCapacityFromEnv)());
    const modelMessages = new modelMessageDebugStore_1.ModelMessageDebugStore(10);
    const enforceLocalhost = (0, auth_1.shouldEnforceLocalhostGuard)(opts.bindHost);
    const pkgV = (0, version_1.packageVersion)();
    if (enforceLocalhost) {
        // Restrict "/" and "/ui*" to local clients when bound beyond loopback.
        app.addHook("onRequest", async (req, reply) => {
            const p = (req.url ?? "").split("?")[0] ?? "";
            if (p === "/" || p.startsWith("/ui")) {
                if (!(0, auth_1.isLocalhostRequest)(req))
                    return (0, auth_1.sendForbiddenNonLocal)(reply);
            }
        });
    }
    await (0, requestId_1.registerRequestId)(app);
    await (0, metrics_1.registerMetrics)(app, metrics);
    // onResponse #1: record every completed request into the history buffers.
    // Reads per-request fields (__startAt, __modelId, __modelAdapter,
    // __tokenUsage) stamped by the hooks/routes below.
    app.addHook("onResponse", async (req, reply) => {
        const startAt = req.__startAt;
        const durationMs = startAt
            ? Number(process.hrtime.bigint() - startAt) / 1e6
            : 0;
        const url = req.url.split("?")[0] ?? req.url;
        const modelId = req.__modelId;
        const modelAdapter = req.__modelAdapter;
        const tokens = req.__tokenUsage;
        let error;
        if (reply.statusCode >= 400) {
            error = `HTTP ${reply.statusCode}`;
        }
        recorder.record({
            requestId: String(req.id),
            ts: Date.now(),
            method: req.method,
            path: url,
            status: reply.statusCode,
            durationMs: Math.round(durationMs * 1000) / 1000,
            modelId,
            adapter: modelAdapter,
            error,
        });
        // Chat-completions requests additionally feed the per-model store,
        // including token usage when any of the three counters is numeric.
        if (modelId && url === "/v1/chat/completions") {
            modelRequests.record({
                ts: Date.now(),
                requestId: String(req.id),
                modelId,
                endpoint: url,
                status: reply.statusCode,
                latencyMs: Math.round(durationMs * 1000) / 1000,
                usage: tokens &&
                    (typeof tokens.tokensIn === "number" ||
                        typeof tokens.tokensOut === "number" ||
                        typeof tokens.tokensTotal === "number")
                    ? {
                        prompt_tokens: tokens.tokensIn,
                        completion_tokens: tokens.tokensOut,
                        total_tokens: tokens.tokensTotal,
                    }
                    : undefined,
                error,
            });
        }
    });
    // onRequest: stamp the high-resolution start time and log non-admin traffic.
    app.addHook("onRequest", async (req) => {
        req.__startAt = process.hrtime.bigint();
        const path = (req.url ?? "").split("?")[0] ?? "";
        if (!path.startsWith("/admin")) {
            log("info", "incoming_request", {
                id: req.id,
                method: req.method,
                url: req.url,
                ip: req.ip,
            });
        }
    });
    // onResponse #2: human-readable completion log for non-admin traffic.
    app.addHook("onResponse", async (req, reply) => {
        const startAt = req.__startAt;
        const durationMs = startAt
            ? Number(process.hrtime.bigint() - startAt) / 1e6
            : undefined;
        const modelId = req.__modelId;
        const modelAdapter = req.__modelAdapter;
        const tokens = req.__tokenUsage;
        const path = (req.url ?? "").split("?")[0] ?? "";
        if (!path.startsWith("/admin")) {
            log("info", "request_complete", {
                id: req.id,
                method: req.method,
                url: req.url,
                statusCode: reply.statusCode,
                durationMs: durationMs ? Math.round(durationMs * 1000) / 1000 : undefined,
                modelId,
                adapter: modelAdapter,
                tokensIn: tokens?.tokensIn,
                tokensOut: tokens?.tokensOut,
                tokensTotal: tokens?.tokensTotal,
            });
        }
    });
    // Uniform error shape: { error: { message } } with the error's own
    // statusCode when present, otherwise 500.
    app.setErrorHandler(async (err, req, reply) => {
        const statusCode = typeof err?.statusCode === "number"
            ? err.statusCode
            : 500;
        log("error", "request_error", {
            id: req.id,
            method: req.method,
            url: req.url,
            statusCode,
            message: err?.message,
        });
        reply.code(statusCode);
        return { error: { message: err?.message ?? "Internal error" } };
    });
    // Liveness: always ok.
    app.get("/healthz", async () => ({ ok: true }));
    // Readiness: shallow by default; `?deep=1` probes every configured
    // upstream (probe timeout capped at 5s) and returns 503 if any fails.
    app.get("/readyz", async (req, reply) => {
        const deep = req.query?.deep === "1" || req.query?.deep === 1;
        if (!deep)
            return { ok: true };
        const checks = await Promise.all(state.modelsFile.models.map(async (m) => {
            const base = m.baseUrl.replace(/\/+$/, "");
            const r = await (0, upstreamProbe_1.probeModelUpstream)({
                ...m,
                timeoutMs: Math.min(m.timeoutMs ?? 1500, 5000),
            });
            return {
                id: m.id,
                ok: r.ok,
                status: r.status,
                baseUrl: base,
                ...(!r.ok ? { error: r.message } : {}),
            };
        }));
        const allOk = checks.every((c) => c.ok);
        if (!allOk)
            reply.code(503);
        return { ok: allOk, upstreams: checks };
    });
    // OpenAI-compatible model listing built from the configured models.
    app.get("/v1/models", async () => {
        return {
            object: "list",
            data: state.modelsFile.models.map((m) => ({
                id: m.id,
                object: "model",
                owned_by: "local",
            })),
        };
    });
    // Main proxy endpoint: resolve the target model, optionally capture
    // inbound messages for debugging, then forward (streamed or not).
    app.post("/v1/chat/completions", async (req, reply) => {
        if (!req.body || typeof req.body !== "object") {
            reply.code(400);
            return { error: { message: "Missing JSON body" } };
        }
        try {
            const modelCfg = (0, config_1.resolveModelConfig)(state.modelsFile, req.body.model);
            const adapter = (0, adapters_1.createAdapter)(modelCfg);
            // Stamp request-scoped fields consumed by the onResponse hooks.
            req.__modelId = modelCfg.id;
            req.__modelAdapter = modelCfg.adapter;
            log("info", "chat_completions", {
                id: req.id,
                model: modelCfg.id,
                stream: Boolean(req.body.stream),
            });
            // Capture input messages for deep-debug (per-model ring buffer, roles system/user only).
            const endpoint = "/v1/chat/completions";
            const msgs = req.body?.messages;
            if (Array.isArray(msgs)) {
                for (let i = 0; i < msgs.length; i++) {
                    const m = msgs[i];
                    const role = m?.role;
                    if (role !== "system" && role !== "user")
                        continue;
                    modelMessages.record({
                        id: `${String(req.id)}:${i}:${role}`,
                        ts: Date.now(),
                        modelId: modelCfg.id,
                        requestId: String(req.id),
                        endpoint,
                        role,
                        rawMessageJson: m,
                    });
                }
            }
            if (req.body.stream) {
                if (!adapter.chatCompletionsStream) {
                    reply.code(400);
                    return {
                        error: {
                            message: `Model '${modelCfg.id}' does not support streaming`,
                        },
                    };
                }
                const streamResult = await adapter.chatCompletionsStream(req.body);
                const contentType = streamResult.headers["content-type"] ??
                    "text/event-stream; charset=utf-8";
                // Bypass Fastify serialization: write status/headers/body on the
                // raw Node response so bytes stream through untouched.
                reply.raw.statusCode = streamResult.status;
                reply.raw.setHeader("content-type", contentType);
                reply.raw.setHeader("cache-control", streamResult.headers["cache-control"] ?? "no-cache");
                reply.raw.setHeader("connection", streamResult.headers["connection"] ?? "keep-alive");
                if (!streamResult.body) {
                    reply.raw.end();
                    return reply;
                }
                // Track the latest usage payload seen in the SSE stream.
                const streamUsage = {
                    current: null,
                };
                const recordUsage = (u) => {
                    streamUsage.current = u;
                };
                if (String(contentType).toLowerCase().includes("text/event-stream")) {
                    await pipeSseWebStreamToNodeWithUsage(modelCfg.adapter, streamResult.body, reply.raw, recordUsage);
                }
                else {
                    await pipeWebStreamToNode(streamResult.body, reply.raw);
                }
                const lastUsage = streamUsage.current;
                // Only count tokens for successful upstream responses.
                if (lastUsage && streamResult.status >= 200 && streamResult.status < 300) {
                    metrics.observeTokens({
                        modelId: modelCfg.id,
                        adapter: modelCfg.adapter,
                        ...lastUsage,
                    });
                    req.__tokenUsage = lastUsage;
                }
                reply.raw.end();
                return reply;
            }
            else {
                // Non-streaming: forward the upstream status/body as JSON.
                const result = await adapter.chatCompletions(req.body);
                reply.code(result.status);
                reply.header("content-type", "application/json");
                const usage = (0, tokenUsage_1.extractUsageFromChatCompletionResponse)(modelCfg.adapter, result.body);
                if (usage && result.status >= 200 && result.status < 300) {
                    metrics.observeTokens({
                        modelId: modelCfg.id,
                        adapter: modelCfg.adapter,
                        ...usage,
                    });
                    req.__tokenUsage = usage;
                }
                return result.body;
            }
        }
        catch (err) {
            // Upstream/adapter failures surface as 502 unless the error
            // carries its own statusCode; 5xx failures bump the error metric.
            const statusCode = typeof err?.statusCode === "number" ? err.statusCode : 502;
            reply.code(statusCode);
            const modelId = req.__modelId;
            const modelAdapter = req.__modelAdapter;
            if (modelId && modelAdapter && statusCode >= 500) {
                metrics.upstreamErrorsTotal.inc({ model_id: String(modelId), adapter: String(modelAdapter) }, 1);
            }
            return { error: { message: err?.message ?? "Upstream error" } };
        }
    });
    // Admin API (config, metrics views, request history, debug stores).
    await (0, routes_1.registerAdminRoutes)(app, {
        state,
        metrics,
        recorder,
        modelRequests,
        modelMessages,
        packageVersion: pkgV,
        enforceLocalhost,
    });
    // Serve the bundled UI when its dist directory shipped with the package.
    const uiDist = node_path_1.default.join(__dirname, "..", "ui", "dist");
    if (node_fs_1.default.existsSync(uiDist)) {
        await app.register(static_1.default, {
            root: uiDist,
            prefix: "/ui/",
        });
        app.get("/", async (_req, reply) => reply.redirect("/ui/", 302));
    }
    return app;
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/** Colored startup / listen logs (ANSI when stdout is a TTY). */
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.logStartupPreamble = logStartupPreamble;
|
|
5
|
+
exports.logListenBanner = logListenBanner;
|
|
6
|
+
// Captured once at module load: whether stdout supports ANSI colors.
const tty = process.stdout.isTTY;
/** Wrap `text` in the given ANSI open sequence (plus reset) when stdout is a TTY. */
function ansi(open, text) {
    if (!tty) {
        return text;
    }
    return `${open}${text}\x1b[0m`;
}
|
|
10
|
+
// Palette of ANSI color helpers; each delegates to `ansi`, which only
// applies escape codes when stdout is a TTY.
const color = {
    dim: (s) => ansi("\x1b[2m", s),
    bold: (s) => ansi("\x1b[1m", s),
    cyan: (s) => ansi("\x1b[36m", s),
    brightCyan: (s) => ansi("\x1b[96m", s),
    green: (s) => ansi("\x1b[32m", s),
    brightGreen: (s) => ansi("\x1b[92m", s),
    yellow: (s) => ansi("\x1b[33m", s),
    magenta: (s) => ansi("\x1b[35m", s),
    gray: (s) => ansi("\x1b[90m", s),
    /** label + value combined */
    labelValue: (label, value) => `${color.gray(label)} ${color.brightCyan(value)}`,
};
|
|
23
|
+
/**
 * Print the startup banner box and a key/value config summary to stdout.
 * Purely cosmetic: colors degrade to plain text on non-TTY streams via
 * the `color` helpers above.
 */
function logStartupPreamble(opts) {
    // Box geometry: total width 56 columns; `inner` is the usable interior.
    const w = 56;
    const inner = w - 4;
    const border = (s) => color.cyan(s);
    const top = border(`╔${"═".repeat(w - 2)}╗`);
    const mid = border(`║${" ".repeat(w - 2)}║`);
    const bot = border(`╚${"═".repeat(w - 2)}╝`);
    const name = "llm-proxy";
    // Title row: name left-aligned inside the box, padded out to the right border.
    const titleLine = border("║") +
        " " +
        color.bold(color.brightCyan(name)) +
        " ".repeat(Math.max(0, inner - 2 - name.length)) +
        " " +
        border("║");
    // eslint-disable-next-line no-console
    console.log("");
    // eslint-disable-next-line no-console
    console.log(top);
    // eslint-disable-next-line no-console
    console.log(mid);
    // eslint-disable-next-line no-console
    console.log(titleLine);
    // eslint-disable-next-line no-console
    console.log(mid);
    // eslint-disable-next-line no-console
    console.log(bot);
    // eslint-disable-next-line no-console
    console.log("");
    // Config summary rows; labels padded to a common width (minimum 11).
    const rows = [
        ["Node.js", opts.nodeVersion],
        ["Environment", opts.nodeEnv],
        ["Bind", `${opts.host}:${opts.port}`],
        ["models.json", `${opts.modelsPath} (${opts.modelsSourceKind})`],
    ];
    const labelW = Math.max(...rows.map(([k]) => k.length), 11);
    for (const [k, v] of rows) {
        // eslint-disable-next-line no-console
        console.log(` ${color.labelValue(k.padEnd(labelW), v)}`);
    }
    if (opts.createdDefaultModelsFile) {
        // A starter models.json was written on this boot — point the user at it.
        // eslint-disable-next-line no-console
        console.log("");
        // eslint-disable-next-line no-console
        console.log(` ${color.yellow("Note:")} ${color.dim("No models.json was found — wrote a starter file with an example Ollama model.")}`);
        // eslint-disable-next-line no-console
        console.log(` ${color.dim(`Edit: ${opts.modelsPath}`)}`);
    }
    // eslint-disable-next-line no-console
    console.log("");
    // eslint-disable-next-line no-console
    console.log(color.dim(" Endpoints: /healthz /readyz /metrics /v1/models /v1/chat/completions /admin/* /ui/"));
    // eslint-disable-next-line no-console
    console.log("");
}
|
|
77
|
+
/**
 * Print the post-listen banner: the bound address, the local URL, and
 * copy-pasteable commands for exposing the server via ngrok or
 * Cloudflare Tunnel. Output only; no side effects beyond stdout.
 */
function logListenBanner(opts) {
    const localUrl = `http://127.0.0.1:${opts.port}`;
    const sep = color.dim("─".repeat(58));
    // eslint-disable-next-line no-console
    console.log(sep);
    // eslint-disable-next-line no-console
    console.log(` ${color.bold(color.brightGreen("Listening"))} ${color.brightCyan(opts.address)}`);
    // eslint-disable-next-line no-console
    console.log(sep);
    // eslint-disable-next-line no-console
    console.log("");
    // eslint-disable-next-line no-console
    console.log(` ${color.bold(color.yellow("Local"))}`);
    // eslint-disable-next-line no-console
    console.log(` ${color.brightCyan(localUrl)}`);
    // eslint-disable-next-line no-console
    console.log("");
    // Tunnel suggestions: two common ways to get a public HTTPS endpoint.
    // eslint-disable-next-line no-console
    console.log(` ${color.bold(color.magenta("Public HTTPS tunnel"))}`);
    // eslint-disable-next-line no-console
    console.log(color.dim(" Run one of these in another terminal to expose this server:"));
    // eslint-disable-next-line no-console
    console.log("");
    // eslint-disable-next-line no-console
    console.log(` ${color.gray("ngrok")}`);
    // eslint-disable-next-line no-console
    console.log(` ${color.green(`ngrok http ${opts.port}`)}`);
    // eslint-disable-next-line no-console
    console.log("");
    // eslint-disable-next-line no-console
    console.log(` ${color.gray("Cloudflare Tunnel (cloudflared)")}`);
    // eslint-disable-next-line no-console
    console.log(` ${color.green(`cloudflared tunnel --url ${localUrl}`)}`);
    // eslint-disable-next-line no-console
    console.log(color.dim(" Install: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps/install-and-setup/installation/"));
    // eslint-disable-next-line no-console
    console.log("");
}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.listModelsProbePath = listModelsProbePath;
|
|
4
|
+
exports.probeModelUpstream = probeModelUpstream;
|
|
5
|
+
const mergeHeaders_1 = require("./config/mergeHeaders");
|
|
6
|
+
const http_1 = require("./http");
|
|
7
|
+
/** Path for a lightweight GET that lists models (same auth as chat POST). */
function listModelsProbePath(adapter) {
    if (adapter === "ollama") {
        return "api/tags";
    }
    if (adapter === "openai_compatible" || adapter === "deepseek") {
        return "v1/models";
    }
    // Unknown adapters fall through to undefined, matching the original switch.
}
|
|
17
|
+
/**
 * GET the adapter's list-models endpoint with the same outbound headers as
 * chat completions (Authorization / custom api key header / config.headers).
 * Resolves to `{ ok, status, message }`; a timeout or network failure yields
 * `ok: false` with `status: 0` rather than throwing.
 */
async function probeModelUpstream(mc) {
    const probePath = listModelsProbePath(mc.adapter);
    const url = (0, http_1.joinUrl)(mc.baseUrl, probePath);
    // Cap the probe at 10s regardless of the model's configured timeout.
    const timeoutMs = Math.min(mc.timeoutMs ?? 5000, 10_000);
    const headers = {
        accept: "application/json",
        ...((0, mergeHeaders_1.mergeModelOutboundHeaders)(mc) ?? {}),
    };
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
        const res = await fetch(url, {
            method: "GET",
            headers,
            signal: controller.signal,
        });
        return {
            ok: res.ok,
            status: res.status,
            message: res.ok ? "Reachable" : `HTTP ${res.status}`,
        };
    }
    catch (err) {
        const timedOut = err?.name === "AbortError";
        return {
            ok: false,
            status: 0,
            message: timedOut ? "Timeout" : (err?.message ?? String(err)),
        };
    }
    finally {
        clearTimeout(timer);
    }
}
|
package/dist/version.js
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.packageVersion = packageVersion;
|
|
7
|
+
const node_fs_1 = __importDefault(require("node:fs"));
|
|
8
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
9
|
+
/**
 * Read this package's version from the package.json one directory above
 * the compiled dist/ tree. Returns "unknown" when the file is missing,
 * unreadable, or unparsable.
 */
function packageVersion() {
    const fallback = "unknown";
    try {
        const manifestPath = node_path_1.default.join(__dirname, "..", "package.json");
        const manifest = JSON.parse(node_fs_1.default.readFileSync(manifestPath, "utf8"));
        return manifest.version ?? fallback;
    }
    catch {
        // Missing or malformed package.json — report a sentinel instead of throwing.
        return fallback;
    }
}
|