@archimonde12/llm-proxy 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -0
- package/README.md +272 -0
- package/dist/adapters/base.js +2 -0
- package/dist/adapters/deepseek.js +78 -0
- package/dist/adapters/index.js +20 -0
- package/dist/adapters/ollama.js +182 -0
- package/dist/adapters/openaiCompatible.js +50 -0
- package/dist/admin/auth.js +37 -0
- package/dist/admin/configStore.js +80 -0
- package/dist/admin/envStore.js +149 -0
- package/dist/admin/routes.js +360 -0
- package/dist/cli/bin.js +10 -0
- package/dist/cli/commands/config.js +31 -0
- package/dist/cli/commands/doctor.js +107 -0
- package/dist/cli/commands/init.js +68 -0
- package/dist/cli/commands/start.js +38 -0
- package/dist/cli/commands/status.js +23 -0
- package/dist/cli/index.js +22 -0
- package/dist/config/defaultModelsFile.js +16 -0
- package/dist/config/load.js +221 -0
- package/dist/config/mergeHeaders.js +33 -0
- package/dist/config/paths.js +45 -0
- package/dist/config/schema.js +59 -0
- package/dist/config.js +25 -0
- package/dist/http.js +69 -0
- package/dist/index.js +30 -0
- package/dist/observability/metrics.js +102 -0
- package/dist/observability/modelMessageDebugStore.js +69 -0
- package/dist/observability/modelRequestStore.js +52 -0
- package/dist/observability/requestId.js +21 -0
- package/dist/observability/requestRecorder.js +48 -0
- package/dist/observability/summary.js +56 -0
- package/dist/observability/tokenUsage.js +46 -0
- package/dist/server.js +442 -0
- package/dist/startupLog.js +114 -0
- package/dist/types.js +2 -0
- package/dist/upstreamProbe.js +53 -0
- package/dist/version.js +19 -0
- package/package.json +73 -0
- package/ui/dist/assets/index-CDUAKry5.css +1 -0
- package/ui/dist/assets/index-Dq3YzAqp.js +13 -0
- package/ui/dist/index.html +16 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.isPathWritableAsFile = isPathWritableAsFile;
|
|
7
|
+
exports.resolveWriteTarget = resolveWriteTarget;
|
|
8
|
+
exports.parseModelsFileJson = parseModelsFileJson;
|
|
9
|
+
exports.writeModelsFileAtomic = writeModelsFileAtomic;
|
|
10
|
+
const promises_1 = require("node:fs/promises");
|
|
11
|
+
const node_fs_1 = require("node:fs");
|
|
12
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
13
|
+
const paths_1 = require("../config/paths");
|
|
14
|
+
const schema_1 = require("../config/schema");
|
|
15
|
+
/**
 * Reports whether `filePath` can be written as a file.
 *
 * If the path is accessible, the answer is whether the process has write
 * permission on it. If the existence check fails, the answer is whether the
 * parent directory is writable instead (so the file could be created there).
 *
 * @param {string} filePath - Path to check.
 * @returns {Promise<boolean>} true when writable/creatable; failed access checks resolve to false.
 */
async function isPathWritableAsFile(filePath) {
    // Converts an access() rejection into a plain boolean.
    const canAccess = async (target, mode) => {
        try {
            await (0, promises_1.access)(target, mode);
            return true;
        }
        catch {
            return false;
        }
    };
    // No mode argument: access() falls back to its default check.
    const alreadyThere = await canAccess(filePath);
    const subject = alreadyThere ? filePath : node_path_1.default.dirname(filePath);
    return canAccess(subject, node_fs_1.constants.W_OK);
}
|
|
37
|
+
/**
 * Chooses the file that config saves should go to.
 *
 * The path the config was loaded from wins when it is writable (or can be
 * created). Otherwise the managed default from defaultUserModelsPath() is
 * used (~/.config/llm-proxy/models.json).
 *
 * @param {string} loadedFrom - Path the active config was loaded from.
 * @returns {Promise<{writeTarget: string, usedAlternate: boolean}>}
 *   `usedAlternate` is true when the managed default was chosen.
 */
async function resolveWriteTarget(loadedFrom) {
    const preferred = node_path_1.default.resolve(loadedFrom);
    const usedAlternate = !(await isPathWritableAsFile(preferred));
    const writeTarget = usedAlternate ? (0, paths_1.defaultUserModelsPath)() : preferred;
    return { writeTarget, usedAlternate };
}
|
|
48
|
+
/**
 * Validates a candidate models-file value against ModelsFileSchema.
 *
 * @param {unknown} raw - Value to validate.
 * @returns {{ok: true, data: object} | {ok: false, issues: Array<{path: string, message: string}>}}
 *   On failure each schema issue is reduced to a printable path plus its message.
 */
function parseModelsFileJson(raw) {
    const outcome = schema_1.ModelsFileSchema.safeParse(raw);
    if (outcome.success) {
        return { ok: true, data: outcome.data };
    }
    const issues = outcome.error.issues.map((issue) => ({
        path: pathToString(issue.path),
        message: issue.message,
    }));
    return { ok: false, issues };
}
|
|
61
|
+
/**
 * Renders an issue path (sequence of keys and indices) as a readable string,
 * e.g. ["models", 0, "baseUrl"] -> "models[0].baseUrl".
 *
 * @param {Array<string|number>} p - Path segments.
 * @returns {string} "<root>" for an empty path, otherwise the rendered path.
 */
function pathToString(p) {
    if (!p.length) {
        return "<root>";
    }
    let rendered = "";
    for (const segment of p) {
        if (typeof segment === "number") {
            // Numeric segments are array indices.
            rendered = `${rendered}[${segment}]`;
            continue;
        }
        // A dot separator is only needed once something has been emitted.
        const needsDot = rendered !== "";
        rendered += needsDot ? `.${segment}` : String(segment);
    }
    return rendered;
}
|
|
73
|
+
/**
 * Writes `data` to `filePath` as pretty-printed JSON (2-space indent, trailing
 * newline) by writing a sibling temp file and renaming it over the target.
 *
 * The parent directory is created if missing. If the write or the rename
 * fails, the temp file is removed before the original error is rethrown, so
 * failed saves do not leave `*.tmp` files next to the config.
 *
 * @param {string} filePath - Destination path (resolved to absolute).
 * @param {unknown} data - JSON-serializable value to store.
 * @returns {Promise<void>}
 * @throws Rethrows any error from mkdir, JSON.stringify, writeFile or rename.
 */
async function writeModelsFileAtomic(filePath, data) {
    const abs = node_path_1.default.resolve(filePath);
    await (0, promises_1.mkdir)(node_path_1.default.dirname(abs), { recursive: true });
    const tmp = `${abs}.${process.pid}.${Date.now()}.tmp`;
    const json = `${JSON.stringify(data, null, 2)}\n`;
    try {
        await (0, promises_1.writeFile)(tmp, json, "utf8");
        await (0, promises_1.rename)(tmp, abs);
    }
    catch (err) {
        // Best-effort cleanup: a failure to remove the temp file must not mask
        // the original write/rename error.
        await (0, promises_1.rm)(tmp, { force: true }).catch(() => { });
        throw err;
    }
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.resolveDotenvPath = resolveDotenvPath;
|
|
7
|
+
exports.listEnvKeys = listEnvKeys;
|
|
8
|
+
exports.listKeysInDotenvFile = listKeysInDotenvFile;
|
|
9
|
+
exports.listEntriesInDotenvFile = listEntriesInDotenvFile;
|
|
10
|
+
exports.applyEnvUpdates = applyEnvUpdates;
|
|
11
|
+
const promises_1 = __importDefault(require("node:fs/promises"));
|
|
12
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
13
|
+
// Env var names this module accepts: upper-case letters, digits and underscores only.
const ENV_KEY_RE = /^[A-Z0-9_]+$/;
|
|
14
|
+
/**
 * Locates the dotenv file to read and write.
 *
 * A non-blank DOTENV_CONFIG_PATH environment variable takes precedence;
 * otherwise `.env` in the current working directory is used.
 *
 * @returns {{path: string, source: "DOTENV_CONFIG_PATH" | "cwd"}} Absolute path and where it came from.
 */
function resolveDotenvPath() {
    const override = (process.env.DOTENV_CONFIG_PATH ?? "").trim();
    return override
        ? { path: node_path_1.default.resolve(override), source: "DOTENV_CONFIG_PATH" }
        : { path: node_path_1.default.resolve(process.cwd(), ".env"), source: "cwd" };
}
|
|
20
|
+
/**
 * Lists the names (never the values) of process.env entries whose name
 * matches ENV_KEY_RE, sorted with the default string ordering.
 *
 * @returns {string[]} Sorted variable names.
 */
function listEnvKeys() {
    const names = [];
    for (const name of Object.keys(process.env ?? {})) {
        if (ENV_KEY_RE.test(name)) {
            names.push(name);
        }
    }
    return names.sort();
}
|
|
25
|
+
/**
 * Names of the keys declared in the dotenv file, sorted; values are not included.
 * Relies on listEntriesInDotenvFile, which yields no entries for a missing file.
 *
 * @param {string} dotenvPath - Path of the dotenv file.
 * @returns {Promise<string[]>} Sorted key names.
 */
async function listKeysInDotenvFile(dotenvPath) {
    const entries = await listEntriesInDotenvFile(dotenvPath);
    return entries.map((entry) => entry.key).sort();
}
|
|
32
|
+
/**
 * Key/value pairs from the dotenv file (localhost admin only).
 *
 * Only assignments whose key matches ENV_KEY_RE are returned, in file order.
 * A missing file (ENOENT) yields an empty list; any other read error is rethrown.
 *
 * @param {string} dotenvPath - Path of the dotenv file.
 * @returns {Promise<Array<{key: string, value: string}>>}
 */
async function listEntriesInDotenvFile(dotenvPath) {
    let text;
    try {
        text = await promises_1.default.readFile(node_path_1.default.resolve(dotenvPath), "utf8");
    }
    catch (err) {
        if (err?.code !== "ENOENT") {
            throw err;
        }
        return [];
    }
    return parseEnvLines(text)
        .filter((line) => line.kind === "kv" && ENV_KEY_RE.test(line.key))
        .map((line) => ({ key: line.key, value: line.value }));
}
|
|
52
|
+
/**
 * Splits dotenv text into one record per line.
 *
 * Lines shaped like `KEY = value` become
 * `{ kind: "kv", key, rawKey, value, raw }`; everything else (comments, blank
 * lines, unparseable text) becomes `{ kind: "other", raw }` so callers can
 * write it back untouched. No quote or escape processing is applied to values.
 *
 * Whitespace around the key, around `=`, and at the end of the value is not
 * part of the value. The trailing `\s*` in the pattern cannot trim on its own
 * because the greedy `(.*)` group consumes trailing blanks first, so the value
 * is trimmed explicitly.
 *
 * @param {string} raw - Full file contents.
 * @returns {Array<{kind: "kv", key: string, rawKey: string, value: string, raw: string} | {kind: "other", raw: string}>}
 */
function parseEnvLines(raw) {
    const assignment = /^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.*)\s*$/;
    const out = [];
    for (const line of raw.split(/\r?\n/)) {
        const m = assignment.exec(line);
        if (!m) {
            out.push({ kind: "other", raw: line });
            continue;
        }
        const rawKey = m[1] ?? "";
        const key = rawKey.trim();
        const value = (m[2] ?? "").trimEnd();
        out.push({ kind: "kv", key, rawKey, value, raw: line });
    }
    return out;
}
|
|
68
|
+
/**
 * Converts an update value to the text stored in the dotenv file.
 * No quoting or escaping is applied: the UI pastes raw tokens.
 *
 * @param {unknown} v - Incoming value.
 * @returns {string} String(v).
 */
function normalizeEnvValue(v) {
    const asText = String(v);
    return asText;
}
|
|
72
|
+
/**
 * Applies key updates to the dotenv file and to the running process.env.
 *
 * For each update, `value == null` means "delete the key"; any other value is
 * stored as text. Existing lines for updated keys are rewritten in place as
 * `KEY=value`, keys not yet in the file are appended after a blank separator
 * line, and all other lines (comments, blanks, keys that do not match
 * ENV_KEY_RE) are preserved verbatim. The file always ends with exactly one
 * newline.
 *
 * Every update is validated before anything is written:
 *  - keys must match ENV_KEY_RE;
 *  - values must not contain line terminators. The file format is one
 *    assignment per line, so a value with a line break would inject extra
 *    lines (possibly extra keys) into the file and would not round-trip.
 *
 * @param {{dotenvPath: string, updates: Array<{key: string, value: string | null | undefined}>}} args
 * @returns {Promise<{writtenTo: string, changedKeys: string[]}>} Absolute path written and the keys
 *   whose process.env value actually changed.
 * @throws {Error} With `statusCode: 400` for an invalid key or value; file-system errors other
 *   than a missing file are rethrown as-is.
 */
async function applyEnvUpdates(args) {
    const abs = node_path_1.default.resolve(args.dotenvPath);
    const updates = args.updates;
    const badRequest = (message) => Object.assign(new Error(message), { statusCode: 400 });
    for (const u of updates) {
        if (!ENV_KEY_RE.test(u.key)) {
            throw badRequest(`Invalid env key '${u.key}'. Use A-Z0-9_ only.`);
        }
        if (u.value != null && /[\r\n\u2028\u2029]/.test(String(u.value))) {
            throw badRequest(`Invalid value for env key '${u.key}'. Line breaks are not allowed.`);
        }
    }
    let existing = "";
    try {
        existing = await promises_1.default.readFile(abs, "utf8");
    }
    catch (err) {
        // A missing file simply means we start from empty content.
        if (err?.code !== "ENOENT")
            throw err;
    }
    const parsed = parseEnvLines(existing);
    // key -> new text, or null for "delete". A later update for the same key wins.
    const updateMap = new Map();
    for (const u of updates) {
        updateMap.set(u.key, u.value == null ? null : normalizeEnvValue(u.value));
    }
    const seen = new Set();
    const nextLines = [];
    for (const line of parsed) {
        // Non-assignments, non-standard keys and untouched keys are kept verbatim.
        const untouched = line.kind !== "kv" || !ENV_KEY_RE.test(line.key) || !updateMap.has(line.key);
        if (untouched) {
            nextLines.push(line.raw);
            continue;
        }
        seen.add(line.key);
        const nextVal = updateMap.get(line.key);
        if (nextVal == null) {
            // delete: skip line
            continue;
        }
        nextLines.push(`${line.key}=${nextVal}`);
    }
    // Append new keys (not present in file)
    const toAppend = [];
    for (const [k, v] of updateMap) {
        if (seen.has(k) || v == null)
            continue;
        toAppend.push(`${k}=${v}`);
    }
    if (toAppend.length) {
        if (nextLines.length && nextLines[nextLines.length - 1] !== "")
            nextLines.push("");
        nextLines.push(...toAppend);
    }
    const finalText = `${nextLines.join("\n").replace(/\n+$/, "")}\n`;
    await promises_1.default.mkdir(node_path_1.default.dirname(abs), { recursive: true });
    await promises_1.default.writeFile(abs, finalText, "utf8");
    // Apply to runtime env without restart
    const changedKeys = [];
    for (const [k, v] of updateMap) {
        if (v == null) {
            if (k in process.env)
                changedKeys.push(k);
            delete process.env[k];
        }
        else {
            if (process.env[k] !== v)
                changedKeys.push(k);
            process.env[k] = v;
        }
    }
    return { writtenTo: abs, changedKeys };
}
|
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.registerAdminRoutes = registerAdminRoutes;
|
|
4
|
+
const zod_1 = require("zod");
|
|
5
|
+
const load_1 = require("../config/load");
|
|
6
|
+
const configStore_1 = require("./configStore");
|
|
7
|
+
const auth_1 = require("./auth");
|
|
8
|
+
const summary_1 = require("../observability/summary");
|
|
9
|
+
const config_1 = require("../config");
|
|
10
|
+
const upstreamProbe_1 = require("../upstreamProbe");
|
|
11
|
+
const envStore_1 = require("./envStore");
|
|
12
|
+
/**
 * Parses a time-range query value.
 *
 * @param {unknown} raw - Raw value; null/undefined are treated as empty, surrounding whitespace is ignored.
 * @returns {{ok: true, ms: number, label: "15m" | "1h" | "24h"} | {ok: false}}
 *   Window length in milliseconds for a supported label, otherwise `{ ok: false }`.
 */
function parseRange(raw) {
    const MINUTE_MS = 60_000;
    const label = String(raw ?? "").trim();
    switch (label) {
        case "15m":
            return { ok: true, ms: 15 * MINUTE_MS, label };
        case "1h":
            return { ok: true, ms: 60 * MINUTE_MS, label };
        case "24h":
            return { ok: true, ms: 24 * 60 * MINUTE_MS, label };
        default:
            return { ok: false };
    }
}
|
|
22
|
+
/**
 * Nearest-rank percentile of an already ascending-sorted array.
 *
 * @param {number[]} sorted - Values sorted ascending.
 * @param {number} p - Fraction in [0, 1], e.g. 0.95.
 * @returns {number | null} The selected element, or null for an empty array.
 */
function percentile(sorted, p) {
    const count = sorted.length;
    if (count === 0) {
        return null;
    }
    // Rank is 1-based; convert to an index and clamp into the array bounds.
    const rankIndex = Math.ceil(p * count) - 1;
    const upperClamped = Math.min(count - 1, rankIndex);
    const idx = Math.max(0, upperClamped);
    return sorted[idx] ?? null;
}
|
|
28
|
+
// Request body accepted by the connection test: either a reference to a model
// by id, or a fully specified ad-hoc upstream target.
const SavedModelTestBody = zod_1.z.object({
    modelId: zod_1.z.string().min(1),
});
const AdHocTestBody = zod_1.z.object({
    adapter: zod_1.z.enum(["ollama", "openai_compatible", "deepseek"]),
    baseUrl: zod_1.z.string().min(1),
    model: zod_1.z.string().min(1),
    // Optional timeout, capped at 60 seconds.
    timeoutMs: zod_1.z.number().int().positive().max(60_000).optional(),
    apiKey: zod_1.z.string().optional(),
    apiKeyHeader: zod_1.z.string().optional(),
    headers: zod_1.z.record(zod_1.z.string(), zod_1.z.string()).optional(),
});
const TestBodySchema = zod_1.z.union([SavedModelTestBody, AdHocTestBody]);
|
|
40
|
+
/**
 * Registers the `/admin/*` HTTP routes on `app`.
 *
 * Every route runs the `guard` preHandler: when `ctx.enforceLocalhost` is set,
 * requests that auth_1.isLocalhostRequest rejects are answered through
 * auth_1.sendForbiddenNonLocal.
 *
 * Routes:
 *  - GET  /admin/env                               env key names plus dotenv file entries
 *  - PUT  /admin/env                               apply dotenv / process.env updates
 *  - GET  /admin/health                            liveness + version
 *  - GET  /admin/config                            stored config and write target
 *  - PUT  /admin/config                            validate, persist and activate a config
 *  - POST /admin/reload                            reload config from the active path
 *  - POST /admin/test-connection                   probe a saved or ad-hoc upstream
 *  - GET  /admin/metrics/summary                   metrics summary
 *  - GET  /admin/metrics/overview                  aggregates + timeseries for a range
 *  - GET  /admin/logs/models                       filtered model request log
 *  - GET  /admin/models/:modelId/debug/messages    recent debug messages for a model
 *  - GET  /admin/requests/:requestId               one model request
 *  - GET  /admin/requests                          recent recorded requests
 *
 * @param app - Server instance exposing get/put/post(route, opts, handler).
 * @param ctx - Shared state: enforceLocalhost, packageVersion, state, metrics,
 *   modelRequests, modelMessages, recorder.
 * @returns {Promise<void>}
 */
async function registerAdminRoutes(app, ctx) {
    const guard = async (req, reply) => {
        if (!ctx.enforceLocalhost)
            return;
        if (!(0, auth_1.isLocalhostRequest)(req))
            return (0, auth_1.sendForbiddenNonLocal)(reply);
    };
    // Message for any thrown value, Error or not.
    const errorMessage = (err) => err?.message ?? String(err);
    // Standard 400 payload for a body that failed schema validation.
    const invalidBody = (reply, zodError) => {
        reply.code(400);
        return {
            error: {
                message: "Invalid body",
                issues: zodError.issues.map((i) => ({
                    path: i.path.join("."),
                    message: i.message,
                })),
            },
        };
    };
    // Parses a `limit` query value: non-numeric or zero falls back, result is clamped to [1, max].
    const clampLimit = (raw, fallback, max) => Math.min(max, Math.max(1, Number(raw ?? fallback) || fallback));
    // Built once at registration instead of on every PUT /admin/env request.
    const EnvUpdateBody = zod_1.z.object({
        updates: zod_1.z.array(zod_1.z.object({
            key: zod_1.z.string().min(1),
            value: zod_1.z.union([zod_1.z.string(), zod_1.z.null()]),
        })),
    });
    app.get("/admin/env", { preHandler: guard }, async () => {
        const r = (0, envStore_1.resolveDotenvPath)();
        // Read the file once and derive the key list from the same snapshot, so
        // the two fields can never disagree.
        const dotenvEntries = await (0, envStore_1.listEntriesInDotenvFile)(r.path);
        const keysInDotenvFile = dotenvEntries.map((e) => e.key).sort();
        return {
            keys: (0, envStore_1.listEnvKeys)(),
            keysInDotenvFile,
            dotenvEntries,
            dotenvPath: r.path,
            dotenvSource: r.source,
        };
    });
    app.put("/admin/env", { preHandler: guard }, async (req, reply) => {
        const parsed = EnvUpdateBody.safeParse(req.body);
        if (!parsed.success)
            return invalidBody(reply, parsed.error);
        const r = (0, envStore_1.resolveDotenvPath)();
        try {
            const result = await (0, envStore_1.applyEnvUpdates)({
                dotenvPath: r.path,
                updates: parsed.data.updates,
            });
            return {
                ok: true,
                writtenTo: result.writtenTo,
                changedKeys: result.changedKeys,
                note: "Runtime process.env was updated; if your config references ${ENV_VAR}, call /admin/reload to re-resolve.",
            };
        }
        catch (err) {
            // applyEnvUpdates tags validation failures with statusCode; anything else is a 500.
            const statusCode = typeof err?.statusCode === "number" ? err.statusCode : 500;
            reply.code(statusCode);
            return { error: { message: errorMessage(err) } };
        }
    });
    app.get("/admin/health", { preHandler: guard }, async () => ({
        ok: true,
        version: ctx.packageVersion,
        ui: true,
        activeConfigPath: ctx.state.activeConfigPath,
    }));
    app.get("/admin/config", { preHandler: guard }, async () => {
        const wt = await (0, configStore_1.resolveWriteTarget)(ctx.state.activeConfigPath);
        let config = ctx.state.modelsFile;
        try {
            config = await (0, load_1.loadModelsFileFromPathAsStored)(ctx.state.activeConfigPath);
        }
        catch {
            // Fall back to in-memory config if the file is unreadable (should be rare).
        }
        return {
            config,
            loadedFromPath: ctx.state.activeConfigPath,
            writeTarget: wt.writeTarget,
            usedAlternateWritePath: wt.usedAlternate,
            configSource: ctx.state.bootstrapSource.kind,
            configGeneration: ctx.state.configGeneration,
        };
    });
    app.put("/admin/config", { preHandler: guard }, async (req, reply) => {
        const parsed = (0, configStore_1.parseModelsFileJson)(req.body);
        if (!parsed.ok) {
            reply.code(400);
            return { error: { message: "Validation failed", issues: parsed.issues } };
        }
        const wt = await (0, configStore_1.resolveWriteTarget)(ctx.state.activeConfigPath);
        try {
            await (0, configStore_1.writeModelsFileAtomic)(wt.writeTarget, parsed.data);
        }
        catch (err) {
            reply.code(500);
            return {
                error: {
                    message: `Failed to write config: ${errorMessage(err)}`,
                },
            };
        }
        ctx.state.activeConfigPath = wt.writeTarget;
        ctx.state.configGeneration++;
        try {
            ctx.state.modelsFile = await (0, load_1.loadModelsFileFromPath)(wt.writeTarget);
        }
        catch {
            // The file was just written from validated data; if re-loading it
            // fails, keep serving the validated payload.
            ctx.state.modelsFile = parsed.data;
        }
        return {
            ok: true,
            writtenTo: wt.writeTarget,
            usedAlternateWritePath: wt.usedAlternate,
            configGeneration: ctx.state.configGeneration,
            hint: wt.usedAlternate
                ? "Config was written to the managed path because the previous location was not writable. Point MODELS_PATH or --models to this file if you want the CLI to use it."
                : undefined,
        };
    });
    app.post("/admin/reload", { preHandler: guard }, async (_req, reply) => {
        let mf;
        try {
            mf = await (0, load_1.loadModelsFileFromPath)(ctx.state.activeConfigPath);
        }
        catch (err) {
            reply.code(500);
            return { error: { message: errorMessage(err) } };
        }
        ctx.state.modelsFile = mf;
        ctx.state.configGeneration++;
        return {
            ok: true,
            activeConfigPath: ctx.state.activeConfigPath,
            configGeneration: ctx.state.configGeneration,
        };
    });
    app.post("/admin/test-connection", { preHandler: guard }, async (req, reply) => {
        const parsed = TestBodySchema.safeParse(req.body);
        if (!parsed.success)
            return invalidBody(reply, parsed.error);
        const body = parsed.data;
        // Both request shapes share one error path, so a failure while probing an
        // ad-hoc target is reported in the same `{ error: { message } }` form as
        // a failure for a saved model.
        try {
            const mc = "modelId" in body
                ? (0, config_1.resolveModelConfig)(ctx.state.modelsFile, body.modelId)
                : {
                    // NOTE(review): body.model is validated but not forwarded to the probe — confirm intended.
                    adapter: body.adapter,
                    baseUrl: body.baseUrl,
                    timeoutMs: body.timeoutMs,
                    apiKey: body.apiKey,
                    apiKeyHeader: body.apiKeyHeader,
                    headers: body.headers,
                };
            const result = await (0, upstreamProbe_1.probeModelUpstream)(mc);
            return {
                ok: result.ok,
                status: result.status,
                message: result.message,
                baseUrl: mc.baseUrl.replace(/\/+$/, ""),
            };
        }
        catch (err) {
            reply.code(400);
            return { error: { message: errorMessage(err) } };
        }
    });
    app.get("/admin/metrics/summary", { preHandler: guard }, async () => {
        return (0, summary_1.buildMetricsSummary)(ctx.metrics);
    });
    app.get("/admin/metrics/overview", { preHandler: guard }, async (req, reply) => {
        const q = req.query;
        const parsed = parseRange(q.range ?? "15m");
        if (!parsed.ok) {
            reply.code(400);
            return { error: { message: "Invalid range. Use 15m|1h|24h" } };
        }
        const now = Date.now();
        const since = now - parsed.ms;
        const inRange = ctx.modelRequests
            .getRecent(50_000)
            .filter((r) => r.ts >= since && r.ts <= now);
        // 15-minute buckets for the 24h view, 1-minute buckets otherwise.
        const bucketMs = parsed.label === "24h" ? 15 * 60_000 : 60_000;
        const bucketCount = Math.ceil(parsed.ms / bucketMs);
        const buckets = Array.from({ length: bucketCount }, (_, i) => ({
            ts: since + i * bucketMs,
            tokens_in: 0,
            tokens_out: 0,
            req_count: 0,
            error_count: 0,
        }));
        let errCount = 0;
        let tokensInTotal = 0;
        let tokensOutTotal = 0;
        const lat = [];
        for (const r of inRange) {
            const isError = r.status >= 400;
            const u = r.usage;
            const tokensIn = u && typeof u.prompt_tokens === "number" ? u.prompt_tokens : 0;
            const tokensOut = u && typeof u.completion_tokens === "number" ? u.completion_tokens : 0;
            if (isError)
                errCount++;
            if (Number.isFinite(r.latencyMs))
                lat.push(r.latencyMs);
            tokensInTotal += tokensIn;
            tokensOutTotal += tokensOut;
            // A record stamped exactly `now` computes index === bucketCount; clamp
            // it into the last bucket so the timeseries adds up to the totals.
            const bi = Math.min(buckets.length - 1, Math.floor((r.ts - since) / bucketMs));
            const b = buckets[bi];
            if (!b)
                continue;
            b.req_count++;
            if (isError)
                b.error_count++;
            b.tokens_in += tokensIn;
            b.tokens_out += tokensOut;
        }
        lat.sort((a, b) => a - b);
        const reqCount = inRange.length;
        return {
            req_count: reqCount,
            error_rate: reqCount > 0 ? errCount / reqCount : 0,
            latency_p50_ms: percentile(lat, 0.5),
            latency_p95_ms: percentile(lat, 0.95),
            tokens_in_total: tokensInTotal,
            tokens_out_total: tokensOutTotal,
            timeseries: buckets,
        };
    });
    app.get("/admin/logs/models", { preHandler: guard }, async (req) => {
        const q = req.query;
        const parsed = parseRange(q.range ?? "15m");
        const lim = clampLimit(q.limit, 200, 500);
        const now = Date.now();
        // Unlike the overview route, an unrecognised range falls back to 15 minutes here.
        const since = parsed.ok ? now - parsed.ms : now - 15 * 60_000;
        const wantModelId = (q.modelId ?? "").trim();
        const wantStatus = (q.status ?? "").trim();
        const rows = ctx.modelRequests
            .getRecent(50_000)
            .filter((r) => r.ts >= since && r.ts <= now)
            .filter((r) => (wantModelId ? r.modelId === wantModelId : true))
            .filter((r) => (wantStatus ? String(r.status) === wantStatus : true))
            .slice(0, lim)
            .map((r) => ({
                ts: r.ts,
                requestId: r.requestId,
                modelId: r.modelId,
                endpoint: r.endpoint,
                status: r.status,
                latencyMs: r.latencyMs,
                usage: r.usage ?? null,
            }));
        return { logs: rows };
    });
    app.get("/admin/models/:modelId/debug/messages", { preHandler: guard }, async (req) => {
        const params = req.params;
        const q = req.query;
        const lim = clampLimit(q.limit, 10, 50);
        // Only the "system" and "user" roles can be requested; anything else is dropped.
        const roles = String(q.roles ?? "system,user")
            .split(",")
            .map((s) => s.trim())
            .filter(Boolean)
            .filter((r) => r === "system" || r === "user");
        const items = ctx.modelMessages.getRecent(params.modelId, lim, roles);
        return {
            messages: items.map((m) => ({
                id: m.id,
                ts: m.ts,
                modelId: m.modelId,
                requestId: m.requestId,
                endpoint: m.endpoint,
                role: m.role,
                rawMessageJson: m.rawMessageJson,
            })),
        };
    });
    app.get("/admin/requests/:requestId", { preHandler: guard }, async (req, reply) => {
        const params = req.params;
        const found = ctx.modelRequests.getById(params.requestId);
        if (!found) {
            reply.code(404);
            return { error: { message: "Not found" } };
        }
        return {
            request: {
                ts: found.ts,
                requestId: found.requestId,
                modelId: found.modelId,
                endpoint: found.endpoint,
                status: found.status,
                latencyMs: found.latencyMs,
                usage: found.usage ?? null,
                error: found.error ?? null,
            },
        };
    });
    app.get("/admin/requests", { preHandler: guard }, async (req) => {
        const q = req.query;
        const lim = clampLimit(q.limit, 200, 500);
        return { requests: ctx.recorder.getRecent(lim) };
    });
}
|
package/dist/cli/bin.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
require("dotenv/config");
|
|
5
|
+
const index_js_1 = require("./index.js");
|
|
6
|
+
// CLI entry point: run the command line interface; if it rejects, print the
// error to stderr and terminate with exit status 1.
const exitWithError = (err) => {
    // eslint-disable-next-line no-console
    console.error(err);
    process.exit(1);
};
(0, index_js_1.runCli)().catch(exitWithError);
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.configCommand = configCommand;
|
|
7
|
+
const commander_1 = require("commander");
|
|
8
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
9
|
+
const load_1 = require("../../config/load");
|
|
10
|
+
/**
 * Builds the `config` CLI command group.
 *
 * Subcommands:
 *  - `validate [--file <path>]`: loads the given models file (default
 *    `./models.json` in the current directory). Prints `OK: <path>` on success;
 *    on failure prints the formatted config error and sets the process exit
 *    code to 1.
 *
 * @returns The configured commander Command.
 */
function configCommand() {
    const runValidate = async (opts) => {
        // The default is computed only when --file is absent.
        const chosen = opts.file ?? node_path_1.default.resolve(process.cwd(), "models.json");
        const filePath = node_path_1.default.resolve(chosen);
        try {
            await (0, load_1.loadModelsFileFromPath)(filePath);
            // eslint-disable-next-line no-console
            console.log(`OK: ${filePath}`);
        }
        catch (err) {
            // eslint-disable-next-line no-console
            console.error((0, load_1.formatConfigError)(err));
            process.exitCode = 1;
        }
    };
    const cmd = new commander_1.Command("config");
    cmd.description("Configuration utilities.");
    cmd
        .command("validate")
        .description("Validate models.json schema.")
        .option("--file <path>", "Config file path (default: ./models.json)")
        .action((opts) => runValidate(opts));
    return cmd;
}
|