@prakashpro1/auto-modal 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +10 -0
- package/LICENSE +21 -0
- package/README.md +282 -0
- package/bin/cli.mjs +138 -0
- package/claude-router.sh +28 -0
- package/config.default.yaml +23 -0
- package/package.json +63 -0
- package/scripts/free-port.mjs +26 -0
- package/src/anthropic.js +186 -0
- package/src/config.js +101 -0
- package/src/dashboard.js +560 -0
- package/src/envfile.js +60 -0
- package/src/loadenv.js +5 -0
- package/src/server.js +543 -0
- package/src/usage.js +131 -0
package/src/server.js
ADDED
|
@@ -0,0 +1,543 @@
|
|
|
1
|
+
import "./loadenv.js";
|
|
2
|
+
import express from "express";
|
|
3
|
+
import { loadConfig, readRaw, writeRaw, buildConfig, PROVIDERS, resolveKeys } from "./config.js";
|
|
4
|
+
import { recordSuccess, tripCooldown, isAvailable, tryConsumeToken, snapshot, historyFor } from "./usage.js";
|
|
5
|
+
import { getKeysFor, addKey, removeKey, maskKey } from "./envfile.js";
|
|
6
|
+
import { DASHBOARD_HTML } from "./dashboard.js";
|
|
7
|
+
import { anthropicToOpenAI, openAIToAnthropic, streamAnthropic } from "./anthropic.js";
|
|
8
|
+
|
|
9
|
+
// `cfg` is reassigned on hot-reload when models are added/removed via the dashboard.
|
|
10
|
+
let cfg = loadConfig();
|
|
11
|
+
/**
 * Re-read the configuration into the module-level `cfg` and log the
 * resulting chain order.
 */
function reload() {
  cfg = loadConfig();
  const chainIds = cfg.chain.map(({ id }) => id);
  console.log(` reloaded chain: ${chainIds.join(" -> ")}`);
}
|
|
15
|
+
const app = express();
|
|
16
|
+
app.use(express.json({ limit: "25mb" }));
|
|
17
|
+
|
|
18
|
+
// Errors that mean "this model is out of budget" -> switch to the next model.
|
|
19
|
+
const LIMIT_STATUS = new Set([429, 402]);
|
|
20
|
+
// Errors that are usually transient -> retry the same model briefly, then switch.
|
|
21
|
+
const TRANSIENT_STATUS = new Set([500, 502, 503, 504]);
|
|
22
|
+
|
|
23
|
+
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
|
|
24
|
+
|
|
25
|
+
/**
 * Forward one request to a single upstream model + key.
 *
 * @param {object} model  Chain entry; `baseUrl` and `model` (upstream id) are read.
 * @param {string} apiKey Bearer token sent to the upstream.
 * @param {object} body   Client body; its `model` field is replaced by `model.model`.
 * @param {AbortSignal|undefined} signal Passed to `fetch` as-is.
 * @param {string} path   Appended to `model.baseUrl`, e.g. "/chat/completions"
 *                        for chat or "/completions" for autocomplete/FIM.
 * @returns {Promise<Response>} The raw fetch Response so the caller can stream or read it.
 */
function callUpstream(model, apiKey, body, signal, path) {
  const url = `${model.baseUrl}${path}`;
  const headers = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${apiKey}`,
    // OpenRouter likes these for attribution; harmless elsewhere.
    "HTTP-Referer": "http://localhost",
    "X-Title": "auto-modal",
  };
  // Overwrite the client's "model" field with the real upstream model id.
  const serialized = JSON.stringify({ ...body, model: model.model });
  return fetch(url, { method: "POST", signal, headers, body: serialized });
}
|
|
44
|
+
|
|
45
|
+
/**
 * Flatten models into ordered (model, keyIndex) slots so keys AND models can
 * be walked in one loop: model-A key0, model-A key1, model-B key0, ...
 *
 * @param {Array<{keys: Array}>} models
 * @returns {Array<{model: object, keyIdx: number}>}
 */
function slotsOf(models) {
  const slots = [];
  for (const model of models) {
    const perKey = Array.from({ length: model.keys.length }, (_unused, keyIdx) => ({ model, keyIdx }));
    slots.push(...perKey);
  }
  return slots;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Pick the slots to use for a request.
 *
 * If the client asks for a specific model (by chain id, by upstream model
 * slug, or by the advertised "claude-<id>" alias), route to just that model's
 * keys, so key rotation happens without falling through to other (e.g.
 * text-only) models. "auto", an empty value, a non-string value, or an
 * unknown id uses the full chain in priority order.
 *
 * @param {unknown} requested The `model` field from the client body.
 * @returns {Array<{model: object, keyIdx: number}>}
 */
function slotsFor(requested) {
  // Guard the type: the value comes straight from client JSON and may be a
  // number or object, on which `.startsWith` would throw.
  if (typeof requested === "string" && requested !== "" && requested !== "auto") {
    // Accept the "claude-<id>" aliases we advertise for Claude Code discovery.
    const stripped = requested.startsWith("claude-") ? requested.slice(7) : requested;
    const matches = cfg.chain.filter(
      (m) => m.id === requested || m.model === requested || m.id === stripped
    );
    if (matches.length) return slotsOf(matches);
  }
  return slotsOf(cfg.chain);
}
|
|
72
|
+
|
|
73
|
+
/**
 * Shared routing core: walk the slots, apply limits, forward to `path`, and
 * rotate on limit errors. Used by /chat/completions, /completions, /messages.
 *
 * @param {object} req  Express request; `req.body` is forwarded unless `opts.body` is set.
 * @param {object} res  Express response.
 * @param {string} path Upstream path, e.g. "/chat/completions".
 * @param {object} [opts]
 * @param {object} [opts.body] Overrides the forwarded body (e.g. an
 *   Anthropic->OpenAI translation).
 * @param {Function} [opts.onOk] `(upstream, ctx)` handles a successful
 *   response; otherwise the response is passed through.
 * @param {Function} [opts.onExhausted] `(res, tried)` formats the
 *   all-slots-failed error; otherwise a 503 JSON error is sent.
 * @returns {Promise<*>} Whatever the response helper returned, or undefined
 *   when the client disconnected before a response could be produced.
 */
async function routeRequest(req, res, path, opts = {}) {
  const sendBody = opts.body ?? req.body;
  const wantStream = sendBody?.stream === true;
  const tried = [];

  // The Express request carries no AbortSignal, so build one that fires when
  // the client connection closes before we finished responding. This cancels
  // the in-flight upstream call instead of letting it run to completion.
  const aborter = new AbortController();
  const onClientClose = () => {
    if (!res.writableEnded) aborter.abort();
  };
  res.on("close", onClientClose);

  try {
    for (const { model, keyIdx } of slotsFor(sendBody?.model)) {
      // Client already gone: do not spend budget on further slots.
      if (aborter.signal.aborted) return undefined;

      const slotLabel = `${model.id}#key${keyIdx}`;
      if (!isAvailable(model, keyIdx)) {
        tried.push({ slot: slotLabel, skipped: "cooldown-or-daily-cap" });
        continue;
      }
      // Per-minute budget: skip (rotate) if this slot is out of tokens this minute.
      if (!tryConsumeToken(model, keyIdx)) {
        tried.push({ slot: slotLabel, skipped: "rpm-limit" });
        continue;
      }
      const apiKey = model.keys[keyIdx];

      let attempt = 0;
      while (attempt <= cfg.transientRetries) {
        attempt++;
        let upstream;
        try {
          upstream = await callUpstream(model, apiKey, sendBody, aborter.signal, path);
        } catch (err) {
          // An abort is not a transient failure: nobody is left to answer.
          if (aborter.signal.aborted) return undefined;
          // Network-level failure -> treat as transient.
          if (attempt <= cfg.transientRetries) {
            await sleep(300 * attempt);
            continue;
          }
          tried.push({ slot: slotLabel, error: err.message });
          break;
        }

        if (upstream.ok) {
          recordSuccess(model.id, keyIdx);
          res.setHeader("X-Router-Model", model.id);
          res.setHeader("X-Router-Upstream", model.model);
          res.setHeader("X-Router-Key", `key${keyIdx}`);

          if (opts.onOk) return await opts.onOk(upstream, { model, keyIdx, wantStream, res });

          if (wantStream && upstream.body) {
            res.setHeader("Content-Type", "text/event-stream");
            res.setHeader("Cache-Control", "no-cache");
            res.setHeader("Connection", "keep-alive");
            // Pipe the SSE stream straight through.
            const reader = upstream.body.getReader();
            try {
              for (;;) {
                const { done, value } = await reader.read();
                if (done) break;
                res.write(Buffer.from(value));
              }
            } catch (err) {
              // Headers are already sent, so the only option is to terminate
              // the stream. Stay quiet when the client itself disconnected.
              if (!aborter.signal.aborted) {
                console.error(`stream from ${slotLabel} failed: ${err.message}`);
              }
            }
            return res.end();
          }

          const json = await upstream.json();
          return res.json(json);
        }

        // Not OK — decide whether to retry, switch, or fail.
        const status = upstream.status;
        const text = await upstream.text().catch(() => "");

        if (LIMIT_STATUS.has(status)) {
          // This key is rate-limited / out of credits -> cool it down and
          // rotate to the next (model, key) slot.
          tripCooldown(model.id, keyIdx, cfg.cooldownMs);
          tried.push({ slot: slotLabel, status, reason: "limit-exceeded -> rotated key/model" });
          break; // advance to next slot
        }

        if (TRANSIENT_STATUS.has(status) && attempt <= cfg.transientRetries) {
          await sleep(300 * attempt);
          continue; // retry same slot
        }

        // 404 = this slot doesn't offer this endpoint (e.g. a provider with no
        // /completions support). Rotate to the next slot; another may support it.
        if (status === 404) {
          tried.push({ slot: slotLabel, status, reason: "endpoint not supported -> rotated" });
          break;
        }

        // Other errors (e.g. 400 bad request): bubble up immediately,
        // since switching keys/models won't fix a malformed request.
        if (status >= 400 && status < 500 && !LIMIT_STATUS.has(status)) {
          return res.status(status).type("application/json").send(text || "{}");
        }

        tried.push({ slot: slotLabel, status, body: text.slice(0, 200) });
        break;
      }
    }

    // Every (model, key) slot was unavailable or failed.
    if (opts.onExhausted) return opts.onExhausted(res, tried);
    return res.status(503).json({
      error: {
        message: "All models and API keys are exhausted or unavailable.",
        type: "router_all_slots_exhausted",
        tried,
      },
    });
  } finally {
    res.off("close", onClientClose);
  }
}
|
|
183
|
+
|
|
184
|
+
// OpenAI-compatible passthrough routes, each forwarded to the matching
// upstream path:
//   /v1/chat/completions — chat (sidebar, edit, apply)
//   /v1/completions      — text completions / FIM, used by Continue's autocomplete role
const PASSTHROUGH_ROUTES = [
  ["/v1/chat/completions", "/chat/completions"],
  ["/v1/completions", "/completions"],
];
for (const [route, upstreamPath] of PASSTHROUGH_ROUTES) {
  app.post(route, (req, res) => routeRequest(req, res, upstreamPath));
}
|
|
189
|
+
|
|
190
|
+
// Anthropic Messages API — used by Claude Code. The request is translated to
// the OpenAI shape, routed through the chain, and the response is translated
// back (streaming or JSON). A translation failure is reported as an
// Anthropic-style 400; an exhausted chain as a 529 "overloaded_error".
app.post("/v1/messages", (req, res) => {
  const requestedModel = req.body?.model || "auto";

  let translated;
  try {
    translated = anthropicToOpenAI(req.body || {});
  } catch (err) {
    const error = { type: "invalid_request_error", message: err.message };
    return res.status(400).json({ type: "error", error });
  }

  const respond = async (upstream, { wantStream }) => {
    if (wantStream && upstream.body) return streamAnthropic(upstream, res, requestedModel);
    const openaiJson = await upstream.json();
    return res.json(openAIToAnthropic(openaiJson, requestedModel));
  };

  const overloaded = (r) =>
    r.status(529).json({
      type: "error",
      error: { type: "overloaded_error", message: "All models and API keys are exhausted or unavailable." },
    });

  return routeRequest(req, res, "/chat/completions", {
    body: translated,
    onOk: respond,
    onExhausted: overloaded,
  });
});
|
|
213
|
+
|
|
214
|
+
// Rough token count (Claude Code may call this). ~4 chars/token estimate.
// Both `system` and each message's `content` may be a plain string or an
// array of content blocks; only the `text` of blocks is counted. Anything
// else (missing, non-array, non-text blocks) contributes zero instead of
// throwing.
app.post("/v1/messages/count_tokens", (req, res) => {
  const body = req.body || {};

  const textLength = (content) => {
    if (typeof content === "string") return content.length;
    if (!Array.isArray(content)) return 0;
    return content.reduce(
      (sum, block) => sum + (typeof block?.text === "string" ? block.text.length : 0),
      0
    );
  };

  let chars = textLength(body.system);
  const messages = Array.isArray(body.messages) ? body.messages : [];
  for (const message of messages) chars += textLength(message?.content);

  // Never report fewer than one token.
  res.json({ input_tokens: Math.max(1, Math.ceil(chars / 4)) });
});
|
|
224
|
+
|
|
225
|
+
// Advertise the chain as available "models". Every chain entry appears twice:
// once under its real id (Continue / OpenAI SDKs) and once under a
// "claude-<id>" alias so Claude Code's gateway model discovery (which only
// shows claude/anthropic ids) can list it.
app.get("/v1/models", (_req, res) => {
  const data = cfg.chain.flatMap(({ id, provider }) => [
    { id, object: "model", owned_by: provider },
    { id: `claude-${id}`, object: "model", owned_by: provider },
  ]);
  res.json({ object: "list", data });
});
|
|
236
|
+
|
|
237
|
+
// Raw per-slot usage snapshot.
app.get("/usage", (_req, res) => {
  const usage = snapshot();
  return res.json(usage);
});

// Liveness probe; also reports how many models are in the active chain.
app.get("/health", (_req, res) => {
  const models = cfg.chain.length;
  return res.json({ ok: true, models });
});
|
|
239
|
+
|
|
240
|
+
/**
 * Merge the configured chain with live per-slot usage for the dashboard.
 *
 * @returns {{port: *, providers: *, chain: Array<object>}} One chain entry per
 *   model, each with one `slots` element per key. Non-finite limits are
 *   reported as null.
 */
function buildStatus() {
  const usageBySlot = snapshot();
  const finiteOrNull = (n) => (Number.isFinite(n) ? n : null);

  const chain = [];
  for (const model of cfg.chain) {
    const slots = model.keys.map((_key, keyIdx) => {
      const slotKey = `${model.id}#${keyIdx}`;
      const usage = usageBySlot[slotKey] || {};
      return {
        keyIdx,
        count: usage.count ?? 0,
        dailyLimit: finiteOrNull(model.dailyLimit),
        cooldownRemainingMs: usage.cooldownRemainingMs ?? 0,
        tokensThisMinute: usage.tokensThisMinute ?? null,
        history: historyFor(slotKey),
      };
    });
    chain.push({
      id: model.id,
      provider: model.provider,
      model: model.model,
      rpm: finiteOrNull(model.rpm),
      dailyLimit: finiteOrNull(model.dailyLimit),
      slots,
    });
  }

  return { port: cfg.port, providers: PROVIDERS, chain };
}
|
|
263
|
+
|
|
264
|
+
// JSON status consumed by the dashboard.
app.get("/status", (_req, res) => {
  const status = buildStatus();
  return res.json(status);
});

// The dashboard page itself.
app.get("/", (_req, res) => {
  return res.type("html").send(DASHBOARD_HTML);
});
|
|
266
|
+
|
|
267
|
+
// --- Model catalog: live list of available models per provider (for the picker) ---
|
|
268
|
+
|
|
269
|
+
const CATALOG_TTL = 10 * 60 * 1000; // cache 10 min
const catalogCache = {}; // provider -> { ts, data }

// GET a URL and parse JSON, throwing on a non-2xx status so that an error
// payload is never mistaken for (and cached as) an empty catalog.
async function fetchCatalogJson(url) {
  const response = await fetch(url);
  if (!response.ok) throw new Error(`HTTP ${response.status} from ${url}`);
  return response.json();
}

/**
 * Fetch (and cache for CATALOG_TTL) the list of models a provider offers.
 *
 * @param {string} provider "openrouter" or "huggingface"; any other value
 *   yields an empty list.
 * @returns {Promise<Array<{id: string, name: string, contextLength: (number|null), free: boolean}>>}
 *   Free models first, then alphabetical by id.
 * @throws {Error} If the provider request fails or returns a non-2xx status.
 *   Failures are not cached.
 */
async function fetchCatalog(provider) {
  const cached = catalogCache[provider];
  if (cached && Date.now() - cached.ts < CATALOG_TTL) return cached.data;

  // Entries without a string id cannot be listed or sorted; drop them.
  const hasId = (m) => typeof m?.id === "string";

  let data = [];
  if (provider === "openrouter") {
    const j = await fetchCatalogJson("https://openrouter.ai/api/v1/models");
    const list = Array.isArray(j?.data) ? j.data : [];
    data = list.filter(hasId).map((m) => ({
      id: m.id,
      name: m.name || m.id,
      contextLength: m.context_length || null,
      // Coerce to a real boolean so the sort comparator never sees NaN.
      free: m.id.endsWith(":free") ||
        Boolean(m.pricing && m.pricing.prompt === "0" && m.pricing.completion === "0"),
    }));
  } else if (provider === "huggingface") {
    const j = await fetchCatalogJson("https://router.huggingface.co/v1/models");
    // The list is accepted either wrapped in `data` or as a bare array.
    const list = Array.isArray(j?.data) ? j.data : Array.isArray(j) ? j : [];
    data = list.filter(hasId).map((m) => ({
      id: m.id,
      name: m.id,
      contextLength: null,
      free: Array.isArray(m.providers) ? m.providers.some((p) => Boolean(p?.is_free)) : false,
    }));
  }

  // Free models first, then alphabetical.
  data.sort((a, b) => Number(b.free) - Number(a.free) || a.id.localeCompare(b.id));
  catalogCache[provider] = { ts: Date.now(), data };
  return data;
}
|
|
300
|
+
|
|
301
|
+
/**
 * Query OpenRouter for the account tier behind a key.
 *
 * @param {string} [key] Key to inspect; defaults to the first key in
 *   OPENROUTER_API_KEYS.
 * @returns {Promise<object|null>} Tier info, or null if there is no key, the
 *   call fails, the status is non-2xx (e.g. an invalid key), or the payload
 *   has no `data` object. `freeDailyLimit` is 50/day on the free tier and
 *   1000/day once >=10 credits were bought (per key, SHARED across all :free
 *   models).
 */
async function fetchOpenRouterTier(key) {
  const apiKey = key || getKeysFor("OPENROUTER_API_KEYS")[0];
  if (!apiKey) return null;
  try {
    const response = await fetch("https://openrouter.ai/api/v1/key", {
      headers: { Authorization: `Bearer ${apiKey}` },
    });
    // Without this check an error payload has no `data`, and the fallback
    // below would report a paid-tier limit of 1000 for an invalid key.
    if (!response.ok) return null;
    const d = (await response.json())?.data;
    if (!d || typeof d !== "object") return null;
    return {
      isFreeTier: d.is_free_tier,
      usageDaily: d.usage_daily,
      usage: d.usage,
      limit: d.limit,
      limitRemaining: d.limit_remaining,
      freeDailyLimit: d.is_free_tier ? 50 : 1000,
    };
  } catch {
    // Deliberate best-effort: callers treat null as "tier unknown".
    return null;
  }
}
|
|
324
|
+
|
|
325
|
+
// Report the OpenRouter account tier for the default key, or explain why it
// is unavailable.
app.get("/admin/credits", async (_req, res) => {
  const tier = await fetchOpenRouterTier();
  if (tier) {
    res.json({ available: true, ...tier });
    return;
  }
  res.json({ available: false, reason: "no OpenRouter key or fetch failed" });
});
|
|
329
|
+
|
|
330
|
+
// Model picker data: the live catalog for ?provider=<name>. An unknown
// provider is a 400; an upstream failure is a 502.
app.get("/admin/catalog", async (req, res) => {
  const { provider } = req.query;
  const isKnown = PROVIDERS.includes(provider);
  if (!isKnown) {
    return res.status(400).json({ error: `unknown provider "${provider}"` });
  }

  let models;
  try {
    models = await fetchCatalog(provider);
  } catch (err) {
    return res.status(502).json({ error: `failed to fetch ${provider} catalog: ${err.message}` });
  }
  return res.json({ provider, models });
});
|
|
341
|
+
|
|
342
|
+
// --- Admin: add / remove models via the dashboard (writes config.yaml, hot-reloads) ---
|
|
343
|
+
|
|
344
|
+
// Add a model to the chain: validates the input, writes config.yaml and
// hot-reloads. Body: { id, provider, model, apiKeys, dailyLimit?, rpm? }.
// A blank dailyLimit/rpm means "no limit"; a supplied value must be a finite,
// non-negative number, otherwise the request is rejected with 400 rather than
// writing NaN into the config.
app.post("/admin/models", async (req, res) => {
  const { id, provider, model, apiKeys, dailyLimit, rpm } = req.body || {};
  if (!id || !provider || !model || !apiKeys) {
    return res.status(400).json({ error: "id, provider, model and apiKeys are required" });
  }
  if (!PROVIDERS.includes(provider)) {
    return res.status(400).json({ error: `provider must be one of: ${PROVIDERS.join(", ")}` });
  }

  const entry = { id, provider, model, apiKeys };
  for (const [field, value] of [["dailyLimit", dailyLimit], ["rpm", rpm]]) {
    if (value === "" || value == null) continue; // left blank -> unlimited
    const parsed = Number(value);
    if (!Number.isFinite(parsed) || parsed < 0) {
      return res.status(400).json({ error: `${field} must be a non-negative number` });
    }
    entry[field] = parsed;
  }

  const raw = readRaw();
  raw.chain = raw.chain || [];
  if (raw.chain.some((m) => m.id === id)) {
    return res.status(409).json({ error: `a model with id "${id}" already exists` });
  }

  // Auto-set dailyLimit for an ACTIVE free OpenRouter model when left blank:
  // pull the real free cap (50/1000) from the account tier.
  let autoLimit = null;
  if (entry.dailyLimit == null && provider === "openrouter" && /:free$/.test(model)) {
    const [firstKey] = resolveKeys(entry);
    if (firstKey) {
      const tier = await fetchOpenRouterTier(firstKey);
      if (tier?.freeDailyLimit) entry.dailyLimit = autoLimit = tier.freeDailyLimit;
    }
  }
  raw.chain.push(entry);

  try {
    writeRaw(raw);
    reload();
  } catch (err) {
    return res.status(500).json({ error: err.message });
  }

  // If the env var behind apiKeys is empty, the model is valid but inactive.
  const active = cfg.chain.some((m) => m.id === id);
  res.json({
    ok: true,
    active,
    autoDailyLimit: autoLimit, // set from OpenRouter tier when left blank, else null
    warning: active ? undefined : `Added, but inactive: "${apiKeys}" resolved to no keys (set it in .env).`,
  });
});
|
|
388
|
+
|
|
389
|
+
/**
 * Fire a tiny real request through one model (first key) and report the
 * result. Diagnostic only — not counted toward usage.
 *
 * @param {object} model Chain entry; `id` and `keys[0]` are used.
 * @returns {Promise<object>} `{ id, ok, status, latencyMs, sample | error }`,
 *   or `{ id, ok: false, error, latencyMs }` when the call itself throws.
 */
async function runModelTest(model) {
  const startedAt = Date.now();
  const pingBody = { messages: [{ role: "user", content: "ping" }], max_tokens: 5 };
  try {
    const upstream = await callUpstream(model, model.keys[0], pingBody, undefined, "/chat/completions");
    const rawText = await upstream.text();

    let sample;
    try {
      sample = JSON.parse(rawText)?.choices?.[0]?.message?.content;
    } catch {
      /* non-JSON */
    }

    const succeeded = upstream.ok;
    return {
      id: model.id,
      ok: succeeded,
      status: upstream.status,
      latencyMs: Date.now() - startedAt,
      sample: succeeded ? (sample || "").slice(0, 80) : undefined,
      error: succeeded ? undefined : rawText.slice(0, 160),
    };
  } catch (err) {
    return { id: model.id, ok: false, error: err.message, latencyMs: Date.now() - startedAt };
  }
}
|
|
411
|
+
|
|
412
|
+
// Test a single active model by chain id.
app.post("/admin/models/:id/test", async (req, res) => {
  const { id } = req.params;
  const model = cfg.chain.find((m) => m.id === id);
  if (!model) {
    return res.status(404).json({ ok: false, error: "unknown or inactive model (needs a key)" });
  }
  const result = await runModelTest(model);
  res.json(result);
});

// Test every active model at once (in parallel) and flag the dead ones.
app.post("/admin/test-all", async (_req, res) => {
  const pending = cfg.chain.map((model) => runModelTest(model));
  const results = await Promise.all(pending);
  const healthy = results.reduce((n, r) => (r.ok ? n + 1 : n), 0);
  res.json({ results, healthy, total: results.length });
});
|
|
423
|
+
|
|
424
|
+
// Edit a model's rpm / dailyLimit in place (empty value clears the limit).
// A supplied value must be a finite, non-negative number; anything else is
// rejected with 400 so NaN never reaches config.yaml.
app.patch("/admin/models/:id", (req, res) => {
  const raw = readRaw();
  const target = (raw.chain || []).find((x) => x.id === req.params.id);
  if (!target) return res.status(404).json({ error: `no model with id "${req.params.id}"` });

  const { dailyLimit, rpm } = req.body || {};
  // Validate everything before mutating the entry.
  const updates = {};
  for (const [field, value] of [["dailyLimit", dailyLimit], ["rpm", rpm]]) {
    if (value === "" || value == null) {
      updates[field] = undefined; // clear the limit
      continue;
    }
    const parsed = Number(value);
    if (!Number.isFinite(parsed) || parsed < 0) {
      return res.status(400).json({ error: `${field} must be a non-negative number` });
    }
    updates[field] = parsed;
  }
  for (const [field, value] of Object.entries(updates)) {
    if (value === undefined) delete target[field];
    else target[field] = value;
  }

  try {
    writeRaw(raw);
    reload();
  } catch (err) {
    return res.status(500).json({ error: err.message });
  }
  res.json({ ok: true });
});
|
|
442
|
+
|
|
443
|
+
// Reorder the chain (priority = position). Body: { order: [id, id, ...] }.
// The listed ids are placed, in the given order, into the positions those
// ids currently occupy; any unlisted entries (e.g. inactive, key-less models)
// keep their slots.
app.put("/admin/models/order", (req, res) => {
  const order = req.body?.order;
  if (!Array.isArray(order)) return res.status(400).json({ error: "order must be an array of ids" });

  const raw = readRaw();
  const chain = raw.chain || [];
  const byId = new Map(chain.map((m) => [m.id, m]));
  const listed = new Set(order);
  if (listed.size !== order.length) return res.status(400).json({ error: "duplicate ids in order" });
  if (!order.every((id) => byId.has(id))) return res.status(400).json({ error: "order contains unknown id" });

  const reordered = [];
  let cursor = 0;
  for (const current of chain) {
    if (listed.has(current.id)) {
      reordered.push(byId.get(order[cursor]));
      cursor += 1;
    } else {
      reordered.push(current);
    }
  }
  raw.chain = reordered;

  try {
    writeRaw(raw);
    reload();
  } catch (err) {
    return res.status(500).json({ error: err.message });
  }
  res.json({ ok: true, chain: cfg.chain.map((m) => m.id) });
});
|
|
468
|
+
|
|
469
|
+
// --- Admin: manage API-key pools (env vars) from the dashboard ---
|
|
470
|
+
|
|
471
|
+
/**
 * List the env vars that hold keys: the two defaults plus every `${NAME}`
 * placeholder referenced by a chain entry's `apiKeys` (or `apiKey`) in the
 * raw config.
 *
 * @returns {string[]} Unique names, defaults first, then in order of first appearance.
 */
function referencedEnvVars() {
  const names = new Set(["OPENROUTER_API_KEYS", "HF_API_KEYS"]);
  const chain = readRaw().chain || [];
  for (const item of chain) {
    const reference = Array.isArray(item.apiKeys)
      ? item.apiKeys.join(",")
      : String(item.apiKeys ?? item.apiKey ?? "");
    for (const [, name] of reference.matchAll(/\$\{([A-Z0-9_]+)\}/g)) {
      names.add(name);
    }
  }
  return Array.from(names);
}
|
|
480
|
+
|
|
481
|
+
// List every key pool with its keys masked.
app.get("/admin/keys", (_req, res) => {
  const pools = [];
  for (const envVar of referencedEnvVars()) {
    pools.push({ envVar, keys: getKeysFor(envVar).map(maskKey) });
  }
  res.json(pools);
});
|
|
487
|
+
|
|
488
|
+
// Append a key to an env-var pool. Body: { envVar, key }.
app.post("/admin/keys", (req, res) => {
  const { envVar, key } = req.body || {};
  if (!envVar || !key) {
    return res.status(400).json({ error: "envVar and key are required" });
  }
  const looksLikeEnvName = /^[A-Z0-9_]+$/.test(envVar);
  if (!looksLikeEnvName) {
    return res.status(400).json({ error: "invalid env var name" });
  }

  try {
    const outcome = addKey(envVar, key);
    // A model that was inactive for lack of keys may now go live.
    reload();
    res.json({ ok: true, ...outcome });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
|
|
500
|
+
|
|
501
|
+
// Remove the key at `index` from an env-var pool. Body: { envVar, index }.
// The env var name is validated with the same pattern as POST /admin/keys,
// and the index must be a non-negative integer.
app.delete("/admin/keys", (req, res) => {
  const { envVar, index } = req.body || {};
  if (!envVar || index == null) return res.status(400).json({ error: "envVar and index are required" });
  if (!/^[A-Z0-9_]+$/.test(envVar)) return res.status(400).json({ error: "invalid env var name" });

  const position = index === "" ? Number.NaN : Number(index);
  if (!Number.isInteger(position) || position < 0) {
    return res.status(400).json({ error: "index must be a non-negative integer" });
  }

  try {
    const r = removeKey(envVar, position);
    if (!r.removed) return res.status(404).json({ error: "no key at that index" });
    reload();
    res.json({ ok: true, ...r });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
|
|
513
|
+
|
|
514
|
+
// Remove a model from config.yaml by id and hot-reload; 404 if no entry has that id.
app.delete("/admin/models/:id", (req, res) => {
  const targetId = req.params.id;
  const raw = readRaw();
  const existing = raw.chain || [];
  const remaining = existing.filter((m) => m.id !== targetId);
  raw.chain = remaining;

  const nothingRemoved = remaining.length === existing.length;
  if (nothingRemoved) {
    return res.status(404).json({ error: `no model with id "${targetId}"` });
  }

  try {
    writeRaw(raw);
    reload();
  } catch (err) {
    return res.status(500).json({ error: err.message });
  }
  res.json({ ok: true });
});
|
|
529
|
+
|
|
530
|
+
// Start listening and print where the router, dashboard and chain live.
const server = app.listen(cfg.port, () => {
  const origin = `http://localhost:${cfg.port}`;
  const chainIds = cfg.chain.map((m) => m.id);
  console.log(`auto-modal listening on ${origin}`);
  console.log(` dashboard: ${origin}/`);
  console.log(` chain: ${chainIds.join(" -> ")}`);
});

// A busy port gets a friendly hint and exit code 1; any other server error is rethrown.
server.on("error", (err) => {
  if (err.code !== "EADDRINUSE") throw err;
  console.error(`✗ Port ${cfg.port} is already in use — another router is running.`);
  console.error(` Run "npm run restart" to stop it and start fresh.`);
  process.exit(1);
});
|
package/src/usage.js
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import { readFileSync, writeFileSync, existsSync } from "node:fs";
|
|
2
|
+
import { fileURLToPath } from "node:url";
|
|
3
|
+
import { dirname, join } from "node:path";
|
|
4
|
+
|
|
5
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
6
|
+
const USAGE_PATH = process.env.ROUTER_USAGE || join(__dirname, "..", "usage.json");
|
|
7
|
+
|
|
8
|
+
// Per (model, key-index) state persisted across restarts. The "slot" is the
|
|
9
|
+
// unit of availability — switching a key is just moving to a different slot:
|
|
10
|
+
// { "deepseek-free#0": { day, count, cooldownUntil }, "deepseek-free#1": {...} }
|
|
11
|
+
let state = {};
|
|
12
|
+
|
|
13
|
+
/**
 * Load persisted usage state from USAGE_PATH into the module-level `state`.
 * A missing file leaves `state` untouched. An unreadable file, invalid JSON,
 * or JSON that is not a plain object (e.g. `null` or an array) resets `state`
 * to `{}`, because the rest of the module indexes it by slot id.
 */
function load() {
  if (!existsSync(USAGE_PATH)) return;
  try {
    const parsed = JSON.parse(readFileSync(USAGE_PATH, "utf8"));
    const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    state = isPlainObject ? parsed : {};
  } catch {
    // Deliberate best-effort: a corrupt usage file must not stop the router.
    state = {};
  }
}
|
|
22
|
+
load();
|
|
23
|
+
|
|
24
|
+
/**
 * Write the current `state` to USAGE_PATH as pretty-printed JSON.
 * Failures are logged and otherwise ignored.
 */
function persist() {
  try {
    const serialized = JSON.stringify(state, null, 2);
    writeFileSync(USAGE_PATH, serialized);
  } catch (err) {
    console.error("usage: failed to persist", err.message);
  }
}
|
|
31
|
+
|
|
32
|
+
/** @returns {string} The current UTC date as YYYY-MM-DD. */
function today() {
  const isoNow = new Date().toISOString();
  return isoNow.substring(0, 10);
}
|
|
35
|
+
|
|
36
|
+
/** Build the state key for a (model, key-index) slot, e.g. "deepseek-free#0". */
const slotId = (modelId, keyIdx) => `${modelId}#${keyIdx}`;

// Per-slot token buckets for per-minute rate limiting. Held in memory only —
// a per-minute window has no meaning across restarts, so each bucket starts
// full after boot.
// { "deepseek-free#0": { tokens: 4.2, lastRefill: 1718800000000 } }
const buckets = {};

/**
 * Refill the slot's bucket and try to take one token.
 *
 * @param {{id: string, rpm?: number}} model A non-finite or non-positive
 *   `rpm` means unlimited.
 * @param {number} keyIdx
 * @returns {boolean} false if the slot is over its requests-per-minute budget
 *   right now (the caller should rotate to another slot).
 */
export function tryConsumeToken(model, keyIdx) {
  const capacity = model.rpm;
  const limited = Number.isFinite(capacity) && capacity > 0;
  if (!limited) return true; // unlimited

  const key = slotId(model.id, keyIdx);
  const now = Date.now();
  if (!buckets[key]) buckets[key] = { tokens: capacity, lastRefill: now };
  const bucket = buckets[key];

  // Continuous refill at `capacity` tokens per 60s, capped at `capacity`.
  const refillPerSecond = capacity / 60;
  const secondsSinceRefill = (now - bucket.lastRefill) / 1000;
  bucket.tokens = Math.min(capacity, bucket.tokens + secondsSinceRefill * refillPerSecond);
  bucket.lastRefill = now;

  if (bucket.tokens < 1) return false;
  bucket.tokens -= 1;
  return true;
}

/** Whole tokens left in a slot's bucket, or null if the slot has no bucket yet. */
function bucketTokens(id) {
  const bucket = buckets[id];
  if (!bucket) return null;
  return Math.floor(bucket.tokens);
}
|
|
66
|
+
|
|
67
|
+
// Per-slot request history for sparklines: counts are grouped into 1-minute
// buckets and only the most recent HISTORY_BUCKETS minutes are kept.
// In memory only (resets on restart).
const BUCKET_MS = 60_000;
const HISTORY_BUCKETS = 30;
const history = {}; // slotId -> { [absoluteMinuteIndex]: count }

/** Count one request for `id` in the current minute and prune expired minutes. */
function bumpHistory(id) {
  const currentMinute = Math.floor(Date.now() / BUCKET_MS);
  if (!history[id]) history[id] = {};
  const perMinute = history[id];
  perMinute[currentMinute] = (perMinute[currentMinute] || 0) + 1;

  const oldestKept = currentMinute - HISTORY_BUCKETS + 1;
  Object.keys(perMinute)
    .filter((minute) => Number(minute) < oldestKept)
    .forEach((minute) => {
      delete perMinute[minute]; // prune old
    });
}

/**
 * @param {string} id Slot id.
 * @returns {number[]} Oldest..newest array of length HISTORY_BUCKETS (zero-filled gaps).
 */
export function historyFor(id) {
  const currentMinute = Math.floor(Date.now() / BUCKET_MS);
  const perMinute = history[id] || {};
  const firstMinute = currentMinute - HISTORY_BUCKETS + 1;
  return Array.from({ length: HISTORY_BUCKETS }, (_unused, offset) => perMinute[firstMinute + offset] || 0);
}
|
|
89
|
+
|
|
90
|
+
/**
 * Return the persisted state record for a (model, key) slot, creating it if
 * needed and rolling the daily counter over at UTC midnight.
 *
 * Only `count` is reset on rollover. `cooldownUntil` is an absolute timestamp
 * and is carried over, so a cooldown tripped shortly before midnight stays in
 * force until it actually expires.
 *
 * @param {string} modelId
 * @param {number} keyIdx
 * @returns {{day: string, count: number, cooldownUntil: number}}
 */
function entry(modelId, keyIdx) {
  const id = slotId(modelId, keyIdx);
  const day = today();
  const previous = state[id];
  if (!previous || previous.day !== day) {
    const cooldownUntil = Number.isFinite(previous?.cooldownUntil) ? previous.cooldownUntil : 0;
    state[id] = { day, count: 0, cooldownUntil };
  }
  return state[id];
}
|
|
99
|
+
|
|
100
|
+
/**
 * Count one successful request for a slot: bump today's counter, add it to
 * the per-minute history, and persist.
 */
export function recordSuccess(modelId, keyIdx) {
  const slot = entry(modelId, keyIdx);
  slot.count += 1;
  const id = slotId(modelId, keyIdx);
  bumpHistory(id);
  persist();
}
|
|
105
|
+
|
|
106
|
+
/**
 * Put a single (model, key) slot in cooldown after a limit error.
 *
 * @param {string} modelId
 * @param {number} keyIdx
 * @param {number} cooldownMs How long from now the slot stays unavailable.
 */
export function tripCooldown(modelId, keyIdx, cooldownMs) {
  const slot = entry(modelId, keyIdx);
  slot.cooldownUntil = Date.now() + cooldownMs;
  persist();
}
|
|
111
|
+
|
|
112
|
+
/**
 * Is this specific key for this model usable right now?
 *
 * @param {{id: string, dailyLimit?: number}} model A non-finite `dailyLimit`
 *   (undefined, null, NaN, Infinity) means "no daily cap".
 * @param {number} keyIdx
 * @returns {boolean} false while the slot is cooling down or once it has
 *   reached its daily cap.
 */
export function isAvailable(model, keyIdx) {
  const e = entry(model.id, keyIdx);
  if (e.cooldownUntil > Date.now()) return false;
  // Guard explicitly: `count >= null` coerces null to 0 and is always true,
  // which would disable a slot whose limit was stored as null.
  if (Number.isFinite(model.dailyLimit) && e.count >= model.dailyLimit) return false;
  return true;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Build a read-only view of every persisted slot.
 *
 * @returns {Object<string, {day: string, count: number, cooldownRemainingMs: number, tokensThisMinute: (number|null)}>}
 *   Keyed by slot id; `cooldownRemainingMs` is never negative.
 */
export function snapshot() {
  const view = Object.entries(state).map(([id, slot]) => [
    id,
    {
      day: slot.day,
      count: slot.count,
      cooldownRemainingMs: Math.max(0, slot.cooldownUntil - Date.now()),
      tokensThisMinute: bucketTokens(id),
    },
  ]);
  return Object.fromEntries(view);
}
|