@khanglvm/llm-router 2.3.6 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,543 @@
1
+ import http from "node:http";
2
+ import net from "node:net";
3
+ import path from "node:path";
4
+ import { spawn } from "node:child_process";
5
+ import { Readable } from "node:stream";
6
+ import {
7
+ clearRuntimeState,
8
+ getRuntimeStatePath,
9
+ isProcessRunning,
10
+ readRuntimeState,
11
+ stopProcessByPid
12
+ } from "./instance-state.js";
13
+ import { FIXED_LOCAL_ROUTER_HOST, FIXED_LOCAL_ROUTER_PORT } from "./local-server-settings.js";
14
+
15
// Suffix inserted into the primary runtime-state file name to get the backend's state file.
const BACKEND_STATE_SUFFIX = "backend";

// Timing defaults, all in milliseconds.
const DEFAULT_BACKEND_READY_TIMEOUT_MS = 12_000;
const DEFAULT_BACKEND_HEALTH_POLL_MS = 2_000;
const DEFAULT_PROXY_RETRY_TIMEOUT_MS = 20_000;
const DEFAULT_PROXY_RETRY_INTERVAL_MS = 125;

// Error codes after which a proxied request is attempted again.
const RETRYABLE_PROXY_ERROR_CODES = new Set([
  "ECONNREFUSED",
  "ECONNRESET",
  "EPIPE",
  "ETIMEDOUT",
  "UND_ERR_CONNECT_TIMEOUT",
  "UND_ERR_SOCKET",
]);
28
+
29
/**
 * Wait for a number of milliseconds.
 *
 * @param {number} ms Delay handed to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
32
+
33
/**
 * Build the `host:port` authority part of a URL.
 *
 * A blank or missing host becomes `127.0.0.1`. A host containing `:` that is
 * not already wrapped in square brackets is wrapped, so IPv6 literals stay
 * unambiguous next to the port.
 *
 * @param {string} host Host name or IP literal.
 * @param {number|string} port Port appended after the host.
 * @returns {string} `host:port` or `[host]:port`.
 */
function formatHostForUrl(host, port) {
  const fallback = "127.0.0.1";
  const name = String(host || fallback).trim() || fallback;
  const alreadyBracketed = name.startsWith("[") && name.endsWith("]");
  const needsBrackets = name.includes(":") && !alreadyBracketed;
  return needsBrackets ? `[${name}]:${port}` : `${name}:${port}`;
}
39
+
40
/**
 * Reduce a raw request target to an origin-relative `path?query` string.
 *
 * - Blank or missing input becomes `/`.
 * - An absolute `http://` / `https://` target is parsed and only its path and
 *   query string are kept; an unparsable one becomes `/`.
 * - Anything else is returned with a leading `/` (added when absent).
 *
 * @param {string} rawUrl Request target, typically `req.url`.
 * @returns {string} Path (plus query) that always starts with `/`.
 */
function normalizeRequestPath(rawUrl) {
  const target = String(rawUrl || "/").trim() || "/";

  if (!/^https?:\/\//.test(target)) {
    return target.startsWith("/") ? target : `/${target}`;
  }

  try {
    const { pathname, search } = new URL(target);
    return `${pathname}${search}` || "/";
  } catch {
    return "/";
  }
}
53
+
54
/**
 * Compose an absolute `http://` URL for an incoming request.
 *
 * @param {{ url?: string }} req Incoming request; only `req.url` is read.
 * @param {string} fallbackHost Authority (`host[:port]`) placed after `http://`.
 * @returns {string} `http://<fallbackHost><normalized path>`.
 */
function buildRequestUrl(req, fallbackHost) {
  const pathAndQuery = normalizeRequestPath(req.url);
  const origin = `http://${fallbackHost}`;
  return `${origin}${pathAndQuery}`;
}
58
+
59
/**
 * Tell whether requests with this HTTP method are treated as carrying a body.
 *
 * @param {string} method HTTP method in any letter case; missing means `GET`.
 * @returns {boolean} `false` for `GET` and `HEAD`, `true` for everything else.
 */
function hasRequestBody(method) {
  const verb = String(method || "GET").toUpperCase();
  return !["GET", "HEAD"].includes(verb);
}
63
+
64
/**
 * Read the whole request body into memory.
 *
 * @param {AsyncIterable<Buffer|string> & { method?: string }} req Incoming
 *   request; iterated to completion when its method allows a body.
 * @returns {Promise<Buffer|null>} The concatenated body, or `null` for methods
 *   that `hasRequestBody` reports as body-less.
 */
async function readRequestBodyBuffer(req) {
  if (!hasRequestBody(req.method)) {
    return null;
  }

  const toBuffer = (piece) => (typeof piece === "string" ? Buffer.from(piece) : piece);
  const pieces = [];
  for await (const piece of req) {
    pieces.push(toBuffer(piece));
  }
  return Buffer.concat(pieces);
}
72
+
73
/**
 * Translate an incoming Node request into the `url` / `init` pair for `fetch`.
 *
 * End-to-end headers are copied as-is. Connection-scoped (hop-by-hop) headers
 * are NOT forwarded: they describe the client<->supervisor connection only,
 * and Node's fetch refuses to send requests that carry `transfer-encoding`,
 * `keep-alive`, `upgrade`, `expect` or a non-trivial `connection` value.
 * `content-length` is dropped as well because fetch derives it from the
 * buffered body that is actually sent.
 *
 * @param {import("node:http").IncomingMessage} req Incoming request.
 * @param {string} backendOrigin Backend origin such as `http://127.0.0.1:1234`.
 * @param {Buffer|null} bodyBuffer Fully buffered request body, if any.
 * @returns {{ url: string, init: object }} Arguments for `fetch(url, init)`.
 */
function buildFetchRequest(req, backendOrigin, bodyBuffer) {
  const method = String(req.method || "GET").toUpperCase();
  const incoming = req.headers || {};

  const skipped = new Set([
    "connection",
    "keep-alive",
    "proxy-connection",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
    "expect",
    "content-length"
  ]);
  // Any header named inside `Connection:` is hop-by-hop too.
  const connectionValue = Array.isArray(incoming.connection)
    ? incoming.connection.join(",")
    : String(incoming.connection || "");
  for (const token of connectionValue.split(",")) {
    const listed = token.trim().toLowerCase();
    if (listed) skipped.add(listed);
  }

  const headers = new Headers();
  for (const [name, value] of Object.entries(incoming)) {
    if (skipped.has(name.toLowerCase())) continue;
    if (Array.isArray(value)) {
      for (const item of value) headers.append(name, item);
    } else if (typeof value === "string") {
      headers.set(name, value);
    }
  }

  // Tell the backend which peer actually connected to the supervisor.
  const socketIp = typeof req.socket?.remoteAddress === "string"
    ? req.socket.remoteAddress
    : "";
  if (socketIp) {
    headers.set("x-real-ip", socketIp);
  }

  const requestUrl = `${backendOrigin}${normalizeRequestPath(req.url)}`;
  if (!hasRequestBody(method)) {
    return {
      url: requestUrl,
      init: { method, headers }
    };
  }

  return {
    url: requestUrl,
    init: {
      method,
      headers,
      body: bodyBuffer ?? Buffer.alloc(0),
      duplex: "half"
    }
  };
}
110
+
111
/**
 * Relay a fetch `Response` onto a Node `ServerResponse`.
 *
 * Differences from a naive header copy:
 * - fetch hands out an already-decompressed body for gzip/deflate/br
 *   responses while still reporting the original `content-encoding` and
 *   `content-length`. Relaying those would describe bytes that are no longer
 *   being sent, so both are dropped whenever the body was decoded.
 * - `Set-Cookie` may occur several times. Calling `setHeader` once per value
 *   would keep only the last one, so all values are sent as one array.
 * - When the client connection closes early the upstream body stream is
 *   destroyed, so the backend response is cancelled instead of left dangling.
 *
 * @param {import("node:http").ServerResponse} res Response to write to.
 * @param {Response} response Upstream fetch response.
 * @returns {Promise<void>} Resolves once headers are set and piping started
 *   (not when the body has been fully sent).
 */
async function writeFetchResponseToNode(res, response) {
  res.statusCode = response.status;

  // Codings that fetch decodes transparently; anything else is passed through raw.
  const decodedCodings = new Set(["gzip", "x-gzip", "deflate", "br"]);
  const codings = String(response.headers.get("content-encoding") || "")
    .split(",")
    .map((coding) => coding.trim().toLowerCase())
    .filter(Boolean);
  const bodyWasDecoded = Boolean(response.body)
    && codings.length > 0
    && codings.every((coding) => decodedCodings.has(coding));

  const cookies = typeof response.headers.getSetCookie === "function"
    ? response.headers.getSetCookie()
    : [];

  response.headers.forEach((value, name) => {
    const key = String(name).toLowerCase();
    if (key === "set-cookie" && cookies.length > 0) return;
    if (bodyWasDecoded && (key === "content-encoding" || key === "content-length")) return;
    res.setHeader(name, value);
  });
  if (cookies.length > 0) {
    res.setHeader("set-cookie", cookies);
  }

  if (!response.body) {
    res.end();
    return;
  }

  const readable = Readable.fromWeb(response.body);
  readable.on("error", (error) => {
    res.destroy(error);
  });
  // Client went away (or the response finished): stop reading from the backend.
  res.once("close", () => {
    if (!readable.destroyed) readable.destroy();
  });
  readable.pipe(res);
}
128
+
129
/**
 * Derive a sibling state-file path by inserting a suffix before the extension.
 *
 * Example: `/run/state.json` with suffix `backend` gives
 * `/run/state.backend.json`. A base path without an extension gets `.json`.
 *
 * @param {string} basePath Path of the primary state file.
 * @param {string} suffix Label inserted between the file name and extension.
 * @returns {string} Path in the same directory as `basePath`.
 */
function deriveAuxiliaryStatePath(basePath, suffix) {
  const { dir, name, ext } = path.parse(basePath);
  const fileName = `${name}.${suffix}${ext || ".json"}`;
  return path.join(dir, fileName);
}
134
+
135
/**
 * Resolve the path of the backend's runtime-state file.
 *
 * It is the primary runtime-state path (from `getRuntimeStatePath`) with the
 * backend suffix inserted before the extension.
 *
 * @param {{ env?: object }} [options] `env` defaults to `process.env`.
 * @returns {string} Backend runtime-state file path.
 */
export function getBackendRuntimeStatePath({ env = process.env } = {}) {
  const primaryStatePath = getRuntimeStatePath({ env });
  return deriveAuxiliaryStatePath(primaryStatePath, BACKEND_STATE_SUFFIX);
}
138
+
139
/**
 * Read a runtime-state file and return it only if its process is still alive.
 *
 * A state file that cannot be read counts as absent. When the recorded pid is
 * no longer running the stale file is cleared (best effort) and `null` is
 * returned.
 *
 * @param {string} filePath Runtime-state file to inspect.
 * @param {object} [deps] Optional overrides: `readRuntimeState`,
 *   `clearRuntimeState` and `isProcessRunning`. Each falls back to the
 *   implementation imported from `./instance-state.js`.
 * @returns {Promise<object|null>} The state of a live process, else `null`.
 */
async function readActiveRuntimeStateFromPath(filePath, deps = {}) {
  const readRuntimeStateFn = typeof deps.readRuntimeState === "function" ? deps.readRuntimeState : readRuntimeState;
  const clearRuntimeStateFn = typeof deps.clearRuntimeState === "function" ? deps.clearRuntimeState : clearRuntimeState;
  // Injectable like the two helpers above so liveness can be stubbed as well.
  const isProcessRunningFn = typeof deps.isProcessRunning === "function" ? deps.isProcessRunning : isProcessRunning;

  let runtime = null;
  try {
    runtime = await readRuntimeStateFn(filePath);
  } catch {
    // Unreadable state is treated the same as no state.
    runtime = null;
  }

  if (!runtime) return null;
  if (isProcessRunningFn(runtime.pid)) return runtime;

  try {
    await clearRuntimeStateFn({ pid: runtime.pid }, filePath);
  } catch {
    // ignore cleanup failure for stale state
  }
  return null;
}
160
+
161
/**
 * Ask the operating system for a currently free TCP port.
 *
 * A throw-away server is bound to port 0 on `host`, the assigned port is read
 * back, and the server is closed again before the promise resolves.
 *
 * @param {string} [host] Interface to bind the probe to.
 * @returns {Promise<number>} The port the probe was given (0 when the bound
 *   address is not an object). Rejects on listen or close errors.
 */
async function allocateLoopbackPort(host = FIXED_LOCAL_ROUTER_HOST) {
  return new Promise((resolve, reject) => {
    const probe = net.createServer();

    const onListening = () => {
      const bound = probe.address();
      const port = typeof bound === "object" && bound ? Number(bound.port) : 0;
      probe.close((closeError) => {
        if (closeError) {
          reject(closeError);
        } else {
          resolve(port);
        }
      });
    };

    probe.once("error", reject);
    probe.listen(0, host, onListening);
  });
}
178
+
179
/**
 * Append a chunk to captured output, keeping only the most recent characters.
 *
 * @param {string} current Output captured so far.
 * @param {string} chunk Newly received text; falsy chunks are ignored.
 * @param {number} [maxChars=4000] Length limit applied via `slice(-maxChars)`.
 * @returns {string} The combined text, trimmed from the front when too long.
 */
function appendRecentOutput(current, chunk, maxChars = 4000) {
  if (!chunk) {
    return current;
  }
  const merged = `${current}${chunk}`;
  if (merged.length <= maxChars) {
    return merged;
  }
  return merged.slice(-maxChars);
}
184
+
185
/**
 * Attach captured process output to a start-failure message.
 *
 * Trimmed stderr is preferred; trimmed stdout is used only when stderr is
 * empty. With neither available the base message is returned unchanged.
 *
 * @param {string} baseMessage Summary of the failure.
 * @param {{ stderr?: string, stdout?: string }} [output] Captured output.
 * @returns {string} `baseMessage`, optionally followed by a newline and detail.
 */
function formatStartFailureMessage(baseMessage, { stderr = "", stdout = "" } = {}) {
  const errorText = String(stderr || "").trim();
  const detail = errorText || String(stdout || "").trim();
  if (!detail) {
    return baseMessage;
  }
  return `${baseMessage}\n${detail}`;
}
189
+
190
/**
 * Build the CLI argument list for the `start-runtime` backend command.
 *
 * @param {object} settings
 * @param {string} settings.configPath Value for `--config`.
 * @param {string} [settings.host] Value for `--host`.
 * @param {number|string} settings.port Value for `--port`.
 * @param {boolean} [settings.watchConfig=true] Rendered as `--watch-config`.
 * @param {boolean} [settings.watchBinary=true] Rendered as `--watch-binary`.
 * @param {boolean} [settings.requireAuth=false] Rendered as `--require-auth`.
 * @returns {string[]} Sub-command followed by `--name=value` flags.
 */
function createBackendStartArgs({
  configPath,
  host = FIXED_LOCAL_ROUTER_HOST,
  port,
  watchConfig = true,
  watchBinary = true,
  requireAuth = false
}) {
  const asFlag = (enabled) => (enabled ? "true" : "false");
  const flags = [
    `--config=${configPath}`,
    `--host=${host}`,
    `--port=${port}`,
    `--watch-config=${asFlag(watchConfig)}`,
    `--watch-binary=${asFlag(watchBinary)}`,
    `--require-auth=${asFlag(requireAuth)}`
  ];
  return ["start-runtime", ...flags];
}
208
+
209
/**
 * Decide whether a failed proxy attempt should be tried again.
 *
 * An error qualifies when its `code` (or its `cause.code`) is one of the
 * retryable codes, or when its lower-cased message contains one of a few
 * connection-failure phrases.
 *
 * @param {unknown} fetchError Error thrown while proxying.
 * @returns {boolean} `true` when the attempt should be retried.
 */
function shouldRetryProxyError(fetchError) {
  const rawCode = fetchError?.code || fetchError?.cause?.code || "";
  if (RETRYABLE_PROXY_ERROR_CODES.has(String(rawCode).trim())) {
    return true;
  }

  const text = String(fetchError?.message || fetchError || "").toLowerCase();
  const retryablePhrases = [
    "econnrefused",
    "other side closed",
    "socket",
    "fetch failed",
    "connect timeout"
  ];
  return retryablePhrases.some((phrase) => text.includes(phrase));
}
219
+
220
/**
 * Answer a request with a 503 JSON error describing the backend failure.
 *
 * If the response headers were already sent nothing more can be reported, so
 * the response is simply ended.
 *
 * @param {import("node:http").ServerResponse} res Response to finish.
 * @param {string} message Detail placed in the JSON `message` field.
 * @returns {void}
 */
function sendProxyUnavailable(res, message) {
  if (!res.headersSent) {
    res.statusCode = 503;
    res.setHeader("content-type", "application/json; charset=utf-8");
    res.setHeader("cache-control", "no-store");
    const payload = JSON.stringify({
      error: "Router backend unavailable",
      message
    });
    res.end(`${payload}\n`);
    return;
  }
  res.end();
}
233
+
234
/**
 * Start the router supervisor.
 *
 * The supervisor spawns the router backend as a child process on its own
 * port, listens on the public host/port, and forwards every incoming request
 * to the backend with `fetch`. Requests that fail with a retryable connection
 * error are retried until `proxyRetryTimeoutMs` elapses, then answered with
 * 503. The backend is re-spawned when it exits and re-checked on a timer.
 *
 * @param {object} [options]
 * @param {string} [options.host] Address the supervisor listens on.
 * @param {number|string} [options.port] Port the supervisor listens on.
 * @param {string} [options.configPath] Config path passed to the backend.
 * @param {boolean} [options.watchConfig] Disabled only by `false`.
 * @param {boolean} [options.watchBinary] Disabled only by `false`.
 * @param {boolean} [options.requireAuth] Enabled only by `true`.
 * @param {string} [options.cliPath] CLI script run with `process.execPath`;
 *   falls back to `LLM_ROUTER_CLI_PATH`, then `process.argv[1]`.
 * @param {Function} [options.onLine] Info logger (default `console.log`).
 * @param {Function} [options.onError] Error logger (default `console.error`).
 * @param {string} [options.backendStatePath] Backend runtime-state file.
 * @param {string} [options.backendHost] Address the backend binds to.
 * @param {number|string} [options.backendPort] Backend port; a free port is
 *   allocated when omitted.
 * @param {number} [options.backendReadyTimeoutMs] Minimum 1000.
 * @param {number} [options.backendHealthPollMs] Minimum 250.
 * @param {number} [options.proxyRetryTimeoutMs] Minimum 1000.
 * @param {number} [options.proxyRetryIntervalMs] Minimum 25.
 * @param {object} [deps] Optional overrides: `env`, `spawn`,
 *   `stopProcessByPid`, `clearRuntimeState`, `signalProcess`; the object is
 *   also handed to `readActiveRuntimeStateFromPath`.
 * @returns {Promise<import("node:http").Server>} The listening server, extended
 *   with `requestBackendUpgrade(signal)`, `getBackendRuntime()`,
 *   `backendRuntimeStatePath` and `backendPort`. Its `close()` also stops the
 *   backend before invoking the callback.
 * @throws {Error} When the backend cannot be started or the server cannot listen.
 */
export async function startRouterSupervisor(options = {}, deps = {}) {
  // `Number(null)` and `Number("")` are both 0, which would silently select
  // port 0 or the minimum timeout. Treat null/undefined/blank as "not given".
  const readNumber = (value, isValid) => {
    if (value === null || value === undefined) return null;
    if (typeof value === "string" && value.trim() === "") return null;
    const parsed = Number(value);
    return isValid(parsed) ? parsed : null;
  };
  const readClampedMs = (value, minimum, fallback) => {
    const parsed = readNumber(value, Number.isFinite);
    return parsed === null ? fallback : Math.max(minimum, parsed);
  };

  const host = String(options.host || FIXED_LOCAL_ROUTER_HOST).trim() || FIXED_LOCAL_ROUTER_HOST;
  const port = readNumber(options.port, Number.isInteger) ?? FIXED_LOCAL_ROUTER_PORT;
  const configPath = String(options.configPath || "").trim();
  const watchConfig = options.watchConfig !== false;
  const watchBinary = options.watchBinary !== false;
  const requireAuth = options.requireAuth === true;
  const cliPath = String(options.cliPath || process.env.LLM_ROUTER_CLI_PATH || process.argv[1] || "").trim();
  const line = typeof options.onLine === "function" ? options.onLine : console.log;
  const error = typeof options.onError === "function" ? options.onError : console.error;
  const backendStatePath = String(options.backendStatePath || getBackendRuntimeStatePath({ env: deps.env || process.env })).trim();
  const backendHost = String(options.backendHost || FIXED_LOCAL_ROUTER_HOST).trim() || FIXED_LOCAL_ROUTER_HOST;
  const backendPort = readNumber(options.backendPort, Number.isInteger)
    ?? (await allocateLoopbackPort(backendHost));
  const backendReadyTimeoutMs = readClampedMs(options.backendReadyTimeoutMs, 1000, DEFAULT_BACKEND_READY_TIMEOUT_MS);
  const backendHealthPollMs = readClampedMs(options.backendHealthPollMs, 250, DEFAULT_BACKEND_HEALTH_POLL_MS);
  const proxyRetryTimeoutMs = readClampedMs(options.proxyRetryTimeoutMs, 1000, DEFAULT_PROXY_RETRY_TIMEOUT_MS);
  const proxyRetryIntervalMs = readClampedMs(options.proxyRetryIntervalMs, 25, DEFAULT_PROXY_RETRY_INTERVAL_MS);

  const spawnFn = typeof deps.spawn === "function" ? deps.spawn : spawn;
  const stopProcessByPidFn = typeof deps.stopProcessByPid === "function" ? deps.stopProcessByPid : stopProcessByPid;
  const clearRuntimeStateFn = typeof deps.clearRuntimeState === "function" ? deps.clearRuntimeState : clearRuntimeState;
  const signalProcess = typeof deps.signalProcess === "function" ? deps.signalProcess : process.kill;

  let shuttingDown = false;
  // In-flight ensureBackendRunning() promise, shared by concurrent callers.
  let ensuringBackend = null;
  let healthTimer = null;
  // Number of requests currently being served per client socket.
  const socketRequestCounts = new Map();

  // Stop the backend recorded in the state file, if its process is alive.
  async function stopBackendIfRunning() {
    const runtime = await readActiveRuntimeStateFromPath(backendStatePath, deps);
    if (!runtime) return { ok: true, alreadyStopped: true };

    const stopped = await stopProcessByPidFn(runtime.pid);
    if (stopped?.ok) {
      await clearRuntimeStateFn({ pid: runtime.pid }, backendStatePath).catch(() => {});
      return stopped;
    }
    return stopped || { ok: false, reason: `Failed stopping backend pid ${runtime.pid}.` };
  }

  // Spawn the backend (or reuse a matching live one) and wait until its state
  // file reports the expected port and config path.
  async function spawnBackend(reason = "startup") {
    const activeRuntime = await readActiveRuntimeStateFromPath(backendStatePath, deps);
    if (activeRuntime
      && Number(activeRuntime.port) === Number(backendPort)
      && String(activeRuntime.configPath || "").trim() === configPath
      && Boolean(activeRuntime.watchConfig !== false) === Boolean(watchConfig)
      && Boolean(activeRuntime.watchBinary !== false) === Boolean(watchBinary)
      && Boolean(activeRuntime.requireAuth === true) === Boolean(requireAuth)) {
      return activeRuntime;
    }

    if (activeRuntime) {
      // A live backend with different settings must go before a new one starts.
      const stopped = await stopBackendIfRunning();
      if (!stopped?.ok) {
        throw new Error(stopped?.reason || `Failed stopping stale backend pid ${activeRuntime.pid}.`);
      }
    } else {
      await clearRuntimeStateFn({}, backendStatePath).catch(() => {});
    }

    const args = createBackendStartArgs({
      configPath,
      host: backendHost,
      port: backendPort,
      watchConfig,
      watchBinary,
      requireAuth
    });

    let child;
    try {
      child = spawnFn(process.execPath, [cliPath, ...args], {
        stdio: ["ignore", "pipe", "pipe"],
        env: {
          ...(deps.env || process.env),
          LLM_ROUTER_CLI_PATH: cliPath,
          LLM_ROUTER_RUNTIME_STATE_PATH: backendStatePath
        }
      });
    } catch (spawnError) {
      throw new Error(spawnError instanceof Error ? spawnError.message : String(spawnError));
    }

    let childError = null;
    let childExit = null;
    let stdout = "";
    let stderr = "";
    const onStdout = (chunk) => {
      stdout = appendRecentOutput(stdout, chunk);
    };
    const onStderr = (chunk) => {
      stderr = appendRecentOutput(stderr, chunk);
    };
    child.stdout?.setEncoding?.("utf8");
    child.stderr?.setEncoding?.("utf8");
    child.stdout?.on?.("data", onStdout);
    child.stderr?.on?.("data", onStderr);
    child.once("error", (spawnError) => {
      childError = spawnError;
    });
    child.once("exit", (code, signal) => {
      childExit = { code, signal };
      if (shuttingDown) return;
      // Give the exit a moment to settle, then bring the backend back.
      setTimeout(() => {
        if (shuttingDown) return;
        void ensureBackendRunning(`backend-exit:${reason}`).catch((restartError) => {
          error(`Failed restoring router backend after exit: ${restartError instanceof Error ? restartError.message : String(restartError)}`);
        });
      }, 250);
    });

    // Stop capturing output but keep the pipes flowing. Destroying the read
    // ends would make later writes by a still-running backend fail with EPIPE;
    // leaving them unread could block the backend once the pipe buffer fills.
    const releasePipes = () => {
      child.stdout?.off?.("data", onStdout);
      child.stderr?.off?.("data", onStderr);
      child.stdout?.resume?.();
      child.stderr?.resume?.();
    };

    const startedAt = Date.now();
    while (Date.now() - startedAt < backendReadyTimeoutMs) {
      const runtime = await readActiveRuntimeStateFromPath(backendStatePath, deps);
      if (runtime
        && Number(runtime.port) === Number(backendPort)
        && String(runtime.configPath || "").trim() === configPath) {
        releasePipes();
        return runtime;
      }

      if (childError) {
        releasePipes();
        throw new Error(formatStartFailureMessage(
          childError instanceof Error ? childError.message : String(childError),
          { stderr, stdout }
        ));
      }

      if (childExit) {
        releasePipes();
        throw new Error(formatStartFailureMessage(
          `Router backend exited before becoming ready (${childExit.signal || childExit.code || "unknown"}).`,
          { stderr, stdout }
        ));
      }

      await sleep(125);
    }

    releasePipes();
    throw new Error(formatStartFailureMessage(
      `Timed out waiting for router backend to start on http://${formatHostForUrl(backendHost, backendPort)}.`,
      { stderr, stdout }
    ));
  }

  // Return the live backend's runtime state, spawning the backend if needed.
  // Concurrent callers share one in-flight attempt.
  async function ensureBackendRunning(reason = "runtime-check") {
    if (shuttingDown) {
      throw new Error("Router supervisor is shutting down.");
    }
    if (ensuringBackend) return ensuringBackend;

    ensuringBackend = Promise.resolve()
      .then(async () => {
        const active = await readActiveRuntimeStateFromPath(backendStatePath, deps);
        if (active
          && Number(active.port) === Number(backendPort)
          && String(active.configPath || "").trim() === configPath) {
          return active;
        }
        line(`Starting router backend (${reason}) on http://${formatHostForUrl(backendHost, backendPort)}...`);
        return spawnBackend(reason);
      })
      .finally(() => {
        ensuringBackend = null;
      });

    return ensuringBackend;
  }

  // Send `signal` to the backend process, starting the backend first if needed.
  async function requestBackendUpgrade(signal = "SIGUSR2") {
    const runtime = await ensureBackendRunning("upgrade-request");
    try {
      signalProcess(runtime.pid, signal);
      return { ok: true, pid: runtime.pid, signal };
    } catch (signalError) {
      return {
        ok: false,
        reason: signalError instanceof Error ? signalError.message : String(signalError)
      };
    }
  }

  // End a client socket unless it still has requests in flight.
  function closeSocketIfIdle(socket) {
    if (!socket || socket.destroyed) return;
    if (Number(socketRequestCounts.get(socket) || 0) > 0) return;
    socket.end();
  }

  const server = http.createServer(async (req, res) => {
    const socket = req.socket;
    socketRequestCounts.set(socket, Number(socketRequestCounts.get(socket) || 0) + 1);
    let finalized = false;
    // Runs once per request ("finish" and "close" may both fire).
    const finalizeRequest = () => {
      if (finalized) return;
      finalized = true;
      const remaining = Math.max(0, Number(socketRequestCounts.get(socket) || 0) - 1);
      if (remaining > 0) {
        socketRequestCounts.set(socket, remaining);
        return;
      }
      socketRequestCounts.set(socket, 0);
      if (shuttingDown) {
        closeSocketIfIdle(socket);
      }
    };
    res.once("finish", finalizeRequest);
    res.once("close", finalizeRequest);

    let bodyBuffer = null;
    try {
      // The body is buffered up front so it can be re-sent on every retry.
      bodyBuffer = await readRequestBodyBuffer(req);
      const startedAt = Date.now();
      let lastError = null;

      while (Date.now() - startedAt < proxyRetryTimeoutMs) {
        const runtime = await ensureBackendRunning("proxy-request");
        const backendOrigin = `http://${formatHostForUrl(runtime.host || backendHost, runtime.port || backendPort)}`;

        try {
          const { url, init } = buildFetchRequest(req, backendOrigin, bodyBuffer);
          const response = await fetch(url, init);
          await writeFetchResponseToNode(res, response);
          return;
        } catch (proxyError) {
          lastError = proxyError;
          if (!shouldRetryProxyError(proxyError)) {
            throw proxyError;
          }
          await sleep(proxyRetryIntervalMs);
        }
      }

      throw lastError || new Error("Timed out waiting for the router backend.");
    } catch (proxyError) {
      error(`Router supervisor proxy failed: ${proxyError instanceof Error ? proxyError.message : String(proxyError)}`);
      sendProxyUnavailable(res, proxyError instanceof Error ? proxyError.message : String(proxyError));
    }
  });

  server.on("connection", (socket) => {
    socketRequestCounts.set(socket, Number(socketRequestCounts.get(socket) || 0));
    socket.on("close", () => {
      socketRequestCounts.delete(socket);
    });
  });

  // The backend must be up before the public port starts accepting requests.
  await ensureBackendRunning("startup");

  await new Promise((resolve, reject) => {
    server.once("error", reject);
    server.listen(port, host, () => {
      server.off("error", reject);
      resolve();
    });
  });

  healthTimer = setInterval(() => {
    if (shuttingDown) return;
    void ensureBackendRunning("health-check").catch((restartError) => {
      error(`Router backend health check failed: ${restartError instanceof Error ? restartError.message : String(restartError)}`);
    });
  }, backendHealthPollMs);
  healthTimer.unref?.();

  server.requestBackendUpgrade = (signal = "SIGUSR2") => requestBackendUpgrade(signal);
  server.getBackendRuntime = () => readActiveRuntimeStateFromPath(backendStatePath, deps);
  server.backendRuntimeStatePath = backendStatePath;
  server.backendPort = backendPort;

  const originalClose = server.close.bind(server);
  server.close = (callback) => {
    shuttingDown = true;
    if (healthTimer) {
      clearInterval(healthTimer);
      healthTimer = null;
    }
    server.closeIdleConnections?.();
    for (const socket of socketRequestCounts.keys()) {
      closeSocketIfIdle(socket);
    }
    originalClose(async (closeError) => {
      await stopBackendIfRunning().catch(() => {});
      // Forward the close error (undefined on success), as http.Server.close does.
      callback?.(closeError);
    });
    return server;
  };

  return server;
}