@khanglvm/llm-router 2.4.1 → 2.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,309 @@
1
+ import path from "node:path";
2
+ import os from "node:os";
3
+ import { existsSync } from "node:fs";
4
+ import { spawn, spawnSync } from "node:child_process";
5
+
6
// Default bind address/port used when the config does not specify one.
export const LLAMACPP_DEFAULT_HOST = "127.0.0.1";
export const LLAMACPP_DEFAULT_PORT = 39391;
// Executable name looked up on each PATH entry during candidate detection.
const LLAMACPP_EXECUTABLE = "llama-server";
// Well-known Homebrew install locations probed in addition to PATH.
const FALLBACK_LLAMACPP_PATHS = Object.freeze([
  "/opt/homebrew/bin/llama-server",
  "/usr/local/bin/llama-server"
]);
// Common source-checkout build outputs, resolved relative to the home directory.
const COMMON_SOURCE_BUILD_PATHS = Object.freeze([
  "src/llama-cpp/build/bin/llama-server",
  "src/llama.cpp/build/bin/llama-server",
  "src/llama-cpp-turboquant/build/bin/llama-server",
  "src/llama.cpp-turboquant/build/bin/llama-server"
]);

// Handle for the single llama.cpp child process this module manages
// ({ child, command, host, port, args }), or null when none is running.
let managedLlamacppRuntime = null;
21
+
22
// True only for non-null, non-array objects (i.e. "plain" object shapes).
function isPlainObject(value) {
  if (!value) return false;
  if (Array.isArray(value)) return false;
  return typeof value === "object";
}
25
+
26
// Trims string input; any non-string collapses to the empty string.
function normalizeString(value) {
  if (typeof value !== "string") return "";
  return value.trim();
}
29
+
30
// Coerces a value to a valid TCP port (integer 1..65535); otherwise the fallback.
function normalizePort(value, fallback = LLAMACPP_DEFAULT_PORT) {
  const candidate = Number(value);
  const isValidPort = Number.isInteger(candidate) && candidate >= 1 && candidate <= 65535;
  return isValidPort ? candidate : fallback;
}
35
+
36
// Trims each entry and drops empties/non-strings; non-arrays yield [].
function normalizePathEntries(entries) {
  if (!Array.isArray(entries)) return [];
  const cleaned = [];
  for (const entry of entries) {
    const trimmed = normalizeString(entry);
    if (trimmed) cleaned.push(trimmed);
  }
  return cleaned;
}
41
+
42
/**
 * Extracts the llama.cpp runtime settings from the router config.
 * Falls back to disabled autostart plus default host/port when the
 * `metadata.localModels.runtime.llamacpp` node is absent or malformed.
 *
 * @param {object|null|undefined} config router configuration object
 * @returns {{ startWithRouter: boolean, command: string, host: string, port: number }}
 */
function readConfiguredLlamacppRuntime(config) {
  const runtime = config?.metadata?.localModels?.runtime?.llamacpp;
  if (!isPlainObject(runtime)) {
    return {
      startWithRouter: false,
      command: "",
      host: LLAMACPP_DEFAULT_HOST,
      port: LLAMACPP_DEFAULT_PORT
    };
  }

  // Several legacy key names are accepted for the command; first non-empty wins.
  const command = normalizeString(
    runtime.selectedCommand || runtime.manualCommand || runtime.command || runtime.path
  );
  const host = normalizeString(runtime.host) || LLAMACPP_DEFAULT_HOST;

  return {
    startWithRouter: runtime.startWithRouter === true,
    command,
    host,
    port: normalizePort(runtime.port, LLAMACPP_DEFAULT_PORT)
  };
}
60
+
61
/**
 * Collects the enabled llama.cpp variants flagged for preload, resolving each
 * variant's base model path from the local-model library.
 *
 * @param {object|null|undefined} config router configuration object
 * @returns {Array<{ variantId: string, modelPath: string, contextWindow: number|undefined }>}
 */
function buildPreloadModels(config) {
  const localModels = config?.metadata?.localModels;
  const library = localModels?.library;
  const variants = localModels?.variants;
  if (!isPlainObject(library) || !isPlainObject(variants)) return [];

  const models = [];
  for (const variant of Object.values(variants)) {
    if (!isPlainObject(variant)) continue;
    const wanted =
      variant.runtime === "llamacpp" && variant.preload === true && variant.enabled === true;
    if (!wanted) continue;
    // Variants without a resolvable base-model path cannot be preloaded.
    const modelPath = normalizeString(library[variant.baseModelId]?.path);
    if (!modelPath) continue;
    const rawContext = Number(variant.contextWindow);
    models.push({
      variantId: normalizeString(variant.id),
      modelPath,
      contextWindow: Number.isFinite(rawContext) ? rawContext : undefined
    });
  }
  return models;
}
81
+
82
/**
 * Probes PATH entries, Homebrew locations, and common source-build output
 * directories for a `llama-server` binary, deduplicating by absolute path.
 *
 * @param {object} [options]
 * @param {string[]} [options.envPathEntries] PATH entries to probe
 * @param {string} [options.homeDir] base for source-build paths
 * @param {Set<string>|null} [options.existingPaths] when a Set, used instead of
 *   existsSync for existence checks (lets callers/tests avoid filesystem I/O)
 * @returns {Array<{ id: string, label: string, path: string, source: string }>}
 */
export function detectLlamacppCandidates({
  envPathEntries = process.env.PATH?.split(path.delimiter) || [],
  homeDir = os.homedir(),
  existingPaths = null
} = {}) {
  // Assemble probe targets in priority order: PATH, Homebrew, source builds.
  const targets = [];
  for (const entry of normalizePathEntries(envPathEntries)) {
    targets.push({ path: path.join(entry, LLAMACPP_EXECUTABLE), source: "path" });
  }
  for (const entry of FALLBACK_LLAMACPP_PATHS) {
    targets.push({ path: entry, source: "homebrew" });
  }
  for (const entry of COMMON_SOURCE_BUILD_PATHS) {
    targets.push({ path: path.join(homeDir, entry), source: "source-build" });
  }

  const visited = new Set();
  const found = [];
  for (const target of targets) {
    const candidatePath = normalizeString(target.path);
    if (visited.has(candidatePath)) continue;
    visited.add(candidatePath);
    const present = existingPaths instanceof Set
      ? existingPaths.has(candidatePath)
      : existsSync(candidatePath);
    if (!present) continue;
    found.push({
      id: candidatePath,
      label: candidatePath,
      path: candidatePath,
      source: target.source
    });
  }

  return found;
}
120
+
121
/**
 * Builds the argv for launching llama-server: [command, --host, --port, ...]
 * plus `-m`/`-c` for the first preload model, if any. Empty segments are
 * filtered out, so a blank command yields an argv without the executable.
 *
 * @returns {string[]} argv where element 0 is the executable path
 */
export function buildLlamacppLaunchArgs({
  command,
  host = LLAMACPP_DEFAULT_HOST,
  port = LLAMACPP_DEFAULT_PORT,
  preloadModels = []
} = {}) {
  const args = [normalizeString(command)];
  args.push("--host", normalizeString(host) || LLAMACPP_DEFAULT_HOST);
  args.push("--port", String(normalizePort(port, LLAMACPP_DEFAULT_PORT)));

  // Only the first preload model is passed on the command line.
  const firstModel = Array.isArray(preloadModels) ? preloadModels[0] : null;
  if (firstModel?.modelPath) {
    args.push("-m", firstModel.modelPath);
    const context = Number(firstModel.contextWindow);
    if (Number.isFinite(context) && context > 0) {
      args.push("-c", String(Math.floor(context)));
    }
  }

  return args.filter(Boolean);
}
143
+
144
/**
 * Classifies `--help` output from a candidate binary. A binary is accepted
 * (`ok`) when it identifies as llama-server (by name, or by advertising the
 * server's characteristic flag set) AND supports both --host and --port.
 *
 * @param {string} [output] combined stdout/stderr text
 * @returns {{ ok: boolean, kind: string, supportsHost: boolean, supportsPort: boolean, isTurboQuant: boolean }}
 */
export function parseLlamacppValidationOutput(output = "") {
  const text = String(output || "").trim();
  const lowered = text.toLowerCase();

  const supportsHost = /(^|\s)--host(\s|$)/m.test(text);
  const supportsPort = /(^|\s)--port(\s|$)/m.test(text);
  const referencesModelFlag = /(^|\s)(-m,\s+)?--model(\s|$)/m.test(text);

  // Either the binary names itself, or the help text looks like server help.
  const looksLikeServerHelp = supportsHost && supportsPort && referencesModelFlag;
  let kind = "";
  if (lowered.includes("llama-server") || looksLikeServerHelp) {
    kind = "server";
  }

  return {
    ok: Boolean(kind) && supportsHost && supportsPort,
    kind,
    supportsHost,
    supportsPort,
    isTurboQuant: lowered.includes("turboquant") || /\bturbo[234]\b/.test(lowered)
  };
}
161
+
162
/**
 * Validates that `command` is a usable llama-server binary by running it with
 * `--help` and inspecting the output.
 *
 * @param {string} command path or name of the candidate binary
 * @param {object} [deps]
 * @param {Function} [deps.spawnSyncImpl] injectable spawnSync (for tests)
 * @returns {{ ok: boolean, errorMessage?: string } & Partial<ReturnType<typeof parseLlamacppValidationOutput>>}
 */
export function validateLlamacppCommand(command, { spawnSyncImpl = spawnSync } = {}) {
  const target = normalizeString(command);
  if (!target) {
    return {
      ok: false,
      errorMessage: "No llama.cpp command is configured."
    };
  }

  // spawnSync blocks the event loop until the child exits, so bound the probe:
  // a misbehaving binary that never exits on --help must not hang the router.
  // On timeout the child is killed and `result.error` carries ETIMEDOUT,
  // which flows through the existing error branch below.
  const result = spawnSyncImpl(target, ["--help"], {
    encoding: "utf8",
    timeout: 10000
  });
  if (result?.error) {
    return {
      ok: false,
      errorMessage: result.error instanceof Error ? result.error.message : String(result.error)
    };
  }

  // Some binaries print help to stderr, so inspect both streams.
  const parsed = parseLlamacppValidationOutput(`${result?.stdout || ""}\n${result?.stderr || ""}`);
  if (!parsed.ok) {
    return {
      ok: false,
      errorMessage: `Command '${target}' does not appear to be a compatible llama-server binary.`,
      ...parsed
    };
  }

  return {
    ok: true,
    ...parsed
  };
}
195
+
196
/**
 * Starts the configured llama.cpp runtime as a detached-ish child process and
 * records it in the module-level `managedLlamacppRuntime` handle.
 *
 * @param {object} config router configuration (read for runtime + preload models)
 * @param {object} [callbacks]
 * @param {Function} [callbacks.line] info logger
 * @param {Function} [callbacks.error] error logger
 * @param {boolean} [callbacks.requireAutostart] when true, skip unless the
 *   config enables `startWithRouter`
 * @param {object} [deps] injectable spawn/spawnSync implementations for tests
 * @returns {Promise<object>} one of:
 *   { ok:true, skipped:true, reason } | { ok:false, errorMessage } |
 *   { ok:true, alreadyRunning:true, runtime } | { ok:true, runtime, validation }
 */
async function startConfiguredRuntime(config, {
  line = () => {},
  error = () => {},
  requireAutostart = true
} = {}, {
  spawnSyncImpl = spawnSync,
  spawnImpl = spawn
} = {}) {
  const runtime = readConfiguredLlamacppRuntime(config);
  if (requireAutostart && !runtime.startWithRouter) {
    return { ok: true, skipped: true, reason: "autostart-disabled" };
  }

  if (!runtime.command) {
    const errorMessage = "llama.cpp autostart is enabled, but no runtime command is configured.";
    error(errorMessage);
    return { ok: false, errorMessage };
  }

  // Reuse the managed child only when it matches the requested configuration
  // and is still alive (exitCode === null means it has not exited yet).
  if (managedLlamacppRuntime
    && managedLlamacppRuntime.command === runtime.command
    && managedLlamacppRuntime.host === runtime.host
    && managedLlamacppRuntime.port === runtime.port
    && managedLlamacppRuntime.child?.exitCode === null
    && managedLlamacppRuntime.child?.killed !== true) {
    return { ok: true, alreadyRunning: true, runtime: managedLlamacppRuntime };
  }

  // Probe the binary with --help before launching it for real.
  const validation = validateLlamacppCommand(runtime.command, { spawnSyncImpl });
  if (!validation.ok) {
    error(validation.errorMessage || `Failed validating llama.cpp runtime '${runtime.command}'.`);
    return validation;
  }

  const preloadModels = buildPreloadModels(config);
  const args = buildLlamacppLaunchArgs({
    command: runtime.command,
    host: runtime.host,
    port: runtime.port,
    preloadModels
  });

  // Resolve on whichever of "spawn" / "error" fires first; the `settled`
  // flag guards against resolving twice.
  return new Promise((resolve) => {
    let settled = false;
    const child = spawnImpl(args[0], args.slice(1), {
      stdio: "ignore"
    });

    const finish = (result) => {
      if (settled) return;
      settled = true;
      resolve(result);
    };

    child.once("spawn", () => {
      // Record the managed handle only once the process actually spawned.
      managedLlamacppRuntime = {
        child,
        command: runtime.command,
        host: runtime.host,
        port: runtime.port,
        args
      };
      child.once("exit", () => {
        // Clear the handle only if it still refers to this child; a newer
        // runtime may have replaced it in the meantime.
        if (managedLlamacppRuntime?.child === child) {
          managedLlamacppRuntime = null;
        }
      });
      // unref() lets the router process exit without waiting on the child.
      if (typeof child.unref === "function") child.unref();
      line(`Started llama.cpp runtime on http://${runtime.host}:${runtime.port}${validation.isTurboQuant ? " (TurboQuant detected)" : ""}.`);
      // NOTE(review): "spawn" only means the process launched; a child that
      // exits immediately afterwards still resolves ok here.
      finish({ ok: true, runtime: managedLlamacppRuntime, validation });
    });

    child.once("error", (spawnError) => {
      const errorMessage = spawnError instanceof Error ? spawnError.message : String(spawnError);
      error(`Failed starting llama.cpp runtime: ${errorMessage}`);
      finish({ ok: false, errorMessage });
    });
  });
}
275
+
276
/**
 * Starts the configured llama.cpp runtime only when autostart is enabled in
 * the config (`requireAutostart: true`); otherwise the start is skipped.
 */
export async function ensureConfiguredLlamacppRuntimeStarted(config, callbacks = {}, deps = {}) {
  const options = { ...callbacks, requireAutostart: true };
  return startConfiguredRuntime(config, options, deps);
}
282
+
283
/**
 * Starts the configured llama.cpp runtime unconditionally
 * (`requireAutostart: false` bypasses the startWithRouter check).
 */
export async function startConfiguredLlamacppRuntime(config, callbacks = {}, deps = {}) {
  const options = { ...callbacks, requireAutostart: false };
  return startConfiguredRuntime(config, options, deps);
}
289
+
290
/**
 * Sends SIGTERM to the managed llama.cpp child, if one is tracked.
 * Clears the module-level handle before killing so concurrent callers
 * immediately see the runtime as stopped.
 *
 * @returns {Promise<{ ok: boolean, skipped?: boolean, reason?: string, errorMessage?: string }>}
 */
export async function stopManagedLlamacppRuntime({
  line = () => {},
  error = () => {}
} = {}) {
  const tracked = managedLlamacppRuntime;
  if (!tracked?.child) {
    return { ok: true, skipped: true, reason: "not-running" };
  }

  managedLlamacppRuntime = null;
  try {
    tracked.child.kill("SIGTERM");
    line("Stopped managed llama.cpp runtime.");
    return { ok: true };
  } catch (stopError) {
    const errorMessage = stopError instanceof Error ? stopError.message : String(stopError);
    error(`Failed stopping llama.cpp runtime: ${errorMessage}`);
    return { ok: false, errorMessage };
  }
}
@@ -0,0 +1,132 @@
1
+ import path from "node:path";
2
+ import { execFile } from "node:child_process";
3
+ import { promises as fs } from "node:fs";
4
+
5
// Matches GGUF model files by extension, case-insensitively.
const GGUF_PATTERN = /\.gguf$/i;
6
+
7
// Trims string input; non-strings collapse to the empty string.
function normalizeString(value) {
  if (typeof value === "string") return value.trim();
  return "";
}
10
+
11
// Shapes a scan result: absolute path, base name, and size when stats are known.
function formatScanEntry(filePath, stats = null) {
  const rawSize = Number(stats?.size);
  return {
    filePath,
    fileName: path.basename(filePath),
    sizeBytes: Number.isFinite(rawSize) ? rawSize : undefined
  };
}
18
+
19
/**
 * Recursively collects `.gguf` files beneath `targetPath` into `entries`.
 * A nonexistent/unreadable root still rejects (callers should see a bad
 * path), but failures on individual children — a file deleted between
 * readdir and stat, or a permission-denied subdirectory — are skipped so
 * one bad entry cannot abort the whole scan.
 *
 * @param {string} targetPath file or directory to scan
 * @param {Array<object>} [entries] accumulator, mutated and returned
 * @returns {Promise<Array<object>>} accumulated formatScanEntry results
 */
async function collectGgufFiles(targetPath, entries = []) {
  const stats = await fs.stat(targetPath);
  if (stats.isFile()) {
    if (GGUF_PATTERN.test(targetPath)) entries.push(formatScanEntry(targetPath, stats));
    return entries;
  }

  if (!stats.isDirectory()) return entries;

  const children = await fs.readdir(targetPath, { withFileTypes: true });
  for (const child of children) {
    const childPath = path.join(targetPath, child.name);
    if (child.isDirectory()) {
      try {
        await collectGgufFiles(childPath, entries);
      } catch {
        // Unreadable subtree (e.g. permission denied) — skip it, keep scanning.
      }
      continue;
    }
    if (!child.isFile() || !GGUF_PATTERN.test(child.name)) continue;
    try {
      const childStats = await fs.stat(childPath);
      entries.push(formatScanEntry(childPath, childStats));
    } catch {
      // File vanished or became unreadable after readdir — skip it.
    }
  }
  return entries;
}
41
+
42
/**
 * Builds the AppleScript lines (one per `-e` argument) for a native macOS
 * picker. "directory" shows a folder chooser, "runtime" a binary chooser,
 * anything else a GGUF file chooser. Cancel (-128) returns "" instead of
 * raising an error.
 */
function buildBrowseAppleScript(selection) {
  let chooser;
  if (selection === "directory") {
    chooser = "choose folder with prompt \"Select a folder to scan for GGUF files\"";
  } else if (selection === "runtime") {
    chooser = "choose file with prompt \"Select a llama.cpp runtime binary (llama-server)\"";
  } else {
    chooser = "choose file with prompt \"Select a GGUF file\"";
  }

  return [
    "try",
    `POSIX path of (${chooser})`,
    "on error number -128",
    "return \"\"",
    "end try"
  ];
}
71
+
72
/**
 * Opens a native macOS file/folder picker via osascript and returns the
 * chosen POSIX path. On other platforms, or when the user cancels (empty
 * osascript output), resolves with `canceled: true`.
 *
 * @param {object} [options]
 * @param {string} [options.selection] "file" | "directory" | "runtime"
 * @param {object} [deps] injectable platform/execFile (for tests)
 * @returns {Promise<{ canceled: boolean, selection: string, path?: string, reason?: string }>}
 */
export async function browseForLocalModelPath({
  selection = "file"
} = {}, {
  platform = process.platform,
  execFileImpl = execFile
} = {}) {
  // The picker is driven by AppleScript, so bail out on other platforms.
  if (platform !== "darwin") {
    return {
      canceled: true,
      reason: "Native local-model browse is currently available on macOS only.",
      selection
    };
  }

  const osascriptArgs = buildBrowseAppleScript(selection).flatMap((scriptLine) => ["-e", scriptLine]);
  const result = await runExecFile(execFileImpl, "osascript", osascriptArgs, { encoding: "utf8" });
  const chosenPath = normalizeString(result?.stdout || "");

  return chosenPath
    ? { canceled: false, selection, path: chosenPath }
    : { canceled: true, selection };
}
100
+
101
/**
 * Scans a file or directory for GGUF models and returns the matches
 * sorted by file name. Blank/non-string input yields an empty list.
 *
 * @param {string} targetPath file or directory to scan
 * @returns {Promise<Array<object>>}
 */
export async function scanLocalModelPath(targetPath) {
  const resolvedPath = normalizeString(targetPath);
  if (!resolvedPath) return [];

  const matches = await collectGgufFiles(resolvedPath);
  matches.sort((left, right) => left.fileName.localeCompare(right.fileName));
  return matches;
}
108
/**
 * Runs a command through `execFileImpl` and resolves with { stdout, stderr },
 * adapting between callback-style and promise-style implementations.
 *
 * Dispatch order matters: Node's real `execFile` is matched by identity first,
 * presumably because its reported `.length` does not reflect the optional
 * callback parameter and the arity sniff below would misclassify it — TODO
 * confirm. Custom impls declaring >= 4 parameters are treated as
 * callback-style; anything else is assumed to return a promise directly.
 *
 * @param {Function} execFileImpl execFile-compatible implementation
 * @param {string} command executable to run
 * @param {string[]} args argument vector
 * @param {object} options execFile options (e.g. { encoding })
 * @returns {Promise<{ stdout: string, stderr: string }>}
 * @throws {Error} when execFileImpl is not a function
 */
async function runExecFile(execFileImpl, command, args, options) {
  // Node's built-in execFile: wrap its callback in a promise.
  if (execFileImpl === execFile) {
    return new Promise((resolve, reject) => {
      execFile(command, args, options, (error, stdout, stderr) => {
        if (error) reject(error);
        else resolve({ stdout, stderr });
      });
    });
  }

  if (typeof execFileImpl !== "function") {
    throw new Error("execFile implementation is required.");
  }

  // Declared arity >= 4 implies the impl expects a (error, stdout, stderr) callback.
  if (execFileImpl.length >= 4) {
    return new Promise((resolve, reject) => {
      execFileImpl(command, args, options, (error, stdout, stderr) => {
        if (error) reject(error);
        else resolve({ stdout, stderr });
      });
    });
  }

  // Otherwise assume a promise-returning implementation.
  return execFileImpl(command, args, options);
}
@@ -0,0 +1,39 @@
1
// Coerces to a finite positive number; anything else (NaN, Infinity, <= 0) is 0.
function normalizePositiveNumber(value) {
  const parsed = Number(value);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
}
6
+
7
/**
 * Estimates the memory footprint of a model variant: file size, plus a
 * per-context-token reservation (163840 bytes per token — heuristic constant,
 * TODO confirm), plus a 15% overhead when the variant is preloaded.
 */
function calculateEstimatedBytes(variant = {}) {
  const sizeBytes = normalizePositiveNumber(variant.sizeBytes);
  const contextBytes = normalizePositiveNumber(variant.contextWindow) * 163840;
  let preloadPenalty = 0;
  if (variant.preload === true) {
    preloadPenalty = Math.floor(sizeBytes * 0.15);
  }
  return sizeBytes + contextBytes + preloadPenalty;
}
14
+
15
/**
 * Rates how a variant's estimated footprint fits system memory.
 * Budgets are only enforced on macOS with unified memory: above 82% of total
 * memory is "over-budget", above 72% is "tight"; everything else is "safe".
 *
 * @returns {{ fit: "safe"|"tight"|"over-budget", estimatedBytes: number }}
 */
export function classifyVariantCapacity(variant, system = {}) {
  const estimatedBytes = calculateEstimatedBytes(variant);
  const totalMemoryBytes = normalizePositiveNumber(system.totalMemoryBytes);
  const enforceBudget = system.platform === "darwin" && system.unifiedMemory === true;

  if (enforceBudget) {
    if (estimatedBytes > Math.floor(totalMemoryBytes * 0.82)) {
      return { fit: "over-budget", estimatedBytes };
    }
    if (estimatedBytes > Math.floor(totalMemoryBytes * 0.72)) {
      return { fit: "tight", estimatedBytes };
    }
  }
  return { fit: "safe", estimatedBytes };
}
29
+
30
/**
 * Decides whether activating `candidate` keeps total active-variant memory
 * within the safe budget (72% of total memory).
 *
 * @returns {{ allowed: boolean, reason: string }} reason is "" when allowed
 */
export function canActivateVariant({ candidate, activeVariants, totalMemoryBytes }) {
  const safeBudget = Math.floor(normalizePositiveNumber(totalMemoryBytes) * 0.72);

  let committedBytes = 0;
  for (const variant of Array.isArray(activeVariants) ? activeVariants : []) {
    committedBytes += normalizePositiveNumber(variant?.estimatedBytes);
  }
  const projectedBytes = committedBytes + normalizePositiveNumber(candidate?.estimatedBytes);

  if (projectedBytes <= safeBudget) {
    return { allowed: true, reason: "" };
  }
  return { allowed: false, reason: "Enabling this variant would exceed the local capacity budget." };
}