@khanglvm/llm-router 2.6.0 → 2.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +0 -13
- package/README.md +0 -3
- package/package.json +1 -1
- package/src/node/coding-tool-config.js +15 -1
- package/src/node/huggingface-gguf.js +0 -12
- package/src/node/llamacpp-runtime.js +78 -256
- package/src/node/local-models-service.js +2 -25
- package/src/node/local-server.js +2 -60
- package/src/node/provider-probe.js +18 -0
- package/src/node/quota-probe-mapping.js +215 -0
- package/src/node/quota-probe-runner.js +234 -0
- package/src/node/web-console-client.js +33 -27
- package/src/node/web-console-server.js +107 -64
- package/src/node/web-console-styles.generated.js +1 -1
- package/src/node/web-console-ui/api-client.js +27 -0
- package/src/node/web-console-ui/local-models-utils.js +0 -33
- package/src/runtime/balancer.js +47 -4
- package/src/runtime/config.js +9 -4
- package/src/runtime/handler/fallback.js +7 -0
- package/src/runtime/handler/provider-call.js +18 -36
- package/src/runtime/handler/runtime-policy.js +1 -4
- package/src/runtime/local-models.js +0 -36
- package/src/runtime/quota-probe.js +179 -0
- package/src/node/llamacpp-managed-runtime.js +0 -202
- package/src/node/llamacpp-runtime-profile.js +0 -133
package/CHANGELOG.md
CHANGED

@@ -7,19 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
-## [2.6.0] - 2026-04-23
-
-### Added
-- Local `llama.cpp` variants can now persist a per-model runtime profile, including auto-tuned presets and custom launch overrides, so each GGUF variant can run with settings that match its own size and context shape instead of sharing one global `llama-server` startup profile.
-- The Web UI now exposes managed `llama.cpp` runtime health for Local Models, including tracked instance counts, healthy/stale summaries, and persisted runtime-profile data for each saved variant.
-
-### Changed
-- Local variant requests are now resolved through a managed per-variant `llama.cpp` runtime layer that can reuse compatible instances, allocate fallback ports safely, and start the right runtime configuration for the specific model variant without exposing multi-process lifecycle management to the user.
-- Hugging Face GGUF search/download flows now surface file size plus estimated runtime memory guidance directly in the Local Models workflow, making it easier to choose a viable quantization before download.
-
-### Fixed
-- Managed `llama.cpp` runtimes now reconcile stale tracked instances before reuse, avoid reserving dead immediate-exit servers, and drain pending shutdown/startup edges more reliably so local per-model routing does not leave behind stale `llama-server` processes.
-
 ## [2.5.2] - 2026-04-23
 
 ### Fixed

package/README.md
CHANGED

@@ -44,9 +44,6 @@ Open `llr` and use the **Local Models** tab to manage local inference sources al
 - **Native macOS browsing** — use the built-in file picker to choose a single GGUF file, scan a folder recursively for GGUF models, or browse directly to a local `llama-server` binary
 - **Managed + attached model library** — stale or moved files stay visible instead of crashing the app, and can be repaired by locating the file again or removed cleanly
 - **Router-visible local variants** — create friendly model variants with bounded presets, context-window metadata, preload toggles, and Mac unified-memory fit guidance with clearer safe/tight recommendations
-- **Per-variant llama.cpp tuning** — each local variant can store its own runtime profile so balanced, throughput, long-context, low-memory, or custom launch overrides do not fight over one shared global `llama-server` config
-- **Managed per-model runtimes** — the router automatically starts, reuses, and stops the right `llama.cpp` instance for the requested local variant, with stale-runtime cleanup handled internally instead of asking the user to manage separate servers
-- **GGUF size + memory guidance** — Hugging Face search results now show model file size plus estimated runtime memory fit guidance before download, helping choose viable quantizations faster
 - **Alias-ready local routing** — once saved, local variants behave like normal router models and can be used in aliases, capability flags, and fallback chains
 
 For v1, the managed download flow only searches public Hugging Face GGUF files and the fit guidance is tuned for Macs with unified memory.

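Since saved local variants behave like normal router models, pairing one with a hosted fallback is the intended workflow. A purely hypothetical sketch of what that pairing could look like in a router config (the `aliases`/`fallbacks` field names and every id here are illustrative assumptions, not the package's documented schema):

    // Hypothetical config fragment; all names are illustrative only.
    {
      aliases: {
        "fast-local": "local/qwen3-8b-q4"      // a saved local variant id
      },
      fallbacks: {
        "fast-local": ["openai/gpt-5-mini"]    // hosted model as the backup leg
      }
    }
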
package/src/node/coding-tool-config.js
CHANGED

@@ -17,6 +17,7 @@ import {
   normalizeFactoryDroidReasoningEffort,
   resolveFactoryDroidRouterModelRef
 } from "../shared/coding-tool-bindings.js";
+import { LOCAL_RUNTIME_PROVIDER_TYPE } from "../runtime/local-models.js";
 
 const BACKUP_SUFFIX = ".llm_router_backup";
 const CODEX_PROVIDER_ID = "llm-router";

@@ -972,9 +973,11 @@ export async function patchClaudeCodeEffortLevel({
 const FACTORY_DROID_ROUTER_MARKER = "_llmRouterManaged";
 const FACTORY_DROID_OPENAI_PROVIDER = "openai";
 const FACTORY_DROID_ANTHROPIC_PROVIDER = "anthropic";
+const FACTORY_DROID_GENERIC_CHAT_COMPLETIONS_PROVIDER = "generic-chat-completion-api";
 const FACTORY_DROID_ROUTER_PROVIDERS = Object.freeze([
   FACTORY_DROID_OPENAI_PROVIDER,
-  FACTORY_DROID_ANTHROPIC_PROVIDER
+  FACTORY_DROID_ANTHROPIC_PROVIDER,
+  FACTORY_DROID_GENERIC_CHAT_COMPLETIONS_PROVIDER
 ]);
 
 function dedupeStrings(values = []) {

@@ -1116,6 +1119,17 @@ function resolveFactoryDroidRouteFormat(modelRef, config = {}, seen = new Set())
 }
 
 function resolveFactoryDroidCustomModelProvider(modelRef, config = {}) {
+  const normalizedModelRef = String(modelRef || "").trim();
+  if (normalizedModelRef.includes("/")) {
+    const separatorIndex = normalizedModelRef.indexOf("/");
+    const providerId = normalizedModelRef.slice(0, separatorIndex).trim();
+    const provider = (Array.isArray(config?.providers) ? config.providers : [])
+      .find((entry) => String(entry?.id || "").trim() === providerId);
+    if (String(provider?.type || "").trim().toLowerCase() === LOCAL_RUNTIME_PROVIDER_TYPE) {
+      return FACTORY_DROID_GENERIC_CHAT_COMPLETIONS_PROVIDER;
+    }
+  }
+
   return mapFactoryDroidFormatToProvider(resolveFactoryDroidRouteFormat(modelRef, config))
     || FACTORY_DROID_OPENAI_PROVIDER;
 }

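The effect of the new guard: a Factory Droid custom-model ref whose prefix matches the id of a local-runtime provider is pinned to Factory's generic chat-completions provider instead of falling through to the OpenAI/Anthropic format mapping. A minimal sketch of the internal lookup (the provider id `local-llama` and the type string `"local-runtime"` are illustrative assumptions; the real value is `LOCAL_RUNTIME_PROVIDER_TYPE` from `../runtime/local-models.js`):

    // Assumed config shape, for illustration only.
    const config = {
      providers: [
        { id: "local-llama", type: "local-runtime" } // type string assumed
      ]
    };

    // Ref prefixed with a local-runtime provider id:
    resolveFactoryDroidCustomModelProvider("local-llama/my-variant", config);
    // -> "generic-chat-completion-api"

    // Any other ref still goes through resolveFactoryDroidRouteFormat,
    // defaulting to "openai" when no format maps.
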
package/src/node/huggingface-gguf.js
CHANGED

@@ -1,6 +1,5 @@
 import path from "node:path";
 import { promises as fs } from "node:fs";
-import { estimateLlamacppRuntimeBytes } from "./llamacpp-runtime-profile.js";
 
 const HUGGING_FACE_API_URL = "https://huggingface.co/api/models";
 const HUGGING_FACE_BASE_URL = "https://huggingface.co";

@@ -155,13 +154,6 @@ export function shapeHuggingFaceGgufResults(files, systemInfo = {}) {
       expectedContextWindow: systemInfo?.expectedContextWindow
     }, systemInfo);
     const quantization = parseQuantizationFromFileName(file);
-    const estimatedRuntimeBytes = sizeBytes
-      ? estimateLlamacppRuntimeBytes({
-          sizeBytes,
-          contextWindow: systemInfo?.expectedContextWindow,
-          preset: status.fit === "tight" ? "memory-safe" : "balanced"
-        })
-      : undefined;
     const fitScore = status.fit === "safe" ? 30 : status.fit === "tight" ? 15 : status.fit === "unknown" ? 8 : -20;
     const rankingScore = fitScore
       + (status.disabled ? -100 : 0)

@@ -174,10 +166,6 @@ export function shapeHuggingFaceGgufResults(files, systemInfo = {}) {
       file,
       quantization,
       sizeBytes,
-      estimatedRuntimeBytes,
-      memoryLabel: estimatedRuntimeBytes
-        ? `${(estimatedRuntimeBytes / (1024 ** 3)).toFixed(1)} GB runtime est.`
-        : "Runtime estimate unavailable",
      disabled: status.disabled,
      disabledReason: status.reason,
      fit: status.fit,

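With the runtime-memory estimate removed, ranking rests on the signals still visible above: a base fit score plus a heavy penalty for disabled files. A worked sketch of just those two terms (any further `rankingScore` terms fall outside this hunk):

    // Base score by fit status, as in shapeHuggingFaceGgufResults.
    const fitScore = (fit) =>
      fit === "safe" ? 30 : fit === "tight" ? 15 : fit === "unknown" ? 8 : -20;

    fitScore("safe");              // 30: comfortably fits
    fitScore("tight");             // 15: fits, but close to the limit
    fitScore("other") + (-100);    // -120: any other fit scores -20, and a
                                   // disabled file sinks to the bottom
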
package/src/node/llamacpp-runtime.js
CHANGED

@@ -2,11 +2,6 @@ import path from "node:path";
 import os from "node:os";
 import { existsSync } from "node:fs";
 import { spawn, spawnSync } from "node:child_process";
-import { setTimeout as delay } from "node:timers/promises";
-import { deriveLlamacppLaunchProfile } from "./llamacpp-runtime-profile.js";
-import { createLlamacppManagedRuntimeRegistry } from "./llamacpp-managed-runtime.js";
-import { listListeningPids as listListeningPidsForPort } from "./port-reclaim.js";
-import { stopProcessByPid as stopProcessByPidForRuntime } from "./instance-state.js";
 
 export const LLAMACPP_DEFAULT_HOST = "127.0.0.1";
 export const LLAMACPP_DEFAULT_PORT = 39391;

@@ -21,8 +16,8 @@ const COMMON_SOURCE_BUILD_PATHS = Object.freeze([
   "src/llama-cpp-turboquant/build/bin/llama-server",
   "src/llama.cpp-turboquant/build/bin/llama-server"
 ]);
-
-let
+
+let managedLlamacppRuntime = null;
 
 function isPlainObject(value) {
   return Boolean(value) && typeof value === "object" && !Array.isArray(value);

@@ -44,34 +39,6 @@ function normalizePathEntries(entries) {
     : [];
 }
 
-function buildRuntimeProfileHash({ command, host, port, args = [] } = {}) {
-  const normalizedArgs = Array.isArray(args) ? args.filter(Boolean) : [];
-  return `${normalizeString(command)}|${normalizeString(host)}|${String(normalizePort(port, LLAMACPP_DEFAULT_PORT))}|${normalizedArgs.join("\u001f")}`;
-}
-
-function isManagedRuntimeAlive(instance) {
-  const child = instance?.child;
-  if (!child) return false;
-  return child.exitCode === null && child.killed !== true;
-}
-
-function normalizeListeningPidResult(result) {
-  if (result && typeof result === "object" && result.ok === false) {
-    return { ok: false, pids: [] };
-  }
-  if (Array.isArray(result)) {
-    return result
-      .map((value) => Number(value))
-      .filter((pid) => Number.isInteger(pid) && pid > 0);
-  }
-  if (result && typeof result === "object" && Array.isArray(result.pids)) {
-    return result.pids
-      .map((value) => Number(value))
-      .filter((pid) => Number.isInteger(pid) && pid > 0);
-  }
-  return [];
-}
-
 function readConfiguredLlamacppRuntime(config) {
   const runtime = config?.metadata?.localModels?.runtime?.llamacpp;
   if (!isPlainObject(runtime)) {

@@ -105,8 +72,6 @@ function buildPreloadModels(config) {
     if (!modelPath) continue;
     preloadModels.push({
       variantId: normalizeString(variant.id),
-      variant,
-      baseModel,
       modelPath,
       contextWindow: Number.isFinite(Number(variant.contextWindow)) ? Number(variant.contextWindow) : undefined
     });

@@ -114,15 +79,6 @@
   return preloadModels;
 }
 
-function detectLlamacppSystemProfile(system = {}) {
-  const totalMemoryBytes = Number(system?.totalMemoryBytes);
-  return {
-    platform: normalizeString(system?.platform) || process.platform,
-    unifiedMemory: system?.unifiedMemory === true || process.platform === "darwin",
-    totalMemoryBytes: Number.isFinite(totalMemoryBytes) && totalMemoryBytes > 0 ? totalMemoryBytes : os.totalmem()
-  };
-}
-
 export function detectLlamacppCandidates({
   envPathEntries = process.env.PATH?.split(path.delimiter) || [],
   homeDir = os.homedir(),

@@ -166,18 +122,16 @@ export function buildLlamacppLaunchArgs({
   command,
   host = LLAMACPP_DEFAULT_HOST,
   port = LLAMACPP_DEFAULT_PORT,
-  preloadModels = [],
-  launchProfile = null
+  preloadModels = []
 } = {}) {
   const firstModel = Array.isArray(preloadModels) ? preloadModels[0] : null;
   const args = [
     normalizeString(command),
     "--host", normalizeString(host) || LLAMACPP_DEFAULT_HOST,
-    "--port", String(normalizePort(port, LLAMACPP_DEFAULT_PORT)),
-    ...((Array.isArray(launchProfile?.args) ? launchProfile.args : []).filter(Boolean))
+    "--port", String(normalizePort(port, LLAMACPP_DEFAULT_PORT))
   ];
 
-  if (
+  if (firstModel?.modelPath) {
     args.push("-m", firstModel.modelPath);
     if (Number.isFinite(Number(firstModel.contextWindow)) && Number(firstModel.contextWindow) > 0) {
       args.push("-c", String(Math.floor(Number(firstModel.contextWindow))));

@@ -187,31 +141,6 @@ export function buildLlamacppLaunchArgs({
   return args.filter(Boolean);
 }
 
-export async function spawnManagedLlamacppRuntime({
-  command,
-  host = LLAMACPP_DEFAULT_HOST,
-  port = LLAMACPP_DEFAULT_PORT,
-  launchProfile
-} = {}, {
-  spawnImpl = spawn
-} = {}) {
-  const args = buildLlamacppLaunchArgs({
-    command,
-    host,
-    port,
-    launchProfile
-  });
-  const child = spawnImpl(args[0], args.slice(1), { stdio: "ignore" });
-  return {
-    pid: child?.pid,
-    child,
-    host,
-    port,
-    baseUrl: `http://${host}:${port}/v1`,
-    args
-  };
-}
-
 export function parseLlamacppValidationOutput(output = "") {
   const text = String(output || "").trim();
   const lowered = text.toLowerCase();

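After this change, `buildLlamacppLaunchArgs` produces a flat argv with no per-profile extras: the command, host/port (defaulting to the exported constants), and at most one preloaded model with an optional context window. A quick sketch of the output, using example values for the model path and context size:

    buildLlamacppLaunchArgs({
      command: "llama-server",
      preloadModels: [{ modelPath: "/models/qwen3-8b-q4.gguf", contextWindow: 8192 }]
    });
    // -> ["llama-server",
    //     "--host", "127.0.0.1",
    //     "--port", "39391",
    //     "-m", "/models/qwen3-8b-q4.gguf",
    //     "-c", "8192"]
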
@@ -270,142 +199,78 @@ async function startConfiguredRuntime(config, {
   requireAutostart = true
 } = {}, {
   spawnSyncImpl = spawnSync,
-  spawnImpl = spawn,
-  system = undefined,
-  listListeningPids = undefined,
-  stopProcessByPid = undefined
+  spawnImpl = spawn
 } = {}) {
-
-
-
-
-    return { ok: true, skipped: true, reason: "autostart-disabled" };
-  }
+  const runtime = readConfiguredLlamacppRuntime(config);
+  if (requireAutostart && !runtime.startWithRouter) {
+    return { ok: true, skipped: true, reason: "autostart-disabled" };
+  }
 
-
-
-
-
-
+  if (!runtime.command) {
+    const errorMessage = "llama.cpp autostart is enabled, but no runtime command is configured.";
+    error(errorMessage);
+    return { ok: false, errorMessage };
+  }
 
-
-
-
-
-
-
-
-
-    : null;
-  const args = buildLlamacppLaunchArgs({
-    command: runtime.command,
-    host: runtime.host,
-    port: runtime.port,
-    preloadModels,
-    launchProfile
-  });
-  const variantKey = normalizeString(firstModel?.variant?.key || firstModel?.variantId) || "default";
-  const profileHash = buildRuntimeProfileHash({
-    command: runtime.command,
-    host: runtime.host,
-    port: runtime.port,
-    args: args.slice(1)
-  });
-  const listListeningPidsFn = typeof listListeningPids === "function"
-    ? listListeningPids
-    : (port) => listListeningPidsForPort(port, { spawnSync: spawnSyncImpl });
-  const stopProcessByPidFn = typeof stopProcessByPid === "function"
-    ? stopProcessByPid
-    : (pid) => stopProcessByPidForRuntime(pid);
-  await managedLlamacppRuntimeRegistry.reconcile({
-    listListeningPids: async (port) => normalizeListeningPidResult(await listListeningPidsFn(port)),
-    stopProcessByPid: async (pid) => stopProcessByPidFn(pid)
-  });
+  if (managedLlamacppRuntime
+    && managedLlamacppRuntime.command === runtime.command
+    && managedLlamacppRuntime.host === runtime.host
+    && managedLlamacppRuntime.port === runtime.port
+    && managedLlamacppRuntime.child?.exitCode === null
+    && managedLlamacppRuntime.child?.killed !== true) {
+    return { ok: true, alreadyRunning: true, runtime: managedLlamacppRuntime };
+  }
 
-
-
-
-
-
-      && isManagedRuntimeAlive(instance)
-    ));
-  if (existing) {
-    return { ok: true, alreadyRunning: true, runtime: existing };
-  }
+  const validation = validateLlamacppCommand(runtime.command, { spawnSyncImpl });
+  if (!validation.ok) {
+    error(validation.errorMessage || `Failed validating llama.cpp runtime '${runtime.command}'.`);
+    return validation;
+  }
 
-
-
-
-
-
+  const preloadModels = buildPreloadModels(config);
+  const args = buildLlamacppLaunchArgs({
+    command: runtime.command,
+    host: runtime.host,
+    port: runtime.port,
+    preloadModels
+  });
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    });
-  }
-
-  const settleResolve = (value) => {
-    if (settled) return;
-    settled = true;
-    resolve(value);
-  };
-  const settleReject = (reason) => {
-    if (settled) return;
-    settled = true;
-    reject(reason);
-  };
-
-  child.once("spawn", () => {
-    if (typeof child.unref === "function") child.unref();
-    settleResolve({
-      pid: child?.pid,
-      child,
-      command: runtime.command,
-      host: runtime.host,
-      port,
-      args: allocatedArgs,
-      baseUrl: `http://${runtime.host}:${port}/v1`
-    });
-  });
-  child.once("error", (spawnError) => {
-    settleReject(spawnError);
-  });
-  }),
-  waitForHealthy: async (instance) => instance
+  return new Promise((resolve) => {
+    let settled = false;
+    const child = spawnImpl(args[0], args.slice(1), {
+      stdio: "ignore"
+    });
+
+    const finish = (result) => {
+      if (settled) return;
+      settled = true;
+      resolve(result);
+    };
+
+    child.once("spawn", () => {
+      managedLlamacppRuntime = {
+        child,
+        command: runtime.command,
+        host: runtime.host,
+        port: runtime.port,
+        args
+      };
+      child.once("exit", () => {
+        if (managedLlamacppRuntime?.child === child) {
+          managedLlamacppRuntime = null;
+        }
       });
+      if (typeof child.unref === "function") child.unref();
+      line(`Started llama.cpp runtime on http://${runtime.host}:${runtime.port}${validation.isTurboQuant ? " (TurboQuant detected)" : ""}.`);
+      finish({ ok: true, runtime: managedLlamacppRuntime, validation });
+    });
 
-
-    return { ok: true, runtime: managedRuntime, validation };
-  } catch (spawnError) {
+    child.once("error", (spawnError) => {
       const errorMessage = spawnError instanceof Error ? spawnError.message : String(spawnError);
       error(`Failed starting llama.cpp runtime: ${errorMessage}`);
-
-    }
-  }
-  inFlightConfiguredStartCount = Math.max(0, inFlightConfiguredStartCount - 1);
-}
+      finish({ ok: false, errorMessage });
+    });
+  });
 }
 
 export async function ensureConfiguredLlamacppRuntimeStarted(config, callbacks = {}, deps = {}) {

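The rewritten `startConfiguredRuntime` reads a single runtime block from config metadata and keeps one module-level instance, so a repeat start with the same command/host/port short-circuits. A sketch of the config shape it reads (key path and `startWithRouter` per `readConfiguredLlamacppRuntime` and the autostart check above; the command path is an example value):

    const config = {
      metadata: {
        localModels: {
          runtime: {
            llamacpp: {
              command: "/usr/local/bin/llama-server", // example path
              host: "127.0.0.1",
              port: 39391,
              startWithRouter: true // gates autostart unless requireAutostart is false
            }
          }
        }
      }
    };

    // First call spawns the server; while the child is alive, an identical
    // second call returns { ok: true, alreadyRunning: true, runtime }.
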
@@ -422,66 +287,23 @@ export async function startConfiguredLlamacppRuntime(config, callbacks = {}, dep
   }, deps);
 }
 
-export function getManagedLlamacppRuntimeSnapshot() {
-  return managedLlamacppRuntimeRegistry.snapshot().map((instance) => {
-    const { child: _child, ...rest } = instance || {};
-    return JSON.parse(JSON.stringify(rest));
-  });
-}
-
 export async function stopManagedLlamacppRuntime({
   line = () => {},
   error = () => {}
 } = {}) {
-
-
-  }
-  if (typeof managedLlamacppRuntimeRegistry.waitForInFlightStarts === "function") {
-    await managedLlamacppRuntimeRegistry.waitForInFlightStarts();
-  }
-  const instances = managedLlamacppRuntimeRegistry.snapshot();
-  if (instances.length === 0) {
+  const active = managedLlamacppRuntime;
+  if (!active?.child) {
     return { ok: true, skipped: true, reason: "not-running" };
   }
 
-
-
-
-
-
-
-
-
-
-    }
-  }
-      if (!isManagedRuntimeAlive(instance)) {
-        await managedLlamacppRuntimeRegistry.untrackInstance(instance?.instanceId);
-      } else {
-        pendingExitCount += 1;
-      }
-    } catch (stopError) {
-      const errorMessage = stopError instanceof Error ? stopError.message : String(stopError);
-      failures.push(errorMessage);
-      error(`Failed stopping llama.cpp runtime: ${errorMessage}`);
-    }
-  }
-
-  if (stoppedCount > 0) {
-    if (pendingExitCount === 0) {
-      line(stoppedCount === 1 ? "Stopped managed llama.cpp runtime." : `Stopped ${stoppedCount} managed llama.cpp runtimes.`);
-    } else {
-      line(stoppedCount === 1
-        ? "Stop signal sent to managed llama.cpp runtime; waiting for exit."
-        : `Stop signal sent to ${stoppedCount} managed llama.cpp runtimes; waiting for exits.`);
-    }
+  managedLlamacppRuntime = null;
+  try {
+    active.child.kill("SIGTERM");
+    line("Stopped managed llama.cpp runtime.");
+    return { ok: true };
+  } catch (stopError) {
+    const errorMessage = stopError instanceof Error ? stopError.message : String(stopError);
+    error(`Failed stopping llama.cpp runtime: ${errorMessage}`);
+    return { ok: false, errorMessage };
   }
-
-  const completed = failures.length === 0 && pendingExitCount === 0;
-  return {
-    ok: completed,
-    stoppedCount,
-    pendingExitCount,
-    ...(failures.length > 0 ? { errorMessage: failures.join("; ") } : {})
-  };
 }

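The stop path is now symmetric with the single tracked instance: it clears the module-level reference first, then signals the child. A usage sketch of the result shapes visible in the hunk above:

    const result = await stopManagedLlamacppRuntime({
      line: (msg) => console.log(msg),
      error: (msg) => console.error(msg)
    });
    // -> { ok: true } after SIGTERM is sent,
    // -> { ok: true, skipped: true, reason: "not-running" } when nothing is tracked,
    // -> { ok: false, errorMessage } if kill() throws.
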
package/src/node/local-models-service.js
CHANGED

@@ -212,42 +212,19 @@ export async function saveLocalModelVariant(config, draft, {
     activeVariants,
     totalMemoryBytes: system.totalMemoryBytes
   });
-
+  if (!decision.allowed) {
     throw new Error(decision.reason);
   }
 }
 
-const normalizedMetadata = normalizeLocalModelsMetadata({
-  variants: {
-    draft: normalizedDraft
-  }
-});
-const normalizedVariantDraft = Object.values(normalizedMetadata.variants)[0] || {};
-const previousVariant = isPlainObject(next.metadata.localModels.variants[key])
-  ? next.metadata.localModels.variants[key]
-  : {};
-
 next.metadata.localModels.variants[key] = {
-  ...
+  ...(isPlainObject(next.metadata.localModels.variants[key]) ? next.metadata.localModels.variants[key] : {}),
   key,
   baseModelId,
   id: modelId,
   name,
   runtime,
   preset: normalizeString(normalizedDraft.preset),
-  runtimeProfile: runtime === "llamacpp"
-    ? normalizedVariantDraft.runtimeProfile
-    : undefined,
-  runtimeStatus: runtime === "llamacpp"
-    ? (isPlainObject(previousVariant.runtimeStatus)
-      ? previousVariant.runtimeStatus
-      : {
-        activeInstanceId: "",
-        lastFailure: null,
-        lastStartedAt: "",
-        lastHealthyAt: ""
-      })
-    : undefined,
   enabled: normalizedDraft.enabled === true,
   preload: normalizedDraft.preload === true,
   contextWindow: Number.isFinite(Number(normalizedDraft.contextWindow)) ? Number(normalizedDraft.contextWindow) : undefined,