@khanglvm/llm-router 2.3.1 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/CHANGELOG.md +5 -0
  2. package/README.md +2 -2
  3. package/package.json +1 -1
  4. package/src/cli/router-module.js +32 -5
  5. package/src/node/coding-tool-config.js +138 -25
  6. package/src/node/large-request-log.js +54 -0
  7. package/src/node/litellm-context-catalog.js +13 -1
  8. package/src/node/local-server.js +10 -0
  9. package/src/node/ollama-client.js +195 -0
  10. package/src/node/ollama-hardware.js +94 -0
  11. package/src/node/ollama-install.js +230 -0
  12. package/src/node/provider-probe.js +69 -5
  13. package/src/node/web-console-client.js +36 -36
  14. package/src/node/web-console-server.js +478 -8
  15. package/src/node/web-console-styles.generated.js +1 -1
  16. package/src/node/web-console-ui/amp-utils.js +272 -0
  17. package/src/node/web-console-ui/api-client.js +128 -0
  18. package/src/node/web-console-ui/capability-utils.js +36 -0
  19. package/src/node/web-console-ui/config-editor-utils.js +20 -5
  20. package/src/node/web-console-ui/constants.js +140 -0
  21. package/src/node/web-console-ui/context-window-utils.js +262 -0
  22. package/src/node/web-console-ui/hooks/use-reorder-layout-animation.js +65 -0
  23. package/src/node/web-console-ui/provider-presets.js +211 -0
  24. package/src/node/web-console-ui/quick-start-utils.js +790 -0
  25. package/src/node/web-console-ui/utils.js +353 -0
  26. package/src/node/web-console-ui/web-search-utils.js +460 -0
  27. package/src/runtime/config.js +96 -9
  28. package/src/runtime/handler/fallback.js +71 -0
  29. package/src/runtime/handler/field-filter.js +39 -0
  30. package/src/runtime/handler/large-request-log.js +211 -0
  31. package/src/runtime/handler/provider-call.js +185 -15
  32. package/src/runtime/handler/reasoning-effort.js +11 -1
  33. package/src/runtime/handler/tool-name-sanitizer.js +258 -0
  34. package/src/runtime/handler.js +16 -3
  35. package/src/shared/coding-tool-bindings.js +3 -0
@@ -17,7 +17,7 @@ import {
17
17
  } from "./local-server-settings.js";
18
18
  import { appendActivityLogEntry, clearActivityLogFile, createActivityLogEntry, readActivityLogEntries, resolveActivityLogPath } from "./activity-log.js";
19
19
  import { listListeningPids, reclaimPort } from "./port-reclaim.js";
20
- import { probeProvider, probeProviderEndpointMatrix } from "./provider-probe.js";
20
+ import { probeProvider, probeProviderEndpointMatrix, probeFreeTierModels } from "./provider-probe.js";
21
21
  import { installStartup, startupStatus, stopStartup, uninstallStartup } from "./startup-manager.js";
22
22
  import { WEB_CONSOLE_CSS, renderWebConsoleHtml } from "./web-console-assets.js";
23
23
  import {
@@ -48,11 +48,28 @@ import {
48
48
  unpatchFactoryDroidSettingsFile
49
49
  } from "./coding-tool-config.js";
50
50
  import { loginSubscription } from "../runtime/subscription-provider.js";
51
+ import {
52
+ ollamaCheckConnection,
53
+ ollamaListModels,
54
+ ollamaListRunning,
55
+ ollamaShowModel,
56
+ ollamaLoadModel,
57
+ ollamaUnloadModel,
58
+ ollamaPinModel,
59
+ ollamaSetKeepAlive,
60
+ ollamaPullModel,
61
+ ollamaDeleteModel
62
+ } from "./ollama-client.js";
63
+ import { estimateMaxContext, estimateModelVram, formatBytes } from "./ollama-hardware.js";
64
+ import { detectOllamaInstallation, installOllama, startOllamaServer, stopOllamaServer, isOllamaRunning } from "./ollama-install.js";
51
65
  import {
52
66
  CONFIG_VERSION,
53
67
  DEFAULT_MODEL_ALIAS_ID,
54
68
  DEFAULT_PROVIDER_USER_AGENT,
69
+ OLLAMA_KEEP_ALIVE_PATTERN,
70
+ OLLAMA_PROVIDER_TYPE,
55
71
  configHasProvider,
72
+ normalizeOllamaConfig,
56
73
  normalizeRuntimeConfig,
57
74
  resolveProviderApiKey,
58
75
  validateRuntimeConfig
@@ -710,10 +727,11 @@ async function readConfigState(configPath) {
710
727
  if (!rawText.trim()) rawText = buildDefaultConfigRawText();
711
728
 
712
729
  let parseError = "";
730
+ let rawConfig = null;
713
731
  let normalizedConfig = null;
714
732
  try {
715
- const parsed = rawText.trim() ? JSON.parse(rawText) : {};
716
- normalizedConfig = normalizeRuntimeConfig(parsed, { migrateToVersion: CONFIG_VERSION });
733
+ rawConfig = rawText.trim() ? JSON.parse(rawText) : {};
734
+ normalizedConfig = normalizeRuntimeConfig(rawConfig, { migrateToVersion: CONFIG_VERSION });
717
735
  } catch (error) {
718
736
  parseError = error instanceof Error ? error.message : String(error);
719
737
  }
@@ -728,6 +746,7 @@ async function readConfigState(configPath) {
728
746
 
729
747
  return {
730
748
  rawText,
749
+ rawConfig,
731
750
  normalizedConfig,
732
751
  parseError,
733
752
  summary
@@ -1154,6 +1173,17 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
1154
1173
  return normalizeClaudeBindingsInput(bindings);
1155
1174
  }
1156
1175
 
1176
/**
 * Coerce a Factory Droid bindings payload into a fixed five-field shape.
 *
 * Anything that is not a plain (non-array) object is treated as empty.
 * Each mission-specific model falls back to the shared `missionModel`
 * field when its own value is falsy; all model fields are stringified
 * and trimmed. `reasoningEffort` is delegated to
 * `normalizeFactoryDroidReasoningEffort`.
 *
 * @param {object} [bindings] - Raw bindings read from settings or a request body.
 * @returns {{defaultModel: string, missionOrchestratorModel: string, missionWorkerModel: string, missionValidatorModel: string, reasoningEffort: *}}
 */
function normalizeFactoryDroidBindingState(bindings = {}) {
  const isRecord = Boolean(bindings) && typeof bindings === "object" && !Array.isArray(bindings);
  const input = isRecord ? bindings : {};
  const toTrimmed = (value) => String(value || "").trim();
  // Mission roles inherit the shared `missionModel` value when unset.
  const missionRef = (key) => toTrimmed(input[key] || input.missionModel);

  const defaultModel = toTrimmed(input.defaultModel);
  const missionOrchestratorModel = missionRef("missionOrchestratorModel");
  const missionWorkerModel = missionRef("missionWorkerModel");
  const missionValidatorModel = missionRef("missionValidatorModel");
  const reasoningEffort = normalizeFactoryDroidReasoningEffort(input.reasoningEffort);

  return {
    defaultModel,
    missionOrchestratorModel,
    missionWorkerModel,
    missionValidatorModel,
    reasoningEffort
  };
}
1186
+
1157
1187
  function areCodexBindingsEqual(left = {}, right = {}) {
1158
1188
  const normalizedLeft = normalizeCodexBindingState(left);
1159
1189
  const normalizedRight = normalizeCodexBindingState(right);
@@ -1174,6 +1204,18 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
1174
1204
  );
1175
1205
  }
1176
1206
 
1207
/**
 * Compare two Factory Droid binding objects after normalization.
 *
 * Both sides are passed through `normalizeFactoryDroidBindingState`, then
 * every normalized field is compared with strict equality.
 *
 * @param {object} [left] - First bindings object.
 * @param {object} [right] - Second bindings object.
 * @returns {boolean} True when all five normalized fields match.
 */
function areFactoryDroidBindingsEqual(left = {}, right = {}) {
  const a = normalizeFactoryDroidBindingState(left);
  const b = normalizeFactoryDroidBindingState(right);
  const comparedFields = [
    "defaultModel",
    "missionOrchestratorModel",
    "missionWorkerModel",
    "missionValidatorModel",
    "reasoningEffort"
  ];
  return comparedFields.every((field) => a[field] === b[field]);
}
1218
+
1177
1219
  function reconcileManagedRouteBinding(ref, rewriteContext) {
1178
1220
  return rewriteManagedRouteRef(ref, rewriteContext);
1179
1221
  }
@@ -1213,6 +1255,23 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
1213
1255
  };
1214
1256
  }
1215
1257
 
1258
/**
 * Re-derive Factory Droid bindings against a new router config.
 *
 * The default model is reconciled through `reconcileManagedRouteBinding`
 * and, when that yields a falsy value, replaced by
 * `pickDefaultManagedRoute(nextConfig)`. Each mission model is reconciled
 * the same way and falls back to the resolved default model.
 * `reasoningEffort` is carried over from the normalized input unchanged.
 *
 * @param {object} [bindings] - Current bindings (normalized internally).
 * @param {object} [previousConfig] - Config the bindings were written against.
 * @param {object} [nextConfig] - Config the bindings should now match.
 * @returns {object} Bindings with the same five fields as the normalized input.
 */
function reconcileFactoryDroidBindingsForConfig(bindings = {}, previousConfig = {}, nextConfig = {}) {
  const current = normalizeFactoryDroidBindingState(bindings);
  const rewriteContext = buildManagedRouteRewriteContext(previousConfig, nextConfig);

  const defaultModel = reconcileManagedRouteBinding(current.defaultModel, rewriteContext)
    || pickDefaultManagedRoute(nextConfig);
  // Mission roles that cannot be reconciled inherit the default model.
  const resolveMissionRef = (ref) => reconcileManagedRouteBinding(ref, rewriteContext) || defaultModel;

  const missionOrchestratorModel = resolveMissionRef(current.missionOrchestratorModel);
  const missionWorkerModel = resolveMissionRef(current.missionWorkerModel);
  const missionValidatorModel = resolveMissionRef(current.missionValidatorModel);

  return {
    defaultModel,
    missionOrchestratorModel,
    missionWorkerModel,
    missionValidatorModel,
    reasoningEffort: current.reasoningEffort
  };
}
1274
+
1216
1275
  async function readCodexCliGlobalRoutingState(settings = {}, config = null) {
1217
1276
  const endpointUrl = buildAmpClientEndpointUrl(settings);
1218
1277
  const apiKey = String(config?.masterKey || "").trim();
@@ -1445,8 +1504,12 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
1445
1504
  backupExists: false,
1446
1505
  routedViaRouter: false,
1447
1506
  configuredBaseUrl: "",
1507
+ configuredProvider: "",
1448
1508
  bindings: {
1449
1509
  defaultModel: "",
1510
+ missionOrchestratorModel: "",
1511
+ missionWorkerModel: "",
1512
+ missionValidatorModel: "",
1450
1513
  reasoningEffort: ""
1451
1514
  },
1452
1515
  endpointUrl,
@@ -1471,8 +1534,6 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
1471
1534
  && (previousEndpointUrl !== nextEndpointUrl || previousMasterKey !== nextMasterKey)
1472
1535
  );
1473
1536
 
1474
- if (!endpointOrKeyChanged) return false;
1475
-
1476
1537
  const routingState = await readFactoryDroidGlobalRoutingState(previousSettings, previousConfig);
1477
1538
  if (routingState.error) {
1478
1539
  addLog("warn", "Factory Droid route check failed.", routingState.error);
@@ -1481,13 +1542,22 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
1481
1542
  if (!routingState.routedViaRouter) return false;
1482
1543
 
1483
1544
  try {
1545
+ const currentBindings = normalizeFactoryDroidBindingState(routingState.bindings);
1546
+ const bindings = reconcileFactoryDroidBindingsForConfig(currentBindings, previousConfig, nextConfig);
1547
+ const bindingsChanged = !areFactoryDroidBindingsEqual(currentBindings, bindings);
1548
+ if (!endpointOrKeyChanged && !bindingsChanged) return false;
1549
+
1484
1550
  await patchFactoryDroidSettingsFile({
1485
1551
  endpointUrl: nextEndpointUrl,
1486
1552
  apiKey: nextMasterKey,
1487
- bindings: routingState.bindings,
1553
+ bindings,
1488
1554
  captureBackup: false
1489
1555
  });
1490
- addLog("info", "Updated Factory Droid route to match the local router.", buildFactoryDroidEndpointUrl(nextSettings));
1556
+ if (endpointOrKeyChanged) {
1557
+ addLog("info", "Updated Factory Droid route to match the local router.", buildFactoryDroidEndpointUrl(nextSettings));
1558
+ } else {
1559
+ addLog("info", "Updated Factory Droid bindings to match the saved router config.", bindings.defaultModel || "Default");
1560
+ }
1491
1561
  return true;
1492
1562
  } catch (error) {
1493
1563
  addLog("warn", "Factory Droid route update failed.", error instanceof Error ? error.message : String(error));
@@ -2081,6 +2151,19 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
2081
2151
  const previousConfigState = await readConfigState(configPath);
2082
2152
  const previousConfig = previousConfigState.normalizedConfig || buildDefaultConfigObject();
2083
2153
  const previousLocalServer = getConfigLocalServer(previousConfigState);
2154
+
2155
+ // Safeguard: preserve existing top-level keys absent from the incoming config.
2156
+ // This prevents partial writes (e.g. from scoped Ollama endpoints) from wiping
2157
+ // unrelated config sections like masterKey, modelAliases, amp, metadata, etc.
2158
+ const previousRaw = previousConfigState.rawConfig;
2159
+ if (previousRaw && typeof previousRaw === "object" && parsed && typeof parsed === "object") {
2160
+ for (const key of Object.keys(previousRaw)) {
2161
+ if (!(key in parsed)) {
2162
+ parsed[key] = previousRaw[key];
2163
+ }
2164
+ }
2165
+ }
2166
+
2084
2167
  ignoreConfigWatchUntil = Date.now() + 800;
2085
2168
  const savedConfig = await writeConfigFile(parsed, configPath, { migrateToVersion: CONFIG_VERSION });
2086
2169
  resolveActivityLogSnapshot(savedConfig);
@@ -2201,6 +2284,12 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
2201
2284
  const claudeCodeGlobal = await readClaudeCodeGlobalRoutingState(configLocalServer, configState.normalizedConfig);
2202
2285
  const factoryDroidGlobal = await readFactoryDroidGlobalRoutingState(configLocalServer, configState.normalizedConfig);
2203
2286
  const webSearch = await readWebSearchState(configState.normalizedConfig).catch(() => null);
2287
+ const ollamaConfig = configState.normalizedConfig?.ollama;
2288
+ const ollamaBaseUrl = ollamaConfig?.baseUrl || "http://localhost:11434";
2289
+ const ollamaInstallation = detectOllamaInstallation();
2290
+ const ollamaState = ollamaInstallation.installed
2291
+ ? await ollamaCheckConnection(ollamaBaseUrl).catch(() => ({ ok: false }))
2292
+ : { ok: false };
2204
2293
 
2205
2294
  return {
2206
2295
  web: {
@@ -2231,6 +2320,14 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
2231
2320
  claudeCode: claudeCodeGlobal,
2232
2321
  factoryDroid: factoryDroidGlobal
2233
2322
  },
2323
+ ollama: {
2324
+ installed: ollamaInstallation.installed,
2325
+ connected: ollamaState.ok === true,
2326
+ baseUrl: ollamaBaseUrl,
2327
+ enabled: ollamaConfig?.enabled === true,
2328
+ version: ollamaInstallation.version || null,
2329
+ path: ollamaInstallation.path || null
2330
+ },
2234
2331
  defaults: {
2235
2332
  providerUserAgent: DEFAULT_PROVIDER_USER_AGENT
2236
2333
  },
@@ -2489,6 +2586,8 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
2489
2586
  nextSettings: persistedLocalServer.savedSettings
2490
2587
  });
2491
2588
  }
2589
+ // Non-blocking: preload Ollama models after router start
2590
+ preloadOllamaModels(configState.normalizedConfig).catch(() => {});
2492
2591
  return {
2493
2592
  message: restart ? "Router restarted." : "Router started.",
2494
2593
  snapshot: await buildSnapshot()
@@ -2501,6 +2600,29 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
2501
2600
  }
2502
2601
  }
2503
2602
 
2603
/**
 * Best-effort preload of the Ollama models listed in `config.ollama.autoLoadModels`.
 *
 * Does nothing unless `config.ollama.enabled` is truthy and `autoLoadModels`
 * is a non-empty array. Models are loaded one after another; each outcome is
 * written to the activity log via `addLog`. This function never rejects
 * because of a connection or load failure: such failures are logged instead,
 * so fire-and-forget callers do not lose them.
 *
 * @param {object} config - Normalized runtime config (reads `config.ollama`).
 * @returns {Promise<void>}
 */
async function preloadOllamaModels(config) {
  const ollamaConfig = config?.ollama;
  if (!ollamaConfig?.enabled) return;

  // Guard against a malformed (non-array) value, e.g. a string that would
  // otherwise be iterated character by character.
  const autoLoadModels = Array.isArray(ollamaConfig.autoLoadModels) ? ollamaConfig.autoLoadModels : [];
  if (autoLoadModels.length === 0) return;

  const baseUrl = ollamaConfig.baseUrl || "http://localhost:11434";

  // A rejected connection check is treated as "not reachable" so the skip is
  // logged here rather than swallowed by the caller's empty catch.
  const connected = await ollamaCheckConnection(baseUrl).catch(() => ({ ok: false }));
  if (!connected?.ok) {
    addLog("info", "Ollama not reachable, skipping model preload.");
    return;
  }

  for (const modelId of autoLoadModels) {
    // Per-model keep-alive wins over the global default; "5m" is the last resort.
    const keepAlive = ollamaConfig.managedModels?.[modelId]?.keepAlive
      || ollamaConfig.defaultKeepAlive || "5m";
    const result = await ollamaLoadModel(baseUrl, modelId, keepAlive).catch((error) => ({
      ok: false,
      error: error instanceof Error ? error.message : String(error)
    }));
    if (result?.ok) {
      addLog("info", `Ollama: Preloaded ${modelId} (${Math.round(result.loadDurationMs || 0)}ms).`);
    } else {
      // Include the failure reason, matching the /api/ollama/load handler's log.
      addLog("warn", `Ollama: Failed to preload ${modelId}.`, result?.error || "");
    }
  }
}
2625
+
2504
2626
  async function shutdown(reason = "web-console-closed") {
2505
2627
  if (closePromise) return closePromise;
2506
2628
  closing = true;
@@ -2812,6 +2934,31 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
2812
2934
  return;
2813
2935
  }
2814
2936
 
2937
+ if (method === "POST" && requestUrl.pathname === "/api/config/probe-free-tier-models") {
2938
+ const body = await readJsonBody(req);
2939
+ const baseUrl = String(body?.baseUrl || "").trim();
2940
+ const apiKeyEnv = String(body?.apiKeyEnv || "").trim();
2941
+ const apiKey = String(body?.apiKey || "").trim();
2942
+ const modelIds = Array.isArray(body?.modelIds) ? body.modelIds.map((id) => String(id || "").trim()).filter(Boolean) : [];
2943
+
2944
+ if (!baseUrl || modelIds.length === 0) {
2945
+ sendJson(res, 400, { error: "baseUrl and at least one modelId are required." });
2946
+ return;
2947
+ }
2948
+
2949
+ try {
2950
+ const finalApiKey = await resolveProbeApiKey(apiKeyEnv, apiKey, { context: "probing free-tier models" });
2951
+ addLog("info", "Probing free-tier model availability.", `${modelIds.length} model(s)`);
2952
+ const result = await probeFreeTierModels({ baseUrl, apiKey: finalApiKey, modelIds, timeoutMs: 6000 });
2953
+ const freeCount = Object.values(result).filter((r) => r?.freeTier === true).length;
2954
+ addLog("success", "Free-tier probe finished.", `${freeCount}/${modelIds.length} model(s) on free tier`);
2955
+ sendJson(res, 200, { result });
2956
+ } catch (error) {
2957
+ sendJson(res, error?.statusCode || 500, { error: error instanceof Error ? error.message : String(error) });
2958
+ }
2959
+ return;
2960
+ }
2961
+
2815
2962
  if (method === "POST" && requestUrl.pathname === "/api/config/litellm-context-lookup") {
2816
2963
  const body = await readJsonBody(req);
2817
2964
  const models = Array.isArray(body?.models) ? body.models.map((entry) => String(entry || "").trim()).filter(Boolean) : [];
@@ -3266,7 +3413,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
3266
3413
  const body = await readJsonBody(req);
3267
3414
  const effortLevel = String(body?.effortLevel || body?.thinkingLevel || "").trim();
3268
3415
  if (effortLevel && !normalizeClaudeCodeEffortLevel(effortLevel)) {
3269
- sendJson(res, 400, { error: `Invalid effort level '${effortLevel}'. Valid values: low, medium, high, max.` });
3416
+ sendJson(res, 400, { error: `Invalid effort level '${effortLevel}'. Valid values: low, medium, high, xhigh, max.` });
3270
3417
  return;
3271
3418
  }
3272
3419
  const result = await patchClaudeCodeEffortLevel({
@@ -3331,6 +3478,9 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
3331
3478
 
3332
3479
  const bindings = {
3333
3480
  defaultModel: String(body?.bindings?.defaultModel || "").trim(),
3481
+ missionOrchestratorModel: String(body?.bindings?.missionOrchestratorModel || body?.bindings?.missionModel || "").trim(),
3482
+ missionWorkerModel: String(body?.bindings?.missionWorkerModel || body?.bindings?.missionModel || "").trim(),
3483
+ missionValidatorModel: String(body?.bindings?.missionValidatorModel || body?.bindings?.missionModel || "").trim(),
3334
3484
  reasoningEffort: normalizeFactoryDroidReasoningEffort(body?.bindings?.reasoningEffort)
3335
3485
  };
3336
3486
  const patchResult = await patchFactoryDroidSettingsFile({
@@ -3378,6 +3528,9 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
3378
3528
 
3379
3529
  const bindings = {
3380
3530
  defaultModel: String(body?.bindings?.defaultModel || "").trim(),
3531
+ missionOrchestratorModel: String(body?.bindings?.missionOrchestratorModel || body?.bindings?.missionModel || "").trim(),
3532
+ missionWorkerModel: String(body?.bindings?.missionWorkerModel || body?.bindings?.missionModel || "").trim(),
3533
+ missionValidatorModel: String(body?.bindings?.missionValidatorModel || body?.bindings?.missionModel || "").trim(),
3381
3534
  reasoningEffort: normalizeFactoryDroidReasoningEffort(body?.bindings?.reasoningEffort)
3382
3535
  };
3383
3536
  const patchResult = await patchFactoryDroidSettingsFile({
@@ -3733,6 +3886,323 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
3733
3886
  return;
3734
3887
  }
3735
3888
 
3889
+ // ── Ollama API routes ──────────────────────────────────────────────
3890
/**
 * Pick and validate the Ollama base URL for a console request.
 *
 * Uses the first truthy value of the request-supplied URL, the configured
 * URL, and "http://localhost:11434". The URL is accepted only when it is
 * http(s) and its host is `localhost`, `127.0.0.1`, the IPv6 loopback, or a
 * name ending in `.local`; this keeps the console from being pointed at
 * arbitrary hosts.
 *
 * @param {*} bodyBaseUrl - Base URL supplied in the request body (may be empty).
 * @param {*} configBaseUrl - Base URL from the saved config (may be empty).
 * @returns {string|null} The URL origin (scheme + host + port, no path), or
 *   null when the value is unparseable or not an allowed host/protocol.
 */
function resolveOllamaBaseUrl(bodyBaseUrl, configBaseUrl) {
  const raw = String(bodyBaseUrl || configBaseUrl || "http://localhost:11434").trim().replace(/\/+$/, "");

  let parsed;
  try {
    parsed = new URL(raw);
  } catch {
    return null;
  }

  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return null;

  const host = parsed.hostname;
  // URL.hostname keeps the square brackets on IPv6 literals, so the loopback
  // address shows up as "[::1]"; the bare "::1" form never matches.
  const isLoopback = host === "localhost" || host === "127.0.0.1" || host === "[::1]";
  if (!isLoopback && !host.endsWith(".local")) return null;

  return parsed.origin;
}
3894
+
3895
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/status") {
3896
+ const body = await readJsonBody(req);
3897
+ const configState = await readConfigState(configPath);
3898
+ const ollamaConfig = configState.normalizedConfig?.ollama || {};
3899
+ const baseUrl = resolveOllamaBaseUrl(body?.baseUrl, ollamaConfig.baseUrl);
3900
+ if (!baseUrl) { sendJson(res, 400, { error: "Invalid Ollama base URL" }); return; }
3901
+ const installation = detectOllamaInstallation();
3902
+ const connection = installation.installed
3903
+ ? await ollamaCheckConnection(baseUrl)
3904
+ : { ok: false, error: "Ollama not installed" };
3905
+ const running = connection.ok ? await ollamaListRunning(baseUrl) : { ok: false, models: [] };
3906
+ sendJson(res, 200, {
3907
+ installed: installation.installed,
3908
+ version: installation.version,
3909
+ path: installation.path,
3910
+ connected: connection.ok,
3911
+ running: running.models || [],
3912
+ baseUrl
3913
+ });
3914
+ return;
3915
+ }
3916
+
3917
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/models") {
3918
+ const body = await readJsonBody(req);
3919
+ const configState = await readConfigState(configPath);
3920
+ const ollamaConfig = configState.normalizedConfig?.ollama || {};
3921
+ const baseUrl = resolveOllamaBaseUrl(body?.baseUrl, ollamaConfig.baseUrl);
3922
+ if (!baseUrl) { sendJson(res, 400, { error: "Invalid Ollama base URL" }); return; }
3923
+ const modelsResult = await ollamaListModels(baseUrl);
3924
+ if (!modelsResult.ok) {
3925
+ sendJson(res, 502, { error: modelsResult.error || "Failed to list Ollama models" });
3926
+ return;
3927
+ }
3928
+ const runningResult = await ollamaListRunning(baseUrl);
3929
+ const runningMap = new Map((runningResult.models || []).map((m) => [m.name, m]));
3930
+ const ollamaProvider = (configState.normalizedConfig?.providers || []).find((p) => p.type === OLLAMA_PROVIDER_TYPE);
3931
+ const routedModelIds = new Set((ollamaProvider?.models || []).map((m) => m.id));
3932
+ const enriched = modelsResult.models.map((model) => {
3933
+ const running = runningMap.get(model.name);
3934
+ const managed = ollamaConfig.managedModels?.[model.name];
3935
+ const hwEstimate = model.contextLength && model.parameterSize
3936
+ ? estimateModelVram(model.parameterSize, model.quantizationLevel, model.contextLength)
3937
+ : null;
3938
+ return {
3939
+ ...model,
3940
+ loaded: !!running,
3941
+ sizeVram: running?.sizeVram || 0,
3942
+ sizeVramFormatted: running?.sizeVram ? formatBytes(running.sizeVram) : "",
3943
+ expiresAt: running?.expiresAt || "",
3944
+ isPinned: running?.isPinned || managed?.pinned || false,
3945
+ processor: running?.processor || "",
3946
+ keepAlive: managed?.keepAlive || ollamaConfig.defaultKeepAlive || "5m",
3947
+ autoLoad: managed?.autoLoad || false,
3948
+ inRouter: routedModelIds.has(model.name),
3949
+ estimatedVram: hwEstimate ? formatBytes(hwEstimate.totalBytes) : "",
3950
+ estimatedVramBytes: hwEstimate?.totalBytes || 0
3951
+ };
3952
+ });
3953
+ sendJson(res, 200, { models: enriched });
3954
+ return;
3955
+ }
3956
+
3957
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/load") {
3958
+ const body = await readJsonBody(req);
3959
+ const model = String(body?.model || "").trim();
3960
+ if (!model) { sendJson(res, 400, { error: "model is required" }); return; }
3961
+ const configState = await readConfigState(configPath);
3962
+ const ollamaConfig = configState.normalizedConfig?.ollama || {};
3963
+ const baseUrl = resolveOllamaBaseUrl(body?.baseUrl, ollamaConfig.baseUrl);
3964
+ if (!baseUrl) { sendJson(res, 400, { error: "Invalid Ollama base URL" }); return; }
3965
+ const keepAlive = body?.keepAlive || ollamaConfig.managedModels?.[model]?.keepAlive || ollamaConfig.defaultKeepAlive || "5m";
3966
+ addLog("info", `Ollama: Loading ${model}…`);
3967
+ const result = await ollamaLoadModel(baseUrl, model, keepAlive);
3968
+ if (result.ok) addLog("success", `Ollama: Loaded ${model} (${Math.round(result.loadDurationMs || 0)}ms).`);
3969
+ else addLog("warn", `Ollama: Failed to load ${model}.`, result.error || "");
3970
+ sendJson(res, result.ok ? 200 : 502, result);
3971
+ return;
3972
+ }
3973
+
3974
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/unload") {
3975
+ const body = await readJsonBody(req);
3976
+ const model = String(body?.model || "").trim();
3977
+ if (!model) { sendJson(res, 400, { error: "model is required" }); return; }
3978
+ const configState = await readConfigState(configPath);
3979
+ const ollamaConfig = configState.normalizedConfig?.ollama || {};
3980
+ const baseUrl = resolveOllamaBaseUrl(body?.baseUrl, ollamaConfig.baseUrl);
3981
+ if (!baseUrl) { sendJson(res, 400, { error: "Invalid Ollama base URL" }); return; }
3982
+ addLog("info", `Ollama: Unloading ${model}…`);
3983
+ const result = await ollamaUnloadModel(baseUrl, model);
3984
+ if (result.ok) addLog("success", `Ollama: Unloaded ${model}.`);
3985
+ sendJson(res, result.ok ? 200 : 502, result);
3986
+ return;
3987
+ }
3988
+
3989
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/pin") {
3990
+ const body = await readJsonBody(req);
3991
+ const model = String(body?.model || "").trim();
3992
+ if (!model) { sendJson(res, 400, { error: "model is required" }); return; }
3993
+ const pinned = body?.pinned === true;
3994
+ const configState = await readConfigState(configPath);
3995
+ const rawConfig = configState.rawConfig || {};
3996
+ const ollamaConfig = configState.normalizedConfig?.ollama || {};
3997
+ const baseUrl = resolveOllamaBaseUrl(body?.baseUrl, ollamaConfig.baseUrl);
3998
+ if (!baseUrl) { sendJson(res, 400, { error: "Invalid Ollama base URL" }); return; }
3999
+ const pinResult = pinned
4000
+ ? await ollamaPinModel(baseUrl, model)
4001
+ : await ollamaSetKeepAlive(baseUrl, model, ollamaConfig.managedModels?.[model]?.keepAlive || ollamaConfig.defaultKeepAlive || "5m");
4002
+ if (!pinResult.ok) { sendJson(res, 502, { error: pinResult.error || "Failed to update model pin state" }); return; }
4003
+ const nextOllama = { ...(rawConfig.ollama || {}), managedModels: { ...(rawConfig.ollama?.managedModels || {}) } };
4004
+ nextOllama.managedModels[model] = { ...(nextOllama.managedModels[model] || {}), pinned };
4005
+ await writeAndBroadcastConfig({ ...rawConfig, ollama: nextOllama }, { source: "ollama-pin" });
4006
+ sendJson(res, 200, { ok: true, pinned });
4007
+ return;
4008
+ }
4009
+
4010
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/keep-alive") {
4011
+ const body = await readJsonBody(req);
4012
+ const model = String(body?.model || "").trim();
4013
+ const keepAlive = String(body?.keepAlive || "").trim();
4014
+ if (!model) { sendJson(res, 400, { error: "model is required" }); return; }
4015
+ if (!OLLAMA_KEEP_ALIVE_PATTERN.test(keepAlive)) { sendJson(res, 400, { error: "Invalid keep_alive value" }); return; }
4016
+ const configState = await readConfigState(configPath);
4017
+ const rawConfig = configState.rawConfig || {};
4018
+ const ollamaConfig = configState.normalizedConfig?.ollama || {};
4019
+ const baseUrl = resolveOllamaBaseUrl(body?.baseUrl, ollamaConfig.baseUrl);
4020
+ if (!baseUrl) { sendJson(res, 400, { error: "Invalid Ollama base URL" }); return; }
4021
+ const kaResult = await ollamaSetKeepAlive(baseUrl, model, keepAlive);
4022
+ if (!kaResult.ok) { sendJson(res, 502, { error: kaResult.error || "Failed to update keep-alive" }); return; }
4023
+ const nextOllama = { ...(rawConfig.ollama || {}), managedModels: { ...(rawConfig.ollama?.managedModels || {}) } };
4024
+ nextOllama.managedModels[model] = { ...(nextOllama.managedModels[model] || {}), keepAlive };
4025
+ await writeAndBroadcastConfig({ ...rawConfig, ollama: nextOllama }, { source: "ollama-keep-alive" });
4026
+ sendJson(res, 200, { ok: true });
4027
+ return;
4028
+ }
4029
+
4030
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/sync-router") {
4031
+ const body = await readJsonBody(req);
4032
+ const configState = await readConfigState(configPath);
4033
+ const rawConfig = configState.rawConfig || {};
4034
+ const ollamaConfig = configState.normalizedConfig?.ollama || {};
4035
+ const baseUrl = resolveOllamaBaseUrl(body?.baseUrl, ollamaConfig.baseUrl);
4036
+ if (!baseUrl) { sendJson(res, 400, { error: "Invalid Ollama base URL" }); return; }
4037
+ const modelsResult = await ollamaListModels(baseUrl);
4038
+ if (!modelsResult.ok) { sendJson(res, 502, { error: modelsResult.error || "Failed to list Ollama models" }); return; }
4039
+ const modelIds = modelsResult.models.map((m) => m.name);
4040
+ const providers = [...(rawConfig.providers || [])];
4041
+ let ollamaProvider = providers.find((p) => p.type === OLLAMA_PROVIDER_TYPE);
4042
+ const previousModelIds = new Set((ollamaProvider?.models || []).map((m) => typeof m === "string" ? m : m?.id));
4043
+ if (!ollamaProvider) {
4044
+ ollamaProvider = { id: "ollama", name: "Ollama", type: OLLAMA_PROVIDER_TYPE, baseUrl: baseUrl + "/v1", models: [] };
4045
+ providers.push(ollamaProvider);
4046
+ }
4047
+ ollamaProvider.baseUrl = baseUrl + "/v1";
4048
+ ollamaProvider.models = modelIds.map((id) => {
4049
+ const existing = (ollamaProvider.models || []).find((m) => (typeof m === "string" ? m : m?.id) === id);
4050
+ if (existing && typeof existing === "object") return existing;
4051
+ const details = modelsResult.models.find((m) => m.name === id);
4052
+ return { id, contextWindow: details?.contextLength || undefined };
4053
+ });
4054
+ const nextConfig = { ...rawConfig, providers };
4055
+ const { snapshot } = await writeAndBroadcastConfig(nextConfig, { source: "ollama-sync" });
4056
+ const addedCount = modelIds.filter((id) => !previousModelIds.has(id)).length;
4057
+ const removedCount = [...previousModelIds].filter((id) => !modelIds.includes(id)).length;
4058
+ addLog("info", `Ollama: Synced ${modelIds.length} models (${addedCount} added, ${removedCount} removed).`);
4059
+ sendJson(res, 200, { ok: true, modelCount: modelIds.length, addedCount, removedCount });
4060
+ return;
4061
+ }
4062
+
4063
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/add-model") {
4064
+ const body = await readJsonBody(req);
4065
+ const model = String(body?.model || "").trim();
4066
+ if (!model) { sendJson(res, 400, { error: "model is required" }); return; }
4067
+ const configState = await readConfigState(configPath);
4068
+ const rawConfig = configState.rawConfig || {};
4069
+ const ollamaConfig = configState.normalizedConfig?.ollama || {};
4070
+ const baseUrl = (ollamaConfig.baseUrl || "http://localhost:11434").replace(/\/+$/, "");
4071
+ const providers = [...(rawConfig.providers || [])];
4072
+ let ollamaProvider = providers.find((p) => p.type === OLLAMA_PROVIDER_TYPE);
4073
+ if (!ollamaProvider) {
4074
+ ollamaProvider = { id: "ollama", name: "Ollama", type: OLLAMA_PROVIDER_TYPE, baseUrl: baseUrl + "/v1", models: [] };
4075
+ providers.push(ollamaProvider);
4076
+ }
4077
+ const existing = (ollamaProvider.models || []).find((m) => (typeof m === "string" ? m : m?.id) === model);
4078
+ if (existing) { sendJson(res, 200, { ok: true, added: false, reason: "already exists" }); return; }
4079
+ const contextLength = body?.contextLength || undefined;
4080
+ ollamaProvider.models = [...(ollamaProvider.models || []), { id: model, ...(contextLength ? { contextWindow: contextLength } : {}) }];
4081
+ await writeAndBroadcastConfig({ ...rawConfig, providers }, { source: "ollama-add-model" });
4082
+ addLog("info", `Ollama: Added ${model} to router.`);
4083
+ sendJson(res, 200, { ok: true, added: true });
4084
+ return;
4085
+ }
4086
+
4087
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/remove-model") {
4088
+ const body = await readJsonBody(req);
4089
+ const model = String(body?.model || "").trim();
4090
+ if (!model) { sendJson(res, 400, { error: "model is required" }); return; }
4091
+ const configState = await readConfigState(configPath);
4092
+ const rawConfig = configState.rawConfig || {};
4093
+ const providers = [...(rawConfig.providers || [])];
4094
+ const ollamaProvider = providers.find((p) => p.type === OLLAMA_PROVIDER_TYPE);
4095
+ if (!ollamaProvider) { sendJson(res, 200, { ok: true, removed: false }); return; }
4096
+ const before = (ollamaProvider.models || []).length;
4097
+ ollamaProvider.models = (ollamaProvider.models || []).filter((m) => (typeof m === "string" ? m : m?.id) !== model);
4098
+ const removed = ollamaProvider.models.length < before;
4099
+ if (removed) {
4100
+ await writeAndBroadcastConfig({ ...rawConfig, providers }, { source: "ollama-remove-model" });
4101
+ addLog("info", `Ollama: Removed ${model} from router.`);
4102
+ }
4103
+ sendJson(res, 200, { ok: true, removed });
4104
+ return;
4105
+ }
4106
+
4107
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/save-settings") {
4108
+ const body = await readJsonBody(req);
4109
+ const configState = await readConfigState(configPath);
4110
+ const rawConfig = configState.rawConfig || {};
4111
+ const nextOllama = { ...(rawConfig.ollama || {}) };
4112
+ if (body?.baseUrl !== undefined) nextOllama.baseUrl = String(body.baseUrl).trim();
4113
+ if (body?.enabled !== undefined) nextOllama.enabled = body.enabled !== false;
4114
+ if (body?.autoConnect !== undefined) nextOllama.autoConnect = body.autoConnect !== false;
4115
+ if (body?.defaultKeepAlive !== undefined && OLLAMA_KEEP_ALIVE_PATTERN.test(String(body.defaultKeepAlive))) {
4116
+ nextOllama.defaultKeepAlive = String(body.defaultKeepAlive);
4117
+ }
4118
+ await writeAndBroadcastConfig({ ...rawConfig, ollama: nextOllama }, { source: "ollama-settings" });
4119
+ sendJson(res, 200, { ok: true });
4120
+ return;
4121
+ }
4122
+
4123
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/auto-load") {
4124
+ const body = await readJsonBody(req);
4125
+ const model = String(body?.model || "").trim();
4126
+ const autoLoad = body?.autoLoad === true;
4127
+ if (!model) { sendJson(res, 400, { error: "model is required" }); return; }
4128
+ const configState = await readConfigState(configPath);
4129
+ const rawConfig = configState.rawConfig || {};
4130
+ const nextOllama = { ...(rawConfig.ollama || {}), managedModels: { ...(rawConfig.ollama?.managedModels || {}) } };
4131
+ nextOllama.managedModels[model] = { ...(nextOllama.managedModels[model] || {}), autoLoad };
4132
+ const autoLoadModels = Object.entries(nextOllama.managedModels)
4133
+ .filter(([, v]) => v?.autoLoad).map(([k]) => k);
4134
+ nextOllama.autoLoadModels = autoLoadModels;
4135
+ await writeAndBroadcastConfig({ ...rawConfig, ollama: nextOllama }, { source: "ollama-auto-load" });
4136
+ sendJson(res, 200, { ok: true, autoLoad });
4137
+ return;
4138
+ }
4139
+
4140
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/install") {
4141
+ const installation = detectOllamaInstallation();
4142
+ if (installation.installed) {
4143
+ sendJson(res, 200, { ok: true, alreadyInstalled: true, version: installation.version });
4144
+ return;
4145
+ }
4146
+ addLog("info", "Ollama: Starting installation…");
4147
+ const result = await installOllama({
4148
+ onProgress: (event) => pushEvent("ollama-install-progress", event)
4149
+ });
4150
+ if (result.ok && !result.alreadyInstalled) {
4151
+ addLog("success", `Ollama: Installed (${result.version || "unknown"}).`);
4152
+ const started = await startOllamaServer();
4153
+ sendJson(res, 200, { ...result, serverStarted: started.ok });
4154
+ broadcastState();
4155
+ } else if (result.ok) {
4156
+ sendJson(res, 200, result);
4157
+ } else {
4158
+ addLog("error", "Ollama: Installation failed.", result.error || "");
4159
+ sendJson(res, 500, result);
4160
+ }
4161
+ return;
4162
+ }
4163
+
4164
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/start-server") {
4165
+ const result = await startOllamaServer();
4166
+ if (result.ok) addLog("success", "Ollama: Server started.");
4167
+ else addLog("warn", "Ollama: Failed to start server.", result.error || "");
4168
+ sendJson(res, result.ok ? 200 : 502, result);
4169
+ broadcastState();
4170
+ return;
4171
+ }
4172
+
4173
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/stop-server") {
4174
+ const result = stopOllamaServer();
4175
+ if (result.ok) addLog("info", "Ollama: Server stopped.");
4176
+ sendJson(res, 200, result);
4177
+ broadcastState();
4178
+ return;
4179
+ }
4180
+
4181
+ if (method === "POST" && requestUrl.pathname === "/api/ollama/context-length") {
4182
+ const body = await readJsonBody(req);
4183
+ const model = String(body?.model || "").trim();
4184
+ const contextLength = Number(body?.contextLength);
4185
+ if (!model) { sendJson(res, 400, { error: "model is required" }); return; }
4186
+ if (!Number.isFinite(contextLength) || contextLength <= 0) { sendJson(res, 400, { error: "contextLength must be a positive number" }); return; }
4187
+ const configState = await readConfigState(configPath);
4188
+ const rawConfig = configState.rawConfig || {};
4189
+ const nextOllama = { ...(rawConfig.ollama || {}), managedModels: { ...(rawConfig.ollama?.managedModels || {}) } };
4190
+ nextOllama.managedModels[model] = { ...(nextOllama.managedModels[model] || {}), contextLength: Math.round(contextLength) };
4191
+ // Also update the provider model entry contextWindow
4192
+ const providers = [...(rawConfig.providers || [])];
4193
+ const ollamaProvider = providers.find((p) => p.type === OLLAMA_PROVIDER_TYPE);
4194
+ if (ollamaProvider) {
4195
+ ollamaProvider.models = (ollamaProvider.models || []).map((m) => {
4196
+ const mid = typeof m === "string" ? m : m?.id;
4197
+ if (mid === model) return { ...(typeof m === "object" ? m : { id: m }), contextWindow: Math.round(contextLength) };
4198
+ return m;
4199
+ });
4200
+ }
4201
+ await writeAndBroadcastConfig({ ...rawConfig, ollama: nextOllama, providers }, { source: "ollama-context-length" });
4202
+ sendJson(res, 200, { ok: true });
4203
+ return;
4204
+ }
4205
+
3736
4206
  if (method === "POST" && requestUrl.pathname === "/api/exit") {
3737
4207
  sendJson(res, 200, { ok: true, message: "Closing web console." });
3738
4208
  setTimeout(() => {