offgrid-ai 0.9.3 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "offgrid-ai",
3
- "version": "0.9.3",
3
+ "version": "0.9.5",
4
4
  "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
5
5
  "author": "Eeshan Srivastava (https://eeshans.com)",
6
6
  "type": "module",
@@ -1,7 +1,7 @@
1
1
  // ── Unload model from server memory after benchmark ────────────────────────────
2
2
 
3
3
  import { backendFor } from "../backends.mjs";
4
- import { apiRootUrl } from "../process.mjs";
4
+ import { apiRootUrl, serverModelIds } from "../process.mjs";
5
5
  import { existsSync } from "node:fs";
6
6
  import { readFile, writeFile } from "node:fs/promises";
7
7
  import { join } from "node:path";
@@ -33,14 +33,67 @@ export async function unloadModelFromServer(profile) {
33
33
  }
34
34
 
35
35
  if (backend.id === "omlx") {
36
- // oMLX does not expose a model-unload endpoint. The model stays resident
37
- // until the oMLX server process is stopped.
38
- return { unloaded: false, backend: backend.id, reason: "no unload API available" };
36
+ return await unloadOmlxModel(profile);
39
37
  }
40
38
 
41
39
  return { unloaded: false, backend: backend.id, reason: "unsupported backend" };
42
40
  }
43
41
 
42
/**
 * Ask an oMLX server to unload a model through its admin API
 * (`POST {base}/admin/api/models/{id}/unload`).
 *
 * Failures of the HTTP calls are reported through the returned object
 * rather than thrown.
 *
 * @param {object} profile - Profile; reads `baseUrl`, `modelAlias`, `omlxModel` and `id`.
 * @returns {Promise<{unloaded: boolean, backend: string, modelId?: string, reason?: string, error?: string}>}
 *   `unloaded` is true when the server confirmed the unload or reported that
 *   the model was not loaded in the first place.
 */
async function unloadOmlxModel(profile) {
  const modelId = profile.modelAlias || profile.omlxModel || profile.id;

  // Without a usable id there is nothing to address on the server; report that
  // explicitly instead of surfacing a TypeError from `toLowerCase()` below.
  if (typeof modelId !== "string") {
    return {
      unloaded: false,
      backend: "omlx",
      modelId,
      error: "No model id found on the profile (expected modelAlias, omlxModel, or id).",
    };
  }

  // The admin API lives next to the OpenAI-style `/v1` root, so strip that suffix.
  const baseUrl = profile.baseUrl?.replace(/\/v1\/?$/u, "") || "";
  const adminUrl = `${baseUrl}/admin/api/models`;

  try {
    // Prefer the server's own spelling of the id (case-insensitive match);
    // fall back to the configured id when the server does not list it.
    const wanted = modelId.toLowerCase();
    const ids = await serverModelIds(profile.baseUrl);
    const match = ids.find((id) => id.toLowerCase() === wanted);
    const targetId = match ?? modelId;

    const response = await fetch(`${adminUrl}/${encodeURIComponent(targetId)}/unload`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      signal: AbortSignal.timeout(30000),
    });

    if (response.ok) {
      return { unloaded: true, backend: "omlx", modelId: targetId };
    }

    const detail = await responseErrorDetail(response);

    // A 400 saying the model is "not loaded" means the desired end state
    // already holds, so it is reported as a successful unload.
    if (response.status === 400 && /not loaded/i.test(detail)) {
      return { unloaded: true, backend: "omlx", modelId: targetId, reason: "model was not loaded" };
    }

    if (response.status === 401 || response.status === 403) {
      return {
        unloaded: false,
        backend: "omlx",
        modelId: targetId,
        error: "oMLX admin authentication required. Enable skip_api_key_verification in oMLX settings, or unload manually from the admin panel.",
      };
    }

    return { unloaded: false, backend: "omlx", modelId: targetId, error: `HTTP ${response.status}: ${detail}` };
  } catch (err) {
    if (err?.name === "AbortError" || err?.name === "TimeoutError") {
      return { unloaded: false, backend: "omlx", modelId, error: "Unload request timed out. The model may still be unloading in the background." };
    }
    // Guard against non-Error throwables so this handler itself never throws.
    return { unloaded: false, backend: "omlx", modelId, error: err?.message ?? String(err) };
  }
}
85
+
86
/**
 * Extract a human-readable error description from a failed HTTP response.
 *
 * Reads the body as text. When the body is JSON with a `detail` or `message`
 * field, that field is used; otherwise the raw body text is returned.
 *
 * @param {{ text: () => Promise<string> }} response - Response-like object.
 * @returns {Promise<string>} Always a string; empty when the body is empty
 *   or could not be read.
 */
async function responseErrorDetail(response) {
  // Reading the body is best-effort: an unreadable body is treated as empty.
  const text = await response.text().catch(() => "");
  if (!text) return "";

  let body;
  try {
    body = JSON.parse(text);
  } catch {
    // Not JSON: the raw text is the best description available.
    return text;
  }

  const detail = body?.detail ?? body?.message;
  if (detail === undefined || detail === null) return text;

  // Structured payloads (arrays/objects) would otherwise render as
  // "[object Object]" once the caller interpolates or regex-tests the value.
  return typeof detail === "string" ? detail : JSON.stringify(detail);
}
96
+
44
97
  export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics) {
45
98
  const metadataPath = join(runDirectory, "metadata.json");
46
99
  const metadata = JSON.parse(await readFile(metadataPath, "utf8"));
@@ -4,7 +4,7 @@ import { join } from "node:path";
4
4
  import { ensureDirs } from "../config.mjs";
5
5
  import { backendFor } from "../backends.mjs";
6
6
  import { hasPi, hasPiModel, syncPiConfig } from "../harness-pi.mjs";
7
- import { serverReady, startServer, waitForReady, stopProfile } from "../process.mjs";
7
+ import { serverReady, startServer, waitForReady, stopProfile, modelAvailableOnServer } from "../process.mjs";
8
8
  import { loadProfiles } from "../profiles.mjs";
9
9
  import { pc, createPrompt } from "../ui.mjs";
10
10
  import { linkBenchmarkRepo } from "./repo.mjs";
@@ -28,9 +28,20 @@ async function chooseBenchmarkAction(prompt, canRun) {
28
28
  return await prompt.choice("Action", canRun ? choices : choices.filter((c) => c.value === "prepare"), canRun ? "run" : "prepare");
29
29
  }
30
30
 
31
/**
 * Pick the identifier used to refer to a profile's model on a managed server.
 *
 * Returns the first of `omlxModel`, `ollamaModel`, `modelAlias` that is neither
 * `null` nor `undefined`; otherwise returns `label` (whatever its value).
 *
 * @param {object} profile - Profile to inspect.
 * @returns {*} The chosen identifier.
 */
function managedModelId(profile) {
  const { omlxModel, ollamaModel, modelAlias, label } = profile;
  for (const candidate of [omlxModel, ollamaModel, modelAlias]) {
    if (candidate !== null && candidate !== undefined) {
      return candidate;
    }
  }
  return label;
}
34
+
35
/**
 * Verify that the profile's model is available before benchmarking against a
 * managed server. Does nothing for backends whose `type` is not
 * "managed-server".
 *
 * @param {object} profile - Profile being benchmarked; `baseUrl` is used in the error text.
 * @param {object} backend - Backend descriptor; reads `type` and `label`.
 * @returns {Promise<void>}
 * @throws {Error} When `modelAvailableOnServer(profile)` resolves to a falsy value.
 */
async function ensureManagedModelAvailableForBenchmark(profile, backend) {
  const isManaged = backend.type === "managed-server";
  if (!isManaged) {
    return;
  }

  const available = await modelAvailableOnServer(profile);
  if (!available) {
    throw new Error(`${managedModelId(profile)} is not available on ${backend.label} at ${profile.baseUrl}.`);
  }
}
40
+
31
41
  async function ensureServerForBenchmark(profile) {
32
42
  const backend = backendFor(profile.backend);
33
43
  if (await serverReady(profile.baseUrl)) {
44
+ await ensureManagedModelAvailableForBenchmark(profile, backend);
34
45
  console.log(pc.green(`[ready] ${backend.label} at ${profile.baseUrl}`));
35
46
  return { started: false };
36
47
  }
@@ -52,6 +63,7 @@ export async function runPreparedBenchmark(profile, runDirectory, options = {})
52
63
  options.signal.addEventListener("abort", () => controller.abort(), { once: true });
53
64
  }
54
65
  let serverStarted = false;
66
+ let benchmarkStarted = false;
55
67
  let metadata = null;
56
68
 
57
69
  const onSigint = () => {
@@ -72,6 +84,7 @@ export async function runPreparedBenchmark(profile, runDirectory, options = {})
72
84
  await syncPiConfig(profile);
73
85
  }
74
86
 
87
+ benchmarkStarted = true;
75
88
  const runResult = await runBenchmarkInPi(profile, runDirectory, { signal: controller.signal });
76
89
 
77
90
  let speedMetrics = null;
@@ -110,11 +123,13 @@ export async function runPreparedBenchmark(profile, runDirectory, options = {})
110
123
  console.log(result.stopped ? pc.green(`[stop] ${result.message}`) : pc.dim(`[stop] ${result.message}`));
111
124
  }
112
125
  }
113
- const unloadResult = await unloadModelFromServer(profile);
114
- if (!unloadResult.unloaded && unloadResult.error) {
115
- console.log(pc.yellow(`[unload] ${unloadResult.backend}: ${unloadResult.error}`));
116
- } else if (!unloadResult.unloaded && unloadResult.reason) {
117
- console.log(pc.dim(`[unload] ${unloadResult.backend}: ${unloadResult.reason}`));
126
+ if (benchmarkStarted) {
127
+ const unloadResult = await unloadModelFromServer(profile);
128
+ if (!unloadResult.unloaded && unloadResult.error) {
129
+ console.log(pc.yellow(`[unload] ${unloadResult.backend}: ${unloadResult.error}`));
130
+ } else if (!unloadResult.unloaded && unloadResult.reason) {
131
+ console.log(pc.dim(`[unload] ${unloadResult.backend}: ${unloadResult.reason}`));
132
+ }
118
133
  }
119
134
  }
120
135
 
package/src/process.mjs CHANGED
@@ -209,7 +209,7 @@ export async function waitForReady(profile, pid, rawLogPath) {
209
209
 
210
210
  // ── Internals ──────────────────────────────────────────────────────────────
211
211
 
212
- async function serverModelIds(baseUrl) {
212
+ export async function serverModelIds(baseUrl) {
213
213
  const result = await fetchJson(`${baseUrl.replace(/\/+$/u, "")}/models`);
214
214
  if (!result.ok) return [];
215
215
  return (Array.isArray(result.data?.data) ? result.data.data : [])