offgrid-ai 0.9.2 → 0.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "offgrid-ai",
3
- "version": "0.9.2",
3
+ "version": "0.9.4",
4
4
  "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
5
5
  "author": "Eeshan Srivastava (https://eeshans.com)",
6
6
  "type": "module",
package/src/backends.mjs CHANGED
@@ -143,7 +143,7 @@ function isLocalOllamaModel(model) {
143
143
  function isChatOmlxModel(model) {
144
144
  if (typeof model?.id !== "string" || !model.id.trim()) return false;
145
145
  const type = String(model.type ?? model.model_type ?? "").toLowerCase();
146
- if (["embedding", "embeddings", "reranker", "tool", "converter"].includes(type)) return false;
146
+ if (["embedding", "embeddings", "reranker", "tool", "converter", "markitdown"].includes(type)) return false;
147
147
  if (Object.hasOwn(model, "max_model_len") && model.max_model_len === null) return false;
148
148
  return true;
149
149
  }
@@ -1,7 +1,7 @@
1
1
  // ── Unload model from server memory after benchmark ────────────────────────────
2
2
 
3
3
  import { backendFor } from "../backends.mjs";
4
- import { apiRootUrl } from "../process.mjs";
4
+ import { apiRootUrl, serverModelIds } from "../process.mjs";
5
5
  import { existsSync } from "node:fs";
6
6
  import { readFile, writeFile } from "node:fs/promises";
7
7
  import { join } from "node:path";
@@ -33,14 +33,62 @@ export async function unloadModelFromServer(profile) {
33
33
  }
34
34
 
35
35
  if (backend.id === "omlx") {
36
- // oMLX does not expose a model-unload endpoint. The model stays resident
37
- // until the oMLX server process is stopped.
38
- return { unloaded: false, backend: backend.id, reason: "no unload API available" };
36
+ return await unloadOmlxModel(profile);
39
37
  }
40
38
 
41
39
  return { unloaded: false, backend: backend.id, reason: "unsupported backend" };
42
40
  }
43
41
 
42
/**
 * Ask an oMLX server to unload a profile's model through its admin endpoint.
 *
 * The admin URL is built from `profile.baseUrl` with a trailing `/v1` removed.
 * The model id is taken from `modelAlias`, `omlxModel` or `id` (first truthy
 * one) and, when the server lists an id that differs only by letter case, the
 * server's spelling is used in the request.
 *
 * This function never throws; every failure is reported in the result object.
 *
 * @param {object} profile - Profile; reads `baseUrl`, `modelAlias`, `omlxModel`, `id`.
 * @returns {Promise<{unloaded: boolean, backend: "omlx", modelId: *, reason?: string, error?: string}>}
 *   `unloaded: true` on an OK response or when the server answers 400 with a
 *   "not loaded" detail; otherwise `unloaded: false` with an `error` message.
 */
async function unloadOmlxModel(profile) {
  const modelId = profile.modelAlias || profile.omlxModel || profile.id;

  // Fail with a readable message instead of an opaque TypeError (or a request
  // to ".../undefined/unload") when the profile is incomplete.
  if (typeof profile.baseUrl !== "string" || !profile.baseUrl.trim()) {
    return { unloaded: false, backend: "omlx", modelId, error: "Profile has no baseUrl, so the oMLX admin API cannot be reached." };
  }
  if (!modelId) {
    return { unloaded: false, backend: "omlx", modelId, error: "Profile has no model identifier to unload." };
  }

  const adminUrl = `${profile.baseUrl.replace(/\/v1\/?$/u, "")}/admin/api/models`;
  const wantedId = String(modelId).toLowerCase();

  try {
    // Prefer the server's own spelling of the id when it differs only by case.
    const ids = await serverModelIds(profile.baseUrl);
    const targetId = ids.find((id) => String(id).toLowerCase() === wantedId) ?? modelId;

    const response = await fetch(`${adminUrl}/${encodeURIComponent(targetId)}/unload`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      signal: AbortSignal.timeout(30000),
    });

    if (response.ok) {
      return { unloaded: true, backend: "omlx", modelId: targetId };
    }

    // A response body can be consumed only once, so read it as text and then
    // attempt to parse that text as JSON. Calling json() and falling back to
    // text() would always lose a non-JSON error message.
    const rawBody = await response.text().catch(() => "");
    let detail = rawBody;
    try {
      const parsed = JSON.parse(rawBody);
      const field = parsed?.detail ?? parsed?.message ?? "";
      // Structured details (objects/arrays) are serialised so they stay readable.
      detail = typeof field === "string" ? field : JSON.stringify(field);
    } catch {
      // Not JSON: keep the raw text as the detail.
    }

    // The model already being unloaded is the outcome the caller wanted.
    if (response.status === 400 && /not loaded/i.test(detail)) {
      return { unloaded: true, backend: "omlx", modelId: targetId, reason: "model was not loaded" };
    }

    if (response.status === 401 || response.status === 403) {
      return {
        unloaded: false,
        backend: "omlx",
        modelId: targetId,
        error: "oMLX admin authentication required. Enable skip_api_key_verification in oMLX settings, or unload manually from the admin panel.",
      };
    }

    return { unloaded: false, backend: "omlx", modelId: targetId, error: `HTTP ${response.status}: ${detail}` };
  } catch (err) {
    if (err?.name === "AbortError" || err?.name === "TimeoutError") {
      return { unloaded: false, backend: "omlx", modelId, error: "Unload request timed out. The model may still be unloading in the background." };
    }
    return { unloaded: false, backend: "omlx", modelId, error: err?.message ?? String(err) };
  }
}
91
+
44
92
  export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics) {
45
93
  const metadataPath = join(runDirectory, "metadata.json");
46
94
  const metadata = JSON.parse(await readFile(metadataPath, "utf8"));
@@ -5,7 +5,7 @@ import { join } from "node:path";
5
5
  import { spawn } from "node:child_process";
6
6
  import {
7
7
  BENCH_COLORS, renderStreamEvent,
8
- formatToolCall,
8
+ formatToolCall, printFinalLine,
9
9
  } from "./stream-renderer.mjs";
10
10
  import { piModelString } from "./shared.mjs";
11
11
 
@@ -212,6 +212,8 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
212
212
  return;
213
213
  }
214
214
 
215
+ printFinalLine(BENCH_COLORS.info("Pi benchmark finished"));
216
+
215
217
  if (runResult.exitCode !== 0) {
216
218
  runResult.error = { message: `Pi exited with code ${runResult.exitCode}` };
217
219
  resolve(runResult);
@@ -142,7 +142,6 @@ export function renderStreamEvent(parsed, state, opts = {}) {
142
142
  }
143
143
  case "agent_end":
144
144
  clearStatusLine();
145
- printFinalLine(BENCH_COLORS.info("Pi benchmark finished"));
146
145
  break;
147
146
  default:
148
147
  break;
@@ -2,7 +2,7 @@ import { existsSync } from "node:fs";
2
2
  import { ensureDirs } from "../config.mjs";
3
3
  import { backendFor } from "../backends.mjs";
4
4
  import { normalizeProfile, readProfile, saveProfile } from "../profiles.mjs";
5
- import { startServer, stopProfile, waitForReady, serverReady, serverMatchesProfile } from "../process.mjs";
5
+ import { startServer, stopProfile, waitForReady, serverReady, serverMatchesProfile, modelAvailableOnServer } from "../process.mjs";
6
6
  import { syncPiConfig, hasPiModel, launchPi, hasPi } from "../harness-pi.mjs";
7
7
  import { tailFriendly } from "../logs.mjs";
8
8
  import { estimateMemory } from "../estimate.mjs";
@@ -33,6 +33,11 @@ export async function runProfile(profile, options = {}) {
33
33
  if (!(await serverReady(profile.baseUrl))) {
34
34
  throw new Error(`${backend.label} is not running at ${profile.baseUrl}. Start it and try again.`);
35
35
  }
36
+ const available = await modelAvailableOnServer(profile);
37
+ if (!available) {
38
+ const modelId = profile.omlxModel ?? profile.ollamaModel ?? profile.modelAlias ?? profile.label;
39
+ throw new Error(`${modelId} is not available on ${backend.label} at ${profile.baseUrl}.`);
40
+ }
36
41
  console.log(pc.green(`[ready] ${backend.label} at ${profile.baseUrl}`));
37
42
  } else {
38
43
  const startup = await ensureLocalServer(profile, backend, options);
@@ -13,17 +13,48 @@ export async function statusCommand() {
13
13
  }
14
14
 
15
15
  const running = statuses.filter((item) => item.status.running);
16
- if (running.length === 0) {
17
- console.log(renderCard("Status", renderRows([
18
- ["Running now", pc.dim("none")],
19
- ["Ready setups", profiles.length > 0 ? String(profiles.length) : pc.dim("none")],
20
- ["Next step", profiles.length > 0 ? "Run offgrid-ai to start chatting" : pc.yellow("Run offgrid-ai to set up a model")],
21
- ]), { formatBorder: pc.dim }));
22
- return;
16
+ const managedUpMissing = statuses.filter((item) => {
17
+ const backend = backendFor(item.profile.backend);
18
+ return backend.type === "managed-server" && item.status.serverUp && !item.status.modelAvailable;
19
+ });
20
+ const managedUpNotLoaded = statuses.filter((item) => {
21
+ const backend = backendFor(item.profile.backend);
22
+ return backend.type === "managed-server" && item.status.serverUp && item.status.modelAvailable && !item.status.modelLoaded;
23
+ });
24
+
25
+ const summaryRows = [
26
+ ["Running now", running.length > 0 ? pc.green(`${running.length} model${running.length === 1 ? "" : "s"}`) : pc.dim("none")],
27
+ ["Ready setups", profiles.length > 0 ? String(profiles.length) : pc.dim("none")],
28
+ ];
29
+
30
+ if (managedUpMissing.length > 0) {
31
+ summaryRows.push(["Server up, model missing", pc.yellow(String(managedUpMissing.length))]);
32
+ }
33
+ if (managedUpNotLoaded.length > 0) {
34
+ summaryRows.push(["Server up, model not loaded", pc.yellow(String(managedUpNotLoaded.length))]);
35
+ }
36
+
37
+ summaryRows.push(["Next step", profiles.length > 0 ? "Run offgrid-ai to start chatting" : pc.yellow("Run offgrid-ai to set up a model")]);
38
+
39
+ console.log(renderCard("Status", renderRows(summaryRows), { formatBorder: running.length > 0 ? pc.green : pc.dim }));
40
+
41
+ if (managedUpMissing.length > 0 || managedUpNotLoaded.length > 0) {
42
+ const detailRows = [];
43
+ for (const { profile, status } of [...managedUpMissing, ...managedUpNotLoaded]) {
44
+ const backend = backendFor(profile.backend);
45
+ const modelId = profile.omlxModel ?? profile.ollamaModel ?? profile.modelAlias ?? profile.id;
46
+ const state = status.modelAvailable
47
+ ? pc.yellow("server up · model not loaded")
48
+ : pc.red("server up · model missing");
49
+ detailRows.push([`${profile.label} (${modelId})`, state]);
50
+ detailRows.push(["Server", `${backend.label} at ${profile.baseUrl}`]);
51
+ }
52
+ console.log("\n" + renderCard("Managed servers", renderRows(detailRows), { formatBorder: pc.yellow }));
23
53
  }
24
54
 
25
- console.log(renderCard("Status", renderRows([
26
- ["Running now", pc.green(`${running.length} model${running.length === 1 ? "" : "s"}`)],
55
+ if (running.length === 0) return;
56
+
57
+ console.log("\n" + renderCard("Running", renderRows([
27
58
  ["Stop", "offgrid-ai stop"],
28
59
  ]), { formatBorder: pc.green }));
29
60
  for (const { profile, status } of running) {
package/src/process.mjs CHANGED
@@ -132,12 +132,27 @@ export async function modelLoadedOnServer(profile) {
132
132
  return matches;
133
133
  }
134
134
 
135
/**
 * Report whether the model a profile expects is listed by its server.
 *
 * For the "ollama" and "omlx" backends the ids reported by the server are
 * compared against `expectedModelIds(profile)` via `modelIdsMatch`. Every
 * other backend is reported as available without querying anything.
 *
 * @param {object} profile - Profile; reads `backend` and, for oMLX, `baseUrl`.
 * @returns {Promise<boolean>} Result of the id comparison, or `true` for other backends.
 */
export async function modelAvailableOnServer(profile) {
  const { id: backendId } = backendFor(profile.backend);

  let listedIds;
  switch (backendId) {
    case "ollama":
      listedIds = await ollamaAvailableModelIds(profile);
      break;
    case "omlx":
      // /v1/models lists discovered models; an ID must exist there to be usable.
      listedIds = await serverModelIds(profile.baseUrl);
      break;
    default:
      // Local servers are tied to a specific model file via their command argv.
      return true;
  }

  return modelIdsMatch(listedIds, expectedModelIds(profile));
}
147
+
135
148
  export async function profileRuntimeStatus(profile) {
136
149
  const backend = backendFor(profile.backend);
137
150
  if (backend.type === "managed-server") {
138
151
  const ready = await serverReady(profile.baseUrl);
139
- const modelLoaded = ready ? await modelLoadedOnServer(profile) : false;
140
- return { state: null, pid: null, running: ready && modelLoaded, ready, serverUp: ready, modelLoaded, rssBytes: null, startedAt: null };
152
+ const [modelLoaded, modelAvailable] = ready
153
+ ? await Promise.all([modelLoadedOnServer(profile), modelAvailableOnServer(profile)])
154
+ : [false, false];
155
+ return { state: null, pid: null, running: ready && modelLoaded, ready, serverUp: ready, modelLoaded, modelAvailable, rssBytes: null, startedAt: null };
141
156
  }
142
157
  const state = await readState(profile.id);
143
158
  const running = Boolean(state?.pid && pidAlive(state.pid));
@@ -194,7 +209,7 @@ export async function waitForReady(profile, pid, rawLogPath) {
194
209
 
195
210
  // ── Internals ──────────────────────────────────────────────────────────────
196
211
 
197
- async function serverModelIds(baseUrl) {
212
+ export async function serverModelIds(baseUrl) {
198
213
  const result = await fetchJson(`${baseUrl.replace(/\/+$/u, "")}/models`);
199
214
  if (!result.ok) return [];
200
215
  return (Array.isArray(result.data?.data) ? result.data.data : [])
@@ -211,6 +226,15 @@ async function ollamaLoadedModelIds(profile) {
211
226
  .filter(Boolean);
212
227
  }
213
228
 
229
/**
 * Collect the model identifiers found in the server's `/api/tags` response.
 *
 * Both the `name` and the `model` field of each entry in the response's
 * `models` array are gathered, trimmed, and kept when non-empty.
 *
 * @param {object} profile - Profile; `baseUrl` is passed through `apiRootUrl`.
 * @returns {Promise<string[]>} Trimmed ids, or an empty array when the request
 *   is not ok or the payload has no `models` array.
 */
async function ollamaAvailableModelIds(profile) {
  const tagsUrl = `${apiRootUrl(profile.baseUrl)}/api/tags`;
  const response = await fetchJson(tagsUrl);
  if (!response.ok) return [];

  const entries = response.data?.models;
  if (!Array.isArray(entries)) return [];

  const collected = [];
  for (const entry of entries) {
    for (const candidate of [entry?.name, entry?.model]) {
      const id = String(candidate ?? "").trim();
      if (id) collected.push(id);
    }
  }
  return collected;
}
237
+
214
238
  async function omlxLoadedModelIds(profile) {
215
239
  const statusResult = await fetchJson(`${profile.baseUrl.replace(/\/+$/u, "")}/models/status`);
216
240
  const fromStatus = statusResult.ok