offgrid-ai 0.3.11 → 0.3.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +2 -2
- package/src/cli.mjs +75 -60
- package/src/command.mjs +21 -0
- package/src/profile-setup.mjs +82 -0
package/README.md
CHANGED
|
@@ -51,7 +51,7 @@ curl -fsSL https://raw.githubusercontent.com/eeshansrivastava89/offgrid-ai/main/
|
|
|
51
51
|
|
|
52
52
|
1. **Auto-detect everything.** Scans for GGUF models in LM Studio, HuggingFace, and Ollama directories. Reads model metadata (quantization, context size, vision, thinking mode) directly from the GGUF file. No presets, no manual configuration.
|
|
53
53
|
|
|
54
|
-
2. **One command to run.** `offgrid-ai` → pick a model →
|
|
54
|
+
2. **One command to run.** `offgrid-ai` → pick a model → confirm context/KV memory settings on first setup → it starts llama-server, syncs Pi config, and launches Pi.
|
|
55
55
|
|
|
56
56
|
3. **One model at a time.** Laptops have limited RAM. One server, one model, no confusion.
|
|
57
57
|
|
|
@@ -77,7 +77,7 @@ When you run `offgrid-ai` for the first time on a fresh machine:
|
|
|
77
77
|
- **oMLX** — Apple Silicon optimized
|
|
78
78
|
4. **Models** — If no models found, tells you where to get them.
|
|
79
79
|
|
|
80
|
-
Subsequent runs skip everything that's already installed.
|
|
80
|
+
Subsequent runs skip everything that's already installed. When a GGUF model is set up for the first time, offgrid-ai asks only for the memory-impacting choices: context window and KV cache precision. Sampling defaults are shown but not forced into a tuning wizard.
|
|
81
81
|
|
|
82
82
|
## Data directory
|
|
83
83
|
|
package/package.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "offgrid-ai",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.13",
|
|
4
4
|
"description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
|
|
5
5
|
"author": "Eeshan Srivastava (https://eeshans.com)",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"bin": {
|
|
8
|
-
"offgrid-ai": "
|
|
8
|
+
"offgrid-ai": "bin/offgrid-ai.mjs"
|
|
9
9
|
},
|
|
10
10
|
"files": [
|
|
11
11
|
"bin/*.mjs",
|
package/src/cli.mjs
CHANGED
|
@@ -12,6 +12,8 @@ import { estimateMemory } from "./estimate.mjs";
|
|
|
12
12
|
import { pc, formatBytes, renderRows, renderSection, startInteractive, createPrompt, parseOptions } from "./ui.mjs";
|
|
13
13
|
import { checkForUpdate, currentPackageVersion, detectInvocation, updateCommand, runUpdateCommand } from "./updates.mjs";
|
|
14
14
|
import { removeInstallerPathEntries } from "./shell-path.mjs";
|
|
15
|
+
import { configureLocalProfile } from "./profile-setup.mjs";
|
|
16
|
+
import { buildPrettyCommand } from "./command.mjs";
|
|
15
17
|
|
|
16
18
|
// ── Entry point ────────────────────────────────────────────────────────────
|
|
17
19
|
|
|
@@ -200,15 +202,21 @@ export async function mainFlow() {
|
|
|
200
202
|
}
|
|
201
203
|
|
|
202
204
|
// Pick what to do
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
205
|
+
while (true) {
|
|
206
|
+
const action = await prompt.choice("What next?", [
|
|
207
|
+
{ value: "run", label: "Run a model", hint: "Start server and launch Pi" },
|
|
208
|
+
...(profiles.length > 0 ? [{ value: "manage", label: "Manage profiles", hint: "Sync, remove, or inspect" }] : []),
|
|
209
|
+
{ value: "benchmark", label: "Benchmark", hint: "Run a benchmark prompt" },
|
|
210
|
+
], "run");
|
|
211
|
+
|
|
212
|
+
if (action === "run") return await pickAndRun(prompt, profiles, newModels, managedItems);
|
|
213
|
+
if (action === "manage") {
|
|
214
|
+
const result = await manageProfiles(prompt, profiles);
|
|
215
|
+
if (result === "back") continue;
|
|
216
|
+
return result;
|
|
217
|
+
}
|
|
218
|
+
if (action === "benchmark") return await benchmarkFlow(prompt, profiles);
|
|
219
|
+
}
|
|
212
220
|
} finally {
|
|
213
221
|
prompt.close();
|
|
214
222
|
}
|
|
@@ -270,10 +278,12 @@ async function pickAndRun(prompt, profiles, newModels, managedItems) {
|
|
|
270
278
|
const model = newModels.find((m) => m.path === modelPath);
|
|
271
279
|
if (!model) throw new Error("Model not found.");
|
|
272
280
|
const profile = await createProfileFromModel(model);
|
|
273
|
-
await
|
|
274
|
-
|
|
275
|
-
await
|
|
276
|
-
|
|
281
|
+
const configured = await configureLocalProfile(prompt, profile);
|
|
282
|
+
if (!configured) return;
|
|
283
|
+
await saveProfile(configured);
|
|
284
|
+
console.log(pc.green(`Saved profile: ${configured.label}`));
|
|
285
|
+
await syncPiConfig(configured);
|
|
286
|
+
return await runProfile(configured);
|
|
277
287
|
}
|
|
278
288
|
|
|
279
289
|
if (selected.startsWith("managed:")) {
|
|
@@ -381,58 +391,63 @@ async function runProfile(profile, options = {}) {
|
|
|
381
391
|
// ── Manage profiles ─────────────────────────────────────────────────────────
|
|
382
392
|
|
|
383
393
|
/**
 * Interactive "Manage profiles" menu.
 *
 * Repeatedly asks which profile to inspect, prints its details (plus the
 * llama-server command for non-managed backends that have a commandArgv),
 * then asks for an action.
 *
 * @param {object} prompt - Prompt helper; only `prompt.choice(question, choices, default)` is used here.
 * @param {Array<object>} profiles - Profile summaries; `id`, `label`, `modelAlias` and `baseUrl` are read.
 * @returns {Promise<*>} `"back"` when the user backs out of the profile list,
 *   the result of `runProfile` for "run"/"server", or `undefined` after "remove".
 */
async function manageProfiles(prompt, profiles) {
  const BACK = "__back";

  for (;;) {
    const profileChoices = [
      ...profiles.map(({ id, label, modelAlias, baseUrl }) => ({
        value: id,
        label,
        hint: `${modelAlias} · ${baseUrl}`,
      })),
      { value: BACK, label: "← Back" },
    ];

    const selected = await prompt.choice("Which profile?", profileChoices, profileChoices[0].value);
    if (selected === BACK) {
      return "back";
    }

    const profile = await readProfile(selected);
    const backend = backendFor(profile.backend);
    const isManaged = backend.type === "managed-server";
    // Looked up once so the details table and the action menu agree.
    const piConfigured = await hasPiModel(profile);

    // Show profile details
    console.log("");
    const detailRows = [
      ["ID", pc.cyan(profile.id)],
      ["Label", pc.bold(profile.label)],
      ["Backend", backend.label],
      ["Endpoint", pc.green(profile.baseUrl)],
    ];
    if (!isManaged) {
      // File-level details are only shown for non-managed backends.
      const modelSize = existsSync(profile.modelPath)
        ? formatBytes(statSync(profile.modelPath).size)
        : "unknown";
      detailRows.push(
        ["Model", profile.modelPath ?? "unknown"],
        ["MMProj", profile.mmprojPath ?? "none"],
        ["Memory", modelSize],
      );
    }
    detailRows.push(
      ["Alias", pc.cyan(profile.modelAlias)],
      ["Pi", piConfigured ? pc.green("configured") : pc.yellow("not synced")],
    );
    console.log(renderSection("Profile", renderRows(detailRows)));

    if (!isManaged && profile.commandArgv) {
      console.log("");
      console.log(pc.bold("llama-server command"));
      console.log(pc.dim(buildPrettyCommand(profile)));
    }

    const syncChoice = piConfigured
      ? { value: "sync", label: `${pc.green("✓")} Pi config synced`, hint: "Already in ~/.pi/agent/models.json" }
      : { value: "sync", label: "Sync Pi config", hint: "Update ~/.pi/agent/models.json" };
    const actionChoices = [
      syncChoice,
      { value: "run", label: "Run", hint: "Start server + Pi" },
    ];
    if (!isManaged) {
      actionChoices.push({ value: "server", label: "Server only", hint: "Start server, no harness" });
    }
    actionChoices.push(
      { value: "remove", label: "Remove", hint: "Delete profile + Pi config" },
      { value: BACK, label: "← Back", hint: "Choose another profile" },
    );

    const action = await prompt.choice("Action", actionChoices, "sync");

    switch (action) {
      case "sync":
        await syncPiConfig(profile);
        break; // back to the profile list
      case "run":
        return await runProfile(profile);
      case "server":
        return await runProfile(profile, { with: "server" });
      case "remove":
        await removeProfileInteractive(profile.id);
        return;
      default:
        // "__back" (or anything unrecognised): show the profile list again.
        break;
    }
  }
}
|
|
438
453
|
|
package/src/command.mjs
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * Format a profile's server command as a multi-line, copy-pasteable shell
 * command: the binary on the first line, then one argument (or one
 * `--flag value` pair) per indented line, joined with backslash continuations.
 *
 * A `--flag` is paired with the following argv entry unless that entry also
 * starts with `--`. Every emitted word goes through `quoteShell`, so
 * positional arguments and empty-string values stay valid shell words.
 *
 * @param {{ commandArgv?: Array<string|number> }} profile - Profile whose `commandArgv` is rendered.
 * @param {string} [binary="llama-server"] - Executable shown on the first line.
 * @returns {string} The formatted command; just the binary when there are no
 *   arguments (no dangling trailing backslash).
 */
export function buildPrettyCommand(profile, binary = "llama-server") {
  // Coerce once so numeric argv entries cannot crash `.startsWith`.
  const argv = (profile.commandArgv ?? []).map(String);
  const segments = [quoteShell(binary)];

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    const next = argv[i + 1];
    // `next !== undefined` (not truthiness) so an empty-string value still counts.
    const hasValue = arg.startsWith("--") && next !== undefined && !next.startsWith("--");
    if (hasValue) {
      segments.push(` ${quoteShell(arg)} ${quoteShell(next)}`);
      i += 1; // the value was consumed together with its flag
    } else {
      segments.push(` ${quoteShell(arg)}`);
    }
  }

  // Joining puts a continuation after every line except the last one.
  return segments.join(" \\\n");
}
|
|
17
|
+
|
|
18
|
+
/**
 * Turn a value into a single shell word.
 *
 * The value is stringified. If it consists only of characters from the safe
 * set (letters, digits and `_/@%+=:,.-`) it is returned unchanged; otherwise
 * it is wrapped in single quotes, with each embedded single quote rewritten
 * as `'"'"'`.
 *
 * @param {*} value - Value to quote.
 * @returns {string} The bare or single-quoted word.
 */
export function quoteShell(value) {
  const text = String(value);
  const isBareSafe = /^[A-Za-z0-9_/@%+=:,.-]+$/u.test(text);
  if (isBareSafe) {
    return text;
  }
  const escaped = text.split("'").join(`'"'"'`);
  return `'${escaped}'`;
}
|
|
package/src/profile-setup.mjs
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import { estimateMemory } from "./estimate.mjs";
|
|
2
|
+
import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
|
|
3
|
+
|
|
4
|
+
// Choices offered by the K/V cache precision prompts, written as
// [value, hint] pairs; each choice's label is the same text as its value.
const CACHE_CHOICES = [
  ["bf16", "default: stable, good quality"],
  ["f16", "stable fallback, similar memory to bf16"],
  ["q8_0", "lower memory, usually safe"],
  ["q4_0", "lowest memory, quality/speed tradeoff"],
].map(([value, hint]) => ({ value, label: value, hint }));
|
|
10
|
+
|
|
11
|
+
/**
 * First-time setup dialogue for a local model profile.
 *
 * Prints the profile's current context size, KV cache types and sampling
 * summary, asks for the context window and the K/V cache precision, applies
 * those answers with `applyRuntimeFlagOverrides`, prints the resulting
 * sampling defaults and a memory estimate, and finally asks for confirmation.
 *
 * @param {object} prompt - Prompt helper providing `number`, `choice` and `yesNo`.
 * @param {object} profile - Profile to configure; `label` and `flags` are read here.
 * @returns {Promise<object|null>} The configured profile, or `null` when the
 *   user declines to save.
 */
export async function configureLocalProfile(prompt, profile) {
  console.log("");
  const summaryRows = [
    ["Model", pc.bold(profile.label)],
    ["Context", `${profile.flags.ctxSize.toLocaleString()} tokens`],
    ["KV cache", `${profile.flags.cacheTypeK}/${profile.flags.cacheTypeV}`],
    ["Sampling", samplingSummary(profile.flags)],
  ];
  console.log(renderSection("Model setup", renderRows(summaryRows)));
  console.log(pc.dim("Larger context windows use more memory. KV cache precision controls memory used by attention history."));
  console.log(pc.dim("Sampling defaults are shown for transparency; you can edit command.json later if needed.\n"));

  // Only these three settings are asked for; everything else is carried over.
  const overrides = {};
  overrides.ctxSize = await prompt.number("Context window tokens", profile.flags.ctxSize, 1024, 1048576);
  overrides.cacheTypeK = await prompt.choice("K cache precision", CACHE_CHOICES, profile.flags.cacheTypeK);
  overrides.cacheTypeV = await prompt.choice("V cache precision", CACHE_CHOICES, profile.flags.cacheTypeV);
  const configured = applyRuntimeFlagOverrides(profile, overrides);

  const { flags } = configured;
  const defaultRows = [
    ["Temperature", flags.temperature],
    ["Top-p", flags.topP],
    ["Top-k", flags.topK],
    ["Min-p", flags.minP],
    ["Presence penalty", flags.presencePenalty],
    ["Repeat penalty", flags.repeatPenalty],
  ];
  console.log("");
  console.log(renderSection("Defaults", renderRows(defaultRows)));

  console.log(`\n${renderMemoryEstimate(configured)}`);

  const accepted = await prompt.yesNo("Save profile with these settings?", true);
  return accepted ? configured : null;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Return a copy of `profile` with `overrides` merged into its flags and the
 * derived fields kept in step.
 *
 * - `flags` is the shallow merge of `profile.flags` and `overrides`.
 * - `baseUrl` is rebuilt as `http://<host>:<port>/v1` from the merged flags.
 *   When the merged flags carry no host or port, the profile's existing
 *   `baseUrl` is kept rather than producing `http://undefined:undefined/v1`.
 * - `commandArgv` gets `--ctx-size`, `--cache-type-k` and `--cache-type-v`
 *   set from the merged flags via `updateArgv`.
 *
 * The input profile is not modified.
 *
 * @param {object} profile - Source profile; `flags`, `baseUrl` and `commandArgv` are read.
 * @param {object} overrides - Flag values that replace the profile's own.
 * @returns {object} A new profile object.
 */
export function applyRuntimeFlagOverrides(profile, overrides) {
  const flags = { ...profile.flags, ...overrides };
  // `!= null` matches both null and undefined; port 0 or similar still counts as set.
  const hasEndpoint = flags.host != null && flags.port != null;
  return {
    ...profile,
    flags,
    baseUrl: hasEndpoint ? `http://${flags.host}:${flags.port}/v1` : profile.baseUrl,
    commandArgv: updateArgv(profile.commandArgv ?? [], {
      "--ctx-size": flags.ctxSize,
      "--cache-type-k": flags.cacheTypeK,
      "--cache-type-v": flags.cacheTypeV,
    }),
  };
}
|
|
55
|
+
|
|
56
|
+
/**
 * Return a copy of `argv` in which each flag in `values` carries the given
 * value (stringified).
 *
 * - Flag absent: the flag and its value are appended.
 * - Flag present and followed by a value: that value is replaced.
 * - Flag present but last, or immediately followed by another `--flag`: the
 *   value is inserted right after the flag, so the neighbouring flag is not
 *   overwritten.
 *
 * Only the first occurrence of a flag is considered. The input array is not
 * modified.
 *
 * @param {string[]} argv - Existing argument vector.
 * @param {Object<string, *>} values - Map of flag name to desired value.
 * @returns {string[]} The updated argument vector.
 */
function updateArgv(argv, values) {
  const next = [...argv];
  for (const [flag, value] of Object.entries(values)) {
    const text = String(value);
    const index = next.indexOf(flag);
    if (index === -1) {
      next.push(flag, text);
      continue;
    }
    const current = next[index + 1];
    const isValueSlot = current !== undefined && !String(current).startsWith("--");
    if (isValueSlot) {
      next[index + 1] = text;
    } else {
      // Nothing to replace: insert instead of clobbering whatever follows.
      next.splice(index + 1, 0, text);
    }
  }
  return next;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Build the "Memory" section for a profile.
 *
 * Calls `estimateMemory` with the profile's model path, mmproj path and flags
 * and lists the estimated total, the model size, the KV cache size ("unknown"
 * when the estimate has no `kvBytes`) and, if present, the estimate's note.
 * Any error raised while estimating or rendering is deliberately absorbed and
 * replaced by an "Estimate unavailable" section.
 *
 * @param {object} profile - Profile providing `modelPath`, `mmprojPath` and `flags`.
 * @returns {*} Whatever `renderSection` returns for the section.
 */
function renderMemoryEstimate(profile) {
  try {
    const { modelPath, mmprojPath, flags } = profile;
    const est = estimateMemory(modelPath, mmprojPath, null, flags);

    const rows = [
      ["Estimated total", pc.bold(`~${formatBytes(est.totalBytes)}`)],
      ["Model", formatBytes(est.modelBytes)],
    ];

    let kvCell = "unknown";
    if (est.kvBytes) {
      kvCell = `~${formatBytes(est.kvBytes)} (${flags.ctxSize.toLocaleString()} ctx, ${flags.cacheTypeK}/${flags.cacheTypeV})`;
    }
    rows.push(["KV cache", kvCell]);

    if (est.note) {
      rows.push(["Note", pc.yellow(est.note)]);
    }

    return renderSection("Memory", renderRows(rows));
  } catch {
    // Best effort: the setup flow continues without an estimate.
    return renderSection("Memory", pc.dim("Estimate unavailable for this model."));
  }
}
|
|
79
|
+
|
|
80
|
+
/**
 * One-line summary of the sampling flags shown in the setup table.
 *
 * @param {{ temperature: *, topP: *, topK: * }} flags - Profile flags.
 * @returns {string} Text of the form `temp <t>, top-p <p>, top-k <k>`.
 */
function samplingSummary(flags) {
  const { temperature, topP, topK } = flags;
  return [`temp ${temperature}`, `top-p ${topP}`, `top-k ${topK}`].join(", ");
}
|