offgrid-ai 0.18.0 → 0.18.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -16
- package/package.json +1 -1
- package/src/download.mjs +18 -2
- package/src/estimate.mjs +12 -4
- package/src/huggingface.mjs +36 -2
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
# offgrid-ai
|
|
4
4
|
|
|
5
|
-
**
|
|
5
|
+
**Run local AI models on your machine — pick, configure, and chat.**
|
|
6
6
|
|
|
7
7
|
[](package.json)
|
|
8
8
|
[]()
|
|
@@ -12,19 +12,23 @@
|
|
|
12
12
|
|
|
13
13
|
## What is offgrid-ai?
|
|
14
14
|
|
|
15
|
-
offgrid-ai is a command-line tool that lets you run AI models locally. Running local models with llama-server or oMLX
|
|
15
|
+
offgrid-ai is a command-line tool that lets you run AI models locally. Running local models with llama-server or oMLX has a steep learning curve compared to cloud-based models, so offgrid-ai is designed to abstract away the complexity while still providing a powerful and flexible way to run local models.
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
The recommended workflow:
|
|
18
18
|
|
|
19
|
-
1. Download models from **LM Studio
|
|
20
|
-
2.
|
|
21
|
-
3.
|
|
19
|
+
1. Download models from **HuggingFace** (or use models you already have from LM Studio, oMLX, etc.)
|
|
20
|
+
2. Configure using the `offgrid-ai` interactive setup
|
|
21
|
+
3. Start chatting in **Pi** — offgrid-ai handles the server lifecycle
|
|
22
22
|
|
|
23
23
|
## Core Features
|
|
24
|
-
|
|
25
|
-
-
|
|
26
|
-
- Auto-
|
|
27
|
-
-
|
|
24
|
+
|
|
25
|
+
- **Download models** from HuggingFace with a quant picker and RAM fit indicators
|
|
26
|
+
- **Auto-detects** models from LM Studio, oMLX, and HuggingFace cache
|
|
27
|
+
- **Glass-box setup** — every configuration flag gets an explanation card with tradeoffs and memory impact
|
|
28
|
+
- **Model management** — delete models from disk, remove configurations, reconfigure settings
|
|
29
|
+
- **Auto-detects MTP** (multi-token prediction) and **QAT** (quantization-aware training) models, applies the correct flags
|
|
30
|
+
- **Start / stop servers** automatically for chat sessions (llama-server and oMLX)
|
|
31
|
+
- **oMLX integration** — auto-start, MTP enable via admin API, restart after download/deletion
|
|
28
32
|
|
|
29
33
|
## Quick start
|
|
30
34
|
|
|
@@ -52,7 +56,7 @@ The curl installer is recommended for first-time setup because it also verifies
|
|
|
52
56
|
|
|
53
57
|
### 2. Pick a model
|
|
54
58
|
|
|
55
|
-
The first time you run offgrid-ai, it looks for models already on your machine. If it
|
|
59
|
+
The first time you run offgrid-ai, it looks for models already on your machine. If it doesn't find any, you can download one directly from HuggingFace — just pick "↓ Download a model" and enter a repo ID (e.g. `unsloth/gemma-4-E2B-it-GGUF`).
|
|
56
60
|
|
|
57
61
|
<img width="808" height="274" alt="image" src="https://github.com/user-attachments/assets/6e1583ab-65db-423c-b0eb-b627586fbf86" />
|
|
58
62
|
|
|
@@ -74,16 +78,17 @@ Pick a model from the list and press Enter. offgrid-ai configures the rest and o
|
|
|
74
78
|
## Everyday commands
|
|
75
79
|
|
|
76
80
|
```bash
|
|
77
|
-
offgrid-ai #
|
|
81
|
+
offgrid-ai # model picker — pick, configure, download, or manage models
|
|
78
82
|
offgrid-ai status # see if any model is running
|
|
79
83
|
offgrid-ai stop # stop the running model
|
|
80
84
|
offgrid-ai uninstall # remove offgrid-ai
|
|
81
85
|
```
|
|
82
86
|
|
|
83
|
-
##
|
|
87
|
+
## Platform support
|
|
84
88
|
|
|
85
|
-
- **
|
|
86
|
-
- **
|
|
89
|
+
- **macOS (Apple Silicon)** — full support: llama.cpp (GGUF) + oMLX (MLX)
|
|
90
|
+
- **Linux** — llama.cpp (GGUF) only. oMLX is Apple Silicon exclusive.
|
|
91
|
+
- **Windows** — not supported
|
|
87
92
|
|
|
88
93
|
## Need help?
|
|
89
94
|
|
|
@@ -104,4 +109,4 @@ node bin/offgrid-ai.mjs
|
|
|
104
109
|
|
|
105
110
|
## License
|
|
106
111
|
|
|
107
|
-
Personal project by [Eeshan Srivastava](https://eeshans.com).
|
|
112
|
+
Personal project by [Eeshan Srivastava](https://eeshans.com).
|
package/package.json
CHANGED
package/src/download.mjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// Model download flow — HuggingFace downloads with quant picker and RAM fit.
|
|
2
2
|
// Used by onboarding (no models found) and the model picker (↓ Download a model).
|
|
3
3
|
|
|
4
|
-
import { hasHfCli, parseHfRef, resolveHfDownload, downloadModel, listGgufFiles, getHfModelInfo, isMlxRepo } from "./huggingface.mjs";
|
|
4
|
+
import { hasHfCli, parseHfRef, resolveHfDownload, downloadModel, listGgufFiles, listMmprojFiles, getHfModelInfo, isMlxRepo } from "./huggingface.mjs";
|
|
5
5
|
import { detectHardware, installedRamGB, getFreeDiskBytes } from "./hardware.mjs";
|
|
6
6
|
import { allFittingModels } from "./recommendations.mjs";
|
|
7
7
|
import { parseModelName } from "./model-name.mjs";
|
|
@@ -140,6 +140,22 @@ export async function downloadFlow(prompt) {
|
|
|
140
140
|
return false;
|
|
141
141
|
}
|
|
142
142
|
|
|
143
|
+
// For GGUF, check if the repo has a vision projector (mmproj) to download alongside
|
|
144
|
+
let extraFiles = [];
|
|
145
|
+
if (plan.format === "gguf") {
|
|
146
|
+
try {
|
|
147
|
+
const mmprojFiles = await listMmprojFiles(repo);
|
|
148
|
+
if (mmprojFiles.length > 0) {
|
|
149
|
+
const mmproj = mmprojFiles[0];
|
|
150
|
+
extraFiles = [mmproj.path];
|
|
151
|
+
plan.totalSizeBytes += mmproj.sizeBytes;
|
|
152
|
+
console.log(pc.dim(`Includes vision projector: ${mmproj.path} (${formatBytes(mmproj.sizeBytes)})`));
|
|
153
|
+
}
|
|
154
|
+
} catch {
|
|
155
|
+
// If we can't check for mmproj, proceed without it
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
|
|
143
159
|
console.log(pc.dim(`\nDownloading ${repo}${filename ? `/${filename}` : ""} (${formatBytes(plan.totalSizeBytes)})`));
|
|
144
160
|
if (plan.format === "mlx") {
|
|
145
161
|
const modelParts = repo.split("/").filter(Boolean);
|
|
@@ -158,7 +174,7 @@ export async function downloadFlow(prompt) {
|
|
|
158
174
|
console.log(pc.green("\n✓ Download complete."));
|
|
159
175
|
await offerOmlxRestart(prompt, "to load the new model");
|
|
160
176
|
} else {
|
|
161
|
-
await downloadModel(plan);
|
|
177
|
+
await downloadModel(plan, { extraFiles });
|
|
162
178
|
console.log(pc.green("\n✓ Download complete. Run offgrid-ai again to see the model in the picker."));
|
|
163
179
|
}
|
|
164
180
|
return true;
|
package/src/estimate.mjs
CHANGED
|
@@ -10,8 +10,13 @@ export function estimateMemory(modelPath, mmprojPath, draftModelPath, flags) {
|
|
|
10
10
|
const prefix = typeof architecture === "string" ? architecture : null;
|
|
11
11
|
const layers = numberMeta(metadata, prefix && `${prefix}.block_count`);
|
|
12
12
|
const headKv = numberOrArrayMeta(metadata, prefix && `${prefix}.attention.head_count_kv`);
|
|
13
|
-
const
|
|
14
|
-
const
|
|
13
|
+
const embeddingLength = numberMeta(metadata, prefix && `${prefix}.embedding_length`);
|
|
14
|
+
const headCount = numberMeta(metadata, prefix && `${prefix}.attention.head_count`);
|
|
15
|
+
// key_length and value_length are not always present in GGUF metadata.
|
|
16
|
+
// llama.cpp derives them from embedding_length / head_count when missing.
|
|
17
|
+
const defaultLength = embeddingLength && headCount ? embeddingLength / headCount : undefined;
|
|
18
|
+
const keyLength = numberOrArrayMeta(metadata, prefix && `${prefix}.attention.key_length`) ?? defaultLength;
|
|
19
|
+
const valueLength = numberOrArrayMeta(metadata, prefix && `${prefix}.attention.value_length`) ?? defaultLength;
|
|
15
20
|
const slidingWindow = numberMeta(metadata, prefix && `${prefix}.attention.sliding_window`);
|
|
16
21
|
const slidingWindowPattern = booleanArrayMeta(metadata, prefix && `${prefix}.attention.sliding_window_pattern`);
|
|
17
22
|
const keyLengthSwa = numberMeta(metadata, prefix && `${prefix}.attention.key_length_swa`);
|
|
@@ -65,10 +70,13 @@ function estimateKvBytes(input) {
|
|
|
65
70
|
keyLength = input.keyLengthSwa ?? keyLength;
|
|
66
71
|
valueLength = input.valueLengthSwa ?? valueLength;
|
|
67
72
|
}
|
|
68
|
-
if (
|
|
73
|
+
if (headKv == null || keyLength == null || valueLength == null) {
|
|
69
74
|
return { bytes: 0, note: "KV estimate unavailable: incomplete layer-specific GGUF metadata.", mode: "unknown" };
|
|
70
75
|
}
|
|
71
|
-
|
|
76
|
+
// Layers with headKv = 0 have no KV cache — skip them
|
|
77
|
+
if (headKv && keyLength && valueLength) {
|
|
78
|
+
total += layerCtx * parallel * headKv * ((keyLength * bytesK) + (valueLength * bytesV));
|
|
79
|
+
}
|
|
72
80
|
}
|
|
73
81
|
return { bytes: total, note: "", mode: input.slidingWindowPattern?.length ? "layered-swa" : "layered" };
|
|
74
82
|
}
|
package/src/huggingface.mjs
CHANGED
|
@@ -92,11 +92,11 @@ async function getHfTree(repo, { branch = "main", fetchImpl = globalThis.fetch }
|
|
|
92
92
|
return await response.json();
|
|
93
93
|
}
|
|
94
94
|
|
|
95
|
-
/** List all GGUF files in a HuggingFace repo with their sizes. */
|
|
95
|
+
/** List all GGUF files in a HuggingFace repo with their sizes (excludes MTP drafters and vision projectors). */
|
|
96
96
|
export async function listGgufFiles(repo, { fetchImpl = globalThis.fetch } = {}) {
|
|
97
97
|
const tree = await getHfTree(repo, { fetchImpl });
|
|
98
98
|
return tree
|
|
99
|
-
.filter((f) => f.type === "file" && f.path.endsWith(".gguf"))
|
|
99
|
+
.filter((f) => f.type === "file" && f.path.endsWith(".gguf") && !isDrafterFile(f.path) && !isMmprojFile(f.path))
|
|
100
100
|
.map((f) => ({
|
|
101
101
|
path: f.path,
|
|
102
102
|
sizeBytes: f.lfs?.size ?? f.size ?? 0,
|
|
@@ -104,6 +104,35 @@ export async function listGgufFiles(repo, { fetchImpl = globalThis.fetch } = {})
|
|
|
104
104
|
.sort((a, b) => a.sizeBytes - b.sizeBytes);
|
|
105
105
|
}
|
|
106
106
|
|
|
107
|
+
/** Check if a GGUF file is an MTP drafter based on its path/name. */
|
|
108
|
+
function isDrafterFile(path) {
|
|
109
|
+
// In an MTP/ subdirectory: MTP/gemma-4-E2B-it-Q8_0-MTP.gguf
|
|
110
|
+
if (path.includes("/MTP/") || path.includes("/mtp/")) return true;
|
|
111
|
+
const name = path.split("/").pop() ?? path;
|
|
112
|
+
// Starts with mtp- or mtp_: mtp-ornith-9b-mtp-kl-Q8_0.gguf
|
|
113
|
+
if (/^mtp[-_]/i.test(name)) return true;
|
|
114
|
+
// Contains -MTP. or -mtp. before extension: gemma-4-E2B-it-Q8_0-MTP.gguf
|
|
115
|
+
if (/-mtp\./i.test(name)) return true;
|
|
116
|
+
return false;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
/** Check if a GGUF file is a vision projector (mmproj) based on its name. */
|
|
120
|
+
function isMmprojFile(path) {
|
|
121
|
+
const name = path.split("/").pop() ?? path;
|
|
122
|
+
return /mmproj/i.test(name);
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
/** List all mmproj (vision projector) GGUF files in a HuggingFace repo. */
|
|
126
|
+
export async function listMmprojFiles(repo, { fetchImpl = globalThis.fetch } = {}) {
|
|
127
|
+
const tree = await getHfTree(repo, { fetchImpl });
|
|
128
|
+
return tree
|
|
129
|
+
.filter((f) => f.type === "file" && f.path.endsWith(".gguf") && isMmprojFile(f.path))
|
|
130
|
+
.map((f) => ({
|
|
131
|
+
path: f.path,
|
|
132
|
+
sizeBytes: f.lfs?.size ?? f.size ?? 0,
|
|
133
|
+
}));
|
|
134
|
+
}
|
|
135
|
+
|
|
107
136
|
/** Fetch model metadata from the HF API. */
|
|
108
137
|
export async function getHfModelInfo(repo, { fetchImpl = globalThis.fetch } = {}) {
|
|
109
138
|
const url = `https://huggingface.co/api/models/${repo}`;
|
|
@@ -166,6 +195,7 @@ export async function resolveHfDownload(input, { fetchImpl = globalThis.fetch }
|
|
|
166
195
|
* @param {object} model - from resolveHfDownload
|
|
167
196
|
* @param {object} [options]
|
|
168
197
|
* @param {string} [options.localDir] - for MLX: target directory
|
|
198
|
+
* @param {string[]} [options.extraFiles] - additional files to download (e.g. mmproj)
|
|
169
199
|
* @returns {Promise<{ localDir: string, format: string }>}
|
|
170
200
|
*/
|
|
171
201
|
export async function downloadModel(model, options = {}) {
|
|
@@ -176,6 +206,10 @@ export async function downloadModel(model, options = {}) {
|
|
|
176
206
|
// Single file to HF cache — scanner finds it there
|
|
177
207
|
await mkdir(HF_HUB_DIR, { recursive: true });
|
|
178
208
|
args.push(model.files[0].filename, "--cache-dir", HF_HUB_DIR);
|
|
209
|
+
// Include additional files (e.g. vision projector) in the same download
|
|
210
|
+
for (const file of options.extraFiles ?? []) {
|
|
211
|
+
args.push(file);
|
|
212
|
+
}
|
|
179
213
|
localDir = HF_HUB_DIR;
|
|
180
214
|
} else if (options.localDir) {
|
|
181
215
|
// Full repo to a flat local directory (oMLX)
|