offgrid-ai 0.18.0 → 0.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -16
- package/package.json +1 -1
- package/src/estimate.mjs +12 -4
- package/src/huggingface.mjs +14 -2
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
# offgrid-ai
|
|
4
4
|
|
|
5
|
-
**
|
|
5
|
+
**Run local AI models on your machine — pick, configure, and chat.**
|
|
6
6
|
|
|
7
7
|
[](package.json)
|
|
8
8
|
[]()
|
|
@@ -12,19 +12,23 @@
|
|
|
12
12
|
|
|
13
13
|
## What is offgrid-ai?
|
|
14
14
|
|
|
15
|
-
offgrid-ai is a command-line tool that lets you run AI models locally. Running local models with llama-server or oMLX
|
|
15
|
+
offgrid-ai is a command-line tool that lets you run AI models locally. Running local models with llama-server or oMLX has a steep learning curve compared to cloud-based models, so offgrid-ai is designed to abstract away the complexity while still providing a powerful and flexible way to run local models.
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
The recommended workflow:
|
|
18
18
|
|
|
19
|
-
1. Download models from **LM Studio
|
|
20
|
-
2.
|
|
21
|
-
3.
|
|
19
|
+
1. Download models from **HuggingFace** (or use models you already have from LM Studio, oMLX, etc.)
|
|
20
|
+
2. Configure using the `offgrid-ai` interactive setup
|
|
21
|
+
3. Start chatting in **Pi** — offgrid-ai handles the server lifecycle
|
|
22
22
|
|
|
23
23
|
## Core Features
|
|
24
|
-
|
|
25
|
-
-
|
|
26
|
-
- Auto-
|
|
27
|
-
-
|
|
24
|
+
|
|
25
|
+
- **Download models** from HuggingFace with a quant picker and RAM fit indicators
|
|
26
|
+
- **Auto-detects** models from LM Studio, oMLX, and HuggingFace cache
|
|
27
|
+
- **Glass-box setup** — every configuration flag gets an explanation card with tradeoffs and memory impact
|
|
28
|
+
- **Model management** — delete models from disk, remove configurations, reconfigure settings
|
|
29
|
+
- **Auto-detects MTP** (multi-token prediction) and **QAT** (quantization-aware training) models, applies the correct flags
|
|
30
|
+
- **Start / stop servers** automatically for chat sessions (llama-server and oMLX)
|
|
31
|
+
- **oMLX integration** — auto-start, MTP enable via admin API, restart after download/deletion
|
|
28
32
|
|
|
29
33
|
## Quick start
|
|
30
34
|
|
|
@@ -52,7 +56,7 @@ The curl installer is recommended for first-time setup because it also verifies
|
|
|
52
56
|
|
|
53
57
|
### 2. Pick a model
|
|
54
58
|
|
|
55
|
-
The first time you run offgrid-ai, it looks for models already on your machine. If it
|
|
59
|
+
The first time you run offgrid-ai, it looks for models already on your machine. If it doesn't find any, you can download one directly from HuggingFace — just pick "↓ Download a model" and enter a repo ID (e.g. `unsloth/gemma-4-E2B-it-GGUF`).
|
|
56
60
|
|
|
57
61
|
<img width="808" height="274" alt="image" src="https://github.com/user-attachments/assets/6e1583ab-65db-423c-b0eb-b627586fbf86" />
|
|
58
62
|
|
|
@@ -74,16 +78,17 @@ Pick a model from the list and press Enter. offgrid-ai configures the rest and o
|
|
|
74
78
|
## Everyday commands
|
|
75
79
|
|
|
76
80
|
```bash
|
|
77
|
-
offgrid-ai #
|
|
81
|
+
offgrid-ai # model picker — pick, configure, download, or manage models
|
|
78
82
|
offgrid-ai status # see if any model is running
|
|
79
83
|
offgrid-ai stop # stop the running model
|
|
80
84
|
offgrid-ai uninstall # remove offgrid-ai
|
|
81
85
|
```
|
|
82
86
|
|
|
83
|
-
##
|
|
87
|
+
## Platform support
|
|
84
88
|
|
|
85
|
-
- **
|
|
86
|
-
- **
|
|
89
|
+
- **macOS (Apple Silicon)** — full support: llama.cpp (GGUF) + oMLX (MLX)
|
|
90
|
+
- **Linux** — llama.cpp (GGUF) only. oMLX is Apple Silicon exclusive.
|
|
91
|
+
- **Windows** — not supported
|
|
87
92
|
|
|
88
93
|
## Need help?
|
|
89
94
|
|
|
@@ -104,4 +109,4 @@ node bin/offgrid-ai.mjs
|
|
|
104
109
|
|
|
105
110
|
## License
|
|
106
111
|
|
|
107
|
-
Personal project by [Eeshan Srivastava](https://eeshans.com).
|
|
112
|
+
Personal project by [Eeshan Srivastava](https://eeshans.com).
|
package/package.json
CHANGED
package/src/estimate.mjs
CHANGED
|
@@ -10,8 +10,13 @@ export function estimateMemory(modelPath, mmprojPath, draftModelPath, flags) {
|
|
|
10
10
|
const prefix = typeof architecture === "string" ? architecture : null;
|
|
11
11
|
const layers = numberMeta(metadata, prefix && `${prefix}.block_count`);
|
|
12
12
|
const headKv = numberOrArrayMeta(metadata, prefix && `${prefix}.attention.head_count_kv`);
|
|
13
|
-
const
|
|
14
|
-
const
|
|
13
|
+
const embeddingLength = numberMeta(metadata, prefix && `${prefix}.embedding_length`);
|
|
14
|
+
const headCount = numberMeta(metadata, prefix && `${prefix}.attention.head_count`);
|
|
15
|
+
// key_length and value_length are not always present in GGUF metadata.
|
|
16
|
+
// llama.cpp derives them from embedding_length / head_count when missing.
|
|
17
|
+
const defaultLength = embeddingLength && headCount ? embeddingLength / headCount : undefined;
|
|
18
|
+
const keyLength = numberOrArrayMeta(metadata, prefix && `${prefix}.attention.key_length`) ?? defaultLength;
|
|
19
|
+
const valueLength = numberOrArrayMeta(metadata, prefix && `${prefix}.attention.value_length`) ?? defaultLength;
|
|
15
20
|
const slidingWindow = numberMeta(metadata, prefix && `${prefix}.attention.sliding_window`);
|
|
16
21
|
const slidingWindowPattern = booleanArrayMeta(metadata, prefix && `${prefix}.attention.sliding_window_pattern`);
|
|
17
22
|
const keyLengthSwa = numberMeta(metadata, prefix && `${prefix}.attention.key_length_swa`);
|
|
@@ -65,10 +70,13 @@ function estimateKvBytes(input) {
|
|
|
65
70
|
keyLength = input.keyLengthSwa ?? keyLength;
|
|
66
71
|
valueLength = input.valueLengthSwa ?? valueLength;
|
|
67
72
|
}
|
|
68
|
-
if (
|
|
73
|
+
if (headKv == null || keyLength == null || valueLength == null) {
|
|
69
74
|
return { bytes: 0, note: "KV estimate unavailable: incomplete layer-specific GGUF metadata.", mode: "unknown" };
|
|
70
75
|
}
|
|
71
|
-
|
|
76
|
+
// Layers with headKv = 0 have no KV cache — skip them
|
|
77
|
+
if (headKv && keyLength && valueLength) {
|
|
78
|
+
total += layerCtx * parallel * headKv * ((keyLength * bytesK) + (valueLength * bytesV));
|
|
79
|
+
}
|
|
72
80
|
}
|
|
73
81
|
return { bytes: total, note: "", mode: input.slidingWindowPattern?.length ? "layered-swa" : "layered" };
|
|
74
82
|
}
|
package/src/huggingface.mjs
CHANGED
|
@@ -92,11 +92,11 @@ async function getHfTree(repo, { branch = "main", fetchImpl = globalThis.fetch }
|
|
|
92
92
|
return await response.json();
|
|
93
93
|
}
|
|
94
94
|
|
|
95
|
-
/** List all GGUF files in a HuggingFace repo with their sizes. */
|
|
95
|
+
/** List all GGUF files in a HuggingFace repo with their sizes (excludes MTP drafters). */
|
|
96
96
|
export async function listGgufFiles(repo, { fetchImpl = globalThis.fetch } = {}) {
|
|
97
97
|
const tree = await getHfTree(repo, { fetchImpl });
|
|
98
98
|
return tree
|
|
99
|
-
.filter((f) => f.type === "file" && f.path.endsWith(".gguf"))
|
|
99
|
+
.filter((f) => f.type === "file" && f.path.endsWith(".gguf") && !isDrafterFile(f.path))
|
|
100
100
|
.map((f) => ({
|
|
101
101
|
path: f.path,
|
|
102
102
|
sizeBytes: f.lfs?.size ?? f.size ?? 0,
|
|
@@ -104,6 +104,18 @@ export async function listGgufFiles(repo, { fetchImpl = globalThis.fetch } = {})
|
|
|
104
104
|
.sort((a, b) => a.sizeBytes - b.sizeBytes);
|
|
105
105
|
}
|
|
106
106
|
|
|
107
|
+
/**
 * Check if a GGUF file is an MTP drafter based on its path/name.
 *
 * A file counts as a drafter when any of the following holds:
 *  - one of its directory segments is named "mtp" (any letter case),
 *    e.g. `MTP/gemma-4-E2B-it-Q8_0-MTP.gguf`;
 *  - its file name starts with `mtp-` or `mtp_`,
 *    e.g. `mtp-ornith-9b-mtp-kl-Q8_0.gguf`;
 *  - its file name contains `-mtp.` (any letter case),
 *    e.g. `gemma-4-E2B-it-Q8_0-MTP.gguf`.
 *
 * @param {string} path - File path using "/" separators; may or may not start with a slash.
 * @returns {boolean} True when the path looks like an MTP drafter file.
 */
function isDrafterFile(path) {
  const segments = path.split("/");
  const name = segments.pop() ?? path;
  // Compare whole directory segments instead of searching for "/MTP/":
  // a path such as "MTP/model.gguf" has no leading slash, so a substring
  // search for "/MTP/" would never match a top-level MTP folder.
  if (segments.some((dir) => dir.toLowerCase() === "mtp")) return true;
  // Starts with mtp- or mtp_: mtp-ornith-9b-mtp-kl-Q8_0.gguf
  if (/^mtp[-_]/i.test(name)) return true;
  // Contains -MTP. or -mtp. before extension: gemma-4-E2B-it-Q8_0-MTP.gguf
  if (/-mtp\./i.test(name)) return true;
  return false;
}
|
|
118
|
+
|
|
107
119
|
/** Fetch model metadata from the HF API. */
|
|
108
120
|
export async function getHfModelInfo(repo, { fetchImpl = globalThis.fetch } = {}) {
|
|
109
121
|
const url = `https://huggingface.co/api/models/${repo}`;
|