offgrid-ai 0.9.0 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -25
- package/package.json +1 -1
- package/src/autodetect.mjs +6 -3
- package/src/backends.mjs +5 -10
- package/src/benchmark/pi-runner.mjs +3 -1
- package/src/benchmark/prepare.mjs +2 -1
- package/src/benchmark/stream-renderer.mjs +0 -1
- package/src/commands/run.mjs +6 -1
- package/src/commands/status.mjs +40 -9
- package/src/model-name.mjs +220 -0
- package/src/process.mjs +26 -2
- package/src/scan.mjs +9 -20
package/README.md
CHANGED
|
@@ -2,28 +2,29 @@
|
|
|
2
2
|
|
|
3
3
|
# offgrid-ai
|
|
4
4
|
|
|
5
|
-
**
|
|
5
|
+
**Helper CLI for running local AI models on Mac with llama.cpp, ollama, and oMLX.**
|
|
6
6
|
|
|
7
7
|
[](package.json)
|
|
8
8
|
[]()
|
|
9
9
|
|
|
10
|
-
Install • Pick a model • Start chatting
|
|
11
|
-
```bash
|
|
12
|
-
curl -fsSL https://raw.githubusercontent.com/eeshansrivastava89/offgrid-ai/main/install.sh | bash
|
|
13
|
-
```
|
|
14
10
|
|
|
15
11
|
</div>
|
|
16
12
|
|
|
17
13
|
## What is offgrid-ai?
|
|
18
14
|
|
|
19
|
-
offgrid-ai is a command-line tool that lets you run AI models locally.
|
|
15
|
+
offgrid-ai is a command-line tool that lets you run AI models locally. Running local models with llama.cpp, ollama, or oMLX has a steep learning curve compared to cloud-based models, so offgrid-ai is designed to abstract away the complexity while still providing a powerful and flexible way to run local models.
|
|
16
|
+
|
|
17
|
+
This is the recommended workflow:
|
|
20
18
|
|
|
21
|
-
|
|
19
|
+
1. Download models from **LM Studio**, **Ollama**, or **oMLX**
|
|
20
|
+
2. Do minimal configuration using the `offgrid-ai` command
|
|
21
|
+
3. Run the model with `offgrid-ai` with Pi in interactive mode
|
|
22
22
|
|
|
23
|
-
|
|
24
|
-
-
|
|
25
|
-
-
|
|
26
|
-
-
|
|
23
|
+
## Core Features
|
|
24
|
+
- Auto-detects available models from LM Studio, Ollama, and oMLX
|
|
25
|
+
- Auto-detects MTP (multi-token prediction) or QAT (quantization aware training) models, and applies the correct flags for llama.cpp
|
|
26
|
+
- Auto-applies the optimal flags for the model type in llama.cpp
|
|
27
|
+
- Start / stop llama.cpp server automatically for chat sessions
|
|
27
28
|
|
|
28
29
|
## Quick start
|
|
29
30
|
|
|
@@ -35,7 +36,7 @@ Open your terminal and run:
|
|
|
35
36
|
curl -fsSL https://raw.githubusercontent.com/eeshansrivastava89/offgrid-ai/main/install.sh | bash
|
|
36
37
|
```
|
|
37
38
|
|
|
38
|
-
This installs offgrid-ai and
|
|
39
|
+
This installs offgrid-ai and dependencies (node, npm, and llama.cpp). Then open a new terminal window and run:
|
|
39
40
|
|
|
40
41
|
```bash
|
|
41
42
|
offgrid-ai
|
|
@@ -53,14 +54,8 @@ The curl installer is recommended for first-time setup because it also verifies
|
|
|
53
54
|
|
|
54
55
|
The first time you run offgrid-ai, it looks for models already on your machine. If it does not find any, it tells you how to get one.
|
|
55
56
|
|
|
56
|
-
|
|
57
|
+
<img width="808" height="274" alt="image" src="https://github.com/user-attachments/assets/6e1583ab-65db-423c-b0eb-b627586fbf86" />
|
|
57
58
|
|
|
58
|
-
| Source | Example command |
|
|
59
|
-
|---|---|
|
|
60
|
-
| LM Studio | `lms get qwen/qwen3.5-9b` |
|
|
61
|
-
| Ollama | `ollama pull gemma3:4b` |
|
|
62
|
-
| oMLX | Use `omlx start` |
|
|
63
|
-
| Hugging Face | Download a GGUF file |
|
|
64
59
|
|
|
65
60
|
### 3. Start chatting
|
|
66
61
|
|
|
@@ -68,23 +63,29 @@ Supported ways to get models:
|
|
|
68
63
|
offgrid-ai
|
|
69
64
|
```
|
|
70
65
|
|
|
66
|
+
<img width="786" height="281" alt="image" src="https://github.com/user-attachments/assets/03cb1e06-d461-4bdf-ad82-f0692e5ba5c6" />
|
|
67
|
+
|
|
68
|
+
|
|
71
69
|
Pick a model from the list and press Enter. offgrid-ai configures the rest and opens the Pi coding agent.
|
|
72
70
|
|
|
71
|
+
<img width="786" height="499" alt="image" src="https://github.com/user-attachments/assets/223e1455-c69c-4405-a91c-5bac1b9fc9bd" />
|
|
72
|
+
|
|
73
|
+
|
|
73
74
|
## Everyday commands
|
|
74
75
|
|
|
75
76
|
```bash
|
|
76
|
-
offgrid-ai #
|
|
77
|
-
offgrid-ai status # see
|
|
77
|
+
offgrid-ai # primary entry-point for the CLI
|
|
78
|
+
offgrid-ai status # see if any model is running
|
|
78
79
|
offgrid-ai stop # stop the running model
|
|
79
|
-
offgrid-ai benchmark # run a benchmark
|
|
80
|
+
offgrid-ai benchmark # run a benchmark paired with my local llm benchmark runner
|
|
80
81
|
offgrid-ai uninstall # remove offgrid-ai
|
|
81
82
|
```
|
|
82
83
|
|
|
83
84
|
## What can I do with it?
|
|
84
85
|
|
|
85
|
-
- **Chat with local models** —
|
|
86
|
-
- **Run benchmarks** — compare how different models perform on creative or data-science tasks.
|
|
87
|
-
- **Keep data private** — everything
|
|
86
|
+
- **Chat with local models** — you download the models yourself, and then offgrid-ai helps configure and run them
|
|
87
|
+
- **Run benchmarks** — compare how different models perform on creative or data-science tasks. Pairs with my other [local llm benchmark runner](https://github.com/eeshansrivastava89/local-llm-visual-benchmark)
|
|
88
|
+
- **Keep data private** — everything runs on your machine without any cloud connections
|
|
88
89
|
|
|
89
90
|
## Need help?
|
|
90
91
|
|
package/package.json
CHANGED
package/src/autodetect.mjs
CHANGED
|
@@ -2,13 +2,13 @@ import { basename } from "node:path";
|
|
|
2
2
|
import { existsSync } from "node:fs";
|
|
3
3
|
import { readGgufMetadata } from "./gguf.mjs";
|
|
4
4
|
import { defaultFlagsForBackend } from "./backends.mjs";
|
|
5
|
+
import { parseModelName } from "./model-name.mjs";
|
|
5
6
|
|
|
6
7
|
// ── Detect model capabilities from GGUF metadata ──────────────────────────
|
|
7
8
|
|
|
8
9
|
export function detectCapabilities(modelPath, mmprojPath) {
|
|
9
10
|
const meta = safeReadGgufMetadata(modelPath);
|
|
10
11
|
const mmprojMeta = mmprojPath ? safeReadGgufMetadata(mmprojPath) : {};
|
|
11
|
-
const name = basename(modelPath).toLowerCase();
|
|
12
12
|
const pathHints = String(modelPath).toLowerCase();
|
|
13
13
|
|
|
14
14
|
// Architecture
|
|
@@ -33,8 +33,11 @@ export function detectCapabilities(modelPath, mmprojPath) {
|
|
|
33
33
|
// Do not treat all Qwen models as MTP; require an explicit filename or metadata hint.
|
|
34
34
|
const mtp = /\bmtp\b|draft-mtp|multi-token/i.test(pathHints) || Object.keys(meta).some((key) => /mtp|draft|speculative/i.test(key));
|
|
35
35
|
|
|
36
|
-
// Quantization
|
|
37
|
-
|
|
36
|
+
// Quantization — use parseModelName (single path) for filename-based extraction.
|
|
37
|
+
// GGUF metadata does not store a standardized quant field, so the filename
|
|
38
|
+
// is the authoritative source for quant identification.
|
|
39
|
+
const parsed = parseModelName(basename(modelPath).replace(/\.gguf$/i, ""), "local-gguf");
|
|
40
|
+
const quant = parsed.quant;
|
|
38
41
|
|
|
39
42
|
// Context size from metadata, fallback to name hints
|
|
40
43
|
const metaCtx = architecture
|
package/src/backends.mjs
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { findLlamaServer } from "./config.mjs";
|
|
2
2
|
import { scanGgufModels } from "./scan.mjs";
|
|
3
|
+
import { parseModelName } from "./model-name.mjs";
|
|
3
4
|
|
|
4
5
|
// ── Backend definitions ────────────────────────────────────────────────────
|
|
5
6
|
|
|
@@ -97,7 +98,7 @@ async function scanOllamaModels() {
|
|
|
97
98
|
.filter((model) => isLocalOllamaModel(model))
|
|
98
99
|
.map((model) => ({
|
|
99
100
|
id: model.name,
|
|
100
|
-
label:
|
|
101
|
+
label: parseModelName(model.name, "ollama").display,
|
|
101
102
|
aliasSuggestion: model.name,
|
|
102
103
|
sizeBytes: model.size ?? 0,
|
|
103
104
|
quant: model.details?.quantization_level,
|
|
@@ -120,7 +121,7 @@ async function scanOmlxModels() {
|
|
|
120
121
|
.filter((model) => isChatOmlxModel(model))
|
|
121
122
|
.map((model) => ({
|
|
122
123
|
id: model.id,
|
|
123
|
-
label:
|
|
124
|
+
label: parseModelName(model.id, "omlx").display,
|
|
124
125
|
aliasSuggestion: model.id,
|
|
125
126
|
sizeBytes: 0,
|
|
126
127
|
quant: null,
|
|
@@ -142,15 +143,9 @@ function isLocalOllamaModel(model) {
|
|
|
142
143
|
// Decide whether an oMLX model entry should be offered as a chat model.
// Returns false for entries without a non-empty string `id`, for entries whose
// `type` / `model_type` is in the excluded list below, and for entries that
// carry an explicit `max_model_len: null`; returns true otherwise.
function isChatOmlxModel(model) {
|
|
143
144
|
if (typeof model?.id !== "string" || !model.id.trim()) return false;
|
|
144
145
|
// `type` wins over `model_type`; a missing value becomes "" and is never excluded by the list.
const type = String(model.type ?? model.model_type ?? "").toLowerCase();
|
|
145
|
-
if (["embedding", "embeddings", "reranker", "tool", "converter"].includes(type)) return false;
|
|
146
|
+
if (["embedding", "embeddings", "reranker", "tool", "converter", "markitdown"].includes(type)) return false;
|
|
146
147
|
// Only an own property that is exactly null excludes the entry; an absent property does not.
// NOTE(review): presumably null marks a non-generative entry — confirm against the oMLX API.
if (Object.hasOwn(model, "max_model_len") && model.max_model_len === null) return false;
|
|
147
148
|
return true;
|
|
148
149
|
}
|
|
149
150
|
|
|
150
|
-
|
|
151
|
-
return name.replace(/[-_]/g, " ").replace(/^gemma\b/i, "Gemma").replace(/^qwen/i, "Qwen");
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
function omlxLabel(id) {
|
|
155
|
-
return id.replace(/[-_]/g, " ").replace(/^gemma-4/i, "Gemma 4").replace(/^qwen/i, "Qwen");
|
|
156
|
-
}
|
|
151
|
+
// (ollamaLabel and omlxLabel removed — parseModelName in model-name.mjs is the single path)
|
|
@@ -5,7 +5,7 @@ import { join } from "node:path";
|
|
|
5
5
|
import { spawn } from "node:child_process";
|
|
6
6
|
import {
|
|
7
7
|
BENCH_COLORS, renderStreamEvent,
|
|
8
|
-
formatToolCall,
|
|
8
|
+
formatToolCall, printFinalLine,
|
|
9
9
|
} from "./stream-renderer.mjs";
|
|
10
10
|
import { piModelString } from "./shared.mjs";
|
|
11
11
|
|
|
@@ -212,6 +212,8 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
|
|
|
212
212
|
return;
|
|
213
213
|
}
|
|
214
214
|
|
|
215
|
+
printFinalLine(BENCH_COLORS.info("Pi benchmark finished"));
|
|
216
|
+
|
|
215
217
|
if (runResult.exitCode !== 0) {
|
|
216
218
|
runResult.error = { message: `Pi exited with code ${runResult.exitCode}` };
|
|
217
219
|
resolve(runResult);
|
|
@@ -4,6 +4,7 @@ import { mkdir, writeFile } from "node:fs/promises";
|
|
|
4
4
|
import { join } from "node:path";
|
|
5
5
|
import { pc, renderRows, renderSection } from "../ui.mjs";
|
|
6
6
|
import { slugModelId, createRunId, buildToolPrompt } from "./shared.mjs";
|
|
7
|
+
import { parseModelName } from "../model-name.mjs";
|
|
7
8
|
|
|
8
9
|
function harnessDisplayName(id) {
|
|
9
10
|
if (id === "pi") return "Pi";
|
|
@@ -54,7 +55,7 @@ export async function prepareBenchmarkRun({ repoPath, benchmark, kind, modelId,
|
|
|
54
55
|
kind,
|
|
55
56
|
runId,
|
|
56
57
|
benchmark: { id: benchmark.id, title: benchmark.title, description: benchmark.description, prompt: benchmark.prompt },
|
|
57
|
-
model: { id: modelId, slug: modelSlug },
|
|
58
|
+
model: { id: modelId, slug: modelSlug, displayName: parseModelName(modelId, modelSource === "ollama" ? "ollama" : modelSource === "omlx" ? "omlx" : "local-gguf").display },
|
|
58
59
|
status: "prepared",
|
|
59
60
|
createdAt: now.toISOString(),
|
|
60
61
|
updatedAt: now.toISOString(),
|
package/src/commands/run.mjs
CHANGED
|
@@ -2,7 +2,7 @@ import { existsSync } from "node:fs";
|
|
|
2
2
|
import { ensureDirs } from "../config.mjs";
|
|
3
3
|
import { backendFor } from "../backends.mjs";
|
|
4
4
|
import { normalizeProfile, readProfile, saveProfile } from "../profiles.mjs";
|
|
5
|
-
import { startServer, stopProfile, waitForReady, serverReady, serverMatchesProfile } from "../process.mjs";
|
|
5
|
+
import { startServer, stopProfile, waitForReady, serverReady, serverMatchesProfile, modelAvailableOnServer } from "../process.mjs";
|
|
6
6
|
import { syncPiConfig, hasPiModel, launchPi, hasPi } from "../harness-pi.mjs";
|
|
7
7
|
import { tailFriendly } from "../logs.mjs";
|
|
8
8
|
import { estimateMemory } from "../estimate.mjs";
|
|
@@ -33,6 +33,11 @@ export async function runProfile(profile, options = {}) {
|
|
|
33
33
|
if (!(await serverReady(profile.baseUrl))) {
|
|
34
34
|
throw new Error(`${backend.label} is not running at ${profile.baseUrl}. Start it and try again.`);
|
|
35
35
|
}
|
|
36
|
+
const available = await modelAvailableOnServer(profile);
|
|
37
|
+
if (!available) {
|
|
38
|
+
const modelId = profile.omlxModel ?? profile.ollamaModel ?? profile.modelAlias ?? profile.label;
|
|
39
|
+
throw new Error(`${modelId} is not available on ${backend.label} at ${profile.baseUrl}.`);
|
|
40
|
+
}
|
|
36
41
|
console.log(pc.green(`[ready] ${backend.label} at ${profile.baseUrl}`));
|
|
37
42
|
} else {
|
|
38
43
|
const startup = await ensureLocalServer(profile, backend, options);
|
package/src/commands/status.mjs
CHANGED
|
@@ -13,17 +13,48 @@ export async function statusCommand() {
|
|
|
13
13
|
}
|
|
14
14
|
|
|
15
15
|
const running = statuses.filter((item) => item.status.running);
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
return;
|
|
16
|
+
const managedUpMissing = statuses.filter((item) => {
|
|
17
|
+
const backend = backendFor(item.profile.backend);
|
|
18
|
+
return backend.type === "managed-server" && item.status.serverUp && !item.status.modelAvailable;
|
|
19
|
+
});
|
|
20
|
+
const managedUpNotLoaded = statuses.filter((item) => {
|
|
21
|
+
const backend = backendFor(item.profile.backend);
|
|
22
|
+
return backend.type === "managed-server" && item.status.serverUp && item.status.modelAvailable && !item.status.modelLoaded;
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
const summaryRows = [
|
|
26
|
+
["Running now", running.length > 0 ? pc.green(`${running.length} model${running.length === 1 ? "" : "s"}`) : pc.dim("none")],
|
|
27
|
+
["Ready setups", profiles.length > 0 ? String(profiles.length) : pc.dim("none")],
|
|
28
|
+
];
|
|
29
|
+
|
|
30
|
+
if (managedUpMissing.length > 0) {
|
|
31
|
+
summaryRows.push(["Server up, model missing", pc.yellow(String(managedUpMissing.length))]);
|
|
32
|
+
}
|
|
33
|
+
if (managedUpNotLoaded.length > 0) {
|
|
34
|
+
summaryRows.push(["Server up, model not loaded", pc.yellow(String(managedUpNotLoaded.length))]);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
summaryRows.push(["Next step", profiles.length > 0 ? "Run offgrid-ai to start chatting" : pc.yellow("Run offgrid-ai to set up a model")]);
|
|
38
|
+
|
|
39
|
+
console.log(renderCard("Status", renderRows(summaryRows), { formatBorder: running.length > 0 ? pc.green : pc.dim }));
|
|
40
|
+
|
|
41
|
+
if (managedUpMissing.length > 0 || managedUpNotLoaded.length > 0) {
|
|
42
|
+
const detailRows = [];
|
|
43
|
+
for (const { profile, status } of [...managedUpMissing, ...managedUpNotLoaded]) {
|
|
44
|
+
const backend = backendFor(profile.backend);
|
|
45
|
+
const modelId = profile.omlxModel ?? profile.ollamaModel ?? profile.modelAlias ?? profile.id;
|
|
46
|
+
const state = status.modelAvailable
|
|
47
|
+
? pc.yellow("server up · model not loaded")
|
|
48
|
+
: pc.red("server up · model missing");
|
|
49
|
+
detailRows.push([`${profile.label} (${modelId})`, state]);
|
|
50
|
+
detailRows.push(["Server", `${backend.label} at ${profile.baseUrl}`]);
|
|
51
|
+
}
|
|
52
|
+
console.log("\n" + renderCard("Managed servers", renderRows(detailRows), { formatBorder: pc.yellow }));
|
|
23
53
|
}
|
|
24
54
|
|
|
25
|
-
|
|
26
|
-
|
|
55
|
+
if (running.length === 0) return;
|
|
56
|
+
|
|
57
|
+
console.log("\n" + renderCard("Running", renderRows([
|
|
27
58
|
["Stop", "offgrid-ai stop"],
|
|
28
59
|
]), { formatBorder: pc.green }));
|
|
29
60
|
for (const { profile, status } of running) {
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
// ── Single path for parsing and formatting model names ─────────────────────
//
// Every model display name in offgrid-ai goes through parseModelName().
// No other function should format, title-case, or dissect a model name.
//
// The returned `id` is always the raw identifier (untouched) and is used for
// API calls, profile IDs, Pi config matching, and benchmark directory slugs.
// The returned `display` is the human-readable string shown in pickers, details,
// and benchmark metadata.

// ── Known model families ────────────────────────────────────────────────
//
// Mapped to their title-case form. Matched as prefix tokens so "qwen"
// matches "qwen3", "qwen2.5", etc. A hyphen inside a key ("deepseek-r")
// denotes a series prefix whose number attaches directly ("DeepSeek-R1").

const FAMILY_TITLE_CASE = {
  "deepseek-r": "DeepSeek-R",
  "deepseek": "DeepSeek",
  "starcoder2": "StarCoder2",
  "starcoder": "StarCoder",
  "command-r": "Command-R",
  "command": "Command",
  "codestral": "Codestral",
  "mistral": "Mistral",
  "mixtral": "Mixtral",
  "mathstral": "Mathstral",
  "pixtral": "Pixtral",
  "gemma": "Gemma",
  "qwen": "Qwen",
  "llama": "Llama",
  "phi": "Phi",
  "yi": "Yi",
  "zephyr": "Zephyr",
  "internlm": "InternLM",
  "cohere": "Cohere",
  "falcon": "Falcon",
  "baichuan": "Baichuan",
  "mamba": "Mamba",
  "solar": "Solar",
  "granite": "Granite",
  "dbrx": "DBRX",
  "stablelm": "StableLM",
};

// Sort families by length descending so longer families match first
const SORTED_FAMILIES = Object.keys(FAMILY_TITLE_CASE).sort((a, b) => b.length - a.length);

// One alternation over all families, longest first, applied in a single pass.
// A single pass matters: applying the families one after another let a shorter
// family ("starcoder") re-match inside the already-cased longer one
// ("StarCoder2" → "StarCoder 2"). Hyphens in keys are written as spaces
// because titleCaseModel() turns separators into spaces before matching.
// The trailing lookahead accepts a digit, a non-alphanumeric, or end of input.
const FAMILY_PATTERN = new RegExp(
  `\\b(?:${SORTED_FAMILIES.map((family) => family.replace(/-/g, " ")).join("|")})(?![a-z])`,
  "gi",
);

// Recognises a parameter size ("7b", "0.5B") at the very start of a string.
const PARAM_SIZE_AHEAD = /^\d+(?:\.\d+)?b\b/i;

// ── Quant patterns (order matters — longer/more-specific first) ────────
//
// The separator may be "-", "_" or "." ("llama-2-7b.Q4_K_M" is a common
// filename layout). The size suffix of a K-quant is optional so "Q6_K" is
// recognised, and the IQ pattern consumes digits only before the suffix so
// "IQ4_XS" is captured whole instead of as "IQ4_".

const QUANT_PATTERNS = [
  /[-_.]UD-[A-Z0-9_]+/i,
  /[-_.]IQ\d+(?:_[A-Z]+)?/i,
  /[-_.]Q\d_K(?:_[A-Z]+)?/i,
  /[-_.]Q\d_[01]/i,
  /[-_.]F(?:16|32)/i,
  /[-_.]BF16/i,
];

// ── Tag tokens extracted from the name ──────────────────────────────────

const TAG_TOKENS = [
  "it", "instruct", "chat", "code", "base", "vision", "mtp",
  "mmproj", "draft", "assistant",
];

// Each tag must be a whole hyphen/underscore-delimited token. Compiled once
// instead of on every parseModelName() call.
const TAG_PATTERNS = TAG_TOKENS.map((tag) => ({
  tag,
  pattern: new RegExp(`(?:^|[-_])${tag}(?=[-_]|$)`, "i"),
}));

// ── Main entry point ───────────────────────────────────────────────────

/**
 * Parse a raw model identifier into a structured display name.
 *
 * @param {string} rawId  The raw identifier: GGUF filename (no .gguf),
 *                        Ollama model name, or oMLX model id. A non-string
 *                        value is parsed as its string form (null/undefined
 *                        as "") instead of throwing.
 * @param {"local-gguf"|"ollama"|"omlx"} source  Where this name came from.
 * @returns {{ publisher: string|null, model: string, params: string|null,
 *             quant: string|null, tags: string[], display: string,
 *             sort: string, id: string }}
 */
export function parseModelName(rawId, source) {
  const id = rawId; // never modify the raw id
  const raw = typeof rawId === "string" ? rawId : String(rawId ?? "");

  // 1. Extract publisher (anything before the first /)
  let publisher = null;
  let name = raw;
  const slashIdx = raw.indexOf("/");
  if (slashIdx !== -1) {
    publisher = raw.slice(0, slashIdx);
    name = raw.slice(slashIdx + 1);
  }

  // 2. For Ollama, split on : to separate model from tag (e.g. "gemma3:4b")
  //    The tag after : is a model size/variant identifier — not a GGUF quant.
  let ollamaTag = null;
  if (source === "ollama") {
    const colonIdx = name.lastIndexOf(":");
    if (colonIdx !== -1) {
      ollamaTag = name.slice(colonIdx + 1);
      name = name.slice(0, colonIdx);
    }
  }

  // 3. Extract quant (GGUF quantization suffix); the first pattern that matches wins
  let quant = null;
  for (const pattern of QUANT_PATTERNS) {
    const match = name.match(pattern);
    if (match) {
      quant = match[0].replace(/^[-_.]/, "");
      name = name.slice(0, match.index) + name.slice(match.index + match[0].length);
      break;
    }
  }

  // 4. Extract known tags as hyphen/underscore-delimited tokens.
  //    Only the first occurrence of each tag is removed, together with its
  //    leading separator.
  const tags = [];
  for (const { tag, pattern } of TAG_PATTERNS) {
    if (pattern.test(name)) {
      tags.push(tag);
      name = name.replace(pattern, "");
    }
  }
  // Clean up leftover separators
  name = name.replace(/[-_]{2,}/g, "-").replace(/^[-_]+|[-_]+$/g, "");

  // 5. For Ollama, re-attach the tag as part of the model name
  //    (Ollama tags like "4b" or "30b-a3b" are size variants, not quants)
  if (ollamaTag) {
    name = `${name}-${ollamaTag}`;
  }

  // 6. Title-case the remaining model name
  let model = titleCaseModel(name);

  // If nothing is left after parsing, fall back to the raw name
  if (!model || model.trim() === "") {
    model = raw.includes("/") ? raw : raw.replace(/[-_]/g, " ");
  }

  // 7. Extract params (size like 30B, 12B) for sort/filter convenience
  const params = extractParams(model);

  // 8. Build display string
  const display = buildDisplay(publisher, model, tags, quant);

  // 9. Build sort key (lowercase, no publisher, for alphabetical ordering)
  const sort = model.toLowerCase().replace(/[-_]/g, " ");

  return { publisher, model, params, quant, tags, display, sort, id };
}

// ── Display builder ────────────────────────────────────────────────────

/**
 * Join publisher, model (with its tags in parentheses) and quant with " › ".
 * Publisher and quant are omitted when falsy.
 */
function buildDisplay(publisher, model, tags, quant) {
  const parts = [];
  if (publisher) {
    parts.push(publisher);
  }
  let modelPart = model;
  if (tags.length > 0) {
    modelPart += ` (${tags.join(", ")})`;
  }
  parts.push(modelPart);
  if (quant) {
    parts.push(quant);
  }
  return parts.join(" › ");
}

// ── Params extraction ──────────────────────────────────────────────────

/**
 * Return the first standalone parameter size in a title-cased model name
 * ("Qwen 3 30B A3B" → "30B"), or null when there is none. "A3B" is skipped
 * because there is no word boundary between "A" and "3".
 */
function extractParams(model) {
  const match = model.match(/\b(\d+(?:\.\d+)?)\s*B\b/);
  return match ? `${match[1]}B` : null;
}

// ── Title-case model names ────────────────────────────────────────────

/**
 * Turn a separator-delimited model name into its display form:
 * separators become spaces, known families get their canonical casing,
 * and size / series tokens are upper-cased.
 */
function titleCaseModel(name) {
  // Replace hyphens and underscores with spaces
  let result = name.replace(/[-_]/g, " ");

  // Title-case known families (prefix match so "qwen" matches "qwen3", etc.)
  result = result.replace(FAMILY_PATTERN, (match, offset, whole) => {
    const key = match.toLowerCase().replace(/ /g, "-");
    const title = FAMILY_TITLE_CASE[key];
    const rest = whole.slice(offset + match.length);
    if (!/^[0-9]/.test(rest)) return title;
    // A digit follows. Plain families get a space before their version
    // ("qwen3" → "Qwen 3"). Series prefixes keep their number attached
    // ("deepseek r1" → "DeepSeek-R1") unless the digits are a parameter
    // size ("command r7b" → "Command-R 7B").
    const isSeriesNumber = key.includes("-") && !PARAM_SIZE_AHEAD.test(rest);
    return isSeriesNumber ? title : `${title} `;
  });

  // Title-case param sizes (30b → 30B, 12b → 12B, 0.5b → 0.5B)
  result = result.replace(/\b(\d+(?:\.\d+)?)\s*[bB]\b/g, (_, num) => `${num}B`);

  // Title-case "it" and "instruct" if they survived tag extraction
  result = result.replace(/\bit\b/g, "IT");
  result = result.replace(/\binstruct\b/gi, "Instruct");

  // Title-case "r1", "r2" etc. (DeepSeek-R1, etc.)
  result = result.replace(/\br(\d+)\b/gi, (_, num) => `R${num}`);

  // Title-case standalone aXb patterns (A3B, A12B — active parameters)
  result = result.replace(/\ba(\d+)\s*b\b/gi, (_, num) => `A${num}B`);

  // Clean up extra spaces
  result = result.replace(/\s{2,}/g, " ").trim();

  return result;
}
|
package/src/process.mjs
CHANGED
|
@@ -132,12 +132,27 @@ export async function modelLoadedOnServer(profile) {
|
|
|
132
132
|
return matches;
|
|
133
133
|
}
|
|
134
134
|
|
|
135
|
+
/**
 * Report whether the model a profile expects is known to its backend server.
 *
 * For the "ollama" backend the expected ids are matched against the ids from
 * ollamaAvailableModelIds(); for "omlx" against serverModelIds(profile.baseUrl)
 * (/v1/models lists discovered models; an ID must exist there to be usable).
 * Every other backend resolves to true: local servers are tied to a specific
 * model file via their command argv.
 *
 * @param {object} profile  Profile providing `backend` and `baseUrl`.
 * @returns {Promise<boolean>}
 */
export async function modelAvailableOnServer(profile) {
  const { id: backendId } = backendFor(profile.backend);
  switch (backendId) {
    case "ollama": {
      const installedIds = await ollamaAvailableModelIds(profile);
      return modelIdsMatch(installedIds, expectedModelIds(profile));
    }
    case "omlx": {
      const listedIds = await serverModelIds(profile.baseUrl);
      return modelIdsMatch(listedIds, expectedModelIds(profile));
    }
    default:
      return true;
  }
}
|
|
147
|
+
|
|
135
148
|
export async function profileRuntimeStatus(profile) {
|
|
136
149
|
const backend = backendFor(profile.backend);
|
|
137
150
|
if (backend.type === "managed-server") {
|
|
138
151
|
const ready = await serverReady(profile.baseUrl);
|
|
139
|
-
const modelLoaded = ready
|
|
140
|
-
|
|
152
|
+
const [modelLoaded, modelAvailable] = ready
|
|
153
|
+
? await Promise.all([modelLoadedOnServer(profile), modelAvailableOnServer(profile)])
|
|
154
|
+
: [false, false];
|
|
155
|
+
return { state: null, pid: null, running: ready && modelLoaded, ready, serverUp: ready, modelLoaded, modelAvailable, rssBytes: null, startedAt: null };
|
|
141
156
|
}
|
|
142
157
|
const state = await readState(profile.id);
|
|
143
158
|
const running = Boolean(state?.pid && pidAlive(state.pid));
|
|
@@ -211,6 +226,15 @@ async function ollamaLoadedModelIds(profile) {
|
|
|
211
226
|
.filter(Boolean);
|
|
212
227
|
}
|
|
213
228
|
|
|
229
|
+
/**
 * Collect the model identifiers the server lists under `/api/tags`.
 *
 * Both the `name` and the `model` field of each listed entry are gathered,
 * stringified and trimmed; empty values are dropped. Resolves to an empty
 * array when the request is not ok or the payload has no `models` array.
 *
 * @param {object} profile  Profile providing `baseUrl`.
 * @returns {Promise<string[]>}
 */
async function ollamaAvailableModelIds(profile) {
  const response = await fetchJson(`${apiRootUrl(profile.baseUrl)}/api/tags`);
  if (!response.ok) return [];

  const listed = response.data?.models;
  const ids = [];
  if (!Array.isArray(listed)) return ids;

  for (const entry of listed) {
    for (const candidate of [entry?.name, entry?.model]) {
      const trimmed = String(candidate ?? "").trim();
      if (trimmed) ids.push(trimmed);
    }
  }
  return ids;
}
|
|
237
|
+
|
|
214
238
|
async function omlxLoadedModelIds(profile) {
|
|
215
239
|
const statusResult = await fetchJson(`${profile.baseUrl.replace(/\/+$/u, "")}/models/status`);
|
|
216
240
|
const fromStatus = statusResult.ok
|
package/src/scan.mjs
CHANGED
|
@@ -3,6 +3,7 @@ import { readdir } from "node:fs/promises";
|
|
|
3
3
|
import { basename, dirname, join } from "node:path";
|
|
4
4
|
import { getModelScanDirs } from "./config.mjs";
|
|
5
5
|
import { readGgufMetadata } from "./gguf.mjs";
|
|
6
|
+
import { parseModelName } from "./model-name.mjs";
|
|
6
7
|
|
|
7
8
|
// ── Scan for GGUF models and MTP drafters ────────────────────────────────
|
|
8
9
|
|
|
@@ -48,6 +49,7 @@ async function scanOneDir(root) {
|
|
|
48
49
|
const mmprojPath = mmprojs.find((candidate) => dirname(candidate) === dir) ?? null;
|
|
49
50
|
const name = basename(path).replace(/\.gguf$/i, "");
|
|
50
51
|
const sizeBytes = statSync(path).size;
|
|
52
|
+
const parsed = parseModelName(name, "local-gguf");
|
|
51
53
|
|
|
52
54
|
// Read GGUF metadata to detect drafter architecture
|
|
53
55
|
const meta = safeReadGgufMetadata(path);
|
|
@@ -57,9 +59,9 @@ async function scanOneDir(root) {
|
|
|
57
59
|
// This is an MTP drafter model, not a main model
|
|
58
60
|
drafters.push({
|
|
59
61
|
path,
|
|
60
|
-
label:
|
|
61
|
-
aliasSuggestion:
|
|
62
|
-
quant:
|
|
62
|
+
label: parsed.display,
|
|
63
|
+
aliasSuggestion: parsed.id,
|
|
64
|
+
quant: parsed.quant,
|
|
63
65
|
sizeBytes,
|
|
64
66
|
architecture,
|
|
65
67
|
targetHint: drafterTargetHint(name),
|
|
@@ -70,9 +72,9 @@ async function scanOneDir(root) {
|
|
|
70
72
|
models.push({
|
|
71
73
|
path,
|
|
72
74
|
mmprojPath,
|
|
73
|
-
label:
|
|
74
|
-
aliasSuggestion:
|
|
75
|
-
quant:
|
|
75
|
+
label: parsed.display,
|
|
76
|
+
aliasSuggestion: parsed.id,
|
|
77
|
+
quant: parsed.quant,
|
|
76
78
|
sizeBytes,
|
|
77
79
|
backend: "llama-cpp",
|
|
78
80
|
source: "local-gguf",
|
|
@@ -143,20 +145,7 @@ async function findFiles(root, predicate) {
|
|
|
143
145
|
return result;
|
|
144
146
|
}
|
|
145
147
|
|
|
146
|
-
|
|
147
|
-
return name
|
|
148
|
-
.replace(/-/g, " ")
|
|
149
|
-
.replace(/\bqwen/i, "Qwen")
|
|
150
|
-
.replace(/q4_k_m/i, "Q4_K_M");
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
function aliasFromName(name) {
|
|
154
|
-
return name.replace(/-Q4_K_M$/i, "-GGUF");
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
function quantFromName(name) {
|
|
158
|
-
return name.match(/(Q\d_K_[A-Z]+|Q\d_[01]|UD-[A-Z0-9_]+)/)?.[1];
|
|
159
|
-
}
|
|
148
|
+
// (labelFromName, aliasFromName, quantFromName removed — parseModelName in model-name.mjs is the single path)
|
|
160
149
|
|
|
161
150
|
|
|
162
151
|
function safeReadGgufMetadata(path) {
|