offgrid-ai 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -0
- package/bin/offgrid-ai.mjs +10 -0
- package/install.sh +162 -0
- package/package.json +51 -0
- package/src/autodetect.mjs +113 -0
- package/src/backends.mjs +135 -0
- package/src/cli.mjs +603 -0
- package/src/config.mjs +102 -0
- package/src/estimate.mjs +113 -0
- package/src/gguf.mjs +70 -0
- package/src/harness-pi.mjs +140 -0
- package/src/json.mjs +16 -0
- package/src/logs.mjs +42 -0
- package/src/process.mjs +175 -0
- package/src/profiles.mjs +165 -0
- package/src/scan.mjs +78 -0
- package/src/ui.mjs +102 -0
package/README.md
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# offgrid-ai
|
|
4
|
+
|
|
5
|
+
**Privacy-first CLI for running local LLMs. Your AI, your machine, nothing leaves.**
|
|
6
|
+
|
|
7
|
+
[](package.json)
|
|
8
|
+
[]()
|
|
9
|
+
|
|
10
|
+
Install • Run • Done.
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
curl -fsSL https://raw.githubusercontent.com/eeshansrivastava89/offgrid-ai/main/install.sh | bash
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
</div>
|
|
17
|
+
|
|
18
|
+
## What it does
|
|
19
|
+
|
|
20
|
+
You run `offgrid-ai`. It finds your local models, auto-configures everything, starts the server, and launches Pi. Zero configuration. No parameter tuning. No presets.
|
|
21
|
+
|
|
22
|
+
**First run** walks you through installing anything missing (Homebrew, llama-server). After that, just run it.
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
offgrid-ai # pick a model and run it
|
|
26
|
+
offgrid-ai status # show running servers (from another terminal)
|
|
27
|
+
offgrid-ai stop # stop a running server
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Install
|
|
31
|
+
|
|
32
|
+
One command. Installs Node.js if you don't have it, then installs offgrid-ai.
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
curl -fsSL https://raw.githubusercontent.com/eeshansrivastava89/offgrid-ai/main/install.sh | bash
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Or if you already have Node.js:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
npm install -g offgrid-ai
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Or review the install script first:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
curl -fsSL https://raw.githubusercontent.com/eeshansrivastava89/offgrid-ai/main/install.sh | less
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## How it works
|
|
51
|
+
|
|
52
|
+
1. **Auto-detect everything.** Scans for GGUF models in LM Studio, HuggingFace, and Ollama directories. Reads model metadata (quantization, context size, vision, thinking mode) directly from the GGUF file. No presets, no manual configuration.
|
|
53
|
+
|
|
54
|
+
2. **One command to run.** `offgrid-ai` → pick a model → it figures out the flags, starts llama-server, syncs Pi config, and launches Pi.
|
|
55
|
+
|
|
56
|
+
3. **One model at a time.** Laptops have limited RAM. One server, one model, no confusion.
|
|
57
|
+
|
|
58
|
+
## Supported backends
|
|
59
|
+
|
|
60
|
+
| Backend | Type | Auto-detected |
|
|
61
|
+
|---|---|---|
|
|
62
|
+
| **llama.cpp** | Local server | ✓ GGUF models in `~/.lmstudio/models/` |
|
|
63
|
+
| **llama.cpp MTP** | Local server (speculative decoding) | ✓ MTP detected from model metadata |
|
|
64
|
+
| **Ollama** | Managed server | ✓ via `localhost:11434` |
|
|
65
|
+
| **oMLX** | Managed server | ✓ via `127.0.0.1:8000` |
|
|
66
|
+
|
|
67
|
+
## First run onboarding
|
|
68
|
+
|
|
69
|
+
When you run `offgrid-ai` for the first time on a fresh machine:
|
|
70
|
+
|
|
71
|
+
1. **Homebrew** — Required. If it is missing, offgrid-ai offers to install it.
|
|
72
|
+
2. **llama-server** — Required for GGUF models. If it is missing, offgrid-ai offers to install it via Homebrew.
|
|
73
|
+
3. **Model backend** — At least one is needed: LM Studio, Ollama, or oMLX.
|
|
74
|
+
4. **Models** — If no models are found, offgrid-ai tells you where to get them.
|
|
75
|
+
|
|
76
|
+
Subsequent runs skip everything that's already installed.
|
|
77
|
+
|
|
78
|
+
## Data directory
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
~/.offgrid-ai/
|
|
82
|
+
config.json # auto-detected paths, editable for overrides
|
|
83
|
+
profiles/ # one per model, auto-created on first run
|
|
84
|
+
<id>/
|
|
85
|
+
profile.json # model metadata + auto-detected settings
|
|
86
|
+
command.json # llama-server flags (auto-generated, hand-editable)
|
|
87
|
+
notes.md # scratch notes
|
|
88
|
+
logs/
|
|
89
|
+
run/ # PID state files
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Benchmark (coming soon)
|
|
93
|
+
|
|
94
|
+
"Benchmark" is always shown as an option in the CLI. If the [local-llm-visual-benchmark](https://github.com/eeshansrivastava89/local-llm-visual-benchmark) repo is found locally, it works. If not, it offers to clone it. Model management works standalone; benchmarking is the upsell.
|
|
95
|
+
|
|
96
|
+
## Development
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
git clone https://github.com/eeshansrivastava89/offgrid-ai.git
|
|
100
|
+
cd offgrid-ai
|
|
101
|
+
npm install
|
|
102
|
+
node bin/offgrid-ai.mjs
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## License
|
|
106
|
+
|
|
107
|
+
MIT licensed. Personal project by [Eeshan Srivastava](https://eeshans.com).
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import pc from "picocolors";
|
|
3
|
+
import { run } from "../src/cli.mjs";
|
|
4
|
+
|
|
5
|
+
// Entry point: hand every argument after `node <script>` to the CLI runner.
// Any failure is reduced to a single red "error:" line and exit status 1.
const cliArgs = process.argv.slice(2);

try {
  await run(cliArgs);
} catch (failure) {
  // Non-Error throwables (strings, plain objects) are stringified as-is.
  let reason;
  if (failure instanceof Error) {
    reason = failure.message;
  } else {
    reason = String(failure);
  }
  console.error(pc.red("error:"), reason);
  process.exit(1);
}
|
package/install.sh
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# offgrid-ai installer
|
|
3
|
+
#
|
|
4
|
+
# Install:
|
|
5
|
+
# curl -fsSL https://raw.githubusercontent.com/eeshansrivastava89/offgrid-ai/main/install.sh | bash
|
|
6
|
+
#
|
|
7
|
+
# Or review first:
|
|
8
|
+
# curl -fsSL https://raw.githubusercontent.com/eeshansrivastava89/offgrid-ai/main/install.sh | less
|
|
9
|
+
#
|
|
10
|
+
# What this does:
|
|
11
|
+
# 1. Checks for Node.js
|
|
12
|
+
# 2. If not found, installs it via nvm (no sudo needed)
|
|
13
|
+
# 3. Installs offgrid-ai globally via npm
|
|
14
|
+
# 4. Offers to run offgrid-ai (only when stdin is a terminal and --no-run is not given)
|
|
15
|
+
#
|
|
16
|
+
# Flags:
|
|
17
|
+
# --dry-run Show what would happen without making changes
|
|
18
|
+
# --no-run Install but don't launch offgrid-ai after
|
|
19
|
+
# --help Show this help
|
|
20
|
+
#
|
|
21
|
+
# This script never uses sudo. Everything installs to user-writable directories.
|
|
22
|
+
|
|
23
|
+
set -euo pipefail
|
|
24
|
+
|
|
25
|
+
# ── Flags ───────────────────────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
# Defaults: make real changes, and offer to launch offgrid-ai at the end.
DRY_RUN=false
SKIP_RUN=false

# Walk every command-line argument; anything unrecognised is ignored.
for arg; do
  if [[ "$arg" == "--dry-run" ]]; then
    DRY_RUN=true
    echo "[dry-run] No changes will be made."
  elif [[ "$arg" == "--no-run" ]]; then
    SKIP_RUN=true
  elif [[ "$arg" == "--help" || "$arg" == "-h" ]]; then
    echo "Usage: curl -fsSL <url> | bash -s -- [--dry-run] [--no-run]"
    exit 0
  fi
done
|
|
37
|
+
|
|
38
|
+
# Run the given command line, unless --dry-run is active, in which case the
# command is only printed (prefixed with "[dry-run]") and success is returned.
dry() {
  if ! $DRY_RUN; then
    "$@"
    return
  fi
  printf "[dry-run] %s\n" "$*"
  return 0
}
|
|
39
|
+
|
|
40
|
+
# ── Output helpers ──────────────────────────────────────────────────────────
|
|
41
|
+
|
|
42
|
+
# ANSI escape sequences. They are interpolated into printf *format* strings so
# that printf itself expands the \033 escapes.
BOLD='\033[1m' RESET='\033[0m' GREEN='\033[32m' YELLOW='\033[33m' BLUE='\033[34m' RED='\033[31m'

# info/ok/warn print one status line to stdout with a coloured marker.
info() { printf "${BLUE}→${RESET} %s\n" "$*"; }
ok() { printf "${GREEN}✓${RESET} %s\n" "$*"; }
warn() { printf "${YELLOW}!${RESET} %s\n" "$*"; }

# fail prints the error to stderr (so it is not lost when stdout is captured
# or redirected) and aborts the installer with exit status 1.
fail() { printf "${RED}✗${RESET} %s\n" "$*" >&2; exit 1; }
|
|
47
|
+
|
|
48
|
+
# ── Detect OS ───────────────────────────────────────────────────────────────
|
|
49
|
+
|
|
50
|
+
# Normalise the kernel name and machine type reported by uname into the short
# identifiers used below (macos/linux and x64/arm64). Anything else aborts.
OS="$(uname -s)"
ARCH="$(uname -m)"

if [[ "$OS" == "Darwin" ]]; then
  OS="macos"
elif [[ "$OS" == "Linux" ]]; then
  OS="linux"
else
  fail "Unsupported OS: $OS. offgrid-ai requires macOS or Linux."
fi

if [[ "$ARCH" == "x86_64" || "$ARCH" == "amd64" ]]; then
  ARCH="x64"
elif [[ "$ARCH" == "arm64" || "$ARCH" == "aarch64" ]]; then
  ARCH="arm64"
else
  fail "Unsupported architecture: $ARCH"
fi

info "Detected: ${OS}-${ARCH}"
|
|
63
|
+
|
|
64
|
+
# ── Check for Node.js ───────────────────────────────────────────────────────
|
|
65
|
+
|
|
66
|
+
# Use an existing Node.js when one is on PATH; otherwise bootstrap it via nvm.
if command -v node &>/dev/null; then
  NODE_VERSION="$(node --version 2>/dev/null || echo "unknown")"
  ok "Node.js ${NODE_VERSION} found at $(command -v node)"
else
  echo ""
  printf "${BOLD}offgrid-ai needs Node.js.${RESET}\n"
  printf "It will be installed now via nvm (Node Version Manager).\n"
  printf "This installs to your home directory — no sudo needed.\n"
  echo ""

  # Install nvm
  export NVM_DIR="${NVM_DIR:-$HOME/.nvm}"
  NVM_INSTALL_URL="https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh"
  info "Installing nvm..."
  if $DRY_RUN; then
    dry curl -fsSL "$NVM_INSTALL_URL" -o "<temporary file>"
    dry bash "<temporary file>"
  else
    # Download to a freshly created, unpredictable temp file rather than a
    # fixed /tmp path, so another local user cannot pre-create or swap the
    # script that is about to be executed.
    nvm_installer="$(mktemp "${TMPDIR:-/tmp}/nvm-install.XXXXXX")"
    trap 'rm -f "$nvm_installer"' EXIT
    curl -fsSL "$NVM_INSTALL_URL" -o "$nvm_installer"
    bash "$nvm_installer"
    rm -f "$nvm_installer"
    trap - EXIT
  fi

  # Source nvm so the `nvm` shell function exists in this session.
  if ! $DRY_RUN; then
    if [ -s "$NVM_DIR/nvm.sh" ]; then
      \. "$NVM_DIR/nvm.sh"
    else
      fail "nvm install finished but $NVM_DIR/nvm.sh is missing; cannot install Node.js."
    fi
  fi

  # Install Node.js LTS
  info "Installing Node.js LTS..."
  dry nvm install --lts

  # Verify
  if $DRY_RUN || command -v node &>/dev/null; then
    ok "Node.js $(node --version 2>/dev/null || echo 'installed') installed via nvm."
  else
    # nvm added to shell profile but not active in this session
    ok "Node.js installed via nvm."
    echo ""
    warn "Node.js was installed but your shell doesn't see it yet."
    # Point at the directory nvm was actually installed to (NVM_DIR may be overridden).
    echo " Restart your terminal, or run: source $NVM_DIR/nvm.sh"
    echo " Then re-run this installer."
    exit 0
  fi
fi
|
|
104
|
+
|
|
105
|
+
# ── Install offgrid-ai ──────────────────────────────────────────────────────
|
|
106
|
+
|
|
107
|
+
# Print the green "ready" banner. $1 is an optional suffix for the headline.
ready_banner() {
  printf "${BOLD}${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}\n"
  printf "${BOLD}${GREEN} offgrid-ai is ready!${1:-}${RESET}\n"
  printf "${BOLD}${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}\n"
}

# Print the closing "Run: offgrid-ai" hint surrounded by blank lines.
run_hint() {
  echo ""
  echo " Run: offgrid-ai"
  echo ""
}

echo ""
printf "${BOLD}Installing offgrid-ai...${RESET}\n"
dry npm install -g offgrid-ai

# ── Verify ───────────────────────────────────────────────────────────────────

# Dry run: nothing was installed, so report and stop here.
if $DRY_RUN; then
  ok "offgrid-ai installed (dry-run)"
  echo ""
  ready_banner " (dry-run)"
  run_hint
  exit 0
fi

# Installed but not reachable from this shell: explain how to pick it up and stop.
if ! command -v offgrid-ai &>/dev/null; then
  echo ""
  warn "offgrid-ai was installed but isn't on your PATH yet."
  echo " Restart your terminal and run: offgrid-ai"
  echo " Or run: source ~/.nvm/nvm.sh && offgrid-ai"
  echo ""
  ready_banner
  run_hint
  exit 0
fi

ok "offgrid-ai installed at $(command -v offgrid-ai)"

# ── Done ─────────────────────────────────────────────────────────────────────

echo ""
ready_banner
echo ""
echo " First run will walk you through setting up everything you need"
echo " (llama-server, model backends, Pi)."
run_hint

# Only prompt when stdin is a terminal and --no-run was not given.
if [[ -t 0 ]] && ! $SKIP_RUN; then
  printf "${BOLD}Run offgrid-ai now? [Y/n]${RESET} "
  read -r response
  # An empty answer counts as "Y"; only a lone Y or y launches the CLI.
  case "${response:-Y}" in
    Y|y) exec offgrid-ai ;;
  esac
fi
|
package/package.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "offgrid-ai",
|
|
3
|
+
"version": "0.1.2",
|
|
4
|
+
"description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
|
|
5
|
+
"author": "Eeshan Srivastava (https://eeshans.com)",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"bin": {
|
|
8
|
+
"offgrid-ai": "./bin/offgrid-ai.mjs"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"bin/*.mjs",
|
|
12
|
+
"src/*.mjs",
|
|
13
|
+
"install.sh"
|
|
14
|
+
],
|
|
15
|
+
"publishConfig": {
|
|
16
|
+
"access": "public"
|
|
17
|
+
},
|
|
18
|
+
"repository": {
|
|
19
|
+
"type": "git",
|
|
20
|
+
"url": "git+https://github.com/eeshansrivastava89/offgrid-ai.git"
|
|
21
|
+
},
|
|
22
|
+
"homepage": "https://github.com/eeshansrivastava89/offgrid-ai#readme",
|
|
23
|
+
"bugs": {
|
|
24
|
+
"url": "https://github.com/eeshansrivastava89/offgrid-ai/issues"
|
|
25
|
+
},
|
|
26
|
+
"engines": {
|
|
27
|
+
"node": ">=20"
|
|
28
|
+
},
|
|
29
|
+
"scripts": {
|
|
30
|
+
"start": "node bin/offgrid-ai.mjs",
|
|
31
|
+
"test": "node --test test/*.mjs",
|
|
32
|
+
"check:privacy": "node scripts/privacy-gate.mjs",
|
|
33
|
+
"release:check": "bash scripts/release-check.sh",
|
|
34
|
+
"release:check:fast": "bash scripts/release-check.sh --skip-install --skip-manual",
|
|
35
|
+
"prepack": "npm run check:privacy"
|
|
36
|
+
},
|
|
37
|
+
"dependencies": {
|
|
38
|
+
"@clack/prompts": "^1.4.0",
|
|
39
|
+
"picocolors": "^1.1.0"
|
|
40
|
+
},
|
|
41
|
+
"keywords": [
|
|
42
|
+
"local-llm",
|
|
43
|
+
"llama-cpp",
|
|
44
|
+
"ollama",
|
|
45
|
+
"cli",
|
|
46
|
+
"privacy",
|
|
47
|
+
"llm",
|
|
48
|
+
"ai"
|
|
49
|
+
],
|
|
50
|
+
"license": "MIT"
|
|
51
|
+
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import { basename } from "node:path";
|
|
2
|
+
import { existsSync } from "node:fs";
|
|
3
|
+
import { readGgufMetadata } from "./gguf.mjs";
|
|
4
|
+
|
|
5
|
+
// ── Detect model capabilities from GGUF metadata ──────────────────────────
|
|
6
|
+
|
|
7
|
+
/**
 * Derive model capabilities from GGUF metadata and file-name hints.
 *
 * Metadata is read only when `modelPath` exists on disk; otherwise an empty
 * metadata object is used and every decision falls back to the file name.
 *
 * @param {string} modelPath - Path to the model file.
 * @param {string} [mmprojPath] - Optional path to a multimodal projector file.
 * @returns {{architecture: (string|null), thinking: boolean, vision: boolean,
 *   mtp: boolean, quant: (string|null), metaCtx: (number|undefined),
 *   ctxSize: number, meta: object}} Detected capabilities plus the raw metadata.
 */
export function detectCapabilities(modelPath, mmprojPath) {
  const meta = existsSync(modelPath) ? readGgufMetadata(modelPath) : {};
  // Lower-cased so all name-based checks below are case-insensitive.
  const name = basename(modelPath).toLowerCase();

  // Architecture
  const architecture = meta["general.architecture"] ?? null;

  // Thinking / reasoning mode: either declared in metadata or hinted by the name.
  const hasThinkingKwargs = meta["chat_template_kwargs"] !== undefined;
  const nameHintsThinking = /qwen3|gemma-4|gemma4|deepseek-r[12]/i.test(name);
  const thinking = hasThinkingKwargs || nameHintsThinking;

  // Vision — mmproj present. Coerced so the result is always a real boolean
  // rather than leaking undefined / "" / null from a missing mmprojPath.
  const vision = Boolean(mmprojPath) && existsSync(mmprojPath);

  // MTP (multi-token prediction) — detect speculative decoding
  const mtp = /mtp/i.test(name) || architecture === "qwen3";

  // Quantization, taken from the (lower-cased) file name.
  const quant = name.match(/(Q\d_K_[A-Z]+|UD-[A-Z0-9_]+)/i)?.[1] ?? null;

  // Context size from metadata; without it, default to 80000 for thinking
  // models and 32768 otherwise.
  const metaCtx = architecture
    ? numberMeta(meta, `${architecture}.context_length`)
    : undefined;
  const ctxSize = metaCtx ?? (thinking ? 80000 : 32768);

  return { architecture, thinking, vision, mtp, quant, metaCtx, ctxSize, meta };
}
|
|
36
|
+
|
|
37
|
+
// ── Compute llama-server flags from capabilities ───────────────────────────
|
|
38
|
+
|
|
39
|
+
/**
 * Translate detected capabilities into llama-server settings.
 *
 * @param {object} capabilities - Reads `thinking`, `mtp`, `quant` and `ctxSize`.
 * @param {string} modelPath - Value passed to `--model`.
 * @param {string} [mmprojPath] - When truthy, passed to `--mmproj`.
 * @param {string} [draftModelPath] - When truthy, passed to `--spec-draft-model`.
 * @returns {{flags: object, argv: string[]}} The structured settings and the
 *   matching command-line argument list.
 */
export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath) {
  const useThinking = capabilities.thinking;
  const useMtp = capabilities.mtp;
  const quantLabel = capabilities.quant;

  // Quant labels containing "q4_" or "q40" switch the KV cache from bf16 to f16.
  const lowMemQuant = quantLabel ? /[Qq]4[_0]/i.test(quantLabel) : false;
  const kvCacheType = lowMemQuant ? "f16" : "bf16";

  const flags = {
    host: "127.0.0.1",
    port: 8080,
    ctxSize: capabilities.ctxSize,
    flashAttention: "on",
    cacheTypeK: kvCacheType,
    cacheTypeV: kvCacheType,
    jinja: true,
    temperature: 0.6,
    topP: 0.95,
    topK: useThinking ? 64 : 20,
    minP: 0,
    presencePenalty: useThinking ? 0 : 1.5,
    repeatPenalty: useThinking ? 1.1 : 1.0,
    parallel: 1,
    batchSize: 512,
    // Thinking mode: only present on the flags object when enabled.
    ...(useThinking ? { chatTemplateKwargs: { enable_thinking: true } } : {}),
  };

  // Build argv in one pass; optional groups collapse to nothing when unused.
  const argv = [
    "--model", modelPath,
    ...(mmprojPath ? ["--mmproj", mmprojPath] : []),
    ...(draftModelPath ? ["--spec-draft-model", draftModelPath] : []),
    "--host", String(flags.host),
    "--port", String(flags.port),
    "--ctx-size", String(flags.ctxSize),
    "--flash-attn", flags.flashAttention,
    "--cache-type-k", flags.cacheTypeK,
    "--cache-type-v", flags.cacheTypeV,
    ...(flags.jinja ? ["--jinja"] : []),
    "--temp", String(flags.temperature),
    "--top-p", String(flags.topP),
    "--top-k", String(flags.topK),
    // min-p is always rendered with two decimals (e.g. "0.00").
    "--min-p", flags.minP.toFixed(2),
    "--presence-penalty", String(flags.presencePenalty),
    "--repeat-penalty", String(flags.repeatPenalty),
    "--batch-size", String(flags.batchSize),
    "--parallel", String(flags.parallel),
    ...(flags.chatTemplateKwargs
      ? ["--chat-template-kwargs", JSON.stringify(flags.chatTemplateKwargs)]
      : []),
    // MTP flags
    ...(useMtp ? ["--spec-type", "draft-mtp", "--spec-draft-n-max", "2"] : []),
  ];

  return { flags, argv };
}
|
|
107
|
+
|
|
108
|
+
// ── Internal helper ─────────────────────────────────────────────────────
|
|
109
|
+
|
|
110
|
+
/**
 * Read a finite numeric metadata value.
 *
 * @param {object} meta - Metadata map; only accessed when `key` is truthy.
 * @param {string} [key] - Metadata key to look up.
 * @returns {number|undefined} The value when it is a finite number, else undefined.
 */
function numberMeta(meta, key) {
  if (!key) return undefined;
  const candidate = meta[key];
  // Number.isFinite is false for every non-number, NaN and ±Infinity.
  return Number.isFinite(candidate) ? candidate : undefined;
}
|
package/src/backends.mjs
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import { findLlamaServer } from "./config.mjs";
|
|
2
|
+
import { scanGgufModels } from "./scan.mjs";
|
|
3
|
+
|
|
4
|
+
// ── Backend definitions ────────────────────────────────────────────────────
|
|
5
|
+
|
|
6
|
+
/**
 * Registry of supported backends, keyed by backend id.
 *
 * Each entry carries its display label, server type ("local-server" or
 * "managed-server"), provider id, default port and base URL, whether it needs
 * a command file / model file, and a `scanModels` function that delegates to
 * the matching model scanner. The MTP variant additionally lists `extraArgs`.
 */
export const BACKENDS = Object.fromEntries(
  [
    {
      id: "llama-cpp",
      label: "llama.cpp",
      type: "local-server",
      providerId: "llama-cpp",
      defaultPort: 8080,
      defaultBaseUrl: "http://127.0.0.1:8080/v1",
      needsCommandFile: true,
      needsModelFile: true,
      scanModels: () => scanGgufModels(),
    },
    {
      id: "llama-cpp-mtp",
      label: "llama.cpp MTP",
      type: "local-server",
      providerId: "llama-cpp-mtp",
      defaultPort: 8081,
      defaultBaseUrl: "http://127.0.0.1:8081/v1",
      needsCommandFile: true,
      needsModelFile: true,
      extraArgs: ["--spec-type", "draft-mtp", "--spec-draft-n-max", "2"],
      scanModels: () => scanGgufModels(),
    },
    {
      id: "ollama",
      label: "Ollama",
      type: "managed-server",
      providerId: "ollama",
      defaultPort: 11434,
      defaultBaseUrl: "http://localhost:11434/v1",
      needsCommandFile: false,
      needsModelFile: false,
      scanModels: () => scanOllamaModels(),
    },
    {
      id: "omlx",
      label: "oMLX",
      type: "managed-server",
      providerId: "omlx",
      defaultPort: 8000,
      defaultBaseUrl: "http://127.0.0.1:8000/v1",
      needsCommandFile: false,
      needsModelFile: false,
      scanModels: () => scanOmlxModels(),
    },
    // Index the list by id so the registry keeps the declaration order above.
  ].map((definition) => [definition.id, definition]),
);
|
|
53
|
+
|
|
54
|
+
/**
 * Look up a backend definition by id.
 *
 * @param {string} [backendId] - Backend id; null/undefined selects "llama-cpp".
 * @returns {object} The matching entry of `BACKENDS`.
 * @throws {Error} "Unknown backend: <id>" when the id is not a registered backend.
 */
export function backendFor(backendId) {
  const id = backendId ?? "llama-cpp";
  // Own-property check: a plain lookup would also resolve inherited keys such
  // as "constructor" or "toString" and hand back something that is not a backend.
  if (!Object.hasOwn(BACKENDS, id)) throw new Error(`Unknown backend: ${backendId}`);
  return BACKENDS[id];
}
|
|
59
|
+
|
|
60
|
+
/**
 * Guess which llama.cpp backend a model or profile belongs to.
 *
 * The identifying fields are concatenated and searched case-insensitively for
 * "mtp"; a hit selects the MTP backend, anything else the plain one.
 *
 * @param {object} [modelOrProfile] - May be null/undefined; missing fields are skipped.
 * @returns {"llama-cpp-mtp"|"llama-cpp"} The inferred backend id.
 */
export function inferBackendId(modelOrProfile) {
  const identifyingFields = ["path", "modelPath", "label", "modelAlias", "id", "providerId", "backend"];

  const fragments = [];
  for (const field of identifyingFields) {
    const value = modelOrProfile?.[field];
    if (value) fragments.push(value);
  }

  const searchable = fragments.join(" ").toLowerCase();
  return searchable.includes("mtp") ? "llama-cpp-mtp" : "llama-cpp";
}
|
|
73
|
+
|
|
74
|
+
/**
 * Resolve the server binary a backend needs.
 *
 * @param {string} [backendId] - Backend id; null/undefined selects "llama-cpp".
 * @returns {Promise<*>} null for managed-server backends (no binary to launch);
 *   otherwise whatever `findLlamaServer()` resolves to — null there means
 *   "not found — trigger onboarding".
 * @throws {Error} "Unknown backend: <id>" for an unregistered id, instead of
 *   failing with a TypeError while reading `type` of undefined.
 */
export async function backendBinaryFor(backendId) {
  // Shared lookup so unknown ids are reported the same way everywhere.
  const backend = backendFor(backendId);
  if (backend.type === "managed-server") return null;
  return await findLlamaServer();
}
|
|
80
|
+
|
|
81
|
+
// ── Ollama model discovery ──────────────────────────────────────────────
|
|
82
|
+
|
|
83
|
+
/**
 * List the models reported by `http://localhost:11434/api/tags`.
 *
 * Best-effort: a failed or timed-out request (3 s), a non-OK response, an
 * unexpected payload, or any error while mapping all yield an empty list.
 *
 * @returns {Promise<object[]>} Model entries sorted by label.
 */
async function scanOllamaModels() {
  const toEntry = (model) => ({
    id: model.name,
    label: ollamaLabel(model.name),
    aliasSuggestion: model.name,
    sizeBytes: model.size ?? 0,
    quant: model.details?.quantization_level,
    family: model.details?.family,
    backend: "ollama",
    source: "ollama",
  });

  try {
    const reply = await fetch("http://localhost:11434/api/tags", { signal: AbortSignal.timeout(3000) });
    if (!reply.ok) return [];

    const payload = await reply.json();
    const tags = payload?.models;
    if (!Array.isArray(tags)) return [];

    const entries = tags.map(toEntry);
    entries.sort((left, right) => left.label.localeCompare(right.label));
    return entries;
  } catch {
    return [];
  }
}
|
|
103
|
+
|
|
104
|
+
// ── oMLX model discovery ───────────────────────────────────────────────
|
|
105
|
+
|
|
106
|
+
/**
 * List the models reported by `http://127.0.0.1:8000/v1/models`.
 *
 * Best-effort: a failed or timed-out request (3 s), a non-OK response, an
 * unexpected payload, or any error while mapping all yield an empty list.
 * Size, quantization and family are not taken from the response and are
 * filled with 0 / null.
 *
 * @returns {Promise<object[]>} Model entries sorted by label.
 */
async function scanOmlxModels() {
  const toEntry = (model) => ({
    id: model.id,
    label: omlxLabel(model.id),
    aliasSuggestion: model.id,
    sizeBytes: 0,
    quant: null,
    family: null,
    backend: "omlx",
    source: "omlx",
  });

  try {
    const reply = await fetch("http://127.0.0.1:8000/v1/models", { signal: AbortSignal.timeout(3000) });
    if (!reply.ok) return [];

    const payload = await reply.json();
    const listed = payload?.data;
    if (!Array.isArray(listed)) return [];

    const entries = listed.map(toEntry);
    entries.sort((left, right) => left.label.localeCompare(right.label));
    return entries;
  } catch {
    return [];
  }
}
|
|
126
|
+
|
|
127
|
+
// ── Labels ──────────────────────────────────────────────────────────────
|
|
128
|
+
|
|
129
|
+
/**
 * Turn an Ollama model name into a display label: hyphens and underscores
 * become spaces and a leading "gemma" / "qwen" is capitalised.
 *
 * @param {string} name - Model name as reported by Ollama, e.g. "gemma3:4b".
 * @returns {string} Human-readable label.
 */
function ollamaLabel(name) {
  return name
    .replace(/[-_]/g, " ")
    // No trailing \b: names such as "gemma3" have no word boundary after
    // "gemma", so the boundary stopped them from being capitalised at all.
    .replace(/^gemma/i, "Gemma")
    .replace(/^qwen/i, "Qwen");
}
|
|
132
|
+
|
|
133
|
+
/**
 * Turn an oMLX model id into a display label: hyphens and underscores become
 * spaces and a leading "gemma 4" / "qwen" is capitalised.
 *
 * @param {string} id - Model id as reported by the oMLX server.
 * @returns {string} Human-readable label.
 */
function omlxLabel(id) {
  return id
    .replace(/[-_]/g, " ")
    // Separators are already spaces at this point, so the prefix must be
    // matched as "gemma 4"; a "gemma-4" pattern here could never match.
    .replace(/^gemma 4/i, "Gemma 4")
    .replace(/^qwen/i, "Qwen");
}
|