preppergpt 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -10
- package/compose/preppergpt.yaml +2 -5
- package/docs/hardware.md +12 -4
- package/docs/model-sources.md +3 -1
- package/docs/preppergpt-local-parity-map.md +4 -0
- package/installer/cli.mjs +53 -11
- package/installer/lib/detect.mjs +269 -5
- package/installer/lib/planner.mjs +75 -8
- package/installer/lib/render.mjs +62 -4
- package/package.json +1 -1
- package/profiles/models.json +17 -9
package/README.md
CHANGED
|
@@ -6,9 +6,11 @@ uses upstream OpenWebUI for the app shell and adds a hardware detector, model
|
|
|
6
6
|
planner, Docker Compose runtime, local sidecars, and a practical PrepperGPT
|
|
7
7
|
field-kit theme.
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
PrepperGPT supports Linux first, including NVIDIA CUDA GPUs, Linux AMD ROCm
|
|
10
|
+
GPUs, and CPU fallback where possible. Windows users should install and run it
|
|
11
|
+
inside WSL2; native Windows installs are intentionally rejected until the native
|
|
12
|
+
runtime path is reliable. PrepperGPT is an online installer: model and container
|
|
13
|
+
downloads require a working network during setup.
|
|
12
14
|
|
|
13
15
|
PrepperGPT optimizes for survivability over cloud-like latency. On very large
|
|
14
16
|
local models, very low tokens/sec is acceptable because the alternative in the
|
|
@@ -16,11 +18,17 @@ target scenario is no assistant at all.
|
|
|
16
18
|
|
|
17
19
|
## Install
|
|
18
20
|
|
|
19
|
-
Install from npm:
|
|
21
|
+
Install and start from npm in one command:
|
|
20
22
|
|
|
21
23
|
```bash
|
|
22
|
-
npx preppergpt install --profile balanced
|
|
23
|
-
|
|
24
|
+
npx --yes preppergpt install --profile balanced --start
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Or use two commands:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
npx --yes preppergpt install --profile balanced
|
|
31
|
+
npx --yes preppergpt start
|
|
24
32
|
```
|
|
25
33
|
|
|
26
34
|
Or install globally:
|
|
@@ -40,11 +48,14 @@ node bin/preppergpt.js install --profile balanced
|
|
|
40
48
|
node bin/preppergpt.js start
|
|
41
49
|
```
|
|
42
50
|
|
|
51
|
+
Windows users should install Ubuntu in WSL2, enable Docker Desktop's WSL
|
|
52
|
+
integration, and run the npm or GitHub install commands inside the WSL2 shell.
|
|
53
|
+
|
|
43
54
|
Other profiles:
|
|
44
55
|
|
|
45
56
|
```bash
|
|
46
|
-
preppergpt install --profile intelligence
|
|
47
|
-
preppergpt install --profile speed
|
|
57
|
+
npx --yes preppergpt install --profile intelligence
|
|
58
|
+
npx --yes preppergpt install --profile speed
|
|
48
59
|
```
|
|
49
60
|
|
|
50
61
|
Open the app at:
|
|
@@ -61,7 +72,7 @@ Change them before exposing the machine to any network.
|
|
|
61
72
|
```bash
|
|
62
73
|
preppergpt detect
|
|
63
74
|
preppergpt plan --profile balanced
|
|
64
|
-
preppergpt install --profile balanced
|
|
75
|
+
preppergpt install --profile balanced --start
|
|
65
76
|
preppergpt start
|
|
66
77
|
preppergpt stop
|
|
67
78
|
preppergpt status
|
|
@@ -76,7 +87,8 @@ preppergpt bundle whisper
|
|
|
76
87
|
machine, preferring GLM 5.2 Q8 on enterprise hardware, then GLM 5.2 Q4, then
|
|
77
88
|
long-context coding routes when available.
|
|
78
89
|
- `speed`: chooses smaller GPU-friendly routes and makes low-latency chat the
|
|
79
|
-
default.
|
|
90
|
+
default. NVIDIA hosts use CUDA container access; Linux AMD hosts use the
|
|
91
|
+
Ollama ROCm image and ROCm device mounts when ROCm is detected.
|
|
80
92
|
- `balanced`: uses the local auto-router as the default and keeps reasoning,
|
|
81
93
|
coding, research, vision, image, and STT routes additive.
|
|
82
94
|
|
|
@@ -95,6 +107,10 @@ GLM 5.2 Q8/Q4 and Flux weights are marked as manual or external in
|
|
|
95
107
|
`profiles/models.json`. `preppergpt doctor` reports which selected routes still
|
|
96
108
|
need local files or endpoints.
|
|
97
109
|
|
|
110
|
+
AMD acceleration requires a Linux ROCm host with `rocm-smi` or `rocminfo`
|
|
111
|
+
available. AMD cards detected without ROCm stay on CPU-compatible routes and
|
|
112
|
+
receive a doctor warning instead of a broken GPU configuration.
|
|
113
|
+
|
|
98
114
|
The GLM 5.2 Q8 route is intended for an enterprise/off-grid bunker-class host:
|
|
99
115
|
large RAM, fast NVMe, and patience for slow local generation when no hosted
|
|
100
116
|
service remains available.
|
package/compose/preppergpt.yaml
CHANGED
|
@@ -88,7 +88,7 @@ services:
|
|
|
88
88
|
PORT: "${PREPPERGPT_PORT:-8080}"
|
|
89
89
|
|
|
90
90
|
ollama:
|
|
91
|
-
image: ollama/ollama:latest
|
|
91
|
+
image: ${OLLAMA_IMAGE:-ollama/ollama:latest}
|
|
92
92
|
container_name: preppergpt-ollama
|
|
93
93
|
restart: unless-stopped
|
|
94
94
|
network_mode: host
|
|
@@ -187,9 +187,6 @@ services:
|
|
|
187
187
|
network_mode: host
|
|
188
188
|
volumes:
|
|
189
189
|
- ${PREPPERGPT_DATA_DIR:?set PREPPERGPT_DATA_DIR}/local-agent:/data
|
|
190
|
-
- /tmp/.X11-unix:/tmp/.X11-unix:rw
|
|
191
|
-
- ${XDG_RUNTIME_DIR:-/run/user/1000}:${XDG_RUNTIME_DIR:-/run/user/1000}:rw
|
|
192
|
-
- ${XAUTHORITY:-/tmp/.preppergpt-missing-xauthority}:/tmp/.Xauthority:ro
|
|
193
190
|
environment:
|
|
194
191
|
LOCAL_AGENT_HOST: "127.0.0.1"
|
|
195
192
|
LOCAL_AGENT_PORT: "18043"
|
|
@@ -210,7 +207,7 @@ services:
|
|
|
210
207
|
LOCAL_AGENT_TIKA_URL: "http://127.0.0.1:9998/tika"
|
|
211
208
|
LOCAL_AGENT_SCHEDULER_URL: "http://127.0.0.1:18042"
|
|
212
209
|
LOCAL_AGENT_PLAYWRIGHT_WS_URL: "ws://127.0.0.1:18045"
|
|
213
|
-
LOCAL_AGENT_DESKTOP_ENABLED: "${LOCAL_AGENT_DESKTOP_ENABLED:-
|
|
210
|
+
LOCAL_AGENT_DESKTOP_ENABLED: "${LOCAL_AGENT_DESKTOP_ENABLED:-0}"
|
|
214
211
|
DISPLAY: "${DISPLAY:-}"
|
|
215
212
|
WAYLAND_DISPLAY: "${WAYLAND_DISPLAY:-}"
|
|
216
213
|
XDG_RUNTIME_DIR: "${XDG_RUNTIME_DIR:-/run/user/1000}"
|
package/docs/hardware.md
CHANGED
|
@@ -1,14 +1,19 @@
|
|
|
1
1
|
# Hardware Guide
|
|
2
2
|
|
|
3
|
-
PrepperGPT works best on Linux with
|
|
4
|
-
model weights.
|
|
5
|
-
|
|
6
|
-
|
|
3
|
+
PrepperGPT works best on Linux with a supported GPU and enough NVMe space for
|
|
4
|
+
model weights. NVIDIA CUDA is supported, Linux AMD ROCm is supported when
|
|
5
|
+
`rocm-smi` or `rocminfo` is available, and CPU fallback remains available for
|
|
6
|
+
smaller routes. Windows users should run PrepperGPT inside WSL2; native Windows
|
|
7
|
+
installs are rejected with WSL2 guidance. PrepperGPT is designed for post-apocalyptic or
|
|
8
|
+
long-duration outage scenarios, so the high-end GLM tiers deliberately favor
|
|
9
|
+
local availability and answer quality over hosted-service latency.
|
|
7
10
|
|
|
8
11
|
Recommended starting points:
|
|
9
12
|
|
|
10
13
|
- Speed profile: 16 GB RAM, 8-12 GB VRAM, 40 GB free disk.
|
|
11
14
|
- Balanced profile: 32-64 GB RAM, 12-24 GB VRAM, 120 GB free disk.
|
|
15
|
+
- Linux AMD ROCm profile: 32-128 GB RAM, 12-24+ GB AMD VRAM, ROCm tools
|
|
16
|
+
installed, and Docker access to `/dev/kfd` and `/dev/dri`.
|
|
12
17
|
- Intelligence profile: 96 GB RAM or more, fast NVMe, and hundreds of GB free
|
|
13
18
|
for GLM 5.2 Q4 or similar large weights.
|
|
14
19
|
- Enterprise 8-bit GLM tier: 256 GB RAM or more, 48-80 GB VRAM preferred,
|
|
@@ -27,6 +32,9 @@ for situations where there is no cloud model to fall back to.
|
|
|
27
32
|
| --- | --- | --- |
|
|
28
33
|
| Basic CPU laptop | 16 GB RAM, no GPU, 80 GB disk | `local-chatgpt-auto`, `llama3.1:8b`, `local-vision-moondream2`, bundled Whisper |
|
|
29
34
|
| Mid NVIDIA | 64 GB RAM, 12 GB usable VRAM, 250 GB disk | Gemma fast lane, Qwen coder fallback, local vision, bundled Whisper |
|
|
35
|
+
| Mid AMD ROCm | 64 GB RAM, 12-24 GB usable AMD VRAM, ROCm on Linux, 250 GB disk | Ollama ROCm Gemma fast lane, Qwen coder fallback, local vision through Ollama, bundled Whisper |
|
|
36
|
+
| AMD without ROCm | 32-64 GB RAM, AMD GPU detected, no ROCm tools | CPU-compatible routes plus a doctor warning to install ROCm for acceleration |
|
|
37
|
+
| Windows WSL2 | 32+ GB RAM, Docker Desktop WSL integration, WSL2 Ubuntu | Linux-style install inside WSL2; native Windows install is rejected |
|
|
30
38
|
| High NVIDIA | 128 GB RAM, 24 GB VRAM, 750 GB NVMe | GLM 5.2 Q4 configured, Slopcode/Qwen configured, Gemma fast lane, Flux configured |
|
|
31
39
|
| Full PrepperGPT rig | 128+ GB RAM, 24+ GB VRAM, 1 TB NVMe, GLM/Slopcode/Flux files present | GLM 5.2 Q4 primary, Slopcode coding, Gemma fast lane, Deep Research, Agent, Vision, Flux, Whisper |
|
|
32
40
|
| Enterprise 8-bit GLM rig | 256+ GB RAM, 48-80+ GB VRAM preferred, 1.5-2 TB fast NVMe | `glm52-q8-local` primary for Max Intelligence, `glm52-q4-local` fallback, Slopcode/Qwen coding, Gemma fast lane, full sidecar stack |
|
package/docs/model-sources.md
CHANGED
|
@@ -2,7 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
PrepperGPT separates routing from model licensing and distribution.
|
|
4
4
|
|
|
5
|
-
- Ollama models are pulled by the local Ollama runtime when available.
|
|
5
|
+
- Ollama models are pulled by the local Ollama runtime when available. NVIDIA
|
|
6
|
+
hosts use the standard Ollama image; Linux AMD ROCm hosts use
|
|
7
|
+
`ollama/ollama:rocm` with `/dev/kfd` and `/dev/dri` exposed.
|
|
6
8
|
- Whisper Base STT is installer-cached from `Systran/faster-whisper-base`
|
|
7
9
|
under the local PrepperGPT model directory and mounted into OpenWebUI.
|
|
8
10
|
- Hugging Face vision models are downloaded by the local vision sidecar.
|
|
@@ -14,5 +14,9 @@ local use when hosted AI services are unavailable:
|
|
|
14
14
|
- Local vision sidecar at `http://127.0.0.1:18044/v1`
|
|
15
15
|
- SearXNG, Tika, Jupyter, and ComfyUI support services
|
|
16
16
|
|
|
17
|
+
Hardware support is additive: Linux NVIDIA uses CUDA container access, Linux AMD
|
|
18
|
+
uses ROCm when available, CPU fallback remains available, and Windows support is
|
|
19
|
+
through WSL2 rather than native Windows.
|
|
20
|
+
|
|
17
21
|
The local goal is functional local parity for common ChatGPT workflows, not
|
|
18
22
|
hosted frontier-model quality or cloud account continuity.
|
package/installer/cli.mjs
CHANGED
|
@@ -2,12 +2,12 @@ import fs from "node:fs";
|
|
|
2
2
|
import http from "node:http";
|
|
3
3
|
import { ensureWhisperBundle, modelDirs, whisperBundleStatus } from "./lib/bundles.mjs";
|
|
4
4
|
import { detectMachine } from "./lib/detect.mjs";
|
|
5
|
-
import { buildPlan, normalizeProfile } from "./lib/planner.mjs";
|
|
5
|
+
import { buildPlan, installSupportError, normalizeProfile } from "./lib/planner.mjs";
|
|
6
6
|
import { packageRoot, runtimePaths } from "./lib/paths.mjs";
|
|
7
7
|
import { renderInstall } from "./lib/render.mjs";
|
|
8
8
|
import { commandResult, parseArgs, readJson, shellQuote } from "./lib/util.mjs";
|
|
9
9
|
|
|
10
|
-
const VERSION = "0.1.
|
|
10
|
+
const VERSION = "0.1.4";
|
|
11
11
|
|
|
12
12
|
function usage() {
|
|
13
13
|
return `PrepperGPT ${VERSION}
|
|
@@ -15,7 +15,7 @@ function usage() {
|
|
|
15
15
|
Usage:
|
|
16
16
|
preppergpt detect [--json]
|
|
17
17
|
preppergpt plan --profile balanced|intelligence|speed [--json]
|
|
18
|
-
preppergpt install --profile balanced|intelligence|speed [--dry-run] [--skip-bundles] [--home PATH]
|
|
18
|
+
preppergpt install --profile balanced|intelligence|speed [--start] [--dry-run] [--skip-bundles] [--home PATH]
|
|
19
19
|
preppergpt start [--home PATH]
|
|
20
20
|
preppergpt stop [--home PATH]
|
|
21
21
|
preppergpt status [--home PATH] [--json]
|
|
@@ -34,6 +34,22 @@ function profileFrom(flags) {
|
|
|
34
34
|
return normalizeProfile(flags.profile || flags.mode || "balanced");
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
+
/**
 * Summarize which required host tools were found.
 *
 * @param {object} detection Detection result; `detection.tools` may be absent.
 * @returns {Object<string, boolean>} Presence flags keyed by display label.
 *   Python counts as present when either `python3` or `python` was found.
 */
function requiredToolStatuses(detection) {
  const tools = detection.tools;
  const has = (toolName) => Boolean(tools?.[toolName]);
  return {
    docker: has("docker"),
    dockerCompose: has("dockerCompose"),
    curl: has("curl"),
    "python3 or python": has("python3") || has("python")
  };
}
|
|
45
|
+
|
|
46
|
+
/**
 * Abort when the detected host cannot be installed on.
 *
 * @param {object} detection Detection result passed through to `installSupportError`.
 * @throws {Error} Carrying the message returned by `installSupportError`, when it returns one.
 */
function assertSupportedInstall(detection) {
  const reason = installSupportError(detection);
  if (!reason) {
    return;
  }
  throw new Error(reason);
}
|
|
52
|
+
|
|
37
53
|
/**
 * Build the shared argument prefix for compose invocations: the `compose`
 * subcommand, the env file, the packaged base compose file, and the generated
 * override file, in that order.
 *
 * @param {object} paths Runtime paths; reads `envFile` and `generatedCompose`.
 * @returns {string[]} Argument list starting with "compose".
 */
function composeArgs(paths) {
  const baseComposeFile = `${packageRoot}/compose/preppergpt.yaml`;
  const envFileArgs = ["--env-file", paths.envFile];
  const composeFileArgs = ["-f", baseComposeFile, "-f", paths.generatedCompose];
  return ["compose", ...envFileArgs, ...composeFileArgs];
}
|
|
@@ -88,19 +104,20 @@ async function commandDetect(flags) {
|
|
|
88
104
|
printJson(detection);
|
|
89
105
|
return;
|
|
90
106
|
}
|
|
91
|
-
console.log(`Host: ${detection.hostname} (${detection.platform}/${detection.arch})`);
|
|
107
|
+
console.log(`Host: ${detection.hostname} (${detection.platformKind || detection.platform}/${detection.arch})`);
|
|
92
108
|
console.log(`CPU: ${detection.cpu.cores} cores, ${detection.cpu.model}`);
|
|
93
109
|
console.log(`RAM: ${detection.memory.totalGb} GB total, ${detection.memory.freeGb} GB free`);
|
|
94
110
|
const bestDisk = detection.disks[0];
|
|
95
111
|
console.log(`Disk: ${bestDisk ? `${bestDisk.freeGb.toFixed(1)} GB free at ${bestDisk.mount}` : "not detected"}`);
|
|
96
112
|
if (detection.gpus.length) {
|
|
97
113
|
for (const gpu of detection.gpus) {
|
|
98
|
-
|
|
114
|
+
const memory = gpu.totalVramGb ? `${gpu.totalVramGb} GB VRAM, ${gpu.freeVramGb ?? "unknown"} GB free` : "VRAM unknown";
|
|
115
|
+
console.log(`GPU ${gpu.index}: ${gpu.vendor}/${gpu.runtime || "unknown"} ${gpu.name}, ${memory}`);
|
|
99
116
|
}
|
|
100
117
|
} else {
|
|
101
|
-
console.log("GPU: no
|
|
118
|
+
console.log("GPU: no supported GPU detected");
|
|
102
119
|
}
|
|
103
|
-
const missing = Object.entries(detection
|
|
120
|
+
const missing = Object.entries(requiredToolStatuses(detection)).filter(([, present]) => !present).map(([tool]) => tool);
|
|
104
121
|
console.log(`Tools: ${missing.length ? `missing ${missing.join(", ")}` : "all required tools present"}`);
|
|
105
122
|
}
|
|
106
123
|
|
|
@@ -117,9 +134,13 @@ async function commandPlan(flags) {
|
|
|
117
134
|
async function commandInstall(flags) {
|
|
118
135
|
const home = flags.home;
|
|
119
136
|
const detection = await detectMachine();
|
|
137
|
+
assertSupportedInstall(detection);
|
|
120
138
|
const plan = buildPlan(detection, profileFrom(flags));
|
|
121
139
|
if (flags.dry_run) {
|
|
122
140
|
printPlan(plan);
|
|
141
|
+
if (flags.start) {
|
|
142
|
+
console.log("\nWould start PrepperGPT after install.");
|
|
143
|
+
}
|
|
123
144
|
console.log("\nDry run only. No files written.");
|
|
124
145
|
return;
|
|
125
146
|
}
|
|
@@ -132,14 +153,21 @@ async function commandInstall(flags) {
|
|
|
132
153
|
console.log(`Wrote ${paths.envFile}`);
|
|
133
154
|
console.log(`Wrote ${paths.generatedCompose}`);
|
|
134
155
|
console.log(`Wrote ${paths.modelPlan}`);
|
|
156
|
+
if (flags.start) {
|
|
157
|
+
runCompose(paths, ["up", "-d"]);
|
|
158
|
+
console.log("\nPrepperGPT start requested.");
|
|
159
|
+
console.log("Open http://127.0.0.1:8080");
|
|
160
|
+
return;
|
|
161
|
+
}
|
|
135
162
|
console.log("\nNext:");
|
|
136
|
-
console.log(` preppergpt start --home ${shellQuote(paths.root)}`);
|
|
163
|
+
console.log(` npx --yes preppergpt start --home ${shellQuote(paths.root)}`);
|
|
137
164
|
console.log(" Open http://127.0.0.1:8080");
|
|
138
165
|
}
|
|
139
166
|
|
|
140
167
|
async function commandSwitchProfile(flags) {
|
|
141
168
|
const paths = runtimePaths(flags.home);
|
|
142
169
|
const detection = await detectMachine();
|
|
170
|
+
assertSupportedInstall(detection);
|
|
143
171
|
const plan = buildPlan(detection, profileFrom(flags));
|
|
144
172
|
renderInstall(plan, detection, { home: paths.root });
|
|
145
173
|
console.log(`Switched PrepperGPT to ${plan.profile}.`);
|
|
@@ -199,9 +227,23 @@ async function commandDoctor(flags) {
|
|
|
199
227
|
const plan = buildPlan(detection, profileFrom(flags));
|
|
200
228
|
printPlan(plan);
|
|
201
229
|
console.log("\nDoctor:");
|
|
202
|
-
const
|
|
203
|
-
|
|
204
|
-
console.log(`
|
|
230
|
+
const supportError = installSupportError(detection);
|
|
231
|
+
if (supportError) {
|
|
232
|
+
console.log(` platform: unsupported (${supportError})`);
|
|
233
|
+
} else {
|
|
234
|
+
console.log(` platform: ok (${detection.platformKind || detection.platform})`);
|
|
235
|
+
}
|
|
236
|
+
for (const [tool, present] of Object.entries(requiredToolStatuses(detection))) {
|
|
237
|
+
console.log(` ${tool}: ${present ? "ok" : "missing"}`);
|
|
238
|
+
}
|
|
239
|
+
const amdGpu = detection.gpus.some((gpu) => gpu.vendor === "amd");
|
|
240
|
+
const nvidiaGpu = detection.gpus.some((gpu) => gpu.vendor === "nvidia");
|
|
241
|
+
if (nvidiaGpu) {
|
|
242
|
+
console.log(` nvidia cuda: ${detection.tools.nvidiaSmi ? "ok" : "missing nvidia-smi"}`);
|
|
243
|
+
}
|
|
244
|
+
if (amdGpu) {
|
|
245
|
+
const rocmReady = detection.tools.rocmSmi || detection.tools.rocminfo;
|
|
246
|
+
console.log(` amd rocm: ${rocmReady ? "ok" : "missing rocm-smi or rocminfo"}`);
|
|
205
247
|
}
|
|
206
248
|
for (const [port, entry] of Object.entries(detection.ports)) {
|
|
207
249
|
if (!entry.free) {
|
package/installer/lib/detect.mjs
CHANGED
|
@@ -6,6 +6,31 @@ import { commandExists, commandResult, gb } from "./util.mjs";
|
|
|
6
6
|
|
|
7
7
|
const DEFAULT_PORTS = [8080, 11434, 11438, 11441, 18041, 18042, 18043, 18044, 18045, 18080, 8188, 8888, 9998];
|
|
8
8
|
|
|
9
|
+
/**
 * Read a file as UTF-8 text on a best-effort basis.
 *
 * @param {string} file Path to read.
 * @returns {string} File contents, or "" when the read throws for any reason.
 */
function readFileMaybe(file) {
  let contents = "";
  try {
    contents = fs.readFileSync(file, "utf8");
  } catch {
    // Deliberately ignored: an unreadable file is reported as empty text.
  }
  return contents;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Classify the host platform more finely than `process.platform`.
 *
 * @returns {"windows-native"|"macos"|"unknown"|"wsl2"|"linux"} Platform kind.
 *   A Linux host is reported as "wsl2" when a WSL environment variable is set
 *   or the kernel release/version text mentions "microsoft" or "wsl".
 */
export function detectPlatformKind() {
  switch (process.platform) {
    case "win32":
      return "windows-native";
    case "darwin":
      return "macos";
    case "linux":
      break;
    default:
      return "unknown";
  }

  const kernelInfo = [readFileMaybe("/proc/sys/kernel/osrelease"), readFileMaybe("/proc/version")]
    .join("\n")
    .toLowerCase();
  const wslEnvironment = process.env.WSL_DISTRO_NAME || process.env.WSL_INTEROP;
  const wslKernel = kernelInfo.includes("microsoft") || kernelInfo.includes("wsl");
  return wslEnvironment || wslKernel ? "wsl2" : "linux";
}
|
|
33
|
+
|
|
9
34
|
function parseDf(target) {
|
|
10
35
|
const result = commandResult("df", ["-Pk", target], { timeoutMs: 5000 });
|
|
11
36
|
if (!result.ok) {
|
|
@@ -47,7 +72,57 @@ function candidateDiskPaths() {
|
|
|
47
72
|
});
|
|
48
73
|
}
|
|
49
74
|
|
|
50
|
-
function
|
|
75
|
+
/**
 * Estimate usable VRAM as 82% of the total, rounded to one decimal place.
 *
 * @param {number} totalVramGb Total VRAM in GB.
 * @returns {number} Usable VRAM in GB.
 */
function usableVram(totalVramGb) {
  const tenths = Math.round(totalVramGb * 0.82 * 10);
  return tenths / 10;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Coerce a value to a number and round it to one decimal place.
 *
 * @param {*} value Anything `Number()` can coerce.
 * @returns {number|null} Rounded value, or null when the coercion is not finite.
 */
function normalizeGb(value) {
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) {
    return null;
  }
  return Math.round(parsed * 10) / 10;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Convert a byte count to GB (1024^3 bytes), rounded via `normalizeGb`.
 *
 * @param {*} value Byte count, or anything `Number()` can coerce.
 * @returns {number|null} Size in GB, or null when the input is not finite.
 */
function bytesToGb(value) {
  const byteCount = Number(value);
  if (!Number.isFinite(byteCount)) {
    return null;
  }
  const bytesPerGb = 1024 ** 3;
  return normalizeGb(byteCount / bytesPerGb);
}
|
|
88
|
+
|
|
89
|
+
/**
 * Convert a loosely formatted memory value to GB.
 *
 * The LAST number found in the text is used. The unit is resolved in order:
 *   1. an explicit gib/gb, mib/mb, or kib/kb marker anywhere in the text;
 *   2. a "(b)" / "byte(s)" marker, or a number above 1024^3, means bytes;
 *   3. `fallbackUnit === "mib"`, or a number above 1024, means MiB;
 *   4. `fallbackUnit === "bytes"` means bytes;
 *   5. otherwise the number is taken as GB.
 *
 * @param {*} value Raw value; null/undefined yields null.
 * @param {string} [fallbackUnit="bytes"] Unit assumed when the text names none.
 * @returns {number|null} Size in GB rounded to one decimal, or null when no number is present.
 */
function memoryToGb(value, fallbackUnit = "bytes") {
  if (value == null) {
    return null;
  }
  const text = String(value).trim();
  const numericTokens = text.match(/(\d+(?:\.\d+)?)/g);
  const lastToken = numericTokens ? numericTokens[numericTokens.length - 1] : undefined;
  const amount = Number(lastToken);
  if (!Number.isFinite(amount)) {
    return null;
  }

  const lower = text.toLowerCase();
  // Explicit unit markers win, checked largest unit first.
  const explicitUnits = [
    [/(gib|gb)/, (n) => normalizeGb(n)],
    [/(mib|mb)/, (n) => normalizeGb(n / 1024)],
    [/(kib|kb)/, (n) => normalizeGb(n / 1024 / 1024)]
  ];
  for (const [unitPattern, convert] of explicitUnits) {
    if (unitPattern.test(lower)) {
      return convert(amount);
    }
  }

  const looksLikeBytes = /\(b\)|bytes?/.test(lower) || amount > 1024 ** 3;
  if (looksLikeBytes) {
    return bytesToGb(amount);
  }
  const looksLikeMib = fallbackUnit === "mib" || amount > 1024;
  if (looksLikeMib) {
    return normalizeGb(amount / 1024);
  }
  return fallbackUnit === "bytes" ? bytesToGb(amount) : normalizeGb(amount);
}
|
|
120
|
+
|
|
121
|
+
/**
 * Find the first own key of `record` (in key order) that matches any pattern.
 *
 * @param {object} record Object whose keys are searched.
 * @param {RegExp[]} patterns Patterns tried in order against each key.
 * @returns {string|undefined} The first matching key, or undefined when none matches.
 */
function bestMatchingKey(record, patterns) {
  for (const key of Object.keys(record)) {
    for (const pattern of patterns) {
      if (pattern.test(key)) {
        return key;
      }
    }
  }
  return undefined;
}
|
|
124
|
+
|
|
125
|
+
function detectNvidiaGpus() {
|
|
51
126
|
if (!commandExists("nvidia-smi")) {
|
|
52
127
|
return [];
|
|
53
128
|
}
|
|
@@ -66,16 +141,199 @@ function detectGpus() {
|
|
|
66
141
|
return {
|
|
67
142
|
index,
|
|
68
143
|
vendor: "nvidia",
|
|
144
|
+
runtime: "cuda",
|
|
69
145
|
name,
|
|
70
|
-
totalVramGb:
|
|
71
|
-
freeVramGb:
|
|
72
|
-
usableVramGb:
|
|
146
|
+
totalVramGb: normalizeGb(Number(totalMiB) / 1024),
|
|
147
|
+
freeVramGb: normalizeGb(Number(freeMiB) / 1024),
|
|
148
|
+
usableVramGb: usableVram(Number(totalMiB) / 1024),
|
|
73
149
|
driver
|
|
74
150
|
};
|
|
75
151
|
})
|
|
76
152
|
.filter((gpu) => gpu.name && Number.isFinite(gpu.totalVramGb));
|
|
77
153
|
}
|
|
78
154
|
|
|
155
|
+
/**
 * Detect AMD GPUs by querying `rocm-smi`.
 *
 * The JSON form of the query is tried first. If the command fails, or anything
 * throws while parsing or mapping its output, the same query is re-run without
 * `--json` and its lines are parsed as text.
 *
 * @returns {object[]} GPU descriptors with vendor "amd" and runtime "rocm";
 *   empty when `rocm-smi` is missing or both queries fail. VRAM fields are
 *   null when no total could be parsed.
 */
function detectAmdRocmSmiGpus() {
  if (!commandExists("rocm-smi")) {
    return [];
  }

  const queryArgs = ["--showproductname", "--showmeminfo", "vram", "--showdriverversion"];

  // Shared descriptor shape for both parsing paths.
  const describeGpu = (index, name, totalVramGb, usedVramGb, driver) => ({
    index,
    vendor: "amd",
    runtime: "rocm",
    name,
    totalVramGb,
    freeVramGb: totalVramGb === null ? null : normalizeGb(Math.max(totalVramGb - usedVramGb, 0)),
    usableVramGb: totalVramGb === null ? null : usableVram(totalVramGb),
    driver
  });

  const isCardEntry = ([id, record]) => /^card\d+/i.test(id) && record && typeof record === "object";

  const fromJsonEntry = ([id, record], index) => {
    const totalKey = bestMatchingKey(record, [/vram.*total/i, /total.*memory/i]);
    const usedKey = bestMatchingKey(record, [/vram.*used/i, /used.*memory/i]);
    const nameKey = bestMatchingKey(record, [/product.*name/i, /card.*series/i, /marketing.*name/i]);
    const driverKey = bestMatchingKey(record, [/driver/i]);
    return describeGpu(
      index,
      String(record[nameKey] || id),
      memoryToGb(record[totalKey]),
      memoryToGb(record[usedKey]) || 0,
      String(record[driverKey] || "")
    );
  };

  const jsonResult = commandResult("rocm-smi", [...queryArgs, "--json"]);
  if (jsonResult.ok) {
    try {
      return Object.entries(JSON.parse(jsonResult.stdout))
        .filter(isCardEntry)
        .map(fromJsonEntry)
        .filter((gpu) => gpu.name);
    } catch {
      // Fall through to text parsing.
    }
  }

  const textResult = commandResult("rocm-smi", queryArgs);
  if (!textResult.ok) {
    return [];
  }

  // Text after the last ':' or '=' on a line is treated as the field's value.
  const trailingField = (value) => value.split(/[:=]/).at(-1)?.trim();
  const cards = new Map();
  for (const line of textResult.stdout.split(/\n/)) {
    const parsedLine = line.match(/(card\d+)\s*[:\t ]+(.*)$/i);
    if (parsedLine) {
      const id = parsedLine[1];
      const value = parsedLine[2];
      const record = cards.get(id) ?? {};
      if (/product|series|marketing/i.test(value)) {
        record.name = trailingField(value) || record.name;
      }
      if (/total.*vram|vram.*total/i.test(value)) {
        record.totalVramGb = memoryToGb(value);
      }
      if (/used.*vram|vram.*used/i.test(value)) {
        record.usedVramGb = memoryToGb(value);
      }
      if (/driver/i.test(value)) {
        record.driver = trailingField(value);
      }
      cards.set(id, record);
    }
  }

  return Array.from(cards, ([id, record], index) =>
    describeGpu(index, record.name || id, record.totalVramGb ?? null, record.usedVramGb || 0, record.driver || "")
  );
}
|
|
238
|
+
|
|
239
|
+
/**
 * Detect AMD GPUs from `rocminfo` output when VRAM sizes are not available.
 *
 * Collects the values of "Marketing Name:" / "Name:" lines that mention
 * amd, radeon, instinct, or gfx, keeping each distinct name once in
 * first-seen order.
 *
 * @returns {object[]} GPU descriptors (vendor "amd", runtime "rocm") with all
 *   VRAM fields null; empty when `rocminfo` is missing or fails.
 */
function detectAmdRocinfoGpus() {
  if (!commandExists("rocminfo")) {
    return [];
  }
  const result = commandResult("rocminfo", [], { timeoutMs: 8000 });
  if (!result.ok) {
    return [];
  }

  const uniqueNames = new Set();
  for (const line of result.stdout.split(/\n/)) {
    const captured = line.match(/^\s*(?:Marketing Name|Name):\s*(.+)$/)?.[1];
    if (captured && /amd|radeon|instinct|gfx/i.test(captured)) {
      uniqueNames.add(captured.trim());
    }
  }

  return Array.from(uniqueNames, (name, index) => ({
    index,
    vendor: "amd",
    runtime: "rocm",
    name,
    totalVramGb: null,
    freeVramGb: null,
    usableVramGb: null,
    driver: ""
  }));
}
|
|
265
|
+
|
|
266
|
+
/**
 * Split one line into fields: double-quoted segments become a field without
 * their quotes, and any other run of non-whitespace becomes a field as-is.
 *
 * @param {string} line One line of text (used for `lspci -mm` output in this file).
 * @returns {string[]} Fields in order of appearance.
 */
export function parseLspciMachineLine(line) {
  const fieldPattern = /"([^"]*)"|(\S+)/g;
  return Array.from(`${line}`.matchAll(fieldPattern), (match) => match[1] ?? match[2]);
}
|
|
276
|
+
|
|
277
|
+
/**
 * Check whether a PCI class name is exactly one of the display classes
 * (VGA compatible controller, 3D controller, Display controller), ignoring case.
 *
 * @param {string} [value] PCI class name; falsy values are treated as "".
 * @returns {boolean} True when the whole string is a display class name.
 */
function isDisplayController(value) {
  const displayClassPattern = /^(VGA compatible controller|3D controller|Display controller)$/i;
  const className = value || "";
  return displayClassPattern.test(className);
}
|
|
280
|
+
|
|
281
|
+
/**
 * Check whether a PCI vendor string names AMD/ATI as a whole word, ignoring case.
 *
 * @param {string} [value] PCI vendor string; falsy values are treated as "".
 * @returns {boolean} True when an AMD/ATI vendor name appears in the string.
 */
function isAmdVendor(value) {
  const amdVendorPattern = /\b(Advanced Micro Devices|AMD|AMD\/ATI|ATI Technologies)\b/i;
  const vendorText = value || "";
  return amdVendorPattern.test(vendorText);
}
|
|
284
|
+
|
|
285
|
+
/**
 * Turn one `lspci -mm` line into an AMD GPU descriptor, if it describes one.
 *
 * The second, third, and fourth parsed fields are read as class, vendor, and
 * device. The runtime is "none" and all VRAM fields are null.
 *
 * @param {string} line One line of `lspci -mm` output.
 * @param {number} [index=0] Index stored on the descriptor.
 * @returns {object|null} GPU descriptor, or null when the line is not an AMD display controller.
 */
export function amdPciGpuFromLspciLine(line, index = 0) {
  const [, className, vendor, device] = parseLspciMachineLine(line);
  const isAmdDisplayDevice = isDisplayController(className) && isAmdVendor(vendor);
  if (!isAmdDisplayDevice) {
    return null;
  }
  const label = [vendor, device].filter(Boolean).join(" ").trim();
  return {
    index,
    vendor: "amd",
    runtime: "none",
    name: label || "AMD GPU",
    totalVramGb: null,
    freeVramGb: null,
    usableVramGb: null,
    driver: ""
  };
}
|
|
302
|
+
|
|
303
|
+
/**
 * Detect AMD display controllers from `lspci -mm` output.
 *
 * Descriptors carry runtime "none" and null VRAM fields, as produced by
 * `amdPciGpuFromLspciLine`.
 *
 * @returns {object[]} AMD GPU descriptors indexed 0..n-1 in output order;
 *   empty when `lspci` is missing or the command fails.
 */
function detectAmdPciGpus() {
  if (!commandExists("lspci")) {
    return [];
  }
  const result = commandResult("lspci", ["-mm"], { timeoutMs: 5000 });
  if (!result.ok) {
    return [];
  }
  return result.stdout
    .split(/\n/)
    .map((line) => amdPciGpuFromLspciLine(line))
    .filter(Boolean)
    // Number the GPUs after filtering: the position of a line in the full
    // lspci listing is not a GPU ordinal, and the other detectors in this file
    // index their results from 0.
    .map((gpu, index) => ({ ...gpu, index }));
}
|
|
316
|
+
|
|
317
|
+
/**
 * Remove repeated reports of the same GPU while keeping every physical device.
 *
 * Two entries are the same device only when vendor, index, name, and total
 * VRAM all match. The index is part of the key because identical cards in a
 * multi-GPU host share vendor, name, and VRAM; without it they would collapse
 * into a single entry.
 *
 * @param {object[]} gpus GPU descriptors, possibly gathered from several detectors.
 * @returns {object[]} Descriptors with exact repeats removed, first occurrence kept, order preserved.
 */
function dedupeGpus(gpus) {
  const seen = new Set();
  return gpus.filter((gpu) => {
    const key = [gpu.vendor, gpu.index ?? "unknown", gpu.name, gpu.totalVramGb ?? "unknown"].join(":");
    if (seen.has(key)) {
      return false;
    }
    seen.add(key);
    return true;
  });
}
|
|
328
|
+
|
|
329
|
+
/**
 * Collect GPUs from every detector.
 *
 * NVIDIA GPUs are always queried. For AMD, the first detector that reports
 * anything wins, tried in order: rocm-smi, rocminfo, lspci.
 *
 * @returns {object[]} De-duplicated descriptors, NVIDIA entries first.
 */
function detectGpus() {
  const found = [...detectNvidiaGpus()];

  let amd = detectAmdRocmSmiGpus();
  if (!amd.length) {
    amd = detectAmdRocinfoGpus();
  }
  if (!amd.length) {
    amd = detectAmdPciGpus();
  }

  found.push(...amd);
  return dedupeGpus(found);
}
|
|
336
|
+
|
|
79
337
|
async function portFree(port) {
|
|
80
338
|
return new Promise((resolve) => {
|
|
81
339
|
const server = net.createServer();
|
|
@@ -93,6 +351,7 @@ async function detectPorts(ports = DEFAULT_PORTS) {
|
|
|
93
351
|
}
|
|
94
352
|
|
|
95
353
|
export async function detectMachine(options = {}) {
|
|
354
|
+
const platformKind = detectPlatformKind();
|
|
96
355
|
const disks = candidateDiskPaths()
|
|
97
356
|
.map(parseDf)
|
|
98
357
|
.filter(Boolean)
|
|
@@ -104,12 +363,17 @@ export async function detectMachine(options = {}) {
|
|
|
104
363
|
tmux: commandExists("tmux"),
|
|
105
364
|
curl: commandExists("curl"),
|
|
106
365
|
python3: commandExists("python3"),
|
|
366
|
+
python: commandExists("python"),
|
|
107
367
|
git: commandExists("git"),
|
|
108
|
-
nvidiaSmi: commandExists("nvidia-smi")
|
|
368
|
+
nvidiaSmi: commandExists("nvidia-smi"),
|
|
369
|
+
rocmSmi: commandExists("rocm-smi"),
|
|
370
|
+
rocminfo: commandExists("rocminfo")
|
|
109
371
|
};
|
|
110
372
|
return {
|
|
111
373
|
generatedAt: new Date().toISOString(),
|
|
112
374
|
platform: process.platform,
|
|
375
|
+
platformKind,
|
|
376
|
+
isWsl2: platformKind === "wsl2",
|
|
113
377
|
arch: process.arch,
|
|
114
378
|
hostname: os.hostname(),
|
|
115
379
|
cpu: {
|
|
@@ -23,18 +23,57 @@ function bestDisk(detection) {
|
|
|
23
23
|
return detection.disks?.[0] || { freeGb: 0, isNvme: false, path: "" };
|
|
24
24
|
}
|
|
25
25
|
|
|
26
|
-
function
|
|
27
|
-
|
|
26
|
+
/**
 * Resolve the platform kind for a detection record.
 *
 * Uses `platformKind` when it is set. Otherwise it is derived from `platform`:
 * "win32" becomes "windows-native", "darwin" becomes "macos", any other value
 * is returned unchanged, and a missing value becomes "unknown".
 *
 * @param {object} detection Detection record.
 * @returns {string} Platform kind.
 */
function detectedPlatformKind(detection) {
  const { platformKind, platform } = detection;
  if (platformKind) {
    return platformKind;
  }
  switch (platform) {
    case "win32":
      return "windows-native";
    case "darwin":
      return "macos";
    default:
      return platform || "unknown";
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * Pick the GPU with the most usable VRAM, optionally limited to some vendors.
 *
 * GPUs with unknown usable VRAM rank as 0. `detection.gpus` is not mutated.
 *
 * @param {object} detection Detection record; `gpus` may be absent.
 * @param {string[]|null} [vendors=null] Allowed vendor ids, or null for any vendor.
 * @returns {object|null} The best matching GPU, or null when none qualifies.
 */
function bestGpu(detection, vendors = null) {
  const vendorFilter = vendors ? new Set(vendors) : null;
  // filter() returns a fresh array, so the in-place sort never touches detection.gpus.
  const candidates = (detection.gpus || []).filter((gpu) => !vendorFilter || vendorFilter.has(gpu.vendor));
  candidates.sort((left, right) => (right.usableVramGb || 0) - (left.usableVramGb || 0));
  return candidates[0] || null;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Format a vendor list for messages, e.g. ["nvidia", "amd"] -> "NVIDIA/AMD".
 *
 * @param {string[]|null|undefined} vendors Vendor ids.
 * @returns {string} "supported" for a missing or empty list; otherwise the
 *   display names joined with "/". Ids other than nvidia/amd are left as-is.
 */
function gpuVendorLabel(vendors) {
  if (!vendors?.length) {
    return "supported";
  }
  const displayNames = new Map([
    ["nvidia", "NVIDIA"],
    ["amd", "AMD"]
  ]);
  return vendors.map((vendor) => displayNames.get(vendor) ?? vendor).join("/");
}
|
|
52
|
+
|
|
53
|
+
/**
 * Report whether either ROCm tool flag (`rocmSmi` or `rocminfo`) is set.
 *
 * @param {object} detection Detection record; `tools` may be absent.
 * @returns {boolean} True when at least one of the two flags is truthy.
 */
function hasRocmRuntime(detection) {
  const tools = detection.tools;
  return ["rocmSmi", "rocminfo"].some((tool) => Boolean(tools?.[tool]));
}
|
|
56
|
+
|
|
57
|
+
/**
 * Explain why installation cannot proceed on the detected host, if it cannot.
 *
 * Only a platform kind of "windows-native" is rejected here.
 *
 * @param {object} detection Detection record.
 * @returns {string|null} User-facing message, or null when no blocker is found.
 */
export function installSupportError(detection) {
  const isNativeWindows = detectedPlatformKind(detection) === "windows-native";
  return isNativeWindows
    ? "Native Windows install is not supported yet. Install PrepperGPT inside WSL2 so Docker, Linux paths, and local model services use the supported Linux runtime."
    : null;
}
|
|
29
63
|
|
|
30
64
|
function requirementFailures(model, detection) {
|
|
31
65
|
const requires = model.requires || {};
|
|
32
66
|
const disk = bestDisk(detection);
|
|
33
|
-
const
|
|
67
|
+
const platformKind = detectedPlatformKind(detection);
|
|
68
|
+
const gpuVendors = requires.gpuVendors || null;
|
|
69
|
+
const gpu = bestGpu(detection, gpuVendors);
|
|
34
70
|
const failures = [];
|
|
35
71
|
if (requires.platforms && !requires.platforms.includes(detection.platform)) {
|
|
36
72
|
failures.push(`requires platform ${requires.platforms.join(", ")}`);
|
|
37
73
|
}
|
|
74
|
+
if (requires.platformKinds && !requires.platformKinds.includes(platformKind)) {
|
|
75
|
+
failures.push(`requires platform kind ${requires.platformKinds.join(", ")}`);
|
|
76
|
+
}
|
|
38
77
|
if (requires.minRamGb && detection.memory.totalGb < requires.minRamGb) {
|
|
39
78
|
failures.push(`requires ${requires.minRamGb} GB RAM`);
|
|
40
79
|
}
|
|
@@ -44,11 +83,22 @@ function requirementFailures(model, detection) {
|
|
|
44
83
|
if (requires.nvme && disk.freeGb >= (requires.diskGb || 0) && !disk.isNvme) {
|
|
45
84
|
failures.push("strongly prefers NVMe for acceptable load time");
|
|
46
85
|
}
|
|
86
|
+
if (requires.gpuVendors && detection.gpus?.length && !gpu) {
|
|
87
|
+
failures.push(`requires ${gpuVendorLabel(requires.gpuVendors)} GPU`);
|
|
88
|
+
}
|
|
47
89
|
if (requires.gpu && !gpu) {
|
|
48
|
-
failures.push(
|
|
90
|
+
failures.push(`requires ${gpuVendorLabel(requires.gpuVendors)} GPU`);
|
|
49
91
|
}
|
|
50
92
|
if (requires.minVramGb && (!gpu || gpu.usableVramGb < requires.minVramGb)) {
|
|
51
|
-
failures.push(`requires about ${requires.minVramGb} GB usable VRAM`);
|
|
93
|
+
failures.push(`requires ${gpuVendorLabel(requires.gpuVendors)} GPU with about ${requires.minVramGb} GB usable VRAM`);
|
|
94
|
+
}
|
|
95
|
+
if (requires.requiresRocm && gpu?.vendor === "amd") {
|
|
96
|
+
if (platformKind !== "linux") {
|
|
97
|
+
failures.push("requires a Linux ROCm host for AMD GPU acceleration");
|
|
98
|
+
}
|
|
99
|
+
if (gpu.runtime !== "rocm" || !hasRocmRuntime(detection)) {
|
|
100
|
+
failures.push("requires ROCm runtime tools for AMD GPU acceleration");
|
|
101
|
+
}
|
|
52
102
|
}
|
|
53
103
|
return failures;
|
|
54
104
|
}
|
|
@@ -116,9 +166,16 @@ export function buildPlan(detection, requestedProfile = "balanced", catalog = lo
|
|
|
116
166
|
}));
|
|
117
167
|
|
|
118
168
|
const warnings = [];
|
|
169
|
+
const installError = installSupportError(detection);
|
|
170
|
+
if (installError) {
|
|
171
|
+
warnings.push(installError);
|
|
172
|
+
}
|
|
119
173
|
const missingTools = Object.entries(detection.tools || {})
|
|
120
|
-
.filter(([tool, present]) => ["docker", "dockerCompose", "curl"
|
|
174
|
+
.filter(([tool, present]) => ["docker", "dockerCompose", "curl"].includes(tool) && !present)
|
|
121
175
|
.map(([tool]) => tool);
|
|
176
|
+
if (!detection.tools?.python3 && !detection.tools?.python) {
|
|
177
|
+
missingTools.push("python3 or python");
|
|
178
|
+
}
|
|
122
179
|
if (missingTools.length) {
|
|
123
180
|
warnings.push(`Missing required tools: ${missingTools.join(", ")}`);
|
|
124
181
|
}
|
|
@@ -128,8 +185,18 @@ export function buildPlan(detection, requestedProfile = "balanced", catalog = lo
|
|
|
128
185
|
if (occupiedPorts.length) {
|
|
129
186
|
warnings.push(`Ports already in use: ${occupiedPorts.join(", ")}`);
|
|
130
187
|
}
|
|
131
|
-
|
|
132
|
-
|
|
188
|
+
const acceleratedGpu = (detection.gpus || []).find(
|
|
189
|
+
(gpu) => gpu.vendor === "nvidia" || (gpu.vendor === "amd" && gpu.runtime === "rocm" && detectedPlatformKind(detection) === "linux")
|
|
190
|
+
);
|
|
191
|
+
if (!acceleratedGpu) {
|
|
192
|
+
warnings.push("No supported GPU acceleration detected; CPU fallback will be much slower.");
|
|
193
|
+
}
|
|
194
|
+
const amdWithoutRocm = (detection.gpus || []).some((gpu) => gpu.vendor === "amd" && gpu.runtime !== "rocm");
|
|
195
|
+
if (amdWithoutRocm) {
|
|
196
|
+
warnings.push("AMD GPU detected without ROCm; install ROCm on Linux to enable AMD acceleration.");
|
|
197
|
+
}
|
|
198
|
+
if ((detection.gpus || []).some((gpu) => gpu.vendor === "amd") && detectedPlatformKind(detection) === "wsl2") {
|
|
199
|
+
warnings.push("AMD GPU acceleration is supported on Linux ROCm hosts; WSL2 installs will use CPU fallback unless an external AMD endpoint is provided.");
|
|
133
200
|
}
|
|
134
201
|
if (manualAssets.length) {
|
|
135
202
|
warnings.push("Some selected high-quality routes need manual model files or already-running external endpoints.");
|
package/installer/lib/render.mjs
CHANGED
|
@@ -9,10 +9,39 @@ function secret(bytes = 24) {
|
|
|
9
9
|
return crypto.randomBytes(bytes).toString("hex");
|
|
10
10
|
}
|
|
11
11
|
|
|
12
|
+
/**
 * Resolve the platform kind used by the renderer.
 *
 * @param {object} detection Detection result.
 * @returns {string} `detection.platformKind` when set; otherwise
 *   "windows-native" for platform "win32", otherwise `detection.platform`,
 *   falling back to "unknown" when that is empty too.
 */
function platformKind(detection) {
  if (detection.platformKind) {
    return detection.platformKind;
  }
  if (detection.platform === "win32") {
    return "windows-native";
  }
  return detection.platform || "unknown";
}
|
|
15
|
+
|
|
16
|
+
/**
 * Pick the accelerator the generated runtime should target.
 *
 * Precedence: any NVIDIA GPU wins; otherwise an AMD GPU whose runtime is
 * "rocm" is used, but only when the platform kind is "linux"; otherwise CPU.
 *
 * @param {object} detection Detection result; `detection.gpus` may be absent.
 * @returns {{vendor: string, runtime: string}} One of
 *   nvidia/cuda, amd/rocm, or cpu/cpu.
 */
function primaryAccelerator(detection) {
  const detected = detection.gpus || [];

  if (detected.some((card) => card.vendor === "nvidia")) {
    return { vendor: "nvidia", runtime: "cuda" };
  }

  const hasRocmAmd = detected.some((card) => card.vendor === "amd" && card.runtime === "rocm");
  // The platform kind is only consulted once a ROCm-backed AMD card is present.
  if (hasRocmAmd && platformKind(detection) === "linux") {
    return { vendor: "amd", runtime: "rocm" };
  }

  return { vendor: "cpu", runtime: "cpu" };
}
|
|
28
|
+
|
|
29
|
+
/**
 * Decide whether the desktop-integration overrides should be generated.
 *
 * The LOCAL_AGENT_DESKTOP_ENABLED environment variable acts as an opt-out:
 * "0", "false", "no" or "off" (case-insensitive, surrounding whitespace
 * ignored) disables the integration. Any other value leaves the decision to
 * auto-detection, which requires a "linux" or "wsl2" platform kind and a
 * DISPLAY or WAYLAND_DISPLAY variable in the environment.
 *
 * @param {object} detection Detection result passed to `platformKind`.
 * @returns {boolean} true when desktop integration should be enabled.
 */
function desktopIntegrationEnabled(detection) {
  // Trim so a padded value such as "off " still counts as an opt-out.
  const explicit = String(process.env.LOCAL_AGENT_DESKTOP_ENABLED || "").trim().toLowerCase();
  if (["0", "false", "no", "off"].includes(explicit)) {
    return false;
  }
  const kind = platformKind(detection);
  const platformSupportsDesktop = kind === "linux" || kind === "wsl2";
  const hasDisplay = Boolean(process.env.DISPLAY || process.env.WAYLAND_DISPLAY);
  // No further check of `explicit` is needed: every opt-out value returned above.
  return platformSupportsDesktop && hasDisplay;
}
|
|
39
|
+
|
|
12
40
|
function envFile(plan, paths, detection) {
|
|
13
41
|
const dataDir = process.env.PREPPERGPT_DATA_DIR || paths.dataDir;
|
|
14
42
|
const modelsDir = process.env.PREPPERGPT_MODELS_DIR || `${dataDir}/models`;
|
|
15
43
|
const whisperHostDir = path.join(modelsDir, "whisper", "base");
|
|
44
|
+
const accelerator = primaryAccelerator(detection);
|
|
16
45
|
const selectedReasoningModel = plan.selected?.reasoning?.id || "glm52-q4-local";
|
|
17
46
|
const selectedGlmBaseUrl =
|
|
18
47
|
selectedReasoningModel === "glm52-q8-local"
|
|
@@ -33,7 +62,10 @@ function envFile(plan, paths, detection) {
|
|
|
33
62
|
PREPPERGPT_MODEL_ORDER_LIST: JSON.stringify(plan.routeIds),
|
|
34
63
|
PREPPERGPT_GLM_MODEL: selectedReasoningModel,
|
|
35
64
|
PREPPERGPT_GLM_BASE_URL: selectedGlmBaseUrl,
|
|
36
|
-
|
|
65
|
+
PREPPERGPT_GPU_VENDOR: accelerator.vendor,
|
|
66
|
+
PREPPERGPT_ACCELERATOR: accelerator.runtime,
|
|
67
|
+
PREPPERGPT_DOCKER_GPUS: accelerator.vendor === "nvidia" ? "all" : "",
|
|
68
|
+
OLLAMA_IMAGE: accelerator.vendor === "amd" ? "ollama/ollama:rocm" : "ollama/ollama:latest",
|
|
37
69
|
WEBUI_NAME: "PrepperGPT",
|
|
38
70
|
WEBUI_ADMIN_EMAIL: process.env.WEBUI_ADMIN_EMAIL || "admin@preppergpt.local",
|
|
39
71
|
WEBUI_ADMIN_PASSWORD: adminPassword,
|
|
@@ -53,13 +85,38 @@ function envFile(plan, paths, detection) {
|
|
|
53
85
|
|
|
54
86
|
function generatedCompose(plan, detection) {
|
|
55
87
|
const modelOrder = JSON.stringify(plan.routeIds);
|
|
56
|
-
const
|
|
57
|
-
|
|
88
|
+
const accelerator = primaryAccelerator(detection);
|
|
89
|
+
const gpuBlock =
|
|
90
|
+
accelerator.vendor === "nvidia"
|
|
91
|
+
? [
|
|
58
92
|
" ollama:",
|
|
59
93
|
" gpus: all",
|
|
60
94
|
" local-vision:",
|
|
61
95
|
" gpus: all"
|
|
62
96
|
]
|
|
97
|
+
: accelerator.vendor === "amd"
|
|
98
|
+
? [
|
|
99
|
+
" ollama:",
|
|
100
|
+
" devices:",
|
|
101
|
+
" - /dev/kfd:/dev/kfd",
|
|
102
|
+
" - /dev/dri:/dev/dri",
|
|
103
|
+
" group_add:",
|
|
104
|
+
" - video",
|
|
105
|
+
" - render",
|
|
106
|
+
" security_opt:",
|
|
107
|
+
" - seccomp=unconfined"
|
|
108
|
+
]
|
|
109
|
+
: [];
|
|
110
|
+
const desktopBlock = desktopIntegrationEnabled(detection)
|
|
111
|
+
? [
|
|
112
|
+
" local-agent:",
|
|
113
|
+
" environment:",
|
|
114
|
+
" LOCAL_AGENT_DESKTOP_ENABLED: \"1\"",
|
|
115
|
+
" volumes:",
|
|
116
|
+
" - /tmp/.X11-unix:/tmp/.X11-unix:rw",
|
|
117
|
+
" - ${XDG_RUNTIME_DIR:-/run/user/1000}:${XDG_RUNTIME_DIR:-/run/user/1000}:rw",
|
|
118
|
+
" - ${XAUTHORITY:-/tmp/.preppergpt-missing-xauthority}:/tmp/.Xauthority:ro"
|
|
119
|
+
]
|
|
63
120
|
: [];
|
|
64
121
|
return [
|
|
65
122
|
"services:",
|
|
@@ -68,7 +125,8 @@ function generatedCompose(plan, detection) {
|
|
|
68
125
|
` DEFAULT_MODELS: "${plan.defaultModel}"`,
|
|
69
126
|
` MODEL_ORDER_LIST: '${modelOrder.replaceAll("'", "''")}'`,
|
|
70
127
|
` TASK_MODEL: "${plan.selected.fast?.id || plan.defaultModel}"`,
|
|
71
|
-
...gpuBlock
|
|
128
|
+
...gpuBlock,
|
|
129
|
+
...desktopBlock
|
|
72
130
|
].join("\n") + "\n";
|
|
73
131
|
}
|
|
74
132
|
|
package/package.json
CHANGED
package/profiles/models.json
CHANGED
|
@@ -131,16 +131,18 @@
|
|
|
131
131
|
"contextTokens": 262144,
|
|
132
132
|
"qualityScore": 78,
|
|
133
133
|
"speedScore": 96,
|
|
134
|
-
"tpsEstimate": "35-90 completion tokens/sec on a modern 16-24 GB NVIDIA GPU",
|
|
134
|
+
"tpsEstimate": "35-90 completion tokens/sec on a modern 16-24 GB NVIDIA GPU or supported Linux AMD ROCm GPU",
|
|
135
135
|
"requires": {
|
|
136
136
|
"minRamGb": 24,
|
|
137
137
|
"minVramGb": 11,
|
|
138
|
-
"diskGb": 20
|
|
138
|
+
"diskGb": 20,
|
|
139
|
+
"gpuVendors": ["nvidia", "amd"],
|
|
140
|
+
"requiresRocm": true
|
|
139
141
|
},
|
|
140
142
|
"source": {
|
|
141
143
|
"type": "ollama",
|
|
142
144
|
"model": "gemma4:12b",
|
|
143
|
-
"description": "Pulled or provided through the local Ollama server."
|
|
145
|
+
"description": "Pulled or provided through the local Ollama server. AMD hosts use the Ollama ROCm container when ROCm is detected on Linux."
|
|
144
146
|
}
|
|
145
147
|
},
|
|
146
148
|
{
|
|
@@ -151,16 +153,18 @@
|
|
|
151
153
|
"contextTokens": 32768,
|
|
152
154
|
"qualityScore": 76,
|
|
153
155
|
"speedScore": 75,
|
|
154
|
-
"tpsEstimate": "12-45 completion tokens/sec depending on GPU and quantization",
|
|
156
|
+
"tpsEstimate": "12-45 completion tokens/sec depending on GPU vendor, ROCm/CUDA readiness, and quantization",
|
|
155
157
|
"requires": {
|
|
156
158
|
"minRamGb": 24,
|
|
157
159
|
"minVramGb": 10,
|
|
158
|
-
"diskGb": 16
|
|
160
|
+
"diskGb": 16,
|
|
161
|
+
"gpuVendors": ["nvidia", "amd"],
|
|
162
|
+
"requiresRocm": true
|
|
159
163
|
},
|
|
160
164
|
"source": {
|
|
161
165
|
"type": "ollama",
|
|
162
166
|
"model": "qwen2.5-coder:14b",
|
|
163
|
-
"description": "Ollama coding fallback."
|
|
167
|
+
"description": "Ollama coding fallback. AMD hosts use the Ollama ROCm container when ROCm is detected on Linux."
|
|
164
168
|
}
|
|
165
169
|
},
|
|
166
170
|
{
|
|
@@ -230,7 +234,9 @@
|
|
|
230
234
|
"requires": {
|
|
231
235
|
"minRamGb": 24,
|
|
232
236
|
"minVramGb": 11,
|
|
233
|
-
"diskGb": 20
|
|
237
|
+
"diskGb": 20,
|
|
238
|
+
"gpuVendors": ["nvidia", "amd"],
|
|
239
|
+
"requiresRocm": true
|
|
234
240
|
},
|
|
235
241
|
"source": {
|
|
236
242
|
"type": "virtual",
|
|
@@ -268,11 +274,13 @@
|
|
|
268
274
|
"requires": {
|
|
269
275
|
"minRamGb": 32,
|
|
270
276
|
"minVramGb": 16,
|
|
271
|
-
"diskGb": 60
|
|
277
|
+
"diskGb": 60,
|
|
278
|
+
"gpuVendors": ["nvidia", "amd"],
|
|
279
|
+
"requiresRocm": true
|
|
272
280
|
},
|
|
273
281
|
"source": {
|
|
274
282
|
"type": "manual",
|
|
275
|
-
"description": "Place Flux model, text encoder, and VAE files in the configured ComfyUI models directory."
|
|
283
|
+
"description": "Place Flux model, text encoder, and VAE files in the configured ComfyUI models directory. Use a CUDA or Linux ROCm ComfyUI runtime that matches the host GPU."
|
|
276
284
|
}
|
|
277
285
|
},
|
|
278
286
|
{
|