npm - preppergpt - Versions diffs - 0.1.0 → 0.1.2 - Mend

preppergpt 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/README.md +39 -15
package/compose/preppergpt.yaml +10 -8
package/docs/bundles.md +30 -0
package/docs/hardware.md +18 -1
package/docs/model-sources.md +3 -2
package/docs/preppergpt-local-parity-map.md +4 -2
package/installer/cli.mjs +33 -2
package/installer/lib/bundles.mjs +107 -0
package/installer/lib/planner.mjs +7 -4
package/installer/lib/render.mjs +21 -1
package/package.json +6 -4
package/profiles/models.json +38 -16
package/services/local-scheduler/app.py +20 -18

package/README.md CHANGED

@@ -1,32 +1,45 @@
 # PrepperGPT
-PrepperGPT packages a local-first ChatGPT-like experience for Linux machines.
-It uses upstream OpenWebUI for the app shell and adds a hardware detector,
-model planner, Docker Compose runtime, local sidecars, and a practical
-PrepperGPT field-kit theme.
+PrepperGPT packages a local-first ChatGPT-like experience for post-apocalyptic
+or long-duration outage scenarios where hosted AI services are unavailable. It
+uses upstream OpenWebUI for the app shell and adds a hardware detector, model
+planner, Docker Compose runtime, local sidecars, and a practical PrepperGPT
+field-kit theme.
 The first release targets Linux with NVIDIA GPUs first, with CPU fallback where
 possible. It is an online installer: model and container downloads require a
 working network during setup.
+PrepperGPT optimizes for survivability over cloud-like latency. On very large
+local models, very low tokens/sec is acceptable because the alternative in the
+target scenario is no assistant at all.
 ## Install
-Until the npm package is published, install from GitHub:
+Install from npm:
 ```bash
-git clone https://github.com/teamslop/preppergpt.git
-cd preppergpt
-node bin/preppergpt.js install --profile balanced
-node bin/preppergpt.js start
+npx preppergpt install --profile balanced
+preppergpt start
 ```
-After npm publication:
+Or install globally:
 ```bash
-npx preppergpt install --profile balanced
+npm install -g preppergpt
+preppergpt install --profile balanced
 preppergpt start
 ```
+GitHub source install:
+```bash
+git clone https://github.com/teamslop/preppergpt.git
+cd preppergpt
+node bin/preppergpt.js install --profile balanced
+node bin/preppergpt.js start
+```
 Other profiles:
 ```bash
@@ -54,12 +67,14 @@ preppergpt stop
 preppergpt status
 preppergpt doctor
 preppergpt switch-profile --profile speed
+preppergpt bundle whisper
 ```
 ## Profiles
 - `intelligence`: chooses the strongest local reasoning route that fits the
-  machine, preferring GLM 5.2 Q4 and long-context coding routes when available.
+  machine, preferring GLM 5.2 Q8 on enterprise hardware, then GLM 5.2 Q4, then
+  long-context coding routes when available.
 - `speed`: chooses smaller GPU-friendly routes and makes low-latency chat the
   default.
 - `balanced`: uses the local auto-router as the default and keeps reasoning,
@@ -70,11 +85,20 @@ and route ordering into the generated compose override.
 ## Model Assets
-Some routes can be pulled by the runtime, while very large routes such as GLM
-5.2 Q4 and Flux weights are marked as manual or external in
+PrepperGPT installs a bundled local Whisper Base STT cache during
+`preppergpt install`. It is stored under `~/.preppergpt/data/models/whisper/base`
+by default and mounted into OpenWebUI, so speech-to-text works from local files
+after setup.
+Some other routes can be pulled by the runtime, while very large routes such as
+GLM 5.2 Q8/Q4 and Flux weights are marked as manual or external in
 `profiles/models.json`. `preppergpt doctor` reports which selected routes still
 need local files or endpoints.
+The GLM 5.2 Q8 route is intended for an enterprise/off-grid bunker-class host:
+large RAM, fast NVMe, and patience for slow local generation when no hosted
+service remains available.
 ## Publishing
 The package is designed to be published as:
@@ -84,7 +108,7 @@ npm publish --access public
 ```
 Publishing requires an authenticated npm account with permission to publish the
-currently unclaimed `preppergpt` package name.
+`preppergpt` package.
 The source repository is expected at:

package/compose/preppergpt.yaml CHANGED

@@ -13,6 +13,7 @@ services:
         hard: 1048576
     volumes:
       - ${PREPPERGPT_DATA_DIR:?set PREPPERGPT_DATA_DIR}/openwebui:/app/backend/data
+      - ${PREPPERGPT_MODELS_DIR:?set PREPPERGPT_MODELS_DIR}:/models:ro
       - ../services/comfyui:/app/backend/data/parity-comfyui:ro
       - ../themes/preppergpt/static/favicon.svg:/app/backend/open_webui/static/favicon.svg:ro
       - ../themes/preppergpt/static/logo.svg:/app/backend/open_webui/static/logo.svg:ro
@@ -26,8 +27,8 @@ services:
       ENABLE_OLLAMA_API: "True"
       OLLAMA_BASE_URLS: "${OLLAMA_BASE_URL:-http://127.0.0.1:11434}"
       ENABLE_OPENAI_API: "True"
-      OPENAI_API_BASE_URLS: "${SLOCODE_BASE_URL:-http://127.0.0.1:11438/v1};${GLM52_BASE_URL:-http://127.0.0.1:11441/v1};http://127.0.0.1:18041/v1;http://127.0.0.1:18043/v1;http://127.0.0.1:18044/v1"
-      OPENAI_API_KEYS: "slopcode;glm52;deep-research;local-agent;local-vision"
+      OPENAI_API_BASE_URLS: "${GLM52_Q8_BASE_URL:-http://127.0.0.1:11446/v1};${SLOCODE_BASE_URL:-http://127.0.0.1:11438/v1};${GLM52_BASE_URL:-http://127.0.0.1:11441/v1};http://127.0.0.1:18041/v1;http://127.0.0.1:18043/v1;http://127.0.0.1:18044/v1"
+      OPENAI_API_KEYS: "glm52-q8;slopcode;glm52;deep-research;local-agent;local-vision"
       ENABLE_DIRECT_CONNECTIONS: "True"
       DEFAULT_MODELS: "${PREPPERGPT_DEFAULT_MODEL:-local-chatgpt-auto}"
       MODEL_ORDER_LIST: "${PREPPERGPT_MODEL_ORDER_LIST:-[\"local-chatgpt-auto\"]}"
@@ -69,9 +70,10 @@ services:
       CODE_INTERPRETER_JUPYTER_URL: "http://127.0.0.1:8888"
       CODE_INTERPRETER_JUPYTER_AUTH: "token"
       CODE_INTERPRETER_JUPYTER_AUTH_TOKEN: "${JUPYTER_TOKEN:?set JUPYTER_TOKEN}"
-      WHISPER_MODEL: "large-v3"
+      WHISPER_MODEL: "${PREPPERGPT_WHISPER_MODEL_PATH:-/models/whisper/base}"
+      WHISPER_MODEL_DIR: "/models/whisper"
       WHISPER_COMPUTE_TYPE: "int8"
-      WHISPER_MODEL_AUTO_UPDATE: "True"
+      WHISPER_MODEL_AUTO_UPDATE: "False"
       WHISPER_VAD_FILTER: "True"
       WHISPER_MULTILINGUAL: "True"
       ENABLE_IMAGE_GENERATION: "True"
@@ -147,8 +149,8 @@ services:
       DEEP_RESEARCH_PORT: "18041"
       DEEP_RESEARCH_PUBLIC_BASE_URL: "http://127.0.0.1:18041"
       DEEP_RESEARCH_MODEL_ID: "deep-research-glm52"
-      DEEP_RESEARCH_MODEL: "${DEEP_RESEARCH_MODEL:-glm52-q4-local}"
-      DEEP_RESEARCH_GLM_BASE_URL: "${GLM52_BASE_URL:-http://127.0.0.1:11441/v1}"
+      DEEP_RESEARCH_MODEL: "${PREPPERGPT_GLM_MODEL:-glm52-q4-local}"
+      DEEP_RESEARCH_GLM_BASE_URL: "${PREPPERGPT_GLM_BASE_URL:-http://127.0.0.1:11441/v1}"
       DEEP_RESEARCH_SEARXNG_URL: "http://127.0.0.1:18080/search"
       DEEP_RESEARCH_TIKA_URL: "http://127.0.0.1:9998/tika"
       DEEP_RESEARCH_LOCAL_APP_CONNECTOR_URL: "http://127.0.0.1:18042"
@@ -193,8 +195,8 @@ services:
       LOCAL_AGENT_PORT: "18043"
       LOCAL_AGENT_PUBLIC_BASE_URL: "http://127.0.0.1:18043"
       LOCAL_AGENT_MODEL_ID: "local-agent-glm52"
-      LOCAL_AGENT_GLM_MODEL: "glm52-q4-local"
-      LOCAL_AGENT_GLM_BASE_URL: "${GLM52_BASE_URL:-http://127.0.0.1:11441/v1}"
+      LOCAL_AGENT_GLM_MODEL: "${PREPPERGPT_GLM_MODEL:-glm52-q4-local}"
+      LOCAL_AGENT_GLM_BASE_URL: "${PREPPERGPT_GLM_BASE_URL:-http://127.0.0.1:11441/v1}"
       LOCAL_AGENT_AUTO_ROUTER_MODEL_ID: "local-auto-router"
       LOCAL_AGENT_AUTO_ROUTER_FAST_MODEL: "gemma4:12b-256k-gpu"
       LOCAL_AGENT_AUTO_ROUTER_FAST_BASE_URL: "http://127.0.0.1:11434/v1"

package/docs/bundles.md ADDED

@@ -0,0 +1,30 @@
+# Bundles
+PrepperGPT keeps npm lightweight but installs small always-on local assets
+during setup.
+## Whisper Base
+`preppergpt install` downloads the MIT-licensed `Systran/faster-whisper-base`
+CTranslate2 model into:
+```text
+~/.preppergpt/data/models/whisper/base
+```
+OpenWebUI receives:
+```text
+WHISPER_MODEL=/models/whisper/base
+WHISPER_MODEL_DIR=/models/whisper
+WHISPER_MODEL_AUTO_UPDATE=False
+```
+To repair or refresh the bundle:
+```bash
+preppergpt bundle whisper
+preppergpt bundle whisper --force
+```
+Source: https://huggingface.co/Systran/faster-whisper-base

package/docs/hardware.md CHANGED

@@ -1,7 +1,9 @@
 # Hardware Guide
 PrepperGPT works best on Linux with an NVIDIA GPU and enough NVMe space for
-model weights.
+model weights. It is designed for post-apocalyptic or long-duration outage
+scenarios, so the high-end GLM tiers deliberately favor local availability and
+answer quality over hosted-service latency.
 Recommended starting points:
@@ -9,7 +11,22 @@ Recommended starting points:
 - Balanced profile: 32-64 GB RAM, 12-24 GB VRAM, 120 GB free disk.
 - Intelligence profile: 96 GB RAM or more, fast NVMe, and hundreds of GB free
   for GLM 5.2 Q4 or similar large weights.
+- Enterprise 8-bit GLM tier: 256 GB RAM or more, 48-80 GB VRAM preferred,
+  and 1.5-2 TB of fast NVMe for GLM 5.2 Q8 plus working/cache room.
 The installer reserves about 15-20% VRAM headroom when deciding whether a model
 fits. If a large manual model is selected, `preppergpt doctor` explains the
 endpoint or file path that must be provided.
+Very low tokens/sec is acceptable for the GLM 5.2 Q8 tier because that tier is
+for situations where there is no cloud model to fall back to.
+## Hardware Matrix
+| Tier | Typical specs | PrepperGPT routes |
+| --- | --- | --- |
+| Basic CPU laptop | 16 GB RAM, no GPU, 80 GB disk | `local-chatgpt-auto`, `llama3.1:8b`, `local-vision-moondream2`, bundled Whisper |
+| Mid NVIDIA | 64 GB RAM, 12 GB usable VRAM, 250 GB disk | Gemma fast lane, Qwen coder fallback, local vision, bundled Whisper |
+| High NVIDIA | 128 GB RAM, 24 GB VRAM, 750 GB NVMe | GLM 5.2 Q4 configured, Slopcode/Qwen configured, Gemma fast lane, Flux configured |
+| Full PrepperGPT rig | 128+ GB RAM, 24+ GB VRAM, 1 TB NVMe, GLM/Slopcode/Flux files present | GLM 5.2 Q4 primary, Slopcode coding, Gemma fast lane, Deep Research, Agent, Vision, Flux, Whisper |
+| Enterprise 8-bit GLM rig | 256+ GB RAM, 48-80+ GB VRAM preferred, 1.5-2 TB fast NVMe | `glm52-q8-local` primary for Max Intelligence, `glm52-q4-local` fallback, Slopcode/Qwen coding, Gemma fast lane, full sidecar stack |

package/docs/model-sources.md CHANGED

@@ -3,9 +3,10 @@
 PrepperGPT separates routing from model licensing and distribution.
 - Ollama models are pulled by the local Ollama runtime when available.
-- OpenWebUI STT models are downloaded by OpenWebUI/faster-whisper.
+- Whisper Base STT is installer-cached from `Systran/faster-whisper-base`
+  under the local PrepperGPT model directory and mounted into OpenWebUI.
 - Hugging Face vision models are downloaded by the local vision sidecar.
-- Very large GLM, Slopcode, and Flux assets are marked as manual or external
+- Very large GLM Q8/Q4, Slopcode, and Flux assets are marked as manual or external
   until a license-compatible public download source is configured.
 Manual routes are still added to OpenWebUI. They become live when their local

package/docs/preppergpt-local-parity-map.md CHANGED

@@ -1,10 +1,12 @@
 # PrepperGPT Local Parity Map
-PrepperGPT packages the local ChatGPT-like stack around OpenWebUI:
+PrepperGPT packages the local ChatGPT-like stack around OpenWebUI for resilient
+local use when hosted AI services are unavailable:
 - OpenWebUI UI at `http://127.0.0.1:8080`
 - Ollama fast local models at `http://127.0.0.1:11434`
-- Optional GLM 5.2 route at `http://127.0.0.1:11441/v1`
+- Optional GLM 5.2 Q8 route at `http://127.0.0.1:11446/v1`
+- Optional GLM 5.2 Q4 route at `http://127.0.0.1:11441/v1`
 - Optional Slopcode/Qwen route at `http://127.0.0.1:11438/v1`
 - Deep research sidecar at `http://127.0.0.1:18041/v1`
 - Local scheduler connector at `http://127.0.0.1:18042`

package/installer/cli.mjs CHANGED

@@ -1,12 +1,13 @@
 import fs from "node:fs";
 import http from "node:http";
+import { ensureWhisperBundle, modelDirs, whisperBundleStatus } from "./lib/bundles.mjs";
 import { detectMachine } from "./lib/detect.mjs";
 import { buildPlan, normalizeProfile } from "./lib/planner.mjs";
 import { packageRoot, runtimePaths } from "./lib/paths.mjs";
 import { renderInstall } from "./lib/render.mjs";
 import { commandResult, parseArgs, readJson, shellQuote } from "./lib/util.mjs";
-const VERSION = "0.1.0";
+const VERSION = "0.1.2";
 function usage() {
   return `PrepperGPT ${VERSION}
@@ -14,11 +15,12 @@ function usage() {
 Usage:
   preppergpt detect [--json]
   preppergpt plan --profile balanced|intelligence|speed [--json]
-  preppergpt install --profile balanced|intelligence|speed [--dry-run] [--home PATH]
+  preppergpt install --profile balanced|intelligence|speed [--dry-run] [--skip-bundles] [--home PATH]
   preppergpt start [--home PATH]
   preppergpt stop [--home PATH]
   preppergpt status [--home PATH] [--json]
   preppergpt doctor [--profile balanced|intelligence|speed] [--home PATH]
+  preppergpt bundle whisper [--home PATH] [--force]
   preppergpt switch-profile --profile balanced|intelligence|speed [--home PATH]
   preppergpt version
 `;
@@ -122,6 +124,11 @@ async function commandInstall(flags) {
     return;
   }
   const paths = renderInstall(plan, detection, { home });
+  if (!flags.skip_bundles) {
+    console.log("Installing bundled Whisper base STT model...");
+    const bundle = await ensureWhisperBundle(paths.whisperHostDir, { force: Boolean(flags.force_bundle) });
+    console.log(`Whisper bundle: ${bundle.ready ? "ready" : "not ready"} at ${paths.whisperHostDir}`);
+  }
   console.log(`Wrote ${paths.envFile}`);
   console.log(`Wrote ${paths.generatedCompose}`);
   console.log(`Wrote ${paths.modelPlan}`);
@@ -187,6 +194,7 @@ async function commandStatus(flags) {
 }
 async function commandDoctor(flags) {
+  const paths = runtimePaths(flags.home);
   const detection = await detectMachine();
   const plan = buildPlan(detection, profileFrom(flags));
   printPlan(plan);
@@ -200,6 +208,28 @@ async function commandDoctor(flags) {
       console.log(`  port ${port}: occupied`);
     }
   }
+  const dirs = modelDirs(paths);
+  const whisper = whisperBundleStatus(dirs.whisperHostDir);
+  console.log(`  whisper-base bundle: ${whisper.ready ? "ok" : `missing ${whisper.missing.length} files`} (${dirs.whisperHostDir})`);
+}
+async function commandBundle(flags, positional) {
+  const name = positional[1] || "whisper";
+  if (!["whisper", "whisper-base"].includes(name)) {
+    throw new Error(`Unknown bundle: ${name}`);
+  }
+  const paths = runtimePaths(flags.home);
+  const dirs = modelDirs(paths);
+  const bundle = await ensureWhisperBundle(dirs.whisperHostDir, {
+    force: Boolean(flags.force),
+    dryRun: Boolean(flags.dry_run)
+  });
+  console.log(`Whisper bundle ${bundle.ready ? "ready" : "not ready"} at ${dirs.whisperHostDir}`);
+  if (bundle.missing?.length) {
+    for (const file of bundle.missing) {
+      console.log(`  missing ${file}`);
+    }
+  }
 }
 export async function runCli(argv) {
@@ -220,6 +250,7 @@ export async function runCli(argv) {
   if (command === "stop") return commandStop(flags);
   if (command === "status") return commandStatus(flags);
   if (command === "doctor") return commandDoctor(flags);
+  if (command === "bundle") return commandBundle(flags, positional);
   if (command === "switch-profile") return commandSwitchProfile(flags);
   throw new Error(`Unknown command: ${command}\n\n${usage()}`);
 }

package/installer/lib/bundles.mjs ADDED

@@ -0,0 +1,107 @@
+import fs from "node:fs";
+import path from "node:path";
+import { Readable } from "node:stream";
+import { pipeline } from "node:stream/promises";
+import { readJson, writeJson } from "./util.mjs";
+export const WHISPER_BUNDLE = {
+  id: "whisper-base",
+  name: "Whisper Base STT Bundle",
+  repo: "Systran/faster-whisper-base",
+  revision: "main",
+  license: "MIT",
+  modelPathInContainer: "/models/whisper/base",
+  files: ["config.json", "model.bin", "tokenizer.json", "vocabulary.txt", "README.md"],
+  description: "CTranslate2 faster-whisper conversion of openai/whisper-base for local OpenWebUI STT."
+};
+function parseEnvFile(file) {
+  if (!fs.existsSync(file)) {
+    return {};
+  }
+  const entries = {};
+  for (const line of fs.readFileSync(file, "utf8").split(/\r?\n/)) {
+    if (!line || line.trim().startsWith("#") || !line.includes("=")) {
+      continue;
+    }
+    const [key, ...valueParts] = line.split("=");
+    let value = valueParts.join("=");
+    if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) {
+      value = value.slice(1, -1);
+    }
+    entries[key] = value;
+  }
+  return entries;
+}
+export function modelDirs(paths) {
+  const env = parseEnvFile(paths.envFile);
+  const modelsDir = process.env.PREPPERGPT_MODELS_DIR || env.PREPPERGPT_MODELS_DIR || path.join(paths.dataDir, "models");
+  const whisperHostDir = path.join(modelsDir, "whisper", "base");
+  return { modelsDir, whisperHostDir };
+}
+export function whisperBundleStatus(targetDir) {
+  const files = WHISPER_BUNDLE.files.map((file) => path.join(targetDir, file));
+  const missing = files.filter((file) => !fs.existsSync(file));
+  let manifest = null;
+  const manifestPath = path.join(targetDir, "preppergpt-bundle.json");
+  if (fs.existsSync(manifestPath)) {
+    try {
+      manifest = readJson(manifestPath);
+    } catch {
+      manifest = null;
+    }
+  }
+  return {
+    id: WHISPER_BUNDLE.id,
+    targetDir,
+    ready: missing.length === 0,
+    missing,
+    manifest
+  };
+}
+async function downloadFile(url, targetFile) {
+  const response = await fetch(url, {
+    headers: {
+      "User-Agent": "preppergpt/0.1"
+    },
+    redirect: "follow"
+  });
+  if (!response.ok || !response.body) {
+    throw new Error(`Failed to download ${url}: HTTP ${response.status}`);
+  }
+  fs.mkdirSync(path.dirname(targetFile), { recursive: true });
+  const tmp = `${targetFile}.tmp-${process.pid}`;
+  await pipeline(Readable.fromWeb(response.body), fs.createWriteStream(tmp));
+  fs.renameSync(tmp, targetFile);
+}
+export async function ensureWhisperBundle(targetDir, options = {}) {
+  const status = whisperBundleStatus(targetDir);
+  if (status.ready && !options.force) {
+    return { ...status, changed: false };
+  }
+  fs.mkdirSync(targetDir, { recursive: true });
+  if (options.dryRun) {
+    return { ...status, changed: false, dryRun: true };
+  }
+  for (const file of WHISPER_BUNDLE.files) {
+    const targetFile = path.join(targetDir, file);
+    if (fs.existsSync(targetFile) && !options.force) {
+      continue;
+    }
+    const url = `https://huggingface.co/${WHISPER_BUNDLE.repo}/resolve/${WHISPER_BUNDLE.revision}/${file}`;
+    if (!options.quiet) {
+      console.log(`Downloading ${WHISPER_BUNDLE.repo}/${file}`);
+    }
+    await downloadFile(url, targetFile);
+  }
+  writeJson(path.join(targetDir, "preppergpt-bundle.json"), {
+    ...WHISPER_BUNDLE,
+    installedAt: new Date().toISOString(),
+    source: `https://huggingface.co/${WHISPER_BUNDLE.repo}`
+  });
+  return { ...whisperBundleStatus(targetDir), changed: true };
+}

package/installer/lib/planner.mjs CHANGED

@@ -62,7 +62,9 @@ function chooseFirst(candidates, models, detection) {
       continue;
     }
     const failures = requirementFailures(model, detection);
-    if (failures.length === 0 || model.source?.type === "manual" || model.source?.type === "external") {
+    const canUseExternalFallback =
+      ["manual", "external"].includes(model.source?.type) && !model.source?.requiresHardwareFit;
+    if (failures.length === 0 || canUseExternalFallback) {
       return { model, skipped };
     }
     skipped.push({ id, reasons: failures });
@@ -91,8 +93,9 @@ export function buildPlan(detection, requestedProfile = "balanced", catalog = lo
     }
   }
+  const defaultModel = selected.chat?.id || priorities.defaultModel;
   const routeIds = unique([
-    priorities.defaultModel,
+    defaultModel,
     selected.chat?.id,
     selected.fast?.id,
     selected.reasoning?.id,
@@ -136,7 +139,7 @@ export function buildPlan(detection, requestedProfile = "balanced", catalog = lo
     generatedAt: new Date().toISOString(),
     profile,
     profileLabel: priorities.label,
-    defaultModel: priorities.defaultModel,
+    defaultModel,
     routeIds,
     selected,
     skipped,
@@ -144,7 +147,7 @@ export function buildPlan(detection, requestedProfile = "balanced", catalog = lo
     estimates: estimatePlan(profile, selected),
     env: {
       PREPPERGPT_PROFILE: profile,
-      PREPPERGPT_DEFAULT_MODEL: priorities.defaultModel,
+      PREPPERGPT_DEFAULT_MODEL: defaultModel,
       PREPPERGPT_MODEL_ORDER_LIST: JSON.stringify(routeIds)
     },
     warnings

package/installer/lib/render.mjs CHANGED

@@ -1,5 +1,7 @@
 import crypto from "node:crypto";
 import fs from "node:fs";
+import path from "node:path";
+import { WHISPER_BUNDLE } from "./bundles.mjs";
 import { packagedPath, runtimePaths } from "./paths.mjs";
 import { envQuote, writeJson, writeText } from "./util.mjs";
@@ -10,6 +12,12 @@ function secret(bytes = 24) {
 function envFile(plan, paths, detection) {
   const dataDir = process.env.PREPPERGPT_DATA_DIR || paths.dataDir;
   const modelsDir = process.env.PREPPERGPT_MODELS_DIR || `${dataDir}/models`;
+  const whisperHostDir = path.join(modelsDir, "whisper", "base");
+  const selectedReasoningModel = plan.selected?.reasoning?.id || "glm52-q4-local";
+  const selectedGlmBaseUrl =
+    selectedReasoningModel === "glm52-q8-local"
+      ? process.env.GLM52_Q8_BASE_URL || "http://127.0.0.1:11446/v1"
+      : process.env.GLM52_BASE_URL || "http://127.0.0.1:11441/v1";
   const adminPassword = process.env.PREPPERGPT_ADMIN_PASSWORD || secret(18);
   const jupyterToken = process.env.JUPYTER_TOKEN || secret(18);
   const searxngSecret = process.env.SEARXNG_SECRET_KEY || secret(24);
@@ -17,9 +25,14 @@ function envFile(plan, paths, detection) {
     PREPPERGPT_PROFILE: plan.profile,
     PREPPERGPT_DATA_DIR: dataDir,
     PREPPERGPT_MODELS_DIR: modelsDir,
+    PREPPERGPT_WHISPER_HOST_DIR: whisperHostDir,
+    PREPPERGPT_WHISPER_MODEL: WHISPER_BUNDLE.id,
+    PREPPERGPT_WHISPER_MODEL_PATH: WHISPER_BUNDLE.modelPathInContainer,
     PREPPERGPT_PORT: process.env.PREPPERGPT_PORT || "8080",
     PREPPERGPT_DEFAULT_MODEL: plan.defaultModel,
     PREPPERGPT_MODEL_ORDER_LIST: JSON.stringify(plan.routeIds),
+    PREPPERGPT_GLM_MODEL: selectedReasoningModel,
+    PREPPERGPT_GLM_BASE_URL: selectedGlmBaseUrl,
     PREPPERGPT_DOCKER_GPUS: detection.gpus?.length ? "all" : "",
     WEBUI_NAME: "PrepperGPT",
     WEBUI_ADMIN_EMAIL: process.env.WEBUI_ADMIN_EMAIL || "admin@preppergpt.local",
@@ -29,6 +42,7 @@ function envFile(plan, paths, detection) {
     JUPYTER_TOKEN: jupyterToken,
     SEARXNG_SECRET_KEY: searxngSecret,
     GLM52_BASE_URL: process.env.GLM52_BASE_URL || "http://127.0.0.1:11441/v1",
+    GLM52_Q8_BASE_URL: process.env.GLM52_Q8_BASE_URL || "http://127.0.0.1:11446/v1",
     SLOCODE_BASE_URL: process.env.SLOCODE_BASE_URL || "http://127.0.0.1:11438/v1",
     OLLAMA_BASE_URL: process.env.OLLAMA_BASE_URL || "http://127.0.0.1:11434"
   };
@@ -60,17 +74,23 @@ function generatedCompose(plan, detection) {
 export function renderInstall(plan, detection, options = {}) {
   const paths = runtimePaths(options.home);
+  const dataDir = process.env.PREPPERGPT_DATA_DIR || paths.dataDir;
+  const modelsDir = process.env.PREPPERGPT_MODELS_DIR || `${dataDir}/models`;
+  const whisperHostDir = path.join(modelsDir, "whisper", "base");
   fs.mkdirSync(paths.root, { recursive: true });
   fs.mkdirSync(paths.dataDir, { recursive: true });
   fs.mkdirSync(paths.composeDir, { recursive: true });
   fs.mkdirSync(`${paths.dataDir}/preppergpt`, { recursive: true });
-  fs.mkdirSync(`${paths.dataDir}/models`, { recursive: true });
+  fs.mkdirSync(modelsDir, { recursive: true });
+  fs.mkdirSync(whisperHostDir, { recursive: true });
   writeText(paths.envFile, envFile(plan, paths, detection), 0o600);
   writeText(paths.generatedCompose, generatedCompose(plan, detection));
   writeJson(paths.modelPlan, plan);
   writeJson(paths.detectReport, detection);
   return {
     ...paths,
+    modelsDir,
+    whisperHostDir,
     packageCompose: packagedPath("compose", "preppergpt.yaml")
   };
 }

package/package.json CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "preppergpt",
-  "version": "0.1.0",
-  "description": "A local-first ChatGPT-like field kit built on OpenWebUI and local models.",
+  "version": "0.1.2",
+  "description": "A post-apocalyptic local AI field kit for running a ChatGPT-like experience when hosted services are unavailable.",
   "type": "module",
   "bin": {
     "preppergpt": "bin/preppergpt.js"
@@ -19,7 +19,7 @@
   ],
   "scripts": {
     "test": "node --test",
-    "check": "npm run test && node bin/preppergpt.js plan --profile balanced --json >/dev/null && node bin/preppergpt.js install --profile balanced --dry-run",
+    "check": "npm run test && node bin/preppergpt.js plan --profile balanced --json >/dev/null && node bin/preppergpt.js install --profile balanced --dry-run && node bin/preppergpt.js bundle whisper --dry-run >/dev/null",
     "pack:dry-run": "npm pack --dry-run"
   },
   "keywords": [
@@ -28,7 +28,9 @@
     "llm",
     "ollama",
     "preppergpt",
-    "offline-ai"
+    "offline-ai",
+    "survival",
+    "post-apocalyptic"
   ],
   "homepage": "https://github.com/teamslop/preppergpt#readme",
   "bugs": {

package/profiles/models.json CHANGED

@@ -5,15 +5,15 @@
       "label": "Max intelligence",
       "defaultModel": "glm52-q4-local",
       "roles": {
-        "chat": ["glm52-q4-local", "qwen3.6-35b-a3b:slopcode-cpu-64k", "qwen2.5-coder:14b", "llama3.1:8b"],
-        "reasoning": ["glm52-q4-local", "qwen3.6-35b-a3b:slopcode-cpu-64k", "qwen2.5-coder:14b"],
+        "chat": ["glm52-q8-local", "glm52-q4-local", "qwen3.6-35b-a3b:slopcode-cpu-64k", "qwen2.5-coder:14b", "llama3.1:8b"],
+        "reasoning": ["glm52-q8-local", "glm52-q4-local", "qwen3.6-35b-a3b:slopcode-cpu-64k", "qwen2.5-coder:14b"],
         "fast": ["gemma4:12b-256k-gpu", "llama3.1:8b"],
         "coding": ["qwen3.6-35b-a3b:slopcode-cpu-64k", "qwen2.5-coder:14b"],
         "research": ["deep-research-glm52"],
         "agent": ["local-agent-glm52"],
         "vision": ["local-vision-gemma4-12b", "local-vision-moondream2"],
         "image": ["flux-2-klein-9b-fp8"],
-        "stt": ["whisper-large-v3"]
+        "stt": ["whisper-base-bundled"]
       }
     },
     "balanced": {
@@ -21,14 +21,14 @@
       "defaultModel": "local-chatgpt-auto",
       "roles": {
         "chat": ["local-chatgpt-auto", "gemma4:12b-256k-gpu", "glm52-q4-local", "llama3.1:8b"],
-        "reasoning": ["glm52-q4-local", "qwen3.6-35b-a3b:slopcode-cpu-64k", "qwen2.5-coder:14b"],
+        "reasoning": ["glm52-q8-local", "glm52-q4-local", "qwen3.6-35b-a3b:slopcode-cpu-64k", "qwen2.5-coder:14b"],
         "fast": ["gemma4:12b-256k-gpu", "llama3.1:8b"],
         "coding": ["qwen3.6-35b-a3b:slopcode-cpu-64k", "qwen2.5-coder:14b"],
         "research": ["deep-research-glm52"],
         "agent": ["local-agent-glm52"],
         "vision": ["local-vision-gemma4-12b", "local-vision-moondream2"],
         "image": ["flux-2-klein-9b-fp8"],
-        "stt": ["whisper-large-v3"]
+        "stt": ["whisper-base-bundled"]
       }
     },
     "speed": {
@@ -43,7 +43,7 @@
         "agent": ["local-agent-glm52"],
         "vision": ["local-vision-moondream2", "local-vision-gemma4-12b"],
         "image": ["flux-2-klein-9b-fp8"],
-        "stt": ["whisper-large-v3"]
+        "stt": ["whisper-base-bundled"]
       }
     }
   },
@@ -66,6 +66,26 @@
         "description": "Virtual OpenAI-compatible route exposed by the local-agent sidecar."
       }
     },
+    {
+      "id": "glm52-q8-local",
+      "name": "GLM 5.2 Q8 Local",
+      "roles": ["chat", "reasoning"],
+      "backend": "llama.cpp",
+      "contextTokens": 65536,
+      "qualityScore": 104,
+      "speedScore": 15,
+      "tpsEstimate": "very low tokens/sec may be acceptable in disaster/off-grid use because no hosted model is available; benchmark locally",
+      "requires": {
+        "minRamGb": 192,
+        "diskGb": 1000,
+        "nvme": true
+      },
+      "source": {
+        "type": "external",
+        "requiresHardwareFit": true,
+        "description": "Run a GLM 5.2 Q8 OpenAI-compatible llama.cpp server at http://127.0.0.1:11446/v1 with weights on fast NVMe for maximum local quality when hosted services are unavailable."
+      }
+    },
     {
       "id": "glm52-q4-local",
       "name": "GLM 5.2 Q4 Local",
@@ -74,7 +94,7 @@
       "contextTokens": 65536,
       "qualityScore": 100,
       "speedScore": 25,
-      "tpsEstimate": "0.4-3 completion tokens/sec on large CPU/NVMe builds; benchmark locally",
+      "tpsEstimate": "0.4-3 completion tokens/sec on large CPU/NVMe builds; acceptable for disaster/off-grid use when no hosted service is available; benchmark locally",
       "requires": {
         "minRamGb": 96,
         "diskGb": 520,
@@ -256,21 +276,23 @@
       }
     },
     {
-      "id": "whisper-large-v3",
-      "name": "Whisper Large v3 STT",
+      "id": "whisper-base-bundled",
+      "name": "Bundled Whisper Base STT",
       "roles": ["stt"],
       "backend": "openwebui-faster-whisper",
       "contextTokens": 0,
-      "qualityScore": 82,
-      "speedScore": 65,
-      "tpsEstimate": "audio transcription speed depends on CPU/GPU and clip length",
+      "qualityScore": 72,
+      "speedScore": 82,
+      "tpsEstimate": "local speech-to-text speed depends on CPU/GPU and clip length",
       "requires": {
-        "minRamGb": 16,
-        "diskGb": 4
+        "minRamGb": 8,
+        "diskGb": 1
       },
       "source": {
-        "type": "openwebui",
-        "description": "OpenWebUI faster-whisper local STT model."
+        "type": "bundled-download",
+        "model": "Systran/faster-whisper-base",
+        "license": "MIT",
+        "description": "Installer-cached faster-whisper base model mounted into OpenWebUI for offline local STT after install."
       }
     }
   ]

package/services/local-scheduler/app.py CHANGED

@@ -3780,11 +3780,11 @@ def local_parity_recommended_model(feature_family: str, primary_models: list[str
         return models[0] if models else None
     if "codex" in family or "software" in family or "code" in family:
-        return first_available(["slopcode-qwen-coder-local", "local-agent-glm52", "glm52-q4-local"])
+        return first_available(["slopcode-qwen-coder-local", "local-agent-glm52", "glm52-q8-local", "glm52-q4-local"])
     if "deep research" in family:
-        return first_available(["deep-research-glm52", "glm52-q4-local"])
+        return first_available(["deep-research-glm52", "glm52-q8-local", "glm52-q4-local"])
     if "developer mode" in family or "mcp" in family:
-        return first_available(["local-agent-glm52", "local-chatgpt-auto", "glm52-q4-local"])
+        return first_available(["local-agent-glm52", "local-chatgpt-auto", "glm52-q8-local", "glm52-q4-local"])
     if "image generation" in family:
         return first_available(["flux-2-klein-9b-fp8"])
     if "image editing" in family:
@@ -3792,18 +3792,18 @@ def local_parity_recommended_model(feature_family: str, primary_models: list[str
     if "image understanding" in family:
         return first_available(["local-vision-gemma4-12b", "local-vision-moondream2"])
     if "voice" in family or "record mode" in family:
-        return first_available(["whisper-large-v3", "local-agent-glm52"])
+        return first_available(["whisper-base-bundled", "whisper-large-v3", "local-agent-glm52"])
     if "shopping" in family:
-        return first_available(["glm52-shopping-research-local", "glm52-q4-local"])
+        return first_available(["glm52-shopping-research-local", "glm52-q8-local", "glm52-q4-local"])
     if "job search" in family or "resume" in family or "finance" in family:
-        return first_available(["local-agent-glm52", "local-chatgpt-auto", "glm52-q4-local"])
+        return first_available(["local-agent-glm52", "local-chatgpt-auto", "glm52-q8-local", "glm52-q4-local"])
     if "study" in family:
-        return first_available(["glm52-study-coach-local", "glm52-q4-local"])
+        return first_available(["glm52-study-coach-local", "glm52-q8-local", "glm52-q4-local"])
     if "advanced reasoning" in family or "long context" in family:
-        return first_available(["glm52-q4-local"])
+        return first_available(["glm52-q8-local", "glm52-q4-local"])
     if "data analysis" in family or "canvas" in family or "memory" in family or "agent mode" in family:
-        return first_available(["local-agent-glm52", "glm52-q4-local"])
-    return first_available(["local-chatgpt-auto", "local-auto-router", "local-instant-gemma4-12b", "glm52-q4-local"])
+        return first_available(["local-agent-glm52", "glm52-q8-local", "glm52-q4-local"])
+    return first_available(["local-chatgpt-auto", "local-auto-router", "local-instant-gemma4-12b", "glm52-q8-local", "glm52-q4-local"])
 def local_parity_route_for_model(feature_family: str, model: str | None, profiles: dict) -> dict:
@@ -3833,10 +3833,10 @@ def local_parity_route_for_model(feature_family: str, model: str | None, profile
         route_id = "slopcode_tiny"
         route_type = "benchmarked_chat_route"
         action = "Select the Slopcode/Qwen coding model in OpenWebUI for local software work."
-    elif model_text == "glm52-q4-local" or "advanced reasoning" in family or "long context" in family:
+    elif model_text in {"glm52-q8-local", "glm52-q4-local"} or "advanced reasoning" in family or "long context" in family:
         route_id = "glm_tiny"
         route_type = "benchmarked_chat_route"
-        action = "Select GLM 5.2 Q4 in OpenWebUI for private long-context reasoning."
+        action = "Select the best available local GLM 5.2 route in OpenWebUI for private long-context reasoning."
     elif "shopping" in model_text:
         route_id = "glm52_shopping_research_preset"
         route_type = "chat_preset"
@@ -4462,9 +4462,9 @@ WORKFLOW_RECIPE_BLUEPRINTS = [
         "id": "private-long-context-workflow",
         "task_id": "private-long-context-reasoning",
         "title": "Private long-context reasoning with GLM 5.2",
-        "openwebui_entrypoint": "Model picker -> glm52-q4-local",
+        "openwebui_entrypoint": "Model picker -> glm52-q8-local on enterprise rigs, otherwise glm52-q4-local",
         "steps": [
-            "Select glm52-q4-local when privacy and context length matter more than latency.",
+            "Select glm52-q8-local on enterprise hardware when maximum local quality matters; otherwise select glm52-q4-local.",
             "Keep the prompt bounded when possible; use files/projects for reusable context.",
             "Use fast local routes for quick follow-ups when GLM latency is not needed.",
         ],
@@ -5293,6 +5293,7 @@ def local_parity_dashboard() -> dict:
         },
         "urls": {
             "openwebui": "http://127.0.0.1:8080",
+            "glm52_q8_openai": "http://127.0.0.1:11446/v1",
             "glm52_openai": "http://127.0.0.1:11441/v1",
             "slopcode_openai": "http://127.0.0.1:11438/v1",
             "deep_research_openai": "http://127.0.0.1:18041/v1",
@@ -5306,6 +5307,7 @@ def local_parity_dashboard() -> dict:
         },
         "primary_models": [
             {"id": "local-chatgpt-auto", "route": "fast_router", "best_for": "default local ChatGPT-like routing"},
+            {"id": "glm52-q8-local", "route": "glm_tiny", "context_tokens": 65536, "best_for": "enterprise 8-bit private long-context reasoning"},
             {"id": "glm52-q4-local", "route": "glm_tiny", "context_tokens": 65536, "best_for": "private long-context reasoning"},
             {
                 "id": "qwen3.6-35b-a3b:slopcode-cpu-64k",
@@ -5317,7 +5319,7 @@ def local_parity_dashboard() -> dict:
             {"id": "local-agent-glm52", "route": "local_agent", "best_for": "tool and agent workflows"},
             {"id": "local-vision-gemma4-12b", "route": "local_vision", "best_for": "image understanding"},
             {"id": "flux-2-klein-9b-fp8", "route": "comfyui_flux", "best_for": "image generation"},
-            {"id": "whisper-large-v3", "route": "local_whisper_stt", "best_for": "speech-to-text"},
+            {"id": "whisper-base-bundled", "route": "local_whisper_stt", "best_for": "speech-to-text"},
         ],
         "route_profiles": {
             key: {
@@ -6660,7 +6662,7 @@ def local_parity_audit_html() -> str:
       {metric("Starter prompts", f"{starter_summary.get('ready_starter_prompts')}/{starter_summary.get('starter_prompts')}", "prompt-library items")}
       {metric("Current release", f"{source_freshness_summary.get('current_release_covered_families')}/{source_freshness_summary.get('current_release_expected_families')}", "families")}
       {metric("Release evidence", f"{source_freshness_summary.get('current_release_covered_evidence_terms')}/{source_freshness_summary.get('current_release_expected_evidence_terms')}", "terms")}
-      {metric("Primary GLM route", "glm52-q4-local", "local long-context model")}
+      {metric("Primary GLM route", "glm52-q8-local / glm52-q4-local", "local long-context model")}
       {metric("Scope exclusions", f"{frontier_summary.get('excluded_from_local_goal_items')}/{frontier_summary.get('boundary_items')}", "hosted capabilities")}
       {metric("Evidence artifacts", f"{evidence_summary.get('ready_artifacts')}/{evidence_summary.get('artifacts')}", "privacy-safe proof")}
       {metric("Quality evals", scorecard_summary.get('quality_evals'), "executable")}
@@ -9653,7 +9655,7 @@ def local_parity_gap_report_html() -> str:
       {metric("Quality evals", summary.get('quality_evals'), "executable catalog")}
       {metric("Continuity", summary.get('continuity_status'), "fallback status")}
       {metric("Sources", summary.get('source_entries'), "source snapshot")}
-      {metric("Primary GLM route", "glm52-q4-local", "local long-context model")}
+      {metric("Primary GLM route", "glm52-q8-local / glm52-q4-local", "local long-context model")}
       {metric("GLM context", "65,536", "tokens")}
       {metric("Scope exclusions", f"{frontier_summary.get('excluded_from_local_goal_items')}/{frontier_summary.get('boundary_items')}", "hosted capabilities")}
     </section>
@@ -12797,7 +12799,7 @@ def local_model_route_recommendations() -> dict:
         "glm_tiny": {
             "title": "Private GLM 5.2 reasoning route",
             "benchmark_suite": "glm_tiny",
-            "default_model": "glm52-q4-local",
+            "default_model": "glm52-q8-local or glm52-q4-local",
             "target_tps": 0.1,
             "best_for": [
                 "private long-context reasoning",