npm - @geravant/sinain - Versions diffs - 1.24.1 → 1.26.0 - Mend

@geravant/sinain 1.24.1 → 1.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/.env.example +12 -1
package/config-shared.js +176 -25
package/config.js +21 -3
package/launcher.js +19 -5
package/onboard.js +150 -41
package/package.json +1 -1
package/sense_client/config.py +1 -1
package/sense_client/ollama_vision.py +4 -4
package/sense_client/vision.py +14 -3
package/setup-overlay.js +1 -1
package/sinain-agent/run.sh +67 -1
package/sinain-core/src/agent/analyzer.ts +2 -1
package/sinain-core/src/config.ts +20 -1
package/sinain-core/src/distribution/download-manager.ts +199 -0
package/sinain-core/src/index.ts +19 -11

package/.env.example CHANGED Viewed

@@ -16,6 +16,17 @@ ANALYSIS_MODEL=google/gemini-2.5-flash-lite
 OPENROUTER_API_KEY=                # get one free at https://openrouter.ai
                                    # used by context analysis + transcription
+# ── Local Mode (unified — fully offline, zero cloud) ─────────────────────────
+# SINAIN_LOCAL_* is the primary namespace. Setting SINAIN_LOCAL_MODE=true
+# auto-derives analyzer, vision, and transcription config from the two model
+# vars below (see sinain-core/src/config.ts). The legacy LOCAL_VISION_* vars
+# are still honored as a fallback but are deprecated — prefer SINAIN_LOCAL_*.
+# Quick start:  cp .env.paranoid .env   (or ./start.sh --paranoid)
+# Prereqs:      ollama serve        (keep it running in a separate terminal), then:
+#               ollama pull phi4-mini && ollama pull qwen2.5vl:7b
+# SINAIN_LOCAL_MODE=false            # true → offline analyzer + vision + STT
+# SINAIN_LOCAL_LLM=phi4-mini         # Ollama model: analyzer + distiller
+# SINAIN_LOCAL_VISION=qwen2.5vl:7b   # Ollama model: screen OCR / scene (sense_client)
 # ── Privacy ──────────────────────────────────────────────────────────────────
 PRIVACY_MODE=standard              # off | standard | strict | paranoid
                                    # standard: auto-redacts credentials before cloud APIs
@@ -86,7 +97,7 @@ TRANSCRIPTION_LANGUAGE=en-US
 # Install: brew install whisper-cpp
 # Models:  https://huggingface.co/ggerganov/whisper.cpp/tree/main
 # LOCAL_WHISPER_BIN=whisper-cli
-# LOCAL_WHISPER_MODEL=~/models/ggml-large-v3-turbo.bin
+# LOCAL_WHISPER_MODEL=~/.sinain/models/whisper/ggml-large-v3-turbo.bin
 # LOCAL_WHISPER_TIMEOUT_MS=15000
 # ── OpenClaw / NemoClaw Gateway ──────────────────────────────────────────────

package/config-shared.js CHANGED Viewed

@@ -531,34 +531,185 @@ async function setupLocalGateway(existing) {
   };
 }
-export async function stepPrivacy(existing, label = "Privacy mode") {
+/**
+ * Reports whether `wanted` is present in the list of installed Ollama models.
+ *
+ * Names are compared exactly after normalising an omitted tag to ":latest"
+ * (Ollama treats a bare "llava" as "llava:latest"). A plain prefix test is
+ * not enough: it would report "phi4" as installed when only "phi4-mini" is.
+ */
+function isOllamaModelInstalled(installed, wanted) {
+  // Only the last path segment carries the tag; a registry host may contain ":" too.
+  const withTag = (name) => (name.split("/").pop().includes(":") ? name : `${name}:latest`);
+  const target = withTag(wanted);
+  return installed.some((name) => withTag(name) === target);
+}
+/**
+ * Local mode: run everything on-device with Ollama + whisper.cpp.
+ *
+ * Asks whether to enable local mode, probes Ollama at localhost:11434, lets
+ * the user choose an LLM and a vision model (seeded from
+ * existing.SINAIN_LOCAL_LLM / existing.SINAIN_LOCAL_VISION), and offers to
+ * `ollama pull` any chosen model that Ollama does not list yet.
+ *
+ * Returns null (skip) or { llm, vision } model names. null is returned when
+ * the user declines local mode, or declines to continue while Ollama is
+ * unreachable.
+ */
+export async function stepLocalMode(existing, label = "Local mode (Ollama)") {
+  const currentEnabled = existing.SINAIN_LOCAL_MODE === "true";
+  const enable = guard(await p.confirm({
+    message: `${label} — run analysis + OCR on your machine, no cloud?`,
+    initialValue: currentEnabled,
+  }));
+  if (!enable) return null;
+  // Check Ollama
+  let ollamaOk = false;
+  let availableModels = [];
+  const s = p.spinner();
+  s.start("Checking Ollama...");
+  try {
+    const res = await fetch("http://localhost:11434/api/tags", { signal: AbortSignal.timeout(3000) });
+    if (res.ok) {
+      const data = await res.json();
+      // Ignore malformed entries so the name comparisons below never see undefined.
+      availableModels = (data.models || [])
+        .map((m) => m.name)
+        .filter((name) => typeof name === "string");
+      ollamaOk = true;
+      s.stop(c.green(`Ollama running (${availableModels.length} models).`));
+    } else {
+      s.stop(c.yellow("Ollama responded but returned an error."));
+    }
+  } catch {
+    // Connection refused, timeout, or a non-JSON body: all mean "not usable".
+    s.stop(c.yellow("Ollama not reachable at localhost:11434."));
+  }
+  if (!ollamaOk) {
+    p.note(
+      "Install and start Ollama first:\n" +
+      "  brew install ollama && ollama serve\n" +
+      "Then re-run setup.",
+      "Ollama required",
+    );
+    const proceed = guard(await p.confirm({
+      message: "Continue anyway? (config will be saved, but won't work until Ollama runs)",
+      initialValue: false,
+    }));
+    if (!proceed) return null;
+  }
+  // LLM model (analysis + distillation)
+  const currentLlm = existing.SINAIN_LOCAL_LLM || "phi4-mini";
+  const llmOptions = [
+    { value: "phi4-mini", label: "phi4-mini", hint: "2.5 GB — fast, good quality (recommended)" },
+    { value: "gemma3:4b", label: "gemma3:4b", hint: "2.5 GB — Google, competitive quality" },
+    { value: "llama3.2:3b", label: "llama3.2:3b", hint: "2.0 GB — Meta, smallest" },
+  ];
+  // Add current model if it's custom and not in the list
+  if (!llmOptions.some((o) => o.value === currentLlm)) {
+    llmOptions.push({ value: currentLlm, label: currentLlm, hint: "currently configured" });
+  }
+  llmOptions.push({ value: "custom", label: "Custom", hint: "Enter any Ollama model name" });
+  let llm = guard(await p.select({
+    message: "LLM model (analysis + knowledge distillation)",
+    options: llmOptions,
+    // currentLlm is always an option by now (it was appended above if missing).
+    initialValue: currentLlm,
+  }));
+  if (llm === "custom") {
+    // Reject whitespace-only input and strip padding: the name is passed
+    // verbatim to `ollama pull` and written to the env file.
+    llm = guard(await p.text({
+      message: "Ollama model name for LLM",
+      placeholder: "model-name or model-name:tag",
+      validate: (val) => { if (!val || !val.trim()) return "Model name required"; },
+    })).trim();
+  }
+  // Vision model (screen OCR)
+  const currentVision = existing.SINAIN_LOCAL_VISION || "qwen2.5vl:7b";
+  const visionOptions = [
+    { value: "qwen2.5vl:7b", label: "qwen2.5vl:7b", hint: "4.7 GB — best OCR quality (recommended)" },
+    { value: "gemma4:e2b", label: "gemma4:e2b", hint: "5.2 GB — Google multimodal, new" },
+    { value: "llava:7b", label: "llava:7b", hint: "4.7 GB — general purpose vision" },
+    { value: "moondream", label: "moondream", hint: "1.7 GB — fastest, lower quality" },
+  ];
+  if (!visionOptions.some((o) => o.value === currentVision)) {
+    visionOptions.push({ value: currentVision, label: currentVision, hint: "currently configured" });
+  }
+  visionOptions.push({ value: "custom", label: "Custom", hint: "Enter any Ollama vision model" });
+  let vision = guard(await p.select({
+    message: "Vision model (screen OCR)",
+    options: visionOptions,
+    initialValue: currentVision,
+  }));
+  if (vision === "custom") {
+    vision = guard(await p.text({
+      message: "Ollama model name for vision",
+      placeholder: "model-name:tag",
+      validate: (val) => { if (!val || !val.trim()) return "Model name required"; },
+    })).trim();
+  }
+  // Offer to pull missing models
+  if (ollamaOk) {
+    // De-duplicate first: the same model may have been chosen for both roles.
+    const missing = [...new Set([llm, vision])]
+      .filter((m) => !isOllamaModelInstalled(availableModels, m));
+    if (missing.length > 0) {
+      const pull = guard(await p.confirm({
+        message: `Pull missing models? (${missing.join(", ")})`,
+        initialValue: true,
+      }));
+      if (pull) {
+        for (const model of missing) {
+          const sp = p.spinner();
+          sp.start(`Pulling ${model}...`);
+          try {
+            // Best effort: a failed or timed-out pull (10 min cap) is reported, not fatal.
+            execFileSync("ollama", ["pull", model], { stdio: "pipe", timeout: 600_000 });
+            sp.stop(c.green(`${model} pulled.`));
+          } catch {
+            sp.stop(c.yellow(`Failed to pull ${model} — pull manually: ollama pull ${model}`));
+          }
+        }
+      }
+    }
+  }
+  return { llm, vision };
+}
+/**
+ * Prompt for the privacy mode (off | standard | strict | paranoid).
+ *
+ * The preselected entry comes from existing.PRIVACY_MODE ("standard" when
+ * unset). "Paranoid" is always listed, but it is only accepted when
+ * `localModeEnabled` is true: otherwise its hint says local mode is required,
+ * and picking it logs a warning and re-prompts with just the first three
+ * options.
+ *
+ * Returns the selected mode value as passed through guard().
+ */
+export async function stepPrivacy(existing, label = "Privacy mode", { localModeEnabled = false } = {}) {
   const current = existing.PRIVACY_MODE || "standard";
-  return guard(await p.select({
+  const options = [
+    {
+      value: "off",
+      label: "Off",
+      hint: "No filtering — screen text, credentials, everything sent to cloud",
+    },
+    {
+      value: "standard",
+      label: "Standard",
+      hint: "Auto-redacts cards, API keys, tokens before sending to cloud",
+    },
+    {
+      value: "strict",
+      label: "Strict",
+      hint: "Only summaries leave your machine, no raw screen text or audio",
+    },
+  ];
+  if (localModeEnabled) {
+    options.push({
+      value: "paranoid",
+      label: "Paranoid",
+      hint: "Zero cloud calls — all processing stays on-device via Ollama + Whisper",
+    });
+  } else {
+    options.push({
+      value: "paranoid",
+      label: "Paranoid",
+      hint: c.dim("Requires local mode — enable it first"),
+    });
+  }
+  const choice = guard(await p.select({
     message: label,
-    options: [
-      {
-        value: "off",
-        label: "Off",
-        hint: "No filtering — screen text, credentials, everything sent to cloud",
-      },
-      {
-        value: "standard",
-        label: "Standard",
-        hint: "Auto-redacts cards, API keys, tokens before sending to cloud",
-      },
-      {
-        value: "strict",
-        label: "Strict",
-        hint: "Only summaries leave your machine, no raw screen text or audio",
-      },
-      {
-        value: "paranoid",
-        label: "Paranoid",
-        hint: "Zero cloud calls — needs Whisper + Ollama installed or nothing works",
-      },
-    ],
-    initialValue: current,
+    options,
+    // A stored "paranoid" cannot be preselected without local mode; fall back to "standard".
+    initialValue: current === "paranoid" && !localModeEnabled ? "standard" : current,
   }));
+  // Paranoid was picked without local mode: warn and ask again, offering only
+  // off / standard / strict (the first three entries of `options`).
+  if (choice === "paranoid" && !localModeEnabled) {
+    p.log.warn("Paranoid mode requires local mode (Ollama + Whisper). Enable local mode first.");
+    return guard(await p.select({
+      message: `${label} (local mode not enabled)`,
+      options: options.slice(0, 3),
+      initialValue: "standard",
+    }));
+  }
+  return choice;
 }
 export async function stepModel(existing, label = "AI model for HUD analysis") {

package/config.js CHANGED Viewed

@@ -6,7 +6,7 @@
 import * as p from "@clack/prompts";
 import {
   c, guard, readEnv, writeEnv, summarizeConfig, runHealthCheck,
-  stepApiKey, stepTranscription, stepGateway, stepPrivacy, stepModel, stepAgent,
+  stepApiKey, stepTranscription, stepGateway, stepPrivacy, stepModel, stepAgent, stepLocalMode,
   ENV_PATH, IS_WINDOWS, HOME, PKG_DIR,
 } from "./config-shared.js";
 import fs from "fs";
@@ -16,6 +16,7 @@ import path from "path";
 const SECTIONS = [
   { value: "apikey",        label: "API Key",        hint: "OpenRouter API key" },
+  { value: "localmode",     label: "Local Mode",     hint: "Ollama + Whisper, zero cloud" },
   { value: "transcription", label: "Transcription",  hint: "Cloud or local whisper" },
   { value: "model",         label: "Model",          hint: "AI model for analysis" },
   { value: "privacy",       label: "Privacy",        hint: "Standard / strict / paranoid" },
@@ -48,9 +49,26 @@ async function runSection(section, existing) {
       const model = await stepModel(existing);
       return { AGENT_MODEL: model };
     }
+    case "localmode": {
+      const result = await stepLocalMode(existing);
+      if (result) {
+        return {
+          SINAIN_LOCAL_MODE: "true",
+          SINAIN_LOCAL_LLM: result.llm,
+          SINAIN_LOCAL_VISION: result.vision,
+        };
+      }
+      return { SINAIN_LOCAL_MODE: "" };
+    }
     case "privacy": {
-      const mode = await stepPrivacy(existing);
-      return { PRIVACY_MODE: mode };
+      const localModeEnabled = existing.SINAIN_LOCAL_MODE === "true";
+      const mode = await stepPrivacy(existing, "Privacy mode", { localModeEnabled });
+      const vars = { PRIVACY_MODE: mode };
+      if (mode === "paranoid" && localModeEnabled) {
+        vars.PRIVACY_OCR_AGENT_GATEWAY = "redacted";
+        vars.PRIVACY_AUDIO_AGENT_GATEWAY = "redacted";
+      }
+      return vars;
     }
     case "gateway": {
       return await stepGateway(existing);

package/launcher.js CHANGED Viewed

@@ -106,6 +106,20 @@ async function main() {
   // Load user config
   loadUserEnv();
+  // Propagate unified local mode config to component-level vars
+  if (process.env.SINAIN_LOCAL_MODE === "true") {
+    const llm = process.env.SINAIN_LOCAL_LLM || "phi4-mini";
+    const vision = process.env.SINAIN_LOCAL_VISION || "qwen2.5vl:7b";
+    if (!process.env.LOCAL_VISION_ENABLED) process.env.LOCAL_VISION_ENABLED = "true";
+    if (!process.env.LOCAL_VISION_MODEL) process.env.LOCAL_VISION_MODEL = vision;
+    if (!process.env.ANALYSIS_PROVIDER) process.env.ANALYSIS_PROVIDER = "ollama";
+    if (!process.env.ANALYSIS_MODEL) process.env.ANALYSIS_MODEL = llm;
+    if (!process.env.TRANSCRIPTION_BACKEND) process.env.TRANSCRIPTION_BACKEND = "local";
+    if (!process.env.SINAIN_FAST_MODEL) process.env.SINAIN_FAST_MODEL = `ollama/${llm}`;
+    if (!process.env.SINAIN_SMART_MODEL) process.env.SINAIN_SMART_MODEL = `ollama/${llm}`;
+    log(`${MAGENTA}LOCAL MODE${RESET} — LLM: ${llm}, Vision: ${vision}`);
+  }
   // Ensure Ollama is running (if local vision enabled)
   if (process.env.LOCAL_VISION_ENABLED === "true") {
     await ensureOllama();
@@ -162,10 +176,11 @@ async function main() {
     color: CYAN,
   });
-  // Health check
-  const healthy = await healthCheck("http://localhost:9500/health", 20);
+  // Health check (local mode needs longer — cold model load + startup distillation)
+  const healthTimeout = process.env.SINAIN_LOCAL_MODE === "true" ? 45 : 20;
+  const healthy = await healthCheck("http://localhost:9500/health", healthTimeout);
   if (!healthy) {
-    fail("sinain-core did not become healthy after 20s");
+    fail(`sinain-core did not become healthy after ${healthTimeout}s`);
   }
   ok("sinain-core healthy on :9500");
@@ -398,8 +413,7 @@ async function preflight() {
     if (fs.existsSync(prebuiltApp)) {
       ok("overlay: pre-built app");
     } else {
-      warn("no overlay available — run: sinain setup-overlay");
-      skipOverlay = true;
+      warn("no overlay available — will auto-download from GitHub Releases");
     }
   }

package/onboard.js CHANGED Viewed

@@ -8,8 +8,8 @@ import fs from "fs";
 import path from "path";
 import { execFileSync } from "child_process";
 import {
-  c, guard, maskKey, readEnv, writeEnv, writeAgentsConfig, summarizeConfig, runHealthCheck,
-  stepApiKey, stepTranscription, stepGateway, stepPrivacy, stepModel,
+  c, guard, cmdExists, maskKey, readEnv, writeEnv, writeAgentsConfig, summarizeConfig, runHealthCheck,
+  stepApiKey, stepTranscription, stepGateway, stepPrivacy, stepModel, stepLocalMode,
   HOME, SINAIN_DIR, ENV_PATH, PKG_DIR, IS_WINDOWS, IS_MAC,
 } from "./config-shared.js";
 import { stepMcpInstall, detectMcpAgents } from "./mcp-register.js";
@@ -68,9 +68,12 @@ async function stepOverlay(existing) {
     const label = choice === "download" ? "Downloading overlay..." : "Building overlay from source...";
     s.start(label);
     try {
-      // setup-overlay.js handles both modes via process.argv
-      if (choice === "source") process.argv.push("--from-source");
-      await import("./setup-overlay.js");
+      const { downloadOverlay, buildFromSource } = await import("./setup-overlay.js");
+      if (choice === "source") {
+        await buildFromSource();
+      } else {
+        await downloadOverlay({ silent: true });
+      }
       s.stop(c.green("Overlay installed."));
     } catch (err) {
       s.stop(c.yellow(`Failed: ${err.message}`));
@@ -130,6 +133,11 @@ export async function runOnboard(args = {}) {
         label: "QuickStart",
         hint: "Get running in 2 minutes. Configure details later.",
       },
+      {
+        value: "local",
+        label: "Local / Paranoid",
+        hint: "Fully offline — Ollama + Whisper, zero cloud calls.",
+      },
       {
         value: "advanced",
         label: "Advanced",
@@ -139,7 +147,7 @@ export async function runOnboard(args = {}) {
     initialValue: "quickstart",
   }));
-  const totalSteps = flow === "quickstart" ? 2 : 6;
+  const totalSteps = flow === "quickstart" ? 2 : flow === "local" ? 4 : 6;
   // ── Collect vars ────────────────────────────────────────────────────────
@@ -149,12 +157,130 @@ export async function runOnboard(args = {}) {
   // complete so we don't churn ~/.sinain/agents.json on every prompt.
   let agentsPatch = {};
-  // Step 1: API key (both flows)
-  const apiKey = await stepApiKey(base, `[1/${totalSteps}] OpenRouter API key`);
-  vars.OPENROUTER_API_KEY = apiKey;
-  p.log.success("API key saved.");
+  // Step 1: API key (quickstart + advanced only — local mode skips cloud)
+  if (flow !== "local") {
+    const apiKey = await stepApiKey(base, `[1/${totalSteps}] OpenRouter API key`);
+    vars.OPENROUTER_API_KEY = apiKey;
+    p.log.success("API key saved.");
+  }
+  if (flow === "local") {
+    // ── Local / Paranoid flow ─────────────────────────────────────────────
+    // Step 1: Local models (Ollama)
+    const localResult = await stepLocalMode(base, `[1/${totalSteps}] Local models`);
+    if (localResult) {
+      vars.SINAIN_LOCAL_MODE = "true";
+      vars.SINAIN_LOCAL_LLM = localResult.llm;
+      vars.SINAIN_LOCAL_VISION = localResult.vision;
+      p.log.success(`LLM: ${localResult.llm}, Vision: ${localResult.vision}`);
+    } else {
+      p.log.warn("Local mode cancelled — switching to QuickStart defaults.");
+      vars.TRANSCRIPTION_BACKEND = "openrouter";
+      vars.PRIVACY_MODE = "standard";
+      vars.AGENT_MODEL = "google/gemini-2.5-flash-lite";
+    }
+    // Step 2: Whisper setup (if local mode enabled)
+    if (vars.SINAIN_LOCAL_MODE === "true") {
+      vars.TRANSCRIPTION_BACKEND = "local";
+      const hasWhisper = !IS_WINDOWS && cmdExists("whisper-cli");
+      if (hasWhisper) {
+        p.log.success(`[2/${totalSteps}] whisper-cli found — local transcription enabled.`);
+      } else if (IS_MAC) {
+        const install = guard(await p.confirm({
+          message: `[2/${totalSteps}] whisper-cli not found. Install via Homebrew?`,
+          initialValue: true,
+        }));
+        if (install) {
+          const s = p.spinner();
+          s.start("Installing whisper-cpp...");
+          try {
+            execFileSync("brew", ["install", "whisper-cpp"], { stdio: "pipe" });
+            s.stop(c.green("whisper-cpp installed."));
+          } catch {
+            s.stop(c.yellow("Install failed — audio transcription won't work offline."));
+          }
+        }
+      }
+      // Check whisper model
+      const modelDir = path.join(HOME, "models");
+      const modelPath = path.join(modelDir, "ggml-large-v3-turbo.bin");
+      if (fs.existsSync(modelPath)) {
+        vars.LOCAL_WHISPER_MODEL = modelPath;
+        p.log.info(`Whisper model: ${c.dim(modelPath)}`);
+      } else {
+        const download = guard(await p.confirm({
+          message: "Download Whisper model (~1.5 GB)?",
+          initialValue: true,
+        }));
+        if (download) {
+          const s = p.spinner();
+          s.start("Downloading Whisper model...");
+          try {
+            fs.mkdirSync(modelDir, { recursive: true });
+            execFileSync("curl", [
+              "-L", "--progress-bar",
+              "-o", modelPath,
+              "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin",
+            ], { stdio: "inherit" });
+            s.stop(c.green("Model downloaded."));
+            vars.LOCAL_WHISPER_MODEL = modelPath;
+          } catch {
+            s.stop(c.yellow("Download failed. Run manually later."));
+          }
+        }
+      }
+      // Step 3: Privacy — default to paranoid since user chose local mode
+      vars.PRIVACY_MODE = "paranoid";
+      const privacy = await stepPrivacy(base, `[3/${totalSteps}] Privacy mode`, { localModeEnabled: true });
+      vars.PRIVACY_MODE = privacy;
+      p.log.success(`Privacy: ${privacy}.`);
+      // Privacy overrides for escalation (redacted OCR+audio in escalation)
+      if (privacy === "paranoid") {
+        vars.PRIVACY_OCR_AGENT_GATEWAY = "redacted";
+        vars.PRIVACY_AUDIO_AGENT_GATEWAY = "redacted";
+      }
+    }
-  if (flow === "quickstart") {
+    // Step 4: Gateway (optional — works with local mode too)
+    const hasExistingGateway = (() => {
+      try {
+        const agentsPath = path.join(SINAIN_DIR, "agents.json");
+        if (!fs.existsSync(agentsPath)) return false;
+        const cfg = JSON.parse(fs.readFileSync(agentsPath, "utf-8"));
+        return !!cfg?.profiles?.openclaw;
+      } catch { return false; }
+    })();
+    const enableGateway = guard(await p.confirm({
+      message: `[4/${totalSteps}] Enable OpenClaw gateway? (escalation agent for deeper analysis)`,
+      initialValue: hasExistingGateway,
+    }));
+    if (enableGateway) {
+      const gatewayResult = await stepGateway(base, "OpenClaw gateway");
+      Object.assign(vars, gatewayResult.envVars);
+      Object.assign(agentsPatch, gatewayResult.agentsPatch);
+    } else {
+      agentsPatch.openclawProfile = null;
+    }
+    agentsPatch.default = base.SINAIN_AGENT || "claude";
+    p.note(
+      [
+        `Local mode: ${vars.SINAIN_LOCAL_MODE === "true" ? c.green("enabled") : "disabled"}`,
+        vars.SINAIN_LOCAL_LLM ? `  LLM: ${vars.SINAIN_LOCAL_LLM}` : null,
+        vars.SINAIN_LOCAL_VISION ? `  Vision: ${vars.SINAIN_LOCAL_VISION}` : null,
+        `Transcription: ${vars.TRANSCRIPTION_BACKEND}`,
+        `Privacy: ${vars.PRIVACY_MODE}`,
+        `OpenClaw gateway: ${enableGateway ? "enabled" : "disabled"}`,
+        "",
+        `Start with: ./start.sh --paranoid`,
+        `Change later: sinain config`,
+      ].filter(Boolean).join("\n"),
+      "Local mode summary",
+    );
+  } else if (flow === "quickstart") {
     // QuickStart: sensible defaults + a single opt-in question for OpenClaw.
     // Gateway integration is off by default; users who want it run Advanced
     // (or answer Yes here, which then walks them through stepGateway).
@@ -267,35 +393,14 @@ export async function runOnboard(args = {}) {
       }
     }
-    // If Ollama is installed, offer to pull a local LLM for paranoid-mode
-    // analysis. Mirrors the whisper download pattern — auto-acquire optional,
-    // user can `ollama pull <model>` manually later if they skip here.
-    let ollamaInstalled = false;
-    try {
-      execFileSync("ollama", ["--version"], { stdio: "ignore" });
-      ollamaInstalled = true;
-    } catch { /* ollama not on PATH */ }
-    if (ollamaInstalled) {
-      const pullOllama = guard(await p.confirm({
-        message: "Pull an Ollama model for paranoid-mode analysis (~4.7 GB for llava)?",
-        initialValue: true,
-      }));
-      if (pullOllama) {
-        const modelName = guard(await p.text({
-          message: "Ollama model to pull",
-          placeholder: "llava",
-          defaultValue: "llava",
-        }));
-        const s = p.spinner();
-        s.start(`Pulling ${modelName} via Ollama (this can take several minutes)...`);
-        try {
-          execFileSync("ollama", ["pull", modelName], { stdio: "inherit" });
-          s.stop(c.green(`Pulled ${modelName}.`));
-        } catch {
-          s.stop(c.yellow(`Pull failed. Run \`ollama pull ${modelName}\` manually later.`));
-        }
-      }
+    // Offer local mode (Ollama) — enables paranoid privacy
+    const localResult = await stepLocalMode(base, "Local mode (Ollama)");
+    const localModeEnabled = !!localResult;
+    if (localResult) {
+      vars.SINAIN_LOCAL_MODE = "true";
+      vars.SINAIN_LOCAL_LLM = localResult.llm;
+      vars.SINAIN_LOCAL_VISION = localResult.vision;
+      p.log.success(`Local mode: LLM=${localResult.llm}, Vision=${localResult.vision}`);
     }
     // OpenClaw gateway is opt-in: most users run sinain in standalone mode
@@ -333,8 +438,12 @@ export async function runOnboard(args = {}) {
       p.log.info("Standalone mode (no gateway).");
     }
-    const privacy = await stepPrivacy(base, "[4/6] Privacy mode");
+    const privacy = await stepPrivacy(base, "[4/6] Privacy mode", { localModeEnabled });
     vars.PRIVACY_MODE = privacy;
+    if (privacy === "paranoid" && localModeEnabled) {
+      vars.PRIVACY_OCR_AGENT_GATEWAY = "redacted";
+      vars.PRIVACY_AUDIO_AGENT_GATEWAY = "redacted";
+    }
     p.log.success(`Privacy: ${privacy}.`);
     const model = await stepModel(base, "[5/6] AI model for HUD analysis");

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@geravant/sinain",
-  "version": "1.24.1",
+  "version": "1.26.0",
   "description": "Context OS — ambient intelligence for builders. Captures screen + audio, distills into a private knowledge graph, accessible from MCP, web UI, and HUD overlay.",
   "type": "module",
   "bin": {

package/sense_client/config.py CHANGED Viewed

@@ -41,7 +41,7 @@ DEFAULTS = {
     "vision": {
         "enabled": False,
         "backend": "ollama",
-        "model": "llava",
+        "model": "qwen2.5vl:7b",
         "ollamaUrl": "http://localhost:11434",
         "timeout": 10.0,
         "throttleSeconds": 5,

package/sense_client/ollama_vision.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """Ollama Vision — local multimodal inference for screen scene understanding.
-Provides a thin client for Ollama's vision models (llava, llama3.2-vision,
-moondream, nanollava). Used by sense_client for scene descriptions and
+Provides a thin client for Ollama's vision models (qwen2.5vl, llama3.2-vision,
+moondream, llava). Used by sense_client for scene descriptions and
 optionally by sinain-core's agent analyzer for local vision analysis.
 Falls back gracefully when Ollama is unavailable — never crashes the pipeline.
@@ -37,9 +37,9 @@ class OllamaVision:
     def __init__(
         self,
-        model: str = "llava",
+        model: str = "qwen2.5vl:7b",
         base_url: str = "http://localhost:11434",
-        timeout: float = 10.0,
+        timeout: float = 30.0,
         max_tokens: int = 200,
     ):
         self.model = model

package/sense_client/vision.py CHANGED Viewed

@@ -56,7 +56,7 @@ class VisionProvider(ABC):
 class OllamaVisionProvider(VisionProvider):
     """Local vision via Ollama HTTP API."""
-    def __init__(self, model: str = "llava", base_url: str = "http://localhost:11434",
+    def __init__(self, model: str = "qwen2.5vl:7b", base_url: str = "http://localhost:11434",
                  timeout: float = 10.0, max_tokens: int = 200):
         from .ollama_vision import OllamaVision
         self._client = OllamaVision(model=model, base_url=base_url,
@@ -172,19 +172,30 @@ def create_vision(config: dict) -> Optional[VisionProvider]:
     Priority:
     1. Paranoid privacy or no API key → local only (Ollama)
-    2. LOCAL_VISION_ENABLED=true → local (Ollama)
+    2. SINAIN_LOCAL_MODE=true / SINAIN_LOCAL_VISION set → local (Ollama)
     3. API key available → cloud (OpenRouter)
     4. Nothing available → None (vision disabled, OCR still works)
+    Env-var namespace: SINAIN_LOCAL_* is primary. The legacy LOCAL_VISION_*
+    vars are still honored as a fallback for older .env files; sinain-core's
+    config.ts also bridges SINAIN_LOCAL_* → LOCAL_VISION_* for compatibility.
     """
     privacy = os.environ.get("PRIVACY_MODE", "off")
     api_key = os.environ.get("OPENROUTER_API_KEY", "")
     vision_cfg = config.get("vision", {})
+    # Primary: SINAIN_LOCAL_MODE / SINAIN_LOCAL_VISION. Legacy: LOCAL_VISION_*.
     local_enabled = (
         vision_cfg.get("enabled", False)
+        or os.environ.get("SINAIN_LOCAL_MODE", "").lower() == "true"
+        or bool(os.environ.get("SINAIN_LOCAL_VISION", ""))
         or os.environ.get("LOCAL_VISION_ENABLED", "").lower() == "true"
     )
-    local_model = os.environ.get("LOCAL_VISION_MODEL", vision_cfg.get("model", "llava"))
+    local_model = (
+        os.environ.get("SINAIN_LOCAL_VISION")
+        or os.environ.get("LOCAL_VISION_MODEL")
+        or vision_cfg.get("model", "qwen2.5vl:7b")
+    )
     local_url = vision_cfg.get("ollamaUrl", "http://localhost:11434")
     local_timeout = vision_cfg.get("timeout", 10.0)

package/setup-overlay.js CHANGED Viewed

@@ -199,7 +199,7 @@ ${GREEN}✓${RESET} Overlay ready!
 // ── Build from source (legacy) ───────────────────────────────────────────────
-async function buildFromSource() {
+export async function buildFromSource() {
   // Check flutter
   try {
     execSync("which flutter", { stdio: "pipe" });

package/sinain-agent/run.sh CHANGED Viewed

@@ -197,6 +197,11 @@ agent_has_mcp() {
   type=$(prof_get_or "$check" type "$check")
   case "$type" in
     claude|openclaude|codex|goose) return 0 ;;
+    # Hermes is an MCP client, but headless tool approval (calling back into
+    # sinain_respond) depends on its yolo/approval config. Default to pipe
+    # mode (self-contained text oracle); opt in to the claude-style MCP flow
+    # with HERMES_USE_MCP=true once approval is configured (see startup block).
+    hermes) [ "${HERMES_USE_MCP:-false}" = "true" ] && return 0 || return 1 ;;
     junie) $JUNIE_HAS_MCP ;;
     *) return 1 ;;
   esac
@@ -330,6 +335,16 @@ invoke_agent() {
           --no-session \
           --max-turns "$turns"
         ;;
+      hermes)
+        # MCP mode (reached only when HERMES_USE_MCP=true — see agent_has_mcp).
+        # Hermes loads the sinain MCP server from ~/.hermes/config.yaml
+        # (registered at startup) and calls sinain_respond / sinain_knowledge_query
+        # itself, mirroring the claude flow. `-z/--oneshot` prints only the final
+        # text to stdout and auto-bypasses approvals (no TTY hang). Turn budget
+        # comes from config.yaml (max_turns, default 60) — there's no top-level
+        # --max-turns flag. `--toolsets`/`-t` can narrow tools if needed.
+        "$bin" -z "$prompt"
+        ;;
       aider)
         return 1  # No MCP support — caller falls back to invoke_pipe
         ;;
@@ -376,6 +391,17 @@ invoke_pipe() {
     aider)
       "$bin" --yes -m "$msg"
       ;;
+    hermes)
+      # Hermes one-shot: `-z/--oneshot` sends a single prompt and prints ONLY
+      # the final response text to stdout (no banner/spinner/tool previews,
+      # no session-id line) — and auto-bypasses tool approvals, so it never
+      # hangs waiting on a TTY. Tools, memory, and skills still load. The
+      # escalation message already includes full screen/audio/digest context,
+      # so Hermes answers as a self-contained oracle using its own configured
+      # model (set via `hermes model`/`hermes setup`) — no sinain MCP needed.
+      # For the richer flow where Hermes calls sinain tools, set HERMES_USE_MCP=true.
+      "$bin" -z "$msg" 2>/dev/null
+      ;;
     *)
       # Generic: pipe message to stdin to whatever binary the profile names
       echo "$msg" | "$bin" 2>/dev/null
@@ -455,6 +481,46 @@ print('  sinain extension added to ' + config_path)
   fi
 fi
+# Hermes: auto-register sinain MCP server in ~/.hermes/config.yaml (opt-in).
+# Only when HERMES_USE_MCP=true and hermes is the selected agent — pipe mode
+# (the default) is a black-box text oracle and needs none of this. Hermes
+# reads MCP servers from config.yaml under the `mcp_servers` key (stdio:
+# command + args + env). ruamel.yaml (a Hermes core dep) preserves the
+# user's comments/formatting; falls back to PyYAML if unavailable.
+# The "already registered?" check is done on the parsed YAML (is there a
+# `sinain` entry under `mcp_servers`?) rather than by grepping for "sinain:",
+# which also matched unrelated keys, values and comments and then silently
+# skipped registration. The file is only rewritten when an entry is added.
+if [ "${HERMES_USE_MCP:-false}" = "true" ] && [ "$AGENT" = "hermes" ]; then
+  TSX_BIN="$(cd "$SCRIPT_DIR/.." && pwd)/sinain-core/node_modules/.bin/tsx"
+  MCP_ENTRY="$(cd "$SCRIPT_DIR/.." && pwd)/sinain-mcp-server/index.ts"
+  HERMES_CONFIG="${HERMES_CONFIG_DIR:-$HOME/.hermes}/config.yaml"
+  if [ -f "$HERMES_CONFIG" ]; then
+    python3 -c "
+import sys
+try:
+    from ruamel.yaml import YAML
+    _y = YAML()
+    load = _y.load
+    def dump(cfg, f): _y.dump(cfg, f)
+except Exception:
+    import yaml as _py
+    load = _py.safe_load
+    def dump(cfg, f): _py.safe_dump(cfg, f, default_flow_style=False, sort_keys=False)
+path, tsx, entry, core, ws = sys.argv[1:6]
+with open(path) as f:
+    cfg = load(f) or {}
+# A bare 'mcp_servers:' key loads as None, so setdefault() would hand back
+# None and the item assignment would raise TypeError. Normalise it first.
+servers = cfg.get('mcp_servers')
+if servers is None:
+    servers = {}
+    cfg['mcp_servers'] = servers
+if 'sinain' in servers:
+    sys.exit(0)
+print('Registering sinain MCP server with hermes (' + path + ')...')
+servers['sinain'] = {
+    'command': tsx,
+    'args': [entry],
+    'env': {'SINAIN_CORE_URL': core, 'SINAIN_WORKSPACE': ws},
+}
+with open(path, 'w') as f:
+    dump(cfg, f)
+print('  sinain mcp_server added to ' + path)
+" "$HERMES_CONFIG" "$TSX_BIN" "$MCP_ENTRY" "$CORE_URL" "$WORKSPACE"
+  else
+    echo "  ⚠ HERMES_USE_MCP=true but $HERMES_CONFIG missing — run \`hermes setup\` first"
+  fi
+fi
 # Ollama warmup — pin the backing model so each agent invocation hits hot weights.
 # openclaude + Ollama via the OpenAI-compat endpoint does NOT forward keep_alive,
 # so we ping Ollama's native /api/generate once with keep_alive=-1 (persistent).
@@ -498,7 +564,7 @@ fi
 # Built-in defaults are 1:1 (profile name == binary == type). Users can
 # override fields or add custom profiles by editing sinain-agent/agents.json.
 # Profiles whose binaries aren't in PATH are silently skipped.
-for default_name in claude openclaude codex goose junie aider; do
+for default_name in claude openclaude codex goose junie aider hermes; do
   prof_set "$default_name" bin "$default_name"
   prof_set "$default_name" type "$default_name"
 done

package/sinain-core/src/agent/analyzer.ts CHANGED Viewed

@@ -366,7 +366,8 @@ async function callOllama(
 ): Promise<AgentResult> {
   const start = Date.now();
   const controller = new AbortController();
-  const timeout = setTimeout(() => controller.abort(), config.timeout);
+  // Local Ollama models need more time than cloud APIs (cold start + generation)
+  const timeout = setTimeout(() => controller.abort(), Math.max(config.timeout, 45_000));
   try {
     const imageB64List = (images || []).map((img) => img.data);

package/sinain-core/src/config.ts CHANGED Viewed

@@ -186,6 +186,25 @@ export function loadConfig(): CoreConfig {
     gainDb: intEnv("MIC_GAIN_DB", 0),
   };
+  // ── Local mode: unified config ──────────────────────────────────────────
+  // SINAIN_LOCAL_MODE=true auto-derives all component config from two vars:
+  //   SINAIN_LOCAL_LLM=phi4-mini       → analyzer + distiller
+  //   SINAIN_LOCAL_VISION=qwen2.5vl:7b → sense_client (propagated via start.sh)
+  // Must run BEFORE transcriptionConfig / analysisConfig are read.
+  const localMode = boolEnv("SINAIN_LOCAL_MODE", false);
+  if (localMode) {
+    const localLlm = env("SINAIN_LOCAL_LLM", "phi4-mini");
+    const localVision = env("SINAIN_LOCAL_VISION", "qwen2.5vl:7b");
+    if (!process.env.ANALYSIS_PROVIDER) process.env.ANALYSIS_PROVIDER = "ollama";
+    if (!process.env.ANALYSIS_MODEL) process.env.ANALYSIS_MODEL = localLlm;
+    if (!process.env.ANALYSIS_VISION_MODEL) process.env.ANALYSIS_VISION_MODEL = localLlm;
+    if (!process.env.TRANSCRIPTION_BACKEND) process.env.TRANSCRIPTION_BACKEND = "local";
+    if (!process.env.LOCAL_VISION_ENABLED) process.env.LOCAL_VISION_ENABLED = "true";
+    if (!process.env.LOCAL_VISION_MODEL) process.env.LOCAL_VISION_MODEL = localVision;
+    if (!process.env.SINAIN_FAST_MODEL) process.env.SINAIN_FAST_MODEL = `ollama/${localLlm}`;
+    if (!process.env.SINAIN_SMART_MODEL) process.env.SINAIN_SMART_MODEL = `ollama/${localLlm}`;
+  }
   const transcriptionConfig: TranscriptionConfig = {
     backend: env("TRANSCRIPTION_BACKEND", "openrouter") as TranscriptionConfig["backend"],
     openrouterApiKey: env("OPENROUTER_API_KEY", ""),
@@ -193,7 +212,7 @@ export function loadConfig(): CoreConfig {
     language: env("TRANSCRIPTION_LANGUAGE", "en-US"),
     local: {
       bin: env("LOCAL_WHISPER_BIN", "whisper-cli"),
-      modelPath: resolvePath(env("LOCAL_WHISPER_MODEL", "~/models/ggml-large-v3-turbo.bin")),
+      modelPath: resolvePath(env("LOCAL_WHISPER_MODEL", "~/.sinain/models/whisper/ggml-large-v3-turbo.bin")),
       language: env("TRANSCRIPTION_LANGUAGE", "en-US"),
       timeoutMs: intEnv("LOCAL_WHISPER_TIMEOUT_MS", 15000),
     },

package/sinain-core/src/distribution/download-manager.ts ADDED Viewed

@@ -0,0 +1,199 @@
+/**
+ * Download Manager — resumable, integrity-checked, atomic model downloads.
+ *
+ * SEED-001 Phase 4. Drives model-weight downloads into ~/.sinain/models/ for
+ * the first-run wizard (whisper model for T1/T2). Ollama models are pulled via
+ * Ollama's own /api/pull, NOT this manager.
+ *
+ * STATUS: SCAFFOLD. The download/verify/atomic-install core below is a working
+ * first implementation, but the manifest fetch + wizard wiring are marked TODO
+ * and it is not yet called from anywhere. See docs/dmg-distribution-spec.md §5.
+ *
+ * Design (SPEC §5a):
+ *   - Resumable:  HTTP Range requests; the size of the `.part` file is the resume offset.
+ *   - Integrity:  SHA-256 verified against the hosted manifest before promotion.
+ *   - Atomic:     download to `*.part` → verify → rename() into the final path,
+ *                 so the canonical path never holds a half-written model.
+ */
+import { createHash } from "node:crypto";
+import { createReadStream, createWriteStream } from "node:fs";
+import { mkdir, rename, stat, unlink } from "node:fs/promises";
+import { dirname } from "node:path";
+import { Readable } from "node:stream";
+import { pipeline } from "node:stream/promises";
+import { log, warn } from "../log.js";
+const TAG = "download";
+/**
+ * One downloadable artifact, as listed in the hosted models manifest.
+ * This is the unit of work accepted by downloadModel and downloadForTier.
+ */
+export interface ModelManifestEntry {
+  /** Stable identifier, e.g. "whisper-large-v3-turbo"; also used in log, progress and error messages. */
+  id: string;
+  /** Absolute download URL. */
+  url: string;
+  /** Lowercase hex SHA-256 of the complete file; compared by strict equality with the computed digest. */
+  sha256: string;
+  /** Expected size in bytes (for progress + sanity check); a value <= 0 makes the progress fraction null. */
+  sizeBytes: number;
+  /** Install tier this artifact belongs to. */
+  tier: "T1" | "T2";
+  /** Final on-disk path; `~` is expanded by the caller (see resolvePath). The download is staged at `<destPath>.part`. */
+  destPath: string;
+}
+/** Progress snapshot handed to a ProgressHandler each time a chunk is received. */
+export interface DownloadProgress {
+  /** `id` of the manifest entry being downloaded. */
+  id: string;
+  /** Bytes accounted for so far, including the offset kept from a resumed partial download. */
+  receivedBytes: number;
+  /** Expected total size, taken from the manifest entry's `sizeBytes`. */
+  totalBytes: number;
+  /** 0..1, or null when total size is unknown. */
+  fraction: number | null;
+}
+/** Callback that receives DownloadProgress snapshots while a download is running. */
+export type ProgressHandler = (p: DownloadProgress) => void;
+/** Raised when a downloaded file's SHA-256 does not match the manifest value. */
+export class IntegrityError extends Error {
+  /** `id` of the manifest entry that failed verification. */
+  public readonly id: string;
+  /** Digest the manifest promised. */
+  public readonly expected: string;
+  /** Digest actually computed from the downloaded bytes. */
+  public readonly actual: string;
+  constructor(id: string, expected: string, actual: string) {
+    super(`integrity check failed for ${id}: expected ${expected}, got ${actual}`);
+    this.id = id;
+    this.expected = expected;
+    this.actual = actual;
+    this.name = "IntegrityError";
+  }
+}
+/**
+ * Download a single manifest entry with resume + integrity + atomic install.
+ *
+ * The file is staged at `<destPath>.part`, verified against `entry.sha256`,
+ * and only then renamed onto `entry.destPath`. A leftover `.part` is resumed
+ * with an HTTP Range request; a leftover that is already complete is promoted
+ * without touching the network, and one that cannot be resumed is discarded.
+ *
+ * @param entry      Manifest entry to install; `destPath` must already be expanded.
+ * @param onProgress Optional callback invoked for every received chunk.
+ * @param signal     Optional AbortSignal forwarded to fetch().
+ * @returns The final installed path.
+ * @throws IntegrityError on a checksum mismatch; Error on network/IO failure.
+ */
+export async function downloadModel(
+  entry: ModelManifestEntry,
+  onProgress?: ProgressHandler,
+  signal?: AbortSignal,
+): Promise<string> {
+  const finalPath = entry.destPath;
+  const partPath = `${finalPath}.part`;
+  await mkdir(dirname(finalPath), { recursive: true });
+  // If a complete, valid file already exists, skip the download.
+  if (await fileMatches(finalPath, entry.sha256)) {
+    log(TAG, `${entry.id}: already present and verified`);
+    return finalPath;
+  }
+  // Resume from an existing partial download if present.
+  let startByte = 0;
+  try {
+    startByte = (await stat(partPath)).size;
+  } catch {
+    startByte = 0; // no .part yet
+  }
+  // A .part that already holds the whole file (e.g. the process died between
+  // download and rename, or a previous run failed verification) cannot be
+  // resumed: "bytes=<size>-" is unsatisfiable. Promote it if it verifies,
+  // otherwise throw it away and start over.
+  if (startByte > 0 && entry.sizeBytes > 0 && startByte >= entry.sizeBytes) {
+    if (await fileMatches(partPath, entry.sha256)) {
+      await rename(partPath, finalPath);
+      log(TAG, `${entry.id}: installed → ${finalPath}`);
+      return finalPath;
+    }
+    warn(TAG, `${entry.id}: discarding unusable partial download (${startByte} bytes)`);
+    await discardPart(partPath);
+    startByte = 0;
+  }
+  const headers: Record<string, string> = {};
+  if (startByte > 0) {
+    headers["Range"] = `bytes=${startByte}-`;
+    log(TAG, `${entry.id}: resuming from ${startByte} bytes`);
+  }
+  let res = await fetch(entry.url, { headers, signal });
+  // 416 = the server cannot serve our offset (covers the case where the
+  // manifest size is unknown, so the check above could not catch it).
+  // Drop the Range header and restart from byte 0.
+  if (res.status === 416 && startByte > 0) {
+    await res.body?.cancel();
+    warn(TAG, `${entry.id}: server rejected resume offset ${startByte}; restarting`);
+    startByte = 0;
+    res = await fetch(entry.url, { signal });
+  }
+  // res.ok already covers 206 Partial Content.
+  if (!res.ok) {
+    throw new Error(`${entry.id}: download failed — HTTP ${res.status}`);
+  }
+  if (!res.body) {
+    throw new Error(`${entry.id}: response had no body`);
+  }
+  // If the server ignored Range (200 instead of 206), restart from scratch.
+  const append = res.status === 206 && startByte > 0;
+  if (!append) startByte = 0;
+  const total = entry.sizeBytes;
+  let received = startByte;
+  const fileStream = createWriteStream(partPath, { flags: append ? "a" : "w" });
+  const body = Readable.fromWeb(res.body as Parameters<typeof Readable.fromWeb>[0]);
+  body.on("data", (chunk: Buffer) => {
+    received += chunk.length;
+    onProgress?.({
+      id: entry.id,
+      receivedBytes: received,
+      totalBytes: total,
+      fraction: total > 0 ? Math.min(received / total, 1) : null,
+    });
+  });
+  // A failure here leaves the .part in place on purpose so it can be resumed.
+  await pipeline(body, fileStream);
+  // Verify the completed .part before promoting it.
+  const actual = await sha256File(partPath);
+  if (actual !== entry.sha256) {
+    // Keep a short .part (the transfer may just have ended early and can be
+    // resumed); discard anything else so the next attempt starts clean
+    // instead of resuming on top of corrupt bytes.
+    const plausiblyIncomplete = total > 0 && received < total;
+    if (!plausiblyIncomplete) await discardPart(partPath);
+    throw new IntegrityError(entry.id, entry.sha256, actual);
+  }
+  // Atomic install: rename is atomic within the same filesystem.
+  await rename(partPath, finalPath);
+  log(TAG, `${entry.id}: installed → ${finalPath}`);
+  return finalPath;
+}
+/** Best-effort removal of a staged `.part` file; a file that is already gone is not an error. */
+async function discardPart(partPath: string): Promise<void> {
+  try {
+    await unlink(partPath);
+  } catch {
+    // Nothing to clean up, or the file cannot be removed; the subsequent
+    // truncating open ("w") will surface any real permission problem.
+  }
+}
+/** Stream the file at `path` through SHA-256 and return the digest as lowercase hex. */
+export async function sha256File(path: string): Promise<string> {
+  const hasher = createHash("sha256");
+  for await (const chunk of createReadStream(path)) {
+    hasher.update(chunk);
+  }
+  return hasher.digest("hex");
+}
+/**
+ * True iff `path` exists and its SHA-256 equals `expectedSha256`.
+ * A failed stat or a failed read both count as "no match" rather than an error.
+ */
+async function fileMatches(path: string, expectedSha256: string): Promise<boolean> {
+  try {
+    await stat(path);
+    const digest = await sha256File(path);
+    return digest === expectedSha256;
+  } catch {
+    return false;
+  }
+}
+/**
+ * Fetch the hosted models manifest (GitHub Pages).
+ *
+ * Not implemented yet: the returned promise always rejects.
+ *
+ * TODO(Phase 4): point at the real manifest URL once GitHub Pages hosting is
+ * set up (see docs/distribution/models-manifest.example.json for the schema),
+ * and decide how the manifest is versioned against app releases (open Q7).
+ */
+export async function fetchManifest(_manifestUrl: string): Promise<ModelManifestEntry[]> {
+  const reason =
+    "fetchManifest not implemented — SEED-001 Phase 4. See docs/dmg-distribution-spec.md §5a.";
+  throw new Error(reason);
+}
+/**
+ * Download every entry for a given tier, sequentially.
+ *
+ * Requesting "T2" also installs the "T1" entries; requesting "T1" installs
+ * only "T1". The first failure is logged and rethrown, so later entries are
+ * not attempted.
+ *
+ * TODO(Phase 5): wire this into the first-run wizard's tier-config step so the
+ * whisper model downloads with a progress bar after the user picks T1/T2.
+ */
+export async function downloadForTier(
+  entries: ModelManifestEntry[],
+  tier: "T1" | "T2",
+  onProgress?: ProgressHandler,
+  signal?: AbortSignal,
+): Promise<string[]> {
+  const acceptedTiers: ReadonlyArray<"T1" | "T2"> = tier === "T2" ? ["T1", "T2"] : [tier];
+  const selected = entries.filter((candidate) => acceptedTiers.includes(candidate.tier));
+  const installedPaths: string[] = [];
+  for (const item of selected) {
+    try {
+      const installedPath = await downloadModel(item, onProgress, signal);
+      installedPaths.push(installedPath);
+    } catch (err) {
+      warn(TAG, `failed to download ${item.id}:`, err);
+      throw err;
+    }
+  }
+  return installedPaths;
+}

package/sinain-core/src/index.ts CHANGED Viewed

@@ -29,6 +29,14 @@ import { initPrivacy, levelFor, applyLevel } from "./privacy/index.js";
 const TAG = "core";
+/**
+ * Python interpreter for the sinain-memory scripts (graph_query, page_renderer,
+ * distillers). In a packaged build the launcher sets SINAIN_PYTHON to the one
+ * interpreter that has the deps — bare "python3" can resolve to a dep-less
+ * install and make knowledge pages silently fall back to empty.
+ */
+const PYTHON_BIN = process.env.SINAIN_PYTHON || "python3";
 /** Resolve workspace path, expanding leading ~ to HOME. */
 function resolveWorkspace(): string {
   const raw = process.env.SINAIN_WORKSPACE || `${process.env.HOME}/.openclaw/workspace`;
@@ -70,7 +78,7 @@ async function queryKnowledgeFactsMulti(entities: string[], maxFacts: number): P
     try {
       const args = [scriptPath, "--db", dbPath, "--max-facts", String(maxFacts * 2), "--format", "json"];
       if (entities.length > 0) args.push("--entities", JSON.stringify(entities));
-      const out = execFileSync("python3", args, { timeout: 5000, encoding: "utf-8" }).trim();
+      const out = execFileSync(PYTHON_BIN, args, { timeout: 5000, encoding: "utf-8" }).trim();
       if (out) {
         const parsed = JSON.parse(out);
         const facts = parsed.facts || parsed;
@@ -141,7 +149,7 @@ async function listKnowledgeEntitiesMulti(max: number): Promise<string> {
   for (const dbPath of dbPaths) {
     if (!existsSync(dbPath)) continue;
     try {
-      const out = execFileSync("python3", [
+      const out = execFileSync(PYTHON_BIN, [
         scriptPath, "--db", dbPath, "--top", String(max), "--format", "json",
       ], { timeout: 5000, encoding: "utf-8" });
       const parsed = JSON.parse(out);
@@ -206,7 +214,7 @@ async function searchEntitiesMulti(query: string, limit: number): Promise<unknow
   for (const dbPath of resolveKnowledgeDbPaths()) {
     if (!existsSync(dbPath)) continue;
     try {
-      const out = execFileSync("python3", [
+      const out = execFileSync(PYTHON_BIN, [
         scriptPath, "--db", dbPath,
         "--search-entities", query,
         "--search-limit", String(limit * 2), // 2x then de-dup
@@ -257,7 +265,7 @@ async function exportConceptBundle(
     if (opts.includePage) args.push("--include-page");
     try {
       // 30s budget — large 2-hop exports can take time on big graphs.
-      const { stdout } = await pExecFile("python3", args,
+      const { stdout } = await pExecFile(PYTHON_BIN, args,
         { timeout: 30_000, encoding: "utf-8", maxBuffer: 50 * 1024 * 1024 });
       const parsed = JSON.parse(stdout);
       // If the export found at least one entity (the root), return it.
@@ -299,7 +307,7 @@ async function importConceptBundle(
   ];
   const { spawn } = await import("node:child_process");
   return await new Promise((resolve) => {
-    const child = spawn("python3", args, { timeout: 30_000 });
+    const child = spawn(PYTHON_BIN, args, { timeout: 30_000 });
     let stdout = "";
     let stderr = "";
     child.stdout.on("data", (c: Buffer) => { stdout += c.toString("utf-8"); });
@@ -348,7 +356,7 @@ async function retractOrRestoreFact(
       if (opts.undoToken) args.push("--undo-token", opts.undoToken);
     }
     try {
-      const { stdout } = await pExecFile("python3", args, { timeout: 10_000, encoding: "utf-8" });
+      const { stdout } = await pExecFile(PYTHON_BIN, args, { timeout: 10_000, encoding: "utf-8" });
       const parsed = JSON.parse(stdout);
       if (parsed.ok) return parsed;
       // If error is "fact not found" try the next DB; otherwise return the error
@@ -384,7 +392,7 @@ async function renderEntityPageMulti(
     if (opts.refresh) args.push("--refresh");
     try {
       // 60s budget — LLM rendering for large entities can take 20-30s.
-      const { stdout } = await pExecFile("python3", args, { timeout: 60_000, encoding: "utf-8" });
+      const { stdout } = await pExecFile(PYTHON_BIN, args, { timeout: 60_000, encoding: "utf-8" });
       const parsed = JSON.parse(stdout);
       if (parsed.fact_count > 0) return parsed;
     } catch (e) {
@@ -411,7 +419,7 @@ async function graphChildrenMulti(entity: string): Promise<unknown> {
   for (const dbPath of resolveKnowledgeDbPaths()) {
     if (!existsSync(dbPath)) continue;
     try {
-      const out = execFileSync("python3", [
+      const out = execFileSync(PYTHON_BIN, [
         scriptPath, "--db", dbPath,
         "--graph-children", entity,
         "--graph-limit", "50",
@@ -458,7 +466,7 @@ if not result:
     result = store.entity_as_of("${entity}", d)
 print(json.dumps({k: v for k, v in result.items()}, ensure_ascii=False))
 `;
-      const out = execFileSync("python3", ["-c", pyCode], {
+      const out = execFileSync(PYTHON_BIN, ["-c", pyCode], {
         timeout: 5000, encoding: "utf-8",
       }).trim();
       if (out && out !== "{}") return out;
@@ -486,7 +494,7 @@ async function exportKnowledgeMulti(domain: string | null, max: number): Promise
   for (const dbPath of dbPaths) {
     if (!existsSync(dbPath)) continue;
     try {
-      const out = execFileSync("python3", [
+      const out = execFileSync(PYTHON_BIN, [
         scriptPath, "--db", dbPath, "--top", String(max), "--format", "json",
       ], { timeout: 5000, encoding: "utf-8" });
       const parsed = JSON.parse(out);
@@ -608,7 +616,7 @@ store.close()
 print(json.dumps(stats))
 `;
-    const result = execFileSync("python3", ["-c", script], {
+    const result = execFileSync(PYTHON_BIN, ["-c", script], {
       input: JSON.stringify(graphOps),
       timeout: 10_000,
       encoding: "utf-8",