npm - @agentprojectcontext/apx - Versions diffs - 1.42.1 → 1.43.0 - Mend

@agentprojectcontext/apx 1.42.1 → 1.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/package.json +1 -1
package/src/core/channels/telegram/api.js +62 -0
package/src/core/channels/telegram/ask-callbacks.js +238 -0
package/src/core/config/index.js +2 -0
package/src/core/config/redact.js +2 -0
package/src/core/confirmation/adapters/telegram.js +20 -37
package/src/core/desktop/process.js +126 -0
package/src/core/voice/stt-hardware.js +87 -0
package/src/core/voice/stt-models.js +97 -0
package/src/core/voice/transcription.js +147 -16
package/src/host/daemon/api/desktop.js +54 -8
package/src/host/daemon/api/transcribe.js +40 -1
package/src/host/daemon/plugins/desktop/index.js +6 -1
package/src/host/daemon/plugins/telegram/index.js +61 -351
package/src/host/daemon/whisper-server.js +18 -8
package/src/host/daemon/whisper-server.py +71 -44
package/src/interfaces/cli/commands/desktop.js +13 -68
package/src/interfaces/desktop/main.js +32 -4
package/src/interfaces/desktop/renderer.js +26 -5
package/src/interfaces/web/dist/assets/index-B0nTYflm.js +651 -0
package/src/interfaces/web/dist/assets/index-B0nTYflm.js.map +1 -0
package/src/interfaces/web/dist/assets/index-C22PmKCD.css +1 -0
package/src/interfaces/web/dist/index.html +2 -2
package/src/interfaces/web/package-lock.json +3 -3
package/src/interfaces/web/src/components/ShortcutInput.tsx +156 -0
package/src/interfaces/web/src/components/voice/VoiceSttCard.tsx +101 -5
package/src/interfaces/web/src/i18n/en.ts +28 -2
package/src/interfaces/web/src/i18n/es.ts +28 -2
package/src/interfaces/web/src/lib/api/desktop.ts +28 -0
package/src/interfaces/web/src/lib/api/voice.ts +26 -2
package/src/interfaces/web/src/screens/modules/DeckScreen.tsx +55 -3
package/src/interfaces/web/src/screens/modules/DesktopScreen.tsx +98 -36
package/src/interfaces/web/dist/assets/index-BReF4_xV.js +0 -646
package/src/interfaces/web/dist/assets/index-BReF4_xV.js.map +0 -1
package/src/interfaces/web/dist/assets/index-wrEbTJbc.css +0 -1

package/src/core/voice/stt-models.js ADDED Viewed

@@ -0,0 +1,97 @@
+// STT model catalog + on-disk status (downloaded? how big?).
+//
+// Both engines pull their weights from the HuggingFace hub cache
+// (~/.cache/huggingface/hub/models--<org>--<name>), just in different formats:
+//   faster-whisper → Systran/faster-whisper-<model>   (CTranslate2)
+//   mlx-whisper    → mlx-community/whisper-<model>     (MLX)
+//
+// We read the cache directory to report presence + real byte size, and carry an
+// approximate download size for models that aren't there yet (Ollama-style).
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+/**
+ * Locate the HuggingFace hub cache directory.
+ * Uses HF_HOME when it is set to a non-empty value, otherwise
+ * <home>/.cache/huggingface.
+ * @returns {string} Path of the `hub` folder inside the cache root.
+ */
+function hubDir() {
+  const override = process.env.HF_HOME;
+  const cacheRoot = override ? override : path.join(os.homedir(), ".cache", "huggingface");
+  return path.join(cacheRoot, "hub");
+}
+/**
+ * Build the HF cache folder name for a repo id.
+ * Every "/" becomes "--" and the result is prefixed with "models--",
+ * e.g. "Systran/faster-whisper-small" → "models--Systran--faster-whisper-small".
+ * @param {string} repoId Repo id in "<org>/<name>" form.
+ * @returns {string} Folder name used under the hub cache directory.
+ */
+function repoCacheName(repoId) {
+  const flattened = repoId.split("/").join("--");
+  return `models--${flattened}`;
+}
+/**
+ * Sum the sizes of all regular files under `dir`, walking sub-directories
+ * iteratively. Directories that cannot be read and files that cannot be
+ * stat'ed contribute nothing instead of aborting the walk.
+ * @param {string} dir Directory to measure.
+ * @returns {number} Total size in bytes (0 when `dir` is missing or unreadable).
+ */
+function dirSizeBytes(dir) {
+  const pending = [dir];
+  let bytes = 0;
+  while (pending.length) {
+    const current = pending.pop();
+    let children;
+    try {
+      children = fs.readdirSync(current, { withFileTypes: true });
+    } catch {
+      continue;
+    }
+    for (const child of children) {
+      // HF stores real bytes once in blobs/ and symlinks them from snapshots/.
+      // Symlinks are skipped so the same payload is not counted twice.
+      if (child.isSymbolicLink()) continue;
+      const childPath = path.join(current, child.name);
+      if (child.isDirectory()) {
+        pending.push(childPath);
+        continue;
+      }
+      if (!child.isFile()) continue;
+      try {
+        bytes += fs.lstatSync(childPath).size;
+      } catch {}
+    }
+  }
+  return bytes;
+}
+/**
+ * Format a byte count as a short human-readable string using 1024-based
+ * units up to GB. Values below 10 in KB/MB/GB keep one decimal; everything
+ * else is rounded to a whole number.
+ * @param {number} bytes Size in bytes.
+ * @returns {string} e.g. "512 B", "1.5 KB", "480 MB"; "—" for falsy or sub-1 input.
+ */
+export function humanSize(bytes) {
+  if (!bytes || bytes < 1) return "—";
+  const units = ["B", "KB", "MB", "GB"];
+  let value = bytes;
+  let unitIndex = 0;
+  for (; value >= 1024 && unitIndex < units.length - 1; unitIndex++) {
+    value /= 1024;
+  }
+  const decimals = value >= 10 || unitIndex === 0 ? 0 : 1;
+  return `${value.toFixed(decimals)} ${units[unitIndex]}`;
+}
+// Catalog: per backend, the offered models with their HF repo id and an
+// approximate download size (used only until the model is actually on disk).
+//   id        — short model id; repoFor() matches on it
+//   repo      — HF repo id; modelStatusByRepo() looks it up in the hub cache
+//   approx_mb — estimate in MiB; listSttModels() shows it as "~<size>" while
+//               the model is not downloaded
+export const STT_MODEL_CATALOG = {
+  faster: [
+    { id: "tiny",           repo: "Systran/faster-whisper-tiny",            approx_mb: 75 },
+    { id: "base",           repo: "Systran/faster-whisper-base",            approx_mb: 145 },
+    { id: "small",          repo: "Systran/faster-whisper-small",           approx_mb: 480 },
+    { id: "medium",         repo: "Systran/faster-whisper-medium",          approx_mb: 1500 },
+    { id: "large-v3",       repo: "Systran/faster-whisper-large-v3",        approx_mb: 3100 },
+    { id: "large-v3-turbo", repo: "mobiuslabsgmbh/faster-whisper-large-v3-turbo", approx_mb: 1600 },
+  ],
+  mlx: [
+    { id: "small",          repo: "mlx-community/whisper-small-mlx",            approx_mb: 480 },
+    { id: "large-v3",       repo: "mlx-community/whisper-large-v3-mlx",         approx_mb: 3100 },
+    { id: "large-v3-turbo", repo: "mlx-community/whisper-large-v3-turbo",       approx_mb: 1600 },
+  ],
+};
+/**
+ * Report whether a repo is present in the HF hub cache and how large it is.
+ * @param {string} repo HF repo id, e.g. "Systran/faster-whisper-small".
+ * @returns {{downloaded: boolean, size_bytes: number}} `downloaded` is true
+ *   only when the cache folder holds more than 1,000,000 bytes of real files.
+ */
+export function modelStatusByRepo(repo) {
+  const cacheDir = path.join(hubDir(), repoCacheName(repo));
+  if (fs.existsSync(cacheDir)) {
+    const measured = dirSizeBytes(cacheDir);
+    // A bare ref/lock dir with no blobs is "not really downloaded".
+    const isPresent = measured > 1_000_000;
+    return { downloaded: isPresent, size_bytes: measured };
+  }
+  return { downloaded: false, size_bytes: 0 };
+}
+/**
+ * List a backend's models with download status + sizes.
+ *
+ * @param {string} backend Catalog key ("faster" or "mlx"). Any value that does
+ *   not map to an array in STT_MODEL_CATALOG — unknown names as well as
+ *   inherited keys such as "constructor" — yields an empty list.
+ * @returns {Array<{id: string, repo: string, downloaded: boolean, size: string, size_bytes: number}>}
+ *   `size` is the measured on-disk size once downloaded, otherwise the catalog
+ *   estimate prefixed with "~".
+ */
+export function listSttModels(backend) {
+  // `STT_MODEL_CATALOG[backend] || []` is not enough: keys inherited from
+  // Object.prototype ("constructor", "toString", …) are truthy non-arrays and
+  // would crash on `.map`.
+  const entries = STT_MODEL_CATALOG[backend];
+  const catalog = Array.isArray(entries) ? entries : [];
+  return catalog.map((m) => {
+    const st = modelStatusByRepo(m.repo);
+    return {
+      id: m.id,
+      repo: m.repo,
+      downloaded: st.downloaded,
+      size: st.downloaded ? humanSize(st.size_bytes) : `~${humanSize(m.approx_mb * 1024 * 1024)}`,
+      size_bytes: st.size_bytes,
+    };
+  });
+}
+/**
+ * Resolve the HF repo id for a (backend, model-id) pair.
+ *
+ * @param {string} backend Catalog key ("faster" or "mlx").
+ * @param {string} modelId Short model id (e.g. "small") or a raw repo id.
+ * @returns {string} The catalog repo for a known id; otherwise `modelId`
+ *   unchanged, so a raw repo id passes straight through.
+ */
+export function repoFor(backend, modelId) {
+  // Only real catalog arrays are searched. Keys inherited from
+  // Object.prototype (e.g. "constructor") are truthy non-arrays and would
+  // throw on `.find` behind a plain `|| []` fallback.
+  const entries = STT_MODEL_CATALOG[backend];
+  const entry = Array.isArray(entries) ? entries.find((m) => m.id === modelId) : undefined;
+  return entry?.repo || modelId; // allow passing a raw repo id through
+}

package/src/core/voice/transcription.js CHANGED Viewed

@@ -23,7 +23,11 @@ import { logInfo, logWarn } from "#core/logging.js";
 export const WHISPER_LOCAL_PORT = 18765;
 export const DEFAULT_LOCAL = {
-  model: "small",
+  // "auto" = adapt to the machine (mlx/Metal on Apple Silicon, faster-whisper
+  // cuda on NVIDIA, else faster-whisper cpu). Override with "faster" | "mlx".
+  backend: "auto",
+  model: "small",          // faster-whisper model id (tiny|base|small|…)
+  mlx_model: "",           // mlx repo (defaults to the hardware recommendation)
   device: "cpu",
   compute_type: "int8",
   language: "auto",
@@ -34,6 +38,28 @@ export const DEFAULT_LOCAL = {
   timeout_ms: 20 * 60_000,
 };
+// OpenAI's official cloud Whisper. `base_url` is overridable so the same
+// client can target any OpenAI-compatible server (see `custom`).
+// A `language` of "auto" means no `language` field is sent with the request.
+export const DEFAULT_OPENAI = {
+  base_url: "https://api.openai.com/v1",
+  model: "whisper-1",
+  language: "auto",
+};
+// A user-supplied, OpenAI-compatible STT server reachable over the network:
+// mlx-audio on this Mac's Metal GPU (localhost:8000), a Radeon/NVIDIA box on
+// the LAN, or anyone's remote endpoint. All expose POST /audio/transcriptions,
+// so they share the exact client as `openai` — only base_url/key/model differ.
+// Everything is empty by default: transcribe() rejects provider "custom" until
+// base_url is set, and the "auto" fallback skips this engine while it is empty.
+export const DEFAULT_CUSTOM = {
+  base_url: "",   // e.g. http://localhost:8000/v1  or  http://192.168.1.50:9000/v1
+  api_key: "",    // optional — most local servers don't require one
+  model: "",      // e.g. mlx-community/whisper-large-v3-turbo  or  Systran/faster-whisper-large-v3
+  language: "auto",
+};
+/** STT engine ids surfaced to the web admin, in display/fallback order. */
+export const STT_ENGINE_IDS = ["local", "openai", "custom"];
 /**
  * Resolve the effective transcription language. Priority:
  *   explicit local config → config.user.language → "auto" (whisper detects).
@@ -44,29 +70,110 @@ export function resolveTranscriptionLanguage(localCfg, userLang) {
   return "auto";
 }
+/**
+ * Resolve the local engine's effective backend + model, mutating `local`.
+ *   backend "auto" → whatever recommendStt() reports (mlx on Apple
+ *   Silicon/Metal, faster-whisper cuda on NVIDIA, else faster-whisper cpu).
+ * If the hardware module cannot be loaded, a faster-whisper/small/cpu/int8
+ * recommendation is assumed.
+ * Safety net: when the chosen mlx model isn't downloaded yet (or its status
+ * can't be read), fall back to faster-whisper so a live voice turn never
+ * stalls on a multi-GB download — the model-manager UI handles the explicit
+ * download.
+ * @param {object} local Local STT settings; backend, model, device and
+ *   compute_type may be rewritten in place.
+ * @returns {Promise<void>}
+ */
+async function resolveLocalBackend(local) {
+  const requested = local.backend || "auto";
+  let recommended;
+  try {
+    const { recommendStt } = await import("#core/voice/stt-hardware.js");
+    recommended = recommendStt();
+  } catch {
+    recommended = { backend: "faster", model: "small", device: "cpu", compute_type: "int8" };
+  }
+  const effective = requested === "auto" ? recommended.backend : requested;
+  if (effective === "mlx") {
+    const mlxModel = local.mlx_model || recommended.model;
+    let isDownloaded = false;
+    try {
+      const { modelStatusByRepo } = await import("#core/voice/stt-models.js");
+      isDownloaded = modelStatusByRepo(mlxModel).downloaded;
+    } catch {}
+    if (isDownloaded) {
+      // `model` is what whisper-server.js passes as --model.
+      Object.assign(local, {
+        backend: "mlx",
+        model: mlxModel,
+        device: "metal",
+        compute_type: "mlx",
+      });
+      return;
+    }
+    // Not present → don't block voice; continue on the faster-whisper path.
+  }
+  // faster-whisper path. On an NVIDIA box, prefer CUDA + float16 unless the
+  // user pinned something explicit.
+  const cudaRecommended = recommended.backend === "faster" && recommended.device === "cuda";
+  if (cudaRecommended) {
+    if (!local.device || local.device === "cpu") local.device = "cuda";
+    if (local.compute_type === "int8") local.compute_type = recommended.compute_type || "float16";
+  }
+  local.backend = "faster";
+}
 export async function getConfig() {
   try {
     const { readConfig } = await import("#core/config/index.js");
     const cfg = readConfig() || {};
     const t = cfg.transcription || {};
-    const openaiKey = cfg.engines?.openai?.api_key || process.env.OPENAI_API_KEY || "";
     const userLang = cfg.user?.language || "";
     const localBase = { ...DEFAULT_LOCAL, ...(t.local || {}) };
     localBase.language = resolveTranscriptionLanguage(localBase, userLang);
+    // Mutates localBase: resolves backend "auto" and may rewrite
+    // backend/model/device/compute_type for the detected hardware.
+    await resolveLocalBackend(localBase);
+    // OpenAI cloud: key can live in transcription.openai, the shared
+    // engines.openai block, or the env. base_url defaults to the official API.
+    const openai = { ...DEFAULT_OPENAI, ...(t.openai || {}) };
+    openai.api_key = t.openai?.api_key || cfg.engines?.openai?.api_key || process.env.OPENAI_API_KEY || "";
+    openai.language = resolveTranscriptionLanguage(openai, userLang);
+    // Custom OpenAI-compatible server (mlx-audio / Radeon / NVIDIA / remote).
+    const custom = { ...DEFAULT_CUSTOM, ...(t.custom || {}) };
+    custom.language = resolveTranscriptionLanguage(custom, userLang);
     return {
       provider: t.provider || "auto",
       local: localBase,
-      openaiKey,
+      openai,
+      custom,
+      // kept for backward-compat with callers that read `.openaiKey`
+      openaiKey: openai.api_key,
     };
   } catch {
+    // Any failure above (config import/read, backend resolution) lands here:
+    // plain defaults plus the env key.
+    // NOTE(review): local.backend stays "auto" on this path because
+    // resolveLocalBackend is not re-run — confirm consumers handle that.
     return {
       provider: "auto",
       local: { ...DEFAULT_LOCAL },
+      openai: { ...DEFAULT_OPENAI, api_key: process.env.OPENAI_API_KEY || "" },
+      custom: { ...DEFAULT_CUSTOM },
       openaiKey: process.env.OPENAI_API_KEY || "",
     };
   }
 }
+/**
+ * List STT engines + availability for the web admin (mirrors tts listProviders).
+ *
+ * @param {object|null} [rawConfig] Parsed config object; null/undefined is
+ *   treated as an empty config.
+ * @returns {{configured_provider: string, engines: Array<{id: string, available: boolean, configured: boolean}>}}
+ *   `configured_provider` is transcription.provider or "auto"; `engines` lists
+ *   local, openai and custom in that order.
+ */
+export function listSttProviders(rawConfig = {}) {
+  // The default parameter only covers `undefined`; a null config would
+  // otherwise throw on the property reads below.
+  const cfg = rawConfig || {};
+  const t = cfg.transcription || {};
+  const provider = t.provider || "auto";
+  const openaiKey = t.openai?.api_key || cfg.engines?.openai?.api_key || process.env.OPENAI_API_KEY || "";
+  // String() keeps a non-string base_url (e.g. a number in hand-edited JSON)
+  // from throwing on .trim().
+  const customUrl = String(t.custom?.base_url || "").trim();
+  const engines = [
+    // local whisper is embedded (daemon spawns the subprocess on demand) →
+    // always usable, no credentials needed.
+    { id: "local",  available: true,            configured: true },
+    { id: "openai", available: Boolean(openaiKey), configured: Boolean(openaiKey) },
+    { id: "custom", available: Boolean(customUrl), configured: Boolean(customUrl) },
+  ];
+  return { configured_provider: provider, engines };
+}
 /**
  * Call the local whisper-server.py over HTTP. Does NOT spawn or check the
  * subprocess — that's host/daemon/whisper-server.js's job. If the server is
@@ -138,9 +245,14 @@ export async function transcribeViaLocalServer(filePath, opts) {
   throw lastErr || new Error("transcribeViaLocalServer: unknown failure");
 }
-/** OpenAI Whisper-1 cloud API. Needs an api_key. */
-export async function transcribeOpenAI(filePath, apiKey) {
-  if (!apiKey) throw new Error("openai transcription: no api_key");
+/**
+ * OpenAI-compatible transcription (POST {base_url}/audio/transcriptions with a
+ * multipart `file` + `model`). Works against OpenAI itself and any server that
+ * speaks the same contract: mlx-audio, faster-whisper-server, whisper.cpp
+ * server, etc. `backend` is just the label returned to the caller.
+ */
+export async function transcribeViaOpenAICompatible(filePath, { base_url, api_key, model, language, backend = "openai", timeout_ms = 120_000 } = {}) {
+  const baseUrl = (base_url || DEFAULT_OPENAI.base_url).replace(/\/+$/, "");
   const buf = fs.readFileSync(filePath);
   const ext = path.extname(filePath).slice(1).toLowerCase() || "webm";
   const fileType = ext === "ogg" || ext === "oga" ? "audio/ogg"
@@ -151,28 +263,40 @@ export async function transcribeOpenAI(filePath, apiKey) {
     : "application/octet-stream";
   const form = new FormData();
-  form.append("model", "whisper-1");
+  form.append("model", model || DEFAULT_OPENAI.model);
+  if (language && language !== "auto") form.append("language", language);
   form.append("file", new Blob([buf], { type: fileType }), path.basename(filePath));
-  const res = await fetch("https://api.openai.com/v1/audio/transcriptions", {
+  const t0 = Date.now();
+  const res = await fetch(`${baseUrl}/audio/transcriptions`, {
     method: "POST",
-    headers: { authorization: `Bearer ${apiKey}` },
+    // Auth header only when a key is set — local servers usually need none.
+    headers: api_key ? { authorization: `Bearer ${api_key}` } : {},
     body: form,
-    signal: AbortSignal.timeout(60_000),
+    signal: AbortSignal.timeout(timeout_ms),
   });
   if (!res.ok) {
     const errBody = await res.text().catch(() => "");
-    throw new Error(`openai whisper ${res.status}: ${errBody.slice(0, 240)}`);
+    throw new Error(`${backend} stt ${res.status}: ${errBody.slice(0, 240)}`);
   }
   const json = await res.json();
+  logInfo("whisper", `transcribeViaOpenAICompatible(${backend}) ok in ${Date.now() - t0}ms`, {
+    chars: (json.text || "").length, base_url: baseUrl, model: model || DEFAULT_OPENAI.model,
+  });
   return {
     ok: true,
-    backend: "openai",
+    backend,
     text: json.text || "",
     language: json.language || null,
   };
 }
+/**
+ * Back-compat shim: OpenAI Whisper-1 cloud API by key.
+ * Delegates to transcribeViaOpenAICompatible with the DEFAULT_OPENAI settings.
+ * @param {string} filePath Audio file to transcribe.
+ * @param {string} apiKey OpenAI API key; required.
+ * @returns {Promise<object>} Whatever transcribeViaOpenAICompatible resolves to.
+ * @throws {Error} When `apiKey` is empty.
+ */
+export async function transcribeOpenAI(filePath, apiKey) {
+  if (apiKey) {
+    const options = { ...DEFAULT_OPENAI, api_key: apiKey, backend: "openai" };
+    return transcribeViaOpenAICompatible(filePath, options);
+  }
+  throw new Error("openai transcription: no api_key");
+}
 /**
  * Transcribe a file. Provider chosen by config:
  *   - "openai": cloud only
@@ -188,17 +312,24 @@ export async function transcribe(filePath, overrides = {}) {
   const localOpts = { ...cfg.local, ...overrides };
   if (provider === "openai") {
-    return transcribeOpenAI(filePath, cfg.openaiKey);
+    return transcribeViaOpenAICompatible(filePath, { ...cfg.openai, backend: "openai" });
+  }
+  if (provider === "custom") {
+    if (!cfg.custom.base_url) throw new Error("custom transcription: set transcription.custom.base_url");
+    return transcribeViaOpenAICompatible(filePath, { ...cfg.custom, backend: "custom" });
   }
   if (provider === "local") {
     return transcribeViaLocalServer(filePath, localOpts);
   }
-  // auto: local first, fall back to openai if a key is configured
+  // auto: local first, then a configured remote (custom preferred over openai).
   try {
     return await transcribeViaLocalServer(filePath, localOpts);
   } catch (localErr) {
-    if (cfg.openaiKey) {
-      return transcribeOpenAI(filePath, cfg.openaiKey);
+    if (cfg.custom.base_url) {
+      return transcribeViaOpenAICompatible(filePath, { ...cfg.custom, backend: "custom" });
+    }
+    if (cfg.openai.api_key) {
+      return transcribeViaOpenAICompatible(filePath, { ...cfg.openai, backend: "openai" });
     }
     throw new Error(`local transcription failed: ${localErr.message}`);
   }

package/src/host/daemon/api/desktop.js CHANGED Viewed

@@ -1,6 +1,9 @@
 // Desktop (floating voice window) HTTP surface.
 //
-//   GET  /desktop/status        connected websocket clients count
+//   GET  /desktop/status        running flag (pid) + connected websocket clients
+//   POST /desktop/start         launch the floating window (detached Electron)
+//   POST /desktop/stop          terminate the running window (SIGTERM)
+//   POST /desktop/restart       broadcast a "reload" so live windows re-read config
 //   POST /desktop/message       text (post-STT). Responds 200 immediately;
 //                               the super-agent answer is streamed back over WS
 //                               by the desktop plugin.
@@ -16,18 +19,61 @@ import {
   autostartInstall,
   autostartUninstall,
 } from "#core/desktop/autostart.js";
+import {
+  isDesktopRunning,
+  startDesktopDetached,
+  stopDesktop,
+} from "#core/desktop/process.js";
-export function register(app, { plugins }) {
+export function register(app, { plugins, config }) {
   app.get("/desktop/status", (_req, res) => {
+    // `running` is the live Electron process (pid file) — the source of truth
+    // for the Start/Stop/Restart controls. `connected_clients` is how many of
+    // those windows have an open WS to the daemon (a window can be running but
+    // mid-reconnect), surfaced separately.
+    const running = isDesktopRunning();
     import("../desktop-ws.js")
       .then(({ desktopClients }) => {
-        res.json({
-          ok: true,
-          connected_clients: desktopClients.size,
-          running: desktopClients.size > 0,
-        });
+        res.json({ ok: true, connected_clients: desktopClients.size, running });
+      })
+      .catch(() => res.json({ ok: true, connected_clients: 0, running }));
+  });
+  // POST /desktop/start — launch the floating window (detached Electron). Same
+  // helper the CLI's `apx desktop start` uses. No-op-safe if already running.
+  app.post("/desktop/start", async (_req, res) => {
+    try {
+      const r = await startDesktopDetached({ port: config?.port });
+      if (!r.ok) return res.status(500).json({ ok: false, error: r.error });
+      res.json({ ok: true, pid: r.pid, already: !!r.already });
+    } catch (e) {
+      res.status(500).json({ ok: false, error: e.message });
+    }
+  });
+  // POST /desktop/stop — terminate the running window (SIGTERM). `stopped` is
+  // false when nothing was running.
+  app.post("/desktop/stop", (_req, res) => {
+    const r = stopDesktop();
+    if (!r.ok) return res.status(500).json({ ok: false, error: r.error });
+    res.json({ ok: true, stopped: r.stopped });
+  });
+  // POST /desktop/restart — ask every connected desktop window to reload.
+  // The web admin's "Restart" button hits this after a config change (theme,
+  // position) so the floating window re-reads ~/.apx/config.json and re-applies
+  // it without the user dropping to a terminal. The reload is a soft refresh of
+  // the renderer (main.js repositions + reloads webContents), NOT a process
+  // kill — the Electron app keeps its tray/shortcut. Returns how many windows
+  // were signalled so the UI can tell "reloaded" from "nothing connected".
+  app.post("/desktop/restart", (_req, res) => {
+    import("../desktop-ws.js")
+      .then(({ desktopClients, broadcastDesktop }) => {
+        const reloaded = desktopClients.size;
+        broadcastDesktop({ type: "reload" });
+        res.json({ ok: true, reloaded });
       })
-      .catch(() => res.json({ ok: true, connected_clients: 0, running: false }));
+      .catch((e) => res.status(500).json({ ok: false, error: e.message }));
   });
   app.post("/desktop/message", async (req, res) => {

package/src/host/daemon/api/transcribe.js CHANGED Viewed

@@ -2,10 +2,49 @@
 // Raw audio bytes in the body. Headers:
 //   X-Audio-Format  webm | ogg | wav | mp3 (defaults to webm)
 //   X-Language      ISO code or "auto"
-//   X-Provider      auto | local | openai   (overrides config)
+//   X-Provider      auto | local | openai | custom   (overrides config)
 //
 // Shared by overlay, telegram voice messages, and any external caller.
 export function register(app) {
+  // GET /transcribe/providers — STT engine list + availability for the web
+  // admin (mirror of /tts/providers). local = embedded faster-whisper;
+  // openai = cloud Whisper; custom = any OpenAI-compatible server (mlx-audio
+  // on Metal, a Radeon/NVIDIA box on the LAN, a remote endpoint).
+  app.get("/transcribe/providers", async (_req, res) => {
+    try {
+      const { readConfig } = await import("#core/config/index.js");
+      const { listSttProviders } = await import("#core/voice/transcription.js");
+      res.json(listSttProviders(readConfig()));
+    } catch (e) {
+      res.status(500).json({ error: e.message });
+    }
+  });
+  // GET /transcribe/hardware — detected machine + the recommended local backend
+  // (mlx on Apple Silicon, faster-whisper cuda on NVIDIA, else CPU). Drives the
+  // "engine adapts itself" UX in the web admin.
+  app.get("/transcribe/hardware", async (_req, res) => {
+    try {
+      const { detectHardware, recommendStt } = await import("#core/voice/stt-hardware.js");
+      const hw = detectHardware();
+      res.json({ hardware: hw, recommended: recommendStt(hw) });
+    } catch (e) {
+      res.status(500).json({ error: e.message });
+    }
+  });
+  // GET /transcribe/models?backend=faster|mlx — model catalog with on-disk
+  // status (downloaded? size) for the model-manager UI.
+  app.get("/transcribe/models", async (req, res) => {
+    try {
+      const backend = String(req.query.backend || "faster");
+      const { listSttModels } = await import("#core/voice/stt-models.js");
+      res.json({ backend, models: listSttModels(backend) });
+    } catch (e) {
+      res.status(500).json({ error: e.message });
+    }
+  });
   // GET /transcribe/warmup — load the local whisper model (if needed) and reset
   // its idle watchdog. Callers (e.g. the desktop window) ping this while open so
   // the first real utterance doesn't pay the cold-load cost.

package/src/host/daemon/plugins/desktop/index.js CHANGED Viewed

@@ -140,7 +140,12 @@ async function _handleMessage({ ws, text, previousMessages }, { projects, config
       channel: CHANNELS.DESKTOP,
       ...(slashed.handled ? { contextNote: slashed.contextNote } : {}),
       channelMeta: { voice: true }, // desktop module is voice-first → spoken mode
-      previousMessages: history.slice(0, -1),
+      // WS path: history was just appended with the current user turn (line 87),
+      // so drop it. HTTP path: `previousMessages` came in already excluding the
+      // current user turn (the renderer slices it off before POSTing), so
+      // dropping again would silently strip the last assistant reply — making
+      // every turn look like a fresh conversation to the model.
+      previousMessages: ws ? history.slice(0, -1) : history,
       overrideModel: cfg.model || null,
       signal: controller.signal,
       onToken: (chunk) => { liveBuf += chunk; },