npm - @idl3/claude-control - Versions diffs - 0.1.21 → 0.2.0 - Mend

@idl3/claude-control 0.1.21 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/README.md +10 -0
package/bin/cli.js +5 -0
package/bin/setup.sh +60 -0
package/hooks/record-pane.mjs +72 -0
package/lib/config.js +39 -3
package/lib/match.js +39 -26
package/lib/mlx.js +260 -0
package/lib/models.js +66 -0
package/lib/optimize.js +126 -2
package/lib/pane-registry.js +86 -0
package/lib/sessions.js +75 -35
package/lib/shell.js +101 -0
package/lib/tmux.js +77 -11
package/package.json +5 -1
package/scripts/eval-optimize.mjs +46 -0
package/scripts/install-pane-hook.mjs +72 -0
package/server.js +112 -3
package/web/dist/assets/{core-CyYMg33t.js → core-DM2iK52g.js} +1 -1
package/web/dist/assets/index-DwNp83VT.css +1 -0
package/web/dist/assets/index-DwmU8Yna.js +89 -0
package/web/dist/index.html +4 -2
package/web/dist/assets/index-BeJg6Cs1.js +0 -85
package/web/dist/assets/index-Dn7NDGPq.css +0 -1

package/README.md CHANGED Viewed

@@ -20,6 +20,16 @@ npm install -g @idl3/claude-control     # or run once: npx @idl3/claude-control
 **Prerequisites:** Node ≥20 and **tmux** on your `PATH` (`brew install tmux` · `sudo apt install tmux`). Optional: **ttyd** for the in-browser raw terminal (`brew install ttyd` · `sudo apt install ttyd`) — set `CLAUDE_CONTROL_TTYD` to override its path. The web UI ships prebuilt — no build step on install.
+**Optional local AI (no API key):**
+- **Voice → text** — `brew install ffmpeg whisper-cpp` and drop a model at `~/.claude-control/models/ggml-base.en.bin`. The mic in the composer records audio and transcribes it locally.
+- **Prompt enhancer (✨)** — defaults to a **local MLX model** on Apple Silicon. One-time setup:
+  ```bash
+  python3 -m venv ~/.claude-control/mlx-venv
+  ~/.claude-control/mlx-venv/bin/pip install mlx-lm
+  ```
+  claude-control lazily starts `mlx_lm.server` on first use, keeps it warm, and shuts it down when idle. The model (default `mlx-community/Llama-3.2-3B-Instruct-4bit`, ~1.8 GB) auto-downloads on first run. Pick the backend + model in **Settings** (`mlx` → `claude -p` → rules fallback). Without the venv (or on non-Apple hardware) the enhancer falls back to `claude -p`, then a deterministic rules optimiser. Env overrides: `CLAUDE_CONTROL_MLX_PYTHON`, `CLAUDE_CONTROL_MLX_PORT`.
 ```bash
 claude-control                    # start the server (prints the URL)
 claude-control --help             # config + subcommands

package/bin/cli.js CHANGED Viewed

@@ -33,6 +33,7 @@ Local web UI to watch and drive Claude Code tmux sessions.
 Usage:
   claude-control [start]        Start the server (default)
+  claude-control setup              Install local deps (ffmpeg + whisper.cpp + model) for voice input
   claude-control install-service    Install the launchd service (macOS): auto-start + restart
   claude-control uninstall-service  Remove the launchd service
   claude-control --version
@@ -48,6 +49,10 @@ Config (env vars, all optional):
 Requires: Node >=20 and tmux on PATH.`);
     break;
+  case 'setup':
+    runScript('setup.sh');
+    break;
   case 'install-service':
     runScript('install-service.sh');
     break;

package/bin/setup.sh ADDED Viewed

@@ -0,0 +1,60 @@
+#!/bin/bash
+# claude-control setup — install local dependencies for voice transcription.
+#
+# Whisper.cpp is NOT bundled. The 🎤 voice input needs three things, all local
+# (no API key, no cloud): ffmpeg, the whisper-cli binary (Homebrew `whisper-cpp`),
+# and a ggml model under ~/.claude-control/models. This installs/downloads them
+# idempotently. tmux (required to run the app at all) is checked too.
+#
+# Exit status: 0 only when ffmpeg, whisper-cli and a model all verify; 1 when
+# Homebrew is missing, an install/download fails, or verification finds a gap.
+set -uo pipefail
+MODELS_DIR="$HOME/.claude-control/models"
+MODEL="ggml-base.en.bin"
+MODEL_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/$MODEL"
+say() { printf '\n\033[1m%s\033[0m\n' "$*"; }
+ok()  { printf '  \033[32m✓\033[0m %s\n' "$*"; }
+bad() { printf '  \033[31m✗\033[0m %s\n' "$*"; }
+say "claude-control setup — local dependencies"
+# tmux — required for the app itself (sessions live in tmux). Reported only;
+# it is not part of the voice chain this script installs.
+if command -v tmux >/dev/null 2>&1; then ok "tmux: $(command -v tmux)"; else
+  bad "tmux not found (required). Install: brew install tmux"
+fi
+# Homebrew — the install path for ffmpeg + whisper-cpp on macOS.
+if ! command -v brew >/dev/null 2>&1; then
+  bad "Homebrew not found. Install it from https://brew.sh, then re-run: claude-control setup"
+  exit 1
+fi
+say "Installing ffmpeg + whisper-cpp (Homebrew, skips if already present)…"
+brew install ffmpeg whisper-cpp || {
+  bad "brew install failed — see output above"
+  exit 1
+}
+say "Whisper model (~150 MB, base.en)…"
+mkdir -p "$MODELS_DIR"
+if ls "$MODELS_DIR"/ggml-*.bin >/dev/null 2>&1; then
+  ok "model already present: $(ls "$MODELS_DIR"/ggml-*.bin | head -1)"
+else
+  echo "  downloading $MODEL → $MODELS_DIR"
+  # Download to a .partial name and rename on success, so an interrupted
+  # download is never mistaken for a usable model on the next run.
+  if curl -fL --progress-bar "$MODEL_URL" -o "$MODELS_DIR/$MODEL.partial"; then
+    mv "$MODELS_DIR/$MODEL.partial" "$MODELS_DIR/$MODEL"
+    ok "downloaded $MODEL"
+  else
+    rm -f "$MODELS_DIR/$MODEL.partial"
+    bad "model download failed — check your connection and re-run"
+    exit 1
+  fi
+fi
+say "Verifying voice-transcription chain…"
+# Count gaps explicitly so the closing message and the exit status reflect what
+# was actually found, instead of always announcing success.
+missing=0
+if command -v ffmpeg >/dev/null 2>&1; then ok "ffmpeg: $(command -v ffmpeg)"; else
+  bad "ffmpeg missing"
+  missing=$((missing + 1))
+fi
+if command -v whisper-cli >/dev/null 2>&1; then ok "whisper-cli: $(command -v whisper-cli)"; else
+  bad "whisper-cli missing (brew install whisper-cpp)"
+  missing=$((missing + 1))
+fi
+if ls "$MODELS_DIR"/ggml-*.bin >/dev/null 2>&1; then
+  ok "model: $(ls "$MODELS_DIR"/ggml-*.bin | head -1)"
+else
+  bad "no ggml model in $MODELS_DIR"
+  missing=$((missing + 1))
+fi
+if [ "$missing" -gt 0 ]; then
+  say "Setup incomplete — $missing item(s) marked ✗ above still need attention."
+  exit 1
+fi
+say "Done. The 🎤 mic (voice → text) is ready."
+echo "  Note: the MLX prompt-enhancer (optional) is separate; the optimiser falls"
+echo "  back to claude -p / rules when MLX isn't set up."

package/hooks/record-pane.mjs ADDED Viewed

@@ -0,0 +1,72 @@
+#!/usr/bin/env node
+/**
+ * record-pane.mjs — Claude Code SessionStart/SessionEnd hook that records the
+ * EXACT tmux-pane ↔ transcript mapping, so Claude Control never has to guess.
+ *
+ * Claude runs this inside its own process, which has `$TMUX_PANE` (the stable
+ * tmux `%N` pane id) in its env and passes the session details on stdin. So
+ * Claude itself authors the link — no title/time inference.
+ *
+ * SessionStart (startup | resume | clear | compact)
+ *   → write ~/.claude-control/panes/<paneId>.json
+ * SessionEnd
+ *   → delete that file
+ *
+ * No-op when not inside tmux ($TMUX_PANE unset). NEVER throws — a hook that
+ * errors must not disrupt Claude, so everything is best-effort and exits 0.
+ */
+import { mkdir, writeFile, rm } from 'node:fs/promises';
+import { homedir } from 'node:os';
+import path from 'node:path';
+const PANES_DIR = path.join(homedir(), '.claude-control', 'panes');
+/** %5 → "5"; tolerate any tmux pane-id form, keep it filename-safe. */
+function paneFile(tmuxPane) {
+  const safe = String(tmuxPane).replace(/[^A-Za-z0-9_-]/g, '');
+  return safe ? path.join(PANES_DIR, `${safe}.json`) : null;
+}
+async function readStdin() {
+  const chunks = [];
+  for await (const c of process.stdin) chunks.push(c);
+  const raw = Buffer.concat(chunks).toString('utf8').trim();
+  if (!raw) return {};
+  try {
+    return JSON.parse(raw);
+  } catch {
+    return {};
+  }
+}
+async function main() {
+  const tmuxPane = process.env.TMUX_PANE;
+  if (!tmuxPane) return; // not in tmux → nothing to map
+  const file = paneFile(tmuxPane);
+  if (!file) return;
+  const input = await readStdin();
+  const event = input.hook_event_name || '';
+  if (event === 'SessionEnd') {
+    await rm(file, { force: true }).catch(() => {});
+    return;
+  }
+  // SessionStart (and any other start-ish event that carries a transcript).
+  const transcriptPath = input.transcript_path || null;
+  if (!transcriptPath) return;
+  await mkdir(PANES_DIR, { recursive: true }).catch(() => {});
+  const record = {
+    paneId: tmuxPane,
+    sessionId: input.session_id || null,
+    transcriptPath,
+    cwd: input.cwd || null,
+    ts: Date.now(),
+  };
+  await writeFile(file, JSON.stringify(record), 'utf8').catch(() => {});
+}
+main()
+  .catch(() => {})
+  .finally(() => process.exit(0));

package/lib/config.js CHANGED Viewed

@@ -22,6 +22,7 @@
 import fs from 'node:fs';
 import path from 'node:path';
 import os from 'node:os';
+import { detectMachine, recommendMlxModel, recommendClaudeModel } from './models.js';
 // Env lookup mirrors server.js: prefer CLAUDE_CONTROL_<X>, fall back to the
 // legacy COCKPIT_<X> so existing launchers keep working.
@@ -41,14 +42,21 @@ function configPath() {
 const LAUNCH_MAX = 500;
 const OPTIMIZE_MODEL_MAX = 200;
 const CLAUDE_BIN_MAX = 500;
+const MLX_MODEL_MAX = 200;
+const OPTIMIZE_BACKENDS = ['mlx', 'claude', 'rules'];
 /** Defaults, recomputed each call so a changed HOME/env is honoured. */
 function defaults() {
   return {
     launchCommand: 'claude',
     defaultCwd: os.homedir(),
-    optimizeModel: 'claude-haiku-4-5',
+    optimizeModel: recommendClaudeModel(),
     claudeBin: '',
+    // Prompt-enhancer backend: 'mlx' (local model → claude → rules chain),
+    // 'claude' (claude -p → rules), or 'rules' (deterministic, offline).
+    optimizeBackend: 'mlx',
+    // Default MLX model auto-picked for this machine's unified memory.
+    mlxModel: recommendMlxModel(detectMachine().ramGB),
   };
 }
@@ -56,7 +64,7 @@ function defaults() {
  * Read the persisted config, merged over defaults. Never throws — a missing,
  * empty, or corrupt file falls back to defaults. Only known keys are surfaced.
  *
- * @returns {{ launchCommand: string, defaultCwd: string, optimizeModel: string, claudeBin: string }}
+ * @returns {{ launchCommand: string, defaultCwd: string, optimizeModel: string, claudeBin: string, optimizeBackend: string, mlxModel: string }}
  */
 export function readConfig() {
   const base = defaults();
@@ -84,6 +92,15 @@ export function readConfig() {
       typeof parsed.claudeBin === 'string'
         ? parsed.claudeBin
         : base.claudeBin,
+    optimizeBackend:
+      typeof parsed.optimizeBackend === 'string' &&
+      OPTIMIZE_BACKENDS.includes(parsed.optimizeBackend)
+        ? parsed.optimizeBackend
+        : base.optimizeBackend,
+    mlxModel:
+      typeof parsed.mlxModel === 'string' && parsed.mlxModel.trim()
+        ? parsed.mlxModel
+        : base.mlxModel,
   };
 }
@@ -99,7 +116,7 @@ export function readConfig() {
  *    Existence is NOT verified at write time (path may differ across hosts).
  *
  * @param {{ launchCommand?: unknown, defaultCwd?: unknown, optimizeModel?: unknown, claudeBin?: unknown }} partial
- * @returns {{ launchCommand: string, defaultCwd: string, optimizeModel: string, claudeBin: string }} the saved config
+ * @returns {{ launchCommand: string, defaultCwd: string, optimizeModel: string, claudeBin: string, optimizeBackend: string, mlxModel: string }} the saved config
  */
 export function writeConfig(partial = {}) {
   const current = readConfig();
@@ -155,6 +172,25 @@ export function writeConfig(partial = {}) {
     next.claudeBin = bin;
   }
+  if (partial.optimizeBackend !== undefined) {
+    const b = partial.optimizeBackend;
+    if (typeof b !== 'string' || !OPTIMIZE_BACKENDS.includes(b)) {
+      throw new Error(`optimizeBackend must be one of: ${OPTIMIZE_BACKENDS.join(', ')}`);
+    }
+    next.optimizeBackend = b;
+  }
+  if (partial.mlxModel !== undefined) {
+    const m = partial.mlxModel;
+    if (typeof m !== 'string' || !m.trim()) {
+      throw new Error('mlxModel must be a non-empty string');
+    }
+    if (m.length > MLX_MODEL_MAX) {
+      throw new Error(`mlxModel must be ≤${MLX_MODEL_MAX} characters`);
+    }
+    next.mlxModel = m;
+  }
   const dir = dataDir();
   fs.mkdirSync(dir, { recursive: true });
   fs.writeFileSync(configPath(), JSON.stringify(next, null, 2), { mode: 0o600 });

package/lib/match.js CHANGED Viewed

@@ -53,14 +53,15 @@ export function parseEtime(etime) {
 /**
  * Assign transcripts to panes 1:1.
  *
+ * This is the FALLBACK matcher for panes with no SessionStart-hook record (see
+ * lib/pane-registry.js). It uses only deterministic timing signals — title
+ * matching was removed because stale window names mis-routed the chat.
+ *
  * Layered passes (each claims candidates so no transcript is used twice):
- *   1. Title match — a pane's tmux window name uniquely equals a candidate's
- *      customTitle (set by /rename) or aiTitle, cwd-consistent. Strongest:
- *      survives restarts and is independent of timing.
- *   2. Start-time match — candidate birthtime closest to the pane's claude
+ *   1. Start-time match — candidate birthtime closest to the pane's claude
  *      process start (cwd-consistent). A claude proc creates its transcript at
  *      launch, so this binds same-cwd siblings that started at different times.
- *   3. Recency — most-recently-active remaining cwd-consistent candidate.
+ *   2. Recency — most-recently-active remaining cwd-consistent candidate.
  *
  * Panes are processed in a stable (target-sorted) order so results are
  * deterministic regardless of tmux listing order.
@@ -76,29 +77,45 @@ export function assignTranscripts(panes, candidates, opts = {}) {
   const claimed = new Set();
   const ordered = [...panes].sort((a, b) => a.target.localeCompare(b.target));
+  // A candidate is in scope for a pane only if it lives in the pane's OWN
+  // project dir (the slug folder Claude names after the launch cwd). This is the
+  // precise signal: the recorded cwd alone can't tell a legit "session cd'd into
+  // a subdir" from a DIFFERENT deeper session (a git worktree), since both look
+  // like a descendant cwd — that ambiguity let a parent-dir pane steal a child
+  // worktree's transcript. When projectDir isn't supplied (older callers / unit
+  // tests), fall back to the recorded-cwd consistency check.
+  const inScope = (c, pane) => {
+    if (c.projectDir != null && pane.projectDir != null) {
+      return c.projectDir === pane.projectDir;
+    }
+    return isCwdConsistent(c.cwd, pane.cwd);
+  };
   const available = (pane) =>
-    candidates.filter(
-      (c) =>
-        !claimed.has(c.transcriptPath) && isCwdConsistent(c.cwd, pane.cwd),
-    );
+    candidates.filter((c) => !claimed.has(c.transcriptPath) && inScope(c, pane));
   const claim = (pane, cand) => {
     result.set(pane.target, cand);
     claimed.add(cand.transcriptPath);
   };
-  // Pass 1 — unique title match.
-  for (const pane of ordered) {
-    if (result.has(pane.target)) continue;
-    const name = String(pane.windowName || '').trim();
-    if (!name) continue;
-    const hits = available(pane).filter(
-      (c) => c.customTitle === name || c.aiTitle === name,
-    );
-    if (hits.length === 1) claim(pane, hits[0]);
-  }
+  // A transcript can only belong to a pane if it was active at/after the pane's
+  // claude process started (minus slack). Skipped when the pane's start time is
+  // unknown. --resume is safe: resuming appends a record, bumping activity above
+  // the pane start. This is what stops a stale transcript binding to a pane.
+  const temporallyPlausible = (pane, c) => {
+    if (pane.procStartMs == null) return true;
+    const candActive = c.lastActivityMs ?? c.mtime ?? c.birthtimeMs ?? null;
+    return candActive == null || candActive >= pane.procStartMs - startSlackMs;
+  };
-  // Pass 2 — nearest start-time ↔ birthtime.
+  // NOTE: title matching was intentionally removed. A window keeps a stale name
+  // when a pane is reused or /rename'd, so binding on title mis-routed the chat
+  // to an old transcript ("transcript drift"). The exact pane→transcript link now
+  // comes from the SessionStart hook (lib/pane-registry.js), applied in
+  // sessions.js BEFORE this matcher runs; assignTranscripts is the fallback for
+  // panes with no hook record, using only deterministic timing signals below.
+  // Pass 1 — nearest start-time ↔ birthtime.
   for (const pane of ordered) {
     if (result.has(pane.target)) continue;
     if (pane.procStartMs == null) continue;
@@ -122,7 +139,7 @@ export function assignTranscripts(panes, candidates, opts = {}) {
     if (best) claim(pane, best);
   }
-  // Pass 3 — most-recently-active remaining candidate.
+  // Pass 2 — most-recently-active remaining candidate.
   // Gate: when the pane's process start time is known, only consider candidates
   // whose last known activity (lastActivityMs, falling back to file mtime or
   // birthtime) is at or after the pane started (minus startSlackMs). A transcript
@@ -135,11 +152,7 @@ export function assignTranscripts(panes, candidates, opts = {}) {
     if (result.has(pane.target)) continue;
     let best = null;
     for (const c of available(pane)) {
-      // Apply temporal gate only when pane start time is known.
-      if (pane.procStartMs != null) {
-        const candActive = c.lastActivityMs ?? c.mtime ?? c.birthtimeMs ?? null;
-        if (candActive != null && candActive < pane.procStartMs - startSlackMs) continue;
-      }
+      if (!temporallyPlausible(pane, c)) continue;
       if (!best || (c.lastActivityMs ?? 0) > (best.lastActivityMs ?? 0)) best = c;
     }
     if (best) claim(pane, best);

package/lib/mlx.js ADDED Viewed

@@ -0,0 +1,260 @@
+/**
+ * lib/mlx.js — local LLM backend via a managed mlx_lm.server (Apple Silicon).
+ *
+ * Spawns a singleton OpenAI-compatible MLX server on first use, keeps it warm,
+ * and shuts it down after an idle period. No API key, no network — fully local.
+ * Used by the prompt enhancer as the first link in the mlx → claude → rules
+ * chain (server.js handleOptimize).
+ *
+ * Exports:
+ *  - resolveMlxPython() → string | null      (venv python that has mlx_lm)
+ *  - serverBase(port) → string               (pure)
+ *  - isModelCached(id) → boolean             (model already in the local HF cache?)
+ *  - buildChatBody(prompt, model, maxTokens) → object  (pure)
+ *  - parseChatContent(json) → string         (pure; throws on bad/empty shape)
+ *  - warm(port)                              (best-effort background pre-warm)
+ *  - complete(prompt, { model, port, maxTokens }) → Promise<string>
+ *  - shutdown()                              (kill the child; for exit/tests)
+ *
+ * Config/env: model from config.mlxModel (default below); port via
+ * CLAUDE_CONTROL_MLX_PORT (default 4319); python via CLAUDE_CONTROL_MLX_PYTHON
+ * else ~/.claude-control/mlx-venv/bin/python else a PATH python3 with mlx_lm.
+ */
+import fs from 'node:fs';
+import path from 'node:path';
+import os from 'node:os';
+import { spawn, execFileSync } from 'node:child_process';
+import { readConfig } from './config.js';
+export const DEFAULT_MODEL = 'mlx-community/Llama-3.2-3B-Instruct-4bit';
+// Dedicated port for OUR managed sidecar. NOT 8080 — that's a very common port
+// (LM Studio, other local LLM/TTS servers) and colliding makes us POST our model
+// to a foreign server that can't serve it → hang. Overridable via env.
+const DEFAULT_PORT = Number(process.env.CLAUDE_CONTROL_MLX_PORT) || 4319;
+// How long a SINGLE request waits for the server to be ready before giving up
+// and letting the caller fall back (to claude -p). The spawned server keeps
+// loading in the background, so the next request finds it warm (~1s). Cold
+// model load can take ~30-90s under launchd, so we never block a request that
+// long — we fail over fast and warm up for next time.
+const REQUEST_READY_MS = Number(process.env.CLAUDE_CONTROL_MLX_TIMEOUT_MS) || 8_000;
+const IDLE_MS = 15 * 60_000; // free ~2GB after 15 min idle
+const MAX_TOKENS = 700;
+/** @param {number} [port] */
+export function serverBase(port = DEFAULT_PORT) {
+  return `http://127.0.0.1:${port}`;
+}
+/**
+ * Resolve a python interpreter that can `import mlx_lm`.
+ * @returns {string | null}
+ */
+export function resolveMlxPython() {
+  const envPy = process.env.CLAUDE_CONTROL_MLX_PYTHON;
+  const venvPy = path.join(os.homedir(), '.claude-control', 'mlx-venv', 'bin', 'python');
+  for (const p of [envPy, venvPy]) {
+    if (p && fs.existsSync(p)) return p;
+  }
+  try {
+    const p = execFileSync('which', ['python3'], { encoding: 'utf8' }).trim();
+    if (p) {
+      execFileSync(p, ['-c', 'import mlx_lm'], { stdio: 'ignore' });
+      return p;
+    }
+  } catch {
+    /* no mlx_lm on PATH python */
+  }
+  return null;
+}
+// ── server singleton ────────────────────────────────────────────────────────
+let child = null;
+let childModel = null; // model id the current child was spawned with
+let idleTimer = null;
+function bumpIdle() {
+  if (idleTimer) clearTimeout(idleTimer);
+  idleTimer = setTimeout(() => shutdown(), IDLE_MS);
+  if (idleTimer.unref) idleTimer.unref();
+}
+/** Kill the managed server (no-op if none / external). */
+export function shutdown() {
+  if (idleTimer) { clearTimeout(idleTimer); idleTimer = null; }
+  if (child) {
+    try { child.kill('SIGTERM'); } catch { /* ignore */ }
+    child = null;
+  }
+  childModel = null;
+}
+async function ping(port) {
+  try {
+    const r = await fetch(serverBase(port) + '/v1/models', { signal: AbortSignal.timeout(1500) });
+    return r.ok;
+  } catch {
+    return false;
+  }
+}
+// The model id a server on `port` is currently serving (via /v1/models), or null.
+async function servedModel(port) {
+  try {
+    const r = await fetch(serverBase(port) + '/v1/models', { signal: AbortSignal.timeout(1500) });
+    if (!r.ok) return null;
+    const j = await r.json();
+    const id = j?.data?.[0]?.id;
+    return typeof id === 'string' ? id : null;
+  } catch {
+    return null;
+  }
+}
+// Best-effort: kill whatever process holds `port` (used to reclaim the port from
+// an orphaned mlx server that's serving the wrong model). No-op if lsof/kill fail.
+function freePort(port) {
+  try {
+    const out = execFileSync('lsof', ['-ti', `tcp:${port}`], { encoding: 'utf8' }).trim();
+    for (const pid of out.split('\n').filter(Boolean)) {
+      try { process.kill(Number(pid), 'SIGTERM'); } catch { /* already gone */ }
+    }
+  } catch {
+    /* nothing on the port, or lsof unavailable */
+  }
+}
+/**
+ * Is the model already in the local HuggingFace cache (so selecting it won't
+ * trigger a multi-GB download)? Checks `~/.cache/huggingface/hub/models--…`.
+ * @param {string} id @returns {boolean}
+ */
+export function isModelCached(id) {
+  const dir = path.join(
+    process.env.HF_HOME || path.join(os.homedir(), '.cache', 'huggingface'),
+    'hub',
+    `models--${String(id).replace(/\//g, '--')}`,
+  );
+  try {
+    const snaps = path.join(dir, 'snapshots');
+    if (!fs.existsSync(snaps)) return false;
+    return fs.readdirSync(snaps).some((s) => {
+      try {
+        return fs.readdirSync(path.join(snaps, s)).length > 0;
+      } catch {
+        return false;
+      }
+    });
+  } catch {
+    return false;
+  }
+}
+// Spawn the mlx_lm.server child (once). Logs to ~/.claude-control/logs so a
+// failed/slow start is diagnosable. Sets HOME explicitly (launchd may not).
+function spawnServer(model, port) {
+  const py = resolveMlxPython();
+  if (!py) {
+    throw new Error(
+      'mlx_lm not installed — create ~/.claude-control/mlx-venv and `pip install mlx-lm`',
+    );
+  }
+  let out = 'ignore';
+  try {
+    const logPath = path.join(os.homedir(), '.claude-control', 'logs', 'mlx-server.log');
+    fs.mkdirSync(path.dirname(logPath), { recursive: true });
+    out = fs.openSync(logPath, 'a');
+  } catch {
+    /* fall back to ignored stdio */
+  }
+  child = spawn(
+    py,
+    ['-m', 'mlx_lm.server', '--model', model, '--host', '127.0.0.1', '--port', String(port)],
+    { stdio: ['ignore', out, out], env: { ...process.env, HOME: os.homedir() } },
+  );
+  childModel = model;
+  child.on('exit', () => { child = null; childModel = null; });
+}
+// Ensure a server serving EXACTLY `model` is answering on `port`. Reuses our
+// warm child or any server already serving the right model; otherwise restarts
+// — killing a wrong-model child and reclaiming the port from a wrong-model
+// orphan, so swapping models never POSTs a model the running server lacks (which
+// would trigger an in-request download and hang). Waits only REQUEST_READY_MS;
+// if the (new) model is still loading/downloading, throws so the caller falls
+// back while it finishes in the background.
+async function ensureServer(model, port) {
+  if (child && childModel === model && (await ping(port))) return;
+  const served = await servedModel(port);
+  if (served === model) return; // right model already up (orphan/external) → reuse
+  if (child) shutdown(); // our child is serving the wrong model → stop it
+  if (served) freePort(port); // an orphan holds the port with the wrong model → reclaim
+  spawnServer(model, port);
+  const deadline = Date.now() + REQUEST_READY_MS;
+  while (Date.now() < deadline) {
+    await new Promise((r) => setTimeout(r, 600));
+    if ((await servedModel(port)) === model) return;
+  }
+  throw new Error('mlx server still warming up');
+}
+/**
+ * Build the OpenAI chat-completions request body. Pure.
+ * @param {string} prompt @param {string} model @param {number} [maxTokens]
+ */
+export function buildChatBody(prompt, model, maxTokens = MAX_TOKENS) {
+  return {
+    model,
+    messages: [{ role: 'user', content: prompt }],
+    max_tokens: maxTokens,
+    temperature: 0.2,
+  };
+}
+/**
+ * Extract the assistant text from an OpenAI chat-completions response. Pure.
+ * @param {any} json @returns {string}
+ */
+export function parseChatContent(json) {
+  const c = json?.choices?.[0]?.message?.content;
+  if (typeof c !== 'string' || !c.trim()) throw new Error('empty MLX completion');
+  return c;
+}
+/**
+ * Best-effort pre-warm: spawn + load the server in the background so the first
+ * real request is fast. No-op-safe — swallows the "still warming" throw; the
+ * child keeps loading. Call at startup when the MLX backend is selected.
+ * @param {number} [port]
+ */
+export function warm(port = DEFAULT_PORT) {
+  const model = readConfig().mlxModel || DEFAULT_MODEL;
+  ensureServer(model, port).catch(() => {});
+}
+/**
+ * Complete a prompt via the local MLX server (spawning + warming it if needed).
+ * Throws on any failure so the caller can fall through to the next backend.
+ *
+ * @param {string} prompt
+ * @param {{ model?: string, port?: number, maxTokens?: number }} [opts]
+ * @returns {Promise<string>}
+ */
+export async function complete(prompt, { model, port = DEFAULT_PORT, maxTokens = MAX_TOKENS } = {}) {
+  const m = model || readConfig().mlxModel || DEFAULT_MODEL;
+  await ensureServer(m, port);
+  const res = await fetch(serverBase(port) + '/v1/chat/completions', {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify(buildChatBody(prompt, m, maxTokens)),
+    signal: AbortSignal.timeout(60_000),
+  });
+  if (!res.ok) throw new Error(`MLX server HTTP ${res.status}`);
+  const json = await res.json();
+  bumpIdle();
+  return parseChatContent(json);
+}
+// Best-effort: don't leave the child server orphaned when the parent exits
+// cleanly. (SIGKILL can't be trapped; an orphan is harmless — ensureServer
+// reuses whatever is already answering on the port.)
+process.on('exit', shutdown);
+process.on('SIGTERM', () => { shutdown(); process.exit(0); });
+process.on('SIGINT', () => { shutdown(); process.exit(0); });