@idl3/claude-control 0.1.21 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -20,6 +20,16 @@ npm install -g @idl3/claude-control # or run once: npx @idl3/claude-control
20
20
 
21
21
  **Prerequisites:** Node ≥20 and **tmux** on your `PATH` (`brew install tmux` · `sudo apt install tmux`). Optional: **ttyd** for the in-browser raw terminal (`brew install ttyd` · `sudo apt install ttyd`) — set `CLAUDE_CONTROL_TTYD` to override its path. The web UI ships prebuilt — no build step on install.
22
22
 
23
+ **Optional local AI (no API key):**
24
+
25
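+ - **Voice → text** — run `claude-control setup` (installs ffmpeg + whisper-cpp via Homebrew and downloads the model), or do it by hand: `brew install ffmpeg whisper-cpp` and drop a model at `~/.claude-control/models/ggml-base.en.bin`. The mic in the composer records audio and transcribes it locally.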
26
+ - **Prompt enhancer (✨)** — defaults to a **local MLX model** on Apple Silicon. One-time setup:
27
+ ```bash
28
+ python3 -m venv ~/.claude-control/mlx-venv
29
+ ~/.claude-control/mlx-venv/bin/pip install mlx-lm
30
+ ```
31
+ claude-control lazily starts `mlx_lm.server` on first use, keeps it warm, and shuts it down when idle. The model (default `mlx-community/Llama-3.2-3B-Instruct-4bit`, ~1.8 GB) auto-downloads on first run. Pick the backend + model in **Settings** (`mlx` → `claude -p` → rules fallback). Without the venv (or on non-Apple hardware) the enhancer falls back to `claude -p`, then a deterministic rules optimiser. Env overrides: `CLAUDE_CONTROL_MLX_PYTHON`, `CLAUDE_CONTROL_MLX_PORT`.
32
+
23
33
  ```bash
24
34
  claude-control # start the server (prints the URL)
25
35
  claude-control --help # config + subcommands
package/bin/cli.js CHANGED
@@ -33,6 +33,7 @@ Local web UI to watch and drive Claude Code tmux sessions.
33
33
 
34
34
  Usage:
35
35
  claude-control [start] Start the server (default)
36
+ claude-control setup Install local deps (ffmpeg + whisper.cpp + model) for voice input
36
37
  claude-control install-service Install the launchd service (macOS): auto-start + restart
37
38
  claude-control uninstall-service Remove the launchd service
38
39
  claude-control --version
@@ -48,6 +49,10 @@ Config (env vars, all optional):
48
49
  Requires: Node >=20 and tmux on PATH.`);
49
50
  break;
50
51
 
52
+ case 'setup':
53
+ runScript('setup.sh');
54
+ break;
55
+
51
56
  case 'install-service':
52
57
  runScript('install-service.sh');
53
58
  break;
package/bin/setup.sh ADDED
@@ -0,0 +1,60 @@
1
#!/bin/bash
# claude-control setup — install local dependencies for voice transcription.
#
# Whisper.cpp is NOT bundled. The 🎤 voice input needs three things, all local
# (no API key, no cloud): ffmpeg, the whisper-cli binary (Homebrew `whisper-cpp`),
# and a ggml model under ~/.claude-control/models. This installs/downloads them
# idempotently. tmux (required to run the app at all) is checked too.
#
# Exit status: 0 only when ffmpeg, whisper-cli and a model are all present at the
# end of the run; 1 on an install/download failure or a failed verification.
#
# No `-e` on purpose: every failure is reported explicitly below.
set -uo pipefail

MODELS_DIR="$HOME/.claude-control/models"
MODEL="ggml-base.en.bin"
MODEL_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/$MODEL"

say() { printf '\n\033[1m%s\033[0m\n' "$*"; }
ok() { printf ' \033[32m✓\033[0m %s\n' "$*"; }
bad() { printf ' \033[31m✗\033[0m %s\n' "$*"; }

# Print the first ggml-*.bin file in MODELS_DIR; return 1 when there is none.
# A glob loop is used instead of parsing `ls` output.
first_model() {
  local candidate
  for candidate in "$MODELS_DIR"/ggml-*.bin; do
    if [ -f "$candidate" ]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 1
}

say "claude-control setup — local dependencies"

# tmux — required for the app itself (sessions live in tmux). Only reported:
# it is not part of the voice chain this script installs.
if command -v tmux >/dev/null 2>&1; then
  ok "tmux: $(command -v tmux)"
else
  bad "tmux not found (required). Install: brew install tmux"
fi

# Homebrew — the install path for ffmpeg + whisper-cpp on macOS.
if ! command -v brew >/dev/null 2>&1; then
  bad "Homebrew not found. Install it from https://brew.sh, then re-run: claude-control setup"
  exit 1
fi

say "Installing ffmpeg + whisper-cpp (Homebrew, skips if already present)…"
brew install ffmpeg whisper-cpp || {
  bad "brew install failed — see output above"
  exit 1
}

say "Whisper model (~150 MB, base.en)…"
mkdir -p "$MODELS_DIR"
if existing_model="$(first_model)"; then
  ok "model already present: $existing_model"
else
  echo " downloading $MODEL → $MODELS_DIR"
  # Download to a .partial name and rename on success, so an interrupted
  # download is never mistaken for a usable model.
  if curl -fL --progress-bar "$MODEL_URL" -o "$MODELS_DIR/$MODEL.partial"; then
    mv "$MODELS_DIR/$MODEL.partial" "$MODELS_DIR/$MODEL"
    ok "downloaded $MODEL"
  else
    rm -f "$MODELS_DIR/$MODEL.partial"
    bad "model download failed — check your connection and re-run"
    exit 1
  fi
fi

say "Verifying voice-transcription chain…"
# Track verification failures so the script never claims success (or exits 0)
# while a link of the chain is missing.
failed=0

if command -v ffmpeg >/dev/null 2>&1; then
  ok "ffmpeg: $(command -v ffmpeg)"
else
  bad "ffmpeg missing"
  failed=1
fi

if command -v whisper-cli >/dev/null 2>&1; then
  ok "whisper-cli: $(command -v whisper-cli)"
else
  bad "whisper-cli missing (brew install whisper-cpp)"
  failed=1
fi

if verified_model="$(first_model)"; then
  ok "model: $verified_model"
else
  bad "no ggml model in $MODELS_DIR"
  failed=1
fi

if [ "$failed" -ne 0 ]; then
  say "Setup incomplete — fix the ✗ items above, then re-run: claude-control setup"
  exit 1
fi

say "Done. The 🎤 mic (voice → text) is ready."
echo " Note: the MLX prompt-enhancer (optional) is separate; the optimiser falls"
echo " back to claude -p / rules when MLX isn't set up."
@@ -0,0 +1,72 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * record-pane.mjs — Claude Code SessionStart/SessionEnd hook that records the
4
+ * EXACT tmux-pane ↔ transcript mapping, so Claude Control never has to guess.
5
+ *
6
+ * Claude runs this inside its own process, which has `$TMUX_PANE` (the stable
7
+ * tmux `%N` pane id) in its env and passes the session details on stdin. So
8
+ * Claude itself authors the link — no title/time inference.
9
+ *
10
+ * SessionStart (startup | resume | clear | compact)
11
+ * → write ~/.claude-control/panes/<paneId>.json
12
+ * SessionEnd
13
+ * → delete that file
14
+ *
15
+ * No-op when not inside tmux ($TMUX_PANE unset). NEVER throws — a hook that
16
+ * errors must not disrupt Claude, so everything is best-effort and exits 0.
17
+ */
18
+ import { mkdir, writeFile, rm } from 'node:fs/promises';
19
+ import { homedir } from 'node:os';
20
+ import path from 'node:path';
21
+
22
const PANES_DIR = path.join(homedir(), '.claude-control', 'panes');

/**
 * Map a tmux pane id to the path of its record file inside PANES_DIR.
 *
 * Every character outside `[A-Za-z0-9_-]` is stripped first so the id is safe
 * to use as a file name (e.g. "%5" becomes "5").
 *
 * @param {unknown} tmuxPane pane id; coerced with String()
 * @returns {string | null} `<PANES_DIR>/<id>.json`, or null when nothing is
 *   left after stripping
 */
function paneFile(tmuxPane) {
  const stem = String(tmuxPane).replace(/[^A-Za-z0-9_-]/g, '');
  if (stem === '') return null;
  return path.join(PANES_DIR, `${stem}.json`);
}
29
+
30
/**
 * Read all of stdin and parse it as JSON.
 *
 * @returns {Promise<any>} the parsed value, or `{}` when stdin is empty,
 *   whitespace-only, or not valid JSON
 */
async function readStdin() {
  const parts = [];
  for await (const part of process.stdin) {
    parts.push(part);
  }
  const text = Buffer.concat(parts).toString('utf8').trim();
  if (text === '') return {};
  try {
    return JSON.parse(text);
  } catch {
    // Malformed payload: treat it like an empty one.
    return {};
  }
}
41
+
42
/**
 * Hook entry point: maintain the record file for the tmux pane named by
 * `$TMUX_PANE`, using the hook payload read from stdin.
 *
 * - `$TMUX_PANE` unset, or it yields no usable file name → do nothing.
 * - `hook_event_name === 'SessionEnd'` → remove the pane's record file.
 * - any other event carrying `transcript_path` → (re)write the record file.
 *
 * File-system failures are swallowed so the hook stays best-effort.
 */
async function main() {
  const { TMUX_PANE: tmuxPane } = process.env;
  if (!tmuxPane) return; // not in tmux → nothing to map

  const file = paneFile(tmuxPane);
  if (!file) return;

  const ignore = () => {};
  const input = await readStdin();

  if ((input.hook_event_name || '') === 'SessionEnd') {
    await rm(file, { force: true }).catch(ignore);
    return;
  }

  // SessionStart (and any other start-ish event that carries a transcript).
  const transcriptPath = input.transcript_path || null;
  if (!transcriptPath) return;

  await mkdir(PANES_DIR, { recursive: true }).catch(ignore);
  const payload = JSON.stringify({
    paneId: tmuxPane,
    sessionId: input.session_id || null,
    transcriptPath,
    cwd: input.cwd || null,
    ts: Date.now(),
  });
  await writeFile(file, payload, 'utf8').catch(ignore);
}

// Whatever happens above, exit 0.
main()
  .catch(() => {})
  .finally(() => process.exit(0));
package/lib/config.js CHANGED
@@ -22,6 +22,7 @@
22
22
  import fs from 'node:fs';
23
23
  import path from 'node:path';
24
24
  import os from 'node:os';
25
+ import { detectMachine, recommendMlxModel, recommendClaudeModel } from './models.js';
25
26
 
26
27
  // Env lookup mirrors server.js: prefer CLAUDE_CONTROL_<X>, fall back to the
27
28
  // legacy COCKPIT_<X> so existing launchers keep working.
@@ -41,14 +42,21 @@ function configPath() {
41
42
  const LAUNCH_MAX = 500;
42
43
  const OPTIMIZE_MODEL_MAX = 200;
43
44
  const CLAUDE_BIN_MAX = 500;
45
+ const MLX_MODEL_MAX = 200;
46
+ const OPTIMIZE_BACKENDS = ['mlx', 'claude', 'rules'];
44
47
 
45
48
  /** Defaults, recomputed each call so a changed HOME/env is honoured. */
46
49
  function defaults() {
47
50
  return {
48
51
  launchCommand: 'claude',
49
52
  defaultCwd: os.homedir(),
50
- optimizeModel: 'claude-haiku-4-5',
53
+ optimizeModel: recommendClaudeModel(),
51
54
  claudeBin: '',
55
+ // Prompt-enhancer backend: 'mlx' (local model → claude → rules chain),
56
+ // 'claude' (claude -p → rules), or 'rules' (deterministic, offline).
57
+ optimizeBackend: 'mlx',
58
+ // Default MLX model auto-picked for this machine's unified memory.
59
+ mlxModel: recommendMlxModel(detectMachine().ramGB),
52
60
  };
53
61
  }
54
62
 
@@ -56,7 +64,7 @@ function defaults() {
56
64
  * Read the persisted config, merged over defaults. Never throws — a missing,
57
65
  * empty, or corrupt file falls back to defaults. Only known keys are surfaced.
58
66
  *
59
- * @returns {{ launchCommand: string, defaultCwd: string, optimizeModel: string, claudeBin: string }}
67
+ * @returns {{ launchCommand: string, defaultCwd: string, optimizeModel: string, claudeBin: string, optimizeBackend: string, mlxModel: string }}
60
68
  */
61
69
  export function readConfig() {
62
70
  const base = defaults();
@@ -84,6 +92,15 @@ export function readConfig() {
84
92
  typeof parsed.claudeBin === 'string'
85
93
  ? parsed.claudeBin
86
94
  : base.claudeBin,
95
+ optimizeBackend:
96
+ typeof parsed.optimizeBackend === 'string' &&
97
+ OPTIMIZE_BACKENDS.includes(parsed.optimizeBackend)
98
+ ? parsed.optimizeBackend
99
+ : base.optimizeBackend,
100
+ mlxModel:
101
+ typeof parsed.mlxModel === 'string' && parsed.mlxModel.trim()
102
+ ? parsed.mlxModel
103
+ : base.mlxModel,
87
104
  };
88
105
  }
89
106
 
@@ -99,7 +116,7 @@ export function readConfig() {
99
116
  * Existence is NOT verified at write time (path may differ across hosts).
100
117
  *
101
118
  * @param {{ launchCommand?: unknown, defaultCwd?: unknown, optimizeModel?: unknown, claudeBin?: unknown, optimizeBackend?: unknown, mlxModel?: unknown }} partial
102
- * @returns {{ launchCommand: string, defaultCwd: string, optimizeModel: string, claudeBin: string }} the saved config
119
+ * @returns {{ launchCommand: string, defaultCwd: string, optimizeModel: string, claudeBin: string, optimizeBackend: string, mlxModel: string }} the saved config
103
120
  */
104
121
  export function writeConfig(partial = {}) {
105
122
  const current = readConfig();
@@ -155,6 +172,25 @@ export function writeConfig(partial = {}) {
155
172
  next.claudeBin = bin;
156
173
  }
157
174
 
175
+ if (partial.optimizeBackend !== undefined) {
176
+ const b = partial.optimizeBackend;
177
+ if (typeof b !== 'string' || !OPTIMIZE_BACKENDS.includes(b)) {
178
+ throw new Error(`optimizeBackend must be one of: ${OPTIMIZE_BACKENDS.join(', ')}`);
179
+ }
180
+ next.optimizeBackend = b;
181
+ }
182
+
183
+ if (partial.mlxModel !== undefined) {
184
+ const m = partial.mlxModel;
185
+ if (typeof m !== 'string' || !m.trim()) {
186
+ throw new Error('mlxModel must be a non-empty string');
187
+ }
188
+ if (m.length > MLX_MODEL_MAX) {
189
+ throw new Error(`mlxModel must be ≤${MLX_MODEL_MAX} characters`);
190
+ }
191
+ next.mlxModel = m;
192
+ }
193
+
158
194
  const dir = dataDir();
159
195
  fs.mkdirSync(dir, { recursive: true });
160
196
  fs.writeFileSync(configPath(), JSON.stringify(next, null, 2), { mode: 0o600 });
package/lib/match.js CHANGED
@@ -53,14 +53,15 @@ export function parseEtime(etime) {
53
53
  /**
54
54
  * Assign transcripts to panes 1:1.
55
55
  *
56
+ * This is the FALLBACK matcher for panes with no SessionStart-hook record (see
57
+ * lib/pane-registry.js). It uses only deterministic timing signals — title
58
+ * matching was removed because stale window names mis-routed the chat.
59
+ *
56
60
  * Layered passes (each claims candidates so no transcript is used twice):
57
- * 1. Title match — a pane's tmux window name uniquely equals a candidate's
58
- * customTitle (set by /rename) or aiTitle, cwd-consistent. Strongest:
59
- * survives restarts and is independent of timing.
60
- * 2. Start-time match — candidate birthtime closest to the pane's claude
61
+ * 1. Start-time match — candidate birthtime closest to the pane's claude
61
62
  * process start (cwd-consistent). A claude proc creates its transcript at
62
63
  * launch, so this binds same-cwd siblings that started at different times.
63
- * 3. Recency — most-recently-active remaining cwd-consistent candidate.
64
+ * 2. Recency — most-recently-active remaining cwd-consistent candidate.
64
65
  *
65
66
  * Panes are processed in a stable (target-sorted) order so results are
66
67
  * deterministic regardless of tmux listing order.
@@ -76,29 +77,45 @@ export function assignTranscripts(panes, candidates, opts = {}) {
76
77
  const claimed = new Set();
77
78
  const ordered = [...panes].sort((a, b) => a.target.localeCompare(b.target));
78
79
 
80
+ // A candidate is in scope for a pane only if it lives in the pane's OWN
81
+ // project dir (the slug folder Claude names after the launch cwd). This is the
82
+ // precise signal: the recorded cwd alone can't tell a legit "session cd'd into
83
+ // a subdir" from a DIFFERENT deeper session (a git worktree), since both look
84
+ // like a descendant cwd — that ambiguity let a parent-dir pane steal a child
85
+ // worktree's transcript. When projectDir isn't supplied (older callers / unit
86
+ // tests), fall back to the recorded-cwd consistency check.
87
+ const inScope = (c, pane) => {
88
+ if (c.projectDir != null && pane.projectDir != null) {
89
+ return c.projectDir === pane.projectDir;
90
+ }
91
+ return isCwdConsistent(c.cwd, pane.cwd);
92
+ };
79
93
  const available = (pane) =>
80
- candidates.filter(
81
- (c) =>
82
- !claimed.has(c.transcriptPath) && isCwdConsistent(c.cwd, pane.cwd),
83
- );
94
+ candidates.filter((c) => !claimed.has(c.transcriptPath) && inScope(c, pane));
84
95
 
85
96
  const claim = (pane, cand) => {
86
97
  result.set(pane.target, cand);
87
98
  claimed.add(cand.transcriptPath);
88
99
  };
89
100
 
90
- // Pass 1 unique title match.
91
- for (const pane of ordered) {
92
- if (result.has(pane.target)) continue;
93
- const name = String(pane.windowName || '').trim();
94
- if (!name) continue;
95
- const hits = available(pane).filter(
96
- (c) => c.customTitle === name || c.aiTitle === name,
97
- );
98
- if (hits.length === 1) claim(pane, hits[0]);
99
- }
101
+ // A transcript can only belong to a pane if it was active at/after the pane's
102
+ // claude process started (minus slack). Skipped when the pane's start time is
103
+ // unknown. --resume is safe: resuming appends a record, bumping activity above
104
+ // the pane start. This is what stops a stale transcript binding to a pane.
105
+ const temporallyPlausible = (pane, c) => {
106
+ if (pane.procStartMs == null) return true;
107
+ const candActive = c.lastActivityMs ?? c.mtime ?? c.birthtimeMs ?? null;
108
+ return candActive == null || candActive >= pane.procStartMs - startSlackMs;
109
+ };
100
110
 
101
- // Pass 2 nearest start-time birthtime.
111
+ // NOTE: title matching was intentionally removed. A window keeps a stale name
112
+ // when a pane is reused or /rename'd, so binding on title mis-routed the chat
113
+ // to an old transcript ("transcript drift"). The exact pane→transcript link now
114
+ // comes from the SessionStart hook (lib/pane-registry.js), applied in
115
+ // sessions.js BEFORE this matcher runs; assignTranscripts is the fallback for
116
+ // panes with no hook record, using only deterministic timing signals below.
117
+
118
+ // Pass 1 — nearest start-time ↔ birthtime.
102
119
  for (const pane of ordered) {
103
120
  if (result.has(pane.target)) continue;
104
121
  if (pane.procStartMs == null) continue;
@@ -122,7 +139,7 @@ export function assignTranscripts(panes, candidates, opts = {}) {
122
139
  if (best) claim(pane, best);
123
140
  }
124
141
 
125
- // Pass 3 — most-recently-active remaining candidate.
142
+ // Pass 2 — most-recently-active remaining candidate.
126
143
  // Gate: when the pane's process start time is known, only consider candidates
127
144
  // whose last known activity (lastActivityMs, falling back to file mtime or
128
145
  // birthtime) is at or after the pane started (minus startSlackMs). A transcript
@@ -135,11 +152,7 @@ export function assignTranscripts(panes, candidates, opts = {}) {
135
152
  if (result.has(pane.target)) continue;
136
153
  let best = null;
137
154
  for (const c of available(pane)) {
138
- // Apply temporal gate only when pane start time is known.
139
- if (pane.procStartMs != null) {
140
- const candActive = c.lastActivityMs ?? c.mtime ?? c.birthtimeMs ?? null;
141
- if (candActive != null && candActive < pane.procStartMs - startSlackMs) continue;
142
- }
155
+ if (!temporallyPlausible(pane, c)) continue;
143
156
  if (!best || (c.lastActivityMs ?? 0) > (best.lastActivityMs ?? 0)) best = c;
144
157
  }
145
158
  if (best) claim(pane, best);
package/lib/mlx.js ADDED
@@ -0,0 +1,260 @@
1
+ /**
2
+ * lib/mlx.js — local LLM backend via a managed mlx_lm.server (Apple Silicon).
3
+ *
4
+ * Spawns a singleton OpenAI-compatible MLX server on first use, keeps it warm,
5
+ * and shuts it down after an idle period. No API key; inference is fully local (the model itself is downloaded once, on first run).
6
+ * Used by the prompt enhancer as the first link in the mlx → claude → rules
7
+ * chain (server.js handleOptimize).
8
+ *
9
+ * Exports:
10
+ * - resolveMlxPython() → string | null (venv python that has mlx_lm)
11
+ * - serverBase(port) → string (pure)
12
+ * - buildChatBody(prompt, model, maxTokens) → object (pure)
13
+ * - parseChatContent(json) → string (pure; throws on bad/empty shape)
14
+ * - complete(prompt, { model, port, maxTokens }) → Promise<string>
15
+ * - shutdown() (kill the child; for exit/tests)
16
+ *
17
+ * Config/env: model from config.mlxModel (default below); port via
18
+ * CLAUDE_CONTROL_MLX_PORT (default 4319); python via CLAUDE_CONTROL_MLX_PYTHON
19
+ * else ~/.claude-control/mlx-venv/bin/python else a PATH python3 with mlx_lm.
20
+ */
21
+ import fs from 'node:fs';
22
+ import path from 'node:path';
23
+ import os from 'node:os';
24
+ import { spawn, execFileSync } from 'node:child_process';
25
+ import { readConfig } from './config.js';
26
+
27
export const DEFAULT_MODEL = 'mlx-community/Llama-3.2-3B-Instruct-4bit';

/**
 * Parse a TCP port from an env-var string.
 * @param {string | undefined} raw
 * @param {number} fallback used when `raw` is unset, non-numeric, fractional,
 *   or outside 1–65535
 * @returns {number}
 */
function portFromEnv(raw, fallback) {
  const n = Number(raw);
  return Number.isInteger(n) && n >= 1 && n <= 65535 ? n : fallback;
}

/**
 * Parse a positive millisecond duration from an env-var string.
 * @param {string | undefined} raw
 * @param {number} fallback used when `raw` is unset, non-numeric, or ≤ 0
 * @returns {number}
 */
function positiveMsFromEnv(raw, fallback) {
  const n = Number(raw);
  return Number.isFinite(n) && n > 0 ? n : fallback;
}

// Dedicated port for OUR managed sidecar. NOT 8080 — that's a very common port
// (LM Studio, other local LLM/TTS servers) and colliding makes us POST our model
// to a foreign server that can't serve it → hang. Overridable via env; an
// invalid override (out of range, fractional, non-numeric) falls back to 4319
// instead of producing an unusable URL / --port argument.
const DEFAULT_PORT = portFromEnv(process.env.CLAUDE_CONTROL_MLX_PORT, 4319);
// How long a SINGLE request waits for the server to be ready before giving up
// and letting the caller fall back (to claude -p). The spawned server keeps
// loading in the background, so the next request finds it warm (~1s). Cold
// model load can take ~30-90s under launchd, so we never block a request that
// long — we fail over fast and warm up for next time. A non-positive override
// is ignored.
const REQUEST_READY_MS = positiveMsFromEnv(process.env.CLAUDE_CONTROL_MLX_TIMEOUT_MS, 8_000);
const IDLE_MS = 15 * 60_000; // free ~2GB after 15 min idle
const MAX_TOKENS = 700;

/**
 * Base URL of the local MLX server. Pure.
 * @param {number} [port] defaults to DEFAULT_PORT
 * @returns {string} `http://127.0.0.1:<port>`
 */
export function serverBase(port = DEFAULT_PORT) {
  return `http://127.0.0.1:${port}`;
}
45
+
46
/**
 * Find a python interpreter to run `mlx_lm` with.
 *
 * Lookup order:
 *   1. `$CLAUDE_CONTROL_MLX_PYTHON`, if that path exists;
 *   2. `~/.claude-control/mlx-venv/bin/python`, if it exists;
 *   3. the `python3` found by `which`, but only if `import mlx_lm` succeeds.
 *
 * Candidates 1 and 2 are accepted on existence alone; only the PATH python is
 * actually probed for the module.
 *
 * @returns {string | null} interpreter path, or null when none qualifies
 */
export function resolveMlxPython() {
  const candidates = [
    process.env.CLAUDE_CONTROL_MLX_PYTHON,
    path.join(os.homedir(), '.claude-control', 'mlx-venv', 'bin', 'python'),
  ];
  const existing = candidates.find((candidate) => candidate && fs.existsSync(candidate));
  if (existing) return existing;

  try {
    const systemPy = execFileSync('which', ['python3'], { encoding: 'utf8' }).trim();
    if (!systemPy) return null;
    // Throws (non-zero exit) when the module cannot be imported.
    execFileSync(systemPy, ['-c', 'import mlx_lm'], { stdio: 'ignore' });
    return systemPy;
  } catch {
    /* no mlx_lm on PATH python */
    return null;
  }
}
67
+
68
// ── server singleton ────────────────────────────────────────────────────────
// Module-level state for the single managed mlx_lm.server process.
let child = null; // the spawned ChildProcess, or null when we manage none
let childModel = null; // model id the current child was spawned with
let idleTimer = null; // pending idle-shutdown timer, or null

/** (Re)start the idle countdown: after IDLE_MS the managed server is shut down. */
function bumpIdle() {
  if (idleTimer) {
    clearTimeout(idleTimer);
  }
  idleTimer = setTimeout(() => shutdown(), IDLE_MS);
  // Don't let the pending timer alone keep the process alive.
  idleTimer.unref?.();
}

/** Kill the managed server (no-op if none / external). */
export function shutdown() {
  if (idleTimer) {
    clearTimeout(idleTimer);
    idleTimer = null;
  }
  const proc = child;
  child = null;
  childModel = null;
  if (!proc) return;
  try {
    proc.kill('SIGTERM');
  } catch {
    /* ignore */
  }
}
88
+
89
+ async function ping(port) {
90
+ try {
91
+ const r = await fetch(serverBase(port) + '/v1/models', { signal: AbortSignal.timeout(1500) });
92
+ return r.ok;
93
+ } catch {
94
+ return false;
95
+ }
96
+ }
97
+
98
/**
 * The first model id reported by the server on `port` (`data[0].id` of
 * `GET /v1/models`). Gives up after 1.5 s.
 * @param {number} port
 * @returns {Promise<string | null>} the id, or null when the server is not
 *   reachable, answers non-2xx, or the payload has no string id there
 */
async function servedModel(port) {
  try {
    const url = serverBase(port) + '/v1/models';
    const response = await fetch(url, { signal: AbortSignal.timeout(1500) });
    if (!response.ok) return null;
    const payload = await response.json();
    const firstId = payload?.data?.[0]?.id;
    if (typeof firstId !== 'string') return null;
    return firstId;
  } catch {
    return null;
  }
}
110
+
111
/**
 * Turn `lsof -t` output into the list of pids that may be signalled.
 * Drops blank/non-numeric lines and — crucially — our own pid.
 * @param {string} lsofOutput one pid per line
 * @returns {number[]}
 */
function listenerPids(lsofOutput) {
  return String(lsofOutput)
    .split('\n')
    .map((line) => Number(line.trim()))
    .filter((pid) => Number.isInteger(pid) && pid > 0 && pid !== process.pid);
}

/**
 * Best-effort: SIGTERM whatever process is LISTENING on `port` (used to reclaim
 * the port from an orphaned mlx server that's serving the wrong model). No-op
 * if the port is invalid, nothing listens on it, or lsof/kill fail.
 *
 * Only listeners are targeted (`-sTCP:LISTEN`). A plain `lsof -i tcp:<port>`
 * also lists every process holding a client connection to that port, which can
 * include this very process right after it queried the server over a
 * kept-alive socket; signalling those pids would terminate ourselves.
 *
 * @param {number} port
 */
function freePort(port) {
  const portNum = Number(port);
  if (!Number.isInteger(portNum) || portNum < 1 || portNum > 65535) return;

  let out;
  try {
    out = execFileSync('lsof', ['-nP', `-iTCP:${portNum}`, '-sTCP:LISTEN', '-t'], {
      encoding: 'utf8',
    });
  } catch {
    /* nothing on the port, or lsof unavailable */
    return;
  }
  for (const pid of listenerPids(out)) {
    try {
      process.kill(pid, 'SIGTERM');
    } catch {
      /* already gone */
    }
  }
}
123
+
124
/**
 * Is the model already in the local HuggingFace cache (so selecting it won't
 * trigger a multi-GB download)?
 *
 * Looks for `<HF_HOME or ~/.cache/huggingface>/hub/models--<id with "/" → "--">/snapshots`
 * and reports true when at least one snapshot directory there is non-empty.
 *
 * @param {string} id model id, e.g. "org/name"
 * @returns {boolean} false on any file-system error
 */
export function isModelCached(id) {
  const hfHome = process.env.HF_HOME || path.join(os.homedir(), '.cache', 'huggingface');
  const repoDir = path.join(hfHome, 'hub', `models--${String(id).replace(/\//g, '--')}`);
  const snapshotsDir = path.join(repoDir, 'snapshots');

  // A snapshot counts only if it can be listed and holds at least one entry.
  const hasEntries = (snapshot) => {
    try {
      return fs.readdirSync(path.join(snapshotsDir, snapshot)).length > 0;
    } catch {
      return false;
    }
  };

  try {
    return fs.existsSync(snapshotsDir) && fs.readdirSync(snapshotsDir).some(hasEntries);
  } catch {
    return false;
  }
}
149
+
150
/**
 * Spawn the mlx_lm.server child and record it as the managed singleton.
 * Output is appended to ~/.claude-control/logs/mlx-server.log so a failed/slow
 * start is diagnosable (stdio is ignored if the log can't be opened). HOME is
 * set explicitly (launchd may not).
 *
 * @param {string} model model id passed as `--model`
 * @param {number} port port passed as `--port` (bound on 127.0.0.1)
 * @throws {Error} when no python with mlx_lm can be resolved
 */
function spawnServer(model, port) {
  const py = resolveMlxPython();
  if (!py) {
    throw new Error(
      'mlx_lm not installed — create ~/.claude-control/mlx-venv and `pip install mlx-lm`',
    );
  }

  let logFd = null;
  try {
    const logPath = path.join(os.homedir(), '.claude-control', 'logs', 'mlx-server.log');
    fs.mkdirSync(path.dirname(logPath), { recursive: true });
    logFd = fs.openSync(logPath, 'a');
  } catch {
    /* fall back to ignored stdio */
  }
  const sink = logFd ?? 'ignore';

  let proc;
  try {
    proc = spawn(
      py,
      ['-m', 'mlx_lm.server', '--model', model, '--host', '127.0.0.1', '--port', String(port)],
      { stdio: ['ignore', sink, sink], env: { ...process.env, HOME: os.homedir() } },
    );
  } finally {
    // The child holds its own copy of the descriptor; release the parent's so
    // repeated spawns don't leak one fd each.
    if (logFd !== null) {
      try { fs.closeSync(logFd); } catch { /* ignore */ }
    }
  }

  // Forget the child only if it is still the CURRENT one: a previous child that
  // was replaced exits later and must not wipe the tracking of its successor
  // (which would leave the new server unmanaged — never idle-killed).
  const forget = () => {
    if (child === proc) {
      child = null;
      childModel = null;
    }
  };
  // Without an 'error' listener a failed spawn (ENOENT/EACCES…) is an unhandled
  // 'error' event and takes the whole host process down.
  proc.on('error', forget);
  proc.on('exit', forget);

  child = proc;
  childModel = model;
}
175
+
176
/**
 * Ensure a server serving EXACTLY `model` is answering on `port`.
 *
 * - Our child for this model is answering, or any server already reports this
 *   model → reuse it.
 * - Our child for this model exists but is not answering yet → it is still
 *   loading: leave it alone and just wait. (Killing and respawning it on every
 *   request that arrives during a cold load would keep resetting the load.)
 * - Otherwise restart: stop a wrong-model child, reclaim the port from a
 *   wrong-model orphan, spawn a fresh server — so swapping models never POSTs
 *   a model the running server lacks (which would trigger an in-request
 *   download and hang).
 *
 * Waits only REQUEST_READY_MS; if the model is still loading/downloading,
 * throws so the caller falls back while it finishes in the background.
 *
 * NOTE(review): a right-model child that hangs forever without ever listening
 * is no longer recycled by the next request; it is only reclaimed by
 * shutdown() (idle timer / process exit).
 *
 * @param {string} model
 * @param {number} port
 * @throws {Error} 'mlx server still warming up' on timeout; spawn errors
 */
async function ensureServer(model, port) {
  const ownsRightChild = () => child !== null && childModel === model;

  if (ownsRightChild() && (await ping(port))) return;
  const served = await servedModel(port);
  if (served === model) return; // right model already up (orphan/external) → reuse

  // Re-evaluated AFTER the awaits above, and everything up to the spawn is
  // synchronous: of several concurrent callers only the first one spawns.
  if (!ownsRightChild()) {
    if (child) shutdown(); // our child is serving the wrong model → stop it
    if (served) freePort(port); // an orphan holds the port with the wrong model → reclaim
    spawnServer(model, port);
  }

  const deadline = Date.now() + REQUEST_READY_MS;
  while (Date.now() < deadline) {
    await new Promise((resolve) => setTimeout(resolve, 600));
    if ((await servedModel(port)) === model) return;
  }
  throw new Error('mlx server still warming up');
}
197
+
198
/**
 * Build the OpenAI chat-completions request body: a single user message with
 * a fixed temperature of 0.2. Pure.
 *
 * @param {string} prompt content of the user message
 * @param {string} model model id
 * @param {number} [maxTokens] sent as `max_tokens`; defaults to MAX_TOKENS
 * @returns {{ model: string, messages: {role: string, content: string}[], max_tokens: number, temperature: number }}
 */
export function buildChatBody(prompt, model, maxTokens = MAX_TOKENS) {
  const userMessage = { role: 'user', content: prompt };
  const body = {
    model,
    messages: [userMessage],
    max_tokens: maxTokens,
    temperature: 0.2,
  };
  return body;
}
210
+
211
/**
 * Extract the assistant text (`choices[0].message.content`) from an OpenAI
 * chat-completions response. Pure.
 *
 * @param {any} json parsed response body
 * @returns {string} the content, unmodified (not trimmed)
 * @throws {Error} 'empty MLX completion' when the content is missing, not a
 *   string, or only whitespace
 */
export function parseChatContent(json) {
  const text = json?.choices?.[0]?.message?.content;
  const usable = typeof text === 'string' && text.trim() !== '';
  if (!usable) {
    throw new Error('empty MLX completion');
  }
  return text;
}
220
+
221
/**
 * Best-effort pre-warm: spawn + load the server in the background so the first
 * real request is fast. No-op-safe — swallows the "still warming" throw; the
 * child keeps loading. Call at startup when the MLX backend is selected.
 *
 * Also arms the idle timer, so a server that was pre-warmed but never used is
 * still shut down after the idle period instead of staying resident.
 *
 * @param {number} [port]
 */
export function warm(port = DEFAULT_PORT) {
  const model = readConfig().mlxModel || DEFAULT_MODEL;
  bumpIdle();
  ensureServer(model, port).catch(() => {});
}
231
+
232
/**
 * Complete a prompt via the local MLX server (spawning + warming it if needed).
 * Throws on any failure so the caller can fall through to the next backend.
 *
 * The idle timer is (re)armed both when the attempt starts and when it
 * succeeds. Arming it up front matters: a request that spawns the server and
 * then fails or times out would otherwise leave it running with no idle
 * shutdown scheduled.
 *
 * @param {string} prompt
 * @param {{ model?: string, port?: number, maxTokens?: number }} [opts]
 *   model defaults to config.mlxModel, then DEFAULT_MODEL
 * @returns {Promise<string>} the assistant text
 * @throws {Error} server not ready in time, non-2xx response, request timeout
 *   (60 s), or an empty completion
 */
export async function complete(prompt, { model, port = DEFAULT_PORT, maxTokens = MAX_TOKENS } = {}) {
  const m = model || readConfig().mlxModel || DEFAULT_MODEL;
  bumpIdle();
  await ensureServer(m, port);
  const res = await fetch(serverBase(port) + '/v1/chat/completions', {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify(buildChatBody(prompt, m, maxTokens)),
    signal: AbortSignal.timeout(60_000),
  });
  if (!res.ok) throw new Error(`MLX server HTTP ${res.status}`);
  const json = await res.json();
  bumpIdle(); // count the idle period from the end of the last successful use
  return parseChatContent(json);
}
254
+
255
// Best-effort cleanup so the child server is not left orphaned when the parent
// exits cleanly or is asked to stop. (SIGKILL can't be trapped; an orphan is
// harmless — ensureServer reuses whatever is already answering on the port.)
process.on('exit', shutdown);
for (const signal of ['SIGTERM', 'SIGINT']) {
  // Stop the managed server, then end the process with exit code 0.
  process.on(signal, () => {
    shutdown();
    process.exit(0);
  });
}