@idl3/claude-control 0.1.20 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -0
- package/lib/answer.js +335 -9
- package/lib/claude-cli.js +170 -0
- package/lib/config.js +83 -3
- package/lib/match.js +13 -0
- package/lib/mlx.js +260 -0
- package/lib/models.js +66 -0
- package/lib/optimize.js +222 -0
- package/lib/push.js +14 -1
- package/lib/skills.js +147 -0
- package/lib/subagents.js +153 -2
- package/lib/transcribe.js +156 -0
- package/package.json +1 -1
- package/server.js +350 -16
- package/web/dist/assets/{core-BP70UsO-.js → core-CZTz1vMx.js} +1 -1
- package/web/dist/assets/index-Bup-kzmD.js +85 -0
- package/web/dist/assets/index-D21GSqEK.css +1 -0
- package/web/dist/index.html +4 -2
- package/web/dist/sw.js +4 -1
- package/web/dist/assets/index-D2hrAUsb.js +0 -78
- package/web/dist/assets/index-DM_QgpOD.css +0 -1
package/lib/config.js
CHANGED
|
@@ -6,6 +6,13 @@
|
|
|
6
6
|
* overridable to a shell alias like `yolo` or `claude --flags`) and the
|
|
7
7
|
* default cwd new sessions start in.
|
|
8
8
|
*
|
|
9
|
+
* Also holds prompt-optimiser settings:
|
|
10
|
+
* - optimizeModel: the Claude model used for LLM-based prompt optimisation
|
|
11
|
+
* (default 'claude-haiku-4-5').
|
|
12
|
+
* - claudeBin: optional absolute path to the claude CLI binary. Empty string
|
|
13
|
+
* means auto-resolve (resolveClaudeBin() in lib/claude-cli.js tries PATH,
|
|
14
|
+
* then common install locations).
|
|
15
|
+
*
|
|
9
16
|
* Persisted at ~/.claude-control/config.json (honour CLAUDE_CONTROL_DATA when
|
|
10
17
|
* set, matching server.js's env-override convention). Reads never throw —
|
|
11
18
|
* defaults are merged over whatever's on disk. Writes validate strictly and
|
|
@@ -15,6 +22,7 @@
|
|
|
15
22
|
import fs from 'node:fs';
|
|
16
23
|
import path from 'node:path';
|
|
17
24
|
import os from 'node:os';
|
|
25
|
+
import { detectMachine, recommendMlxModel, recommendClaudeModel } from './models.js';
|
|
18
26
|
|
|
19
27
|
// Env lookup mirrors server.js: prefer CLAUDE_CONTROL_<X>, fall back to the
|
|
20
28
|
// legacy COCKPIT_<X> so existing launchers keep working.
|
|
@@ -32,12 +40,23 @@ function configPath() {
|
|
|
32
40
|
}
|
|
33
41
|
|
|
34
42
|
const LAUNCH_MAX = 500;
|
|
43
|
+
const OPTIMIZE_MODEL_MAX = 200;
|
|
44
|
+
const CLAUDE_BIN_MAX = 500;
|
|
45
|
+
const MLX_MODEL_MAX = 200;
|
|
46
|
+
const OPTIMIZE_BACKENDS = ['mlx', 'claude', 'rules'];
|
|
35
47
|
|
|
36
48
|
/**
 * Build the default config object. Evaluated on every call (never cached) so
 * a changed HOME/env is honoured.
 */
function defaults() {
  const config = {
    launchCommand: 'claude',
    defaultCwd: os.homedir(),
  };
  config.optimizeModel = recommendClaudeModel();
  config.claudeBin = '';
  // Prompt-enhancer backend: 'mlx' (local model → claude → rules chain),
  // 'claude' (claude -p → rules), or 'rules' (deterministic, offline).
  config.optimizeBackend = 'mlx';
  // Default MLX model auto-picked for this machine's unified memory.
  const { ramGB } = detectMachine();
  config.mlxModel = recommendMlxModel(ramGB);
  return config;
}
|
|
43
62
|
|
|
@@ -45,7 +64,7 @@ function defaults() {
|
|
|
45
64
|
* Read the persisted config, merged over defaults. Never throws — a missing,
|
|
46
65
|
* empty, or corrupt file falls back to defaults. Only known keys are surfaced.
|
|
47
66
|
*
|
|
48
|
-
* @returns {{ launchCommand: string, defaultCwd: string }}
|
|
67
|
+
* @returns {{ launchCommand: string, defaultCwd: string, optimizeModel: string, claudeBin: string, optimizeBackend: string, mlxModel: string }}
|
|
49
68
|
*/
|
|
50
69
|
export function readConfig() {
|
|
51
70
|
const base = defaults();
|
|
@@ -65,6 +84,23 @@ export function readConfig() {
|
|
|
65
84
|
typeof parsed.defaultCwd === 'string' && parsed.defaultCwd.trim()
|
|
66
85
|
? parsed.defaultCwd
|
|
67
86
|
: base.defaultCwd,
|
|
87
|
+
optimizeModel:
|
|
88
|
+
typeof parsed.optimizeModel === 'string' && parsed.optimizeModel.trim()
|
|
89
|
+
? parsed.optimizeModel
|
|
90
|
+
: base.optimizeModel,
|
|
91
|
+
claudeBin:
|
|
92
|
+
typeof parsed.claudeBin === 'string'
|
|
93
|
+
? parsed.claudeBin
|
|
94
|
+
: base.claudeBin,
|
|
95
|
+
optimizeBackend:
|
|
96
|
+
typeof parsed.optimizeBackend === 'string' &&
|
|
97
|
+
OPTIMIZE_BACKENDS.includes(parsed.optimizeBackend)
|
|
98
|
+
? parsed.optimizeBackend
|
|
99
|
+
: base.optimizeBackend,
|
|
100
|
+
mlxModel:
|
|
101
|
+
typeof parsed.mlxModel === 'string' && parsed.mlxModel.trim()
|
|
102
|
+
? parsed.mlxModel
|
|
103
|
+
: base.mlxModel,
|
|
68
104
|
};
|
|
69
105
|
}
|
|
70
106
|
|
|
@@ -75,9 +111,12 @@ export function readConfig() {
|
|
|
75
111
|
* Validation:
|
|
76
112
|
* - launchCommand: non-empty string, ≤500 chars.
|
|
77
113
|
* - defaultCwd: a path that exists and is a directory.
|
|
114
|
+
* - optimizeModel: non-empty string, ≤200 chars.
|
|
115
|
+
* - claudeBin: string ≤500 chars; empty string is allowed (means auto-resolve).
|
|
116
|
+
* Existence is NOT verified at write time (path may differ across hosts).
* - optimizeBackend: one of 'mlx', 'claude', 'rules'.
* - mlxModel: non-empty string, ≤200 chars.
|
|
78
117
|
*
|
|
79
|
-
* @param {{ launchCommand?: unknown, defaultCwd?: unknown }} partial
|
|
80
|
-
* @returns {{ launchCommand: string, defaultCwd: string }} the saved config
|
|
118
|
+
* @param {{ launchCommand?: unknown, defaultCwd?: unknown, optimizeModel?: unknown, claudeBin?: unknown, optimizeBackend?: unknown, mlxModel?: unknown }} partial
|
|
119
|
+
* @returns {{ launchCommand: string, defaultCwd: string, optimizeModel: string, claudeBin: string, optimizeBackend: string, mlxModel: string }} the saved config
|
|
81
120
|
*/
|
|
82
121
|
export function writeConfig(partial = {}) {
|
|
83
122
|
const current = readConfig();
|
|
@@ -111,6 +150,47 @@ export function writeConfig(partial = {}) {
|
|
|
111
150
|
next.defaultCwd = cwd;
|
|
112
151
|
}
|
|
113
152
|
|
|
153
|
+
if (partial.optimizeModel !== undefined) {
|
|
154
|
+
const model = partial.optimizeModel;
|
|
155
|
+
if (typeof model !== 'string' || !model.trim()) {
|
|
156
|
+
throw new Error('optimizeModel must be a non-empty string');
|
|
157
|
+
}
|
|
158
|
+
if (model.length > OPTIMIZE_MODEL_MAX) {
|
|
159
|
+
throw new Error(`optimizeModel must be ≤${OPTIMIZE_MODEL_MAX} characters`);
|
|
160
|
+
}
|
|
161
|
+
next.optimizeModel = model;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
if (partial.claudeBin !== undefined) {
|
|
165
|
+
const bin = partial.claudeBin;
|
|
166
|
+
if (typeof bin !== 'string') {
|
|
167
|
+
throw new Error('claudeBin must be a string');
|
|
168
|
+
}
|
|
169
|
+
if (bin.length > CLAUDE_BIN_MAX) {
|
|
170
|
+
throw new Error(`claudeBin must be ≤${CLAUDE_BIN_MAX} characters`);
|
|
171
|
+
}
|
|
172
|
+
next.claudeBin = bin;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
if (partial.optimizeBackend !== undefined) {
|
|
176
|
+
const b = partial.optimizeBackend;
|
|
177
|
+
if (typeof b !== 'string' || !OPTIMIZE_BACKENDS.includes(b)) {
|
|
178
|
+
throw new Error(`optimizeBackend must be one of: ${OPTIMIZE_BACKENDS.join(', ')}`);
|
|
179
|
+
}
|
|
180
|
+
next.optimizeBackend = b;
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
if (partial.mlxModel !== undefined) {
|
|
184
|
+
const m = partial.mlxModel;
|
|
185
|
+
if (typeof m !== 'string' || !m.trim()) {
|
|
186
|
+
throw new Error('mlxModel must be a non-empty string');
|
|
187
|
+
}
|
|
188
|
+
if (m.length > MLX_MODEL_MAX) {
|
|
189
|
+
throw new Error(`mlxModel must be ≤${MLX_MODEL_MAX} characters`);
|
|
190
|
+
}
|
|
191
|
+
next.mlxModel = m;
|
|
192
|
+
}
|
|
193
|
+
|
|
114
194
|
const dir = dataDir();
|
|
115
195
|
fs.mkdirSync(dir, { recursive: true });
|
|
116
196
|
fs.writeFileSync(configPath(), JSON.stringify(next, null, 2), { mode: 0o600 });
|
package/lib/match.js
CHANGED
|
@@ -123,10 +123,23 @@ export function assignTranscripts(panes, candidates, opts = {}) {
|
|
|
123
123
|
}
|
|
124
124
|
|
|
125
125
|
// Pass 3 — most-recently-active remaining candidate.
|
|
126
|
+
// Gate: when the pane's process start time is known, only consider candidates
|
|
127
|
+
// whose last known activity (lastActivityMs, falling back to file mtime or
|
|
128
|
+
// birthtime) is at or after the pane started (minus startSlackMs). A transcript
|
|
129
|
+
// that was never touched after the pane launched cannot belong to it — that is
|
|
130
|
+
// the "fresh pane inherits old transcript" bug. When procStartMs is unknown,
|
|
131
|
+
// skip the gate so we don't regress panes with missing timing data.
|
|
132
|
+
// NOTE: --resume is safe: Claude appends a record to the old transcript on
|
|
133
|
+
// resume, bumping its mtime/lastActivityMs above the pane's start time.
|
|
126
134
|
for (const pane of ordered) {
|
|
127
135
|
if (result.has(pane.target)) continue;
|
|
128
136
|
let best = null;
|
|
129
137
|
for (const c of available(pane)) {
|
|
138
|
+
// Apply temporal gate only when pane start time is known.
|
|
139
|
+
if (pane.procStartMs != null) {
|
|
140
|
+
const candActive = c.lastActivityMs ?? c.mtime ?? c.birthtimeMs ?? null;
|
|
141
|
+
if (candActive != null && candActive < pane.procStartMs - startSlackMs) continue;
|
|
142
|
+
}
|
|
130
143
|
if (!best || (c.lastActivityMs ?? 0) > (best.lastActivityMs ?? 0)) best = c;
|
|
131
144
|
}
|
|
132
145
|
if (best) claim(pane, best);
|
package/lib/mlx.js
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/mlx.js — local LLM backend via a managed mlx_lm.server (Apple Silicon).
|
|
3
|
+
*
|
|
4
|
+
* Spawns a singleton OpenAI-compatible MLX server on first use, keeps it warm,
|
|
5
|
+
* and shuts it down after an idle period. No API key, no network — fully local.
|
|
6
|
+
* Used by the prompt enhancer as the first link in the mlx → claude → rules
|
|
7
|
+
* chain (server.js handleOptimize).
|
|
8
|
+
*
|
|
9
|
+
* Exports:
|
|
10
|
+
* - resolveMlxPython() → string | null (venv python that has mlx_lm)
|
|
11
|
+
* - serverBase(port) → string (pure)
|
|
12
|
+
* - buildChatBody(prompt, model, maxTokens) → object (pure)
|
|
13
|
+
* - parseChatContent(json) → string (pure; throws on bad/empty shape)
|
|
14
|
+
* - complete(prompt, { model, port, maxTokens }) → Promise<string>
|
|
15
|
+
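* - shutdown() (kill the child; for exit/tests)
* - isModelCached(id) → boolean (is the model already in the local HuggingFace cache?)
* - warm(port) (best-effort background pre-warm)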
|
|
16
|
+
*
|
|
17
|
+
* Config/env: model from config.mlxModel (default below); port via
|
|
18
|
+
* CLAUDE_CONTROL_MLX_PORT (default 4319); python via CLAUDE_CONTROL_MLX_PYTHON
|
|
19
|
+
* else ~/.claude-control/mlx-venv/bin/python else a PATH python3 with mlx_lm.
|
|
20
|
+
*/
|
|
21
|
+
import fs from 'node:fs';
|
|
22
|
+
import path from 'node:path';
|
|
23
|
+
import os from 'node:os';
|
|
24
|
+
import { spawn, execFileSync } from 'node:child_process';
|
|
25
|
+
import { readConfig } from './config.js';
|
|
26
|
+
|
|
27
|
+
export const DEFAULT_MODEL = 'mlx-community/Llama-3.2-3B-Instruct-4bit';
|
|
28
|
+
// Dedicated port for OUR managed sidecar. NOT 8080 — that's a very common port
|
|
29
|
+
// (LM Studio, other local LLM/TTS servers) and colliding makes us POST our model
|
|
30
|
+
// to a foreign server that can't serve it → hang. Overridable via env.
|
|
31
|
+
const DEFAULT_PORT = Number(process.env.CLAUDE_CONTROL_MLX_PORT) || 4319;
|
|
32
|
+
// How long a SINGLE request waits for the server to be ready before giving up
|
|
33
|
+
// and letting the caller fall back (to claude -p). The spawned server keeps
|
|
34
|
+
// loading in the background, so the next request finds it warm (~1s). Cold
|
|
35
|
+
// model load can take ~30-90s under launchd, so we never block a request that
|
|
36
|
+
// long — we fail over fast and warm up for next time.
|
|
37
|
+
const REQUEST_READY_MS = Number(process.env.CLAUDE_CONTROL_MLX_TIMEOUT_MS) || 8_000;
|
|
38
|
+
const IDLE_MS = 15 * 60_000; // free ~2GB after 15 min idle
|
|
39
|
+
const MAX_TOKENS = 700;
|
|
40
|
+
|
|
41
|
+
/**
 * Base URL of the MLX server on the loopback interface. Pure.
 * @param {number} [port] port to address; falls back to DEFAULT_PORT
 * @returns {string}
 */
export function serverBase(port = DEFAULT_PORT) {
  const loopback = 'http://127.0.0.1:';
  return loopback.concat(port);
}
|
|
45
|
+
|
|
46
|
+
/**
 * Find a python interpreter to run mlx_lm with.
 *
 * Lookup order: CLAUDE_CONTROL_MLX_PYTHON, then the managed venv under
 * ~/.claude-control/mlx-venv — either is accepted as soon as the file exists.
 * Otherwise the `python3` found on PATH is used, but only if it can
 * `import mlx_lm`.
 * @returns {string | null} interpreter path, or null when none qualifies
 */
export function resolveMlxPython() {
  const candidates = [
    process.env.CLAUDE_CONTROL_MLX_PYTHON,
    path.join(os.homedir(), '.claude-control', 'mlx-venv', 'bin', 'python'),
  ];
  const existing = candidates.find((file) => file && fs.existsSync(file));
  if (existing) return existing;

  try {
    const systemPy = execFileSync('which', ['python3'], { encoding: 'utf8' }).trim();
    if (!systemPy) return null;
    // Throws (non-zero exit) when the module is missing.
    execFileSync(systemPy, ['-c', 'import mlx_lm'], { stdio: 'ignore' });
    return systemPy;
  } catch {
    /* no mlx_lm on PATH python */
    return null;
  }
}
|
|
67
|
+
|
|
68
|
+
// ── server singleton ────────────────────────────────────────────────────────
|
|
69
|
+
let child = null;
|
|
70
|
+
let childModel = null; // model id the current child was spawned with
|
|
71
|
+
let idleTimer = null;
|
|
72
|
+
|
|
73
|
+
// (Re)arm the idle timer: any pending timer is cancelled and a fresh one is
// scheduled to call shutdown() after IDLE_MS. The timer is unref'd so it never
// keeps the process alive on its own.
function bumpIdle() {
  if (idleTimer) {
    clearTimeout(idleTimer);
  }
  const timer = setTimeout(() => {
    shutdown();
  }, IDLE_MS);
  idleTimer = timer;
  timer.unref?.();
}
|
|
78
|
+
|
|
79
|
+
/**
 * Stop the managed server: cancel the idle timer, SIGTERM the child if we have
 * one, and forget which model it served. No-op if there is no child.
 */
export function shutdown() {
  if (idleTimer) {
    clearTimeout(idleTimer);
    idleTimer = null;
  }
  const proc = child;
  if (proc) {
    try {
      proc.kill('SIGTERM');
    } catch {
      /* ignore */
    }
    child = null;
  }
  childModel = null;
}
|
|
88
|
+
|
|
89
|
+
// Resolves true when GET /v1/models on `port` answers with a 2xx status within
// 1.5 s; false on any other status, timeout or connection error.
async function ping(port) {
  try {
    const url = `${serverBase(port)}/v1/models`;
    const { ok } = await fetch(url, { signal: AbortSignal.timeout(1500) });
    return ok;
  } catch {
    return false;
  }
}
|
|
97
|
+
|
|
98
|
+
// Ask the server on `port` (GET /v1/models, 1.5 s timeout) for the id of the
// first model it lists. Resolves null when the request fails, the status is
// not 2xx, or the payload has no string id there.
async function servedModel(port) {
  try {
    const response = await fetch(`${serverBase(port)}/v1/models`, {
      signal: AbortSignal.timeout(1500),
    });
    if (!response.ok) return null;
    const payload = await response.json();
    const firstId = payload?.data?.[0]?.id;
    if (typeof firstId === 'string') return firstId;
    return null;
  } catch {
    return null;
  }
}
|
|
110
|
+
|
|
111
|
+
// Best-effort: SIGTERM whatever process is LISTENING on `port` (used to reclaim
// the port from an orphaned mlx server that's serving the wrong model). No-op
// if lsof/kill fail.
//
// Only listeners are targeted (`-sTCP:LISTEN`): a plain `lsof -i tcp:PORT` also
// lists every process holding a client socket to that port — which can include
// this very process right after it has fetched /v1/models over a kept-alive
// connection. The current pid is additionally skipped, and anything that is not
// a positive integer pid is ignored (pid 0 would signal the whole process group).
function freePort(port) {
  let listing;
  try {
    listing = execFileSync('lsof', ['-ti', `tcp:${port}`, '-sTCP:LISTEN'], {
      encoding: 'utf8',
    });
  } catch {
    /* nothing listening on the port, or lsof unavailable */
    return;
  }
  for (const line of listing.split('\n')) {
    const pid = Number(line.trim());
    if (!Number.isInteger(pid) || pid <= 0 || pid === process.pid) continue;
    try {
      process.kill(pid, 'SIGTERM');
    } catch {
      /* already gone */
    }
  }
}
|
|
123
|
+
|
|
124
|
+
/**
 * Is the model already in the local HuggingFace cache (so selecting it won't
 * trigger a multi-GB download)?
 *
 * The hub cache directory is `$HF_HUB_CACHE` when set, otherwise
 * `$HF_HOME/hub`, otherwise `~/.cache/huggingface/hub`. A model counts as
 * cached when `models--<org>--<name>/snapshots/` contains at least one
 * non-empty snapshot directory.
 * @param {string} id model id, e.g. `org/name`
 * @returns {boolean} false on any filesystem error
 */
export function isModelCached(id) {
  const hubDir =
    process.env.HF_HUB_CACHE ||
    path.join(process.env.HF_HOME || path.join(os.homedir(), '.cache', 'huggingface'), 'hub');
  const snapshots = path.join(hubDir, `models--${String(id).replace(/\//g, '--')}`, 'snapshots');
  try {
    // readdirSync throws when the directory is missing → reported as not cached.
    return fs.readdirSync(snapshots).some((revision) => {
      try {
        return fs.readdirSync(path.join(snapshots, revision)).length > 0;
      } catch {
        return false;
      }
    });
  } catch {
    return false;
  }
}
|
|
149
|
+
|
|
150
|
+
// Spawn the mlx_lm.server child and record it in `child` / `childModel`.
// Output is appended to ~/.claude-control/logs/mlx-server.log so a failed/slow
// start is diagnosable (stdio is ignored if the log can't be opened). Sets HOME
// explicitly (launchd may not). Throws if no python with mlx_lm can be resolved.
function spawnServer(model, port) {
  const py = resolveMlxPython();
  if (!py) {
    throw new Error(
      'mlx_lm not installed — create ~/.claude-control/mlx-venv and `pip install mlx-lm`',
    );
  }

  let logFd = null;
  try {
    const logPath = path.join(os.homedir(), '.claude-control', 'logs', 'mlx-server.log');
    fs.mkdirSync(path.dirname(logPath), { recursive: true });
    logFd = fs.openSync(logPath, 'a');
  } catch {
    /* fall back to ignored stdio */
  }
  const out = logFd ?? 'ignore';

  let proc;
  try {
    proc = spawn(
      py,
      ['-m', 'mlx_lm.server', '--model', model, '--host', '127.0.0.1', '--port', String(port)],
      { stdio: ['ignore', out, out], env: { ...process.env, HOME: os.homedir() } },
    );
  } finally {
    // The child holds its own duplicate of the descriptor once spawn() has
    // returned; closing ours avoids leaking one fd per (re)spawn.
    if (logFd !== null) {
      try { fs.closeSync(logFd); } catch { /* ignore */ }
    }
  }

  child = proc;
  childModel = model;
  // Only clear the singleton if it still points at THIS process: a late exit
  // from a child that has since been replaced must not drop the reference to
  // its successor (which would then never be killed on shutdown).
  const release = () => {
    if (child === proc) {
      child = null;
      childModel = null;
    }
  };
  proc.on('exit', release);
  // Without a listener a failed spawn surfaces as an unhandled 'error' event.
  proc.on('error', release);
}
|
|
175
|
+
|
|
176
|
+
// Ensure a server serving EXACTLY `model` is answering on `port`. Reuses our
// warm child or any server already serving the right model; otherwise restarts
// — killing a wrong-model child and reclaiming the port from a wrong-model
// orphan, so swapping models never POSTs a model the running server lacks (which
// would trigger an in-request download and hang). Waits only REQUEST_READY_MS;
// if the (new) model is still loading/downloading, throws so the caller falls
// back while it finishes in the background.
//
// A child of ours that was spawned for `model` and simply isn't answering yet is
// left alone: killing and respawning it on every request would restart the cold
// load each time and the server would never become warm.
async function ensureServer(model, port) {
  if (child && childModel === model && (await ping(port))) return;
  const served = await servedModel(port);
  if (served === model) return; // right model already up (orphan/external) → reuse

  // Evaluated after the awaits above, so a child that exited meanwhile is seen.
  const stillLoading = Boolean(child) && childModel === model && served === null;
  if (!stillLoading) {
    if (child) shutdown(); // our child is serving the wrong model → stop it
    if (served) freePort(port); // an orphan holds the port with the wrong model → reclaim
    spawnServer(model, port);
  }

  const deadline = Date.now() + REQUEST_READY_MS;
  while (Date.now() < deadline) {
    await new Promise((r) => setTimeout(r, 600));
    if ((await servedModel(port)) === model) return;
  }
  throw new Error('mlx server still warming up');
}
|
|
197
|
+
|
|
198
|
+
/**
|
|
199
|
+
* Build the OpenAI chat-completions request body. Pure.
|
|
200
|
+
* @param {string} prompt @param {string} model @param {number} [maxTokens]
|
|
201
|
+
*/
|
|
202
|
+
export function buildChatBody(prompt, model, maxTokens = MAX_TOKENS) {
|
|
203
|
+
return {
|
|
204
|
+
model,
|
|
205
|
+
messages: [{ role: 'user', content: prompt }],
|
|
206
|
+
max_tokens: maxTokens,
|
|
207
|
+
temperature: 0.2,
|
|
208
|
+
};
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
/**
 * Pull the assistant text (`choices[0].message.content`) out of an OpenAI
 * chat-completions response. Pure.
 * @param {any} json decoded response body
 * @returns {string} the content, untrimmed
 * @throws {Error} when the content is missing, not a string, or blank
 */
export function parseChatContent(json) {
  const text = json?.choices?.[0]?.message?.content;
  const usable = typeof text === 'string' && text.trim() !== '';
  if (!usable) {
    throw new Error('empty MLX completion');
  }
  return text;
}
|
|
220
|
+
|
|
221
|
+
/**
 * Fire-and-forget pre-warm: kick off ensureServer() for the configured model
 * (config.mlxModel, else DEFAULT_MODEL) and discard any rejection — including
 * the "still warming" one — while the child keeps loading. Call at startup when
 * the MLX backend is selected so the first real request is fast.
 * @param {number} [port]
 */
export function warm(port = DEFAULT_PORT) {
  const { mlxModel } = readConfig();
  const pending = ensureServer(mlxModel || DEFAULT_MODEL, port);
  pending.catch(() => {});
}
|
|
231
|
+
|
|
232
|
+
/**
 * Run one chat completion against the local MLX server, bringing the server up
 * first if needed. Any failure rejects so the caller can fall through to the
 * next backend.
 *
 * @param {string} prompt
 * @param {{ model?: string, port?: number, maxTokens?: number }} [opts]
 *   model falls back to config.mlxModel, then DEFAULT_MODEL.
 * @returns {Promise<string>} the assistant text
 */
export async function complete(prompt, { model, port = DEFAULT_PORT, maxTokens = MAX_TOKENS } = {}) {
  const modelId = model || readConfig().mlxModel || DEFAULT_MODEL;
  await ensureServer(modelId, port);

  const endpoint = `${serverBase(port)}/v1/chat/completions`;
  const payload = JSON.stringify(buildChatBody(prompt, modelId, maxTokens));
  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: payload,
    signal: AbortSignal.timeout(60_000),
  });
  if (!response.ok) {
    throw new Error(`MLX server HTTP ${response.status}`);
  }

  const parsed = await response.json();
  // A decoded response counts as activity: push the idle shutdown back.
  bumpIdle();
  return parseChatContent(parsed);
}
|
|
254
|
+
|
|
255
|
+
// Best-effort cleanup so the child server isn't orphaned when the parent exits
// cleanly: shutdown() runs on 'exit', and SIGTERM/SIGINT shut down then exit
// with code 0. (SIGKILL can't be trapped; an orphan is harmless — ensureServer
// reuses whatever is already answering on the port.)
process.on('exit', shutdown);
for (const signal of ['SIGTERM', 'SIGINT']) {
  process.on(signal, () => {
    shutdown();
    process.exit(0);
  });
}
|
package/lib/models.js
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/models.js — curated model catalogs + machine-aware recommendations.
|
|
3
|
+
*
|
|
4
|
+
* The enhancer's Claude and MLX models are picked from these fixed lists (the
|
|
5
|
+
* UI shows dropdowns, not freeform inputs, to minimise typos / bad ids). MLX
|
|
6
|
+
* picks are sized for Apple-Silicon unified memory (16–48 GB), and the default
|
|
7
|
+
* is chosen automatically from the host's detected RAM.
|
|
8
|
+
*
|
|
9
|
+
* Exports:
|
|
10
|
+
* - MLX_MODELS, CLAUDE_MODELS (catalogs)
|
|
11
|
+
* - detectMachine() → { ramGB, arch, platform, appleSilicon }
|
|
12
|
+
* - recommendMlxModel(ramGB) → id
|
|
13
|
+
* - recommendClaudeModel() → id
|
|
14
|
+
*/
|
|
15
|
+
import os from 'node:os';
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Curated MLX instruct models (4-bit, no "thinking" mode → clean JSON for the
|
|
19
|
+
* enhancer). `sizeGB` ≈ on-disk weights; `minRamGB` is the unified-memory tier
|
|
20
|
+
* at/above which the model is a comfortable pick alongside other apps.
|
|
21
|
+
* @type {{ id: string, label: string, sizeGB: number, minRamGB: number }[]}
|
|
22
|
+
*/
|
|
23
|
+
export const MLX_MODELS = [
|
|
24
|
+
{ id: 'mlx-community/Llama-3.2-3B-Instruct-4bit', label: 'Llama 3.2 3B', sizeGB: 1.8, minRamGB: 16 },
|
|
25
|
+
{ id: 'mlx-community/Qwen2.5-3B-Instruct-4bit', label: 'Qwen2.5 3B', sizeGB: 1.8, minRamGB: 16 },
|
|
26
|
+
{ id: 'mlx-community/Qwen2.5-7B-Instruct-4bit', label: 'Qwen2.5 7B', sizeGB: 4.3, minRamGB: 24 },
|
|
27
|
+
{ id: 'mlx-community/Llama-3.1-8B-Instruct-4bit', label: 'Llama 3.1 8B', sizeGB: 4.5, minRamGB: 24 },
|
|
28
|
+
{ id: 'mlx-community/Qwen2.5-14B-Instruct-4bit', label: 'Qwen2.5 14B', sizeGB: 8.5, minRamGB: 32 },
|
|
29
|
+
{ id: 'mlx-community/Qwen2.5-32B-Instruct-4bit', label: 'Qwen2.5 32B', sizeGB: 18, minRamGB: 48 },
|
|
30
|
+
];
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Curated Claude models for the `claude -p` enhancer backend/fallback.
|
|
34
|
+
* @type {{ id: string, label: string }[]}
|
|
35
|
+
*/
|
|
36
|
+
export const CLAUDE_MODELS = [
|
|
37
|
+
{ id: 'claude-haiku-4-5', label: 'Haiku 4.5 — fast, cheap' },
|
|
38
|
+
{ id: 'claude-sonnet-4-6', label: 'Sonnet 4.6 — balanced' },
|
|
39
|
+
{ id: 'claude-opus-4-8', label: 'Opus 4.8 — most capable' },
|
|
40
|
+
];
|
|
41
|
+
|
|
42
|
+
/**
 * Report the host facts used for model selection.
 * @returns {{ ramGB: number, arch: string, platform: string, appleSilicon: boolean }}
 *   ramGB is total memory rounded to the nearest GiB; appleSilicon is true only
 *   for darwin on arm64.
 */
export function detectMachine() {
  const bytesPerGiB = 1024 ** 3;
  const info = { ramGB: Math.round(os.totalmem() / bytesPerGiB) };
  info.arch = os.arch();
  info.platform = os.platform();
  info.appleSilicon = info.platform === 'darwin' && info.arch === 'arm64';
  return info;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Pick an MLX model id for a unified-memory size. Deliberately conservative so
 * the model stays snappy next to the user's other apps:
 * below 24 GB → 3B, 24 GB up to (not including) 48 GB → 7B, 48 GB and up → 14B.
 * @param {number} ramGB
 * @returns {string}
 */
export function recommendMlxModel(ramGB) {
  // Highest tier first; the first threshold the machine meets wins.
  const tiers = [
    [48, 'mlx-community/Qwen2.5-14B-Instruct-4bit'],
    [24, 'mlx-community/Qwen2.5-7B-Instruct-4bit'],
  ];
  for (const [minRamGB, id] of tiers) {
    if (ramGB >= minRamGB) return id;
  }
  return 'mlx-community/Llama-3.2-3B-Instruct-4bit';
}
|
|
62
|
+
|
|
63
|
+
/**
 * Default Claude model for the enhancer. The task is short and cheap, so Haiku
 * is the sensible pick.
 * @returns {string}
 */
export function recommendClaudeModel() {
  const enhancerDefault = 'claude-haiku-4-5';
  return enhancerDefault;
}
|