escribano 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,16 @@
1
1
  /**
2
- * Escribano - Intelligence Adapter (MLX-VLM)
2
+ * Escribano - Intelligence Adapter (MLX)
3
3
  *
4
- * Implements IntelligenceService using MLX-VLM via Unix domain socket.
5
- * Uses interleaved batching for 4.7x speedup over Ollama sequential processing.
4
+ * Implements IntelligenceService using MLX-VLM and MLX-LM via Unix domain sockets.
5
+ * Uses separate bridge processes for VLM (frame analysis) and LLM (text generation).
6
6
  *
7
7
  * Architecture:
8
- * TypeScript (this file) <--Unix Socket--> Python (mlx_bridge.py)
8
+ * TypeScript (this file) <--Unix Socket--> Python (mlx_bridge.py --mode vlm)
9
+ * TypeScript (this file) <--Unix Socket--> Python (mlx_bridge.py --mode llm)
10
+ *
11
+ * The caller only sees a single IntelligenceService. Internally, we manage:
12
+ * - VLM bridge: spawns lazily on describeImages(), uses -vlm.sock
13
+ * - LLM bridge: spawns lazily on generateText(), uses -llm.sock
9
14
  *
10
15
  * See docs/adr/006-mlx-vlm-adapter.md for full design.
11
16
  */
@@ -15,29 +20,16 @@ import { createConnection } from 'node:net';
15
20
  import { dirname, resolve } from 'node:path';
16
21
  import { fileURLToPath } from 'node:url';
17
22
  const __dirname = dirname(fileURLToPath(import.meta.url));
18
- import { ESCRIBANO_HOME, ESCRIBANO_VENV, ESCRIBANO_VENV_PYTHON, getPythonPath, } from '../python-utils.js';
19
- const DEBUG_MLX = process.env.ESCRIBANO_VERBOSE === 'true';
23
+ import { loadConfig } from '../config.js';
24
+ import { getDbPath } from '../db/index.js';
25
+ import { ESCRIBANO_HOME, ESCRIBANO_VENV_PYTHON, getPythonPath, } from '../python-utils.js';
26
+ import { selectBestMLXModel } from '../utils/model-detector.js';
20
27
  function debugLog(...args) {
21
- if (DEBUG_MLX) {
22
- console.log('[VLM] [MLX]', ...args);
28
+ const config = loadConfig();
29
+ if (config.verbose) {
30
+ console.log('[MLX]', ...args);
23
31
  }
24
32
  }
25
- const DEFAULT_CONFIG = {
26
- model: process.env.ESCRIBANO_VLM_MODEL ??
27
- 'mlx-community/Qwen3-VL-2B-Instruct-bf16',
28
- batchSize: Number(process.env.ESCRIBANO_VLM_BATCH_SIZE) || 4,
29
- maxTokens: Number(process.env.ESCRIBANO_VLM_MAX_TOKENS) || 2000,
30
- socketPath: process.env.ESCRIBANO_MLX_SOCKET_PATH ?? '/tmp/escribano-mlx.sock',
31
- bridgeScript: resolve(__dirname, '../../scripts/mlx_bridge.py'),
32
- startupTimeout: Number(process.env.ESCRIBANO_MLX_STARTUP_TIMEOUT) || 120000,
33
- };
34
- /** pip binary inside Escribano's managed venv. */
35
- const _ESCRIBANO_VENV_PIP = resolve(ESCRIBANO_VENV, 'bin', 'pip');
36
- /**
37
- * Run a command, streaming stdout/stderr directly to the terminal.
38
- * Used for long-running setup tasks (venv creation, pip install) so the
39
- * user can see progress in real time.
40
- */
41
33
  function runVisible(cmd, args) {
42
34
  return new Promise((res, rej) => {
43
35
  const proc = spawn(cmd, args, { stdio: 'inherit' });
@@ -45,9 +37,6 @@ function runVisible(cmd, args) {
45
37
  proc.on('error', rej);
46
38
  });
47
39
  }
48
- /**
49
- * Run a command silently (discard output). Used for quick probe checks.
50
- */
51
40
  function runSilent(cmd, args) {
52
41
  return new Promise((res, rej) => {
53
42
  const proc = spawn(cmd, args, { stdio: 'ignore' });
@@ -55,25 +44,19 @@ function runSilent(cmd, args) {
55
44
  proc.on('error', rej);
56
45
  });
57
46
  }
58
- /**
59
- * Ensure ~/.escribano/venv exists and has mlx-vlm installed.
60
- * Uses plain `python3 -m venv` — no uv, no pip flags, no fuss.
61
- * On first run this takes a few minutes; subsequent runs are instant.
62
- */
63
47
  async function ensureEscribanoVenv() {
64
48
  if (!existsSync(ESCRIBANO_HOME)) {
65
49
  mkdirSync(ESCRIBANO_HOME, { recursive: true });
66
50
  }
67
51
  if (!existsSync(ESCRIBANO_VENV_PYTHON)) {
68
- console.log('[VLM] First-time setup: creating Python environment at ~/.escribano/venv');
69
- await runVisible('python3', ['-m', 'venv', ESCRIBANO_VENV]);
52
+ console.log('[MLX] First-time setup: creating Python environment at ~/.escribano/venv');
53
+ await runVisible('python3', ['-m', 'venv', `${ESCRIBANO_HOME}/venv`]);
70
54
  }
71
- // Check whether mlx-vlm and required runtime deps are already importable (~0.3s probe)
72
55
  let mlxReady = false;
73
56
  try {
74
57
  await runSilent(ESCRIBANO_VENV_PYTHON, [
75
58
  '-c',
76
- 'import mlx_vlm; import torch; import torchvision',
59
+ 'import mlx_vlm; import mlx_lm; import torch; import torchvision',
77
60
  ]);
78
61
  mlxReady = true;
79
62
  }
@@ -81,13 +64,12 @@ async function ensureEscribanoVenv() {
81
64
  // not installed yet
82
65
  }
83
66
  if (!mlxReady) {
84
- console.log('[VLM] Installing mlx-vlm into ~/.escribano/venv (first run — this may take a few minutes)...');
85
- // Ensure pip is available in the venv; ignore failures if ensurepip is disabled.
67
+ console.log('[MLX] Installing mlx-vlm into ~/.escribano/venv (first run — this may take a few minutes)...');
86
68
  try {
87
69
  await runVisible(ESCRIBANO_VENV_PYTHON, ['-m', 'ensurepip', '--upgrade']);
88
70
  }
89
71
  catch {
90
- // ensurepip may be unavailable; continue and rely on existing pip if present.
72
+ // ensurepip may be unavailable
91
73
  }
92
74
  await runVisible(ESCRIBANO_VENV_PYTHON, [
93
75
  '-m',
@@ -96,25 +78,16 @@ async function ensureEscribanoVenv() {
96
78
  'mlx-vlm',
97
79
  'torch',
98
80
  'torchvision',
81
+ 'mlx-lm',
99
82
  ]);
100
- console.log('[VLM] mlx-vlm installed successfully.');
83
+ console.log('[MLX] mlx-vlm and mlx-lm installed successfully.');
101
84
  }
102
85
  return ESCRIBANO_VENV_PYTHON;
103
86
  }
104
- /**
105
- * Resolve the Python executable to use for the MLX bridge.
106
- * If the user has configured an explicit environment, use it.
107
- * Otherwise, transparently create and populate ~/.escribano/venv.
108
- */
109
87
  export async function resolvePythonPath() {
110
88
  return getPythonPath() ?? ensureEscribanoVenv();
111
89
  }
112
- // Global cleanup function to track the current bridge instance
113
90
  let globalCleanup = null;
114
- /**
115
- * Cleanup the MLX bridge process.
116
- * Should be called explicitly before process exit.
117
- */
118
91
  export function cleanupMlxBridge() {
119
92
  if (globalCleanup) {
120
93
  debugLog('Explicit cleanup called');
@@ -122,87 +95,123 @@ export function cleanupMlxBridge() {
122
95
  globalCleanup = null;
123
96
  }
124
97
  }
125
- /**
126
- * Create MLX-VLM intelligence service.
127
- *
128
- * Note: This adapter only implements describeImages() for VLM processing.
129
- * Other methods (classify, generate, etc.) are not implemented and will throw.
130
- */
131
98
  export function createMlxIntelligenceService(_config = {}) {
132
- const mlxConfig = { ...DEFAULT_CONFIG };
133
- const bridge = {
99
+ // Load unified config (respects env vars, config file, and RAM-aware defaults)
100
+ const config = loadConfig();
101
+ const mlxConfig = {
102
+ model: config.vlmModel,
103
+ batchSize: config.vlmBatchSize,
104
+ maxTokens: config.vlmMaxTokens,
105
+ socketPath: config.mlxSocketPath,
106
+ bridgeScript: resolve(__dirname, '../../scripts/mlx_bridge.py'),
107
+ startupTimeout: config.mlxStartupTimeout,
108
+ };
109
+ const vlmBridge = {
110
+ process: null,
111
+ socket: null,
112
+ ready: false,
113
+ connecting: false,
114
+ };
115
+ const llmBridge = {
134
116
  process: null,
135
117
  socket: null,
136
118
  ready: false,
137
119
  connecting: false,
120
+ loadedModel: null,
138
121
  };
139
- // Cleanup on process exit
122
+ const getVlmSocketPath = () => mlxConfig.socketPath.replace('.sock', '-vlm.sock');
123
+ const getLlmSocketPath = () => mlxConfig.socketPath.replace('.sock', '-llm.sock');
140
124
  const cleanup = () => {
141
- if (bridge.socket) {
125
+ if (vlmBridge.socket) {
142
126
  try {
143
- bridge.socket.destroy();
127
+ vlmBridge.socket.destroy();
144
128
  }
145
- catch {
146
- // Ignore
129
+ catch { }
130
+ vlmBridge.socket = null;
131
+ }
132
+ if (vlmBridge.process) {
133
+ try {
134
+ vlmBridge.process.kill('SIGTERM');
147
135
  }
148
- bridge.socket = null;
136
+ catch { }
137
+ vlmBridge.process = null;
149
138
  }
150
- if (bridge.process) {
139
+ const vlmSock = getVlmSocketPath();
140
+ if (existsSync(vlmSock)) {
151
141
  try {
152
- bridge.process.kill('SIGTERM');
142
+ unlinkSync(vlmSock);
153
143
  }
154
- catch {
155
- // Ignore
144
+ catch { }
145
+ }
146
+ vlmBridge.ready = false;
147
+ if (llmBridge.socket) {
148
+ try {
149
+ llmBridge.socket.destroy();
156
150
  }
157
- bridge.process = null;
151
+ catch { }
152
+ llmBridge.socket = null;
158
153
  }
159
- // Clean up socket file if it exists
160
- if (existsSync(mlxConfig.socketPath)) {
154
+ if (llmBridge.process) {
161
155
  try {
162
- unlinkSync(mlxConfig.socketPath);
156
+ llmBridge.process.kill('SIGTERM');
163
157
  }
164
- catch {
165
- // Ignore
158
+ catch { }
159
+ llmBridge.process = null;
160
+ }
161
+ const llmSock = getLlmSocketPath();
162
+ if (existsSync(llmSock)) {
163
+ try {
164
+ unlinkSync(llmSock);
166
165
  }
166
+ catch { }
167
167
  }
168
- bridge.ready = false;
168
+ llmBridge.ready = false;
169
+ llmBridge.loadedModel = null;
169
170
  };
170
- // Register global cleanup
171
171
  globalCleanup = cleanup;
172
- // Also cleanup on process signals
173
172
  process.on('SIGTERM', cleanup);
174
173
  process.on('SIGINT', cleanup);
175
- // Cleanup on beforeExit to ensure it runs before process.exit
176
174
  process.on('beforeExit', cleanup);
177
- /**
178
- * Start the Python bridge process.
179
- */
180
- const startBridge = async () => {
181
- if (bridge.process && bridge.ready) {
175
+ const startBridge = async (bridgeState, mode, _socketPath) => {
176
+ if (bridgeState.process && bridgeState.ready)
182
177
  return;
183
- }
184
- debugLog('Starting MLX bridge...');
185
- // Resolve (and if needed, auto-create) the Python environment before spawning.
178
+ debugLog(`Starting ${mode.toUpperCase()} bridge...`);
186
179
  const pythonPath = await resolvePythonPath();
187
180
  debugLog(`Using Python: ${pythonPath}`);
188
- return new Promise((resolve, reject) => {
189
- bridge.process = spawn(pythonPath, [mlxConfig.bridgeScript], {
181
+ return new Promise((resolvePromise, rejectPromise) => {
182
+ const env = {
183
+ ...process.env,
184
+ ESCRIBANO_MLX_SOCKET_PATH: mlxConfig.socketPath,
185
+ ESCRIBANO_DB_PATH: getDbPath(),
186
+ ESCRIBANO_DEBUG_LLM: String(config.debugLlm),
187
+ };
188
+ // Debug: log env vars being passed to Python bridge
189
+ if (config.debugLlm) {
190
+ console.log(`[MLX] Passing DEBUG_LLM=${config.debugLlm} to ${mode} bridge`);
191
+ console.log(`[MLX] DB_PATH: ${getDbPath()}`);
192
+ }
193
+ if (mode === 'vlm') {
194
+ env.ESCRIBANO_VLM_MODEL = mlxConfig.model;
195
+ env.ESCRIBANO_VLM_BATCH_SIZE = String(mlxConfig.batchSize);
196
+ env.ESCRIBANO_VLM_MAX_TOKENS = String(mlxConfig.maxTokens);
197
+ }
198
+ bridgeState.process = spawn(pythonPath, [mlxConfig.bridgeScript, '--mode', mode], {
190
199
  stdio: ['ignore', 'pipe', 'pipe'],
191
- env: {
192
- ...process.env,
193
- ESCRIBANO_VLM_MODEL: mlxConfig.model,
194
- ESCRIBANO_VLM_BATCH_SIZE: String(mlxConfig.batchSize),
195
- ESCRIBANO_VLM_MAX_TOKENS: String(mlxConfig.maxTokens),
196
- ESCRIBANO_MLX_SOCKET_PATH: mlxConfig.socketPath,
197
- },
200
+ env,
198
201
  });
199
- if (!bridge.process.stdout || !bridge.process.stderr) {
200
- reject(new Error('Failed to create bridge process streams'));
202
+ if (!bridgeState.process.stdout || !bridgeState.process.stderr) {
203
+ rejectPromise(new Error('Failed to create bridge process streams'));
201
204
  return;
202
205
  }
203
- // Handle stdout (ready signal is JSON on first line)
204
206
  let readyReceived = false;
205
- bridge.process.stdout.on('data', (data) => {
207
+ let startupTimer = null;
208
+ const clearStartupTimer = () => {
209
+ if (startupTimer) {
210
+ clearTimeout(startupTimer);
211
+ startupTimer = null;
212
+ }
213
+ };
214
+ bridgeState.process.stdout.on('data', (data) => {
206
215
  const lines = data.toString().trim().split('\n');
207
216
  for (const line of lines) {
208
217
  if (!readyReceived && line.startsWith('{')) {
@@ -210,92 +219,91 @@ export function createMlxIntelligenceService(_config = {}) {
210
219
  const msg = JSON.parse(line);
211
220
  if (msg.status === 'ready') {
212
221
  readyReceived = true;
213
- bridge.ready = true;
214
- debugLog(`Bridge ready: ${msg.model}`);
215
- resolve();
222
+ clearStartupTimer();
223
+ bridgeState.ready = true;
224
+ debugLog(`${mode.toUpperCase()} bridge ready: ${msg.model || msg.mode}`);
225
+ resolvePromise();
216
226
  }
217
227
  }
218
- catch {
219
- // Not JSON, ignore
220
- }
228
+ catch { }
221
229
  }
222
230
  }
223
231
  });
224
- // Handle stderr (logs from Python)
225
- bridge.process.stderr.on('data', (data) => {
232
+ bridgeState.process.stderr.on('data', (data) => {
226
233
  const text = data.toString().trim();
227
- if (text) {
234
+ if (text)
228
235
  console.log(text);
229
- }
230
236
  });
231
- // Handle process exit
232
- bridge.process.on('exit', (code, signal) => {
233
- debugLog(`Bridge exited: code=${code} signal=${signal}`);
234
- bridge.process = null;
235
- bridge.ready = false;
237
+ bridgeState.process.on('exit', (code, signal) => {
238
+ debugLog(`${mode.toUpperCase()} bridge exited: code=${code} signal=${signal}`);
239
+ bridgeState.process = null;
240
+ bridgeState.ready = false;
236
241
  if (!readyReceived) {
237
- reject(new Error(`Bridge failed to start: exit code ${code}`));
242
+ clearStartupTimer();
243
+ rejectPromise(new Error(`${mode.toUpperCase()} bridge failed to start: exit code ${code}`));
238
244
  }
239
245
  });
240
- bridge.process.on('error', (err) => {
241
- debugLog(`Bridge error: ${err.message}`);
246
+ bridgeState.process.on('error', (err) => {
247
+ debugLog(`${mode.toUpperCase()} bridge error: ${err.message}`);
242
248
  if (!readyReceived) {
243
- reject(new Error(`Failed to start bridge: ${err.message}`));
249
+ clearStartupTimer();
250
+ rejectPromise(new Error(`Failed to start ${mode.toUpperCase()} bridge: ${err.message}`));
244
251
  }
245
252
  });
246
- // Timeout for ready signal
247
- setTimeout(() => {
253
+ startupTimer = setTimeout(() => {
248
254
  if (!readyReceived) {
249
- reject(new Error(`Bridge startup timeout (${mlxConfig.startupTimeout / 1000}s)`));
255
+ startupTimer = null;
256
+ rejectPromise(new Error(`${mode.toUpperCase()} bridge startup timeout (${mlxConfig.startupTimeout / 1000}s)`));
250
257
  }
251
258
  }, mlxConfig.startupTimeout);
252
259
  });
253
260
  };
254
- /**
255
- * Connect to the Unix socket.
256
- */
257
- const connect = () => {
258
- return new Promise((resolve, reject) => {
259
- if (bridge.socket && !bridge.socket.destroyed) {
260
- resolve(bridge.socket);
261
+ const connect = (bridgeState, socketPath) => {
262
+ return new Promise((resolvePromise, rejectPromise) => {
263
+ if (bridgeState.socket && !bridgeState.socket.destroyed) {
264
+ resolvePromise(bridgeState.socket);
261
265
  return;
262
266
  }
263
- debugLog(`Connecting to socket: ${mlxConfig.socketPath}`);
264
- const client = createConnection(mlxConfig.socketPath);
267
+ let connectionTimer = null;
268
+ const clearConnectionTimer = () => {
269
+ if (connectionTimer) {
270
+ clearTimeout(connectionTimer);
271
+ connectionTimer = null;
272
+ }
273
+ };
274
+ debugLog(`Connecting to socket: ${socketPath}`);
275
+ const client = createConnection(socketPath);
265
276
  client.on('connect', () => {
277
+ clearConnectionTimer();
266
278
  debugLog('Socket connected');
267
- bridge.socket = client;
268
- resolve(client);
279
+ bridgeState.socket = client;
280
+ resolvePromise(client);
269
281
  });
270
282
  client.on('error', (err) => {
283
+ clearConnectionTimer();
271
284
  debugLog(`Socket error: ${err.message}`);
272
- bridge.socket = null;
273
- reject(new Error(`Socket connection failed: ${err.message}`));
285
+ bridgeState.socket = null;
286
+ rejectPromise(new Error(`Socket connection failed: ${err.message}`));
274
287
  });
275
288
  client.on('close', () => {
276
289
  debugLog('Socket closed');
277
- bridge.socket = null;
290
+ bridgeState.socket = null;
278
291
  });
279
- // Timeout
280
- setTimeout(() => {
281
- if (!bridge.socket) {
292
+ connectionTimer = setTimeout(() => {
293
+ if (!bridgeState.socket) {
294
+ connectionTimer = null;
282
295
  client.destroy();
283
- reject(new Error('Socket connection timeout'));
296
+ rejectPromise(new Error('Socket connection timeout'));
284
297
  }
285
298
  }, 5000);
286
299
  });
287
300
  };
288
- /**
289
- * Send request and receive streaming NDJSON responses.
290
- */
291
- const sendRequest = async (request, onBatch) => {
292
- // Ensure bridge is running
293
- if (!bridge.ready) {
294
- await startBridge();
301
+ const sendRequest = async (bridgeState, socketPath, mode, request, onBatch) => {
302
+ if (!bridgeState.ready) {
303
+ await startBridge(bridgeState, mode, socketPath);
295
304
  }
296
- // Connect to socket
297
- const socket = await connect();
298
- return new Promise((resolve, reject) => {
305
+ const socket = await connect(bridgeState, socketPath);
306
+ return new Promise((resolvePromise, rejectPromise) => {
299
307
  const responses = [];
300
308
  let buffer = '';
301
309
  const onData = (chunk) => {
@@ -309,19 +317,16 @@ export function createMlxIntelligenceService(_config = {}) {
309
317
  try {
310
318
  const response = JSON.parse(line);
311
319
  if ('error' in response && response.error) {
312
- // Error response
313
- reject(new Error(response.error));
320
+ rejectPromise(new Error(response.error));
314
321
  socket.off('data', onData);
315
322
  return;
316
323
  }
324
+ responses.push(response);
317
325
  if ('done' in response && response.done) {
318
- // Final response
319
326
  socket.off('data', onData);
320
- resolve(responses);
327
+ resolvePromise(responses);
321
328
  return;
322
329
  }
323
- // Batch response
324
- responses.push(response);
325
330
  if (onBatch && 'progress' in response) {
326
331
  const resp = response;
327
332
  onBatch(response, resp.progress);
@@ -329,52 +334,32 @@ export function createMlxIntelligenceService(_config = {}) {
329
334
  }
330
335
  catch {
331
336
  debugLog(`Failed to parse response: ${line}`);
332
- // Continue processing, might be partial
333
337
  }
334
338
  }
335
339
  };
336
340
  socket.on('data', onData);
337
341
  socket.on('error', (err) => {
338
342
  socket.off('data', onData);
339
- reject(new Error(`Socket error: ${err.message}`));
343
+ rejectPromise(new Error(`Socket error: ${err.message}`));
340
344
  });
341
- // Send request
342
345
  const requestJson = `${JSON.stringify(request)}\n`;
343
346
  debugLog(`Sending request: id=${request.id} method=${request.method}`);
344
347
  socket.write(requestJson);
345
348
  });
346
349
  };
347
- // Return IntelligenceService implementation
348
350
  return {
349
- /**
350
- * Classify transcript - NOT IMPLEMENTED for MLX backend.
351
- */
352
351
  async classify(_transcript, _visualLogs) {
353
- throw new Error('MLX adapter does not support classify(). Use Ollama backend for this operation.');
352
+ throw new Error('MLX adapter does not support classify(). Use Ollama backend.');
354
353
  },
355
- /**
356
- * Classify segment - NOT IMPLEMENTED for MLX backend.
357
- */
358
354
  async classifySegment(_segment, _transcript) {
359
- throw new Error('MLX adapter does not support classifySegment(). Use Ollama backend for this operation.');
355
+ throw new Error('MLX adapter does not support classifySegment(). Use Ollama backend.');
360
356
  },
361
- /**
362
- * Extract metadata - NOT IMPLEMENTED for MLX backend.
363
- */
364
357
  async extractMetadata(_transcript, _classification, _visualLogs) {
365
- throw new Error('MLX adapter does not support extractMetadata(). Use Ollama backend for this operation.');
358
+ throw new Error('MLX adapter does not support extractMetadata(). Use Ollama backend.');
366
359
  },
367
- /**
368
- * Generate artifact - NOT IMPLEMENTED for MLX backend.
369
- */
370
360
  async generate(_artifactType, _context) {
371
- throw new Error('MLX adapter does not support generate(). Use Ollama backend for this operation.');
361
+ throw new Error('MLX adapter does not support generate(). Use Ollama backend.');
372
362
  },
373
- /**
374
- * Describe images using MLX-VLM with interleaved batching.
375
- *
376
- * This is the primary method for VLM frame processing.
377
- */
378
363
  async describeImages(images, options = {}) {
379
364
  const total = images.length;
380
365
  if (total === 0) {
@@ -390,12 +375,10 @@ export function createMlxIntelligenceService(_config = {}) {
390
375
  if (response.results) {
391
376
  for (const result of response.results) {
392
377
  allResults.push(result);
393
- // Fire callback for each frame
394
378
  if (options.onImageProcessed) {
395
379
  options.onImageProcessed(result, progress);
396
380
  }
397
381
  }
398
- // Log progress every 10 frames
399
382
  if (progress.current % 10 === 0 ||
400
383
  progress.current === progress.total) {
401
384
  console.log(`[VLM] [${progress.current}/${progress.total}] frames processed`);
@@ -403,7 +386,7 @@ export function createMlxIntelligenceService(_config = {}) {
403
386
  }
404
387
  };
405
388
  try {
406
- await sendRequest({
389
+ await sendRequest(vlmBridge, getVlmSocketPath(), 'vlm', {
407
390
  id: requestId,
408
391
  method: 'describe_images',
409
392
  params: {
@@ -427,29 +410,120 @@ export function createMlxIntelligenceService(_config = {}) {
427
410
  throw new Error(`MLX VLM processing failed: ${message}`);
428
411
  }
429
412
  },
430
- /**
431
- * Embed text - NOT IMPLEMENTED for MLX backend.
432
- */
433
413
  async embedText(_texts, _options) {
434
- throw new Error('MLX adapter does not support embedText(). Use Ollama backend for this operation.');
414
+ throw new Error('MLX adapter does not support embedText(). Use Ollama backend.');
435
415
  },
436
- /**
437
- * Extract topics - NOT IMPLEMENTED for MLX backend.
438
- */
439
416
  async extractTopics(_observations) {
440
- throw new Error('MLX adapter does not support extractTopics(). Use Ollama backend for this operation.');
417
+ throw new Error('MLX adapter does not support extractTopics(). Use Ollama backend.');
418
+ },
419
+ async generateText(prompt, options) {
420
+ const modelSelection = await selectBestMLXModel();
421
+ const resolvedModel = options?.model || modelSelection.model;
422
+ const requestId = Date.now();
423
+ const llmSocketPath = getLlmSocketPath();
424
+ try {
425
+ if (llmBridge.loadedModel !== resolvedModel) {
426
+ if (llmBridge.loadedModel) {
427
+ debugLog(`Unloading previous LLM model: ${llmBridge.loadedModel}`);
428
+ await sendRequest(llmBridge, llmSocketPath, 'llm', {
429
+ id: requestId,
430
+ method: 'unload_llm',
431
+ params: {},
432
+ });
433
+ }
434
+ debugLog(`Loading LLM model: ${resolvedModel}`);
435
+ console.log(`[LLM] Loading model: ${resolvedModel}`);
436
+ try {
437
+ await sendRequest(llmBridge, llmSocketPath, 'llm', {
438
+ id: requestId + 1,
439
+ method: 'load_llm',
440
+ params: { model: resolvedModel },
441
+ });
442
+ llmBridge.loadedModel = resolvedModel;
443
+ console.log('[LLM] Model loaded');
444
+ }
445
+ catch (loadError) {
446
+ llmBridge.loadedModel = null;
447
+ throw loadError;
448
+ }
449
+ }
450
+ debugLog(`Generating text (${prompt.length} chars)...`);
451
+ const responses = await sendRequest(llmBridge, llmSocketPath, 'llm', {
452
+ id: requestId + 2,
453
+ method: 'generate_text',
454
+ params: {
455
+ rawPrompt: prompt,
456
+ maxTokens: options?.numPredict ?? 8000,
457
+ temperature: 0.7,
458
+ think: options?.think ?? false,
459
+ debugContext: options?.debugContext,
460
+ },
461
+ });
462
+ if (responses.length === 0) {
463
+ throw new Error('No response from LLM generation');
464
+ }
465
+ const response = responses[0];
466
+ if (response.error) {
467
+ throw new Error(`Text generation failed: ${response.error}`);
468
+ }
469
+ debugLog(`Generated ${response.text?.length || 0} chars`);
470
+ return response.text || '';
471
+ }
472
+ catch (error) {
473
+ const message = error.message;
474
+ console.error(`[LLM] ERROR: ${message}`);
475
+ throw error;
476
+ }
441
477
  },
442
- /**
443
- * Generate text - NOT IMPLEMENTED for MLX backend.
444
- */
445
- async generateText(_prompt, _options) {
446
- throw new Error('MLX adapter does not support generateText(). Use Ollama backend for this operation.');
478
+ async loadLlm(model) {
479
+ const requestId = Date.now();
480
+ const llmSocketPath = getLlmSocketPath();
481
+ if (llmBridge.loadedModel && llmBridge.loadedModel !== model) {
482
+ await sendRequest(llmBridge, llmSocketPath, 'llm', {
483
+ id: requestId,
484
+ method: 'unload_llm',
485
+ params: {},
486
+ });
487
+ }
488
+ try {
489
+ await sendRequest(llmBridge, llmSocketPath, 'llm', {
490
+ id: requestId + 1,
491
+ method: 'load_llm',
492
+ params: { model },
493
+ });
494
+ llmBridge.loadedModel = model;
495
+ }
496
+ catch (loadError) {
497
+ llmBridge.loadedModel = null;
498
+ throw loadError;
499
+ }
500
+ },
501
+ async unloadVlm() {
502
+ if (!vlmBridge.ready)
503
+ return;
504
+ const requestId = Date.now();
505
+ await sendRequest(vlmBridge, getVlmSocketPath(), 'vlm', {
506
+ id: requestId,
507
+ method: 'unload_vlm',
508
+ params: {},
509
+ });
510
+ },
511
+ async unloadLlm() {
512
+ if (!llmBridge.ready)
513
+ return;
514
+ const requestId = Date.now();
515
+ await sendRequest(llmBridge, getLlmSocketPath(), 'llm', {
516
+ id: requestId,
517
+ method: 'unload_llm',
518
+ params: {},
519
+ });
520
+ llmBridge.loadedModel = null;
447
521
  },
448
522
  getResourceName() {
449
523
  return 'mlx-python';
450
524
  },
451
525
  getPid() {
452
- return bridge.process?.pid ?? null;
526
+ return vlmBridge.process?.pid ?? llmBridge.process?.pid ?? null;
453
527
  },
454
528
  };
455
529
  }