navvi 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/mcp/server.mjs ADDED
@@ -0,0 +1,1278 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Navvi MCP Server v2.0.0 — persistent browser personas via Docker containers.
4
+ *
5
+ * Lifecycle:
6
+ * navvi_start (local|remote), navvi_stop, navvi_status, navvi_list
7
+ *
8
+ * Browser control (xdotool + Marionette via navvi-server.py):
9
+ * navvi_open, navvi_find, navvi_click, navvi_fill, navvi_press,
10
+ * navvi_drag, navvi_mousedown, navvi_mouseup, navvi_mousemove,
11
+ * navvi_scroll, navvi_screenshot, navvi_url, navvi_vnc
12
+ *
13
+ * Video recording:
14
+ * navvi_record_start, navvi_record_stop, navvi_record_gif
15
+ *
16
+ * Speaks MCP stdio protocol. Zero dependencies (Node built-ins only).
17
+ */
18
+
19
+ import http from 'http';
20
+ import { execSync, spawn } from 'child_process';
21
+ import fs from 'fs';
22
+ import path from 'path';
23
+ import os from 'os';
24
+
25
+ // --- Constants ---
26
+
27
// Root of the installed package. NAVVI_PACKAGE_DIR (set by bin/navvi.js) wins;
// otherwise use the parent of this file's directory, falling back to the
// current working directory when import.meta.dirname is not available.
const PACKAGE_DIR =
  process.env.NAVVI_PACKAGE_DIR ||
  (import.meta.dirname ? path.resolve(import.meta.dirname, '..') : process.cwd());

// GitHub repository used for remote (Codespace) mode; null when not configured.
const REPO = process.env.NAVVI_REPO || null;

// Codespace machine type passed to `gh cs create`.
const MACHINE_TYPE = process.env.NAVVI_MACHINE || 'basicLinux32gb';

// Default host ports for the navvi-server API and the noVNC viewer.
const NAVVI_PORT = 8024;
const VNC_PORT = 6080;

// Docker image to run and the prefix shared by all Navvi container names.
const DOCKER_IMAGE = process.env.NAVVI_IMAGE || 'navvi';
const CONTAINER_PREFIX = 'navvi-';

// State files kept in the OS temp directory.
const PIDFILE_FWD = path.join(os.tmpdir(), '.navvi-port-forward.pid');
const PIDFILE_RECORD = path.join(os.tmpdir(), '.navvi-ffmpeg.pid');
const STATEFILE = path.join(os.tmpdir(), '.navvi-mode');
const RECORDINGS_DIR = path.join(os.tmpdir(), 'navvi-recordings');
const ACTION_LOG = path.join(os.tmpdir(), '.navvi-actions.jsonl');

// Base URL of the API used when a call does not specify one.
let navviApi = `http://127.0.0.1:${NAVVI_PORT}`;

// Track active persona for default targeting
let activePersona = null;
46
+
47
+ // --- Helpers ---
48
+
49
/**
 * Append a timestamped action entry to ACTION_LOG while a recording is active
 * (the entries are used for smart trim).
 *
 * A recording counts as active when `<tmpdir>/.navvi-recording.json` parses as
 * JSON with a truthy `active` field. When the file is missing, unreadable, or
 * inactive, nothing is written.
 *
 * Logging is best-effort: a failure to serialize or append the entry is
 * reported on stderr and never thrown, so a broken log file cannot abort the
 * browser action that triggered the call.
 *
 * @param {string} action - Short action name (e.g. "click", "fill").
 * @param {*} detail - JSON-serializable detail describing the action.
 * @returns {void}
 */
function logAction(action, detail) {
  const stateFile = path.join(os.tmpdir(), '.navvi-recording.json');
  let recording;
  try {
    recording = JSON.parse(fs.readFileSync(stateFile, 'utf8'));
  } catch {
    return; // no readable recording state — not recording
  }
  if (!recording || !recording.active) return;

  try {
    const entry = JSON.stringify({ ts: Date.now(), action, detail });
    fs.appendFileSync(ACTION_LOG, entry + '\n');
  } catch (err) {
    // stdout carries the MCP protocol, so diagnostics go to stderr.
    process.stderr.write(`navvi: could not write action log: ${err.message}\n`);
  }
}
59
+
60
/**
 * Run a shell command synchronously (60 s timeout) and return its output.
 *
 * Never throws: on failure the trimmed stderr is returned when there is any,
 * otherwise the error message.
 *
 * @param {string} cmd - Command line passed to the shell.
 * @returns {string} Trimmed stdout on success; stderr or error message on failure.
 */
function sh(cmd) {
  let stdout;
  try {
    stdout = execSync(cmd, { encoding: 'utf8', timeout: 60000 });
  } catch (err) {
    if (err.stderr) return err.stderr.trim();
    return err.message;
  }
  return stdout.trim();
}
67
+
68
/**
 * Locate an executable using the system `which` command.
 *
 * @param {string} bin - Executable name to look up.
 * @returns {string|null} Trimmed output of `which`, or null when the lookup fails.
 */
function which(bin) {
  let located;
  try {
    located = execSync(`which ${bin} 2>/dev/null`, { encoding: 'utf8' }).trim();
  } catch {
    located = null;
  }
  return located;
}
75
+
76
/**
 * Run a gh CLI command, exposing CODESPACE_TOKEN to it as GH_TOKEN.
 *
 * Without CODESPACE_TOKEN the command is simply delegated to sh(). With it,
 * the command runs with the same options as sh() (utf8, 60 s timeout) plus the
 * extra environment variable, and likewise never throws.
 *
 * @param {string} cmd - Command line passed to the shell.
 * @returns {string} Trimmed stdout on success; stderr or error message on failure.
 */
function ghSh(cmd) {
  const token = process.env.CODESPACE_TOKEN;
  if (token) {
    const env = { ...process.env, GH_TOKEN: token };
    try {
      return execSync(cmd, { encoding: 'utf8', timeout: 60000, env }).trim();
    } catch (err) {
      if (err.stderr) return err.stderr.trim();
      return err.message;
    }
  }
  return sh(cmd);
}
90
+
91
/**
 * Terminate the process recorded in a pidfile, then delete the pidfile.
 *
 * Only a positive integer PID is signalled. A pidfile containing 0 or a
 * negative number would otherwise make process.kill() signal a whole process
 * group (including this server), so such content is treated as stale and the
 * file is just removed. All failures (process already gone, unreadable file,
 * file already deleted) are ignored — this is a best-effort cleanup.
 *
 * @param {string} pidfile - Path of the file holding the PID.
 * @returns {void}
 */
function killPidfile(pidfile) {
  if (!fs.existsSync(pidfile)) return;
  try {
    const pid = Number.parseInt(fs.readFileSync(pidfile, 'utf8').trim(), 10);
    if (Number.isInteger(pid) && pid > 0) process.kill(pid);
  } catch {
    // Process already exited or the pidfile could not be read.
  }
  try {
    fs.unlinkSync(pidfile);
  } catch {
    // Pidfile vanished in the meantime — nothing left to clean up.
  }
}
99
+
100
/**
 * Read the persisted mode from STATEFILE.
 *
 * @returns {string|null} Trimmed file contents, or null when the file cannot be read.
 */
function getMode() {
  let contents;
  try {
    contents = fs.readFileSync(STATEFILE, 'utf8');
  } catch {
    return null;
  }
  return contents.trim();
}
103
+
104
/**
 * Persist the current mode by overwriting STATEFILE.
 *
 * @param {string} mode - Mode string to store (e.g. "local" or "remote:<codespace>").
 * @returns {void}
 */
function setMode(mode) {
  const target = STATEFILE;
  fs.writeFileSync(target, mode);
}
107
+
108
/**
 * Forget the persisted mode by deleting STATEFILE.
 * Deletion errors (such as the file not existing) are ignored.
 *
 * @returns {void}
 */
function clearMode() {
  try {
    fs.unlinkSync(STATEFILE);
  } catch {
    // Nothing to clear.
  }
}
111
+
112
/**
 * Build the Docker container name for a persona.
 *
 * @param {string} persona - Persona name.
 * @returns {string} CONTAINER_PREFIX followed by the persona name.
 */
function containerName(persona) {
  const name = `${CONTAINER_PREFIX}${persona}`;
  return name;
}
116
+
117
/**
 * Look up the host ports published by a persona's container.
 *
 * Runs `docker inspect` on the container and reads the host bindings for the
 * container ports 8024/tcp (API) and 6080/tcp (VNC). A missing binding falls
 * back to NAVVI_PORT / VNC_PORT individually; if the inspect output cannot be
 * parsed at all, both defaults are returned.
 *
 * @param {string} persona - Persona name.
 * @returns {{api: number, vnc: number}} Host ports for the API and noVNC.
 */
function getContainerPorts(persona) {
  const fallback = { api: NAVVI_PORT, vnc: VNC_PORT };
  try {
    const raw = sh(`docker inspect --format '{{json .NetworkSettings.Ports}}' ${containerName(persona)} 2>/dev/null`);
    const bindings = JSON.parse(raw);
    // First published host port for a container port, or the given default.
    const hostPort = (containerPort, defaultPort) =>
      parseInt(bindings[containerPort]?.[0]?.HostPort || defaultPort);
    return {
      api: hostPort('8024/tcp', NAVVI_PORT),
      vnc: hostPort('6080/tcp', VNC_PORT),
    };
  } catch {
    return fallback;
  }
}
130
+
131
/**
 * Read a persona's YAML file with a minimal, dependency-free line parser.
 *
 * Search order (first readable file wins), trying `.yaml` then `.yml` in each:
 *   1. <cwd>/personas
 *   2. <cwd>/.navvi/personas
 *   3. ~/.navvi/personas
 *   4. <PACKAGE_DIR>/personas
 *
 * Only `key: value` lines indented by at most two spaces are recognised, and
 * all of them are flattened into one object (nesting is not modelled).
 * Values are cleaned so they are safe to use as plain strings:
 *   - a value wrapped in single or double quotes is unquoted;
 *   - on an unquoted value, a trailing `# comment` (a `#` at the start or after
 *     whitespace) is removed — a `#` inside a token such as a URL fragment is kept;
 *   - keys whose unquoted value is empty after cleaning are skipped.
 *
 * @param {string} persona - Persona name (file name without extension).
 * @returns {Object<string, string>} Parsed key/value pairs; `{}` when no file is found.
 */
function readPersonaYaml(persona) {
  // Returns the cleaned scalar, or null when nothing but a comment remains.
  const cleanScalar = (raw) => {
    const value = raw.trim();
    const quote = value[0];
    if (quote === '"' || quote === "'") {
      const closing = value.indexOf(quote, 1);
      if (closing > 0) return value.slice(1, closing);
    }
    const withoutComment = value.replace(/(^|\s)#.*$/, '').trim();
    return withoutComment === '' ? null : withoutComment;
  };

  const dirs = [
    path.join(process.cwd(), 'personas'),
    path.join(process.cwd(), '.navvi', 'personas'),
    path.join(os.homedir(), '.navvi', 'personas'),
    path.join(PACKAGE_DIR, 'personas'),
  ];
  for (const dir of dirs) {
    for (const ext of ['.yaml', '.yml']) {
      let text;
      try {
        text = fs.readFileSync(path.join(dir, persona + ext), 'utf8');
      } catch {
        continue; // not present (or unreadable) here — try the next candidate
      }
      const result = {};
      for (const line of text.split('\n')) {
        const match = line.match(/^\s{0,2}(\w+):\s*(.+)/);
        if (!match) continue;
        const value = cleanScalar(match[2]);
        if (value !== null) result[match[1]] = value;
      }
      return result;
    }
  }
  return {};
}
155
+
156
/**
 * Make an HTTP call to the navvi-server.py API.
 *
 * The response body is decoded as UTF-8 (so multibyte characters split across
 * network chunks are reassembled correctly) and parsed as JSON when possible;
 * a non-JSON body is returned as the raw string.
 *
 * @param {string} method - HTTP method (e.g. "GET", "POST").
 * @param {string} apiPath - Request path, resolved against the base URL.
 * @param {object|null} [body] - JSON-serializable request body; omitted when falsy.
 * @param {string} [apiBase] - Base URL; defaults to the module-level navviApi.
 * @returns {Promise<*>} Parsed JSON response, or the raw body text.
 * @throws {Error} (as a rejection) on network errors, on a 15 s socket timeout
 *   ("Request timeout"), or on any HTTP status >= 400. The status error message
 *   includes the response's `detail` or `error` field when present; a
 *   non-string value is JSON-encoded.
 */
function apiCall(method, apiPath, body, apiBase) {
  return new Promise((resolve, reject) => {
    const base = apiBase || navviApi;
    const url = new URL(apiPath, base);
    const payload = body ? JSON.stringify(body) : null;

    const headers = { 'Content-Type': 'application/json' };
    if (payload !== null) headers['Content-Length'] = Buffer.byteLength(payload);

    const options = {
      hostname: url.hostname,
      port: url.port,
      path: url.pathname + url.search,
      method,
      headers,
      timeout: 15000,
    };

    const req = http.request(options, (res) => {
      // Decode at the stream level; concatenating raw Buffers would corrupt
      // multibyte characters that straddle a chunk boundary.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        let parsed;
        try { parsed = JSON.parse(data); } catch { parsed = data; }
        if (res.statusCode >= 400) {
          const detail = (parsed && parsed.detail) || (parsed && parsed.error) || data || `HTTP ${res.statusCode}`;
          // Structured error details would otherwise print as "[object Object]".
          const errMsg = typeof detail === 'string' ? detail : JSON.stringify(detail);
          return reject(new Error(`API ${method} ${apiPath} failed (${res.statusCode}): ${errMsg}`));
        }
        resolve(parsed);
      });
    });
    req.on('error', reject);
    req.on('timeout', () => { req.destroy(); reject(new Error('Request timeout')); });
    if (payload !== null) req.write(payload);
    req.end();
  });
}
192
+
193
/**
 * Check whether navvi-server answers its /health endpoint on localhost.
 *
 * Uses curl via sh() and treats exactly HTTP 200 as reachable.
 *
 * @param {number} [port] - Host port to probe; defaults to NAVVI_PORT.
 * @returns {boolean} true when /health returned status 200.
 */
function isApiReachable(port) {
  let statusCode;
  try {
    statusCode = sh(`curl -sf -o /dev/null -w '%{http_code}' http://127.0.0.1:${port || NAVVI_PORT}/health 2>/dev/null`);
  } catch {
    return false;
  }
  return statusCode === '200';
}
202
+
203
/**
 * List running Navvi containers.
 *
 * Docker's `name=` filter matches substrings, so `docker ps` may also return
 * unrelated containers whose names merely contain the prefix (for example
 * "other-navvi-app"). Only containers whose name actually starts with
 * CONTAINER_PREFIX are kept, and the persona name is what follows that prefix.
 *
 * @returns {Array<{name: string, id: string, state: string, ports: string, image: string}>}
 *   One entry per running Navvi container; `name` is the persona name. Returns
 *   an empty array when there are none or when the docker output cannot be parsed.
 */
function listContainers() {
  try {
    const output = sh(`docker ps --filter "name=${CONTAINER_PREFIX}" --format '{{json .}}' 2>/dev/null`);
    if (!output) return [];
    return output
      .split('\n')
      .filter(Boolean)
      .map((line) => JSON.parse(line))
      .filter((c) => c.Names.startsWith(CONTAINER_PREFIX))
      .map((c) => ({
        name: c.Names.slice(CONTAINER_PREFIX.length),
        id: c.ID,
        state: c.State,
        ports: c.Ports,
        image: c.Image,
      }));
  } catch {
    // sh() returns an error message on failure, which is not JSON.
    return [];
  }
}
222
+
223
+ // --- Dependency checks ---
224
+
225
/**
 * Check the tools required for local mode.
 *
 * @returns {Array<{name: string, install: string[]}>} Missing dependencies with
 *   install commands; empty when `docker` is found.
 */
function checkLocalDeps() {
  if (which('docker')) return [];
  return [
    {
      name: 'Docker',
      install: ['brew install --cask docker'],
    },
  ];
}
235
+
236
/**
 * Check the tools required for remote (Codespace) mode.
 *
 * @returns {Array<{name: string, install: string[]}>} Missing dependencies with
 *   install commands; empty when `gh` is found.
 */
function checkRemoteDeps() {
  if (which('gh')) return [];
  return [{ name: 'GitHub CLI (gh)', install: ['brew install gh'] }];
}
243
+
244
/**
 * Render a list of missing dependencies as a human-readable message.
 *
 * @param {Array<{name: string, install: string[]}>} missing - Missing
 *   dependencies, each with the shell commands that install it.
 * @returns {string} Multi-line message: a header, one section per dependency
 *   listing its install commands, and a closing hint.
 */
function formatMissing(missing) {
  const sections = missing.map((dep) => {
    const commands = dep.install.map((cmd) => ` $ ${cmd}\n`).join('');
    return `${dep.name} — install with:\n${commands}\n`;
  });
  return [
    'Missing dependencies:\n\n',
    ...sections,
    'Install the missing dependencies and try again.',
  ].join('');
}
254
+
255
+ // --- MCP Tool Definitions ---
256
+
257
/**
 * MCP tool catalogue: one `{ name, description, inputSchema }` entry per tool.
 *
 * The entries are assembled with small local schema helpers (kept private in
 * this IIFE). Every helper call returns a fresh object, so no schema fragment
 * is shared between tools.
 */
const TOOLS = (() => {
  const str = (description) => ({ type: 'string', description });
  const num = (description) => ({ type: 'number', description });
  const bool = (description) => ({ type: 'boolean', description });
  const oneOf = (values, description) => ({ type: 'string', enum: values, description });
  // The optional persona selector accepted by most tools.
  const persona = (description = 'Target persona (optional)') => str(description);
  // Plain (x, y) coordinate pair plus persona, used by the raw mouse tools.
  const point = () => ({
    x: num('X coordinate'),
    y: num('Y coordinate'),
    persona: persona(),
  });
  const tool = (name, description, properties = {}, required) => ({
    name,
    description,
    inputSchema: required
      ? { type: 'object', properties, required }
      : { type: 'object', properties },
  });

  return [
    // Lifecycle
    tool(
      'navvi_start',
      'Start a Navvi browser container (Firefox + Xvfb + xdotool). Local=Docker, Remote=Codespace. Workflow: navvi_open(url) → navvi_find(selector) → navvi_click/navvi_fill → navvi_screenshot to verify. All input is OS-level (isTrusted:true). If you hit a CAPTCHA you cannot solve (Arkose/FunCaptcha, image puzzles, reCAPTCHA), call navvi_vnc and send the user the noVNC URL so they can solve it manually.',
      {
        persona: str('Persona name (default: "default"). Maps to personas/<name>.yaml and a persistent Docker volume.'),
        mode: oneOf(['local', 'remote'], 'Run locally via Docker or in a Codespace (default: local)'),
        name: str('Codespace name to resume (remote mode only, optional)'),
      },
    ),
    tool(
      'navvi_stop',
      'Stop a Navvi container. Stops all if no persona specified. Firefox profile is preserved in the Docker volume.',
      {
        persona: persona('Persona name (optional — stops all if omitted)'),
      },
    ),
    tool(
      'navvi_status',
      'Show current Navvi state — running containers, API health, active persona.',
    ),
    tool(
      'navvi_list',
      'List available Codespaces for Navvi (remote mode).',
    ),

    // Browser control
    tool(
      'navvi_open',
      'Navigate to a URL in the active browser. After navigating, use navvi_find to locate elements on the page, then navvi_click/navvi_fill to interact.',
      {
        url: str('URL to navigate to'),
        persona: persona('Target persona (optional — uses active if omitted)'),
      },
      ['url'],
    ),
    tool(
      'navvi_click',
      'Click at (x, y) screen coordinates using OS-level xdotool input (isTrusted: true). IMPORTANT: Use navvi_find to get coordinates — it returns screen-ready (x, y) values. Do NOT use raw JS getBoundingClientRect() — those are viewport coords that miss the browser chrome offset.',
      {
        x: num('X coordinate (pixels from left)'),
        y: num('Y coordinate (pixels from top)'),
        persona: persona(),
      },
      ['x', 'y'],
    ),
    tool(
      'navvi_fill',
      'Click at (x, y) to focus an input field, then type text using OS-level xdotool. Get coordinates from navvi_find first. Selects existing text (Ctrl+A) before typing to replace any current value.',
      {
        x: num('X coordinate of the input field'),
        y: num('Y coordinate of the input field'),
        value: str('Text to type'),
        delay: num('Delay in ms between characters (default: 12). Use 50-100 for natural typing speed.'),
        persona: persona(),
      },
      ['x', 'y', 'value'],
    ),
    tool(
      'navvi_press',
      'Press a keyboard key (Enter, Tab, Escape, Backspace, ArrowDown, etc.). Sends to currently focused element.',
      {
        key: str('Key name (e.g. "Enter", "Tab", "Escape", "Backspace", "ArrowDown")'),
        persona: persona(),
      },
      ['key'],
    ),
    tool(
      'navvi_drag',
      'Drag from (x1,y1) to (x2,y2) with interpolated mouse moves. Uses OS-level input — works on CAPTCHAs and canvases. Get coordinates from navvi_find.',
      {
        x1: num('Start X'),
        y1: num('Start Y'),
        x2: num('End X'),
        y2: num('End Y'),
        steps: num('Interpolation steps (default: 20)'),
        duration: num('Drag duration in seconds (default: 0.3)'),
        persona: persona(),
      },
      ['x1', 'y1', 'x2', 'y2'],
    ),
    tool(
      'navvi_mousedown',
      'Press and hold mouse button at (x, y). Pair with navvi_mouseup for press-and-hold CAPTCHAs. Get coordinates from navvi_find. WARNING: Arkose Labs/FunCaptcha (Microsoft, Yahoo) cannot be solved inside the container even by a human — the virtual display is fingerprinted. If you detect arkoselabs/funcaptcha in the page, stop and tell the user to use a real browser for that signup.',
      point(),
      ['x', 'y'],
    ),
    tool(
      'navvi_mouseup',
      'Release mouse button at (x, y). Pair with navvi_mousedown.',
      point(),
      ['x', 'y'],
    ),
    tool(
      'navvi_mousemove',
      'Move mouse to (x, y) without clicking. Useful for hover effects.',
      point(),
      ['x', 'y'],
    ),
    tool(
      'navvi_scroll',
      'Scroll the page in a given direction.',
      {
        direction: oneOf(['up', 'down', 'left', 'right'], 'Scroll direction (default: down)'),
        amount: num('Number of scroll clicks (default: 3)'),
        persona: persona(),
      },
    ),
    tool(
      'navvi_screenshot',
      'Take a screenshot of the virtual display. Returns file path to a PNG image — use Read tool to view it. Use for VISUAL VERIFICATION only (confirming what happened). To get clickable coordinates, use navvi_find instead — screenshot pixel positions include browser chrome and are not reliable for targeting elements.',
      {
        persona: persona(),
      },
    ),
    tool(
      'navvi_url',
      'Get the current page URL.',
      {
        persona: persona(),
      },
    ),
    tool(
      'navvi_vnc',
      'Get the noVNC URL for live browser view. Share with the user when human intervention is needed: visual CAPTCHAs that require image recognition, OAuth consent screens, or 2FA code entry. The user opens this URL in their real browser to interact directly.',
      {
        persona: persona(),
      },
    ),
    tool(
      'navvi_find',
      'Find element(s) by CSS selector and return screen-ready (x, y) coordinates. THIS IS THE PRIMARY WAY TO GET COORDINATES — use before navvi_click, navvi_fill, navvi_drag, navvi_mousedown. Automatically corrects for browser chrome offset. Workflow: navvi_find → get (x, y) → navvi_click/navvi_fill at those coords → navvi_screenshot to verify. For dropdowns: navvi_find the button → navvi_click to open → navvi_find the options (selector="[role=option]", all=true) → navvi_click the desired option.',
      {
        selector: str('CSS selector (e.g. "#email", "input[type=password]", "button[type=submit]")'),
        all: bool('Return all matches (default: false, returns first match only)'),
        persona: persona(),
      },
      ['selector'],
    ),

    // Credentials
    tool(
      'navvi_creds',
      'Manage credentials stored in gopass inside the container. Three actions: "list" shows available entries (no secrets), "get" retrieves a non-secret field (username, url, email — refuses password), "autofill" reads gopass and fills the login form directly — the password goes from gopass → xdotool → browser, NEVER appearing in this response. Use autofill after navvi_open navigates to a login page.',
      {
        action: oneOf(['list', 'get', 'autofill'], 'Action: list entries, get a metadata field, or autofill a login form'),
        entry: str('Gopass entry path (e.g. "navvi/default/tuta"). Required for get and autofill.'),
        field: str('Field to retrieve (for "get" action). e.g. "username", "url", "email". Password fields are blocked — use autofill.'),
        username_selector: str('CSS selector for username field (autofill only, default: auto-detect)'),
        password_selector: str('CSS selector for password field (autofill only, default: input[type=password])'),
        persona: persona(),
      },
      ['action'],
    ),

    // Video recording
    tool(
      'navvi_record_start',
      'Start recording the browser via screenshot polling. Captures frames in background, assembles to MP4 on stop.',
      {
        duration: num('Max duration in seconds (default: 30, max: 120)'),
        persona: persona(),
      },
    ),
    tool(
      'navvi_record_stop',
      'Stop recording and assemble frames into MP4. Optionally trims dead time between actions.',
      {
        trim: bool('Trim dead time between actions (default: true)'),
      },
    ),
    tool(
      'navvi_record_gif',
      'Convert a recorded video to an optimized GIF (1600px wide, 8fps, palette-optimized).',
      {
        input: str('Path to input video. If omitted, uses most recent recording.'),
      },
    ),
  ];
})();
503
+
504
+ // --- Tool Handlers ---
505
+
506
/**
 * Decide which persona a tool call targets and where its API lives.
 *
 * Precedence: the explicit argument, then the active persona, then "default".
 *
 * @param {string} [persona] - Persona requested by the caller, if any.
 * @returns {{name: string, apiBase: string}} Persona name and the localhost
 *   base URL of its API, using the port reported by getContainerPorts().
 */
function resolvePersona(persona) {
  const target = persona || activePersona || 'default';
  const { api } = getContainerPorts(target);
  return { name: target, apiBase: `http://127.0.0.1:${api}` };
}
512
+
513
+ async function handleTool(name, args) {
514
+ switch (name) {
515
+ // --- Lifecycle ---
516
+
517
+ case 'navvi_start': {
518
+ const mode = args.mode || 'local';
519
+ const persona = args.persona || 'default';
520
+
521
+ if (mode === 'local') {
522
+ const missing = checkLocalDeps();
523
+ if (missing.length > 0) return formatMissing(missing);
524
+
525
+ const cname = containerName(persona);
526
+
527
+ // Check if already running
528
+ const existing = sh(`docker ps -q --filter "name=${cname}" 2>/dev/null`);
529
+ if (existing) {
530
+ const ports = getContainerPorts(persona);
531
+ const reachable = isApiReachable(ports.api);
532
+ activePersona = persona;
533
+ navviApi = `http://127.0.0.1:${ports.api}`;
534
+ return `Container ${cname} already running.\nAPI: http://127.0.0.1:${ports.api} (${reachable ? 'healthy' : 'starting...'})\nVNC: http://127.0.0.1:${ports.vnc}`;
535
+ }
536
+
537
+ // Remove stopped container with same name
538
+ sh(`docker rm ${cname} 2>/dev/null`);
539
+
540
+ // Read persona config for locale/timezone
541
+ const config = readPersonaYaml(persona);
542
+ const locale = config.locale || 'en-US';
543
+ const timezone = config.timezone || 'UTC';
544
+
545
+ // Docker volume for persistent Firefox profile
546
+ const volumeName = `navvi-profile-${persona}`;
547
+
548
+ // Find free ports if default persona ports are taken
549
+ let apiPort = NAVVI_PORT;
550
+ let vncPort = VNC_PORT;
551
+ // For non-default personas, offset ports
552
+ if (persona !== 'default') {
553
+ // Simple hash to get port offset
554
+ let hash = 0;
555
+ for (const ch of persona) hash = ((hash << 5) - hash + ch.charCodeAt(0)) | 0;
556
+ const offset = (Math.abs(hash) % 100) + 1;
557
+ apiPort = NAVVI_PORT + offset;
558
+ vncPort = VNC_PORT + offset;
559
+ }
560
+
561
+ const dockerArgs = [
562
+ 'run', '-d',
563
+ '--name', cname,
564
+ '-p', `${apiPort}:8024`,
565
+ '-p', `${vncPort}:6080`,
566
+ '-v', `${volumeName}:/home/user/.mozilla`,
567
+ '-e', `LOCALE=${locale}`,
568
+ '-e', `TIMEZONE=${timezone}`,
569
+ DOCKER_IMAGE,
570
+ ];
571
+
572
+ const result = sh(`docker ${dockerArgs.join(' ')}`);
573
+ if (result.includes('Error') || result.includes('error')) {
574
+ return `Failed to start container:\n${result}\n\nMake sure the image is built: docker build -t navvi container/`;
575
+ }
576
+
577
+ // Wait for API to be ready
578
+ activePersona = persona;
579
+ navviApi = `http://127.0.0.1:${apiPort}`;
580
+
581
+ let ready = false;
582
+ for (let i = 0; i < 15; i++) {
583
+ await new Promise(r => setTimeout(r, 1000));
584
+ if (isApiReachable(apiPort)) { ready = true; break; }
585
+ }
586
+
587
+ setMode('local');
588
+ return `Navvi started (${persona}).\nContainer: ${cname}\nAPI: http://127.0.0.1:${apiPort} (${ready ? 'healthy' : 'starting...'})\nVNC: http://127.0.0.1:${vncPort}\nVolume: ${volumeName} (persistent Firefox profile)\n\nUse navvi_open to navigate, navvi_screenshot to see the page.`;
589
+ }
590
+
591
+ if (mode === 'remote') {
592
+ if (!REPO) return 'Error: remote mode requires NAVVI_REPO env var (e.g. "Fellowship-dev/navvi"). Set it in your MCP config.';
593
+ const missing = checkRemoteDeps();
594
+ if (missing.length > 0) return formatMissing(missing);
595
+
596
+ const csToken = process.env.CODESPACE_TOKEN;
597
+ const ghEnv = csToken ? { ...process.env, GH_TOKEN: csToken } : process.env;
598
+
599
+ let csName = args.name;
600
+ if (csName) {
601
+ // SSH auto-starts stopped codespaces (gh cs start doesn't exist)
602
+ try {
603
+ execSync(`gh cs ssh -c ${csName} -- echo ready`, { encoding: 'utf8', timeout: 120000, env: ghEnv });
604
+ } catch {}
605
+ } else {
606
+ const stopped = ghSh(`gh cs list --repo ${REPO} --json name,state -q '.[] | select(.state=="Shutdown") | .name'`);
607
+ if (stopped) {
608
+ csName = stopped.split('\n')[0];
609
+ try {
610
+ execSync(`gh cs ssh -c ${csName} -- echo ready`, { encoding: 'utf8', timeout: 120000, env: ghEnv });
611
+ } catch {}
612
+ } else {
613
+ csName = ghSh(`gh cs create --repo ${REPO} --machine ${MACHINE_TYPE} --json name -q '.name'`);
614
+ }
615
+ }
616
+
617
+ if (!csName) return 'Failed to start Codespace. Check gh auth status and CODESPACE_TOKEN env var.';
618
+
619
+ // Wait for navvi-server to be ready inside the codespace
620
+ let apiReady = false;
621
+ for (let i = 0; i < 15; i++) {
622
+ try {
623
+ const check = execSync(
624
+ `gh cs ssh -c ${csName} -- python3 -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:8024/health').read().decode())"`,
625
+ { encoding: 'utf8', timeout: 10000, env: ghEnv }
626
+ ).trim();
627
+ if (check.includes('"ok":true')) { apiReady = true; break; }
628
+ } catch {}
629
+ await new Promise(r => setTimeout(r, 3000));
630
+ }
631
+
632
+ // Port forward both API and VNC
633
+ killPidfile(PIDFILE_FWD);
634
+ const child = spawn('gh', ['cs', 'ports', 'forward', `${NAVVI_PORT}:${NAVVI_PORT}`, `${VNC_PORT}:${VNC_PORT}`, '-c', csName], {
635
+ detached: true,
636
+ stdio: 'ignore',
637
+ env: ghEnv,
638
+ });
639
+ child.unref();
640
+ fs.writeFileSync(PIDFILE_FWD, String(child.pid));
641
+
642
+ await new Promise(r => setTimeout(r, 3000));
643
+ const reachable = isApiReachable(NAVVI_PORT);
644
+ setMode('remote:' + csName);
645
+ activePersona = persona;
646
+ return `Navvi started (remote). Codespace: ${csName}\nAPI: localhost:${NAVVI_PORT} (${reachable ? 'healthy' : apiReady ? 'forwarding...' : 'starting...'})\nVNC: localhost:${VNC_PORT}`;
647
+ }
648
+
649
+ return 'Invalid mode. Use "local" or "remote".';
650
+ }
651
+
652
+ case 'navvi_stop': {
653
+ const persona = args.persona;
654
+
655
+ if (persona) {
656
+ const cname = containerName(persona);
657
+ sh(`docker stop ${cname} 2>/dev/null`);
658
+ sh(`docker rm ${cname} 2>/dev/null`);
659
+ if (activePersona === persona) activePersona = null;
660
+ return `Stopped ${cname}. Firefox profile preserved in volume navvi-profile-${persona}.`;
661
+ }
662
+
663
+ // Stop all navvi containers
664
+ const containers = listContainers();
665
+ if (containers.length === 0) {
666
+ // Also handle remote mode
667
+ const currentMode = getMode();
668
+ if (currentMode && currentMode.startsWith('remote:')) {
669
+ const csName = currentMode.split(':')[1];
670
+ killPidfile(PIDFILE_FWD);
671
+ if (csName) ghSh(`gh cs stop -c ${csName}`);
672
+ clearMode();
673
+ return `Stopped remote Codespace ${csName}.`;
674
+ }
675
+ clearMode();
676
+ return 'No running Navvi containers.';
677
+ }
678
+
679
+ for (const c of containers) {
680
+ sh(`docker stop ${containerName(c.name)} 2>/dev/null`);
681
+ sh(`docker rm ${containerName(c.name)} 2>/dev/null`);
682
+ }
683
+ activePersona = null;
684
+ clearMode();
685
+ return `Stopped ${containers.length} container(s). Firefox profiles preserved in Docker volumes.`;
686
+ }
687
+
688
+ case 'navvi_status': {
689
+ const currentMode = getMode();
690
+ const containers = listContainers();
691
+ let status = `Mode: ${currentMode || 'off'}\nActive persona: ${activePersona || 'none'}`;
692
+
693
+ if (containers.length > 0) {
694
+ status += '\n\nRunning containers:';
695
+ for (const c of containers) {
696
+ const ports = getContainerPorts(c.name);
697
+ const healthy = isApiReachable(ports.api);
698
+ status += `\n ${c.name} — API :${ports.api} (${healthy ? 'healthy' : 'unhealthy'}), VNC :${ports.vnc}`;
699
+ }
700
+ } else {
701
+ status += '\n\nNo running containers. Start one with navvi_start.';
702
+ }
703
+
704
+ if (fs.existsSync(PIDFILE_FWD)) {
705
+ status += `\nPort forward PID: ${fs.readFileSync(PIDFILE_FWD, 'utf8').trim()}`;
706
+ }
707
+
708
+ return status;
709
+ }
710
+
711
+ case 'navvi_list': {
712
+ const missing = checkRemoteDeps();
713
+ if (missing.length > 0) return formatMissing(missing);
714
+
715
+ const output = ghSh(`gh cs list --repo ${REPO} --json name,state,createdAt,machine -q '.[] | "\\(.name) \\(.state) \\(.machine.displayName // "unknown") \\(.createdAt)"'`);
716
+ if (!output) return `No Codespaces found for ${REPO}.`;
717
+ return `Navvi Codespaces:\n${output}`;
718
+ }
719
+
720
+ // --- Browser control ---
721
+
722
+ case 'navvi_open': {
723
+ const { name: pName, apiBase } = resolvePersona(args.persona);
724
+ logAction('open', args.url);
725
+ try {
726
+ const result = await apiCall('POST', '/navigate', { url: args.url }, apiBase);
727
+ return `Opened ${args.url}\nTitle: ${result.title || '(loading...)'}\nURL: ${result.url || args.url}`;
728
+ } catch (e) {
729
+ return `Error navigating: ${e.message}`;
730
+ }
731
+ }
732
+
733
+ case 'navvi_click': {
734
+ const { name: pName, apiBase } = resolvePersona(args.persona);
735
+ logAction('click', `(${args.x}, ${args.y})`);
736
+ try {
737
+ await apiCall('POST', '/click', { x: args.x, y: args.y }, apiBase);
738
+ return `Clicked at (${args.x}, ${args.y})`;
739
+ } catch (e) {
740
+ return `Error: ${e.message}`;
741
+ }
742
+ }
743
+
744
+ case 'navvi_fill': {
745
+ const { name: pName, apiBase } = resolvePersona(args.persona);
746
+ const delay = args.delay !== undefined ? args.delay : 12;
747
+ const fillDurationMs = args.value.length * delay;
748
+ logAction('fill', { x: args.x, y: args.y, text: args.value, durationMs: fillDurationMs });
749
+ try {
750
+ // Click to focus
751
+ await apiCall('POST', '/click', { x: args.x, y: args.y }, apiBase);
752
+ await new Promise(r => setTimeout(r, 100));
753
+ // Type
754
+ await apiCall('POST', '/type', { text: args.value, delay }, apiBase);
755
+ return `Filled at (${args.x}, ${args.y}) with "${args.value}" (${args.value.length} chars)`;
756
+ } catch (e) {
757
+ return `Error: ${e.message}`;
758
+ }
759
+ }
760
+
761
+ case 'navvi_press': {
762
+ const { name: pName, apiBase } = resolvePersona(args.persona);
763
+ logAction('press', args.key);
764
+ try {
765
+ await apiCall('POST', '/key', { key: args.key }, apiBase);
766
+ return `Pressed ${args.key}`;
767
+ } catch (e) {
768
+ return `Error: ${e.message}`;
769
+ }
770
+ }
771
+
772
+ case 'navvi_drag': {
773
+ const { name: pName, apiBase } = resolvePersona(args.persona);
774
+ logAction('drag', { from: [args.x1, args.y1], to: [args.x2, args.y2] });
775
+ try {
776
+ const params = {
777
+ x1: args.x1, y1: args.y1,
778
+ x2: args.x2, y2: args.y2,
779
+ };
780
+ if (args.steps) params.steps = args.steps;
781
+ if (args.duration) params.duration = args.duration;
782
+ await apiCall('POST', '/drag', params, apiBase);
783
+ return `Dragged from (${args.x1}, ${args.y1}) to (${args.x2}, ${args.y2})`;
784
+ } catch (e) {
785
+ return `Error: ${e.message}`;
786
+ }
787
+ }
788
+
789
+ case 'navvi_mousedown': {
790
+ const { name: pName, apiBase } = resolvePersona(args.persona);
791
+ logAction('mousedown', `(${args.x}, ${args.y})`);
792
+ try {
793
+ await apiCall('POST', '/mousedown', { x: args.x, y: args.y }, apiBase);
794
+ return `Mouse down at (${args.x}, ${args.y})`;
795
+ } catch (e) {
796
+ return `Error: ${e.message}`;
797
+ }
798
+ }
799
+
800
+ case 'navvi_mouseup': {
801
+ const { name: pName, apiBase } = resolvePersona(args.persona);
802
+ logAction('mouseup', `(${args.x}, ${args.y})`);
803
+ try {
804
+ await apiCall('POST', '/mouseup', { x: args.x, y: args.y }, apiBase);
805
+ return `Mouse up at (${args.x}, ${args.y})`;
806
+ } catch (e) {
807
+ return `Error: ${e.message}`;
808
+ }
809
+ }
810
+
811
+ case 'navvi_mousemove': {
812
+ const { name: pName, apiBase } = resolvePersona(args.persona);
813
+ logAction('mousemove', `(${args.x}, ${args.y})`);
814
+ try {
815
+ await apiCall('POST', '/mousemove', { x: args.x, y: args.y }, apiBase);
816
+ return `Mouse moved to (${args.x}, ${args.y})`;
817
+ } catch (e) {
818
+ return `Error: ${e.message}`;
819
+ }
820
+ }
821
+
822
+ case 'navvi_scroll': {
823
+ const { name: pName, apiBase } = resolvePersona(args.persona);
824
+ const direction = args.direction || 'down';
825
+ const amount = args.amount || 3;
826
+ logAction('scroll', `${direction} x${amount}`);
827
+ try {
828
+ await apiCall('POST', '/scroll', { direction, amount }, apiBase);
829
+ return `Scrolled ${direction} x${amount}`;
830
+ } catch (e) {
831
+ return `Error: ${e.message}`;
832
+ }
833
+ }
834
+
835
+ case 'navvi_screenshot': {
836
+ const { name: pName, apiBase } = resolvePersona(args.persona);
837
+ try {
838
+ const result = await apiCall('GET', '/screenshot', null, apiBase);
839
+ if (!result.base64) return 'Error: no screenshot data returned.';
840
+
841
+ const imgBuf = Buffer.from(result.base64, 'base64');
842
+ const filename = `navvi-screenshot-${Date.now()}.png`;
843
+ const filepath = path.join(os.tmpdir(), filename);
844
+ fs.writeFileSync(filepath, imgBuf);
845
+
846
+ const sizeKB = Math.round(imgBuf.length / 1024);
847
+ return `Screenshot saved to ${filepath} (${sizeKB}KB).\nUse Read tool to view the image.`;
848
+ } catch (e) {
849
+ return `Error: ${e.message}`;
850
+ }
851
+ }
852
+
853
+ case 'navvi_url': {
854
+ const { name: pName, apiBase } = resolvePersona(args.persona);
855
+ try {
856
+ const result = await apiCall('GET', '/url', null, apiBase);
857
+ return result.url || '(unknown)';
858
+ } catch (e) {
859
+ return `Error: ${e.message}`;
860
+ }
861
+ }
862
+
863
+ case 'navvi_vnc': {
864
+ const persona = args.persona || activePersona || 'default';
865
+ const ports = getContainerPorts(persona);
866
+ return `noVNC: http://127.0.0.1:${ports.vnc}/vnc.html?autoconnect=true\n\nOpen this URL in a browser for live view. Use for:\n- Human CAPTCHA solving\n- OAuth login flows\n- Visual debugging`;
867
+ }
868
+
869
+ case 'navvi_find': {
870
+ const { name: pName, apiBase } = resolvePersona(args.persona);
871
+ logAction('find', args.selector);
872
+ try {
873
+ const params = { selector: args.selector };
874
+ if (args.all) params.all = true;
875
+ const result = await apiCall('POST', '/find', params, apiBase);
876
+ if (!result.found) return `No element found for selector: ${args.selector}`;
877
+ if (result.elements) {
878
+ // Multiple results
879
+ let output = `Found ${result.count} element(s) for "${args.selector}":\n`;
880
+ for (const el of result.elements) {
881
+ if (!el.visible) continue;
882
+ output += ` ${el.tag}${el.id ? '#' + el.id : ''} — (${el.x}, ${el.y}) ${el.width}x${el.height}`;
883
+ if (el.text) output += ` "${el.text.slice(0, 40)}"`;
884
+ if (el.placeholder) output += ` placeholder="${el.placeholder}"`;
885
+ output += '\n';
886
+ }
887
+ return output;
888
+ }
889
+ // Single result
890
+ let output = `Found: ${result.tag}${result.id ? '#' + result.id : ''} at (${result.x}, ${result.y}) ${result.width}x${result.height}`;
891
+ if (result.text) output += `\nText: "${result.text}"`;
892
+ if (result.placeholder) output += `\nPlaceholder: "${result.placeholder}"`;
893
+ if (result.value) output += `\nValue: "${result.value}"`;
894
+ output += `\n\nUse navvi_click x=${result.x} y=${result.y} to click this element.`;
895
+ return output;
896
+ } catch (e) {
897
+ return `Error: ${e.message}`;
898
+ }
899
+ }
900
+
901
+ // --- Credentials ---
902
+
903
+ case 'navvi_creds': {
904
+ const { name: pName, apiBase } = resolvePersona(args.persona);
905
+ const action = args.action;
906
+
907
+ if (action === 'list') {
908
+ try {
909
+ const result = await apiCall('GET', '/creds/list', null, apiBase);
910
+ if (!result.entries || result.entries.length === 0) return 'No credentials stored in gopass. Use gopass to add entries.';
911
+ let output = `Credentials (${result.count} entries):\n`;
912
+ for (const e of result.entries) output += ` ${e}\n`;
913
+ return output;
914
+ } catch (e) {
915
+ return `Error: ${e.message}`;
916
+ }
917
+ }
918
+
919
+ if (action === 'get') {
920
+ if (!args.entry) return 'Error: "entry" is required for get action.';
921
+ if (!args.field) return 'Error: "field" is required for get action (e.g. "username", "url", "email").';
922
+ try {
923
+ const result = await apiCall('POST', '/creds/get', { entry: args.entry, field: args.field }, apiBase);
924
+ return `${args.field}: ${result.value}`;
925
+ } catch (e) {
926
+ return `Error: ${e.message}`;
927
+ }
928
+ }
929
+
930
+ if (action === 'autofill') {
931
+ if (!args.entry) return 'Error: "entry" is required for autofill action.';
932
+ logAction('autofill', args.entry);
933
+ try {
934
+ const params = { entry: args.entry };
935
+ if (args.username_selector) params.username_selector = args.username_selector;
936
+ if (args.password_selector) params.password_selector = args.password_selector;
937
+ const result = await apiCall('POST', '/creds/autofill', params, apiBase);
938
+ return `Autofill complete for "${args.entry}".\nUsername filled at (${result.username_at.join(', ')})\nPassword filled at (${result.password_at.join(', ')})\n\n${result.note}`;
939
+ } catch (e) {
940
+ return `Error: ${e.message}`;
941
+ }
942
+ }
943
+
944
+ return 'Error: action must be "list", "get", or "autofill".';
945
+ }
946
+
947
+ // --- Video recording ---
948
+
949
+ case 'navvi_record_start': {
950
+ const { name: pName, apiBase } = resolvePersona(args.persona);
951
+
952
+ // Check for existing recording
953
+ const stateFile = path.join(os.tmpdir(), '.navvi-recording.json');
954
+ if (fs.existsSync(stateFile)) {
955
+ const state = JSON.parse(fs.readFileSync(stateFile, 'utf8'));
956
+ if (state.active) return `Recording already in progress (${state.frames} frames). Use navvi_record_stop first.`;
957
+ }
958
+
959
+ if (!which('ffmpeg')) return 'Error: ffmpeg not installed. Install with: brew install ffmpeg';
960
+ if (!fs.existsSync(RECORDINGS_DIR)) fs.mkdirSync(RECORDINGS_DIR, { recursive: true });
961
+
962
+ const duration = Math.min(args.duration || 30, 120);
963
+ const fps = 4;
964
+ const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
965
+ const framesDir = path.join(RECORDINGS_DIR, `frames-${ts}`);
966
+ fs.mkdirSync(framesDir, { recursive: true });
967
+
968
+ const state = { active: true, framesDir, ts, fps, duration, frames: 0, startTime: Date.now(), apiBase };
969
+ fs.writeFileSync(stateFile, JSON.stringify(state));
970
+
971
+ // Clear action log
972
+ try { fs.unlinkSync(ACTION_LOG); } catch {}
973
+
974
+ // Capture loop script — hits /screenshot endpoint
975
+ const captureScript = `
976
+ const http = require('http');
977
+ const fs = require('fs');
978
+ const framesDir = ${JSON.stringify(framesDir)};
979
+ const stateFile = ${JSON.stringify(stateFile)};
980
+ const api = ${JSON.stringify(apiBase)};
981
+ const fps = ${fps};
982
+ const maxFrames = ${duration} * fps;
983
+ let frame = 0;
984
+
985
+ function grabFrame() {
986
+ return new Promise((resolve) => {
987
+ const url = new URL('/screenshot', api);
988
+ const req = http.get(url, { timeout: 2000 }, (res) => {
989
+ let data = '';
990
+ res.on('data', (c) => data += c);
991
+ res.on('end', () => {
992
+ try {
993
+ const j = JSON.parse(data);
994
+ if (j.base64) {
995
+ const img = Buffer.from(j.base64, 'base64');
996
+ const name = 'frame-' + String(frame).padStart(6, '0') + '.png';
997
+ fs.writeFileSync(framesDir + '/' + name, img);
998
+ frame++;
999
+ try {
1000
+ const s = JSON.parse(fs.readFileSync(stateFile, 'utf8'));
1001
+ s.frames = frame;
1002
+ fs.writeFileSync(stateFile, JSON.stringify(s));
1003
+ } catch {}
1004
+ }
1005
+ } catch {}
1006
+ resolve();
1007
+ });
1008
+ }).on('error', () => resolve());
1009
+ req.on('timeout', () => { req.destroy(); resolve(); });
1010
+ });
1011
+ }
1012
+
1013
+ async function run() {
1014
+ const interval = 1000 / fps;
1015
+ while (frame < maxFrames) {
1016
+ const t0 = Date.now();
1017
+ try { await grabFrame(); } catch {}
1018
+ const elapsed = Date.now() - t0;
1019
+ const wait = Math.max(0, interval - elapsed);
1020
+ if (wait > 0) await new Promise(r => setTimeout(r, wait));
1021
+ try {
1022
+ const s = JSON.parse(fs.readFileSync(stateFile, 'utf8'));
1023
+ if (!s.active) break;
1024
+ } catch { break; }
1025
+ }
1026
+ try {
1027
+ const s = JSON.parse(fs.readFileSync(stateFile, 'utf8'));
1028
+ s.active = false;
1029
+ s.frames = frame;
1030
+ fs.writeFileSync(stateFile, JSON.stringify(s));
1031
+ } catch {}
1032
+ }
1033
+ run();
1034
+ `;
1035
+
1036
+ const scriptFile = path.join(os.tmpdir(), '.navvi-capture.cjs');
1037
+ fs.writeFileSync(scriptFile, captureScript);
1038
+
1039
+ const nodeBin = which('node') || '/usr/local/bin/node';
1040
+ const logFile = path.join(RECORDINGS_DIR, `capture-${ts}.log`);
1041
+ const logFd = fs.openSync(logFile, 'w');
1042
+ const child = spawn(nodeBin, [scriptFile], {
1043
+ detached: true,
1044
+ stdio: ['ignore', logFd, logFd],
1045
+ });
1046
+ child.unref();
1047
+ fs.writeFileSync(PIDFILE_RECORD, String(child.pid));
1048
+
1049
+ return `Recording started (${fps}fps, max ${duration}s).\nFrames dir: ${framesDir}\nUse navvi_record_stop to finish.`;
1050
+ }
1051
+
1052
+ case 'navvi_record_stop': {
1053
+ const stateFile = path.join(os.tmpdir(), '.navvi-recording.json');
1054
+ if (!fs.existsSync(stateFile)) return 'No active recording found.';
1055
+
1056
+ const state = JSON.parse(fs.readFileSync(stateFile, 'utf8'));
1057
+ state.active = false;
1058
+ fs.writeFileSync(stateFile, JSON.stringify(state));
1059
+
1060
+ // Kill capture process
1061
+ if (fs.existsSync(PIDFILE_RECORD)) {
1062
+ try {
1063
+ const pid = parseInt(fs.readFileSync(PIDFILE_RECORD, 'utf8').trim());
1064
+ process.kill(pid, 'SIGTERM');
1065
+ } catch {}
1066
+ try { fs.unlinkSync(PIDFILE_RECORD); } catch {}
1067
+ }
1068
+
1069
+ await new Promise(r => setTimeout(r, 1000));
1070
+
1071
+ const finalState = JSON.parse(fs.readFileSync(stateFile, 'utf8'));
1072
+ const { framesDir, fps, frames, ts } = finalState;
1073
+
1074
+ if (!frames || frames === 0) {
1075
+ try { fs.unlinkSync(stateFile); } catch {}
1076
+ return 'Recording stopped but no frames were captured.';
1077
+ }
1078
+
1079
+ // Assemble frames into MP4
1080
+ const ffmpegBin = which('ffmpeg') || '/usr/local/bin/ffmpeg';
1081
+ const outputFile = path.join(RECORDINGS_DIR, `${ts}.mp4`);
1082
+ const frameFiles = fs.readdirSync(framesDir).filter(f => f.endsWith('.png')).sort();
1083
+ const concatFile = path.join(framesDir, 'concat.txt');
1084
+ const concatLines = frameFiles.map(f => `file '${path.join(framesDir, f)}'\nduration ${(1/fps).toFixed(4)}`);
1085
+ if (frameFiles.length > 0) concatLines.push(`file '${path.join(framesDir, frameFiles[frameFiles.length - 1])}'`);
1086
+ fs.writeFileSync(concatFile, concatLines.join('\n') + '\n');
1087
+ const assembleResult = sh(`"${ffmpegBin}" -y -f concat -safe 0 -i "${concatFile}" -vf "scale=trunc(iw/2)*2:trunc(ih/2)*2" -c:v libx264 -preset fast -crf 23 -pix_fmt yuv420p "${outputFile}" 2>&1`);
1088
+
1089
+ if (!fs.existsSync(outputFile)) {
1090
+ try {
1091
+ for (const f of fs.readdirSync(framesDir)) fs.unlinkSync(path.join(framesDir, f));
1092
+ fs.rmdirSync(framesDir);
1093
+ } catch {}
1094
+ try { fs.unlinkSync(stateFile); } catch {}
1095
+ return `Failed to assemble video.\n${assembleResult}`;
1096
+ }
1097
+
1098
+ const sizeKB = Math.round(fs.statSync(outputFile).size / 1024);
1099
+ const durationSec = (frames / fps).toFixed(1);
1100
+ let result = `Recording stopped.\nFile: ${outputFile}\nFrames: ${frames} at ${fps}fps\nDuration: ${durationSec}s\nSize: ${sizeKB}KB`;
1101
+
1102
+ // Smart trim
1103
+ const shouldTrim = args.trim !== false;
1104
+ if (shouldTrim && fs.existsSync(ACTION_LOG)) {
1105
+ try {
1106
+ const actions = fs.readFileSync(ACTION_LOG, 'utf8').trim().split('\n')
1107
+ .map(line => JSON.parse(line));
1108
+
1109
+ if (actions.length > 0 && frameFiles.length > 0) {
1110
+ const recordingStart = finalState.startTime;
1111
+ const frameDurationMs = 1000 / fps;
1112
+ const BEFORE_MS = 1000;
1113
+ const AFTER_MS = 3000;
1114
+ const keepFrames = new Set();
1115
+
1116
+ for (const action of actions) {
1117
+ const actionOffsetMs = action.ts - recordingStart;
1118
+ const actionFrame = Math.floor(actionOffsetMs / frameDurationMs);
1119
+ const beforeFrames = Math.ceil(BEFORE_MS / frameDurationMs);
1120
+ let afterMs = AFTER_MS;
1121
+ if (action.action === 'fill' && action.detail && action.detail.durationMs) {
1122
+ afterMs = action.detail.durationMs + AFTER_MS;
1123
+ }
1124
+ const afterFrames = Math.ceil(afterMs / frameDurationMs);
1125
+ const start = Math.max(0, actionFrame - beforeFrames);
1126
+ const end = Math.min(frameFiles.length - 1, actionFrame + afterFrames);
1127
+ for (let i = start; i <= end; i++) keepFrames.add(i);
1128
+ }
1129
+
1130
+ if (keepFrames.size < frameFiles.length * 0.8) {
1131
+ const trimmedFrames = frameFiles.filter((_, i) => keepFrames.has(i));
1132
+ const trimConcatFile = path.join(framesDir, 'concat-trimmed.txt');
1133
+ const trimLines = trimmedFrames.map(f => `file '${path.join(framesDir, f)}'\nduration ${(1/fps).toFixed(4)}`);
1134
+ if (trimmedFrames.length > 0) trimLines.push(`file '${path.join(framesDir, trimmedFrames[trimmedFrames.length - 1])}'`);
1135
+ fs.writeFileSync(trimConcatFile, trimLines.join('\n') + '\n');
1136
+
1137
+ const trimmedFile = path.join(RECORDINGS_DIR, `${ts}-trimmed.mp4`);
1138
+ sh(`"${ffmpegBin}" -y -f concat -safe 0 -i "${trimConcatFile}" -vf "scale=trunc(iw/2)*2:trunc(ih/2)*2" -c:v libx264 -preset fast -crf 23 -pix_fmt yuv420p "${trimmedFile}" 2>&1`);
1139
+
1140
+ if (fs.existsSync(trimmedFile)) {
1141
+ const trimSizeKB = Math.round(fs.statSync(trimmedFile).size / 1024);
1142
+ const trimDurationSec = (trimmedFrames.length / fps).toFixed(1);
1143
+ result += `\n\nTrimmed: ${trimmedFile}\nDuration: ${trimDurationSec}s (${trimSizeKB}KB)`;
1144
+ }
1145
+ } else {
1146
+ result += '\n\n(Trim skipped — not enough dead time.)';
1147
+ }
1148
+ }
1149
+ } catch (trimErr) {
1150
+ result += `\n\n(Trim failed: ${trimErr.message})`;
1151
+ }
1152
+ try { fs.unlinkSync(ACTION_LOG); } catch {}
1153
+ }
1154
+
1155
+ // Clean up frames
1156
+ try {
1157
+ for (const f of fs.readdirSync(framesDir)) fs.unlinkSync(path.join(framesDir, f));
1158
+ fs.rmdirSync(framesDir);
1159
+ } catch {}
1160
+ try { fs.unlinkSync(stateFile); } catch {}
1161
+
1162
+ result += '\n\nConvert to GIF with navvi_record_gif.';
1163
+ return result;
1164
+ }
1165
+
1166
+ case 'navvi_record_gif': {
1167
+ if (!which('ffmpeg')) return 'Error: ffmpeg not installed.';
1168
+
1169
+ let input = args.input;
1170
+ if (!input) {
1171
+ if (!fs.existsSync(RECORDINGS_DIR)) return 'No recordings directory found.';
1172
+ const files = fs.readdirSync(RECORDINGS_DIR)
1173
+ .filter(f => f.match(/\.(mp4|mov)$/))
1174
+ .sort()
1175
+ .reverse();
1176
+ if (files.length === 0) return 'No recordings found.';
1177
+ input = path.join(RECORDINGS_DIR, files[0]);
1178
+ }
1179
+
1180
+ if (!fs.existsSync(input)) return `Error: input file not found: ${input}`;
1181
+
1182
+ const output = input.replace(/\.(mp4|mov)$/, '.gif');
1183
+ const palette = path.join(os.tmpdir(), '.navvi-palette.png');
1184
+
1185
+ const pass1 = sh(`ffmpeg -y -i "${input}" -vf "fps=8,scale=1600:-1:flags=lanczos,palettegen" "${palette}" 2>&1`);
1186
+ if (!fs.existsSync(palette)) return `GIF palette generation failed.\n${pass1}`;
1187
+
1188
+ sh(`ffmpeg -y -i "${input}" -i "${palette}" -lavfi "fps=8,scale=1600:-1:flags=lanczos [x]; [x][1:v] paletteuse" "${output}" 2>&1`);
1189
+
1190
+ try { fs.unlinkSync(palette); } catch {}
1191
+
1192
+ if (!fs.existsSync(output)) return 'GIF conversion failed.';
1193
+
1194
+ const sizeKB = Math.round(fs.statSync(output).size / 1024);
1195
+ return `GIF created: ${output} (${sizeKB}KB)\n\nDo NOT use Read on this file.`;
1196
+ }
1197
+
1198
+ default:
1199
+ return `Unknown tool: ${name}`;
1200
+ }
1201
+ }
1202
+
1203
+ // --- MCP stdio protocol ---
1204
+
1205
/**
 * Newline-delimited JSON framing for the MCP stdio transport.
 *
 * stdin chunks are appended to `msgBuffer`; every complete line (terminated
 * by '\n') is parsed as JSON and handed to handleMessage(). The trailing,
 * not-yet-terminated fragment stays in the buffer until the next chunk.
 */
let msgBuffer = '';

process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => {
  msgBuffer += chunk;
  const lines = msgBuffer.split('\n');
  // The last element is the incomplete tail (or '' when the chunk ended on
  // a newline); keep it for the next 'data' event.
  msgBuffer = lines.pop();
  for (const line of lines) {
    if (!line.trim()) continue;

    let msg;
    try {
      msg = JSON.parse(line);
    } catch (e) {
      // Best effort: a malformed line must not stop the server, but leave a
      // trace on stderr (stdout is reserved for protocol traffic).
      process.stderr.write(`navvi: ignoring malformed JSON line: ${e.message}\n`);
      continue;
    }

    // handleMessage is async, so a try/catch around the call cannot observe
    // its failures. Attach a rejection handler instead of leaving the promise
    // floating; an unhandled rejection would otherwise take the process down.
    handleMessage(msg).catch((e) => {
      process.stderr.write(`navvi: message handler failed: ${(e && e.message) || e}\n`);
    });
  }
});
1219
+
1220
/**
 * Write one JSON-RPC message to the client.
 *
 * The message is serialized with JSON.stringify and terminated by a single
 * newline, then written to stdout.
 *
 * @param {object} msg - JSON-serializable message to emit.
 */
function send(msg) {
  const frame = `${JSON.stringify(msg)}\n`;
  process.stdout.write(frame);
}
1223
+
1224
/**
 * Dispatch one decoded JSON-RPC message from the MCP client.
 *
 * Handled methods:
 *   - initialize                 replies with protocol version, capabilities, server info
 *   - notifications/initialized  acknowledged silently
 *   - tools/list                 replies with the TOOLS table
 *   - tools/call                 runs handleTool() and wraps its return value as text
 *                                content; any failure is reported as an `isError` result
 *   - anything else              "method not found" (-32601) when the message carries
 *                                an id; ignored when it is a notification
 *
 * @param {object} msg - Parsed JSON-RPC message ({ id, method, params }).
 * @returns {Promise<void>} Resolves once the reply (if any) has been handed to send().
 */
async function handleMessage(msg) {
  // JSON such as `null` or `42` parses fine but is not a request object;
  // destructuring it would throw, so answer with "Invalid Request" instead.
  if (msg === null || typeof msg !== 'object') {
    send({ jsonrpc: '2.0', id: null, error: { code: -32600, message: 'Invalid Request' } });
    return;
  }

  const { id, method, params } = msg;

  switch (method) {
    case 'initialize':
      send({
        jsonrpc: '2.0',
        id,
        result: {
          protocolVersion: '2024-11-05',
          capabilities: { tools: {} },
          serverInfo: { name: 'navvi', version: '2.0.0' },
        },
      });
      break;

    case 'notifications/initialized':
      break;

    case 'tools/call': {
      try {
        // Read params inside the try so a request without params/name is
        // answered with an error result rather than rejecting this promise.
        const { name, arguments: callArgs } = params || {};
        if (typeof name !== 'string') {
          throw new Error('tools/call requires a string "name" in params');
        }
        const result = await handleTool(name, callArgs || {});
        send({
          jsonrpc: '2.0',
          id,
          result: { content: [{ type: 'text', text: String(result) }] },
        });
      } catch (e) {
        send({
          jsonrpc: '2.0',
          id,
          result: { content: [{ type: 'text', text: `Error: ${e.message}` }], isError: true },
        });
      }
      break;
    }

    case 'tools/list':
      send({ jsonrpc: '2.0', id, result: { tools: TOOLS } });
      break;

    default:
      // A message is a notification only when it has no id at all. A
      // truthiness check would also silence requests whose id is 0 or "".
      if (id !== undefined) {
        send({ jsonrpc: '2.0', id, error: { code: -32601, message: `Unknown method: ${method}` } });
      }
  }
}
1272
+
1273
+ // Cleanup on exit
1274
+ process.on('exit', () => {
1275
+ killPidfile(PIDFILE_FWD);
1276
+ });
1277
+
1278
+ process.stderr.write('Navvi MCP server started (v2.0.0)\n');