@vellumai/cli 0.4.42 → 0.4.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lib/local.ts CHANGED
@@ -8,16 +8,15 @@ import {
8
8
  writeFileSync,
9
9
  } from "fs";
10
10
  import { createRequire } from "module";
11
- import { createConnection } from "net";
12
11
  import { homedir, hostname, networkInterfaces, platform } from "os";
13
12
  import { dirname, join } from "path";
14
13
 
15
14
  import {
16
- defaultLocalResources,
17
15
  loadLatestAssistant,
18
16
  type LocalInstanceResources,
19
17
  } from "./assistant-config.js";
20
18
  import { GATEWAY_PORT } from "./constants.js";
19
+ import { httpHealthCheck, waitForDaemonReady } from "./http-client.js";
21
20
  import { stopProcessByPidFile } from "./process.js";
22
21
  import { openLogFile, pipeToLogFile } from "./xdg-log.js";
23
22
 
@@ -136,23 +135,6 @@ function resolveAssistantIndexPath(): string | undefined {
136
135
  return undefined;
137
136
  }
138
137
 
139
- async function waitForSocketFile(
140
- socketPath: string,
141
- timeoutMs = 60000,
142
- ): Promise<boolean> {
143
- if (existsSync(socketPath)) return true;
144
-
145
- const start = Date.now();
146
- while (Date.now() - start < timeoutMs) {
147
- if (existsSync(socketPath)) {
148
- return true;
149
- }
150
- await new Promise((r) => setTimeout(r, 100));
151
- }
152
-
153
- return existsSync(socketPath);
154
- }
155
-
156
138
  function ensureBunInstalled(): void {
157
139
  const bunBinDir = join(homedir(), ".bun", "bin");
158
140
  const pathWithBun = [
@@ -216,18 +198,15 @@ function resolveDaemonMainPath(assistantIndex: string): string {
216
198
 
217
199
  async function startDaemonFromSource(
218
200
  assistantIndex: string,
219
- resources?: LocalInstanceResources,
201
+ resources: LocalInstanceResources,
220
202
  ): Promise<void> {
221
203
  const daemonMainPath = resolveDaemonMainPath(assistantIndex);
222
204
 
223
- const defaults = defaultLocalResources();
224
- const res = resources ?? defaults;
225
205
  // Ensure the directory containing PID/socket files exists. For named
226
206
  // instances this is instanceDir/.vellum/ (matching daemon's getRootDir()).
227
- mkdirSync(dirname(res.pidFile), { recursive: true });
207
+ mkdirSync(dirname(resources.pidFile), { recursive: true });
228
208
 
229
- const pidFile = res.pidFile;
230
- const socketFile = res.socketPath;
209
+ const pidFile = resources.pidFile;
231
210
 
232
211
  // --- Lifecycle guard: prevent split-brain daemon state ---
233
212
  if (existsSync(pidFile)) {
@@ -247,23 +226,21 @@ async function startDaemonFromSource(
247
226
  } catch {}
248
227
  }
249
228
 
250
- if (await isSocketResponsive(socketFile)) {
251
- const ownerPid = findSocketOwnerPid(socketFile);
252
- if (ownerPid) {
253
- writeFileSync(pidFile, String(ownerPid), "utf-8");
229
+ // PID file was stale or missing — check if daemon is responding via HTTP
230
+ if (await isDaemonResponsive(resources.daemonPort)) {
231
+ // Recover PID tracking so lifecycle commands (sleep, retire,
232
+ // stopLocalProcesses) can manage this daemon process.
233
+ const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
234
+ if (recoveredPid) {
254
235
  console.log(
255
- ` Assistant socket is responsive (pid ${ownerPid}) — skipping restart\n`,
236
+ ` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
256
237
  );
257
238
  } else {
258
- console.log(" Assistant socket is responsive — skipping restart\n");
239
+ console.log(" Assistant is responsive — skipping restart\n");
259
240
  }
260
241
  return;
261
242
  }
262
243
 
263
- try {
264
- unlinkSync(socketFile);
265
- } catch {}
266
-
267
244
  const env: Record<string, string | undefined> = {
268
245
  ...process.env,
269
246
  RUNTIME_HTTP_PORT: process.env.RUNTIME_HTTP_PORT || "7821",
@@ -277,7 +254,6 @@ async function startDaemonFromSource(
277
254
  env.BASE_DATA_DIR = resources.instanceDir;
278
255
  env.RUNTIME_HTTP_PORT = String(resources.daemonPort);
279
256
  env.GATEWAY_PORT = String(resources.gatewayPort);
280
- env.VELLUM_DAEMON_SOCKET = resources.socketPath;
281
257
  env.QDRANT_HTTP_PORT = String(resources.qdrantPort);
282
258
  delete env.QDRANT_URL;
283
259
  }
@@ -305,19 +281,16 @@ async function startDaemonFromSource(
305
281
  // assistant-side equivalent.
306
282
  async function startDaemonWatchFromSource(
307
283
  assistantIndex: string,
308
- resources?: LocalInstanceResources,
284
+ resources: LocalInstanceResources,
309
285
  ): Promise<void> {
310
286
  const mainPath = resolveDaemonMainPath(assistantIndex);
311
287
  if (!existsSync(mainPath)) {
312
288
  throw new Error(`Daemon main.ts not found at ${mainPath}`);
313
289
  }
314
290
 
315
- const defaults = defaultLocalResources();
316
- const res = resources ?? defaults;
317
- mkdirSync(dirname(res.pidFile), { recursive: true });
291
+ mkdirSync(dirname(resources.pidFile), { recursive: true });
318
292
 
319
- const pidFile = res.pidFile;
320
- const socketFile = res.socketPath;
293
+ const pidFile = resources.pidFile;
321
294
 
322
295
  // --- Lifecycle guard: prevent split-brain daemon state ---
323
296
  // If a daemon is already running, skip spawning a new one.
@@ -339,26 +312,21 @@ async function startDaemonWatchFromSource(
339
312
  } catch {}
340
313
  }
341
314
 
342
- // PID file was stale or missing, but a daemon with a different PID may
343
- // still be listening on the socket. Check before starting a new one.
344
- if (await isSocketResponsive(socketFile)) {
345
- const ownerPid = findSocketOwnerPid(socketFile);
346
- if (ownerPid) {
347
- writeFileSync(pidFile, String(ownerPid), "utf-8");
315
+ // PID file was stale or missing — check if daemon is responding via HTTP
316
+ if (await isDaemonResponsive(resources.daemonPort)) {
317
+ // Recover PID tracking so lifecycle commands (sleep, retire,
318
+ // stopLocalProcesses) can manage this daemon process.
319
+ const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
320
+ if (recoveredPid) {
348
321
  console.log(
349
- ` Assistant socket is responsive (pid ${ownerPid}) — skipping restart\n`,
322
+ ` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
350
323
  );
351
324
  } else {
352
- console.log(" Assistant socket is responsive — skipping restart\n");
325
+ console.log(" Assistant is responsive — skipping restart\n");
353
326
  }
354
327
  return;
355
328
  }
356
329
 
357
- // Socket is unresponsive or missing — safe to clean up and start fresh.
358
- try {
359
- unlinkSync(socketFile);
360
- } catch {}
361
-
362
330
  const env: Record<string, string | undefined> = {
363
331
  ...process.env,
364
332
  RUNTIME_HTTP_PORT: process.env.RUNTIME_HTTP_PORT || "7821",
@@ -368,7 +336,6 @@ async function startDaemonWatchFromSource(
368
336
  env.BASE_DATA_DIR = resources.instanceDir;
369
337
  env.RUNTIME_HTTP_PORT = String(resources.daemonPort);
370
338
  env.GATEWAY_PORT = String(resources.gatewayPort);
371
- env.VELLUM_DAEMON_SOCKET = resources.socketPath;
372
339
  env.QDRANT_HTTP_PORT = String(resources.qdrantPort);
373
340
  delete env.QDRANT_URL;
374
341
  }
@@ -457,59 +424,55 @@ function readWorkspaceIngressPublicBaseUrl(
457
424
  }
458
425
  }
459
426
 
460
- /** Use lsof to discover the PID of the process listening on a Unix socket.
461
- * Returns the PID if found, undefined otherwise. */
462
- function findSocketOwnerPid(socketPath: string): number | undefined {
427
+ /**
428
+ * Check if the daemon is responsive by hitting its HTTP `/healthz` endpoint.
429
+ * This replaces the socket-based `isSocketResponsive()` check.
430
+ */
431
+ async function isDaemonResponsive(daemonPort: number): Promise<boolean> {
432
+ return httpHealthCheck(daemonPort);
433
+ }
434
+
435
+ /**
436
+ * Find the PID of the process listening on the given TCP port.
437
+ * Uses `lsof` on macOS/Linux. Returns undefined if no listener is found
438
+ * or the command fails.
439
+ */
440
+ function findPidListeningOnPort(port: number): number | undefined {
463
441
  try {
464
442
  const output = execFileSync(
465
443
  "lsof",
466
- ["-U", "-a", "-F", "p", "--", socketPath],
467
- {
468
- encoding: "utf-8",
469
- timeout: 3000,
470
- stdio: ["ignore", "pipe", "ignore"],
471
- },
472
- );
473
- // lsof -F p outputs lines like "p1234" — extract the first PID
474
- const match = output.match(/^p(\d+)/m);
475
- if (match) {
476
- const pid = parseInt(match[1], 10);
477
- if (!isNaN(pid)) return pid;
478
- }
444
+ ["-iTCP:" + port, "-sTCP:LISTEN", "-t"],
445
+ { encoding: "utf-8", timeout: 3000, stdio: ["ignore", "pipe", "ignore"] },
446
+ ).trim();
447
+ // lsof -t may return multiple PIDs (one per line); take the first.
448
+ const pid = parseInt(output.split("\n")[0], 10);
449
+ return isNaN(pid) ? undefined : pid;
479
450
  } catch {
480
- // lsof not available or failed — cannot recover PID
451
+ return undefined;
481
452
  }
482
- return undefined;
483
453
  }
484
454
 
485
- /** Try a TCP connect to the Unix socket. Returns true if the handshake
486
- * completes within the timeout — false on connection refused, timeout,
487
- * or missing socket file. */
488
- function isSocketResponsive(
489
- socketPath: string,
490
- timeoutMs = 1500,
491
- ): Promise<boolean> {
492
- if (!existsSync(socketPath)) return Promise.resolve(false);
493
- return new Promise((resolve) => {
494
- const socket = createConnection(socketPath);
495
- const timer = setTimeout(() => {
496
- socket.destroy();
497
- resolve(false);
498
- }, timeoutMs);
499
- socket.on("connect", () => {
500
- clearTimeout(timer);
501
- socket.destroy();
502
- resolve(true);
503
- });
504
- socket.on("error", () => {
505
- clearTimeout(timer);
506
- socket.destroy();
507
- resolve(false);
508
- });
509
- });
455
+ /**
456
+ * Recover PID tracking for a daemon that is already responsive on its HTTP
457
+ * port but whose PID file is stale or missing. Looks up the listener PID
458
+ * via `lsof` and writes it to `pidFile` so lifecycle commands (sleep, retire,
459
+ * wake) can target the running process.
460
+ *
461
+ * Returns the recovered PID, or undefined if recovery failed.
462
+ */
463
+ function recoverPidFile(
464
+ pidFile: string,
465
+ daemonPort: number,
466
+ ): number | undefined {
467
+ const pid = findPidListeningOnPort(daemonPort);
468
+ if (pid) {
469
+ mkdirSync(dirname(pidFile), { recursive: true });
470
+ writeFileSync(pidFile, String(pid), "utf-8");
471
+ }
472
+ return pid;
510
473
  }
511
474
 
512
- async function discoverPublicUrl(port?: number): Promise<string | undefined> {
475
+ export async function discoverPublicUrl(port?: number): Promise<string | undefined> {
513
476
  const effectivePort = port ?? GATEWAY_PORT;
514
477
  const cloud = process.env.VELLUM_CLOUD;
515
478
 
@@ -577,7 +540,7 @@ async function discoverPublicUrl(port?: number): Promise<string | undefined> {
577
540
  * Returns the macOS Bonjour/mDNS `.local` hostname (e.g. "Vargass-Mac-Mini.local"),
578
541
  * or undefined if not running on macOS or the hostname cannot be determined.
579
542
  */
580
- function getMacLocalHostname(): string | undefined {
543
+ export function getMacLocalHostname(): string | undefined {
581
544
  const host = hostname();
582
545
  if (!host) return undefined;
583
546
  // macOS hostnames already end with .local when Bonjour is active
@@ -598,7 +561,7 @@ function getMacLocalHostname(): string | undefined {
598
561
  * Skips link-local addresses (169.254.x.x) and IPv6.
599
562
  * Returns undefined if no suitable address is found.
600
563
  */
601
- function getLocalLanIPv4(): string | undefined {
564
+ export function getLocalLanIPv4(): string | undefined {
602
565
  const ifaces = networkInterfaces();
603
566
 
604
567
  // Priority interfaces in order
@@ -641,7 +604,7 @@ function getLocalLanIPv4(): string | undefined {
641
604
  // assistant-side equivalent.
642
605
  export async function startLocalDaemon(
643
606
  watch: boolean = false,
644
- resources?: LocalInstanceResources,
607
+ resources: LocalInstanceResources,
645
608
  ): Promise<void> {
646
609
  if (process.env.VELLUM_DESKTOP_APP && !watch) {
647
610
  // When running inside the desktop app, the CLI owns the daemon lifecycle.
@@ -656,10 +619,7 @@ export async function startLocalDaemon(
656
619
  );
657
620
  }
658
621
 
659
- const defaults = defaultLocalResources();
660
- const res = resources ?? defaults;
661
- const pidFile = res.pidFile;
662
- const socketFile = res.socketPath;
622
+ const pidFile = resources.pidFile;
663
623
 
664
624
  // If a daemon is already running, skip spawning a new one.
665
625
  // This prevents cascading kill→restart cycles when multiple callers
@@ -685,19 +645,18 @@ export async function startLocalDaemon(
685
645
 
686
646
  if (!daemonAlive) {
687
647
  // The PID file was stale or missing, but a daemon with a different PID
688
- // may still be listening on the socket (e.g. if the PID file was
689
- // overwritten by a crashed restart attempt). Check before deleting.
690
- if (await isSocketResponsive(socketFile)) {
648
+ // may still be listening on the HTTP port (e.g. if the PID file was
649
+ // overwritten by a crashed restart attempt). Check before starting a new one.
650
+ if (await isDaemonResponsive(resources.daemonPort)) {
691
651
  // Restore PID tracking so lifecycle commands (sleep, retire,
692
652
  // stopLocalProcesses) can manage this daemon process.
693
- const ownerPid = findSocketOwnerPid(socketFile);
694
- if (ownerPid) {
695
- writeFileSync(pidFile, String(ownerPid), "utf-8");
653
+ const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
654
+ if (recoveredPid) {
696
655
  console.log(
697
- ` Assistant socket is responsive (pid ${ownerPid}) — skipping restart\n`,
656
+ ` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
698
657
  );
699
658
  } else {
700
- console.log(" Assistant socket is responsive — skipping restart\n");
659
+ console.log(" Assistant is responsive — skipping restart\n");
701
660
  }
702
661
  // Ensure bun is available for runtime features (browser, skills install)
703
662
  // even when reusing an existing daemon.
@@ -705,17 +664,12 @@ export async function startLocalDaemon(
705
664
  return;
706
665
  }
707
666
 
708
- // Socket is unresponsive or missing — safe to clean up and start fresh.
709
- try {
710
- unlinkSync(socketFile);
711
- } catch {}
712
-
713
667
  console.log("🔨 Starting assistant...");
714
668
 
715
669
  // Ensure bun is available for runtime features (browser, skills install)
716
670
  ensureBunInstalled();
717
671
 
718
- // Ensure the directory containing PID/socket files exists
672
+ // Ensure the directory containing PID files exists
719
673
  mkdirSync(dirname(pidFile), { recursive: true });
720
674
 
721
675
  // Build a minimal environment for the daemon. When launched from the
@@ -739,7 +693,6 @@ export async function startLocalDaemon(
739
693
  "RUNTIME_HTTP_PORT",
740
694
  "VELLUM_DAEMON_TCP_PORT",
741
695
  "VELLUM_DAEMON_TCP_HOST",
742
- "VELLUM_DAEMON_SOCKET",
743
696
  "VELLUM_KEYCHAIN_BROKER_SOCKET",
744
697
  "VELLUM_DEBUG",
745
698
  "SENTRY_DSN",
@@ -757,7 +710,6 @@ export async function startLocalDaemon(
757
710
  daemonEnv.BASE_DATA_DIR = resources.instanceDir;
758
711
  daemonEnv.RUNTIME_HTTP_PORT = String(resources.daemonPort);
759
712
  daemonEnv.GATEWAY_PORT = String(resources.gatewayPort);
760
- daemonEnv.VELLUM_DAEMON_SOCKET = resources.socketPath;
761
713
  daemonEnv.QDRANT_HTTP_PORT = String(resources.qdrantPort);
762
714
  delete daemonEnv.QDRANT_URL;
763
715
  }
@@ -791,34 +743,34 @@ export async function startLocalDaemon(
791
743
  ensureBunInstalled();
792
744
  }
793
745
 
794
- // Wait for socket at ~/.vellum/vellum.sock (up to 60s — fresh installs
746
+ // Wait for daemon to respond on HTTP (up to 60s — fresh installs
795
747
  // may need 30-60s for Qdrant download, migrations, and first-time init)
796
- let socketReady = await waitForSocketFile(socketFile, 60000);
748
+ let daemonReady = await waitForDaemonReady(resources.daemonPort, 60000);
797
749
 
798
- // Dev fallback: if the bundled daemon did not create a socket in time,
750
+ // Dev fallback: if the bundled daemon did not become ready in time,
799
751
  // fall back to source daemon startup so local `./build.sh run` still works.
800
- if (!socketReady) {
752
+ if (!daemonReady) {
801
753
  const assistantIndex = resolveAssistantIndexPath();
802
754
  if (assistantIndex) {
803
755
  console.log(
804
- " Bundled assistant socket not ready after 60s — falling back to source assistant...",
756
+ " Bundled assistant not ready after 60s — falling back to source assistant...",
805
757
  );
806
- // Kill the bundled daemon to avoid two processes competing for the same socket/port
807
- await stopProcessByPidFile(pidFile, "bundled daemon", [socketFile]);
758
+ // Kill the bundled daemon to avoid two processes competing for the same port
759
+ await stopProcessByPidFile(pidFile, "bundled daemon");
808
760
  if (watch) {
809
761
  await startDaemonWatchFromSource(assistantIndex, resources);
810
762
  } else {
811
763
  await startDaemonFromSource(assistantIndex, resources);
812
764
  }
813
- socketReady = await waitForSocketFile(socketFile, 60000);
765
+ daemonReady = await waitForDaemonReady(resources.daemonPort, 60000);
814
766
  }
815
767
  }
816
768
 
817
- if (socketReady) {
818
- console.log(" Assistant socket ready\n");
769
+ if (daemonReady) {
770
+ console.log(" Assistant ready\n");
819
771
  } else {
820
772
  console.log(
821
- " ⚠️ Assistant socket did not appear within 60s — continuing anyway\n",
773
+ " ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
822
774
  );
823
775
  }
824
776
  } else {
@@ -831,29 +783,26 @@ export async function startLocalDaemon(
831
783
  " Ensure the daemon binary is bundled alongside the CLI, or run from the source tree.",
832
784
  );
833
785
  }
834
- const defaults = defaultLocalResources();
835
- const res = resources ?? defaults;
836
-
837
786
  if (watch) {
838
787
  await startDaemonWatchFromSource(assistantIndex, resources);
839
788
 
840
- const socketReady = await waitForSocketFile(res.socketPath, 60000);
841
- if (socketReady) {
842
- console.log(" Assistant socket ready\n");
789
+ const daemonReady = await waitForDaemonReady(resources.daemonPort, 60000);
790
+ if (daemonReady) {
791
+ console.log(" Assistant ready\n");
843
792
  } else {
844
793
  console.log(
845
- " ⚠️ Assistant socket did not appear within 60s — continuing anyway\n",
794
+ " ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
846
795
  );
847
796
  }
848
797
  } else {
849
798
  await startDaemonFromSource(assistantIndex, resources);
850
799
 
851
- const socketReady = await waitForSocketFile(res.socketPath, 60000);
852
- if (socketReady) {
853
- console.log(" Assistant socket ready\n");
800
+ const daemonReady = await waitForDaemonReady(resources.daemonPort, 60000);
801
+ if (daemonReady) {
802
+ console.log(" Assistant ready\n");
854
803
  } else {
855
804
  console.log(
856
- " ⚠️ Assistant socket did not appear within 60s — continuing anyway\n",
805
+ " ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
857
806
  );
858
807
  }
859
808
  }
@@ -1078,7 +1027,7 @@ export async function startGateway(
1078
1027
 
1079
1028
  /**
1080
1029
  * Stop any locally-running daemon and gateway processes
1081
- * and clean up PID/socket files. Called when hatch fails partway through
1030
+ * and clean up PID files. Called when hatch fails partway through
1082
1031
  * so we don't leave orphaned processes with no lock file entry.
1083
1032
  *
1084
1033
  * When `resources` is provided, uses instance-specific paths instead of
@@ -1091,8 +1040,7 @@ export async function stopLocalProcesses(
1091
1040
  ? join(resources.instanceDir, ".vellum")
1092
1041
  : join(homedir(), ".vellum");
1093
1042
  const daemonPidFile = resources?.pidFile ?? join(vellumDir, "vellum.pid");
1094
- const socketFile = resources?.socketPath ?? join(vellumDir, "vellum.sock");
1095
- await stopProcessByPidFile(daemonPidFile, "daemon", [socketFile]);
1043
+ await stopProcessByPidFile(daemonPidFile, "daemon");
1096
1044
 
1097
1045
  const gatewayPidFile = join(vellumDir, "gateway.pid");
1098
1046
  await stopProcessByPidFile(gatewayPidFile, "gateway", undefined, 7000);
@@ -11,6 +11,11 @@ export function exec(
11
11
  stdio: ["pipe", "pipe", "pipe"],
12
12
  });
13
13
 
14
+ let stdout = "";
15
+ child.stdout.on("data", (data: Buffer) => {
16
+ stdout += data.toString();
17
+ });
18
+
14
19
  let stderr = "";
15
20
  child.stderr.on("data", (data: Buffer) => {
16
21
  stderr += data.toString();
@@ -21,7 +26,10 @@ export function exec(
21
26
  resolve();
22
27
  } else {
23
28
  const msg = `"${command} ${args.join(" ")}" exited with code ${code}`;
24
- reject(new Error(stderr.trim() ? `${msg}\n${stderr.trim()}` : msg));
29
+ const output = [stderr.trim(), stdout.trim()]
30
+ .filter(Boolean)
31
+ .join("\n");
32
+ reject(new Error(output ? `${msg}\n${output}` : msg));
25
33
  }
26
34
  });
27
35
  child.on("error", reject);