@vellumai/cli 0.4.42 → 0.4.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lib/local.ts CHANGED
@@ -1,6 +1,5 @@
1
1
  import { execFileSync, execSync, spawn } from "child_process";
2
2
  import {
3
- closeSync,
4
3
  existsSync,
5
4
  mkdirSync,
6
5
  readFileSync,
@@ -8,16 +7,15 @@ import {
8
7
  writeFileSync,
9
8
  } from "fs";
10
9
  import { createRequire } from "module";
11
- import { createConnection } from "net";
12
10
  import { homedir, hostname, networkInterfaces, platform } from "os";
13
11
  import { dirname, join } from "path";
14
12
 
15
13
  import {
16
- defaultLocalResources,
17
14
  loadLatestAssistant,
18
15
  type LocalInstanceResources,
19
16
  } from "./assistant-config.js";
20
17
  import { GATEWAY_PORT } from "./constants.js";
18
+ import { httpHealthCheck, waitForDaemonReady } from "./http-client.js";
21
19
  import { stopProcessByPidFile } from "./process.js";
22
20
  import { openLogFile, pipeToLogFile } from "./xdg-log.js";
23
21
 
@@ -136,23 +134,6 @@ function resolveAssistantIndexPath(): string | undefined {
136
134
  return undefined;
137
135
  }
138
136
 
139
- async function waitForSocketFile(
140
- socketPath: string,
141
- timeoutMs = 60000,
142
- ): Promise<boolean> {
143
- if (existsSync(socketPath)) return true;
144
-
145
- const start = Date.now();
146
- while (Date.now() - start < timeoutMs) {
147
- if (existsSync(socketPath)) {
148
- return true;
149
- }
150
- await new Promise((r) => setTimeout(r, 100));
151
- }
152
-
153
- return existsSync(socketPath);
154
- }
155
-
156
137
  function ensureBunInstalled(): void {
157
138
  const bunBinDir = join(homedir(), ".bun", "bin");
158
139
  const pathWithBun = [
@@ -216,18 +197,15 @@ function resolveDaemonMainPath(assistantIndex: string): string {
216
197
 
217
198
  async function startDaemonFromSource(
218
199
  assistantIndex: string,
219
- resources?: LocalInstanceResources,
200
+ resources: LocalInstanceResources,
220
201
  ): Promise<void> {
221
202
  const daemonMainPath = resolveDaemonMainPath(assistantIndex);
222
203
 
223
- const defaults = defaultLocalResources();
224
- const res = resources ?? defaults;
225
204
  // Ensure the directory containing PID/socket files exists. For named
226
205
  // instances this is instanceDir/.vellum/ (matching daemon's getRootDir()).
227
- mkdirSync(dirname(res.pidFile), { recursive: true });
206
+ mkdirSync(dirname(resources.pidFile), { recursive: true });
228
207
 
229
- const pidFile = res.pidFile;
230
- const socketFile = res.socketPath;
208
+ const pidFile = resources.pidFile;
231
209
 
232
210
  // --- Lifecycle guard: prevent split-brain daemon state ---
233
211
  if (existsSync(pidFile)) {
@@ -247,23 +225,21 @@ async function startDaemonFromSource(
247
225
  } catch {}
248
226
  }
249
227
 
250
- if (await isSocketResponsive(socketFile)) {
251
- const ownerPid = findSocketOwnerPid(socketFile);
252
- if (ownerPid) {
253
- writeFileSync(pidFile, String(ownerPid), "utf-8");
228
+ // PID file was stale or missing — check if daemon is responding via HTTP
229
+ if (await isDaemonResponsive(resources.daemonPort)) {
230
+ // Recover PID tracking so lifecycle commands (sleep, retire,
231
+ // stopLocalProcesses) can manage this daemon process.
232
+ const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
233
+ if (recoveredPid) {
254
234
  console.log(
255
- ` Assistant socket is responsive (pid ${ownerPid}) — skipping restart\n`,
235
+ ` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
256
236
  );
257
237
  } else {
258
- console.log(" Assistant socket is responsive — skipping restart\n");
238
+ console.log(" Assistant is responsive — skipping restart\n");
259
239
  }
260
240
  return;
261
241
  }
262
242
 
263
- try {
264
- unlinkSync(socketFile);
265
- } catch {}
266
-
267
243
  const env: Record<string, string | undefined> = {
268
244
  ...process.env,
269
245
  RUNTIME_HTTP_PORT: process.env.RUNTIME_HTTP_PORT || "7821",
@@ -277,21 +253,17 @@ async function startDaemonFromSource(
277
253
  env.BASE_DATA_DIR = resources.instanceDir;
278
254
  env.RUNTIME_HTTP_PORT = String(resources.daemonPort);
279
255
  env.GATEWAY_PORT = String(resources.gatewayPort);
280
- env.VELLUM_DAEMON_SOCKET = resources.socketPath;
281
256
  env.QDRANT_HTTP_PORT = String(resources.qdrantPort);
282
257
  delete env.QDRANT_URL;
283
258
  }
284
259
 
285
- // Use fd inheritance instead of pipes so the daemon's stdout/stderr survive
286
- // after the parent (hatch) exits. Bun does not ignore SIGPIPE, so piped
287
- // stdio would kill the daemon on its first write after the parent closes.
288
- const logFd = openLogFile("hatch.log");
260
+ const daemonLogFd = openLogFile("hatch.log");
289
261
  const child = spawn("bun", ["run", daemonMainPath], {
290
262
  detached: true,
291
- stdio: ["ignore", logFd, logFd],
263
+ stdio: ["ignore", "pipe", "pipe"],
292
264
  env,
293
265
  });
294
- if (typeof logFd === "number") closeSync(logFd);
266
+ pipeToLogFile(child, daemonLogFd, "daemon");
295
267
  child.unref();
296
268
 
297
269
  if (child.pid) {
@@ -305,19 +277,16 @@ async function startDaemonFromSource(
305
277
  // assistant-side equivalent.
306
278
  async function startDaemonWatchFromSource(
307
279
  assistantIndex: string,
308
- resources?: LocalInstanceResources,
280
+ resources: LocalInstanceResources,
309
281
  ): Promise<void> {
310
282
  const mainPath = resolveDaemonMainPath(assistantIndex);
311
283
  if (!existsSync(mainPath)) {
312
284
  throw new Error(`Daemon main.ts not found at ${mainPath}`);
313
285
  }
314
286
 
315
- const defaults = defaultLocalResources();
316
- const res = resources ?? defaults;
317
- mkdirSync(dirname(res.pidFile), { recursive: true });
287
+ mkdirSync(dirname(resources.pidFile), { recursive: true });
318
288
 
319
- const pidFile = res.pidFile;
320
- const socketFile = res.socketPath;
289
+ const pidFile = resources.pidFile;
321
290
 
322
291
  // --- Lifecycle guard: prevent split-brain daemon state ---
323
292
  // If a daemon is already running, skip spawning a new one.
@@ -339,26 +308,21 @@ async function startDaemonWatchFromSource(
339
308
  } catch {}
340
309
  }
341
310
 
342
- // PID file was stale or missing, but a daemon with a different PID may
343
- // still be listening on the socket. Check before starting a new one.
344
- if (await isSocketResponsive(socketFile)) {
345
- const ownerPid = findSocketOwnerPid(socketFile);
346
- if (ownerPid) {
347
- writeFileSync(pidFile, String(ownerPid), "utf-8");
311
+ // PID file was stale or missing — check if daemon is responding via HTTP
312
+ if (await isDaemonResponsive(resources.daemonPort)) {
313
+ // Recover PID tracking so lifecycle commands (sleep, retire,
314
+ // stopLocalProcesses) can manage this daemon process.
315
+ const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
316
+ if (recoveredPid) {
348
317
  console.log(
349
- ` Assistant socket is responsive (pid ${ownerPid}) — skipping restart\n`,
318
+ ` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
350
319
  );
351
320
  } else {
352
- console.log(" Assistant socket is responsive — skipping restart\n");
321
+ console.log(" Assistant is responsive — skipping restart\n");
353
322
  }
354
323
  return;
355
324
  }
356
325
 
357
- // Socket is unresponsive or missing — safe to clean up and start fresh.
358
- try {
359
- unlinkSync(socketFile);
360
- } catch {}
361
-
362
326
  const env: Record<string, string | undefined> = {
363
327
  ...process.env,
364
328
  RUNTIME_HTTP_PORT: process.env.RUNTIME_HTTP_PORT || "7821",
@@ -368,7 +332,6 @@ async function startDaemonWatchFromSource(
368
332
  env.BASE_DATA_DIR = resources.instanceDir;
369
333
  env.RUNTIME_HTTP_PORT = String(resources.daemonPort);
370
334
  env.GATEWAY_PORT = String(resources.gatewayPort);
371
- env.VELLUM_DAEMON_SOCKET = resources.socketPath;
372
335
  env.QDRANT_HTTP_PORT = String(resources.qdrantPort);
373
336
  delete env.QDRANT_URL;
374
337
  }
@@ -457,59 +420,57 @@ function readWorkspaceIngressPublicBaseUrl(
457
420
  }
458
421
  }
459
422
 
460
- /** Use lsof to discover the PID of the process listening on a Unix socket.
461
- * Returns the PID if found, undefined otherwise. */
462
- function findSocketOwnerPid(socketPath: string): number | undefined {
423
+ /**
424
+ * Check if the daemon is responsive by hitting its HTTP `/healthz` endpoint.
425
+ * This replaces the socket-based `isSocketResponsive()` check.
426
+ */
427
+ async function isDaemonResponsive(daemonPort: number): Promise<boolean> {
428
+ return httpHealthCheck(daemonPort);
429
+ }
430
+
431
+ /**
432
+ * Find the PID of the process listening on the given TCP port.
433
+ * Uses `lsof` on macOS/Linux. Returns undefined if no listener is found
434
+ * or the command fails.
435
+ */
436
+ function findPidListeningOnPort(port: number): number | undefined {
463
437
  try {
464
438
  const output = execFileSync(
465
439
  "lsof",
466
- ["-U", "-a", "-F", "p", "--", socketPath],
467
- {
468
- encoding: "utf-8",
469
- timeout: 3000,
470
- stdio: ["ignore", "pipe", "ignore"],
471
- },
472
- );
473
- // lsof -F p outputs lines like "p1234" — extract the first PID
474
- const match = output.match(/^p(\d+)/m);
475
- if (match) {
476
- const pid = parseInt(match[1], 10);
477
- if (!isNaN(pid)) return pid;
478
- }
440
+ ["-iTCP:" + port, "-sTCP:LISTEN", "-t"],
441
+ { encoding: "utf-8", timeout: 3000, stdio: ["ignore", "pipe", "ignore"] },
442
+ ).trim();
443
+ // lsof -t may return multiple PIDs (one per line); take the first.
444
+ const pid = parseInt(output.split("\n")[0], 10);
445
+ return isNaN(pid) ? undefined : pid;
479
446
  } catch {
480
- // lsof not available or failed — cannot recover PID
447
+ return undefined;
481
448
  }
482
- return undefined;
483
449
  }
484
450
 
485
- /** Try a TCP connect to the Unix socket. Returns true if the handshake
486
- * completes within the timeout — false on connection refused, timeout,
487
- * or missing socket file. */
488
- function isSocketResponsive(
489
- socketPath: string,
490
- timeoutMs = 1500,
491
- ): Promise<boolean> {
492
- if (!existsSync(socketPath)) return Promise.resolve(false);
493
- return new Promise((resolve) => {
494
- const socket = createConnection(socketPath);
495
- const timer = setTimeout(() => {
496
- socket.destroy();
497
- resolve(false);
498
- }, timeoutMs);
499
- socket.on("connect", () => {
500
- clearTimeout(timer);
501
- socket.destroy();
502
- resolve(true);
503
- });
504
- socket.on("error", () => {
505
- clearTimeout(timer);
506
- socket.destroy();
507
- resolve(false);
508
- });
509
- });
451
+ /**
452
+ * Recover PID tracking for a daemon that is already responsive on its HTTP
453
+ * port but whose PID file is stale or missing. Looks up the listener PID
454
+ * via `lsof` and writes it to `pidFile` so lifecycle commands (sleep, retire,
455
+ * wake) can target the running process.
456
+ *
457
+ * Returns the recovered PID, or undefined if recovery failed.
458
+ */
459
+ function recoverPidFile(
460
+ pidFile: string,
461
+ daemonPort: number,
462
+ ): number | undefined {
463
+ const pid = findPidListeningOnPort(daemonPort);
464
+ if (pid) {
465
+ mkdirSync(dirname(pidFile), { recursive: true });
466
+ writeFileSync(pidFile, String(pid), "utf-8");
467
+ }
468
+ return pid;
510
469
  }
511
470
 
512
- async function discoverPublicUrl(port?: number): Promise<string | undefined> {
471
+ export async function discoverPublicUrl(
472
+ port?: number,
473
+ ): Promise<string | undefined> {
513
474
  const effectivePort = port ?? GATEWAY_PORT;
514
475
  const cloud = process.env.VELLUM_CLOUD;
515
476
 
@@ -577,7 +538,7 @@ async function discoverPublicUrl(port?: number): Promise<string | undefined> {
577
538
  * Returns the macOS Bonjour/mDNS `.local` hostname (e.g. "Vargass-Mac-Mini.local"),
578
539
  * or undefined if not running on macOS or the hostname cannot be determined.
579
540
  */
580
- function getMacLocalHostname(): string | undefined {
541
+ export function getMacLocalHostname(): string | undefined {
581
542
  const host = hostname();
582
543
  if (!host) return undefined;
583
544
  // macOS hostnames already end with .local when Bonjour is active
@@ -598,7 +559,7 @@ function getMacLocalHostname(): string | undefined {
598
559
  * Skips link-local addresses (169.254.x.x) and IPv6.
599
560
  * Returns undefined if no suitable address is found.
600
561
  */
601
- function getLocalLanIPv4(): string | undefined {
562
+ export function getLocalLanIPv4(): string | undefined {
602
563
  const ifaces = networkInterfaces();
603
564
 
604
565
  // Priority interfaces in order
@@ -641,7 +602,7 @@ function getLocalLanIPv4(): string | undefined {
641
602
  // assistant-side equivalent.
642
603
  export async function startLocalDaemon(
643
604
  watch: boolean = false,
644
- resources?: LocalInstanceResources,
605
+ resources: LocalInstanceResources,
645
606
  ): Promise<void> {
646
607
  if (process.env.VELLUM_DESKTOP_APP && !watch) {
647
608
  // When running inside the desktop app, the CLI owns the daemon lifecycle.
@@ -656,10 +617,7 @@ export async function startLocalDaemon(
656
617
  );
657
618
  }
658
619
 
659
- const defaults = defaultLocalResources();
660
- const res = resources ?? defaults;
661
- const pidFile = res.pidFile;
662
- const socketFile = res.socketPath;
620
+ const pidFile = resources.pidFile;
663
621
 
664
622
  // If a daemon is already running, skip spawning a new one.
665
623
  // This prevents cascading kill→restart cycles when multiple callers
@@ -685,19 +643,18 @@ export async function startLocalDaemon(
685
643
 
686
644
  if (!daemonAlive) {
687
645
  // The PID file was stale or missing, but a daemon with a different PID
688
- // may still be listening on the socket (e.g. if the PID file was
689
- // overwritten by a crashed restart attempt). Check before deleting.
690
- if (await isSocketResponsive(socketFile)) {
646
+ // may still be listening on the HTTP port (e.g. if the PID file was
647
+ // overwritten by a crashed restart attempt). Check before starting a new one.
648
+ if (await isDaemonResponsive(resources.daemonPort)) {
691
649
  // Restore PID tracking so lifecycle commands (sleep, retire,
692
650
  // stopLocalProcesses) can manage this daemon process.
693
- const ownerPid = findSocketOwnerPid(socketFile);
694
- if (ownerPid) {
695
- writeFileSync(pidFile, String(ownerPid), "utf-8");
651
+ const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
652
+ if (recoveredPid) {
696
653
  console.log(
697
- ` Assistant socket is responsive (pid ${ownerPid}) — skipping restart\n`,
654
+ ` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
698
655
  );
699
656
  } else {
700
- console.log(" Assistant socket is responsive — skipping restart\n");
657
+ console.log(" Assistant is responsive — skipping restart\n");
701
658
  }
702
659
  // Ensure bun is available for runtime features (browser, skills install)
703
660
  // even when reusing an existing daemon.
@@ -705,17 +662,12 @@ export async function startLocalDaemon(
705
662
  return;
706
663
  }
707
664
 
708
- // Socket is unresponsive or missing — safe to clean up and start fresh.
709
- try {
710
- unlinkSync(socketFile);
711
- } catch {}
712
-
713
665
  console.log("🔨 Starting assistant...");
714
666
 
715
667
  // Ensure bun is available for runtime features (browser, skills install)
716
668
  ensureBunInstalled();
717
669
 
718
- // Ensure the directory containing PID/socket files exists
670
+ // Ensure the directory containing PID files exists
719
671
  mkdirSync(dirname(pidFile), { recursive: true });
720
672
 
721
673
  // Build a minimal environment for the daemon. When launched from the
@@ -739,7 +691,6 @@ export async function startLocalDaemon(
739
691
  "RUNTIME_HTTP_PORT",
740
692
  "VELLUM_DAEMON_TCP_PORT",
741
693
  "VELLUM_DAEMON_TCP_HOST",
742
- "VELLUM_DAEMON_SOCKET",
743
694
  "VELLUM_KEYCHAIN_BROKER_SOCKET",
744
695
  "VELLUM_DEBUG",
745
696
  "SENTRY_DSN",
@@ -757,23 +708,18 @@ export async function startLocalDaemon(
757
708
  daemonEnv.BASE_DATA_DIR = resources.instanceDir;
758
709
  daemonEnv.RUNTIME_HTTP_PORT = String(resources.daemonPort);
759
710
  daemonEnv.GATEWAY_PORT = String(resources.gatewayPort);
760
- daemonEnv.VELLUM_DAEMON_SOCKET = resources.socketPath;
761
711
  daemonEnv.QDRANT_HTTP_PORT = String(resources.qdrantPort);
762
712
  delete daemonEnv.QDRANT_URL;
763
713
  }
764
714
 
765
- // Use fd inheritance instead of pipes so the daemon's stdout/stderr
766
- // survive after the parent (hatch) exits. Bun does not ignore SIGPIPE,
767
- // so piped stdio would kill the daemon on its first write after the
768
- // parent closes.
769
715
  const daemonLogFd = openLogFile("hatch.log");
770
716
  const child = spawn(daemonBinary, [], {
771
717
  cwd: dirname(daemonBinary),
772
718
  detached: true,
773
- stdio: ["ignore", daemonLogFd, daemonLogFd],
719
+ stdio: ["ignore", "pipe", "pipe"],
774
720
  env: daemonEnv,
775
721
  });
776
- if (typeof daemonLogFd === "number") closeSync(daemonLogFd);
722
+ pipeToLogFile(child, daemonLogFd, "daemon");
777
723
  child.unref();
778
724
  const daemonPid = child.pid;
779
725
 
@@ -791,34 +737,34 @@ export async function startLocalDaemon(
791
737
  ensureBunInstalled();
792
738
  }
793
739
 
794
- // Wait for socket at ~/.vellum/vellum.sock (up to 60s — fresh installs
740
+ // Wait for daemon to respond on HTTP (up to 60s — fresh installs
795
741
  // may need 30-60s for Qdrant download, migrations, and first-time init)
796
- let socketReady = await waitForSocketFile(socketFile, 60000);
742
+ let daemonReady = await waitForDaemonReady(resources.daemonPort, 60000);
797
743
 
798
- // Dev fallback: if the bundled daemon did not create a socket in time,
744
+ // Dev fallback: if the bundled daemon did not become ready in time,
799
745
  // fall back to source daemon startup so local `./build.sh run` still works.
800
- if (!socketReady) {
746
+ if (!daemonReady) {
801
747
  const assistantIndex = resolveAssistantIndexPath();
802
748
  if (assistantIndex) {
803
749
  console.log(
804
- " Bundled assistant socket not ready after 60s — falling back to source assistant...",
750
+ " Bundled assistant not ready after 60s — falling back to source assistant...",
805
751
  );
806
- // Kill the bundled daemon to avoid two processes competing for the same socket/port
807
- await stopProcessByPidFile(pidFile, "bundled daemon", [socketFile]);
752
+ // Kill the bundled daemon to avoid two processes competing for the same port
753
+ await stopProcessByPidFile(pidFile, "bundled daemon");
808
754
  if (watch) {
809
755
  await startDaemonWatchFromSource(assistantIndex, resources);
810
756
  } else {
811
757
  await startDaemonFromSource(assistantIndex, resources);
812
758
  }
813
- socketReady = await waitForSocketFile(socketFile, 60000);
759
+ daemonReady = await waitForDaemonReady(resources.daemonPort, 60000);
814
760
  }
815
761
  }
816
762
 
817
- if (socketReady) {
818
- console.log(" Assistant socket ready\n");
763
+ if (daemonReady) {
764
+ console.log(" Assistant ready\n");
819
765
  } else {
820
766
  console.log(
821
- " ⚠️ Assistant socket did not appear within 60s — continuing anyway\n",
767
+ " ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
822
768
  );
823
769
  }
824
770
  } else {
@@ -831,29 +777,26 @@ export async function startLocalDaemon(
831
777
  " Ensure the daemon binary is bundled alongside the CLI, or run from the source tree.",
832
778
  );
833
779
  }
834
- const defaults = defaultLocalResources();
835
- const res = resources ?? defaults;
836
-
837
780
  if (watch) {
838
781
  await startDaemonWatchFromSource(assistantIndex, resources);
839
782
 
840
- const socketReady = await waitForSocketFile(res.socketPath, 60000);
841
- if (socketReady) {
842
- console.log(" Assistant socket ready\n");
783
+ const daemonReady = await waitForDaemonReady(resources.daemonPort, 60000);
784
+ if (daemonReady) {
785
+ console.log(" Assistant ready\n");
843
786
  } else {
844
787
  console.log(
845
- " ⚠️ Assistant socket did not appear within 60s — continuing anyway\n",
788
+ " ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
846
789
  );
847
790
  }
848
791
  } else {
849
792
  await startDaemonFromSource(assistantIndex, resources);
850
793
 
851
- const socketReady = await waitForSocketFile(res.socketPath, 60000);
852
- if (socketReady) {
853
- console.log(" Assistant socket ready\n");
794
+ const daemonReady = await waitForDaemonReady(resources.daemonPort, 60000);
795
+ if (daemonReady) {
796
+ console.log(" Assistant ready\n");
854
797
  } else {
855
798
  console.log(
856
- " ⚠️ Assistant socket did not appear within 60s — continuing anyway\n",
799
+ " ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
857
800
  );
858
801
  }
859
802
  }
@@ -867,6 +810,16 @@ export async function startGateway(
867
810
  ): Promise<string> {
868
811
  const effectiveGatewayPort = resources?.gatewayPort ?? GATEWAY_PORT;
869
812
 
813
+ // Kill any existing gateway process before spawning a new one.
814
+ // Without this, crashed/stale gateways accumulate as zombies — the old
815
+ // process holds the port (or lingers after losing it), and every restart
816
+ // attempt spawns yet another process that fails with EADDRINUSE.
817
+ const gwPidDir = resources
818
+ ? join(resources.instanceDir, ".vellum")
819
+ : join(homedir(), ".vellum");
820
+ const gwPidFile = join(gwPidDir, "gateway.pid");
821
+ await stopProcessByPidFile(gwPidFile, "gateway");
822
+
870
823
  const publicUrl = await discoverPublicUrl(effectiveGatewayPort);
871
824
  if (publicUrl) {
872
825
  console.log(` Public URL: ${publicUrl}`);
@@ -1003,15 +956,13 @@ export async function startGateway(
1003
956
  );
1004
957
  }
1005
958
 
1006
- // Use fd inheritance (not pipes) so the gateway survives after the
1007
- // hatch CLI exits — Bun does not ignore SIGPIPE.
1008
959
  const gatewayLogFd = openLogFile("hatch.log");
1009
960
  gateway = spawn(gatewayBinary, [], {
1010
961
  detached: true,
1011
- stdio: ["ignore", gatewayLogFd, gatewayLogFd],
962
+ stdio: ["ignore", "pipe", "pipe"],
1012
963
  env: gatewayEnv,
1013
964
  });
1014
- if (typeof gatewayLogFd === "number") closeSync(gatewayLogFd);
965
+ pipeToLogFile(gateway, gatewayLogFd, "gateway");
1015
966
  } else {
1016
967
  // Source tree / bunx: resolve the gateway source directory and run via bun.
1017
968
  const gatewayDir = resolveGatewayDir();
@@ -1022,10 +973,10 @@ export async function startGateway(
1022
973
  gateway = spawn("bun", bunArgs, {
1023
974
  cwd: gatewayDir,
1024
975
  detached: true,
1025
- stdio: ["ignore", gwLogFd, gwLogFd],
976
+ stdio: ["ignore", "pipe", "pipe"],
1026
977
  env: gatewayEnv,
1027
978
  });
1028
- if (typeof gwLogFd === "number") closeSync(gwLogFd);
979
+ pipeToLogFile(gateway, gwLogFd, "gateway");
1029
980
  if (watch) {
1030
981
  console.log(" Gateway started in watch mode (bun --watch)");
1031
982
  }
@@ -1078,7 +1029,7 @@ export async function startGateway(
1078
1029
 
1079
1030
  /**
1080
1031
  * Stop any locally-running daemon and gateway processes
1081
- * and clean up PID/socket files. Called when hatch fails partway through
1032
+ * and clean up PID files. Called when hatch fails partway through
1082
1033
  * so we don't leave orphaned processes with no lock file entry.
1083
1034
  *
1084
1035
  * When `resources` is provided, uses instance-specific paths instead of
@@ -1091,8 +1042,7 @@ export async function stopLocalProcesses(
1091
1042
  ? join(resources.instanceDir, ".vellum")
1092
1043
  : join(homedir(), ".vellum");
1093
1044
  const daemonPidFile = resources?.pidFile ?? join(vellumDir, "vellum.pid");
1094
- const socketFile = resources?.socketPath ?? join(vellumDir, "vellum.sock");
1095
- await stopProcessByPidFile(daemonPidFile, "daemon", [socketFile]);
1045
+ await stopProcessByPidFile(daemonPidFile, "daemon");
1096
1046
 
1097
1047
  const gatewayPidFile = join(vellumDir, "gateway.pid");
1098
1048
  await stopProcessByPidFile(gatewayPidFile, "gateway", undefined, 7000);
@@ -11,6 +11,11 @@ export function exec(
11
11
  stdio: ["pipe", "pipe", "pipe"],
12
12
  });
13
13
 
14
+ let stdout = "";
15
+ child.stdout.on("data", (data: Buffer) => {
16
+ stdout += data.toString();
17
+ });
18
+
14
19
  let stderr = "";
15
20
  child.stderr.on("data", (data: Buffer) => {
16
21
  stderr += data.toString();
@@ -21,7 +26,10 @@ export function exec(
21
26
  resolve();
22
27
  } else {
23
28
  const msg = `"${command} ${args.join(" ")}" exited with code ${code}`;
24
- reject(new Error(stderr.trim() ? `${msg}\n${stderr.trim()}` : msg));
29
+ const output = [stderr.trim(), stdout.trim()]
30
+ .filter(Boolean)
31
+ .join("\n");
32
+ reject(new Error(output ? `${msg}\n${output}` : msg));
25
33
  }
26
34
  });
27
35
  child.on("error", reject);
@@ -1,8 +1,20 @@
1
- import { closeSync, mkdirSync, openSync, writeSync } from "fs";
2
1
  import type { ChildProcess } from "child_process";
2
+ import {
3
+ closeSync,
4
+ copyFileSync,
5
+ existsSync,
6
+ mkdirSync,
7
+ openSync,
8
+ statSync,
9
+ writeFileSync,
10
+ writeSync,
11
+ } from "fs";
3
12
  import { homedir } from "os";
4
13
  import { join } from "path";
5
14
 
15
+ /** Regex matching pino-pretty's short time prefix, e.g. `[12:07:37.467] `. */
16
+ const PINO_TIME_RE = /^\[\d{2}:\d{2}:\d{2}\.\d{3}\]\s*/;
17
+
6
18
  /** Returns the XDG-compatible log directory for Vellum CLI logs. */
7
19
  export function getLogDir(): string {
8
20
  const configHome = process.env.XDG_CONFIG_HOME || join(homedir(), ".config");
@@ -23,6 +35,36 @@ export function openLogFile(name: string): number | "ignore" {
23
35
  }
24
36
  }
25
37
 
38
+ /** Truncate (or create) a log file so each session starts fresh. */
39
+ export function resetLogFile(name: string): void {
40
+ try {
41
+ const dir = getLogDir();
42
+ mkdirSync(dir, { recursive: true });
43
+ writeFileSync(join(dir, name), "");
44
+ } catch {
45
+ /* best-effort */
46
+ }
47
+ }
48
+
49
+ /**
50
+ * Copy the current log file into `destDir` with a timestamped name so that
51
+ * previous session logs are preserved for debugging. No-op when the source
52
+ * file is missing or empty.
53
+ */
54
+ export function archiveLogFile(name: string, destDir: string): void {
55
+ try {
56
+ const srcPath = join(getLogDir(), name);
57
+ if (!existsSync(srcPath) || statSync(srcPath).size === 0) return;
58
+
59
+ mkdirSync(destDir, { recursive: true });
60
+ const ts = new Date().toISOString().replace(/[:.]/g, "-");
61
+ const base = name.replace(/\.log$/, "");
62
+ copyFileSync(srcPath, join(destDir, `${base}-${ts}.log`));
63
+ } catch {
64
+ /* best-effort */
65
+ }
66
+ }
67
+
26
68
  /** Close a file descriptor returned by openLogFile (no-op for "ignore"). */
27
69
  export function closeLogFile(fd: number | "ignore"): void {
28
70
  if (typeof fd === "number") {
@@ -46,7 +88,8 @@ export function writeToLogFile(fd: number | "ignore", msg: string): void {
46
88
  }
47
89
 
48
90
  /** Pipe a child process's stdout/stderr to a shared log file descriptor,
49
- * prefixing each line with a tag (e.g. "[daemon]" or "[gateway]").
91
+ * prefixing each line with an ISO timestamp and tag (e.g. "[daemon]").
92
+ * Strips pino-pretty's redundant short time prefix when present.
50
93
  * Streams are unref'd so they don't prevent the parent from exiting.
51
94
  * The fd is closed automatically when both streams end. */
52
95
  export function pipeToLogFile(
@@ -80,9 +123,10 @@ export function pipeToLogFile(
80
123
  for (let i = 0; i < lines.length; i++) {
81
124
  if (i === lines.length - 1 && lines[i] === "") break;
82
125
  const nl = i < lines.length - 1 ? "\n" : "";
126
+ const stripped = lines[i].replace(PINO_TIME_RE, "");
83
127
  const prefix = `${new Date().toISOString()} ${tagLabel} `;
84
128
  try {
85
- writeSync(numFd, prefix + lines[i] + nl);
129
+ writeSync(numFd, prefix + stripped + nl);
86
130
  } catch {
87
131
  /* best-effort */
88
132
  }