@vellumai/cli 0.8.5 → 0.8.7-dev.202606052118.34cd356

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/AGENTS.md +6 -0
  2. package/bun.lock +8 -0
  3. package/knip.json +6 -1
  4. package/node_modules/@vellumai/environments/bun.lock +24 -0
  5. package/node_modules/@vellumai/environments/package.json +18 -0
  6. package/node_modules/@vellumai/environments/src/__tests__/package-boundary.test.ts +95 -0
  7. package/node_modules/@vellumai/environments/src/index.ts +11 -0
  8. package/{src/lib/environments → node_modules/@vellumai/environments/src}/seeds.ts +5 -9
  9. package/node_modules/@vellumai/environments/tsconfig.json +20 -0
  10. package/node_modules/@vellumai/local-mode/bun.lock +29 -0
  11. package/node_modules/@vellumai/local-mode/package.json +22 -0
  12. package/node_modules/@vellumai/local-mode/src/__tests__/environment.test.ts +116 -0
  13. package/node_modules/@vellumai/local-mode/src/__tests__/gateway-proxy.test.ts +79 -0
  14. package/node_modules/@vellumai/local-mode/src/__tests__/hatch.test.ts +108 -0
  15. package/node_modules/@vellumai/local-mode/src/__tests__/package-boundary.test.ts +104 -0
  16. package/node_modules/@vellumai/local-mode/src/__tests__/wake.test.ts +66 -0
  17. package/node_modules/@vellumai/local-mode/src/config.ts +66 -0
  18. package/node_modules/@vellumai/local-mode/src/environment.ts +62 -0
  19. package/node_modules/@vellumai/local-mode/src/gateway-proxy.ts +109 -0
  20. package/node_modules/@vellumai/local-mode/src/guardian-token.ts +122 -0
  21. package/node_modules/@vellumai/local-mode/src/hatch.ts +92 -0
  22. package/node_modules/@vellumai/local-mode/src/index.ts +48 -0
  23. package/node_modules/@vellumai/local-mode/src/lockfile-contract.test.ts +173 -0
  24. package/node_modules/@vellumai/local-mode/src/lockfile-contract.ts +114 -0
  25. package/node_modules/@vellumai/local-mode/src/lockfile.test.ts +235 -0
  26. package/node_modules/@vellumai/local-mode/src/lockfile.ts +133 -0
  27. package/node_modules/@vellumai/local-mode/src/retire.ts +58 -0
  28. package/node_modules/@vellumai/local-mode/src/util.ts +102 -0
  29. package/node_modules/@vellumai/local-mode/src/wake.ts +78 -0
  30. package/node_modules/@vellumai/local-mode/tsconfig.json +16 -0
  31. package/package.json +12 -1
  32. package/src/__tests__/assistant-client-refresh.test.ts +182 -0
  33. package/src/__tests__/backup.test.ts +38 -0
  34. package/src/__tests__/clean.test.ts +179 -0
  35. package/src/__tests__/client-token.test.ts +87 -0
  36. package/src/__tests__/client-tui-refresh.test.ts +170 -0
  37. package/src/__tests__/cloudflare-tunnel.test.ts +137 -0
  38. package/src/__tests__/connect-import.test.ts +317 -0
  39. package/src/__tests__/devices.test.ts +272 -0
  40. package/src/__tests__/env-drift.test.ts +32 -44
  41. package/src/__tests__/flags.test.ts +248 -0
  42. package/src/__tests__/guardian-token.test.ts +126 -2
  43. package/src/__tests__/multi-local.test.ts +1 -1
  44. package/src/__tests__/orphan-detection.test.ts +8 -6
  45. package/src/__tests__/pair.test.ts +271 -0
  46. package/src/__tests__/paired-lifecycle.test.ts +116 -0
  47. package/src/__tests__/recover.test.ts +307 -0
  48. package/src/__tests__/segments-to-plain-text.test.ts +37 -0
  49. package/src/__tests__/tui-midsession-refresh.test.ts +166 -0
  50. package/src/__tests__/unpair.test.ts +163 -0
  51. package/src/__tests__/wake.test.ts +215 -0
  52. package/src/commands/backup.ts +2 -0
  53. package/src/commands/client.ts +569 -39
  54. package/src/commands/connect/import.ts +217 -0
  55. package/src/commands/connect.ts +31 -0
  56. package/src/commands/devices.ts +247 -0
  57. package/src/commands/env.ts +1 -1
  58. package/src/commands/flags.ts +269 -0
  59. package/src/commands/gateway/token.ts +73 -0
  60. package/src/commands/gateway.ts +29 -0
  61. package/src/commands/logs.ts +6 -18
  62. package/src/commands/pair.ts +222 -0
  63. package/src/commands/ps.ts +57 -41
  64. package/src/commands/recover.ts +47 -9
  65. package/src/commands/restore.ts +8 -1
  66. package/src/commands/retire.ts +23 -70
  67. package/src/commands/rollback.ts +2 -14
  68. package/src/commands/sleep.ts +7 -0
  69. package/src/commands/ssh.ts +5 -24
  70. package/src/commands/teleport.ts +34 -26
  71. package/src/commands/tunnel.ts +46 -2
  72. package/src/commands/unpair.ts +118 -0
  73. package/src/commands/upgrade.ts +8 -16
  74. package/src/commands/wake.ts +75 -45
  75. package/src/components/DefaultMainScreen.tsx +100 -14
  76. package/src/index.ts +22 -0
  77. package/src/lib/__tests__/lifecycle-reporter.test.ts +59 -0
  78. package/src/lib/__tests__/step-runner.test.ts +49 -1
  79. package/src/lib/assistant-client.ts +58 -37
  80. package/src/lib/assistant-config.ts +28 -3
  81. package/src/lib/cloudflare-tunnel.ts +276 -0
  82. package/src/lib/config-utils.ts +24 -3
  83. package/src/lib/confirm-action.ts +57 -0
  84. package/src/lib/docker.ts +82 -8
  85. package/src/lib/environments/__tests__/paths.test.ts +2 -1
  86. package/src/lib/environments/__tests__/seeds.test.ts +2 -1
  87. package/src/lib/environments/paths.ts +1 -1
  88. package/src/lib/environments/resolve.ts +11 -35
  89. package/src/lib/guardian-token.ts +132 -9
  90. package/src/lib/hatch-local.ts +75 -33
  91. package/src/lib/http-client.ts +1 -3
  92. package/src/lib/lifecycle-reporter.ts +31 -0
  93. package/src/lib/local.ts +193 -298
  94. package/src/lib/orphan-detection.ts +9 -5
  95. package/src/lib/pgrep.ts +5 -1
  96. package/src/lib/platform-client.ts +97 -49
  97. package/src/lib/process.ts +109 -39
  98. package/src/lib/retire-local.ts +28 -14
  99. package/src/lib/segments-to-plain-text.ts +35 -0
  100. package/src/lib/step-runner.ts +67 -7
  101. package/src/lib/sync-cloud-assistants.ts +17 -0
  102. package/{src/lib/environments → node_modules/@vellumai/environments/src}/types.ts +0 -0
package/src/lib/local.ts CHANGED
@@ -17,7 +17,11 @@ import {
17
17
  } from "./assistant-config.js";
18
18
  import { GATEWAY_PORT } from "./constants.js";
19
19
  import { httpHealthCheck, waitForDaemonReady } from "./http-client.js";
20
- import { stopProcessByPidFile } from "./process.js";
20
+ import {
21
+ resolveProcessState,
22
+ stopProcess,
23
+ stopProcessByPidFile,
24
+ } from "./process.js";
21
25
  import { openLogFile, pipeToLogFile } from "./xdg-log.js";
22
26
 
23
27
  const _require = createRequire(import.meta.url);
@@ -226,8 +230,10 @@ function resolveAssistantIndexPath(): string | undefined {
226
230
  }
227
231
 
228
232
  try {
229
- const vellumPkgPath = _require.resolve("vellum/package.json");
230
- const resolved = join(dirname(vellumPkgPath), "src", "index.ts");
233
+ const assistantPkgPath = _require.resolve(
234
+ "@vellumai/assistant/package.json",
235
+ );
236
+ const resolved = join(dirname(assistantPkgPath), "src", "index.ts");
231
237
  if (existsSync(resolved)) {
232
238
  return resolved;
233
239
  }
@@ -319,80 +325,16 @@ type DaemonStartOptions = {
319
325
  signingKey?: string;
320
326
  };
321
327
 
322
- async function startDaemonFromSource(
323
- assistantIndex: string,
324
- resources: LocalInstanceResources,
328
+ /**
329
+ * Apply per-instance resource overrides and shared daemon options to an
330
+ * environment object. Called from all daemon spawn paths (source, watch,
331
+ * bundled binary) to eliminate drift between the three.
332
+ */
333
+ function applyDaemonEnvOverrides(
334
+ env: Record<string, string | undefined>,
335
+ resources: LocalInstanceResources | undefined,
325
336
  options?: DaemonStartOptions,
326
- ): Promise<void> {
327
- const foreground = options?.foreground ?? false;
328
- const daemonMainPath = resolveDaemonMainPath(assistantIndex);
329
-
330
- // Ensure the directory containing PID/socket files exists. For named
331
- // instances this is instanceDir/.vellum/workspace/ (matching daemon's getWorkspaceDir()).
332
- const pidFile = getDaemonPidPath(resources);
333
- mkdirSync(dirname(pidFile), { recursive: true });
334
-
335
- // --- Lifecycle guard: prevent split-brain daemon state ---
336
- if (existsSync(pidFile)) {
337
- try {
338
- const content = readFileSync(pidFile, "utf-8").trim();
339
-
340
- // Another caller is already spawning the daemon — wait for it
341
- // instead of racing to spawn a duplicate.
342
- if (content === "starting") {
343
- console.log(
344
- " Assistant is starting — waiting for it to become ready...",
345
- );
346
- if (await waitForDaemonReady(resources.daemonPort, 60000)) {
347
- console.log(" Assistant is ready\n");
348
- return;
349
- }
350
- // The other spawn may have failed; clean up and proceed to spawn.
351
- try {
352
- unlinkSync(pidFile);
353
- } catch {}
354
- }
355
-
356
- const pid = parseInt(content, 10);
357
- if (!isNaN(pid)) {
358
- try {
359
- process.kill(pid, 0);
360
- console.log(` Assistant already running (pid ${pid})\n`);
361
- return;
362
- } catch {
363
- try {
364
- unlinkSync(pidFile);
365
- } catch {}
366
- }
367
- }
368
- } catch {}
369
- }
370
-
371
- // PID file was stale or missing — check if daemon is responding via HTTP
372
- if (await isDaemonResponsive(resources.daemonPort)) {
373
- // Recover PID tracking so lifecycle commands (sleep, retire,
374
- // stopLocalProcesses) can manage this daemon process.
375
- const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
376
- if (recoveredPid) {
377
- console.log(
378
- ` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
379
- );
380
- } else {
381
- console.log(" Assistant is responsive — skipping restart\n");
382
- }
383
- return;
384
- }
385
-
386
- const env: Record<string, string | undefined> = {
387
- ...process.env,
388
- RUNTIME_HTTP_PORT: process.env.RUNTIME_HTTP_PORT || "7821",
389
- VELLUM_CLOUD: "local",
390
- VELLUM_DEV: "1",
391
- VELLUM_ENVIRONMENT: process.env.VELLUM_ENVIRONMENT || "local",
392
- ...(options?.signingKey
393
- ? { ACTOR_TOKEN_SIGNING_KEY: options.signingKey }
394
- : {}),
395
- };
337
+ ): void {
396
338
  if (resources) {
397
339
  env.VELLUM_WORKSPACE_DIR = join(
398
340
  resources.instanceDir,
@@ -414,25 +356,75 @@ async function startDaemonFromSource(
414
356
  env.QDRANT_HTTP_PORT = String(resources.qdrantPort);
415
357
  delete env.QDRANT_URL;
416
358
  }
359
+ if (options?.signingKey) {
360
+ env.ACTOR_TOKEN_SIGNING_KEY = options.signingKey;
361
+ }
417
362
  if (options?.defaultWorkspaceConfigPath) {
418
363
  env.VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH =
419
364
  options.defaultWorkspaceConfigPath;
420
365
  }
421
-
422
366
  applyIpcSocketDirOverride(env);
367
+ }
368
+
369
+ function logDaemonReadiness(ready: boolean): void {
370
+ if (ready) {
371
+ console.log(" Assistant ready\n");
372
+ } else {
373
+ console.log(
374
+ " ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
375
+ );
376
+ }
377
+ }
378
+
379
+ async function startDaemonFromSource(
380
+ assistantIndex: string,
381
+ resources: LocalInstanceResources,
382
+ options?: DaemonStartOptions,
383
+ ): Promise<void> {
384
+ const foreground = options?.foreground ?? false;
385
+ const daemonMainPath = resolveDaemonMainPath(assistantIndex);
386
+
387
+ // Ensure the directory containing PID/socket files exists. For named
388
+ // instances this is instanceDir/.vellum/workspace/ (matching daemon's getWorkspaceDir()).
389
+ const pidFile = getDaemonPidPath(resources);
390
+ mkdirSync(dirname(pidFile), { recursive: true });
391
+
392
+ // --- Lifecycle guard: prevent split-brain daemon state ---
393
+ if (await awaitStartingSentinel(pidFile, resources.daemonPort)) return;
394
+
395
+ const daemonState = await resolveProcessState(
396
+ pidFile,
397
+ resources.daemonPort,
398
+ "Assistant",
399
+ );
400
+ if (daemonState.status === "healthy") {
401
+ console.log(` Assistant already running (pid ${daemonState.pid})\n`);
402
+ return;
403
+ }
404
+
405
+ if (await checkOrphanedDaemon(pidFile, resources.daemonPort)) return;
406
+
407
+ const env: Record<string, string | undefined> = {
408
+ ...process.env,
409
+ RUNTIME_HTTP_PORT: process.env.RUNTIME_HTTP_PORT || "7821",
410
+ VELLUM_CLOUD: "local",
411
+ VELLUM_DEV: "1",
412
+ VELLUM_ENVIRONMENT: process.env.VELLUM_ENVIRONMENT || "local",
413
+ };
414
+ applyDaemonEnvOverrides(env, resources, options);
423
415
 
424
416
  // Write a sentinel PID file before spawning so concurrent hatch() calls
425
417
  // detect the in-progress spawn and wait instead of racing.
426
418
  writeFileSync(pidFile, "starting", "utf-8");
427
419
 
428
420
  const child = foreground
429
- ? spawn("bun", ["run", daemonMainPath], {
421
+ ? spawn(process.execPath, ["run", daemonMainPath], {
430
422
  stdio: "inherit",
431
423
  env,
432
424
  })
433
425
  : (() => {
434
426
  const daemonLogFd = openLogFile("hatch.log");
435
- const c = spawn("bun", ["run", daemonMainPath], {
427
+ const c = spawn(process.execPath, ["run", daemonMainPath], {
436
428
  detached: true,
437
429
  stdio: ["ignore", "pipe", "pipe"],
438
430
  env,
@@ -469,101 +461,34 @@ async function startDaemonWatchFromSource(
469
461
  mkdirSync(dirname(pidFile), { recursive: true });
470
462
 
471
463
  // --- Lifecycle guard: prevent split-brain daemon state ---
472
- // If a daemon is already running, skip spawning a new one.
473
- if (existsSync(pidFile)) {
474
- try {
475
- const content = readFileSync(pidFile, "utf-8").trim();
464
+ if (await awaitStartingSentinel(pidFile, resources.daemonPort)) return;
476
465
 
477
- // Another caller is already spawning the daemon — wait for it
478
- // instead of racing to spawn a duplicate.
479
- if (content === "starting") {
480
- console.log(
481
- " Assistant is starting — waiting for it to become ready...",
482
- );
483
- if (await waitForDaemonReady(resources.daemonPort, 60000)) {
484
- console.log(" Assistant is ready\n");
485
- return;
486
- }
487
- // The other spawn may have failed; clean up and proceed to spawn.
488
- try {
489
- unlinkSync(pidFile);
490
- } catch {}
491
- }
492
-
493
- const pid = parseInt(content, 10);
494
- if (!isNaN(pid)) {
495
- try {
496
- process.kill(pid, 0); // Check if alive
497
- console.log(` Assistant already running (pid ${pid})\n`);
498
- return;
499
- } catch {
500
- // Process doesn't exist, clean up stale PID file
501
- try {
502
- unlinkSync(pidFile);
503
- } catch {}
504
- }
505
- }
506
- } catch {}
507
- }
508
-
509
- // PID file was stale or missing — check if daemon is responding via HTTP
510
- if (await isDaemonResponsive(resources.daemonPort)) {
511
- // Recover PID tracking so lifecycle commands (sleep, retire,
512
- // stopLocalProcesses) can manage this daemon process.
513
- const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
514
- if (recoveredPid) {
515
- console.log(
516
- ` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
517
- );
518
- } else {
519
- console.log(" Assistant is responsive — skipping restart\n");
520
- }
466
+ const daemonState = await resolveProcessState(
467
+ pidFile,
468
+ resources.daemonPort,
469
+ "Assistant",
470
+ );
471
+ if (daemonState.status === "healthy") {
472
+ console.log(` Assistant already running (pid ${daemonState.pid})\n`);
521
473
  return;
522
474
  }
523
475
 
476
+ if (await checkOrphanedDaemon(pidFile, resources.daemonPort)) return;
477
+
524
478
  const env: Record<string, string | undefined> = {
525
479
  ...process.env,
526
480
  RUNTIME_HTTP_PORT: process.env.RUNTIME_HTTP_PORT || "7821",
527
481
  VELLUM_DEV: "1",
528
482
  VELLUM_ENVIRONMENT: process.env.VELLUM_ENVIRONMENT || "local",
529
- ...(options?.signingKey
530
- ? { ACTOR_TOKEN_SIGNING_KEY: options.signingKey }
531
- : {}),
532
483
  };
533
- if (resources) {
534
- env.VELLUM_WORKSPACE_DIR = join(
535
- resources.instanceDir,
536
- ".vellum",
537
- "workspace",
538
- );
539
- env.GATEWAY_SECURITY_DIR = join(
540
- resources.instanceDir,
541
- ".vellum",
542
- "protected",
543
- );
544
- env.CREDENTIAL_SECURITY_DIR = join(
545
- resources.instanceDir,
546
- ".vellum",
547
- "protected",
548
- );
549
- env.RUNTIME_HTTP_PORT = String(resources.daemonPort);
550
- env.GATEWAY_PORT = String(resources.gatewayPort);
551
- env.QDRANT_HTTP_PORT = String(resources.qdrantPort);
552
- delete env.QDRANT_URL;
553
- }
554
- if (options?.defaultWorkspaceConfigPath) {
555
- env.VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH =
556
- options.defaultWorkspaceConfigPath;
557
- }
558
-
559
- applyIpcSocketDirOverride(env);
484
+ applyDaemonEnvOverrides(env, resources, options);
560
485
 
561
486
  // Write a sentinel PID file before spawning so concurrent hatch() calls
562
487
  // detect the in-progress spawn and wait instead of racing.
563
488
  writeFileSync(pidFile, "starting", "utf-8");
564
489
 
565
490
  const daemonLogFd = openLogFile("hatch.log");
566
- const child = spawn("bun", ["--watch", "run", mainPath], {
491
+ const child = spawn(process.execPath, ["--watch", "run", mainPath], {
567
492
  detached: true,
568
493
  stdio: ["ignore", "pipe", "pipe"],
569
494
  env,
@@ -591,6 +516,18 @@ function resolveGatewayDir(): string {
591
516
  return sourceDir;
592
517
  }
593
518
 
519
+ // npm-installed: @vellumai/cli and @vellumai/vellum-gateway are siblings
520
+ const npmGatewayDir = join(
521
+ import.meta.dir,
522
+ "..",
523
+ "..",
524
+ "..",
525
+ "vellum-gateway",
526
+ );
527
+ if (isGatewaySourceDir(npmGatewayDir)) {
528
+ return npmGatewayDir;
529
+ }
530
+
594
531
  // Compiled binary: gateway/ bundled adjacent to the CLI executable.
595
532
  const binGateway = join(dirname(process.execPath), "gateway");
596
533
  if (isGatewaySourceDir(binGateway)) {
@@ -660,6 +597,63 @@ function recoverPidFile(
660
597
  return pid;
661
598
  }
662
599
 
600
+ /**
601
+ * Handle the "starting" sentinel in a PID file. When another caller is
602
+ * already spawning the daemon, wait for it to become ready instead of
603
+ * racing to spawn a duplicate.
604
+ *
605
+ * Returns `true` if the daemon became ready (caller should return early),
606
+ * `false` if the spawn failed or the sentinel wasn't present (caller
607
+ * should proceed). Cleans up the PID file on failure.
608
+ */
609
+ async function awaitStartingSentinel(
610
+ pidFile: string,
611
+ daemonPort: number,
612
+ ): Promise<boolean> {
613
+ if (!existsSync(pidFile)) return false;
614
+ try {
615
+ const content = readFileSync(pidFile, "utf-8").trim();
616
+ if (content !== "starting") return false;
617
+ } catch {
618
+ return false;
619
+ }
620
+
621
+ console.log(" Assistant is starting — waiting for it to become ready...");
622
+ if (await waitForDaemonReady(daemonPort, 60000)) {
623
+ console.log(" Assistant is ready\n");
624
+ return true;
625
+ }
626
+ try {
627
+ unlinkSync(pidFile);
628
+ } catch {}
629
+ return false;
630
+ }
631
+
632
+ /**
633
+ * Check if a daemon without a valid PID file is still reachable on its
634
+ * HTTP port (orphaned process). If so, recover its PID file so lifecycle
635
+ * commands can manage it.
636
+ *
637
+ * Returns `true` if an orphaned daemon was found (caller should skip
638
+ * starting a new one), `false` otherwise.
639
+ */
640
+ async function checkOrphanedDaemon(
641
+ pidFile: string,
642
+ daemonPort: number,
643
+ ): Promise<boolean> {
644
+ if (!(await isDaemonResponsive(daemonPort))) return false;
645
+
646
+ const recoveredPid = recoverPidFile(pidFile, daemonPort);
647
+ if (recoveredPid) {
648
+ console.log(
649
+ ` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
650
+ );
651
+ } else {
652
+ console.log(" Assistant is responsive — skipping restart\n");
653
+ }
654
+ return true;
655
+ }
656
+
663
657
  export async function discoverPublicUrl(
664
658
  port?: number,
665
659
  ): Promise<string | undefined> {
@@ -900,64 +894,24 @@ export async function startLocalDaemon(
900
894
 
901
895
  const pidFile = getDaemonPidPath(resources);
902
896
 
903
- // If a daemon is already running, skip spawning a new one.
904
- // This prevents cascading kill→restart cycles when multiple callers
905
- // invoke hatch() concurrently (setupDaemonClient + ensureDaemonConnected).
906
- let daemonAlive = false;
907
- if (existsSync(pidFile)) {
908
- try {
909
- const content = readFileSync(pidFile, "utf-8").trim();
910
-
911
- // Another caller is already spawning the daemon — wait for it
912
- // instead of racing to spawn a duplicate.
913
- if (content === "starting") {
914
- console.log(
915
- " Assistant is starting — waiting for it to become ready...",
916
- );
917
- if (await waitForDaemonReady(resources.daemonPort, 60000)) {
918
- console.log(" Assistant is ready\n");
919
- ensureBunInstalled();
920
- return;
921
- }
922
- // The other spawn may have failed; clean up and proceed to spawn.
923
- try {
924
- unlinkSync(pidFile);
925
- } catch {}
926
- }
897
+ // --- Lifecycle guard: prevent split-brain daemon state ---
898
+ if (await awaitStartingSentinel(pidFile, resources.daemonPort)) {
899
+ ensureBunInstalled();
900
+ return;
901
+ }
927
902
 
928
- const pid = parseInt(content, 10);
929
- if (!isNaN(pid)) {
930
- try {
931
- process.kill(pid, 0); // Check if alive
932
- daemonAlive = true;
933
- console.log(` Assistant already running (pid ${pid})\n`);
934
- } catch {
935
- // Process doesn't exist, clean up stale PID file
936
- try {
937
- unlinkSync(pidFile);
938
- } catch {}
939
- }
940
- }
941
- } catch {}
903
+ const daemonState = await resolveProcessState(
904
+ pidFile,
905
+ resources.daemonPort,
906
+ "Assistant",
907
+ );
908
+ const daemonAlive = daemonState.status === "healthy";
909
+ if (daemonAlive) {
910
+ console.log(` Assistant already running (pid ${daemonState.pid})\n`);
942
911
  }
943
912
 
944
913
  if (!daemonAlive) {
945
- // The PID file was stale or missing, but a daemon with a different PID
946
- // may still be listening on the HTTP port (e.g. if the PID file was
947
- // overwritten by a crashed restart attempt). Check before starting a new one.
948
- if (await isDaemonResponsive(resources.daemonPort)) {
949
- // Restore PID tracking so lifecycle commands (sleep, retire,
950
- // stopLocalProcesses) can manage this daemon process.
951
- const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
952
- if (recoveredPid) {
953
- console.log(
954
- ` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
955
- );
956
- } else {
957
- console.log(" Assistant is responsive — skipping restart\n");
958
- }
959
- // Ensure bun is available for runtime features (browser, skills install)
960
- // even when reusing an existing daemon.
914
+ if (await checkOrphanedDaemon(pidFile, resources.daemonPort)) {
961
915
  ensureBunInstalled();
962
916
  return;
963
917
  }
@@ -1013,39 +967,7 @@ export async function startLocalDaemon(
1013
967
  daemonEnv[key] = process.env[key]!;
1014
968
  }
1015
969
  }
1016
- if (options?.defaultWorkspaceConfigPath) {
1017
- daemonEnv.VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH =
1018
- options.defaultWorkspaceConfigPath;
1019
- }
1020
- // When running a named instance, override env so the daemon resolves
1021
- // all paths under the instance directory and listens on its own port.
1022
- if (resources) {
1023
- daemonEnv.VELLUM_WORKSPACE_DIR = join(
1024
- resources.instanceDir,
1025
- ".vellum",
1026
- "workspace",
1027
- );
1028
- daemonEnv.GATEWAY_SECURITY_DIR = join(
1029
- resources.instanceDir,
1030
- ".vellum",
1031
- "protected",
1032
- );
1033
- daemonEnv.CREDENTIAL_SECURITY_DIR = join(
1034
- resources.instanceDir,
1035
- ".vellum",
1036
- "protected",
1037
- );
1038
- daemonEnv.RUNTIME_HTTP_PORT = String(resources.daemonPort);
1039
- daemonEnv.GATEWAY_PORT = String(resources.gatewayPort);
1040
- daemonEnv.QDRANT_HTTP_PORT = String(resources.qdrantPort);
1041
- delete daemonEnv.QDRANT_URL;
1042
- }
1043
-
1044
- if (options?.signingKey) {
1045
- daemonEnv.ACTOR_TOKEN_SIGNING_KEY = options.signingKey;
1046
- }
1047
-
1048
- applyIpcSocketDirOverride(daemonEnv);
970
+ applyDaemonEnvOverrides(daemonEnv, resources, options);
1049
971
 
1050
972
  // Write a sentinel PID file before spawning so concurrent hatch() calls
1051
973
  // see the file and fall through to the isDaemonResponsive() port check
@@ -1112,13 +1034,7 @@ export async function startLocalDaemon(
1112
1034
  }
1113
1035
  }
1114
1036
 
1115
- if (daemonReady) {
1116
- console.log(" Assistant ready\n");
1117
- } else {
1118
- console.log(
1119
- " ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
1120
- );
1121
- }
1037
+ logDaemonReadiness(daemonReady);
1122
1038
  } else {
1123
1039
  console.log("🔨 Starting local assistant...");
1124
1040
 
@@ -1131,34 +1047,17 @@ export async function startLocalDaemon(
1131
1047
  }
1132
1048
  if (watch) {
1133
1049
  await startDaemonWatchFromSource(assistantIndex, resources, options);
1134
-
1135
- const daemonReady = await waitForDaemonReady(resources.daemonPort, 60000);
1136
- if (daemonReady) {
1137
- console.log(" Assistant ready\n");
1138
- } else {
1139
- console.log(
1140
- " ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
1141
- );
1142
- }
1143
1050
  } else {
1144
1051
  await startDaemonFromSource(assistantIndex, resources, options);
1145
-
1146
- const daemonReady = await waitForDaemonReady(resources.daemonPort, 60000);
1147
- if (daemonReady) {
1148
- console.log(" Assistant ready\n");
1149
- } else {
1150
- console.log(
1151
- " ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
1152
- );
1153
- }
1154
1052
  }
1053
+ logDaemonReadiness(await waitForDaemonReady(resources.daemonPort, 60000));
1155
1054
  }
1156
1055
  }
1157
1056
 
1158
1057
  export async function startGateway(
1159
1058
  watch: boolean = false,
1160
1059
  resources?: LocalInstanceResources,
1161
- options?: { signingKey?: string },
1060
+ options?: { signingKey?: string; bootstrapSecret?: string },
1162
1061
  ): Promise<string> {
1163
1062
  const effectiveGatewayPort = resources?.gatewayPort ?? GATEWAY_PORT;
1164
1063
 
@@ -1194,6 +1093,9 @@ export async function startGateway(
1194
1093
  ...(options?.signingKey
1195
1094
  ? { ACTOR_TOKEN_SIGNING_KEY: options.signingKey }
1196
1095
  : {}),
1096
+ ...(options?.bootstrapSecret
1097
+ ? { GUARDIAN_BOOTSTRAP_SECRET: options.bootstrapSecret }
1098
+ : {}),
1197
1099
  ...(watch
1198
1100
  ? {
1199
1101
  VELLUM_DEV: "1",
@@ -1247,7 +1149,7 @@ export async function startGateway(
1247
1149
  ? ["--watch", "run", "src/index.ts", "--vellum-gateway"]
1248
1150
  : ["run", "src/index.ts", "--vellum-gateway"];
1249
1151
  const gwLogFd = openLogFile("hatch.log");
1250
- gateway = spawn("bun", bunArgs, {
1152
+ gateway = spawn(process.execPath, bunArgs, {
1251
1153
  cwd: gatewayDir,
1252
1154
  detached: true,
1253
1155
  stdio: ["ignore", "pipe", "pipe"],
@@ -1273,27 +1175,7 @@ export async function startGateway(
1273
1175
  // Wait for the gateway to be responsive before returning. Without this,
1274
1176
  // callers may try to connect before the HTTP server is listening and get
1275
1177
  // connection-refused errors.
1276
- const start = Date.now();
1277
- const timeoutMs = 30000;
1278
- let ready = false;
1279
- while (Date.now() - start < timeoutMs) {
1280
- try {
1281
- const res = await fetch(
1282
- `http://localhost:${effectiveGatewayPort}/healthz`,
1283
- {
1284
- signal: AbortSignal.timeout(2000),
1285
- },
1286
- );
1287
- if (res.ok) {
1288
- ready = true;
1289
- break;
1290
- }
1291
- } catch {
1292
- // Gateway not ready yet
1293
- }
1294
- await new Promise((r) => setTimeout(r, 250));
1295
- }
1296
-
1178
+ const ready = await waitForDaemonReady(effectiveGatewayPort, 30000);
1297
1179
  if (!ready) {
1298
1180
  console.warn(
1299
1181
  "⚠ Gateway started but health check did not respond within 30s",
@@ -1304,6 +1186,20 @@ export async function startGateway(
1304
1186
  return gatewayUrl;
1305
1187
  }
1306
1188
 
1189
+ /** Check whether a PID belongs to an ngrok process via its command line. */
1190
+ function isNgrokProcess(pid: number): boolean {
1191
+ try {
1192
+ const output = execFileSync("ps", ["-p", String(pid), "-o", "command="], {
1193
+ encoding: "utf-8",
1194
+ timeout: 3000,
1195
+ stdio: ["ignore", "pipe", "ignore"],
1196
+ }).trim();
1197
+ return /ngrok/.test(output);
1198
+ } catch {
1199
+ return false;
1200
+ }
1201
+ }
1202
+
1307
1203
  /**
1308
1204
  * Stop any locally-running daemon and gateway processes
1309
1205
  * and clean up PID files. Called when hatch fails partway through
@@ -1326,15 +1222,14 @@ export async function stopLocalProcesses(
1326
1222
 
1327
1223
  // Kill ngrok directly by PID rather than using stopProcessByPidFile, because
1328
1224
  // isVellumProcess() won't match the ngrok binary — resulting in a no-op that
1329
- // leaves ngrok running.
1225
+ // leaves ngrok running. Verify the PID still belongs to ngrok before killing
1226
+ // to avoid hitting an unrelated process if the OS has reused the PID.
1330
1227
  const ngrokPidFile = join(vellumDir, "ngrok.pid");
1331
1228
  if (existsSync(ngrokPidFile)) {
1332
1229
  try {
1333
1230
  const pid = parseInt(readFileSync(ngrokPidFile, "utf-8").trim(), 10);
1334
- if (!isNaN(pid)) {
1335
- try {
1336
- process.kill(pid, "SIGTERM");
1337
- } catch {}
1231
+ if (!isNaN(pid) && isNgrokProcess(pid)) {
1232
+ await stopProcess(pid, "ngrok");
1338
1233
  }
1339
1234
  unlinkSync(ngrokPidFile);
1340
1235
  } catch {}
package/src/lib/orphan-detection.ts CHANGED
@@ -58,7 +58,7 @@ export function readPidFile(pidFile: string): string | null {
58
58
  return pid || null;
59
59
  }
60
60
 
61
- export function isProcessAlive(pid: string): boolean {
61
+ export function isPidAlive(pid: string): boolean {
62
62
  try {
63
63
  process.kill(parseInt(pid, 10), 0);
64
64
  return true;
@@ -138,10 +138,14 @@ export async function detectOrphanedProcesses(
138
138
  // Process table scan — discover orphaned processes by scanning the OS
139
139
  // process table rather than reading PID files from the workspace.
140
140
  try {
141
- const output = await execOutput("sh", [
142
- "-c",
143
- "ps ax -o pid=,ppid=,args= | grep -E 'vellum|qdrant|openclaw' | grep -v grep",
144
- ]);
141
+ const output = await execOutput(
142
+ "sh",
143
+ [
144
+ "-c",
145
+ "ps ax -o pid=,ppid=,args= | grep -E 'vellum|qdrant|openclaw' | grep -v grep",
146
+ ],
147
+ { timeoutMs: 5_000 },
148
+ );
145
149
  const procs = parseRemotePs(output);
146
150
  const ownPid = String(process.pid);
147
151
 
package/src/lib/pgrep.ts CHANGED
@@ -1,8 +1,12 @@
1
1
  import { execOutput } from "./step-runner";
2
2
 
3
+ const PGREP_TIMEOUT_MS = 5_000;
4
+
3
5
  export async function pgrepExact(name: string): Promise<string[]> {
4
6
  try {
5
- const output = await execOutput("pgrep", ["-x", name]);
7
+ const output = await execOutput("pgrep", ["-x", name], {
8
+ timeoutMs: PGREP_TIMEOUT_MS,
9
+ });
6
10
  return output.trim().split("\n").filter(Boolean);
7
11
  } catch {
8
12
  return [];