@vellumai/cli 0.8.4 → 0.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +17 -1
- package/knip.json +2 -1
- package/package.json +1 -1
- package/src/__tests__/api-key-check.test.ts +78 -0
- package/src/__tests__/backup.test.ts +38 -0
- package/src/__tests__/recover.test.ts +307 -0
- package/src/__tests__/retire.test.ts +241 -0
- package/src/__tests__/wake.test.ts +215 -0
- package/src/commands/backup.ts +2 -0
- package/src/commands/client.ts +62 -32
- package/src/commands/flags.ts +197 -0
- package/src/commands/gateway/token.ts +73 -0
- package/src/commands/gateway.ts +29 -0
- package/src/commands/logs.ts +6 -18
- package/src/commands/ps.ts +41 -41
- package/src/commands/recover.ts +47 -9
- package/src/commands/restore.ts +8 -1
- package/src/commands/retire.ts +145 -55
- package/src/commands/roadmap.ts +449 -0
- package/src/commands/rollback.ts +2 -14
- package/src/commands/ssh.ts +5 -24
- package/src/commands/teleport.ts +34 -26
- package/src/commands/upgrade.ts +8 -16
- package/src/commands/wake.ts +68 -45
- package/src/index.ts +9 -0
- package/src/lib/__tests__/port-allocator.test.ts +117 -0
- package/src/lib/__tests__/step-runner.test.ts +133 -0
- package/src/lib/api-key-check.ts +40 -0
- package/src/lib/assistant-config.ts +13 -0
- package/src/lib/config-utils.ts +24 -3
- package/src/lib/docker.ts +72 -8
- package/src/lib/hatch-local.ts +15 -2
- package/src/lib/http-client.ts +1 -3
- package/src/lib/local.ts +173 -292
- package/src/lib/orphan-detection.ts +9 -5
- package/src/lib/pgrep.ts +5 -1
- package/src/lib/platform-client.ts +97 -49
- package/src/lib/port-allocator.ts +93 -0
- package/src/lib/process.ts +109 -39
- package/src/lib/statefulset.ts +0 -10
- package/src/lib/step-runner.ts +102 -9
- package/src/lib/sync-cloud-assistants.ts +17 -0
- package/src/shared/provider-env-vars.ts +1 -0
package/src/lib/local.ts
CHANGED
|
@@ -17,7 +17,11 @@ import {
|
|
|
17
17
|
} from "./assistant-config.js";
|
|
18
18
|
import { GATEWAY_PORT } from "./constants.js";
|
|
19
19
|
import { httpHealthCheck, waitForDaemonReady } from "./http-client.js";
|
|
20
|
-
import {
|
|
20
|
+
import {
|
|
21
|
+
resolveProcessState,
|
|
22
|
+
stopProcess,
|
|
23
|
+
stopProcessByPidFile,
|
|
24
|
+
} from "./process.js";
|
|
21
25
|
import { openLogFile, pipeToLogFile } from "./xdg-log.js";
|
|
22
26
|
|
|
23
27
|
const _require = createRequire(import.meta.url);
|
|
@@ -319,80 +323,16 @@ type DaemonStartOptions = {
|
|
|
319
323
|
signingKey?: string;
|
|
320
324
|
};
|
|
321
325
|
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
326
|
+
/**
|
|
327
|
+
* Apply per-instance resource overrides and shared daemon options to an
|
|
328
|
+
* environment object. Called from all daemon spawn paths (source, watch,
|
|
329
|
+
* bundled binary) to eliminate drift between the three.
|
|
330
|
+
*/
|
|
331
|
+
function applyDaemonEnvOverrides(
|
|
332
|
+
env: Record<string, string | undefined>,
|
|
333
|
+
resources: LocalInstanceResources | undefined,
|
|
325
334
|
options?: DaemonStartOptions,
|
|
326
|
-
):
|
|
327
|
-
const foreground = options?.foreground ?? false;
|
|
328
|
-
const daemonMainPath = resolveDaemonMainPath(assistantIndex);
|
|
329
|
-
|
|
330
|
-
// Ensure the directory containing PID/socket files exists. For named
|
|
331
|
-
// instances this is instanceDir/.vellum/workspace/ (matching daemon's getWorkspaceDir()).
|
|
332
|
-
const pidFile = getDaemonPidPath(resources);
|
|
333
|
-
mkdirSync(dirname(pidFile), { recursive: true });
|
|
334
|
-
|
|
335
|
-
// --- Lifecycle guard: prevent split-brain daemon state ---
|
|
336
|
-
if (existsSync(pidFile)) {
|
|
337
|
-
try {
|
|
338
|
-
const content = readFileSync(pidFile, "utf-8").trim();
|
|
339
|
-
|
|
340
|
-
// Another caller is already spawning the daemon — wait for it
|
|
341
|
-
// instead of racing to spawn a duplicate.
|
|
342
|
-
if (content === "starting") {
|
|
343
|
-
console.log(
|
|
344
|
-
" Assistant is starting — waiting for it to become ready...",
|
|
345
|
-
);
|
|
346
|
-
if (await waitForDaemonReady(resources.daemonPort, 60000)) {
|
|
347
|
-
console.log(" Assistant is ready\n");
|
|
348
|
-
return;
|
|
349
|
-
}
|
|
350
|
-
// The other spawn may have failed; clean up and proceed to spawn.
|
|
351
|
-
try {
|
|
352
|
-
unlinkSync(pidFile);
|
|
353
|
-
} catch {}
|
|
354
|
-
}
|
|
355
|
-
|
|
356
|
-
const pid = parseInt(content, 10);
|
|
357
|
-
if (!isNaN(pid)) {
|
|
358
|
-
try {
|
|
359
|
-
process.kill(pid, 0);
|
|
360
|
-
console.log(` Assistant already running (pid ${pid})\n`);
|
|
361
|
-
return;
|
|
362
|
-
} catch {
|
|
363
|
-
try {
|
|
364
|
-
unlinkSync(pidFile);
|
|
365
|
-
} catch {}
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
} catch {}
|
|
369
|
-
}
|
|
370
|
-
|
|
371
|
-
// PID file was stale or missing — check if daemon is responding via HTTP
|
|
372
|
-
if (await isDaemonResponsive(resources.daemonPort)) {
|
|
373
|
-
// Recover PID tracking so lifecycle commands (sleep, retire,
|
|
374
|
-
// stopLocalProcesses) can manage this daemon process.
|
|
375
|
-
const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
|
|
376
|
-
if (recoveredPid) {
|
|
377
|
-
console.log(
|
|
378
|
-
` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
|
|
379
|
-
);
|
|
380
|
-
} else {
|
|
381
|
-
console.log(" Assistant is responsive — skipping restart\n");
|
|
382
|
-
}
|
|
383
|
-
return;
|
|
384
|
-
}
|
|
385
|
-
|
|
386
|
-
const env: Record<string, string | undefined> = {
|
|
387
|
-
...process.env,
|
|
388
|
-
RUNTIME_HTTP_PORT: process.env.RUNTIME_HTTP_PORT || "7821",
|
|
389
|
-
VELLUM_CLOUD: "local",
|
|
390
|
-
VELLUM_DEV: "1",
|
|
391
|
-
VELLUM_ENVIRONMENT: process.env.VELLUM_ENVIRONMENT || "local",
|
|
392
|
-
...(options?.signingKey
|
|
393
|
-
? { ACTOR_TOKEN_SIGNING_KEY: options.signingKey }
|
|
394
|
-
: {}),
|
|
395
|
-
};
|
|
335
|
+
): void {
|
|
396
336
|
if (resources) {
|
|
397
337
|
env.VELLUM_WORKSPACE_DIR = join(
|
|
398
338
|
resources.instanceDir,
|
|
@@ -414,12 +354,62 @@ async function startDaemonFromSource(
|
|
|
414
354
|
env.QDRANT_HTTP_PORT = String(resources.qdrantPort);
|
|
415
355
|
delete env.QDRANT_URL;
|
|
416
356
|
}
|
|
357
|
+
if (options?.signingKey) {
|
|
358
|
+
env.ACTOR_TOKEN_SIGNING_KEY = options.signingKey;
|
|
359
|
+
}
|
|
417
360
|
if (options?.defaultWorkspaceConfigPath) {
|
|
418
361
|
env.VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH =
|
|
419
362
|
options.defaultWorkspaceConfigPath;
|
|
420
363
|
}
|
|
421
|
-
|
|
422
364
|
applyIpcSocketDirOverride(env);
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
function logDaemonReadiness(ready: boolean): void {
|
|
368
|
+
if (ready) {
|
|
369
|
+
console.log(" Assistant ready\n");
|
|
370
|
+
} else {
|
|
371
|
+
console.log(
|
|
372
|
+
" ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
|
|
373
|
+
);
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
async function startDaemonFromSource(
|
|
378
|
+
assistantIndex: string,
|
|
379
|
+
resources: LocalInstanceResources,
|
|
380
|
+
options?: DaemonStartOptions,
|
|
381
|
+
): Promise<void> {
|
|
382
|
+
const foreground = options?.foreground ?? false;
|
|
383
|
+
const daemonMainPath = resolveDaemonMainPath(assistantIndex);
|
|
384
|
+
|
|
385
|
+
// Ensure the directory containing PID/socket files exists. For named
|
|
386
|
+
// instances this is instanceDir/.vellum/workspace/ (matching daemon's getWorkspaceDir()).
|
|
387
|
+
const pidFile = getDaemonPidPath(resources);
|
|
388
|
+
mkdirSync(dirname(pidFile), { recursive: true });
|
|
389
|
+
|
|
390
|
+
// --- Lifecycle guard: prevent split-brain daemon state ---
|
|
391
|
+
if (await awaitStartingSentinel(pidFile, resources.daemonPort)) return;
|
|
392
|
+
|
|
393
|
+
const daemonState = await resolveProcessState(
|
|
394
|
+
pidFile,
|
|
395
|
+
resources.daemonPort,
|
|
396
|
+
"Assistant",
|
|
397
|
+
);
|
|
398
|
+
if (daemonState.status === "healthy") {
|
|
399
|
+
console.log(` Assistant already running (pid ${daemonState.pid})\n`);
|
|
400
|
+
return;
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
if (await checkOrphanedDaemon(pidFile, resources.daemonPort)) return;
|
|
404
|
+
|
|
405
|
+
const env: Record<string, string | undefined> = {
|
|
406
|
+
...process.env,
|
|
407
|
+
RUNTIME_HTTP_PORT: process.env.RUNTIME_HTTP_PORT || "7821",
|
|
408
|
+
VELLUM_CLOUD: "local",
|
|
409
|
+
VELLUM_DEV: "1",
|
|
410
|
+
VELLUM_ENVIRONMENT: process.env.VELLUM_ENVIRONMENT || "local",
|
|
411
|
+
};
|
|
412
|
+
applyDaemonEnvOverrides(env, resources, options);
|
|
423
413
|
|
|
424
414
|
// Write a sentinel PID file before spawning so concurrent hatch() calls
|
|
425
415
|
// detect the in-progress spawn and wait instead of racing.
|
|
@@ -469,94 +459,27 @@ async function startDaemonWatchFromSource(
|
|
|
469
459
|
mkdirSync(dirname(pidFile), { recursive: true });
|
|
470
460
|
|
|
471
461
|
// --- Lifecycle guard: prevent split-brain daemon state ---
|
|
472
|
-
|
|
473
|
-
if (existsSync(pidFile)) {
|
|
474
|
-
try {
|
|
475
|
-
const content = readFileSync(pidFile, "utf-8").trim();
|
|
476
|
-
|
|
477
|
-
// Another caller is already spawning the daemon — wait for it
|
|
478
|
-
// instead of racing to spawn a duplicate.
|
|
479
|
-
if (content === "starting") {
|
|
480
|
-
console.log(
|
|
481
|
-
" Assistant is starting — waiting for it to become ready...",
|
|
482
|
-
);
|
|
483
|
-
if (await waitForDaemonReady(resources.daemonPort, 60000)) {
|
|
484
|
-
console.log(" Assistant is ready\n");
|
|
485
|
-
return;
|
|
486
|
-
}
|
|
487
|
-
// The other spawn may have failed; clean up and proceed to spawn.
|
|
488
|
-
try {
|
|
489
|
-
unlinkSync(pidFile);
|
|
490
|
-
} catch {}
|
|
491
|
-
}
|
|
462
|
+
if (await awaitStartingSentinel(pidFile, resources.daemonPort)) return;
|
|
492
463
|
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
// Process doesn't exist, clean up stale PID file
|
|
501
|
-
try {
|
|
502
|
-
unlinkSync(pidFile);
|
|
503
|
-
} catch {}
|
|
504
|
-
}
|
|
505
|
-
}
|
|
506
|
-
} catch {}
|
|
507
|
-
}
|
|
508
|
-
|
|
509
|
-
// PID file was stale or missing — check if daemon is responding via HTTP
|
|
510
|
-
if (await isDaemonResponsive(resources.daemonPort)) {
|
|
511
|
-
// Recover PID tracking so lifecycle commands (sleep, retire,
|
|
512
|
-
// stopLocalProcesses) can manage this daemon process.
|
|
513
|
-
const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
|
|
514
|
-
if (recoveredPid) {
|
|
515
|
-
console.log(
|
|
516
|
-
` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
|
|
517
|
-
);
|
|
518
|
-
} else {
|
|
519
|
-
console.log(" Assistant is responsive — skipping restart\n");
|
|
520
|
-
}
|
|
464
|
+
const daemonState = await resolveProcessState(
|
|
465
|
+
pidFile,
|
|
466
|
+
resources.daemonPort,
|
|
467
|
+
"Assistant",
|
|
468
|
+
);
|
|
469
|
+
if (daemonState.status === "healthy") {
|
|
470
|
+
console.log(` Assistant already running (pid ${daemonState.pid})\n`);
|
|
521
471
|
return;
|
|
522
472
|
}
|
|
523
473
|
|
|
474
|
+
if (await checkOrphanedDaemon(pidFile, resources.daemonPort)) return;
|
|
475
|
+
|
|
524
476
|
const env: Record<string, string | undefined> = {
|
|
525
477
|
...process.env,
|
|
526
478
|
RUNTIME_HTTP_PORT: process.env.RUNTIME_HTTP_PORT || "7821",
|
|
527
479
|
VELLUM_DEV: "1",
|
|
528
480
|
VELLUM_ENVIRONMENT: process.env.VELLUM_ENVIRONMENT || "local",
|
|
529
|
-
...(options?.signingKey
|
|
530
|
-
? { ACTOR_TOKEN_SIGNING_KEY: options.signingKey }
|
|
531
|
-
: {}),
|
|
532
481
|
};
|
|
533
|
-
|
|
534
|
-
env.VELLUM_WORKSPACE_DIR = join(
|
|
535
|
-
resources.instanceDir,
|
|
536
|
-
".vellum",
|
|
537
|
-
"workspace",
|
|
538
|
-
);
|
|
539
|
-
env.GATEWAY_SECURITY_DIR = join(
|
|
540
|
-
resources.instanceDir,
|
|
541
|
-
".vellum",
|
|
542
|
-
"protected",
|
|
543
|
-
);
|
|
544
|
-
env.CREDENTIAL_SECURITY_DIR = join(
|
|
545
|
-
resources.instanceDir,
|
|
546
|
-
".vellum",
|
|
547
|
-
"protected",
|
|
548
|
-
);
|
|
549
|
-
env.RUNTIME_HTTP_PORT = String(resources.daemonPort);
|
|
550
|
-
env.GATEWAY_PORT = String(resources.gatewayPort);
|
|
551
|
-
env.QDRANT_HTTP_PORT = String(resources.qdrantPort);
|
|
552
|
-
delete env.QDRANT_URL;
|
|
553
|
-
}
|
|
554
|
-
if (options?.defaultWorkspaceConfigPath) {
|
|
555
|
-
env.VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH =
|
|
556
|
-
options.defaultWorkspaceConfigPath;
|
|
557
|
-
}
|
|
558
|
-
|
|
559
|
-
applyIpcSocketDirOverride(env);
|
|
482
|
+
applyDaemonEnvOverrides(env, resources, options);
|
|
560
483
|
|
|
561
484
|
// Write a sentinel PID file before spawning so concurrent hatch() calls
|
|
562
485
|
// detect the in-progress spawn and wait instead of racing.
|
|
@@ -660,6 +583,63 @@ function recoverPidFile(
|
|
|
660
583
|
return pid;
|
|
661
584
|
}
|
|
662
585
|
|
|
586
|
+
/**
|
|
587
|
+
* Handle the "starting" sentinel in a PID file. When another caller is
|
|
588
|
+
* already spawning the daemon, wait for it to become ready instead of
|
|
589
|
+
* racing to spawn a duplicate.
|
|
590
|
+
*
|
|
591
|
+
* Returns `true` if the daemon became ready (caller should return early),
|
|
592
|
+
* `false` if the spawn failed or the sentinel wasn't present (caller
|
|
593
|
+
* should proceed). Cleans up the PID file on failure.
|
|
594
|
+
*/
|
|
595
|
+
async function awaitStartingSentinel(
|
|
596
|
+
pidFile: string,
|
|
597
|
+
daemonPort: number,
|
|
598
|
+
): Promise<boolean> {
|
|
599
|
+
if (!existsSync(pidFile)) return false;
|
|
600
|
+
try {
|
|
601
|
+
const content = readFileSync(pidFile, "utf-8").trim();
|
|
602
|
+
if (content !== "starting") return false;
|
|
603
|
+
} catch {
|
|
604
|
+
return false;
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
console.log(" Assistant is starting — waiting for it to become ready...");
|
|
608
|
+
if (await waitForDaemonReady(daemonPort, 60000)) {
|
|
609
|
+
console.log(" Assistant is ready\n");
|
|
610
|
+
return true;
|
|
611
|
+
}
|
|
612
|
+
try {
|
|
613
|
+
unlinkSync(pidFile);
|
|
614
|
+
} catch {}
|
|
615
|
+
return false;
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
/**
|
|
619
|
+
* Check if a daemon without a valid PID file is still reachable on its
|
|
620
|
+
* HTTP port (orphaned process). If so, recover its PID file so lifecycle
|
|
621
|
+
* commands can manage it.
|
|
622
|
+
*
|
|
623
|
+
* Returns `true` if an orphaned daemon was found (caller should skip
|
|
624
|
+
* starting a new one), `false` otherwise.
|
|
625
|
+
*/
|
|
626
|
+
async function checkOrphanedDaemon(
|
|
627
|
+
pidFile: string,
|
|
628
|
+
daemonPort: number,
|
|
629
|
+
): Promise<boolean> {
|
|
630
|
+
if (!(await isDaemonResponsive(daemonPort))) return false;
|
|
631
|
+
|
|
632
|
+
const recoveredPid = recoverPidFile(pidFile, daemonPort);
|
|
633
|
+
if (recoveredPid) {
|
|
634
|
+
console.log(
|
|
635
|
+
` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
|
|
636
|
+
);
|
|
637
|
+
} else {
|
|
638
|
+
console.log(" Assistant is responsive — skipping restart\n");
|
|
639
|
+
}
|
|
640
|
+
return true;
|
|
641
|
+
}
|
|
642
|
+
|
|
663
643
|
export async function discoverPublicUrl(
|
|
664
644
|
port?: number,
|
|
665
645
|
): Promise<string | undefined> {
|
|
@@ -900,64 +880,24 @@ export async function startLocalDaemon(
|
|
|
900
880
|
|
|
901
881
|
const pidFile = getDaemonPidPath(resources);
|
|
902
882
|
|
|
903
|
-
//
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
try {
|
|
909
|
-
const content = readFileSync(pidFile, "utf-8").trim();
|
|
910
|
-
|
|
911
|
-
// Another caller is already spawning the daemon — wait for it
|
|
912
|
-
// instead of racing to spawn a duplicate.
|
|
913
|
-
if (content === "starting") {
|
|
914
|
-
console.log(
|
|
915
|
-
" Assistant is starting — waiting for it to become ready...",
|
|
916
|
-
);
|
|
917
|
-
if (await waitForDaemonReady(resources.daemonPort, 60000)) {
|
|
918
|
-
console.log(" Assistant is ready\n");
|
|
919
|
-
ensureBunInstalled();
|
|
920
|
-
return;
|
|
921
|
-
}
|
|
922
|
-
// The other spawn may have failed; clean up and proceed to spawn.
|
|
923
|
-
try {
|
|
924
|
-
unlinkSync(pidFile);
|
|
925
|
-
} catch {}
|
|
926
|
-
}
|
|
883
|
+
// --- Lifecycle guard: prevent split-brain daemon state ---
|
|
884
|
+
if (await awaitStartingSentinel(pidFile, resources.daemonPort)) {
|
|
885
|
+
ensureBunInstalled();
|
|
886
|
+
return;
|
|
887
|
+
}
|
|
927
888
|
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
try {
|
|
937
|
-
unlinkSync(pidFile);
|
|
938
|
-
} catch {}
|
|
939
|
-
}
|
|
940
|
-
}
|
|
941
|
-
} catch {}
|
|
889
|
+
const daemonState = await resolveProcessState(
|
|
890
|
+
pidFile,
|
|
891
|
+
resources.daemonPort,
|
|
892
|
+
"Assistant",
|
|
893
|
+
);
|
|
894
|
+
const daemonAlive = daemonState.status === "healthy";
|
|
895
|
+
if (daemonAlive) {
|
|
896
|
+
console.log(` Assistant already running (pid ${daemonState.pid})\n`);
|
|
942
897
|
}
|
|
943
898
|
|
|
944
899
|
if (!daemonAlive) {
|
|
945
|
-
|
|
946
|
-
// may still be listening on the HTTP port (e.g. if the PID file was
|
|
947
|
-
// overwritten by a crashed restart attempt). Check before starting a new one.
|
|
948
|
-
if (await isDaemonResponsive(resources.daemonPort)) {
|
|
949
|
-
// Restore PID tracking so lifecycle commands (sleep, retire,
|
|
950
|
-
// stopLocalProcesses) can manage this daemon process.
|
|
951
|
-
const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
|
|
952
|
-
if (recoveredPid) {
|
|
953
|
-
console.log(
|
|
954
|
-
` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
|
|
955
|
-
);
|
|
956
|
-
} else {
|
|
957
|
-
console.log(" Assistant is responsive — skipping restart\n");
|
|
958
|
-
}
|
|
959
|
-
// Ensure bun is available for runtime features (browser, skills install)
|
|
960
|
-
// even when reusing an existing daemon.
|
|
900
|
+
if (await checkOrphanedDaemon(pidFile, resources.daemonPort)) {
|
|
961
901
|
ensureBunInstalled();
|
|
962
902
|
return;
|
|
963
903
|
}
|
|
@@ -1013,39 +953,7 @@ export async function startLocalDaemon(
|
|
|
1013
953
|
daemonEnv[key] = process.env[key]!;
|
|
1014
954
|
}
|
|
1015
955
|
}
|
|
1016
|
-
|
|
1017
|
-
daemonEnv.VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH =
|
|
1018
|
-
options.defaultWorkspaceConfigPath;
|
|
1019
|
-
}
|
|
1020
|
-
// When running a named instance, override env so the daemon resolves
|
|
1021
|
-
// all paths under the instance directory and listens on its own port.
|
|
1022
|
-
if (resources) {
|
|
1023
|
-
daemonEnv.VELLUM_WORKSPACE_DIR = join(
|
|
1024
|
-
resources.instanceDir,
|
|
1025
|
-
".vellum",
|
|
1026
|
-
"workspace",
|
|
1027
|
-
);
|
|
1028
|
-
daemonEnv.GATEWAY_SECURITY_DIR = join(
|
|
1029
|
-
resources.instanceDir,
|
|
1030
|
-
".vellum",
|
|
1031
|
-
"protected",
|
|
1032
|
-
);
|
|
1033
|
-
daemonEnv.CREDENTIAL_SECURITY_DIR = join(
|
|
1034
|
-
resources.instanceDir,
|
|
1035
|
-
".vellum",
|
|
1036
|
-
"protected",
|
|
1037
|
-
);
|
|
1038
|
-
daemonEnv.RUNTIME_HTTP_PORT = String(resources.daemonPort);
|
|
1039
|
-
daemonEnv.GATEWAY_PORT = String(resources.gatewayPort);
|
|
1040
|
-
daemonEnv.QDRANT_HTTP_PORT = String(resources.qdrantPort);
|
|
1041
|
-
delete daemonEnv.QDRANT_URL;
|
|
1042
|
-
}
|
|
1043
|
-
|
|
1044
|
-
if (options?.signingKey) {
|
|
1045
|
-
daemonEnv.ACTOR_TOKEN_SIGNING_KEY = options.signingKey;
|
|
1046
|
-
}
|
|
1047
|
-
|
|
1048
|
-
applyIpcSocketDirOverride(daemonEnv);
|
|
956
|
+
applyDaemonEnvOverrides(daemonEnv, resources, options);
|
|
1049
957
|
|
|
1050
958
|
// Write a sentinel PID file before spawning so concurrent hatch() calls
|
|
1051
959
|
// see the file and fall through to the isDaemonResponsive() port check
|
|
@@ -1112,13 +1020,7 @@ export async function startLocalDaemon(
|
|
|
1112
1020
|
}
|
|
1113
1021
|
}
|
|
1114
1022
|
|
|
1115
|
-
|
|
1116
|
-
console.log(" Assistant ready\n");
|
|
1117
|
-
} else {
|
|
1118
|
-
console.log(
|
|
1119
|
-
" ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
|
|
1120
|
-
);
|
|
1121
|
-
}
|
|
1023
|
+
logDaemonReadiness(daemonReady);
|
|
1122
1024
|
} else {
|
|
1123
1025
|
console.log("🔨 Starting local assistant...");
|
|
1124
1026
|
|
|
@@ -1131,34 +1033,17 @@ export async function startLocalDaemon(
|
|
|
1131
1033
|
}
|
|
1132
1034
|
if (watch) {
|
|
1133
1035
|
await startDaemonWatchFromSource(assistantIndex, resources, options);
|
|
1134
|
-
|
|
1135
|
-
const daemonReady = await waitForDaemonReady(resources.daemonPort, 60000);
|
|
1136
|
-
if (daemonReady) {
|
|
1137
|
-
console.log(" Assistant ready\n");
|
|
1138
|
-
} else {
|
|
1139
|
-
console.log(
|
|
1140
|
-
" ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
|
|
1141
|
-
);
|
|
1142
|
-
}
|
|
1143
1036
|
} else {
|
|
1144
1037
|
await startDaemonFromSource(assistantIndex, resources, options);
|
|
1145
|
-
|
|
1146
|
-
const daemonReady = await waitForDaemonReady(resources.daemonPort, 60000);
|
|
1147
|
-
if (daemonReady) {
|
|
1148
|
-
console.log(" Assistant ready\n");
|
|
1149
|
-
} else {
|
|
1150
|
-
console.log(
|
|
1151
|
-
" ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
|
|
1152
|
-
);
|
|
1153
|
-
}
|
|
1154
1038
|
}
|
|
1039
|
+
logDaemonReadiness(await waitForDaemonReady(resources.daemonPort, 60000));
|
|
1155
1040
|
}
|
|
1156
1041
|
}
|
|
1157
1042
|
|
|
1158
1043
|
export async function startGateway(
|
|
1159
1044
|
watch: boolean = false,
|
|
1160
1045
|
resources?: LocalInstanceResources,
|
|
1161
|
-
options?: { signingKey?: string },
|
|
1046
|
+
options?: { signingKey?: string; bootstrapSecret?: string },
|
|
1162
1047
|
): Promise<string> {
|
|
1163
1048
|
const effectiveGatewayPort = resources?.gatewayPort ?? GATEWAY_PORT;
|
|
1164
1049
|
|
|
@@ -1194,6 +1079,9 @@ export async function startGateway(
|
|
|
1194
1079
|
...(options?.signingKey
|
|
1195
1080
|
? { ACTOR_TOKEN_SIGNING_KEY: options.signingKey }
|
|
1196
1081
|
: {}),
|
|
1082
|
+
...(options?.bootstrapSecret
|
|
1083
|
+
? { GUARDIAN_BOOTSTRAP_SECRET: options.bootstrapSecret }
|
|
1084
|
+
: {}),
|
|
1197
1085
|
...(watch
|
|
1198
1086
|
? {
|
|
1199
1087
|
VELLUM_DEV: "1",
|
|
@@ -1273,27 +1161,7 @@ export async function startGateway(
|
|
|
1273
1161
|
// Wait for the gateway to be responsive before returning. Without this,
|
|
1274
1162
|
// callers may try to connect before the HTTP server is listening and get
|
|
1275
1163
|
// connection-refused errors.
|
|
1276
|
-
const
|
|
1277
|
-
const timeoutMs = 30000;
|
|
1278
|
-
let ready = false;
|
|
1279
|
-
while (Date.now() - start < timeoutMs) {
|
|
1280
|
-
try {
|
|
1281
|
-
const res = await fetch(
|
|
1282
|
-
`http://localhost:${effectiveGatewayPort}/healthz`,
|
|
1283
|
-
{
|
|
1284
|
-
signal: AbortSignal.timeout(2000),
|
|
1285
|
-
},
|
|
1286
|
-
);
|
|
1287
|
-
if (res.ok) {
|
|
1288
|
-
ready = true;
|
|
1289
|
-
break;
|
|
1290
|
-
}
|
|
1291
|
-
} catch {
|
|
1292
|
-
// Gateway not ready yet
|
|
1293
|
-
}
|
|
1294
|
-
await new Promise((r) => setTimeout(r, 250));
|
|
1295
|
-
}
|
|
1296
|
-
|
|
1164
|
+
const ready = await waitForDaemonReady(effectiveGatewayPort, 30000);
|
|
1297
1165
|
if (!ready) {
|
|
1298
1166
|
console.warn(
|
|
1299
1167
|
"⚠ Gateway started but health check did not respond within 30s",
|
|
@@ -1304,6 +1172,20 @@ export async function startGateway(
|
|
|
1304
1172
|
return gatewayUrl;
|
|
1305
1173
|
}
|
|
1306
1174
|
|
|
1175
|
+
/** Check whether a PID belongs to an ngrok process via its command line. */
|
|
1176
|
+
function isNgrokProcess(pid: number): boolean {
|
|
1177
|
+
try {
|
|
1178
|
+
const output = execFileSync("ps", ["-p", String(pid), "-o", "command="], {
|
|
1179
|
+
encoding: "utf-8",
|
|
1180
|
+
timeout: 3000,
|
|
1181
|
+
stdio: ["ignore", "pipe", "ignore"],
|
|
1182
|
+
}).trim();
|
|
1183
|
+
return /ngrok/.test(output);
|
|
1184
|
+
} catch {
|
|
1185
|
+
return false;
|
|
1186
|
+
}
|
|
1187
|
+
}
|
|
1188
|
+
|
|
1307
1189
|
/**
|
|
1308
1190
|
* Stop any locally-running daemon and gateway processes
|
|
1309
1191
|
* and clean up PID files. Called when hatch fails partway through
|
|
@@ -1326,15 +1208,14 @@ export async function stopLocalProcesses(
|
|
|
1326
1208
|
|
|
1327
1209
|
// Kill ngrok directly by PID rather than using stopProcessByPidFile, because
|
|
1328
1210
|
// isVellumProcess() won't match the ngrok binary — resulting in a no-op that
|
|
1329
|
-
// leaves ngrok running.
|
|
1211
|
+
// leaves ngrok running. Verify the PID still belongs to ngrok before killing
|
|
1212
|
+
// to avoid hitting an unrelated process if the OS has reused the PID.
|
|
1330
1213
|
const ngrokPidFile = join(vellumDir, "ngrok.pid");
|
|
1331
1214
|
if (existsSync(ngrokPidFile)) {
|
|
1332
1215
|
try {
|
|
1333
1216
|
const pid = parseInt(readFileSync(ngrokPidFile, "utf-8").trim(), 10);
|
|
1334
|
-
if (!isNaN(pid)) {
|
|
1335
|
-
|
|
1336
|
-
process.kill(pid, "SIGTERM");
|
|
1337
|
-
} catch {}
|
|
1217
|
+
if (!isNaN(pid) && isNgrokProcess(pid)) {
|
|
1218
|
+
await stopProcess(pid, "ngrok");
|
|
1338
1219
|
}
|
|
1339
1220
|
unlinkSync(ngrokPidFile);
|
|
1340
1221
|
} catch {}
|
|
@@ -58,7 +58,7 @@ export function readPidFile(pidFile: string): string | null {
|
|
|
58
58
|
return pid || null;
|
|
59
59
|
}
|
|
60
60
|
|
|
61
|
-
export function
|
|
61
|
+
export function isPidAlive(pid: string): boolean {
|
|
62
62
|
try {
|
|
63
63
|
process.kill(parseInt(pid, 10), 0);
|
|
64
64
|
return true;
|
|
@@ -138,10 +138,14 @@ export async function detectOrphanedProcesses(
|
|
|
138
138
|
// Process table scan — discover orphaned processes by scanning the OS
|
|
139
139
|
// process table rather than reading PID files from the workspace.
|
|
140
140
|
try {
|
|
141
|
-
const output = await execOutput(
|
|
142
|
-
"
|
|
143
|
-
|
|
144
|
-
|
|
141
|
+
const output = await execOutput(
|
|
142
|
+
"sh",
|
|
143
|
+
[
|
|
144
|
+
"-c",
|
|
145
|
+
"ps ax -o pid=,ppid=,args= | grep -E 'vellum|qdrant|openclaw' | grep -v grep",
|
|
146
|
+
],
|
|
147
|
+
{ timeoutMs: 5_000 },
|
|
148
|
+
);
|
|
145
149
|
const procs = parseRemotePs(output);
|
|
146
150
|
const ownPid = String(process.pid);
|
|
147
151
|
|
package/src/lib/pgrep.ts
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
import { execOutput } from "./step-runner";
|
|
2
2
|
|
|
3
|
+
const PGREP_TIMEOUT_MS = 5_000;
|
|
4
|
+
|
|
3
5
|
export async function pgrepExact(name: string): Promise<string[]> {
|
|
4
6
|
try {
|
|
5
|
-
const output = await execOutput("pgrep", ["-x", name]
|
|
7
|
+
const output = await execOutput("pgrep", ["-x", name], {
|
|
8
|
+
timeoutMs: PGREP_TIMEOUT_MS,
|
|
9
|
+
});
|
|
6
10
|
return output.trim().split("\n").filter(Boolean);
|
|
7
11
|
} catch {
|
|
8
12
|
return [];
|