@vellumai/cli 0.8.5 → 0.8.7-dev.202606052118.34cd356
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +6 -0
- package/bun.lock +8 -0
- package/knip.json +6 -1
- package/node_modules/@vellumai/environments/bun.lock +24 -0
- package/node_modules/@vellumai/environments/package.json +18 -0
- package/node_modules/@vellumai/environments/src/__tests__/package-boundary.test.ts +95 -0
- package/node_modules/@vellumai/environments/src/index.ts +11 -0
- package/{src/lib/environments → node_modules/@vellumai/environments/src}/seeds.ts +5 -9
- package/node_modules/@vellumai/environments/tsconfig.json +20 -0
- package/node_modules/@vellumai/local-mode/bun.lock +29 -0
- package/node_modules/@vellumai/local-mode/package.json +22 -0
- package/node_modules/@vellumai/local-mode/src/__tests__/environment.test.ts +116 -0
- package/node_modules/@vellumai/local-mode/src/__tests__/gateway-proxy.test.ts +79 -0
- package/node_modules/@vellumai/local-mode/src/__tests__/hatch.test.ts +108 -0
- package/node_modules/@vellumai/local-mode/src/__tests__/package-boundary.test.ts +104 -0
- package/node_modules/@vellumai/local-mode/src/__tests__/wake.test.ts +66 -0
- package/node_modules/@vellumai/local-mode/src/config.ts +66 -0
- package/node_modules/@vellumai/local-mode/src/environment.ts +62 -0
- package/node_modules/@vellumai/local-mode/src/gateway-proxy.ts +109 -0
- package/node_modules/@vellumai/local-mode/src/guardian-token.ts +122 -0
- package/node_modules/@vellumai/local-mode/src/hatch.ts +92 -0
- package/node_modules/@vellumai/local-mode/src/index.ts +48 -0
- package/node_modules/@vellumai/local-mode/src/lockfile-contract.test.ts +173 -0
- package/node_modules/@vellumai/local-mode/src/lockfile-contract.ts +114 -0
- package/node_modules/@vellumai/local-mode/src/lockfile.test.ts +235 -0
- package/node_modules/@vellumai/local-mode/src/lockfile.ts +133 -0
- package/node_modules/@vellumai/local-mode/src/retire.ts +58 -0
- package/node_modules/@vellumai/local-mode/src/util.ts +102 -0
- package/node_modules/@vellumai/local-mode/src/wake.ts +78 -0
- package/node_modules/@vellumai/local-mode/tsconfig.json +16 -0
- package/package.json +12 -1
- package/src/__tests__/assistant-client-refresh.test.ts +182 -0
- package/src/__tests__/backup.test.ts +38 -0
- package/src/__tests__/clean.test.ts +179 -0
- package/src/__tests__/client-token.test.ts +87 -0
- package/src/__tests__/client-tui-refresh.test.ts +170 -0
- package/src/__tests__/cloudflare-tunnel.test.ts +137 -0
- package/src/__tests__/connect-import.test.ts +317 -0
- package/src/__tests__/devices.test.ts +272 -0
- package/src/__tests__/env-drift.test.ts +32 -44
- package/src/__tests__/flags.test.ts +248 -0
- package/src/__tests__/guardian-token.test.ts +126 -2
- package/src/__tests__/multi-local.test.ts +1 -1
- package/src/__tests__/orphan-detection.test.ts +8 -6
- package/src/__tests__/pair.test.ts +271 -0
- package/src/__tests__/paired-lifecycle.test.ts +116 -0
- package/src/__tests__/recover.test.ts +307 -0
- package/src/__tests__/segments-to-plain-text.test.ts +37 -0
- package/src/__tests__/tui-midsession-refresh.test.ts +166 -0
- package/src/__tests__/unpair.test.ts +163 -0
- package/src/__tests__/wake.test.ts +215 -0
- package/src/commands/backup.ts +2 -0
- package/src/commands/client.ts +569 -39
- package/src/commands/connect/import.ts +217 -0
- package/src/commands/connect.ts +31 -0
- package/src/commands/devices.ts +247 -0
- package/src/commands/env.ts +1 -1
- package/src/commands/flags.ts +269 -0
- package/src/commands/gateway/token.ts +73 -0
- package/src/commands/gateway.ts +29 -0
- package/src/commands/logs.ts +6 -18
- package/src/commands/pair.ts +222 -0
- package/src/commands/ps.ts +57 -41
- package/src/commands/recover.ts +47 -9
- package/src/commands/restore.ts +8 -1
- package/src/commands/retire.ts +23 -70
- package/src/commands/rollback.ts +2 -14
- package/src/commands/sleep.ts +7 -0
- package/src/commands/ssh.ts +5 -24
- package/src/commands/teleport.ts +34 -26
- package/src/commands/tunnel.ts +46 -2
- package/src/commands/unpair.ts +118 -0
- package/src/commands/upgrade.ts +8 -16
- package/src/commands/wake.ts +75 -45
- package/src/components/DefaultMainScreen.tsx +100 -14
- package/src/index.ts +22 -0
- package/src/lib/__tests__/lifecycle-reporter.test.ts +59 -0
- package/src/lib/__tests__/step-runner.test.ts +49 -1
- package/src/lib/assistant-client.ts +58 -37
- package/src/lib/assistant-config.ts +28 -3
- package/src/lib/cloudflare-tunnel.ts +276 -0
- package/src/lib/config-utils.ts +24 -3
- package/src/lib/confirm-action.ts +57 -0
- package/src/lib/docker.ts +82 -8
- package/src/lib/environments/__tests__/paths.test.ts +2 -1
- package/src/lib/environments/__tests__/seeds.test.ts +2 -1
- package/src/lib/environments/paths.ts +1 -1
- package/src/lib/environments/resolve.ts +11 -35
- package/src/lib/guardian-token.ts +132 -9
- package/src/lib/hatch-local.ts +75 -33
- package/src/lib/http-client.ts +1 -3
- package/src/lib/lifecycle-reporter.ts +31 -0
- package/src/lib/local.ts +193 -298
- package/src/lib/orphan-detection.ts +9 -5
- package/src/lib/pgrep.ts +5 -1
- package/src/lib/platform-client.ts +97 -49
- package/src/lib/process.ts +109 -39
- package/src/lib/retire-local.ts +28 -14
- package/src/lib/segments-to-plain-text.ts +35 -0
- package/src/lib/step-runner.ts +67 -7
- package/src/lib/sync-cloud-assistants.ts +17 -0
- /package/{src/lib/environments → node_modules/@vellumai/environments/src}/types.ts +0 -0
package/src/lib/local.ts
CHANGED
|
@@ -17,7 +17,11 @@ import {
|
|
|
17
17
|
} from "./assistant-config.js";
|
|
18
18
|
import { GATEWAY_PORT } from "./constants.js";
|
|
19
19
|
import { httpHealthCheck, waitForDaemonReady } from "./http-client.js";
|
|
20
|
-
import {
|
|
20
|
+
import {
|
|
21
|
+
resolveProcessState,
|
|
22
|
+
stopProcess,
|
|
23
|
+
stopProcessByPidFile,
|
|
24
|
+
} from "./process.js";
|
|
21
25
|
import { openLogFile, pipeToLogFile } from "./xdg-log.js";
|
|
22
26
|
|
|
23
27
|
const _require = createRequire(import.meta.url);
|
|
@@ -226,8 +230,10 @@ function resolveAssistantIndexPath(): string | undefined {
|
|
|
226
230
|
}
|
|
227
231
|
|
|
228
232
|
try {
|
|
229
|
-
const
|
|
230
|
-
|
|
233
|
+
const assistantPkgPath = _require.resolve(
|
|
234
|
+
"@vellumai/assistant/package.json",
|
|
235
|
+
);
|
|
236
|
+
const resolved = join(dirname(assistantPkgPath), "src", "index.ts");
|
|
231
237
|
if (existsSync(resolved)) {
|
|
232
238
|
return resolved;
|
|
233
239
|
}
|
|
@@ -319,80 +325,16 @@ type DaemonStartOptions = {
|
|
|
319
325
|
signingKey?: string;
|
|
320
326
|
};
|
|
321
327
|
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
328
|
+
/**
|
|
329
|
+
* Apply per-instance resource overrides and shared daemon options to an
|
|
330
|
+
* environment object. Called from all daemon spawn paths (source, watch,
|
|
331
|
+
* bundled binary) to eliminate drift between the three.
|
|
332
|
+
*/
|
|
333
|
+
function applyDaemonEnvOverrides(
|
|
334
|
+
env: Record<string, string | undefined>,
|
|
335
|
+
resources: LocalInstanceResources | undefined,
|
|
325
336
|
options?: DaemonStartOptions,
|
|
326
|
-
):
|
|
327
|
-
const foreground = options?.foreground ?? false;
|
|
328
|
-
const daemonMainPath = resolveDaemonMainPath(assistantIndex);
|
|
329
|
-
|
|
330
|
-
// Ensure the directory containing PID/socket files exists. For named
|
|
331
|
-
// instances this is instanceDir/.vellum/workspace/ (matching daemon's getWorkspaceDir()).
|
|
332
|
-
const pidFile = getDaemonPidPath(resources);
|
|
333
|
-
mkdirSync(dirname(pidFile), { recursive: true });
|
|
334
|
-
|
|
335
|
-
// --- Lifecycle guard: prevent split-brain daemon state ---
|
|
336
|
-
if (existsSync(pidFile)) {
|
|
337
|
-
try {
|
|
338
|
-
const content = readFileSync(pidFile, "utf-8").trim();
|
|
339
|
-
|
|
340
|
-
// Another caller is already spawning the daemon — wait for it
|
|
341
|
-
// instead of racing to spawn a duplicate.
|
|
342
|
-
if (content === "starting") {
|
|
343
|
-
console.log(
|
|
344
|
-
" Assistant is starting — waiting for it to become ready...",
|
|
345
|
-
);
|
|
346
|
-
if (await waitForDaemonReady(resources.daemonPort, 60000)) {
|
|
347
|
-
console.log(" Assistant is ready\n");
|
|
348
|
-
return;
|
|
349
|
-
}
|
|
350
|
-
// The other spawn may have failed; clean up and proceed to spawn.
|
|
351
|
-
try {
|
|
352
|
-
unlinkSync(pidFile);
|
|
353
|
-
} catch {}
|
|
354
|
-
}
|
|
355
|
-
|
|
356
|
-
const pid = parseInt(content, 10);
|
|
357
|
-
if (!isNaN(pid)) {
|
|
358
|
-
try {
|
|
359
|
-
process.kill(pid, 0);
|
|
360
|
-
console.log(` Assistant already running (pid ${pid})\n`);
|
|
361
|
-
return;
|
|
362
|
-
} catch {
|
|
363
|
-
try {
|
|
364
|
-
unlinkSync(pidFile);
|
|
365
|
-
} catch {}
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
} catch {}
|
|
369
|
-
}
|
|
370
|
-
|
|
371
|
-
// PID file was stale or missing — check if daemon is responding via HTTP
|
|
372
|
-
if (await isDaemonResponsive(resources.daemonPort)) {
|
|
373
|
-
// Recover PID tracking so lifecycle commands (sleep, retire,
|
|
374
|
-
// stopLocalProcesses) can manage this daemon process.
|
|
375
|
-
const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
|
|
376
|
-
if (recoveredPid) {
|
|
377
|
-
console.log(
|
|
378
|
-
` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
|
|
379
|
-
);
|
|
380
|
-
} else {
|
|
381
|
-
console.log(" Assistant is responsive — skipping restart\n");
|
|
382
|
-
}
|
|
383
|
-
return;
|
|
384
|
-
}
|
|
385
|
-
|
|
386
|
-
const env: Record<string, string | undefined> = {
|
|
387
|
-
...process.env,
|
|
388
|
-
RUNTIME_HTTP_PORT: process.env.RUNTIME_HTTP_PORT || "7821",
|
|
389
|
-
VELLUM_CLOUD: "local",
|
|
390
|
-
VELLUM_DEV: "1",
|
|
391
|
-
VELLUM_ENVIRONMENT: process.env.VELLUM_ENVIRONMENT || "local",
|
|
392
|
-
...(options?.signingKey
|
|
393
|
-
? { ACTOR_TOKEN_SIGNING_KEY: options.signingKey }
|
|
394
|
-
: {}),
|
|
395
|
-
};
|
|
337
|
+
): void {
|
|
396
338
|
if (resources) {
|
|
397
339
|
env.VELLUM_WORKSPACE_DIR = join(
|
|
398
340
|
resources.instanceDir,
|
|
@@ -414,25 +356,75 @@ async function startDaemonFromSource(
|
|
|
414
356
|
env.QDRANT_HTTP_PORT = String(resources.qdrantPort);
|
|
415
357
|
delete env.QDRANT_URL;
|
|
416
358
|
}
|
|
359
|
+
if (options?.signingKey) {
|
|
360
|
+
env.ACTOR_TOKEN_SIGNING_KEY = options.signingKey;
|
|
361
|
+
}
|
|
417
362
|
if (options?.defaultWorkspaceConfigPath) {
|
|
418
363
|
env.VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH =
|
|
419
364
|
options.defaultWorkspaceConfigPath;
|
|
420
365
|
}
|
|
421
|
-
|
|
422
366
|
applyIpcSocketDirOverride(env);
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
function logDaemonReadiness(ready: boolean): void {
|
|
370
|
+
if (ready) {
|
|
371
|
+
console.log(" Assistant ready\n");
|
|
372
|
+
} else {
|
|
373
|
+
console.log(
|
|
374
|
+
" ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
|
|
375
|
+
);
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
async function startDaemonFromSource(
|
|
380
|
+
assistantIndex: string,
|
|
381
|
+
resources: LocalInstanceResources,
|
|
382
|
+
options?: DaemonStartOptions,
|
|
383
|
+
): Promise<void> {
|
|
384
|
+
const foreground = options?.foreground ?? false;
|
|
385
|
+
const daemonMainPath = resolveDaemonMainPath(assistantIndex);
|
|
386
|
+
|
|
387
|
+
// Ensure the directory containing PID/socket files exists. For named
|
|
388
|
+
// instances this is instanceDir/.vellum/workspace/ (matching daemon's getWorkspaceDir()).
|
|
389
|
+
const pidFile = getDaemonPidPath(resources);
|
|
390
|
+
mkdirSync(dirname(pidFile), { recursive: true });
|
|
391
|
+
|
|
392
|
+
// --- Lifecycle guard: prevent split-brain daemon state ---
|
|
393
|
+
if (await awaitStartingSentinel(pidFile, resources.daemonPort)) return;
|
|
394
|
+
|
|
395
|
+
const daemonState = await resolveProcessState(
|
|
396
|
+
pidFile,
|
|
397
|
+
resources.daemonPort,
|
|
398
|
+
"Assistant",
|
|
399
|
+
);
|
|
400
|
+
if (daemonState.status === "healthy") {
|
|
401
|
+
console.log(` Assistant already running (pid ${daemonState.pid})\n`);
|
|
402
|
+
return;
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
if (await checkOrphanedDaemon(pidFile, resources.daemonPort)) return;
|
|
406
|
+
|
|
407
|
+
const env: Record<string, string | undefined> = {
|
|
408
|
+
...process.env,
|
|
409
|
+
RUNTIME_HTTP_PORT: process.env.RUNTIME_HTTP_PORT || "7821",
|
|
410
|
+
VELLUM_CLOUD: "local",
|
|
411
|
+
VELLUM_DEV: "1",
|
|
412
|
+
VELLUM_ENVIRONMENT: process.env.VELLUM_ENVIRONMENT || "local",
|
|
413
|
+
};
|
|
414
|
+
applyDaemonEnvOverrides(env, resources, options);
|
|
423
415
|
|
|
424
416
|
// Write a sentinel PID file before spawning so concurrent hatch() calls
|
|
425
417
|
// detect the in-progress spawn and wait instead of racing.
|
|
426
418
|
writeFileSync(pidFile, "starting", "utf-8");
|
|
427
419
|
|
|
428
420
|
const child = foreground
|
|
429
|
-
? spawn(
|
|
421
|
+
? spawn(process.execPath, ["run", daemonMainPath], {
|
|
430
422
|
stdio: "inherit",
|
|
431
423
|
env,
|
|
432
424
|
})
|
|
433
425
|
: (() => {
|
|
434
426
|
const daemonLogFd = openLogFile("hatch.log");
|
|
435
|
-
const c = spawn(
|
|
427
|
+
const c = spawn(process.execPath, ["run", daemonMainPath], {
|
|
436
428
|
detached: true,
|
|
437
429
|
stdio: ["ignore", "pipe", "pipe"],
|
|
438
430
|
env,
|
|
@@ -469,101 +461,34 @@ async function startDaemonWatchFromSource(
|
|
|
469
461
|
mkdirSync(dirname(pidFile), { recursive: true });
|
|
470
462
|
|
|
471
463
|
// --- Lifecycle guard: prevent split-brain daemon state ---
|
|
472
|
-
|
|
473
|
-
if (existsSync(pidFile)) {
|
|
474
|
-
try {
|
|
475
|
-
const content = readFileSync(pidFile, "utf-8").trim();
|
|
464
|
+
if (await awaitStartingSentinel(pidFile, resources.daemonPort)) return;
|
|
476
465
|
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
console.log(" Assistant is ready\n");
|
|
485
|
-
return;
|
|
486
|
-
}
|
|
487
|
-
// The other spawn may have failed; clean up and proceed to spawn.
|
|
488
|
-
try {
|
|
489
|
-
unlinkSync(pidFile);
|
|
490
|
-
} catch {}
|
|
491
|
-
}
|
|
492
|
-
|
|
493
|
-
const pid = parseInt(content, 10);
|
|
494
|
-
if (!isNaN(pid)) {
|
|
495
|
-
try {
|
|
496
|
-
process.kill(pid, 0); // Check if alive
|
|
497
|
-
console.log(` Assistant already running (pid ${pid})\n`);
|
|
498
|
-
return;
|
|
499
|
-
} catch {
|
|
500
|
-
// Process doesn't exist, clean up stale PID file
|
|
501
|
-
try {
|
|
502
|
-
unlinkSync(pidFile);
|
|
503
|
-
} catch {}
|
|
504
|
-
}
|
|
505
|
-
}
|
|
506
|
-
} catch {}
|
|
507
|
-
}
|
|
508
|
-
|
|
509
|
-
// PID file was stale or missing — check if daemon is responding via HTTP
|
|
510
|
-
if (await isDaemonResponsive(resources.daemonPort)) {
|
|
511
|
-
// Recover PID tracking so lifecycle commands (sleep, retire,
|
|
512
|
-
// stopLocalProcesses) can manage this daemon process.
|
|
513
|
-
const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
|
|
514
|
-
if (recoveredPid) {
|
|
515
|
-
console.log(
|
|
516
|
-
` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
|
|
517
|
-
);
|
|
518
|
-
} else {
|
|
519
|
-
console.log(" Assistant is responsive — skipping restart\n");
|
|
520
|
-
}
|
|
466
|
+
const daemonState = await resolveProcessState(
|
|
467
|
+
pidFile,
|
|
468
|
+
resources.daemonPort,
|
|
469
|
+
"Assistant",
|
|
470
|
+
);
|
|
471
|
+
if (daemonState.status === "healthy") {
|
|
472
|
+
console.log(` Assistant already running (pid ${daemonState.pid})\n`);
|
|
521
473
|
return;
|
|
522
474
|
}
|
|
523
475
|
|
|
476
|
+
if (await checkOrphanedDaemon(pidFile, resources.daemonPort)) return;
|
|
477
|
+
|
|
524
478
|
const env: Record<string, string | undefined> = {
|
|
525
479
|
...process.env,
|
|
526
480
|
RUNTIME_HTTP_PORT: process.env.RUNTIME_HTTP_PORT || "7821",
|
|
527
481
|
VELLUM_DEV: "1",
|
|
528
482
|
VELLUM_ENVIRONMENT: process.env.VELLUM_ENVIRONMENT || "local",
|
|
529
|
-
...(options?.signingKey
|
|
530
|
-
? { ACTOR_TOKEN_SIGNING_KEY: options.signingKey }
|
|
531
|
-
: {}),
|
|
532
483
|
};
|
|
533
|
-
|
|
534
|
-
env.VELLUM_WORKSPACE_DIR = join(
|
|
535
|
-
resources.instanceDir,
|
|
536
|
-
".vellum",
|
|
537
|
-
"workspace",
|
|
538
|
-
);
|
|
539
|
-
env.GATEWAY_SECURITY_DIR = join(
|
|
540
|
-
resources.instanceDir,
|
|
541
|
-
".vellum",
|
|
542
|
-
"protected",
|
|
543
|
-
);
|
|
544
|
-
env.CREDENTIAL_SECURITY_DIR = join(
|
|
545
|
-
resources.instanceDir,
|
|
546
|
-
".vellum",
|
|
547
|
-
"protected",
|
|
548
|
-
);
|
|
549
|
-
env.RUNTIME_HTTP_PORT = String(resources.daemonPort);
|
|
550
|
-
env.GATEWAY_PORT = String(resources.gatewayPort);
|
|
551
|
-
env.QDRANT_HTTP_PORT = String(resources.qdrantPort);
|
|
552
|
-
delete env.QDRANT_URL;
|
|
553
|
-
}
|
|
554
|
-
if (options?.defaultWorkspaceConfigPath) {
|
|
555
|
-
env.VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH =
|
|
556
|
-
options.defaultWorkspaceConfigPath;
|
|
557
|
-
}
|
|
558
|
-
|
|
559
|
-
applyIpcSocketDirOverride(env);
|
|
484
|
+
applyDaemonEnvOverrides(env, resources, options);
|
|
560
485
|
|
|
561
486
|
// Write a sentinel PID file before spawning so concurrent hatch() calls
|
|
562
487
|
// detect the in-progress spawn and wait instead of racing.
|
|
563
488
|
writeFileSync(pidFile, "starting", "utf-8");
|
|
564
489
|
|
|
565
490
|
const daemonLogFd = openLogFile("hatch.log");
|
|
566
|
-
const child = spawn(
|
|
491
|
+
const child = spawn(process.execPath, ["--watch", "run", mainPath], {
|
|
567
492
|
detached: true,
|
|
568
493
|
stdio: ["ignore", "pipe", "pipe"],
|
|
569
494
|
env,
|
|
@@ -591,6 +516,18 @@ function resolveGatewayDir(): string {
|
|
|
591
516
|
return sourceDir;
|
|
592
517
|
}
|
|
593
518
|
|
|
519
|
+
// npm-installed: @vellumai/cli and @vellumai/vellum-gateway are siblings
|
|
520
|
+
const npmGatewayDir = join(
|
|
521
|
+
import.meta.dir,
|
|
522
|
+
"..",
|
|
523
|
+
"..",
|
|
524
|
+
"..",
|
|
525
|
+
"vellum-gateway",
|
|
526
|
+
);
|
|
527
|
+
if (isGatewaySourceDir(npmGatewayDir)) {
|
|
528
|
+
return npmGatewayDir;
|
|
529
|
+
}
|
|
530
|
+
|
|
594
531
|
// Compiled binary: gateway/ bundled adjacent to the CLI executable.
|
|
595
532
|
const binGateway = join(dirname(process.execPath), "gateway");
|
|
596
533
|
if (isGatewaySourceDir(binGateway)) {
|
|
@@ -660,6 +597,63 @@ function recoverPidFile(
|
|
|
660
597
|
return pid;
|
|
661
598
|
}
|
|
662
599
|
|
|
600
|
+
/**
|
|
601
|
+
* Handle the "starting" sentinel in a PID file. When another caller is
|
|
602
|
+
* already spawning the daemon, wait for it to become ready instead of
|
|
603
|
+
* racing to spawn a duplicate.
|
|
604
|
+
*
|
|
605
|
+
* Returns `true` if the daemon became ready (caller should return early),
|
|
606
|
+
* `false` if the spawn failed or the sentinel wasn't present (caller
|
|
607
|
+
* should proceed). Cleans up the PID file on failure.
|
|
608
|
+
*/
|
|
609
|
+
async function awaitStartingSentinel(
|
|
610
|
+
pidFile: string,
|
|
611
|
+
daemonPort: number,
|
|
612
|
+
): Promise<boolean> {
|
|
613
|
+
if (!existsSync(pidFile)) return false;
|
|
614
|
+
try {
|
|
615
|
+
const content = readFileSync(pidFile, "utf-8").trim();
|
|
616
|
+
if (content !== "starting") return false;
|
|
617
|
+
} catch {
|
|
618
|
+
return false;
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
console.log(" Assistant is starting — waiting for it to become ready...");
|
|
622
|
+
if (await waitForDaemonReady(daemonPort, 60000)) {
|
|
623
|
+
console.log(" Assistant is ready\n");
|
|
624
|
+
return true;
|
|
625
|
+
}
|
|
626
|
+
try {
|
|
627
|
+
unlinkSync(pidFile);
|
|
628
|
+
} catch {}
|
|
629
|
+
return false;
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
/**
|
|
633
|
+
* Check if a daemon without a valid PID file is still reachable on its
|
|
634
|
+
* HTTP port (orphaned process). If so, recover its PID file so lifecycle
|
|
635
|
+
* commands can manage it.
|
|
636
|
+
*
|
|
637
|
+
* Returns `true` if an orphaned daemon was found (caller should skip
|
|
638
|
+
* starting a new one), `false` otherwise.
|
|
639
|
+
*/
|
|
640
|
+
async function checkOrphanedDaemon(
|
|
641
|
+
pidFile: string,
|
|
642
|
+
daemonPort: number,
|
|
643
|
+
): Promise<boolean> {
|
|
644
|
+
if (!(await isDaemonResponsive(daemonPort))) return false;
|
|
645
|
+
|
|
646
|
+
const recoveredPid = recoverPidFile(pidFile, daemonPort);
|
|
647
|
+
if (recoveredPid) {
|
|
648
|
+
console.log(
|
|
649
|
+
` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
|
|
650
|
+
);
|
|
651
|
+
} else {
|
|
652
|
+
console.log(" Assistant is responsive — skipping restart\n");
|
|
653
|
+
}
|
|
654
|
+
return true;
|
|
655
|
+
}
|
|
656
|
+
|
|
663
657
|
export async function discoverPublicUrl(
|
|
664
658
|
port?: number,
|
|
665
659
|
): Promise<string | undefined> {
|
|
@@ -900,64 +894,24 @@ export async function startLocalDaemon(
|
|
|
900
894
|
|
|
901
895
|
const pidFile = getDaemonPidPath(resources);
|
|
902
896
|
|
|
903
|
-
//
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
try {
|
|
909
|
-
const content = readFileSync(pidFile, "utf-8").trim();
|
|
910
|
-
|
|
911
|
-
// Another caller is already spawning the daemon — wait for it
|
|
912
|
-
// instead of racing to spawn a duplicate.
|
|
913
|
-
if (content === "starting") {
|
|
914
|
-
console.log(
|
|
915
|
-
" Assistant is starting — waiting for it to become ready...",
|
|
916
|
-
);
|
|
917
|
-
if (await waitForDaemonReady(resources.daemonPort, 60000)) {
|
|
918
|
-
console.log(" Assistant is ready\n");
|
|
919
|
-
ensureBunInstalled();
|
|
920
|
-
return;
|
|
921
|
-
}
|
|
922
|
-
// The other spawn may have failed; clean up and proceed to spawn.
|
|
923
|
-
try {
|
|
924
|
-
unlinkSync(pidFile);
|
|
925
|
-
} catch {}
|
|
926
|
-
}
|
|
897
|
+
// --- Lifecycle guard: prevent split-brain daemon state ---
|
|
898
|
+
if (await awaitStartingSentinel(pidFile, resources.daemonPort)) {
|
|
899
|
+
ensureBunInstalled();
|
|
900
|
+
return;
|
|
901
|
+
}
|
|
927
902
|
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
try {
|
|
937
|
-
unlinkSync(pidFile);
|
|
938
|
-
} catch {}
|
|
939
|
-
}
|
|
940
|
-
}
|
|
941
|
-
} catch {}
|
|
903
|
+
const daemonState = await resolveProcessState(
|
|
904
|
+
pidFile,
|
|
905
|
+
resources.daemonPort,
|
|
906
|
+
"Assistant",
|
|
907
|
+
);
|
|
908
|
+
const daemonAlive = daemonState.status === "healthy";
|
|
909
|
+
if (daemonAlive) {
|
|
910
|
+
console.log(` Assistant already running (pid ${daemonState.pid})\n`);
|
|
942
911
|
}
|
|
943
912
|
|
|
944
913
|
if (!daemonAlive) {
|
|
945
|
-
|
|
946
|
-
// may still be listening on the HTTP port (e.g. if the PID file was
|
|
947
|
-
// overwritten by a crashed restart attempt). Check before starting a new one.
|
|
948
|
-
if (await isDaemonResponsive(resources.daemonPort)) {
|
|
949
|
-
// Restore PID tracking so lifecycle commands (sleep, retire,
|
|
950
|
-
// stopLocalProcesses) can manage this daemon process.
|
|
951
|
-
const recoveredPid = recoverPidFile(pidFile, resources.daemonPort);
|
|
952
|
-
if (recoveredPid) {
|
|
953
|
-
console.log(
|
|
954
|
-
` Assistant is responsive (pid ${recoveredPid}) — skipping restart\n`,
|
|
955
|
-
);
|
|
956
|
-
} else {
|
|
957
|
-
console.log(" Assistant is responsive — skipping restart\n");
|
|
958
|
-
}
|
|
959
|
-
// Ensure bun is available for runtime features (browser, skills install)
|
|
960
|
-
// even when reusing an existing daemon.
|
|
914
|
+
if (await checkOrphanedDaemon(pidFile, resources.daemonPort)) {
|
|
961
915
|
ensureBunInstalled();
|
|
962
916
|
return;
|
|
963
917
|
}
|
|
@@ -1013,39 +967,7 @@ export async function startLocalDaemon(
|
|
|
1013
967
|
daemonEnv[key] = process.env[key]!;
|
|
1014
968
|
}
|
|
1015
969
|
}
|
|
1016
|
-
|
|
1017
|
-
daemonEnv.VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH =
|
|
1018
|
-
options.defaultWorkspaceConfigPath;
|
|
1019
|
-
}
|
|
1020
|
-
// When running a named instance, override env so the daemon resolves
|
|
1021
|
-
// all paths under the instance directory and listens on its own port.
|
|
1022
|
-
if (resources) {
|
|
1023
|
-
daemonEnv.VELLUM_WORKSPACE_DIR = join(
|
|
1024
|
-
resources.instanceDir,
|
|
1025
|
-
".vellum",
|
|
1026
|
-
"workspace",
|
|
1027
|
-
);
|
|
1028
|
-
daemonEnv.GATEWAY_SECURITY_DIR = join(
|
|
1029
|
-
resources.instanceDir,
|
|
1030
|
-
".vellum",
|
|
1031
|
-
"protected",
|
|
1032
|
-
);
|
|
1033
|
-
daemonEnv.CREDENTIAL_SECURITY_DIR = join(
|
|
1034
|
-
resources.instanceDir,
|
|
1035
|
-
".vellum",
|
|
1036
|
-
"protected",
|
|
1037
|
-
);
|
|
1038
|
-
daemonEnv.RUNTIME_HTTP_PORT = String(resources.daemonPort);
|
|
1039
|
-
daemonEnv.GATEWAY_PORT = String(resources.gatewayPort);
|
|
1040
|
-
daemonEnv.QDRANT_HTTP_PORT = String(resources.qdrantPort);
|
|
1041
|
-
delete daemonEnv.QDRANT_URL;
|
|
1042
|
-
}
|
|
1043
|
-
|
|
1044
|
-
if (options?.signingKey) {
|
|
1045
|
-
daemonEnv.ACTOR_TOKEN_SIGNING_KEY = options.signingKey;
|
|
1046
|
-
}
|
|
1047
|
-
|
|
1048
|
-
applyIpcSocketDirOverride(daemonEnv);
|
|
970
|
+
applyDaemonEnvOverrides(daemonEnv, resources, options);
|
|
1049
971
|
|
|
1050
972
|
// Write a sentinel PID file before spawning so concurrent hatch() calls
|
|
1051
973
|
// see the file and fall through to the isDaemonResponsive() port check
|
|
@@ -1112,13 +1034,7 @@ export async function startLocalDaemon(
|
|
|
1112
1034
|
}
|
|
1113
1035
|
}
|
|
1114
1036
|
|
|
1115
|
-
|
|
1116
|
-
console.log(" Assistant ready\n");
|
|
1117
|
-
} else {
|
|
1118
|
-
console.log(
|
|
1119
|
-
" ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
|
|
1120
|
-
);
|
|
1121
|
-
}
|
|
1037
|
+
logDaemonReadiness(daemonReady);
|
|
1122
1038
|
} else {
|
|
1123
1039
|
console.log("🔨 Starting local assistant...");
|
|
1124
1040
|
|
|
@@ -1131,34 +1047,17 @@ export async function startLocalDaemon(
|
|
|
1131
1047
|
}
|
|
1132
1048
|
if (watch) {
|
|
1133
1049
|
await startDaemonWatchFromSource(assistantIndex, resources, options);
|
|
1134
|
-
|
|
1135
|
-
const daemonReady = await waitForDaemonReady(resources.daemonPort, 60000);
|
|
1136
|
-
if (daemonReady) {
|
|
1137
|
-
console.log(" Assistant ready\n");
|
|
1138
|
-
} else {
|
|
1139
|
-
console.log(
|
|
1140
|
-
" ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
|
|
1141
|
-
);
|
|
1142
|
-
}
|
|
1143
1050
|
} else {
|
|
1144
1051
|
await startDaemonFromSource(assistantIndex, resources, options);
|
|
1145
|
-
|
|
1146
|
-
const daemonReady = await waitForDaemonReady(resources.daemonPort, 60000);
|
|
1147
|
-
if (daemonReady) {
|
|
1148
|
-
console.log(" Assistant ready\n");
|
|
1149
|
-
} else {
|
|
1150
|
-
console.log(
|
|
1151
|
-
" ⚠️ Assistant did not become ready within 60s — continuing anyway\n",
|
|
1152
|
-
);
|
|
1153
|
-
}
|
|
1154
1052
|
}
|
|
1053
|
+
logDaemonReadiness(await waitForDaemonReady(resources.daemonPort, 60000));
|
|
1155
1054
|
}
|
|
1156
1055
|
}
|
|
1157
1056
|
|
|
1158
1057
|
export async function startGateway(
|
|
1159
1058
|
watch: boolean = false,
|
|
1160
1059
|
resources?: LocalInstanceResources,
|
|
1161
|
-
options?: { signingKey?: string },
|
|
1060
|
+
options?: { signingKey?: string; bootstrapSecret?: string },
|
|
1162
1061
|
): Promise<string> {
|
|
1163
1062
|
const effectiveGatewayPort = resources?.gatewayPort ?? GATEWAY_PORT;
|
|
1164
1063
|
|
|
@@ -1194,6 +1093,9 @@ export async function startGateway(
|
|
|
1194
1093
|
...(options?.signingKey
|
|
1195
1094
|
? { ACTOR_TOKEN_SIGNING_KEY: options.signingKey }
|
|
1196
1095
|
: {}),
|
|
1096
|
+
...(options?.bootstrapSecret
|
|
1097
|
+
? { GUARDIAN_BOOTSTRAP_SECRET: options.bootstrapSecret }
|
|
1098
|
+
: {}),
|
|
1197
1099
|
...(watch
|
|
1198
1100
|
? {
|
|
1199
1101
|
VELLUM_DEV: "1",
|
|
@@ -1247,7 +1149,7 @@ export async function startGateway(
|
|
|
1247
1149
|
? ["--watch", "run", "src/index.ts", "--vellum-gateway"]
|
|
1248
1150
|
: ["run", "src/index.ts", "--vellum-gateway"];
|
|
1249
1151
|
const gwLogFd = openLogFile("hatch.log");
|
|
1250
|
-
gateway = spawn(
|
|
1152
|
+
gateway = spawn(process.execPath, bunArgs, {
|
|
1251
1153
|
cwd: gatewayDir,
|
|
1252
1154
|
detached: true,
|
|
1253
1155
|
stdio: ["ignore", "pipe", "pipe"],
|
|
@@ -1273,27 +1175,7 @@ export async function startGateway(
|
|
|
1273
1175
|
// Wait for the gateway to be responsive before returning. Without this,
|
|
1274
1176
|
// callers may try to connect before the HTTP server is listening and get
|
|
1275
1177
|
// connection-refused errors.
|
|
1276
|
-
const
|
|
1277
|
-
const timeoutMs = 30000;
|
|
1278
|
-
let ready = false;
|
|
1279
|
-
while (Date.now() - start < timeoutMs) {
|
|
1280
|
-
try {
|
|
1281
|
-
const res = await fetch(
|
|
1282
|
-
`http://localhost:${effectiveGatewayPort}/healthz`,
|
|
1283
|
-
{
|
|
1284
|
-
signal: AbortSignal.timeout(2000),
|
|
1285
|
-
},
|
|
1286
|
-
);
|
|
1287
|
-
if (res.ok) {
|
|
1288
|
-
ready = true;
|
|
1289
|
-
break;
|
|
1290
|
-
}
|
|
1291
|
-
} catch {
|
|
1292
|
-
// Gateway not ready yet
|
|
1293
|
-
}
|
|
1294
|
-
await new Promise((r) => setTimeout(r, 250));
|
|
1295
|
-
}
|
|
1296
|
-
|
|
1178
|
+
const ready = await waitForDaemonReady(effectiveGatewayPort, 30000);
|
|
1297
1179
|
if (!ready) {
|
|
1298
1180
|
console.warn(
|
|
1299
1181
|
"⚠ Gateway started but health check did not respond within 30s",
|
|
@@ -1304,6 +1186,20 @@ export async function startGateway(
|
|
|
1304
1186
|
return gatewayUrl;
|
|
1305
1187
|
}
|
|
1306
1188
|
|
|
1189
|
+
/** Check whether a PID belongs to an ngrok process via its command line. */
|
|
1190
|
+
function isNgrokProcess(pid: number): boolean {
|
|
1191
|
+
try {
|
|
1192
|
+
const output = execFileSync("ps", ["-p", String(pid), "-o", "command="], {
|
|
1193
|
+
encoding: "utf-8",
|
|
1194
|
+
timeout: 3000,
|
|
1195
|
+
stdio: ["ignore", "pipe", "ignore"],
|
|
1196
|
+
}).trim();
|
|
1197
|
+
return /ngrok/.test(output);
|
|
1198
|
+
} catch {
|
|
1199
|
+
return false;
|
|
1200
|
+
}
|
|
1201
|
+
}
|
|
1202
|
+
|
|
1307
1203
|
/**
|
|
1308
1204
|
* Stop any locally-running daemon and gateway processes
|
|
1309
1205
|
* and clean up PID files. Called when hatch fails partway through
|
|
@@ -1326,15 +1222,14 @@ export async function stopLocalProcesses(
|
|
|
1326
1222
|
|
|
1327
1223
|
// Kill ngrok directly by PID rather than using stopProcessByPidFile, because
|
|
1328
1224
|
// isVellumProcess() won't match the ngrok binary — resulting in a no-op that
|
|
1329
|
-
// leaves ngrok running.
|
|
1225
|
+
// leaves ngrok running. Verify the PID still belongs to ngrok before killing
|
|
1226
|
+
// to avoid hitting an unrelated process if the OS has reused the PID.
|
|
1330
1227
|
const ngrokPidFile = join(vellumDir, "ngrok.pid");
|
|
1331
1228
|
if (existsSync(ngrokPidFile)) {
|
|
1332
1229
|
try {
|
|
1333
1230
|
const pid = parseInt(readFileSync(ngrokPidFile, "utf-8").trim(), 10);
|
|
1334
|
-
if (!isNaN(pid)) {
|
|
1335
|
-
|
|
1336
|
-
process.kill(pid, "SIGTERM");
|
|
1337
|
-
} catch {}
|
|
1231
|
+
if (!isNaN(pid) && isNgrokProcess(pid)) {
|
|
1232
|
+
await stopProcess(pid, "ngrok");
|
|
1338
1233
|
}
|
|
1339
1234
|
unlinkSync(ngrokPidFile);
|
|
1340
1235
|
} catch {}
|
|
@@ -58,7 +58,7 @@ export function readPidFile(pidFile: string): string | null {
|
|
|
58
58
|
return pid || null;
|
|
59
59
|
}
|
|
60
60
|
|
|
61
|
-
export function
|
|
61
|
+
export function isPidAlive(pid: string): boolean {
|
|
62
62
|
try {
|
|
63
63
|
process.kill(parseInt(pid, 10), 0);
|
|
64
64
|
return true;
|
|
@@ -138,10 +138,14 @@ export async function detectOrphanedProcesses(
|
|
|
138
138
|
// Process table scan — discover orphaned processes by scanning the OS
|
|
139
139
|
// process table rather than reading PID files from the workspace.
|
|
140
140
|
try {
|
|
141
|
-
const output = await execOutput(
|
|
142
|
-
"
|
|
143
|
-
|
|
144
|
-
|
|
141
|
+
const output = await execOutput(
|
|
142
|
+
"sh",
|
|
143
|
+
[
|
|
144
|
+
"-c",
|
|
145
|
+
"ps ax -o pid=,ppid=,args= | grep -E 'vellum|qdrant|openclaw' | grep -v grep",
|
|
146
|
+
],
|
|
147
|
+
{ timeoutMs: 5_000 },
|
|
148
|
+
);
|
|
145
149
|
const procs = parseRemotePs(output);
|
|
146
150
|
const ownPid = String(process.pid);
|
|
147
151
|
|
package/src/lib/pgrep.ts
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
import { execOutput } from "./step-runner";
|
|
2
2
|
|
|
3
|
+
const PGREP_TIMEOUT_MS = 5_000;
|
|
4
|
+
|
|
3
5
|
export async function pgrepExact(name: string): Promise<string[]> {
|
|
4
6
|
try {
|
|
5
|
-
const output = await execOutput("pgrep", ["-x", name]
|
|
7
|
+
const output = await execOutput("pgrep", ["-x", name], {
|
|
8
|
+
timeoutMs: PGREP_TIMEOUT_MS,
|
|
9
|
+
});
|
|
6
10
|
return output.trim().split("\n").filter(Boolean);
|
|
7
11
|
} catch {
|
|
8
12
|
return [];
|