@vellumai/cli 0.4.56 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lib/docker.ts CHANGED
@@ -1,15 +1,20 @@
1
- import { spawn as nodeSpawn } from "child_process";
2
- import { existsSync, watch as fsWatch } from "fs";
1
+ import { chmodSync, existsSync, mkdirSync, watch as fsWatch } from "fs";
2
+ import { arch, platform } from "os";
3
3
  import { dirname, join } from "path";
4
4
 
5
5
  // Direct import — bun embeds this at compile time so it works in compiled binaries.
6
6
  import cliPkg from "../../package.json";
7
7
 
8
- import { saveAssistantEntry, setActiveAssistant } from "./assistant-config";
8
+ import {
9
+ findAssistantByName,
10
+ saveAssistantEntry,
11
+ setActiveAssistant,
12
+ } from "./assistant-config";
9
13
  import type { AssistantEntry } from "./assistant-config";
10
14
  import { DEFAULT_GATEWAY_PORT } from "./constants";
11
15
  import type { Species } from "./constants";
12
16
  import { leaseGuardianToken } from "./guardian-token";
17
+ import { isVellumProcess, stopProcess } from "./process";
13
18
  import { generateInstanceName } from "./random-name";
14
19
  import { exec, execOutput } from "./step-runner";
15
20
  import {
@@ -19,73 +24,217 @@ import {
19
24
  writeToLogFile,
20
25
  } from "./xdg-log";
21
26
 
22
- type ServiceName = "assistant" | "credential-executor" | "gateway";
27
+ export type ServiceName = "assistant" | "credential-executor" | "gateway";
23
28
 
24
29
  const DOCKERHUB_ORG = "vellumai";
25
- const DOCKERHUB_IMAGES: Record<ServiceName, string> = {
30
+ export const DOCKERHUB_IMAGES: Record<ServiceName, string> = {
26
31
  assistant: `${DOCKERHUB_ORG}/vellum-assistant`,
27
32
  "credential-executor": `${DOCKERHUB_ORG}/vellum-credential-executor`,
28
33
  gateway: `${DOCKERHUB_ORG}/vellum-gateway`,
29
34
  };
30
35
 
31
36
  /** Internal ports exposed by each service's Dockerfile. */
32
- const ASSISTANT_INTERNAL_PORT = 3001;
33
- const GATEWAY_INTERNAL_PORT = 7830;
37
+ export const ASSISTANT_INTERNAL_PORT = 3001;
38
+ export const GATEWAY_INTERNAL_PORT = 7830;
39
+
40
+ /** Max time to wait for the gateway's /readyz check to pass; the guardian token lease retries share the same deadline. */
41
+ export const DOCKER_READY_TIMEOUT_MS = 3 * 60 * 1000;
42
+
43
+ /** Directory for user-local binary installs (no sudo required). */
44
+ const LOCAL_BIN_DIR = join(
45
+ process.env.HOME || process.env.USERPROFILE || ".",
46
+ ".local",
47
+ "bin",
48
+ );
34
49
 
35
50
  /**
36
- * Checks whether the `docker` CLI and daemon are available on the system.
37
- * Installs Colima and Docker via Homebrew if the CLI is missing, and starts
38
- * Colima if the Docker daemon is not reachable.
51
+ * Returns the macOS architecture suffix used by GitHub release artifacts.
52
+ * Maps Node's `arch()` values to the names used in release URLs.
39
53
  */
40
- async function ensureDockerInstalled(): Promise<void> {
41
- let installed = false;
54
+ function releaseArch(): string {
55
+ const a = arch();
56
+ if (a === "arm64") return "aarch64";
57
+ if (a === "x64") return "x86_64";
58
+ return a;
59
+ }
60
+
61
+ /**
62
+ * Downloads a file from `url` to `destPath` and makes it executable.
63
+ * Resolves with no value. Throws on failure.
64
+ */
65
+ async function downloadBinary(
66
+ url: string,
67
+ destPath: string,
68
+ label: string,
69
+ ): Promise<void> {
70
+ console.log(` ⬇ Downloading ${label}...`);
71
+ await exec("bash", [
72
+ "-c",
73
+ `curl -fsSL -o "${destPath}" "${url}" && chmod +x "${destPath}"`,
74
+ ]);
75
+ }
76
+
77
+ /**
78
+ * Downloads and extracts a `.tar.gz` archive into `destDir`.
79
+ */
80
+ async function downloadAndExtract(
81
+ url: string,
82
+ destDir: string,
83
+ label: string,
84
+ ): Promise<void> {
85
+ console.log(` ⬇ Downloading ${label}...`);
86
+ await exec("bash", ["-c", `curl -fsSL "${url}" | tar xz -C "${destDir}"`]);
87
+ }
88
+
89
+ /**
90
+ * Installs Docker CLI, Colima, and Lima by downloading pre-built binaries
91
+ * directly into ~/.local/bin/. No Homebrew or sudo required.
92
+ *
93
+ * Tries Homebrew first when it is available (e.g. admin users who prefer it) and falls back to the direct download if that fails.
94
+ */
95
+ async function installDockerToolchain(): Promise<void> {
96
+ // Try Homebrew first if available — it handles updates and dependencies.
97
+ let hasBrew = false;
42
98
  try {
43
- await execOutput("docker", ["--version"]);
44
- installed = true;
99
+ await execOutput("brew", ["--version"]);
100
+ hasBrew = true;
45
101
  } catch {
46
- // docker CLI not found — install it
102
+ // brew not found
47
103
  }
48
104
 
49
- if (!installed) {
50
- // Check whether Homebrew is available before attempting to use it.
51
- let hasBrew = false;
105
+ if (hasBrew) {
106
+ console.log("🐳 Docker not found. Installing via Homebrew...");
52
107
  try {
53
- await execOutput("brew", ["--version"]);
54
- hasBrew = true;
108
+ await exec("brew", ["install", "colima", "docker"]);
109
+ return;
55
110
  } catch {
56
- // brew not found
111
+ console.log(
112
+ " ⚠ Homebrew install failed, falling back to direct binary download...",
113
+ );
57
114
  }
115
+ }
58
116
 
59
- if (!hasBrew) {
60
- console.log("🍺 Homebrew not found. Installing Homebrew...");
61
- try {
62
- await exec("bash", [
63
- "-c",
64
- 'NONINTERACTIVE=1 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"',
65
- ]);
66
- } catch (err) {
67
- const message = err instanceof Error ? err.message : String(err);
68
- throw new Error(
69
- `Failed to install Homebrew. Please install Docker manually from https://www.docker.com/products/docker-desktop/\n${message}`,
70
- );
71
- }
117
+ // Direct binary install — no sudo required.
118
+ console.log(
119
+ "🐳 Docker not found. Installing Docker, Colima, and Lima to ~/.local/bin/...",
120
+ );
72
121
 
73
- // Homebrew on Apple Silicon installs to /opt/homebrew; add it to PATH
74
- // so subsequent brew/colima/docker invocations work in this session.
75
- if (!process.env.PATH?.includes("/opt/homebrew")) {
76
- process.env.PATH = `/opt/homebrew/bin:/opt/homebrew/sbin:${process.env.PATH}`;
77
- }
78
- }
122
+ mkdirSync(LOCAL_BIN_DIR, { recursive: true });
79
123
 
80
- console.log("🐳 Docker not found. Installing via Homebrew...");
81
- try {
82
- await exec("brew", ["install", "colima", "docker"]);
83
- } catch (err) {
84
- const message = err instanceof Error ? err.message : String(err);
124
+ const cpuArch = releaseArch();
125
+ const isMac = platform() === "darwin";
126
+
127
+ if (!isMac) {
128
+ throw new Error(
129
+ "Automatic Docker installation is only supported on macOS. " +
130
+ "Please install Docker manually: https://docs.docker.com/engine/install/",
131
+ );
132
+ }
133
+
134
+ // --- Docker CLI ---
135
+ // Docker publishes static binaries at download.docker.com.
136
+ const dockerArch = cpuArch === "aarch64" ? "aarch64" : "x86_64";
137
+ const dockerTarUrl = `https://download.docker.com/mac/static/stable/${dockerArch}/docker-27.5.1.tgz`;
138
+ const dockerTmpDir = join(LOCAL_BIN_DIR, ".docker-tmp");
139
+ mkdirSync(dockerTmpDir, { recursive: true });
140
+ try {
141
+ await downloadAndExtract(dockerTarUrl, dockerTmpDir, "Docker CLI");
142
+ // The archive extracts to docker/docker — move it to our bin dir.
143
+ await exec("mv", [
144
+ join(dockerTmpDir, "docker", "docker"),
145
+ join(LOCAL_BIN_DIR, "docker"),
146
+ ]);
147
+ chmodSync(join(LOCAL_BIN_DIR, "docker"), 0o755);
148
+ } finally {
149
+ await exec("rm", ["-rf", dockerTmpDir]).catch(() => {});
150
+ }
151
+
152
+ // --- Colima ---
153
+ const colimaArch = cpuArch === "aarch64" ? "arm64" : "x86_64";
154
+ const colimaUrl = `https://github.com/abiosoft/colima/releases/latest/download/colima-Darwin-${colimaArch}`;
155
+ await downloadBinary(colimaUrl, join(LOCAL_BIN_DIR, "colima"), "Colima");
156
+
157
+ // --- Lima ---
158
+ // Lima publishes tar.gz archives with bin/limactl and other tools.
159
+ const limaArch = cpuArch === "aarch64" ? "arm64" : "x86_64";
160
+ const limaVersionUrl =
161
+ "https://api.github.com/repos/lima-vm/lima/releases/latest";
162
+ let limaVersion: string;
163
+ try {
164
+ const resp = await fetch(limaVersionUrl);
165
+ if (!resp.ok) {
166
+ throw new Error(
167
+ `GitHub API returned ${resp.status}` +
168
+ (resp.status === 403
169
+ ? " (rate-limited) — try again later."
170
+ : `. Check your network connection.`),
171
+ );
172
+ }
173
+ const data = (await resp.json()) as { tag_name?: string };
174
+ if (!data.tag_name) {
175
+ throw new Error("GitHub API response missing tag_name.");
176
+ }
177
+ limaVersion = data.tag_name; // e.g. "v1.0.3"
178
+ } catch (err) {
179
+ const message = err instanceof Error ? err.message : String(err);
180
+ throw new Error(`Failed to fetch latest Lima version: ${message}`);
181
+ }
182
+ const limaVersionNum = limaVersion.replace(/^v/, ""); // "1.0.3"
183
+ const limaTarUrl = `https://github.com/lima-vm/lima/releases/download/${limaVersion}/lima-${limaVersionNum}-Darwin-${limaArch}.tar.gz`;
184
+ // Lima archives contain bin/limactl, bin/lima, share/lima/..., so extract
185
+ // into the parent (~/.local/) so that limactl lands in ~/.local/bin/.
186
+ const localDir = dirname(LOCAL_BIN_DIR);
187
+ await downloadAndExtract(limaTarUrl, localDir, "Lima");
188
+
189
+ // Verify all binaries are in place.
190
+ for (const bin of ["docker", "colima", "limactl"]) {
191
+ if (!existsSync(join(LOCAL_BIN_DIR, bin))) {
85
192
  throw new Error(
86
- `Failed to install Docker via Homebrew. Please install Docker manually.\n${message}`,
193
+ `${bin} binary not found after installation. Please install Docker manually.`,
87
194
  );
88
195
  }
196
+ }
197
+
198
+ console.log(" ✅ Docker toolchain installed to ~/.local/bin/");
199
+ }
200
+
201
+ /**
202
+ * Ensures ~/.local/bin/ is on PATH for this process so that docker, colima,
203
+ * and limactl are discoverable.
204
+ */
205
+ function ensureLocalBinOnPath(): void {
206
+ const currentPath = process.env.PATH || "";
207
+ if (!currentPath.includes(LOCAL_BIN_DIR)) {
208
+ process.env.PATH = `${LOCAL_BIN_DIR}:${currentPath}`;
209
+ }
210
+ }
211
+
212
+ /**
213
+ * Checks whether the `docker` CLI and daemon are available on the system.
214
+ * Installs Colima and Docker via direct binary download if missing (no sudo
215
+ * required), and starts Colima if the Docker daemon is not reachable.
216
+ */
217
+ async function ensureDockerInstalled(): Promise<void> {
218
+ // Always add ~/.local/bin to PATH so previously installed binaries are found.
219
+ ensureLocalBinOnPath();
220
+
221
+ // Check that docker, colima, and limactl are all available. If any is
222
+ // missing (e.g. partial install from a previous failure), re-run install.
223
+ const toolchainComplete = await (async () => {
224
+ try {
225
+ await execOutput("docker", ["--version"]);
226
+ await execOutput("colima", ["version"]);
227
+ await execOutput("limactl", ["--version"]);
228
+ return true;
229
+ } catch {
230
+ return false;
231
+ }
232
+ })();
233
+
234
+ if (!toolchainComplete) {
235
+ await installDockerToolchain();
236
+ // Re-check PATH after install.
237
+ ensureLocalBinOnPath();
89
238
 
90
239
  try {
91
240
  await execOutput("docker", ["--version"]);
@@ -97,7 +246,7 @@ async function ensureDockerInstalled(): Promise<void> {
97
246
  }
98
247
  }
99
248
 
100
- // Verify the Docker daemon is reachable; start Colima if it isn't
249
+ // Verify the Docker daemon is reachable; start Colima if it isn't.
101
250
  try {
102
251
  await exec("docker", ["info"]);
103
252
  } catch {
@@ -128,51 +277,20 @@ async function ensureDockerInstalled(): Promise<void> {
128
277
  }
129
278
  }
130
279
 
131
- /**
132
- * Creates a line-buffered output prefixer that prepends a tag to each
133
- * line from a container's stdout/stderr. Calls `onLine` for each complete
134
- * line so the caller can detect sentinel output (e.g. hatch completion).
135
- */
136
- function createLinePrefixer(
137
- stream: NodeJS.WritableStream,
138
- prefix: string,
139
- onLine?: (line: string) => void,
140
- ): { write(data: Buffer): void; flush(): void } {
141
- let remainder = "";
142
- return {
143
- write(data: Buffer) {
144
- const text = remainder + data.toString();
145
- const lines = text.split("\n");
146
- remainder = lines.pop() ?? "";
147
- for (const line of lines) {
148
- stream.write(` [${prefix}] ${line}\n`);
149
- onLine?.(line);
150
- }
151
- },
152
- flush() {
153
- if (remainder) {
154
- stream.write(` [${prefix}] ${remainder}\n`);
155
- onLine?.(remainder);
156
- remainder = "";
157
- }
158
- },
159
- };
160
- }
161
-
162
280
  /** Derive the Docker resource names from the instance name. */
163
- function dockerResourceNames(instanceName: string) {
281
+ export function dockerResourceNames(instanceName: string) {
164
282
  return {
165
283
  assistantContainer: `${instanceName}-assistant`,
166
284
  cesContainer: `${instanceName}-credential-executor`,
167
- dataVolume: `vellum-data-${instanceName}`,
285
+ dataVolume: `${instanceName}-data`,
168
286
  gatewayContainer: `${instanceName}-gateway`,
169
- network: `vellum-net-${instanceName}`,
170
- socketVolume: `vellum-ces-bootstrap-${instanceName}`,
287
+ network: `${instanceName}-net`,
288
+ socketVolume: `${instanceName}-socket`,
171
289
  };
172
290
  }
173
291
 
174
292
  /** Silently attempt to stop and remove a Docker container. */
175
- async function removeContainer(containerName: string): Promise<void> {
293
+ export async function removeContainer(containerName: string): Promise<void> {
176
294
  try {
177
295
  await exec("docker", ["stop", containerName]);
178
296
  } catch {
@@ -188,6 +306,20 @@ async function removeContainer(containerName: string): Promise<void> {
188
306
  export async function retireDocker(name: string): Promise<void> {
189
307
  console.log(`\u{1F5D1}\ufe0f Stopping Docker containers for '${name}'...\n`);
190
308
 
309
+ // Stop the file watcher process if one is tracked for this instance.
310
+ const entry = findAssistantByName(name);
311
+ const watcherPid =
312
+ typeof entry?.watcherPid === "number" ? entry.watcherPid : null;
313
+ if (watcherPid !== null) {
314
+ if (isVellumProcess(watcherPid)) {
315
+ await stopProcess(watcherPid, "file-watcher");
316
+ } else {
317
+ console.log(
318
+ `PID ${watcherPid} is not a vellum process — skipping stale file-watcher PID.`,
319
+ );
320
+ }
321
+ }
322
+
191
323
  const res = dockerResourceNames(name);
192
324
 
193
325
  await removeContainer(res.cesContainer);
@@ -197,7 +329,7 @@ export async function retireDocker(name: string): Promise<void> {
197
329
  // Also clean up a legacy single-container instance if it exists
198
330
  await removeContainer(name);
199
331
 
200
- // Remove shared network and volumes
332
+ // Remove network and volumes
201
333
  try {
202
334
  await exec("docker", ["network", "rm", res.network]);
203
335
  } catch {
@@ -303,13 +435,14 @@ function serviceImageConfigs(
303
435
  async function buildAllImages(
304
436
  repoRoot: string,
305
437
  imageTags: Record<ServiceName, string>,
438
+ log: (msg: string) => void,
306
439
  ): Promise<void> {
307
440
  const configs = serviceImageConfigs(repoRoot, imageTags);
308
- console.log("🔨 Building all images in parallel...");
441
+ log("🔨 Building all images in parallel...");
309
442
  await Promise.all(
310
443
  Object.entries(configs).map(async ([name, config]) => {
311
444
  await buildImage(config);
312
- console.log(`✅ ${name} built`);
445
+ log(`✅ ${name} built`);
313
446
  }),
314
447
  );
315
448
  }
@@ -319,13 +452,14 @@ async function buildAllImages(
319
452
  * service. Each container joins a shared Docker bridge network so they
320
453
  * can be restarted independently.
321
454
  */
322
- function serviceDockerRunArgs(opts: {
455
+ export function serviceDockerRunArgs(opts: {
456
+ extraAssistantEnv?: Record<string, string>;
323
457
  gatewayPort: number;
324
458
  imageTags: Record<ServiceName, string>;
325
459
  instanceName: string;
326
460
  res: ReturnType<typeof dockerResourceNames>;
327
461
  }): Record<ServiceName, () => string[]> {
328
- const { gatewayPort, imageTags, instanceName, res } = opts;
462
+ const { extraAssistantEnv, gatewayPort, imageTags, instanceName, res } = opts;
329
463
  return {
330
464
  assistant: () => {
331
465
  const args: string[] = [
@@ -349,6 +483,11 @@ function serviceDockerRunArgs(opts: {
349
483
  args.push("-e", `${envVar}=${process.env[envVar]}`);
350
484
  }
351
485
  }
486
+ if (extraAssistantEnv) {
487
+ for (const [key, value] of Object.entries(extraAssistantEnv)) {
488
+ args.push("-e", `${key}=${value}`);
489
+ }
490
+ }
352
491
  args.push(imageTags.assistant);
353
492
  return args;
354
493
  },
@@ -371,6 +510,8 @@ function serviceDockerRunArgs(opts: {
371
510
  `ASSISTANT_HOST=${res.assistantContainer}`,
372
511
  "-e",
373
512
  `RUNTIME_HTTP_PORT=${ASSISTANT_INTERNAL_PORT}`,
513
+ "-e",
514
+ "RUNTIME_PROXY_ENABLED=true",
374
515
  imageTags.gateway,
375
516
  ],
376
517
  "credential-executor": () => [
@@ -379,7 +520,6 @@ function serviceDockerRunArgs(opts: {
379
520
  "-d",
380
521
  "--name",
381
522
  res.cesContainer,
382
- `--network=${res.network}`,
383
523
  "-v",
384
524
  `${res.socketVolume}:/run/ces-bootstrap`,
385
525
  "-v",
@@ -396,28 +536,32 @@ function serviceDockerRunArgs(opts: {
396
536
  }
397
537
 
398
538
  /** The order in which services must be started. */
399
- const SERVICE_START_ORDER: ServiceName[] = [
539
+ export const SERVICE_START_ORDER: ServiceName[] = [
400
540
  "assistant",
401
541
  "gateway",
402
542
  "credential-executor",
403
543
  ];
404
544
 
405
545
  /** Start all three containers in dependency order. */
406
- async function startContainers(opts: {
407
- gatewayPort: number;
408
- imageTags: Record<ServiceName, string>;
409
- instanceName: string;
410
- res: ReturnType<typeof dockerResourceNames>;
411
- }): Promise<void> {
546
+ export async function startContainers(
547
+ opts: {
548
+ extraAssistantEnv?: Record<string, string>;
549
+ gatewayPort: number;
550
+ imageTags: Record<ServiceName, string>;
551
+ instanceName: string;
552
+ res: ReturnType<typeof dockerResourceNames>;
553
+ },
554
+ log: (msg: string) => void,
555
+ ): Promise<void> {
412
556
  const runArgs = serviceDockerRunArgs(opts);
413
557
  for (const service of SERVICE_START_ORDER) {
414
- console.log(`🚀 Starting ${service} container...`);
558
+ log(`🚀 Starting ${service} container...`);
415
559
  await exec("docker", runArgs[service]());
416
560
  }
417
561
  }
418
562
 
419
563
  /** Stop and remove all three containers (ignoring errors). */
420
- async function stopContainers(
564
+ export async function stopContainers(
421
565
  res: ReturnType<typeof dockerResourceNames>,
422
566
  ): Promise<void> {
423
567
  await removeContainer(res.cesContainer);
@@ -586,250 +730,257 @@ export async function hatchDocker(
586
730
  ): Promise<void> {
587
731
  resetLogFile("hatch.log");
588
732
 
589
- await ensureDockerInstalled();
590
-
591
- const instanceName = generateInstanceName(species, name);
592
- const gatewayPort = DEFAULT_GATEWAY_PORT;
593
-
594
- const imageTags: Record<ServiceName, string> = {
595
- assistant: "",
596
- "credential-executor": "",
597
- gateway: "",
733
+ let logFd = openLogFile("hatch.log");
734
+ const log = (msg: string): void => {
735
+ console.log(msg);
736
+ writeToLogFile(logFd, `${new Date().toISOString()} ${msg}\n`);
598
737
  };
599
738
 
600
- let repoRoot: string | undefined;
601
-
602
- if (watch) {
603
- repoRoot = findRepoRoot();
604
- const localTag = `local-${instanceName}`;
605
- imageTags.assistant = `vellum-assistant:${localTag}`;
606
- imageTags.gateway = `vellum-gateway:${localTag}`;
607
- imageTags["credential-executor"] = `vellum-credential-executor:${localTag}`;
608
-
609
- console.log(`🥚 Hatching Docker assistant: ${instanceName}`);
610
- console.log(` Species: ${species}`);
611
- console.log(` Mode: development (watch)`);
612
- console.log(` Repo: ${repoRoot}`);
613
- console.log(` Images (local build):`);
614
- console.log(` assistant: ${imageTags.assistant}`);
615
- console.log(` gateway: ${imageTags.gateway}`);
616
- console.log(
617
- ` credential-executor: ${imageTags["credential-executor"]}`,
618
- );
619
- console.log("");
739
+ try {
740
+ await ensureDockerInstalled();
620
741
 
621
- const logFd = openLogFile("hatch.log");
622
- try {
623
- await buildAllImages(repoRoot, imageTags);
624
- } catch (err) {
625
- const message = err instanceof Error ? err.message : String(err);
626
- writeToLogFile(
627
- logFd,
628
- `[docker-build] ${new Date().toISOString()} ERROR\n${message}\n`,
629
- );
630
- closeLogFile(logFd);
631
- throw err;
632
- }
633
- closeLogFile(logFd);
634
- console.log("✅ Docker images built\n");
635
- } else {
636
- const version = cliPkg.version;
637
- const versionTag = version ? `v${version}` : "latest";
638
- imageTags.assistant = `${DOCKERHUB_IMAGES.assistant}:${versionTag}`;
639
- imageTags.gateway = `${DOCKERHUB_IMAGES.gateway}:${versionTag}`;
640
- imageTags["credential-executor"] =
641
- `${DOCKERHUB_IMAGES["credential-executor"]}:${versionTag}`;
642
-
643
- console.log(`🥚 Hatching Docker assistant: ${instanceName}`);
644
- console.log(` Species: ${species}`);
645
- console.log(` Images:`);
646
- console.log(` assistant: ${imageTags.assistant}`);
647
- console.log(` gateway: ${imageTags.gateway}`);
648
- console.log(
649
- ` credential-executor: ${imageTags["credential-executor"]}`,
650
- );
651
- console.log("");
742
+ const instanceName = generateInstanceName(species, name);
743
+ const gatewayPort = DEFAULT_GATEWAY_PORT;
652
744
 
653
- const logFd = openLogFile("hatch.log");
654
- console.log("📦 Pulling Docker images...");
655
- try {
745
+ const imageTags: Record<ServiceName, string> = {
746
+ assistant: "",
747
+ "credential-executor": "",
748
+ gateway: "",
749
+ };
750
+
751
+ let repoRoot: string | undefined;
752
+
753
+ if (watch) {
754
+ repoRoot = findRepoRoot();
755
+ const localTag = `local-${instanceName}`;
756
+ imageTags.assistant = `vellum-assistant:${localTag}`;
757
+ imageTags.gateway = `vellum-gateway:${localTag}`;
758
+ imageTags["credential-executor"] =
759
+ `vellum-credential-executor:${localTag}`;
760
+
761
+ log(`🥚 Hatching Docker assistant: ${instanceName}`);
762
+ log(` Species: ${species}`);
763
+ log(` Mode: development (watch)`);
764
+ log(` Repo: ${repoRoot}`);
765
+ log(` Images (local build):`);
766
+ log(` assistant: ${imageTags.assistant}`);
767
+ log(` gateway: ${imageTags.gateway}`);
768
+ log(` credential-executor: ${imageTags["credential-executor"]}`);
769
+ log("");
770
+
771
+ await buildAllImages(repoRoot, imageTags, log);
772
+ log("✅ Docker images built");
773
+ } else {
774
+ const version = cliPkg.version;
775
+ const versionTag = version ? `v${version}` : "latest";
776
+ imageTags.assistant = `${DOCKERHUB_IMAGES.assistant}:${versionTag}`;
777
+ imageTags.gateway = `${DOCKERHUB_IMAGES.gateway}:${versionTag}`;
778
+ imageTags["credential-executor"] =
779
+ `${DOCKERHUB_IMAGES["credential-executor"]}:${versionTag}`;
780
+
781
+ log(`🥚 Hatching Docker assistant: ${instanceName}`);
782
+ log(` Species: ${species}`);
783
+ log(` Images:`);
784
+ log(` assistant: ${imageTags.assistant}`);
785
+ log(` gateway: ${imageTags.gateway}`);
786
+ log(` credential-executor: ${imageTags["credential-executor"]}`);
787
+ log("");
788
+
789
+ log("📦 Pulling Docker images...");
656
790
  await exec("docker", ["pull", imageTags.assistant]);
657
791
  await exec("docker", ["pull", imageTags.gateway]);
658
792
  await exec("docker", ["pull", imageTags["credential-executor"]]);
659
- } catch (err) {
660
- const message = err instanceof Error ? err.message : String(err);
661
- writeToLogFile(
662
- logFd,
663
- `[docker-pull] ${new Date().toISOString()} ERROR\n${message}\n`,
664
- );
665
- closeLogFile(logFd);
666
- throw err;
793
+ log("✅ Docker images pulled");
667
794
  }
668
- closeLogFile(logFd);
669
- console.log("✅ Docker images pulled\n");
670
- }
671
795
 
672
- const res = dockerResourceNames(instanceName);
796
+ const res = dockerResourceNames(instanceName);
673
797
 
674
- // Create shared network and volumes
675
- console.log("📁 Creating shared network and volumes...");
676
- await exec("docker", ["network", "create", res.network]);
677
- await exec("docker", ["volume", "create", res.dataVolume]);
678
- await exec("docker", ["volume", "create", res.socketVolume]);
798
+ log("📁 Creating network and volumes...");
799
+ await exec("docker", ["network", "create", res.network]);
800
+ await exec("docker", ["volume", "create", res.dataVolume]);
801
+ await exec("docker", ["volume", "create", res.socketVolume]);
679
802
 
680
- await startContainers({ gatewayPort, imageTags, instanceName, res });
803
+ await startContainers({ gatewayPort, imageTags, instanceName, res }, log);
681
804
 
682
- const runtimeUrl = `http://localhost:${gatewayPort}`;
683
- const dockerEntry: AssistantEntry = {
684
- assistantId: instanceName,
685
- runtimeUrl,
686
- cloud: "docker",
687
- species,
688
- hatchedAt: new Date().toISOString(),
689
- volume: res.dataVolume,
690
- };
691
- saveAssistantEntry(dockerEntry);
692
- setActiveAssistant(instanceName);
693
-
694
- // The assistant image runs the daemon directly (not via the CLI hatch
695
- // command), so we watch for the DaemonServer readiness message instead
696
- // of the CLI's "Local assistant hatched!" sentinel.
697
- await tailContainerUntilReady({
698
- containerName: res.assistantContainer,
699
- detached: watch ? false : detached,
700
- dockerEntry,
701
- instanceName,
702
- runtimeUrl,
703
- sentinel: "DaemonServer started",
704
- });
805
+ const runtimeUrl = `http://localhost:${gatewayPort}`;
806
+ const dockerEntry: AssistantEntry = {
807
+ assistantId: instanceName,
808
+ runtimeUrl,
809
+ cloud: "docker",
810
+ species,
811
+ hatchedAt: new Date().toISOString(),
812
+ volume: res.dataVolume,
813
+ };
814
+ saveAssistantEntry(dockerEntry);
815
+ setActiveAssistant(instanceName);
705
816
 
706
- if (watch && repoRoot) {
707
- const stopWatcher = startFileWatcher({
708
- gatewayPort,
709
- imageTags,
817
+ const { ready } = await waitForGatewayAndLease({
818
+ containerName: res.assistantContainer,
819
+ detached: watch ? false : detached,
710
820
  instanceName,
711
- repoRoot,
712
- res,
821
+ logFd,
822
+ runtimeUrl,
713
823
  });
714
824
 
715
- await new Promise<void>((resolve) => {
716
- const cleanup = async () => {
717
- console.log("\n🛑 Shutting down...");
718
- stopWatcher();
719
- await stopContainers(res);
720
- console.log("✅ Docker instance stopped.");
721
- resolve();
722
- };
723
-
724
- process.on("SIGINT", () => void cleanup());
725
- process.on("SIGTERM", () => void cleanup());
726
- });
825
+ if (!ready && !(watch && repoRoot)) {
826
+ throw new Error("Timed out waiting for assistant to become ready");
827
+ }
828
+
829
+ if (watch && repoRoot) {
830
+ saveAssistantEntry({ ...dockerEntry, watcherPid: process.pid });
831
+
832
+ const stopWatcher = startFileWatcher({
833
+ gatewayPort,
834
+ imageTags,
835
+ instanceName,
836
+ repoRoot,
837
+ res,
838
+ });
839
+
840
+ await new Promise<void>((resolve) => {
841
+ const cleanup = async () => {
842
+ log("\n🛑 Shutting down...");
843
+ stopWatcher();
844
+ await stopContainers(res);
845
+ saveAssistantEntry({ ...dockerEntry, watcherPid: undefined });
846
+ log("✅ Docker instance stopped.");
847
+ resolve();
848
+ };
849
+
850
+ // SIGINT (Ctrl+C): full cleanup including stopping containers.
851
+ process.on("SIGINT", () => void cleanup());
852
+
853
+ // SIGTERM (from `vellum retire`): exit quickly — the caller
854
+ // handles container teardown, so we only need to close the
855
+ // file watchers and let the process terminate.
856
+ process.on("SIGTERM", () => {
857
+ stopWatcher();
858
+ saveAssistantEntry({ ...dockerEntry, watcherPid: undefined });
859
+ resolve();
860
+ });
861
+ });
862
+ }
863
+ } finally {
864
+ closeLogFile(logFd);
865
+ logFd = "ignore";
727
866
  }
728
867
  }
729
868
 
730
869
  /**
731
870
  * In detached mode, print instance details and return immediately.
732
- * Otherwise, tail the given container's logs until the sentinel string
733
- * appears, then attempt to lease a guardian token and report readiness.
871
+ * Otherwise, poll the gateway health check until it responds, then
872
+ * lease a guardian token.
734
873
  */
735
- async function tailContainerUntilReady(opts: {
874
+ async function waitForGatewayAndLease(opts: {
736
875
  containerName: string;
737
876
  detached: boolean;
738
- dockerEntry: AssistantEntry;
739
877
  instanceName: string;
878
+ logFd: number | "ignore";
740
879
  runtimeUrl: string;
741
- sentinel: string;
742
- }): Promise<void> {
743
- const {
744
- containerName,
745
- detached,
746
- dockerEntry,
747
- instanceName,
748
- runtimeUrl,
749
- sentinel,
750
- } = opts;
880
+ }): Promise<{ ready: boolean }> {
881
+ const { containerName, detached, instanceName, logFd, runtimeUrl } = opts;
882
+
883
+ const log = (msg: string): void => {
884
+ console.log(msg);
885
+ writeToLogFile(logFd, `${new Date().toISOString()} ${msg}\n`);
886
+ };
751
887
 
752
888
  if (detached) {
753
- console.log("\n✅ Docker assistant hatched!\n");
754
- console.log("Instance details:");
755
- console.log(` Name: ${instanceName}`);
756
- console.log(` Runtime: ${runtimeUrl}`);
757
- console.log(` Container: ${containerName}`);
758
- console.log("");
759
- console.log(`Stop with: vellum retire ${instanceName}`);
760
- return;
889
+ log("\n✅ Docker assistant hatched!\n");
890
+ log("Instance details:");
891
+ log(` Name: ${instanceName}`);
892
+ log(` Runtime: ${runtimeUrl}`);
893
+ log(` Container: ${containerName}`);
894
+ log("");
895
+ log(`Stop with: vellum retire ${instanceName}`);
896
+ return { ready: true };
761
897
  }
762
898
 
763
- console.log(` Container: ${containerName}`);
764
- console.log(` Runtime: ${runtimeUrl}`);
765
- console.log("");
899
+ log(` Container: ${containerName}`);
900
+ log(` Runtime: ${runtimeUrl}`);
901
+ log("");
902
+ log("Waiting for assistant to become ready...");
766
903
 
767
- await new Promise<void>((resolve, reject) => {
768
- const child = nodeSpawn("docker", ["logs", "-f", containerName], {
769
- stdio: ["ignore", "pipe", "pipe"],
770
- });
904
+ const readyUrl = `${runtimeUrl}/readyz`;
905
+ const start = Date.now();
906
+ let ready = false;
771
907
 
772
- const handleLine = (line: string): void => {
773
- if (line.includes(sentinel)) {
774
- process.nextTick(async () => {
775
- try {
776
- const tokenData = await leaseGuardianToken(
777
- runtimeUrl,
778
- instanceName,
779
- );
780
- dockerEntry.bearerToken = tokenData.accessToken;
781
- saveAssistantEntry(dockerEntry);
782
- } catch (err) {
783
- console.warn(
784
- `\u26a0\ufe0f Could not lease guardian token: ${err instanceof Error ? err.message : err}`,
785
- );
786
- }
787
-
788
- console.log("");
789
- console.log(`\u2705 Docker containers are up and running!`);
790
- console.log(` Name: ${instanceName}`);
791
- console.log(` Runtime: ${runtimeUrl}`);
792
- console.log("");
793
- child.kill();
794
- resolve();
795
- });
908
+ while (Date.now() - start < DOCKER_READY_TIMEOUT_MS) {
909
+ try {
910
+ const resp = await fetch(readyUrl, {
911
+ signal: AbortSignal.timeout(5000),
912
+ });
913
+ if (resp.ok) {
914
+ ready = true;
915
+ break;
796
916
  }
797
- };
917
+ const body = await resp.text();
918
+ let detail = "";
919
+ try {
920
+ const json = JSON.parse(body);
921
+ const parts = [json.status];
922
+ if (json.upstream != null) parts.push(`upstream=${json.upstream}`);
923
+ detail = ` — ${parts.join(", ")}`;
924
+ } catch {}
925
+ log(`Readiness check: ${resp.status}${detail} (retrying...)`);
926
+ } catch {
927
+ // Connection refused / timeout — not up yet
928
+ }
929
+ await new Promise((r) => setTimeout(r, 1000));
930
+ }
798
931
 
799
- const stdoutPrefixer = createLinePrefixer(
800
- process.stdout,
801
- "docker",
802
- handleLine,
803
- );
804
- const stderrPrefixer = createLinePrefixer(
805
- process.stderr,
806
- "docker",
807
- handleLine,
808
- );
932
+ if (!ready) {
933
+ log("");
934
+ log(` \u26a0\ufe0f Timed out waiting for assistant to become ready.`);
935
+ log(` The container is still running.`);
936
+ log(` Check logs with: docker logs -f ${containerName}`);
937
+ log("");
938
+ return { ready: false };
939
+ }
809
940
 
810
- child.stdout?.on("data", (data: Buffer) => stdoutPrefixer.write(data));
811
- child.stderr?.on("data", (data: Buffer) => stderrPrefixer.write(data));
812
- child.stdout?.on("end", () => stdoutPrefixer.flush());
813
- child.stderr?.on("end", () => stderrPrefixer.flush());
941
+ const elapsedSec = ((Date.now() - start) / 1000).toFixed(1);
942
+ log(`Assistant ready after ${elapsedSec}s`);
814
943
 
815
- child.on("close", (code) => {
816
- if (
817
- code === 0 ||
818
- code === null ||
819
- code === 130 ||
820
- code === 137 ||
821
- code === 143
822
- ) {
823
- resolve();
824
- } else {
825
- reject(new Error(`Docker container exited with code ${code}`));
826
- }
827
- });
828
- child.on("error", reject);
944
+ // Lease guardian token. The /readyz check confirms both gateway and
945
+ // assistant are reachable. Retry with backoff in case there is a brief
946
+ // window where readiness passes but the guardian endpoint is not yet ready.
947
+ log(`Guardian token lease: starting for ${instanceName} at ${runtimeUrl}`);
948
+ const leaseStart = Date.now();
949
+ const leaseDeadline = start + DOCKER_READY_TIMEOUT_MS;
950
+ let leaseSuccess = false;
951
+ let lastLeaseError: string | undefined;
829
952
 
830
- process.on("SIGINT", () => {
831
- child.kill();
832
- resolve();
833
- });
834
- });
953
+ while (Date.now() < leaseDeadline) {
954
+ try {
955
+ const tokenData = await leaseGuardianToken(runtimeUrl, instanceName);
956
+ const leaseElapsed = ((Date.now() - leaseStart) / 1000).toFixed(1);
957
+ log(
958
+ `Guardian token lease: success after ${leaseElapsed}s (principalId=${tokenData.guardianPrincipalId}, expiresAt=${tokenData.accessTokenExpiresAt})`,
959
+ );
960
+ leaseSuccess = true;
961
+ break;
962
+ } catch (err) {
963
+ lastLeaseError =
964
+ err instanceof Error ? (err.stack ?? err.message) : String(err);
965
+ // Log periodically so the user knows we're still trying
966
+ const elapsed = ((Date.now() - leaseStart) / 1000).toFixed(0);
967
+ log(
968
+ `Guardian token lease: attempt failed after ${elapsed}s (${lastLeaseError.split("\n")[0]}), retrying...`,
969
+ );
970
+ }
971
+ await new Promise((r) => setTimeout(r, 2000));
972
+ }
973
+
974
+ if (!leaseSuccess) {
975
+ log(
976
+ `\u26a0\ufe0f Guardian token lease: FAILED after ${((Date.now() - leaseStart) / 1000).toFixed(1)}s — ${lastLeaseError ?? "unknown error"}`,
977
+ );
978
+ }
979
+
980
+ log("");
981
+ log(`\u2705 Docker containers are up and running!`);
982
+ log(` Name: ${instanceName}`);
983
+ log(` Runtime: ${runtimeUrl}`);
984
+ log("");
985
+ return { ready: true };
835
986
  }