@vellumai/cli 0.4.55 → 0.4.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lib/docker.ts CHANGED
@@ -1,13 +1,20 @@
1
- import { spawn as nodeSpawn } from "child_process";
2
- import { existsSync } from "fs";
3
- import { createRequire } from "module";
1
+ import { existsSync, watch as fsWatch } from "fs";
4
2
  import { dirname, join } from "path";
5
3
 
6
- import { saveAssistantEntry, setActiveAssistant } from "./assistant-config";
4
+ // Direct import — bun embeds this at compile time so it works in compiled binaries.
5
+ import cliPkg from "../../package.json";
6
+
7
+ import {
8
+ findAssistantByName,
9
+ saveAssistantEntry,
10
+ setActiveAssistant,
11
+ } from "./assistant-config";
7
12
  import type { AssistantEntry } from "./assistant-config";
8
13
  import { DEFAULT_GATEWAY_PORT } from "./constants";
9
14
  import type { Species } from "./constants";
10
- import { generateRandomSuffix } from "./random-name";
15
+ import { leaseGuardianToken } from "./guardian-token";
16
+ import { isVellumProcess, stopProcess } from "./process";
17
+ import { generateInstanceName } from "./random-name";
11
18
  import { exec, execOutput } from "./step-runner";
12
19
  import {
13
20
  closeLogFile,
@@ -16,7 +23,21 @@ import {
16
23
  writeToLogFile,
17
24
  } from "./xdg-log";
18
25
 
19
- const _require = createRequire(import.meta.url);
26
+ export type ServiceName = "assistant" | "credential-executor" | "gateway";
27
+
28
+ const DOCKERHUB_ORG = "vellumai";
29
+ export const DOCKERHUB_IMAGES: Record<ServiceName, string> = {
30
+ assistant: `${DOCKERHUB_ORG}/vellum-assistant`,
31
+ "credential-executor": `${DOCKERHUB_ORG}/vellum-credential-executor`,
32
+ gateway: `${DOCKERHUB_ORG}/vellum-gateway`,
33
+ };
34
+
35
+ /** Internal ports exposed by each service's Dockerfile. */
36
+ export const ASSISTANT_INTERNAL_PORT = 3001;
37
+ export const GATEWAY_INTERNAL_PORT = 7830;
38
+
39
+ /** Max time to wait for the assistant to pass the `/readyz` readiness check; also bounds the guardian-token lease deadline. */
40
+ export const DOCKER_READY_TIMEOUT_MS = 3 * 60 * 1000;
20
41
 
21
42
  /**
22
43
  * Checks whether the `docker` CLI and daemon are available on the system.
@@ -114,367 +135,711 @@ async function ensureDockerInstalled(): Promise<void> {
114
135
  }
115
136
  }
116
137
 
117
- interface DockerRoot {
118
- /** Directory to use as the Docker build context */
119
- root: string;
120
- /** Relative path from root to the directory containing the Dockerfiles */
121
- dockerfileDir: string;
138
+ /** Derive the Docker resource names from the instance name. */
139
+ export function dockerResourceNames(instanceName: string) {
140
+ return {
141
+ assistantContainer: `${instanceName}-assistant`,
142
+ cesContainer: `${instanceName}-credential-executor`,
143
+ dataVolume: `vellum-data-${instanceName}`,
144
+ gatewayContainer: `${instanceName}-gateway`,
145
+ network: `vellum-net-${instanceName}`,
146
+ socketVolume: `vellum-ces-bootstrap-${instanceName}`,
147
+ };
122
148
  }
123
149
 
124
- /**
125
- * Locate the directory containing the Dockerfile. In the source tree the
126
- * Dockerfiles live under `meta/`, but when installed as an npm package they
127
- * are at the package root.
128
- */
129
- function findDockerRoot(developmentMode: boolean = false): DockerRoot {
130
- // Source tree: cli/src/lib/ -> repo root (Dockerfiles in meta/)
131
- const sourceTreeRoot = join(import.meta.dir, "..", "..", "..");
132
- if (existsSync(join(sourceTreeRoot, "meta", "Dockerfile"))) {
133
- return { root: sourceTreeRoot, dockerfileDir: "meta" };
150
+ /** Silently attempt to stop and remove a Docker container. */
151
+ export async function removeContainer(containerName: string): Promise<void> {
152
+ try {
153
+ await exec("docker", ["stop", containerName]);
154
+ } catch {
155
+ // container may not exist or already stopped
134
156
  }
157
+ try {
158
+ await exec("docker", ["rm", containerName]);
159
+ } catch {
160
+ // container may not exist or already removed
161
+ }
162
+ }
163
+
164
+ export async function retireDocker(name: string): Promise<void> {
165
+ console.log(`\u{1F5D1}\ufe0f Stopping Docker containers for '${name}'...\n`);
166
+
167
+ // Stop the file watcher process if one is tracked for this instance.
168
+ const entry = findAssistantByName(name);
169
+ const watcherPid =
170
+ typeof entry?.watcherPid === "number" ? entry.watcherPid : null;
171
+ if (watcherPid !== null) {
172
+ if (isVellumProcess(watcherPid)) {
173
+ await stopProcess(watcherPid, "file-watcher");
174
+ } else {
175
+ console.log(
176
+ `PID ${watcherPid} is not a vellum process — skipping stale file-watcher PID.`,
177
+ );
178
+ }
179
+ }
180
+
181
+ const res = dockerResourceNames(name);
182
+
183
+ await removeContainer(res.cesContainer);
184
+ await removeContainer(res.gatewayContainer);
185
+ await removeContainer(res.assistantContainer);
135
186
 
136
- // bunx layout: @vellumai/cli/src/lib/ -> ../../../.. -> node_modules -> vellum/
137
- const bunxRoot = join(import.meta.dir, "..", "..", "..", "..", "vellum");
138
- if (existsSync(join(bunxRoot, "Dockerfile"))) {
139
- return { root: bunxRoot, dockerfileDir: "." };
187
+ // Also clean up a legacy single-container instance if it exists
188
+ await removeContainer(name);
189
+
190
+ // Remove shared network and volumes
191
+ try {
192
+ await exec("docker", ["network", "rm", res.network]);
193
+ } catch {
194
+ // network may not exist
195
+ }
196
+ for (const vol of [res.dataVolume, res.socketVolume]) {
197
+ try {
198
+ await exec("docker", ["volume", "rm", vol]);
199
+ } catch {
200
+ // volume may not exist
201
+ }
140
202
  }
141
203
 
142
- // Walk up from cwd looking for meta/Dockerfile (source checkout)
143
- let dir = process.cwd();
204
+ console.log(`\u2705 Docker instance retired.`);
205
+ }
206
+
207
+ /**
208
+ * Walk up from `startDir` looking for a directory that contains
209
+ * `assistant/Dockerfile`. Returns the path if found, otherwise `undefined`.
210
+ */
211
+ function walkUpForRepoRoot(startDir: string): string | undefined {
212
+ let dir = startDir;
144
213
  while (true) {
145
- if (existsSync(join(dir, "meta", "Dockerfile"))) {
146
- return { root: dir, dockerfileDir: "meta" };
214
+ if (existsSync(join(dir, "assistant", "Dockerfile"))) {
215
+ return dir;
147
216
  }
148
217
  const parent = dirname(dir);
149
218
  if (parent === dir) break;
150
219
  dir = parent;
151
220
  }
221
+ return undefined;
222
+ }
152
223
 
153
- // In development mode, walk up from the executable path to find the repo
154
- // root. This handles the macOS app bundle case where the binary lives inside
155
- // the repo at e.g. clients/macos/dist/Vellum.app/Contents/MacOS/.
156
- if (developmentMode) {
157
- let execDir = dirname(process.execPath);
158
- while (true) {
159
- if (existsSync(join(execDir, "meta", "Dockerfile.development"))) {
160
- return { root: execDir, dockerfileDir: "meta" };
161
- }
162
- const parent = dirname(execDir);
163
- if (parent === execDir) break;
164
- execDir = parent;
165
- }
224
+ /**
225
+ * Locate the repository root by walking up from `cli/src/lib/` until we
226
+ * find a directory containing the expected Dockerfiles.
227
+ */
228
+ function findRepoRoot(): string {
229
+ // cli/src/lib/ -> repo root (works when running from source via bun)
230
+ const sourceTreeRoot = join(import.meta.dir, "..", "..", "..");
231
+ if (existsSync(join(sourceTreeRoot, "assistant", "Dockerfile"))) {
232
+ return sourceTreeRoot;
166
233
  }
167
234
 
168
- // macOS app bundle: Contents/MacOS/vellum-cli -> Contents/Resources/Dockerfile
169
- const appResourcesDir = join(dirname(process.execPath), "..", "Resources");
170
- if (existsSync(join(appResourcesDir, "Dockerfile"))) {
171
- return { root: appResourcesDir, dockerfileDir: "." };
235
+ // Walk up from the compiled binary's location. When the CLI is bundled
236
+ // inside the macOS app (e.g. .../dist/Vellum.app/Contents/MacOS/vellum-cli),
237
+ // the binary still lives inside the repo tree, so walking up will
238
+ // eventually reach the repo root.
239
+ const execRoot = walkUpForRepoRoot(dirname(process.execPath));
240
+ if (execRoot) {
241
+ return execRoot;
172
242
  }
173
243
 
174
- // Fall back to Node module resolution for the `vellum` package
175
- try {
176
- const vellumPkgPath = _require.resolve("vellum/package.json");
177
- const vellumDir = dirname(vellumPkgPath);
178
- if (existsSync(join(vellumDir, "Dockerfile"))) {
179
- return { root: vellumDir, dockerfileDir: "." };
180
- }
181
- } catch {
182
- // resolution failed
244
+ // Walk up from cwd as a final fallback
245
+ const cwdRoot = walkUpForRepoRoot(process.cwd());
246
+ if (cwdRoot) {
247
+ return cwdRoot;
183
248
  }
184
249
 
185
250
  throw new Error(
186
- "Could not find Dockerfile. Run this command from within the " +
187
- "vellum-assistant repository, or ensure the vellum package is installed.",
251
+ "Could not find repository root containing assistant/Dockerfile. " +
252
+ "Run this command from within the vellum-assistant repository.",
253
+ );
254
+ }
255
+
256
+ interface ServiceImageConfig {
257
+ context: string;
258
+ dockerfile: string;
259
+ tag: string;
260
+ }
261
+
262
+ async function buildImage(config: ServiceImageConfig): Promise<void> {
263
+ await exec(
264
+ "docker",
265
+ ["build", "-f", config.dockerfile, "-t", config.tag, "."],
266
+ { cwd: config.context },
267
+ );
268
+ }
269
+
270
+ function serviceImageConfigs(
271
+ repoRoot: string,
272
+ imageTags: Record<ServiceName, string>,
273
+ ): Record<ServiceName, ServiceImageConfig> {
274
+ return {
275
+ assistant: {
276
+ context: repoRoot,
277
+ dockerfile: "assistant/Dockerfile",
278
+ tag: imageTags.assistant,
279
+ },
280
+ "credential-executor": {
281
+ context: repoRoot,
282
+ dockerfile: "credential-executor/Dockerfile",
283
+ tag: imageTags["credential-executor"],
284
+ },
285
+ gateway: {
286
+ context: join(repoRoot, "gateway"),
287
+ dockerfile: "Dockerfile",
288
+ tag: imageTags.gateway,
289
+ },
290
+ };
291
+ }
292
+
293
+ async function buildAllImages(
294
+ repoRoot: string,
295
+ imageTags: Record<ServiceName, string>,
296
+ log: (msg: string) => void,
297
+ ): Promise<void> {
298
+ const configs = serviceImageConfigs(repoRoot, imageTags);
299
+ log("🔨 Building all images in parallel...");
300
+ await Promise.all(
301
+ Object.entries(configs).map(async ([name, config]) => {
302
+ await buildImage(config);
303
+ log(`✅ ${name} built`);
304
+ }),
188
305
  );
189
306
  }
190
307
 
191
308
  /**
192
- * Creates a line-buffered output prefixer that prepends `[docker]` to each
193
- * line from the container's stdout/stderr. Calls `onLine` for each complete
194
- * line so the caller can detect sentinel output (e.g. hatch completion).
309
+ * Returns a function that builds the `docker run` arguments for a given
310
+ * service. Each container joins a shared Docker bridge network so they
311
+ * can be restarted independently.
195
312
  */
196
- function createLinePrefixer(
197
- stream: NodeJS.WritableStream,
198
- onLine?: (line: string) => void,
199
- ): { write(data: Buffer): void; flush(): void } {
200
- let remainder = "";
313
+ export function serviceDockerRunArgs(opts: {
314
+ extraAssistantEnv?: Record<string, string>;
315
+ gatewayPort: number;
316
+ imageTags: Record<ServiceName, string>;
317
+ instanceName: string;
318
+ res: ReturnType<typeof dockerResourceNames>;
319
+ }): Record<ServiceName, () => string[]> {
320
+ const { extraAssistantEnv, gatewayPort, imageTags, instanceName, res } = opts;
201
321
  return {
202
- write(data: Buffer) {
203
- const text = remainder + data.toString();
204
- const lines = text.split("\n");
205
- remainder = lines.pop() ?? "";
206
- for (const line of lines) {
207
- stream.write(` [docker] ${line}\n`);
208
- onLine?.(line);
322
+ assistant: () => {
323
+ const args: string[] = [
324
+ "run",
325
+ "--init",
326
+ "-d",
327
+ "--name",
328
+ res.assistantContainer,
329
+ `--network=${res.network}`,
330
+ "-v",
331
+ `${res.dataVolume}:/data`,
332
+ "-v",
333
+ `${res.socketVolume}:/run/ces-bootstrap`,
334
+ "-e",
335
+ `VELLUM_ASSISTANT_NAME=${instanceName}`,
336
+ "-e",
337
+ "RUNTIME_HTTP_HOST=0.0.0.0",
338
+ ];
339
+ for (const envVar of ["ANTHROPIC_API_KEY", "VELLUM_PLATFORM_URL"]) {
340
+ if (process.env[envVar]) {
341
+ args.push("-e", `${envVar}=${process.env[envVar]}`);
342
+ }
209
343
  }
210
- },
211
- flush() {
212
- if (remainder) {
213
- stream.write(` [docker] ${remainder}\n`);
214
- onLine?.(remainder);
215
- remainder = "";
344
+ if (extraAssistantEnv) {
345
+ for (const [key, value] of Object.entries(extraAssistantEnv)) {
346
+ args.push("-e", `${key}=${value}`);
347
+ }
216
348
  }
349
+ args.push(imageTags.assistant);
350
+ return args;
217
351
  },
352
+ gateway: () => [
353
+ "run",
354
+ "--init",
355
+ "-d",
356
+ "--name",
357
+ res.gatewayContainer,
358
+ `--network=${res.network}`,
359
+ "-p",
360
+ `${gatewayPort}:${GATEWAY_INTERNAL_PORT}`,
361
+ "-v",
362
+ `${res.dataVolume}:/data`,
363
+ "-e",
364
+ "BASE_DATA_DIR=/data",
365
+ "-e",
366
+ `GATEWAY_PORT=${GATEWAY_INTERNAL_PORT}`,
367
+ "-e",
368
+ `ASSISTANT_HOST=${res.assistantContainer}`,
369
+ "-e",
370
+ `RUNTIME_HTTP_PORT=${ASSISTANT_INTERNAL_PORT}`,
371
+ "-e",
372
+ "RUNTIME_PROXY_ENABLED=true",
373
+ imageTags.gateway,
374
+ ],
375
+ "credential-executor": () => [
376
+ "run",
377
+ "--init",
378
+ "-d",
379
+ "--name",
380
+ res.cesContainer,
381
+ `--network=${res.network}`,
382
+ "-v",
383
+ `${res.socketVolume}:/run/ces-bootstrap`,
384
+ "-v",
385
+ `${res.dataVolume}:/data:ro`,
386
+ "-e",
387
+ "CES_MODE=managed",
388
+ "-e",
389
+ "CES_BOOTSTRAP_SOCKET_DIR=/run/ces-bootstrap",
390
+ "-e",
391
+ "CES_ASSISTANT_DATA_MOUNT=/data",
392
+ imageTags["credential-executor"],
393
+ ],
218
394
  };
219
395
  }
220
396
 
221
- async function fetchRemoteBearerToken(
222
- containerName: string,
223
- ): Promise<string | null> {
224
- try {
225
- const remoteCmd =
226
- 'cat ~/.vellum.lock.json 2>/dev/null || cat ~/.vellum.lockfile.json 2>/dev/null || echo "{}"';
227
- const output = await execOutput("docker", [
228
- "exec",
229
- containerName,
230
- "sh",
231
- "-c",
232
- remoteCmd,
233
- ]);
234
- const data = JSON.parse(output.trim());
235
- const assistants = data.assistants;
236
- if (Array.isArray(assistants) && assistants.length > 0) {
237
- const token = assistants[0].bearerToken;
238
- if (typeof token === "string" && token) {
239
- return token;
240
- }
241
- }
242
- return null;
243
- } catch {
244
- return null;
397
+ /** The order in which services must be started. */
398
+ export const SERVICE_START_ORDER: ServiceName[] = [
399
+ "assistant",
400
+ "gateway",
401
+ "credential-executor",
402
+ ];
403
+
404
+ /** Start all three containers in dependency order. */
405
+ export async function startContainers(
406
+ opts: {
407
+ extraAssistantEnv?: Record<string, string>;
408
+ gatewayPort: number;
409
+ imageTags: Record<ServiceName, string>;
410
+ instanceName: string;
411
+ res: ReturnType<typeof dockerResourceNames>;
412
+ },
413
+ log: (msg: string) => void,
414
+ ): Promise<void> {
415
+ const runArgs = serviceDockerRunArgs(opts);
416
+ for (const service of SERVICE_START_ORDER) {
417
+ log(`🚀 Starting ${service} container...`);
418
+ await exec("docker", runArgs[service]());
245
419
  }
246
420
  }
247
421
 
248
- export async function retireDocker(name: string): Promise<void> {
249
- console.log(`\u{1F5D1}\ufe0f Stopping Docker container '${name}'...\n`);
422
+ /** Stop and remove all three containers (ignoring errors). */
423
+ export async function stopContainers(
424
+ res: ReturnType<typeof dockerResourceNames>,
425
+ ): Promise<void> {
426
+ await removeContainer(res.cesContainer);
427
+ await removeContainer(res.gatewayContainer);
428
+ await removeContainer(res.assistantContainer);
429
+ }
250
430
 
251
- try {
252
- await exec("docker", ["stop", name]);
253
- } catch (error) {
254
- console.warn(
255
- `\u26a0\ufe0f Failed to stop container: ${error instanceof Error ? error.message : error}`,
256
- );
431
+ /**
432
+ * Determine which services are affected by a changed file path relative
433
+ * to the repository root.
434
+ */
435
+ function affectedServices(
436
+ filePath: string,
437
+ repoRoot: string,
438
+ ): Set<ServiceName> {
439
+ const rel = filePath.startsWith(repoRoot)
440
+ ? filePath.slice(repoRoot.length + 1)
441
+ : filePath;
442
+
443
+ const affected = new Set<ServiceName>();
444
+
445
+ if (rel.startsWith("assistant/")) {
446
+ affected.add("assistant");
447
+ }
448
+ if (rel.startsWith("credential-executor/")) {
449
+ affected.add("credential-executor");
450
+ }
451
+ if (rel.startsWith("gateway/")) {
452
+ affected.add("gateway");
453
+ }
454
+ // Shared packages affect both assistant and credential-executor
455
+ if (rel.startsWith("packages/")) {
456
+ affected.add("assistant");
457
+ affected.add("credential-executor");
257
458
  }
258
459
 
259
- try {
260
- await exec("docker", ["rm", name]);
261
- } catch (error) {
262
- console.warn(
263
- `\u26a0\ufe0f Failed to remove container: ${error instanceof Error ? error.message : error}`,
264
- );
460
+ return affected;
461
+ }
462
+
463
+ /**
464
+ * Watch for file changes in the assistant, gateway, credential-executor,
465
+ * and packages directories. When changes are detected, rebuild the affected
466
+ * images and restart their containers.
467
+ */
468
+ function startFileWatcher(opts: {
469
+ gatewayPort: number;
470
+ imageTags: Record<ServiceName, string>;
471
+ instanceName: string;
472
+ repoRoot: string;
473
+ res: ReturnType<typeof dockerResourceNames>;
474
+ }): () => void {
475
+ const { gatewayPort, imageTags, instanceName, repoRoot, res } = opts;
476
+
477
+ const watchDirs = [
478
+ join(repoRoot, "assistant"),
479
+ join(repoRoot, "credential-executor"),
480
+ join(repoRoot, "gateway"),
481
+ join(repoRoot, "packages"),
482
+ ];
483
+
484
+ let debounceTimer: ReturnType<typeof setTimeout> | null = null;
485
+ let pendingServices = new Set<ServiceName>();
486
+ let rebuilding = false;
487
+
488
+ const configs = serviceImageConfigs(repoRoot, imageTags);
489
+ const runArgs = serviceDockerRunArgs({
490
+ gatewayPort,
491
+ imageTags,
492
+ instanceName,
493
+ res,
494
+ });
495
+ const containerForService: Record<ServiceName, string> = {
496
+ assistant: res.assistantContainer,
497
+ "credential-executor": res.cesContainer,
498
+ gateway: res.gatewayContainer,
499
+ };
500
+
501
+ async function rebuildAndRestart(): Promise<void> {
502
+ if (rebuilding) return;
503
+ rebuilding = true;
504
+
505
+ const services = pendingServices;
506
+ pendingServices = new Set();
507
+
508
+ const serviceNames = [...services].join(", ");
509
+ console.log(`\n🔄 Changes detected — rebuilding: ${serviceNames}`);
510
+
511
+ try {
512
+ await Promise.all(
513
+ [...services].map(async (service) => {
514
+ console.log(`🔨 Building ${service}...`);
515
+ await buildImage(configs[service]);
516
+ console.log(`✅ ${service} built`);
517
+ }),
518
+ );
519
+
520
+ for (const service of services) {
521
+ const container = containerForService[service];
522
+ console.log(`🔄 Restarting ${container}...`);
523
+ await removeContainer(container);
524
+ await exec("docker", runArgs[service]());
525
+ }
526
+
527
+ console.log("✅ Rebuild complete — watching for changes...\n");
528
+ } catch (err) {
529
+ console.error(
530
+ `❌ Rebuild failed: ${err instanceof Error ? err.message : err}`,
531
+ );
532
+ console.log(" Watching for changes...\n");
533
+ } finally {
534
+ rebuilding = false;
535
+ if (pendingServices.size > 0) {
536
+ rebuildAndRestart();
537
+ }
538
+ }
265
539
  }
266
540
 
267
- console.log(`\u2705 Docker instance retired.`);
541
+ const watchers: ReturnType<typeof fsWatch>[] = [];
542
+
543
+ for (const dir of watchDirs) {
544
+ if (!existsSync(dir)) continue;
545
+ const watcher = fsWatch(dir, { recursive: true }, (_event, filename) => {
546
+ if (!filename) return;
547
+ if (
548
+ filename.includes("node_modules") ||
549
+ filename.includes(".env") ||
550
+ filename.startsWith(".")
551
+ ) {
552
+ return;
553
+ }
554
+
555
+ const fullPath = join(dir, filename);
556
+ const services = affectedServices(fullPath, repoRoot);
557
+ if (services.size === 0) return;
558
+
559
+ for (const s of services) {
560
+ pendingServices.add(s);
561
+ }
562
+
563
+ if (debounceTimer) clearTimeout(debounceTimer);
564
+ debounceTimer = setTimeout(() => {
565
+ debounceTimer = null;
566
+ rebuildAndRestart();
567
+ }, 500);
568
+ });
569
+ watchers.push(watcher);
570
+ }
571
+
572
+ console.log("👀 Watching for file changes in:");
573
+ console.log(" assistant/, gateway/, credential-executor/, packages/");
574
+ console.log("");
575
+
576
+ return () => {
577
+ for (const watcher of watchers) {
578
+ watcher.close();
579
+ }
580
+ if (debounceTimer) clearTimeout(debounceTimer);
581
+ };
268
582
  }
269
583
 
270
584
  export async function hatchDocker(
271
585
  species: Species,
272
586
  detached: boolean,
273
587
  name: string | null,
274
- watch: boolean,
588
+ watch: boolean = false,
275
589
  ): Promise<void> {
276
590
  resetLogFile("hatch.log");
277
591
 
278
- await ensureDockerInstalled();
592
+ let logFd = openLogFile("hatch.log");
593
+ const log = (msg: string): void => {
594
+ console.log(msg);
595
+ writeToLogFile(logFd, `${new Date().toISOString()} ${msg}\n`);
596
+ };
279
597
 
280
- let repoRoot: string;
281
- let dockerfileDir: string;
282
598
  try {
283
- ({ root: repoRoot, dockerfileDir } = findDockerRoot(watch));
284
- } catch (err) {
285
- const message = err instanceof Error ? err.message : String(err);
286
- const logFd = openLogFile("hatch.log");
287
- writeToLogFile(
599
+ await ensureDockerInstalled();
600
+
601
+ const instanceName = generateInstanceName(species, name);
602
+ const gatewayPort = DEFAULT_GATEWAY_PORT;
603
+
604
+ const imageTags: Record<ServiceName, string> = {
605
+ assistant: "",
606
+ "credential-executor": "",
607
+ gateway: "",
608
+ };
609
+
610
+ let repoRoot: string | undefined;
611
+
612
+ if (watch) {
613
+ repoRoot = findRepoRoot();
614
+ const localTag = `local-${instanceName}`;
615
+ imageTags.assistant = `vellum-assistant:${localTag}`;
616
+ imageTags.gateway = `vellum-gateway:${localTag}`;
617
+ imageTags["credential-executor"] =
618
+ `vellum-credential-executor:${localTag}`;
619
+
620
+ log(`🥚 Hatching Docker assistant: ${instanceName}`);
621
+ log(` Species: ${species}`);
622
+ log(` Mode: development (watch)`);
623
+ log(` Repo: ${repoRoot}`);
624
+ log(` Images (local build):`);
625
+ log(` assistant: ${imageTags.assistant}`);
626
+ log(` gateway: ${imageTags.gateway}`);
627
+ log(` credential-executor: ${imageTags["credential-executor"]}`);
628
+ log("");
629
+
630
+ await buildAllImages(repoRoot, imageTags, log);
631
+ log("✅ Docker images built");
632
+ } else {
633
+ const version = cliPkg.version;
634
+ const versionTag = version ? `v${version}` : "latest";
635
+ imageTags.assistant = `${DOCKERHUB_IMAGES.assistant}:${versionTag}`;
636
+ imageTags.gateway = `${DOCKERHUB_IMAGES.gateway}:${versionTag}`;
637
+ imageTags["credential-executor"] =
638
+ `${DOCKERHUB_IMAGES["credential-executor"]}:${versionTag}`;
639
+
640
+ log(`🥚 Hatching Docker assistant: ${instanceName}`);
641
+ log(` Species: ${species}`);
642
+ log(` Images:`);
643
+ log(` assistant: ${imageTags.assistant}`);
644
+ log(` gateway: ${imageTags.gateway}`);
645
+ log(` credential-executor: ${imageTags["credential-executor"]}`);
646
+ log("");
647
+
648
+ log("📦 Pulling Docker images...");
649
+ await exec("docker", ["pull", imageTags.assistant]);
650
+ await exec("docker", ["pull", imageTags.gateway]);
651
+ await exec("docker", ["pull", imageTags["credential-executor"]]);
652
+ log("✅ Docker images pulled");
653
+ }
654
+
655
+ const res = dockerResourceNames(instanceName);
656
+
657
+ log("📁 Creating shared network and volumes...");
658
+ await exec("docker", ["network", "create", res.network]);
659
+ await exec("docker", ["volume", "create", res.dataVolume]);
660
+ await exec("docker", ["volume", "create", res.socketVolume]);
661
+
662
+ await startContainers({ gatewayPort, imageTags, instanceName, res }, log);
663
+
664
+ const runtimeUrl = `http://localhost:${gatewayPort}`;
665
+ const dockerEntry: AssistantEntry = {
666
+ assistantId: instanceName,
667
+ runtimeUrl,
668
+ cloud: "docker",
669
+ species,
670
+ hatchedAt: new Date().toISOString(),
671
+ volume: res.dataVolume,
672
+ };
673
+ saveAssistantEntry(dockerEntry);
674
+ setActiveAssistant(instanceName);
675
+
676
+ const { ready } = await waitForGatewayAndLease({
677
+ containerName: res.assistantContainer,
678
+ detached: watch ? false : detached,
679
+ instanceName,
288
680
  logFd,
289
- `[docker-hatch] ${new Date().toISOString()} ERROR\n${message}\n`,
290
- );
291
- closeLogFile(logFd);
292
- console.error(message);
293
- throw err;
294
- }
681
+ runtimeUrl,
682
+ });
295
683
 
296
- const instanceName = name ?? `${species}-${generateRandomSuffix()}`;
297
- const dockerfileName = watch ? "Dockerfile.development" : "Dockerfile";
298
- const dockerfile = join(dockerfileDir, dockerfileName);
299
- const dockerfilePath = join(repoRoot, dockerfile);
684
+ if (!ready && !(watch && repoRoot)) {
685
+ throw new Error("Timed out waiting for assistant to become ready");
686
+ }
300
687
 
301
- if (!existsSync(dockerfilePath)) {
302
- const message = `Error: ${dockerfile} not found at ${dockerfilePath}`;
303
- const logFd = openLogFile("hatch.log");
304
- writeToLogFile(
305
- logFd,
306
- `[docker-hatch] ${new Date().toISOString()} ERROR\n${message}\n`,
307
- );
688
+ if (watch && repoRoot) {
689
+ saveAssistantEntry({ ...dockerEntry, watcherPid: process.pid });
690
+
691
+ const stopWatcher = startFileWatcher({
692
+ gatewayPort,
693
+ imageTags,
694
+ instanceName,
695
+ repoRoot,
696
+ res,
697
+ });
698
+
699
+ await new Promise<void>((resolve) => {
700
+ const cleanup = async () => {
701
+ log("\n🛑 Shutting down...");
702
+ stopWatcher();
703
+ await stopContainers(res);
704
+ saveAssistantEntry({ ...dockerEntry, watcherPid: undefined });
705
+ log("✅ Docker instance stopped.");
706
+ resolve();
707
+ };
708
+
709
+ // SIGINT (Ctrl+C): full cleanup including stopping containers.
710
+ process.on("SIGINT", () => void cleanup());
711
+
712
+ // SIGTERM (from `vellum retire`): exit quickly — the caller
713
+ // handles container teardown, so we only need to close the
714
+ // file watchers and let the process terminate.
715
+ process.on("SIGTERM", () => {
716
+ stopWatcher();
717
+ saveAssistantEntry({ ...dockerEntry, watcherPid: undefined });
718
+ resolve();
719
+ });
720
+ });
721
+ }
722
+ } finally {
308
723
  closeLogFile(logFd);
309
- console.error(message);
310
- process.exit(1);
724
+ logFd = "ignore";
311
725
  }
726
+ }
312
727
 
313
- console.log(`🥚 Hatching Docker assistant: ${instanceName}`);
314
- console.log(` Species: ${species}`);
315
- console.log(` Dockerfile: ${dockerfile}`);
316
- if (watch) {
317
- console.log(` Mode: development (watch)`);
318
- }
319
- console.log("");
728
+ /**
729
+ * In detached mode, print instance details and return immediately.
730
+ * Otherwise, poll the gateway health check until it responds, then
731
+ * lease a guardian token.
732
+ */
733
+ async function waitForGatewayAndLease(opts: {
734
+ containerName: string;
735
+ detached: boolean;
736
+ instanceName: string;
737
+ logFd: number | "ignore";
738
+ runtimeUrl: string;
739
+ }): Promise<{ ready: boolean }> {
740
+ const { containerName, detached, instanceName, logFd, runtimeUrl } = opts;
741
+
742
+ const log = (msg: string): void => {
743
+ console.log(msg);
744
+ writeToLogFile(logFd, `${new Date().toISOString()} ${msg}\n`);
745
+ };
320
746
 
321
- const imageTag = `vellum-assistant:${instanceName}`;
322
- const logFd = openLogFile("hatch.log");
323
- console.log("🔨 Building Docker image...");
324
- try {
325
- await exec("docker", ["build", "-f", dockerfile, "-t", imageTag, "."], {
326
- cwd: repoRoot,
327
- });
328
- } catch (err) {
329
- const message = err instanceof Error ? err.message : String(err);
330
- writeToLogFile(
331
- logFd,
332
- `[docker-build] ${new Date().toISOString()} ERROR\n${message}\n`,
333
- );
334
- closeLogFile(logFd);
335
- throw err;
747
+ if (detached) {
748
+ log("\n✅ Docker assistant hatched!\n");
749
+ log("Instance details:");
750
+ log(` Name: ${instanceName}`);
751
+ log(` Runtime: ${runtimeUrl}`);
752
+ log(` Container: ${containerName}`);
753
+ log("");
754
+ log(`Stop with: vellum retire ${instanceName}`);
755
+ return { ready: true };
336
756
  }
337
- closeLogFile(logFd);
338
- console.log("✅ Docker image built\n");
339
-
340
- const gatewayPort = DEFAULT_GATEWAY_PORT;
341
- const runArgs: string[] = [
342
- "run",
343
- "--init",
344
- "--name",
345
- instanceName,
346
- "-p",
347
- `${gatewayPort}:${gatewayPort}`,
348
- ];
349
757
 
350
- // Pass through environment variables the assistant needs
351
- for (const envVar of ["ANTHROPIC_API_KEY", "VELLUM_PLATFORM_URL"]) {
352
- if (process.env[envVar]) {
353
- runArgs.push("-e", `${envVar}=${process.env[envVar]}`);
758
+ log(` Container: ${containerName}`);
759
+ log(` Runtime: ${runtimeUrl}`);
760
+ log("");
761
+ log("Waiting for assistant to become ready...");
762
+
763
+ const readyUrl = `${runtimeUrl}/readyz`;
764
+ const start = Date.now();
765
+ let ready = false;
766
+
767
+ while (Date.now() - start < DOCKER_READY_TIMEOUT_MS) {
768
+ try {
769
+ const resp = await fetch(readyUrl, {
770
+ signal: AbortSignal.timeout(5000),
771
+ });
772
+ if (resp.ok) {
773
+ ready = true;
774
+ break;
775
+ }
776
+ const body = await resp.text();
777
+ let detail = "";
778
+ try {
779
+ const json = JSON.parse(body);
780
+ const parts = [json.status];
781
+ if (json.upstream != null) parts.push(`upstream=${json.upstream}`);
782
+ detail = ` — ${parts.join(", ")}`;
783
+ } catch {}
784
+ log(`Readiness check: ${resp.status}${detail} (retrying...)`);
785
+ } catch {
786
+ // Connection refused / timeout — not up yet
354
787
  }
788
+ await new Promise((r) => setTimeout(r, 1000));
355
789
  }
356
790
 
357
- // Pass the instance name so the inner hatch uses the same assistant ID
358
- // instead of generating a new random one.
359
- runArgs.push("-e", `VELLUM_ASSISTANT_NAME=${instanceName}`);
360
-
361
- // Mount source volumes in watch mode for hot reloading
362
- if (watch) {
363
- runArgs.push(
364
- "-v",
365
- `${join(repoRoot, "assistant", "src")}:/app/assistant/src`,
366
- "-v",
367
- `${join(repoRoot, "gateway", "src")}:/app/gateway/src`,
368
- "-v",
369
- `${join(repoRoot, "cli", "src")}:/app/cli/src`,
370
- );
791
+ if (!ready) {
792
+ log("");
793
+ log(` \u26a0\ufe0f Timed out waiting for assistant to become ready.`);
794
+ log(` The container is still running.`);
795
+ log(` Check logs with: docker logs -f ${containerName}`);
796
+ log("");
797
+ return { ready: false };
371
798
  }
372
799
 
373
- // Docker containers bind to 0.0.0.0 so localhost always works. Skip
374
- // mDNS/LAN discovery — the .local hostname often fails to resolve on the
375
- // host machine itself (mDNS is designed for cross-device discovery).
376
- const runtimeUrl = `http://localhost:${gatewayPort}`;
377
- const dockerEntry: AssistantEntry = {
378
- assistantId: instanceName,
379
- runtimeUrl,
380
- cloud: "docker",
381
- species,
382
- hatchedAt: new Date().toISOString(),
383
- };
384
- saveAssistantEntry(dockerEntry);
385
- setActiveAssistant(instanceName);
386
-
387
- // The Dockerfiles already define a CMD that runs `vellum hatch --keep-alive`.
388
- // Only override CMD when a non-default species is specified, since that
389
- // requires an extra argument the Dockerfile doesn't include.
390
- const containerCmd: string[] =
391
- species !== "vellum"
392
- ? [
393
- "vellum",
394
- "hatch",
395
- species,
396
- ...(watch ? ["--watch"] : []),
397
- "--keep-alive",
398
- ]
399
- : [];
400
-
401
- // Always start the container detached so it keeps running after the CLI exits.
402
- runArgs.push("-d");
403
- console.log("🚀 Starting Docker container...");
404
- await exec("docker", [...runArgs, imageTag, ...containerCmd], {
405
- cwd: repoRoot,
406
- });
800
+ const elapsedSec = ((Date.now() - start) / 1000).toFixed(1);
801
+ log(`Assistant ready after ${elapsedSec}s`);
407
802
 
408
- if (detached) {
409
- console.log("\n✅ Docker assistant hatched!\n");
410
- console.log("Instance details:");
411
- console.log(` Name: ${instanceName}`);
412
- console.log(` Runtime: ${runtimeUrl}`);
413
- console.log(` Container: ${instanceName}`);
414
- console.log("");
415
- console.log(`Stop with: docker stop ${instanceName}`);
416
- } else {
417
- console.log(` Container: ${instanceName}`);
418
- console.log(` Runtime: ${runtimeUrl}`);
419
- console.log("");
420
-
421
- // Tail container logs until the inner hatch completes, then exit and
422
- // leave the container running in the background.
423
- await new Promise<void>((resolve, reject) => {
424
- const child = nodeSpawn("docker", ["logs", "-f", instanceName], {
425
- stdio: ["ignore", "pipe", "pipe"],
426
- });
803
+ // Lease guardian token. The /readyz check confirms both gateway and
804
+ // assistant are reachable. Retry every 2s in case there is a brief
805
+ // window where readiness passes but the guardian endpoint is not yet ready.
806
+ log(`Guardian token lease: starting for ${instanceName} at ${runtimeUrl}`);
807
+ const leaseStart = Date.now();
808
+ const leaseDeadline = start + DOCKER_READY_TIMEOUT_MS;
809
+ let leaseSuccess = false;
810
+ let lastLeaseError: string | undefined;
427
811
 
428
- const handleLine = (line: string): void => {
429
- if (line.includes("Local assistant hatched!")) {
430
- process.nextTick(async () => {
431
- const remoteBearerToken =
432
- await fetchRemoteBearerToken(instanceName);
433
- if (remoteBearerToken) {
434
- dockerEntry.bearerToken = remoteBearerToken;
435
- saveAssistantEntry(dockerEntry);
436
- }
437
-
438
- console.log("");
439
- console.log(`\u2705 Docker container is up and running!`);
440
- console.log(` Name: ${instanceName}`);
441
- console.log(` Runtime: ${runtimeUrl}`);
442
- console.log("");
443
- child.kill();
444
- resolve();
445
- });
446
- }
447
- };
448
-
449
- const stdoutPrefixer = createLinePrefixer(process.stdout, handleLine);
450
- const stderrPrefixer = createLinePrefixer(process.stderr, handleLine);
451
-
452
- child.stdout?.on("data", (data: Buffer) => stdoutPrefixer.write(data));
453
- child.stderr?.on("data", (data: Buffer) => stderrPrefixer.write(data));
454
- child.stdout?.on("end", () => stdoutPrefixer.flush());
455
- child.stderr?.on("end", () => stderrPrefixer.flush());
456
-
457
- child.on("close", (code) => {
458
- // The log tail may exit if the container stops before the sentinel
459
- // is seen, or we killed it after detecting the sentinel.
460
- if (
461
- code === 0 ||
462
- code === null ||
463
- code === 130 ||
464
- code === 137 ||
465
- code === 143
466
- ) {
467
- resolve();
468
- } else {
469
- reject(new Error(`Docker container exited with code ${code}`));
470
- }
471
- });
472
- child.on("error", reject);
812
+ while (Date.now() < leaseDeadline) {
813
+ try {
814
+ const tokenData = await leaseGuardianToken(runtimeUrl, instanceName);
815
+ const leaseElapsed = ((Date.now() - leaseStart) / 1000).toFixed(1);
816
+ log(
817
+ `Guardian token lease: success after ${leaseElapsed}s (principalId=${tokenData.guardianPrincipalId}, expiresAt=${tokenData.accessTokenExpiresAt})`,
818
+ );
819
+ leaseSuccess = true;
820
+ break;
821
+ } catch (err) {
822
+ lastLeaseError =
823
+ err instanceof Error ? (err.stack ?? err.message) : String(err);
824
+ // Log periodically so the user knows we're still trying
825
+ const elapsed = ((Date.now() - leaseStart) / 1000).toFixed(0);
826
+ log(
827
+ `Guardian token lease: attempt failed after ${elapsed}s (${lastLeaseError.split("\n")[0]}), retrying...`,
828
+ );
829
+ }
830
+ await new Promise((r) => setTimeout(r, 2000));
831
+ }
473
832
 
474
- process.on("SIGINT", () => {
475
- child.kill();
476
- resolve();
477
- });
478
- });
833
+ if (!leaseSuccess) {
834
+ log(
835
+ `\u26a0\ufe0f Guardian token lease: FAILED after ${((Date.now() - leaseStart) / 1000).toFixed(1)}s — ${lastLeaseError ?? "unknown error"}`,
836
+ );
479
837
  }
838
+
839
+ log("");
840
+ log(`\u2705 Docker containers are up and running!`);
841
+ log(` Name: ${instanceName}`);
842
+ log(` Runtime: ${runtimeUrl}`);
843
+ log("");
844
+ return { ready: true };
480
845
  }