@vellumai/cli 0.4.56 → 0.4.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lib/docker.ts CHANGED
@@ -1,15 +1,19 @@
1
- import { spawn as nodeSpawn } from "child_process";
2
1
  import { existsSync, watch as fsWatch } from "fs";
3
2
  import { dirname, join } from "path";
4
3
 
5
4
  // Direct import — bun embeds this at compile time so it works in compiled binaries.
6
5
  import cliPkg from "../../package.json";
7
6
 
8
- import { saveAssistantEntry, setActiveAssistant } from "./assistant-config";
7
+ import {
8
+ findAssistantByName,
9
+ saveAssistantEntry,
10
+ setActiveAssistant,
11
+ } from "./assistant-config";
9
12
  import type { AssistantEntry } from "./assistant-config";
10
13
  import { DEFAULT_GATEWAY_PORT } from "./constants";
11
14
  import type { Species } from "./constants";
12
15
  import { leaseGuardianToken } from "./guardian-token";
16
+ import { isVellumProcess, stopProcess } from "./process";
13
17
  import { generateInstanceName } from "./random-name";
14
18
  import { exec, execOutput } from "./step-runner";
15
19
  import {
@@ -19,18 +23,21 @@ import {
19
23
  writeToLogFile,
20
24
  } from "./xdg-log";
21
25
 
22
- type ServiceName = "assistant" | "credential-executor" | "gateway";
26
+ export type ServiceName = "assistant" | "credential-executor" | "gateway";
23
27
 
24
28
  const DOCKERHUB_ORG = "vellumai";
25
- const DOCKERHUB_IMAGES: Record<ServiceName, string> = {
29
+ export const DOCKERHUB_IMAGES: Record<ServiceName, string> = {
26
30
  assistant: `${DOCKERHUB_ORG}/vellum-assistant`,
27
31
  "credential-executor": `${DOCKERHUB_ORG}/vellum-credential-executor`,
28
32
  gateway: `${DOCKERHUB_ORG}/vellum-gateway`,
29
33
  };
30
34
 
31
35
  /** Internal ports exposed by each service's Dockerfile. */
32
- const ASSISTANT_INTERNAL_PORT = 3001;
33
- const GATEWAY_INTERNAL_PORT = 7830;
36
+ export const ASSISTANT_INTERNAL_PORT = 3001;
37
+ export const GATEWAY_INTERNAL_PORT = 7830;
38
+
39
+ /** Max time to wait for the assistant container to emit the readiness sentinel. */
40
+ export const DOCKER_READY_TIMEOUT_MS = 3 * 60 * 1000;
34
41
 
35
42
  /**
36
43
  * Checks whether the `docker` CLI and daemon are available on the system.
@@ -128,39 +135,8 @@ async function ensureDockerInstalled(): Promise<void> {
128
135
  }
129
136
  }
130
137
 
131
- /**
132
- * Creates a line-buffered output prefixer that prepends a tag to each
133
- * line from a container's stdout/stderr. Calls `onLine` for each complete
134
- * line so the caller can detect sentinel output (e.g. hatch completion).
135
- */
136
- function createLinePrefixer(
137
- stream: NodeJS.WritableStream,
138
- prefix: string,
139
- onLine?: (line: string) => void,
140
- ): { write(data: Buffer): void; flush(): void } {
141
- let remainder = "";
142
- return {
143
- write(data: Buffer) {
144
- const text = remainder + data.toString();
145
- const lines = text.split("\n");
146
- remainder = lines.pop() ?? "";
147
- for (const line of lines) {
148
- stream.write(` [${prefix}] ${line}\n`);
149
- onLine?.(line);
150
- }
151
- },
152
- flush() {
153
- if (remainder) {
154
- stream.write(` [${prefix}] ${remainder}\n`);
155
- onLine?.(remainder);
156
- remainder = "";
157
- }
158
- },
159
- };
160
- }
161
-
162
138
  /** Derive the Docker resource names from the instance name. */
163
- function dockerResourceNames(instanceName: string) {
139
+ export function dockerResourceNames(instanceName: string) {
164
140
  return {
165
141
  assistantContainer: `${instanceName}-assistant`,
166
142
  cesContainer: `${instanceName}-credential-executor`,
@@ -172,7 +148,7 @@ function dockerResourceNames(instanceName: string) {
172
148
  }
173
149
 
174
150
  /** Silently attempt to stop and remove a Docker container. */
175
- async function removeContainer(containerName: string): Promise<void> {
151
+ export async function removeContainer(containerName: string): Promise<void> {
176
152
  try {
177
153
  await exec("docker", ["stop", containerName]);
178
154
  } catch {
@@ -188,6 +164,20 @@ async function removeContainer(containerName: string): Promise<void> {
188
164
  export async function retireDocker(name: string): Promise<void> {
189
165
  console.log(`\u{1F5D1}\ufe0f Stopping Docker containers for '${name}'...\n`);
190
166
 
167
+ // Stop the file watcher process if one is tracked for this instance.
168
+ const entry = findAssistantByName(name);
169
+ const watcherPid =
170
+ typeof entry?.watcherPid === "number" ? entry.watcherPid : null;
171
+ if (watcherPid !== null) {
172
+ if (isVellumProcess(watcherPid)) {
173
+ await stopProcess(watcherPid, "file-watcher");
174
+ } else {
175
+ console.log(
176
+ `PID ${watcherPid} is not a vellum process — skipping stale file-watcher PID.`,
177
+ );
178
+ }
179
+ }
180
+
191
181
  const res = dockerResourceNames(name);
192
182
 
193
183
  await removeContainer(res.cesContainer);
@@ -303,13 +293,14 @@ function serviceImageConfigs(
303
293
  async function buildAllImages(
304
294
  repoRoot: string,
305
295
  imageTags: Record<ServiceName, string>,
296
+ log: (msg: string) => void,
306
297
  ): Promise<void> {
307
298
  const configs = serviceImageConfigs(repoRoot, imageTags);
308
- console.log("🔨 Building all images in parallel...");
299
+ log("🔨 Building all images in parallel...");
309
300
  await Promise.all(
310
301
  Object.entries(configs).map(async ([name, config]) => {
311
302
  await buildImage(config);
312
- console.log(`✅ ${name} built`);
303
+ log(`✅ ${name} built`);
313
304
  }),
314
305
  );
315
306
  }
@@ -319,13 +310,14 @@ async function buildAllImages(
319
310
  * service. Each container joins a shared Docker bridge network so they
320
311
  * can be restarted independently.
321
312
  */
322
- function serviceDockerRunArgs(opts: {
313
+ export function serviceDockerRunArgs(opts: {
314
+ extraAssistantEnv?: Record<string, string>;
323
315
  gatewayPort: number;
324
316
  imageTags: Record<ServiceName, string>;
325
317
  instanceName: string;
326
318
  res: ReturnType<typeof dockerResourceNames>;
327
319
  }): Record<ServiceName, () => string[]> {
328
- const { gatewayPort, imageTags, instanceName, res } = opts;
320
+ const { extraAssistantEnv, gatewayPort, imageTags, instanceName, res } = opts;
329
321
  return {
330
322
  assistant: () => {
331
323
  const args: string[] = [
@@ -349,6 +341,11 @@ function serviceDockerRunArgs(opts: {
349
341
  args.push("-e", `${envVar}=${process.env[envVar]}`);
350
342
  }
351
343
  }
344
+ if (extraAssistantEnv) {
345
+ for (const [key, value] of Object.entries(extraAssistantEnv)) {
346
+ args.push("-e", `${key}=${value}`);
347
+ }
348
+ }
352
349
  args.push(imageTags.assistant);
353
350
  return args;
354
351
  },
@@ -371,6 +368,8 @@ function serviceDockerRunArgs(opts: {
371
368
  `ASSISTANT_HOST=${res.assistantContainer}`,
372
369
  "-e",
373
370
  `RUNTIME_HTTP_PORT=${ASSISTANT_INTERNAL_PORT}`,
371
+ "-e",
372
+ "RUNTIME_PROXY_ENABLED=true",
374
373
  imageTags.gateway,
375
374
  ],
376
375
  "credential-executor": () => [
@@ -396,28 +395,32 @@ function serviceDockerRunArgs(opts: {
396
395
  }
397
396
 
398
397
  /** The order in which services must be started. */
399
- const SERVICE_START_ORDER: ServiceName[] = [
398
+ export const SERVICE_START_ORDER: ServiceName[] = [
400
399
  "assistant",
401
400
  "gateway",
402
401
  "credential-executor",
403
402
  ];
404
403
 
405
404
  /** Start all three containers in dependency order. */
406
- async function startContainers(opts: {
407
- gatewayPort: number;
408
- imageTags: Record<ServiceName, string>;
409
- instanceName: string;
410
- res: ReturnType<typeof dockerResourceNames>;
411
- }): Promise<void> {
405
+ export async function startContainers(
406
+ opts: {
407
+ extraAssistantEnv?: Record<string, string>;
408
+ gatewayPort: number;
409
+ imageTags: Record<ServiceName, string>;
410
+ instanceName: string;
411
+ res: ReturnType<typeof dockerResourceNames>;
412
+ },
413
+ log: (msg: string) => void,
414
+ ): Promise<void> {
412
415
  const runArgs = serviceDockerRunArgs(opts);
413
416
  for (const service of SERVICE_START_ORDER) {
414
- console.log(`🚀 Starting ${service} container...`);
417
+ log(`🚀 Starting ${service} container...`);
415
418
  await exec("docker", runArgs[service]());
416
419
  }
417
420
  }
418
421
 
419
422
  /** Stop and remove all three containers (ignoring errors). */
420
- async function stopContainers(
423
+ export async function stopContainers(
421
424
  res: ReturnType<typeof dockerResourceNames>,
422
425
  ): Promise<void> {
423
426
  await removeContainer(res.cesContainer);
@@ -586,250 +589,257 @@ export async function hatchDocker(
586
589
  ): Promise<void> {
587
590
  resetLogFile("hatch.log");
588
591
 
589
- await ensureDockerInstalled();
590
-
591
- const instanceName = generateInstanceName(species, name);
592
- const gatewayPort = DEFAULT_GATEWAY_PORT;
593
-
594
- const imageTags: Record<ServiceName, string> = {
595
- assistant: "",
596
- "credential-executor": "",
597
- gateway: "",
592
+ let logFd = openLogFile("hatch.log");
593
+ const log = (msg: string): void => {
594
+ console.log(msg);
595
+ writeToLogFile(logFd, `${new Date().toISOString()} ${msg}\n`);
598
596
  };
599
597
 
600
- let repoRoot: string | undefined;
601
-
602
- if (watch) {
603
- repoRoot = findRepoRoot();
604
- const localTag = `local-${instanceName}`;
605
- imageTags.assistant = `vellum-assistant:${localTag}`;
606
- imageTags.gateway = `vellum-gateway:${localTag}`;
607
- imageTags["credential-executor"] = `vellum-credential-executor:${localTag}`;
608
-
609
- console.log(`🥚 Hatching Docker assistant: ${instanceName}`);
610
- console.log(` Species: ${species}`);
611
- console.log(` Mode: development (watch)`);
612
- console.log(` Repo: ${repoRoot}`);
613
- console.log(` Images (local build):`);
614
- console.log(` assistant: ${imageTags.assistant}`);
615
- console.log(` gateway: ${imageTags.gateway}`);
616
- console.log(
617
- ` credential-executor: ${imageTags["credential-executor"]}`,
618
- );
619
- console.log("");
598
+ try {
599
+ await ensureDockerInstalled();
620
600
 
621
- const logFd = openLogFile("hatch.log");
622
- try {
623
- await buildAllImages(repoRoot, imageTags);
624
- } catch (err) {
625
- const message = err instanceof Error ? err.message : String(err);
626
- writeToLogFile(
627
- logFd,
628
- `[docker-build] ${new Date().toISOString()} ERROR\n${message}\n`,
629
- );
630
- closeLogFile(logFd);
631
- throw err;
632
- }
633
- closeLogFile(logFd);
634
- console.log("✅ Docker images built\n");
635
- } else {
636
- const version = cliPkg.version;
637
- const versionTag = version ? `v${version}` : "latest";
638
- imageTags.assistant = `${DOCKERHUB_IMAGES.assistant}:${versionTag}`;
639
- imageTags.gateway = `${DOCKERHUB_IMAGES.gateway}:${versionTag}`;
640
- imageTags["credential-executor"] =
641
- `${DOCKERHUB_IMAGES["credential-executor"]}:${versionTag}`;
642
-
643
- console.log(`🥚 Hatching Docker assistant: ${instanceName}`);
644
- console.log(` Species: ${species}`);
645
- console.log(` Images:`);
646
- console.log(` assistant: ${imageTags.assistant}`);
647
- console.log(` gateway: ${imageTags.gateway}`);
648
- console.log(
649
- ` credential-executor: ${imageTags["credential-executor"]}`,
650
- );
651
- console.log("");
601
+ const instanceName = generateInstanceName(species, name);
602
+ const gatewayPort = DEFAULT_GATEWAY_PORT;
652
603
 
653
- const logFd = openLogFile("hatch.log");
654
- console.log("📦 Pulling Docker images...");
655
- try {
604
+ const imageTags: Record<ServiceName, string> = {
605
+ assistant: "",
606
+ "credential-executor": "",
607
+ gateway: "",
608
+ };
609
+
610
+ let repoRoot: string | undefined;
611
+
612
+ if (watch) {
613
+ repoRoot = findRepoRoot();
614
+ const localTag = `local-${instanceName}`;
615
+ imageTags.assistant = `vellum-assistant:${localTag}`;
616
+ imageTags.gateway = `vellum-gateway:${localTag}`;
617
+ imageTags["credential-executor"] =
618
+ `vellum-credential-executor:${localTag}`;
619
+
620
+ log(`🥚 Hatching Docker assistant: ${instanceName}`);
621
+ log(` Species: ${species}`);
622
+ log(` Mode: development (watch)`);
623
+ log(` Repo: ${repoRoot}`);
624
+ log(` Images (local build):`);
625
+ log(` assistant: ${imageTags.assistant}`);
626
+ log(` gateway: ${imageTags.gateway}`);
627
+ log(` credential-executor: ${imageTags["credential-executor"]}`);
628
+ log("");
629
+
630
+ await buildAllImages(repoRoot, imageTags, log);
631
+ log("✅ Docker images built");
632
+ } else {
633
+ const version = cliPkg.version;
634
+ const versionTag = version ? `v${version}` : "latest";
635
+ imageTags.assistant = `${DOCKERHUB_IMAGES.assistant}:${versionTag}`;
636
+ imageTags.gateway = `${DOCKERHUB_IMAGES.gateway}:${versionTag}`;
637
+ imageTags["credential-executor"] =
638
+ `${DOCKERHUB_IMAGES["credential-executor"]}:${versionTag}`;
639
+
640
+ log(`🥚 Hatching Docker assistant: ${instanceName}`);
641
+ log(` Species: ${species}`);
642
+ log(` Images:`);
643
+ log(` assistant: ${imageTags.assistant}`);
644
+ log(` gateway: ${imageTags.gateway}`);
645
+ log(` credential-executor: ${imageTags["credential-executor"]}`);
646
+ log("");
647
+
648
+ log("📦 Pulling Docker images...");
656
649
  await exec("docker", ["pull", imageTags.assistant]);
657
650
  await exec("docker", ["pull", imageTags.gateway]);
658
651
  await exec("docker", ["pull", imageTags["credential-executor"]]);
659
- } catch (err) {
660
- const message = err instanceof Error ? err.message : String(err);
661
- writeToLogFile(
662
- logFd,
663
- `[docker-pull] ${new Date().toISOString()} ERROR\n${message}\n`,
664
- );
665
- closeLogFile(logFd);
666
- throw err;
652
+ log("✅ Docker images pulled");
667
653
  }
668
- closeLogFile(logFd);
669
- console.log("✅ Docker images pulled\n");
670
- }
671
654
 
672
- const res = dockerResourceNames(instanceName);
655
+ const res = dockerResourceNames(instanceName);
673
656
 
674
- // Create shared network and volumes
675
- console.log("📁 Creating shared network and volumes...");
676
- await exec("docker", ["network", "create", res.network]);
677
- await exec("docker", ["volume", "create", res.dataVolume]);
678
- await exec("docker", ["volume", "create", res.socketVolume]);
657
+ log("📁 Creating shared network and volumes...");
658
+ await exec("docker", ["network", "create", res.network]);
659
+ await exec("docker", ["volume", "create", res.dataVolume]);
660
+ await exec("docker", ["volume", "create", res.socketVolume]);
679
661
 
680
- await startContainers({ gatewayPort, imageTags, instanceName, res });
662
+ await startContainers({ gatewayPort, imageTags, instanceName, res }, log);
681
663
 
682
- const runtimeUrl = `http://localhost:${gatewayPort}`;
683
- const dockerEntry: AssistantEntry = {
684
- assistantId: instanceName,
685
- runtimeUrl,
686
- cloud: "docker",
687
- species,
688
- hatchedAt: new Date().toISOString(),
689
- volume: res.dataVolume,
690
- };
691
- saveAssistantEntry(dockerEntry);
692
- setActiveAssistant(instanceName);
693
-
694
- // The assistant image runs the daemon directly (not via the CLI hatch
695
- // command), so we watch for the DaemonServer readiness message instead
696
- // of the CLI's "Local assistant hatched!" sentinel.
697
- await tailContainerUntilReady({
698
- containerName: res.assistantContainer,
699
- detached: watch ? false : detached,
700
- dockerEntry,
701
- instanceName,
702
- runtimeUrl,
703
- sentinel: "DaemonServer started",
704
- });
664
+ const runtimeUrl = `http://localhost:${gatewayPort}`;
665
+ const dockerEntry: AssistantEntry = {
666
+ assistantId: instanceName,
667
+ runtimeUrl,
668
+ cloud: "docker",
669
+ species,
670
+ hatchedAt: new Date().toISOString(),
671
+ volume: res.dataVolume,
672
+ };
673
+ saveAssistantEntry(dockerEntry);
674
+ setActiveAssistant(instanceName);
705
675
 
706
- if (watch && repoRoot) {
707
- const stopWatcher = startFileWatcher({
708
- gatewayPort,
709
- imageTags,
676
+ const { ready } = await waitForGatewayAndLease({
677
+ containerName: res.assistantContainer,
678
+ detached: watch ? false : detached,
710
679
  instanceName,
711
- repoRoot,
712
- res,
680
+ logFd,
681
+ runtimeUrl,
713
682
  });
714
683
 
715
- await new Promise<void>((resolve) => {
716
- const cleanup = async () => {
717
- console.log("\n🛑 Shutting down...");
718
- stopWatcher();
719
- await stopContainers(res);
720
- console.log("✅ Docker instance stopped.");
721
- resolve();
722
- };
723
-
724
- process.on("SIGINT", () => void cleanup());
725
- process.on("SIGTERM", () => void cleanup());
726
- });
684
+ if (!ready && !(watch && repoRoot)) {
685
+ throw new Error("Timed out waiting for assistant to become ready");
686
+ }
687
+
688
+ if (watch && repoRoot) {
689
+ saveAssistantEntry({ ...dockerEntry, watcherPid: process.pid });
690
+
691
+ const stopWatcher = startFileWatcher({
692
+ gatewayPort,
693
+ imageTags,
694
+ instanceName,
695
+ repoRoot,
696
+ res,
697
+ });
698
+
699
+ await new Promise<void>((resolve) => {
700
+ const cleanup = async () => {
701
+ log("\n🛑 Shutting down...");
702
+ stopWatcher();
703
+ await stopContainers(res);
704
+ saveAssistantEntry({ ...dockerEntry, watcherPid: undefined });
705
+ log("✅ Docker instance stopped.");
706
+ resolve();
707
+ };
708
+
709
+ // SIGINT (Ctrl+C): full cleanup including stopping containers.
710
+ process.on("SIGINT", () => void cleanup());
711
+
712
+ // SIGTERM (from `vellum retire`): exit quickly — the caller
713
+ // handles container teardown, so we only need to close the
714
+ // file watchers and let the process terminate.
715
+ process.on("SIGTERM", () => {
716
+ stopWatcher();
717
+ saveAssistantEntry({ ...dockerEntry, watcherPid: undefined });
718
+ resolve();
719
+ });
720
+ });
721
+ }
722
+ } finally {
723
+ closeLogFile(logFd);
724
+ logFd = "ignore";
727
725
  }
728
726
  }
729
727
 
730
728
  /**
731
729
  * In detached mode, print instance details and return immediately.
732
- * Otherwise, tail the given container's logs until the sentinel string
733
- * appears, then attempt to lease a guardian token and report readiness.
730
+ * Otherwise, poll the gateway health check until it responds, then
731
+ * lease a guardian token.
734
732
  */
735
- async function tailContainerUntilReady(opts: {
733
+ async function waitForGatewayAndLease(opts: {
736
734
  containerName: string;
737
735
  detached: boolean;
738
- dockerEntry: AssistantEntry;
739
736
  instanceName: string;
737
+ logFd: number | "ignore";
740
738
  runtimeUrl: string;
741
- sentinel: string;
742
- }): Promise<void> {
743
- const {
744
- containerName,
745
- detached,
746
- dockerEntry,
747
- instanceName,
748
- runtimeUrl,
749
- sentinel,
750
- } = opts;
739
+ }): Promise<{ ready: boolean }> {
740
+ const { containerName, detached, instanceName, logFd, runtimeUrl } = opts;
741
+
742
+ const log = (msg: string): void => {
743
+ console.log(msg);
744
+ writeToLogFile(logFd, `${new Date().toISOString()} ${msg}\n`);
745
+ };
751
746
 
752
747
  if (detached) {
753
- console.log("\n✅ Docker assistant hatched!\n");
754
- console.log("Instance details:");
755
- console.log(` Name: ${instanceName}`);
756
- console.log(` Runtime: ${runtimeUrl}`);
757
- console.log(` Container: ${containerName}`);
758
- console.log("");
759
- console.log(`Stop with: vellum retire ${instanceName}`);
760
- return;
748
+ log("\n✅ Docker assistant hatched!\n");
749
+ log("Instance details:");
750
+ log(` Name: ${instanceName}`);
751
+ log(` Runtime: ${runtimeUrl}`);
752
+ log(` Container: ${containerName}`);
753
+ log("");
754
+ log(`Stop with: vellum retire ${instanceName}`);
755
+ return { ready: true };
761
756
  }
762
757
 
763
- console.log(` Container: ${containerName}`);
764
- console.log(` Runtime: ${runtimeUrl}`);
765
- console.log("");
758
+ log(` Container: ${containerName}`);
759
+ log(` Runtime: ${runtimeUrl}`);
760
+ log("");
761
+ log("Waiting for assistant to become ready...");
766
762
 
767
- await new Promise<void>((resolve, reject) => {
768
- const child = nodeSpawn("docker", ["logs", "-f", containerName], {
769
- stdio: ["ignore", "pipe", "pipe"],
770
- });
763
+ const readyUrl = `${runtimeUrl}/readyz`;
764
+ const start = Date.now();
765
+ let ready = false;
771
766
 
772
- const handleLine = (line: string): void => {
773
- if (line.includes(sentinel)) {
774
- process.nextTick(async () => {
775
- try {
776
- const tokenData = await leaseGuardianToken(
777
- runtimeUrl,
778
- instanceName,
779
- );
780
- dockerEntry.bearerToken = tokenData.accessToken;
781
- saveAssistantEntry(dockerEntry);
782
- } catch (err) {
783
- console.warn(
784
- `\u26a0\ufe0f Could not lease guardian token: ${err instanceof Error ? err.message : err}`,
785
- );
786
- }
787
-
788
- console.log("");
789
- console.log(`\u2705 Docker containers are up and running!`);
790
- console.log(` Name: ${instanceName}`);
791
- console.log(` Runtime: ${runtimeUrl}`);
792
- console.log("");
793
- child.kill();
794
- resolve();
795
- });
767
+ while (Date.now() - start < DOCKER_READY_TIMEOUT_MS) {
768
+ try {
769
+ const resp = await fetch(readyUrl, {
770
+ signal: AbortSignal.timeout(5000),
771
+ });
772
+ if (resp.ok) {
773
+ ready = true;
774
+ break;
796
775
  }
797
- };
776
+ const body = await resp.text();
777
+ let detail = "";
778
+ try {
779
+ const json = JSON.parse(body);
780
+ const parts = [json.status];
781
+ if (json.upstream != null) parts.push(`upstream=${json.upstream}`);
782
+ detail = ` — ${parts.join(", ")}`;
783
+ } catch {}
784
+ log(`Readiness check: ${resp.status}${detail} (retrying...)`);
785
+ } catch {
786
+ // Connection refused / timeout — not up yet
787
+ }
788
+ await new Promise((r) => setTimeout(r, 1000));
789
+ }
798
790
 
799
- const stdoutPrefixer = createLinePrefixer(
800
- process.stdout,
801
- "docker",
802
- handleLine,
803
- );
804
- const stderrPrefixer = createLinePrefixer(
805
- process.stderr,
806
- "docker",
807
- handleLine,
808
- );
791
+ if (!ready) {
792
+ log("");
793
+ log(` \u26a0\ufe0f Timed out waiting for assistant to become ready.`);
794
+ log(` The container is still running.`);
795
+ log(` Check logs with: docker logs -f ${containerName}`);
796
+ log("");
797
+ return { ready: false };
798
+ }
809
799
 
810
- child.stdout?.on("data", (data: Buffer) => stdoutPrefixer.write(data));
811
- child.stderr?.on("data", (data: Buffer) => stderrPrefixer.write(data));
812
- child.stdout?.on("end", () => stdoutPrefixer.flush());
813
- child.stderr?.on("end", () => stderrPrefixer.flush());
800
+ const elapsedSec = ((Date.now() - start) / 1000).toFixed(1);
801
+ log(`Assistant ready after ${elapsedSec}s`);
814
802
 
815
- child.on("close", (code) => {
816
- if (
817
- code === 0 ||
818
- code === null ||
819
- code === 130 ||
820
- code === 137 ||
821
- code === 143
822
- ) {
823
- resolve();
824
- } else {
825
- reject(new Error(`Docker container exited with code ${code}`));
826
- }
827
- });
828
- child.on("error", reject);
803
+ // Lease guardian token. The /readyz check confirms both gateway and
804
+ // assistant are reachable. Retry with backoff in case there is a brief
805
+ // window where readiness passes but the guardian endpoint is not yet ready.
806
+ log(`Guardian token lease: starting for ${instanceName} at ${runtimeUrl}`);
807
+ const leaseStart = Date.now();
808
+ const leaseDeadline = start + DOCKER_READY_TIMEOUT_MS;
809
+ let leaseSuccess = false;
810
+ let lastLeaseError: string | undefined;
829
811
 
830
- process.on("SIGINT", () => {
831
- child.kill();
832
- resolve();
833
- });
834
- });
812
+ while (Date.now() < leaseDeadline) {
813
+ try {
814
+ const tokenData = await leaseGuardianToken(runtimeUrl, instanceName);
815
+ const leaseElapsed = ((Date.now() - leaseStart) / 1000).toFixed(1);
816
+ log(
817
+ `Guardian token lease: success after ${leaseElapsed}s (principalId=${tokenData.guardianPrincipalId}, expiresAt=${tokenData.accessTokenExpiresAt})`,
818
+ );
819
+ leaseSuccess = true;
820
+ break;
821
+ } catch (err) {
822
+ lastLeaseError =
823
+ err instanceof Error ? (err.stack ?? err.message) : String(err);
824
+ // Log periodically so the user knows we're still trying
825
+ const elapsed = ((Date.now() - leaseStart) / 1000).toFixed(0);
826
+ log(
827
+ `Guardian token lease: attempt failed after ${elapsed}s (${lastLeaseError.split("\n")[0]}), retrying...`,
828
+ );
829
+ }
830
+ await new Promise((r) => setTimeout(r, 2000));
831
+ }
832
+
833
+ if (!leaseSuccess) {
834
+ log(
835
+ `\u26a0\ufe0f Guardian token lease: FAILED after ${((Date.now() - leaseStart) / 1000).toFixed(1)}s — ${lastLeaseError ?? "unknown error"}`,
836
+ );
837
+ }
838
+
839
+ log("");
840
+ log(`\u2705 Docker containers are up and running!`);
841
+ log(` Name: ${instanceName}`);
842
+ log(` Runtime: ${runtimeUrl}`);
843
+ log("");
844
+ return { ready: true };
835
845
  }