npm - @vellumai/cli - Versions diffs - 0.8.3 → 0.8.5 - Mend

@vellumai/cli 0.8.3 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/AGENTS.md +29 -7
package/package.json +1 -1
package/src/__tests__/api-key-check.test.ts +78 -0
package/src/__tests__/assistant-config.test.ts +108 -0
package/src/__tests__/assistant-target-args.test.ts +30 -0
package/src/__tests__/host-image-loader.test.ts +206 -0
package/src/__tests__/ps-platform-status.test.ts +100 -22
package/src/__tests__/retire.test.ts +241 -0
package/src/__tests__/use.test.ts +144 -0
package/src/commands/client.ts +27 -24
package/src/commands/ps.ts +107 -105
package/src/commands/retire.ts +144 -34
package/src/commands/roadmap.ts +449 -0
package/src/commands/use.ts +24 -10
package/src/components/DefaultMainScreen.tsx +27 -115
package/src/index.ts +3 -0
package/src/lib/__tests__/port-allocator.test.ts +117 -0
package/src/lib/__tests__/step-runner.test.ts +85 -0
package/src/lib/api-key-check.ts +40 -0
package/src/lib/assistant-config.ts +84 -5
package/src/lib/assistant-target-args.ts +21 -0
package/src/lib/docker.ts +67 -16
package/src/lib/hatch-local.ts +11 -0
package/src/lib/host-image-loader.ts +138 -0
package/src/lib/platform-releases.ts +12 -5
package/src/lib/port-allocator.ts +93 -0
package/src/lib/statefulset.ts +0 -10
package/src/lib/step-runner.ts +40 -7
package/src/shared/provider-env-vars.ts +1 -0

package/src/lib/docker.ts CHANGED Viewed

@@ -12,7 +12,6 @@ import {
   setActiveAssistant,
 } from "./assistant-config";
 import type { AssistantEntry } from "./assistant-config";
-import { buildHatchConfigValues, writeInitialConfig } from "./config-utils";
 import { buildServiceRunArgs } from "./statefulset.js";
 import type { Species } from "./constants";
 import { getDefaultPorts } from "./environments/paths.js";
@@ -21,12 +20,21 @@ import { leaseGuardianToken } from "./guardian-token";
 import { logHatchNextSteps } from "./hatch-next-steps.js";
 import { isVellumProcess, stopProcess } from "./process";
 import { generateInstanceName } from "./random-name";
-import { resolveImageRefs } from "./platform-releases.js";
+import {
+  HOST_IMAGE_LOADER_URL,
+  isLocalBuildRef,
+  loadImageViaHost,
+} from "./host-image-loader.js";
+import {
+  fetchLatestStableVersion,
+  resolveImageRefs,
+} from "./platform-releases.js";
 import {
   configureHatchProviderApiKey,
   formatProviderName,
   resolveHatchProvider,
 } from "./provider-secrets.js";
+import { findOpenPort } from "./port-allocator.js";
 import { exec, execOutput } from "./step-runner";
 import {
   closeLogFile,
@@ -637,7 +645,6 @@ export async function startContainers(
     extraAssistantEnv?: Record<string, string>;
     gatewayPort: number;
     imageTags: Record<ServiceName, string>;
-    defaultWorkspaceConfigPath?: string;
     instanceName: string;
     res: ReturnType<typeof dockerResourceNames>;
   },
@@ -973,7 +980,22 @@ export async function hatchDocker(
     await ensureDockerInstalled();
     const instanceName = generateInstanceName(species, name);
-    const gatewayPort = getDefaultPorts(getCurrentEnvironment()).gateway;
+    // Resolve the gateway's host port dynamically. The env-default
+    // (production 7830 / non-prod overrides) is just the *preferred*
+    // starting point — if it's taken by another local assistant, eval
+    // run, or unrelated process, we walk upward until we find a free
+    // port. This replaces the previous "first one in wins, everyone
+    // else gets a docker bind error" behavior and removes the need for
+    // an orphan-cleanup pre-flight in the evals harness.
+    const preferredGatewayPort = getDefaultPorts(
+      getCurrentEnvironment(),
+    ).gateway;
+    const gatewayPort = await findOpenPort(preferredGatewayPort);
+    if (gatewayPort !== preferredGatewayPort) {
+      log(
+        `Preferred gateway port ${preferredGatewayPort} is in use; allocated ${gatewayPort} for this instance.`,
+      );
+    }
     const imageTags: Record<ServiceName, string> = {
       assistant: "",
@@ -1059,8 +1081,23 @@ export async function hatchDocker(
         imageSource = "env override";
         log("Using image overrides from environment variables");
       } else {
-        const version = cliPkg.version;
-        const versionTag = version ? `v${version}` : "latest";
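+        // Ask the platform for the latest stable release and use it as the
+        // version tag, adding the `v` prefix when it is missing. The image
+        // refs resolved for that tag may include dev/local builds. If no
+        // release is returned, fall back to the CLI's own version (or
+        // `latest` when that is unset) so a default tag can still be resolved.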
+        log("🔍 Fetching latest stable release...");
+        const latestVersion = await fetchLatestStableVersion();
+        let versionTag: string;
+        if (latestVersion) {
+          versionTag = latestVersion.startsWith("v")
+            ? latestVersion
+            : `v${latestVersion}`;
+        } else {
+          const fallback = cliPkg.version;
+          versionTag = fallback ? `v${fallback}` : "latest";
+          log(
+            `⚠️  Platform releases unavailable; falling back to CLI version ${versionTag}`,
+          );
+        }
         log("🔍 Resolving image references...");
         const resolved = await resolveImageRefs(versionTag, log);
         imageTags.assistant = resolved.imageTags.assistant;
@@ -1078,11 +1115,25 @@ export async function hatchDocker(
       log(`     credential-executor:  ${imageTags["credential-executor"]}`);
       log("");
-      log("📦 Pulling Docker images...");
-      await exec("docker", ["pull", imageTags.assistant]);
-      await exec("docker", ["pull", imageTags.gateway]);
-      await exec("docker", ["pull", imageTags["credential-executor"]]);
-      log("✅ Docker images pulled");
+      // Per-ref branching: local-build refs need the image-loader; external
+      // registry refs get a normal `docker pull`. The two transports compose
+      // cleanly — a release can mix different sources for different images.
+      log("📦 Acquiring Docker images...");
+      for (const service of [
+        "assistant",
+        "gateway",
+        "credential-executor",
+      ] as const) {
+        const ref = imageTags[service];
+        if (isLocalBuildRef(ref)) {
+          log(`   ↪ loading ${ref} via host image-loader`);
+          await loadImageViaHost(HOST_IMAGE_LOADER_URL, ref, log);
+        } else {
+          log(`   ↪ pulling ${ref}`);
+          await exec("docker", ["pull", ref]);
+        }
+      }
+      log("✅ Docker images acquired");
     }
     const res = dockerResourceNames(instanceName);
@@ -1113,10 +1164,11 @@ export async function hatchDocker(
       "chown 1001:1001 /workspace /run/assistant-ipc /run/gateway-ipc",
     ]);
-    // Write --config key=value pairs to a temp file that gets bind-mounted
-    // into the assistant container and read via VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH.
-    const hatchConfigValues = buildHatchConfigValues(configValues, provider);
-    const defaultWorkspaceConfigPath = writeInitialConfig(hatchConfigValues);
+    // BYOK setup (API key, custom profiles, active-profile selection) is
+    // driven post-boot by the CLI calling the Assistant's public APIs
+    // (`POST /v1/secrets`, etc.) via `configureHatchProviderApiKey` below.
+    // The Assistant container comes up clean — no overlay file, no
+    // client-side workspace-config injection.
     const cesServiceToken = randomBytes(32).toString("hex");
     const signingKey = randomBytes(32).toString("hex");
@@ -1140,7 +1192,6 @@ export async function hatchDocker(
         cesServiceToken,
         gatewayPort,
         imageTags,
-        defaultWorkspaceConfigPath,
         instanceName,
         res,
       },

package/src/lib/hatch-local.ts CHANGED Viewed

@@ -40,6 +40,7 @@ import {
   resolveHatchProvider,
 } from "./provider-secrets.js";
 import { logHatchNextSteps } from "./hatch-next-steps.js";
+import { checkProviderApiKey } from "./api-key-check.js";
 /**
  * Attempts to place a symlink at the given path pointing to cliBinary.
@@ -178,6 +179,16 @@ export async function hatchLocal(
   console.log(`   Species: ${species}`);
   console.log("");
+  const apiKeyCheck = checkProviderApiKey();
+  if (!apiKeyCheck.hasKey) {
+    console.warn(
+      "Warning: No LLM provider API key is configured. The assistant will fail when you try to send a message.",
+    );
+    console.warn("  To fix, export your key before running vellum hatch:");
+    console.warn("  export ANTHROPIC_API_KEY=<your-key>");
+    console.warn("");
+  }
   if (!process.env.APP_VERSION) {
     process.env.APP_VERSION = cliPkg.version;
   }

package/src/lib/host-image-loader.ts ADDED Viewed

@@ -0,0 +1,138 @@
+/**
+ * Client for the host-side image-loader endpoint. Used to acquire image refs
+ * that aren't pullable from any external registry.
+ *
+ * The endpoint URL is a well-known convention — port 5500 on 127.0.0.1.
+ * The CLI calls it whenever it sees a ref that starts with `vellum-local/`.
+ * Such refs only exist in a local docker daemon and can't be
+ * `docker pull`'d from any external registry.
+ *
+ * The endpoint contract is intentionally minimal — POST a ref as JSON, get
+ * back a 200 once the image is in the host docker daemon, or a non-2xx
+ * with a descriptive error message. The client doesn't know (or care) what
+ * transport the server uses to put the image there.
+ */
+/**
+ * Well-known URL of the host-side image-loader server.
+ */
+export const HOST_IMAGE_LOADER_URL = "http://127.0.0.1:5500/v1/images/load";
+/**
+ * Prefix for image refs that only exist in a local docker daemon.
+ * These cannot be `docker pull`'d from any external registry; the CLI must
+ * route them through the host image-loader instead.
+ */
+const LOCAL_BUILD_REF_PREFIX = "vellum-local/";
+/** Whether `ref` points at a local-build image that requires the host loader. */
+export function isLocalBuildRef(ref: string): boolean {
+  return ref.startsWith(LOCAL_BUILD_REF_PREFIX);
+}
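+/** Default timeout (two minutes) for image-load requests. Large
+ * `docker save | docker load` pipelines for full assistant images can run
+ * for a minute or two on cold caches, so this default is a ceiling rather
+ * than generous headroom; pass `options.timeoutMs` to `loadImageViaHost`
+ * to raise it. */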
+const LOAD_TIMEOUT_MS = 120_000;
+export interface HostImageLoaderResponse {
+  loaded?: boolean;
+  ref?: string;
+  error?: string;
+}
+export class HostImageLoaderError extends Error {
+  readonly url: string;
+  readonly ref: string;
+  readonly status?: number;
+  constructor(message: string, url: string, ref: string, status?: number) {
+    super(message);
+    this.name = "HostImageLoaderError";
+    this.url = url;
+    this.ref = ref;
+    this.status = status;
+  }
+}
+function isConnectionRefused(err: unknown): boolean {
+  if (!err || typeof err !== "object") return false;
+  const e = err as { cause?: { code?: string }; code?: string };
+  return e.cause?.code === "ECONNREFUSED" || e.code === "ECONNREFUSED";
+}
+/**
+ * Documents {@link loadImageViaHost}, which is declared below, after the
+ * `FetchLike` type.
+ *
+ * Asks the host-side loader to acquire `ref` into the host docker daemon.
+ *
+ * Resolves when the server returns a successful (2xx) response; throws a
+ * {@link HostImageLoaderError} with a user-actionable message on any
+ * failure (network, timeout, non-2xx).
+ *
+ * The `log` callback receives one-line status updates; pass the same logger
+ * the surrounding command uses.
+ */
+/** Minimal fetch signature accepted for test injection. */
+export type FetchLike = (
+  input: string | URL,
+  init?: {
+    method?: string;
+    headers?: Record<string, string>;
+    body?: string;
+    signal?: AbortSignal;
+  },
+) => Promise<Response>;
+export async function loadImageViaHost(
+  url: string,
+  ref: string,
+  log: (msg: string) => void,
+  options: { timeoutMs?: number; fetchImpl?: FetchLike } = {},
+): Promise<void> {
+  const timeoutMs = options.timeoutMs ?? LOAD_TIMEOUT_MS;
+  const fetchImpl: FetchLike =
+    options.fetchImpl ?? (fetch as unknown as FetchLike);
+  log(`   ↪ ${ref}`);
+  let response: Response;
+  try {
+    response = await fetchImpl(url, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ ref }),
+      signal: AbortSignal.timeout(timeoutMs),
+    });
+  } catch (err) {
+    if (isConnectionRefused(err)) {
+      throw new HostImageLoaderError(
+        `Could not reach image-loader at ${url}. The ref \`${ref}\` is a ` +
+          `local-build image that requires the loader. Is the loader running? ` +
+          `Start it, or set VELLUM_ASSISTANT_IMAGE / VELLUM_GATEWAY_IMAGE / ` +
+          `VELLUM_CREDENTIAL_EXECUTOR_IMAGE to bypass image resolution.`,
+        url,
+        ref,
+      );
+    }
+    const message = err instanceof Error ? err.message : String(err);
+    throw new HostImageLoaderError(
+      `Image-loader request for ${ref} failed: ${message}`,
+      url,
+      ref,
+    );
+  }
+  if (!response.ok) {
+    let body: HostImageLoaderResponse | null = null;
+    try {
+      body = (await response.json()) as HostImageLoaderResponse;
+    } catch {
+      // Server returned non-JSON; fall through with status-only error.
+    }
+    const detail = body?.error ? `: ${body.error}` : "";
+    throw new HostImageLoaderError(
+      `Image-loader returned HTTP ${response.status} for ${ref}${detail}`,
+      url,
+      ref,
+      response.status,
+    );
+  }
+}

package/src/lib/platform-releases.ts CHANGED Viewed

@@ -46,7 +46,10 @@ export async function resolveImageRefs(
   const platformRefs = await fetchPlatformImageRefs(version, log);
   if (platformRefs) {
     log?.("Resolved image refs from platform API");
-    return { imageTags: platformRefs, source: "platform" };
+    return {
+      imageTags: platformRefs.imageTags,
+      source: "platform",
+    };
   }
   log?.("Falling back to DockerHub tags");
@@ -68,7 +71,9 @@ export async function resolveImageRefs(
 async function fetchPlatformImageRefs(
   version: string,
   log?: (msg: string) => void,
-): Promise<Record<ServiceName, string> | null> {
+): Promise<{
+  imageTags: Record<ServiceName, string>;
+} | null> {
   try {
     const platformUrl = getPlatformUrl();
     const url = `${platformUrl}/v1/releases/?stable=true`;
@@ -123,9 +128,11 @@ async function fetchPlatformImageRefs(
     }
     return {
-      assistant: assistantImage,
-      "credential-executor": credentialExecutorImage,
-      gateway: gatewayImage,
+      imageTags: {
+        assistant: assistantImage,
+        "credential-executor": credentialExecutorImage,
+        gateway: gatewayImage,
+      },
     };
   } catch (err) {
     const message = err instanceof Error ? err.message : String(err);

package/src/lib/port-allocator.ts ADDED Viewed

@@ -0,0 +1,93 @@
+import { createServer } from "net";
+/**
+ * Walks upward from `preferred` and returns the first host port that the
+ * kernel will let us bind to. Used by `hatchDocker` to pick the gateway's
+ * host-side port instead of always grabbing the env-default (e.g. 7830 /
+ * 20100), which collides with any other local assistant — eval-spawned or
+ * otherwise — already bound there.
+ *
+ * The previous design (`evals/src/lib/orphan-cleanup.ts`) tried to fix this
+ * by sweeping dead eval-run resources before the next hatch. That only
+ * helped when the conflict came from a prior eval run; an unrelated local
+ * `vellum hatch` holding the port wedged the whole flow. Discovering an
+ * open port at hatch time is the proper fix and lets us delete the cleanup
+ * pre-flight entirely.
+ *
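+ * Walks linearly from `preferred` upward rather than asking the kernel for
+ * an arbitrary ephemeral port (`listen(0)`) so the resulting port stays
+ * legible to operators — three local assistants land on N, N+1, N+2
+ * instead of three random numbers in the 32768-60999 range.
+ *
+ * Gives up with an error after `maxAttempts` candidates (default 50) or as
+ * soon as the next candidate would exceed 65535. Probe errors other than
+ * `EADDRINUSE` / `EADDRNOTAVAIL` are rethrown immediately. The probe
+ * socket is closed before the port is returned, so the port is only known
+ * to have been free at probe time — another process can still claim it
+ * before the caller binds.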
+ */
+export async function findOpenPort(
+  preferred: number,
+  options: { maxAttempts?: number; host?: string } = {},
+): Promise<number> {
+  const maxAttempts = options.maxAttempts ?? 50;
+  const host = options.host ?? "0.0.0.0";
+  if (!Number.isInteger(preferred) || preferred < 1 || preferred > 65535) {
+    throw new Error(
+      `findOpenPort: preferred port ${preferred} is not a valid TCP port`,
+    );
+  }
+  if (!Number.isInteger(maxAttempts) || maxAttempts < 1) {
+    throw new Error(
+      `findOpenPort: maxAttempts ${maxAttempts} must be a positive integer`,
+    );
+  }
+  let lastError: Error | null = null;
+  for (let offset = 0; offset < maxAttempts; offset++) {
+    const port = preferred + offset;
+    if (port > 65535) break;
+    try {
+      await probePort(port, host);
+      return port;
+    } catch (err) {
+      lastError = err as Error;
+      if (!isPortInUseError(err)) {
+        // EACCES / EPERM / etc. are not "try the next port" signals — those
+        // are configuration problems an operator needs to see immediately.
+        throw err;
+      }
+    }
+  }
+  throw new Error(
+    `findOpenPort: no open port in range [${preferred}, ${preferred + maxAttempts - 1}]` +
+      (lastError ? ` (last error: ${lastError.message})` : ""),
+  );
+}
+/**
+ * Resolves if `port` on `host` can be bound right now. Rejects with the
+ * error emitted on the server's `error` event (typically `EADDRINUSE`)
+ * otherwise. Always closes the probe server before settling so we don't
+ * leak the port we just proved was free.
+ */
+function probePort(port: number, host: string): Promise<void> {
+  return new Promise((resolve, reject) => {
+    const server = createServer();
+    const cleanup = (cb: () => void): void => {
+      server.removeAllListeners();
+      server.close(() => cb());
+    };
+    server.once("error", (err) => {
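+      // The server never started listening, so close() has no socket to
+      // release, but it still invokes its callback (with an
+      // ERR_SERVER_NOT_RUNNING error that `cleanup` ignores), and that
+      // callback is what rejects the promise. Calling it anyway also keeps
+      // cleanup uniform.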
+      cleanup(() => reject(err));
+    });
+    server.once("listening", () => {
+      cleanup(() => resolve());
+    });
+    server.listen(port, host);
+  });
+}
+function isPortInUseError(err: unknown): boolean {
+  if (err instanceof Error && "code" in err) {
+    const code = (err as NodeJS.ErrnoException).code;
+    return code === "EADDRINUSE" || code === "EADDRNOTAVAIL";
+  }
+  return false;
+}

package/src/lib/statefulset.ts CHANGED Viewed

@@ -257,7 +257,6 @@ export interface BuildServiceRunArgsOpts extends DockerRunSecrets {
   instanceName: string;
   res: DockerResourceNames;
   extraAssistantEnv?: Record<string, string>;
-  defaultWorkspaceConfigPath?: string;
   /** Avatar device path, if available. Injected by `docker.ts` after resolving. */
   avatarDevicePath?: string;
 }
@@ -286,7 +285,6 @@ export function buildServiceRunArgs(
     instanceName,
     res,
     extraAssistantEnv,
-    defaultWorkspaceConfigPath,
     avatarDevicePath,
   } = opts;
@@ -355,14 +353,6 @@ export function buildServiceRunArgs(
           "-e", `GATEWAY_INTERNAL_URL=http://localhost:${GATEWAY_INTERNAL_PORT}`,
         );
-        if (defaultWorkspaceConfigPath) {
-          const cPath = `/tmp/vellum-default-workspace-config-${Date.now()}.json`;
-          args.push(
-            "-v", `${defaultWorkspaceConfigPath}:${cPath}:ro`,
-            "-e", `VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH=${cPath}`,
-          );
-        }
         if (extraAssistantEnv) {
           for (const [k, v] of Object.entries(extraAssistantEnv)) {
             args.push("-e", `${k}=${v}`);

package/src/lib/step-runner.ts CHANGED Viewed

@@ -1,5 +1,38 @@
 import { spawn } from "child_process";
+/**
+ * Build the error message for a failed child process. **Never include the
+ * argv** — `docker run ...` invocations carry `-e ANTHROPIC_API_KEY=…` /
+ * `-e OPENAI_API_KEY=…` style flags, and the resulting `Error.message`
+ * propagates all the way to:
+ *
+ *   - the CLI's top-level catch (`console.error("Error:", err.message)`)
+ *     which leaks them onto stderr,
+ *   - `subprocess-*.log` files captured by the evals harness when it
+ *     spawns `vellum hatch` (which then become the inlined log on the
+ *     run-detail report page),
+ *   - `run.json#error` and the last-N-lines tail in `progress.ndjson`
+ *     that the evals harness emits for `SubprocessFailedError`.
+ *
+ * The diagnostic substring callers actually grep for ("no such container",
+ * "is not running", "port is already allocated", …) lives in the child's
+ * stderr/stdout, which we DO preserve below. Keep the command name only —
+ * it's enough to disambiguate which step failed without quoting secrets.
+ *
+ * Exported so the unit test can assert no `-e KEY=...` slips back in.
+ */
+export function buildExecErrorMessage(
+  command: string,
+  code: number | null,
+  stderr: string,
+  stdout: string,
+): string {
+  const codeLabel = code === null ? "an unknown code" : `code ${code}`;
+  const header = `${command} exited with ${codeLabel}`;
+  const output = [stderr.trim(), stdout.trim()].filter(Boolean).join("\n");
+  return output ? `${header}\n${output}` : header;
+}
 export function exec(
   command: string,
   args: string[],
@@ -25,11 +58,7 @@ export function exec(
       if (code === 0) {
         resolve();
       } else {
-        const msg = `"${command} ${args.join(" ")}" exited with code ${code}`;
-        const output = [stderr.trim(), stdout.trim()]
-          .filter(Boolean)
-          .join("\n");
-        reject(new Error(output ? `${msg}\n${output}` : msg));
+        reject(new Error(buildExecErrorMessage(command, code, stderr, stdout)));
       }
     });
     child.on("error", reject);
@@ -61,8 +90,12 @@ export function execOutput(
       if (code === 0) {
         resolve(stdout.trim());
       } else {
-        const msg = `"${command} ${args.join(" ")}" exited with code ${code}`;
-        reject(new Error(stderr.trim() ? `${msg}\n${stderr.trim()}` : msg));
+        // execOutput intentionally drops stdout from the error message
+        // (callers that read stdout via the success path don't expect
+        // partial stdout to land in error.message). Stderr is enough
+        // for diagnostics, and the no-args-in-message guarantee from
+        // exec() still holds.
+        reject(new Error(buildExecErrorMessage(command, code, stderr, "")));
       }
     });
     child.on("error", reject);

package/src/shared/provider-env-vars.ts CHANGED Viewed

@@ -26,6 +26,7 @@ export const LLM_PROVIDER_ENV_VAR_NAMES: Record<string, string> = {
   gemini: "GEMINI_API_KEY",
   fireworks: "FIREWORKS_API_KEY",
   openrouter: "OPENROUTER_API_KEY",
+  minimax: "MINIMAX_API_KEY",
 };
 /** Search-provider env var names. Mirrors `SEARCH_PROVIDER_CATALOG` BYOK entries. */