npm - @vellumai/cli - Versions diffs - 0.8.9-staging.2 → 0.8.9-staging.3 - Mend

@vellumai/cli 0.8.9-staging.2 → 0.8.9-staging.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/node_modules/@vellumai/local-mode/src/__tests__/loopback-auth.test.ts +88 -0
package/node_modules/@vellumai/local-mode/src/index.ts +3 -0
package/node_modules/@vellumai/local-mode/src/lockfile.ts +15 -0
package/node_modules/@vellumai/local-mode/src/util.ts +33 -0
package/package.json +1 -1
package/src/__tests__/assistant-client-refresh.test.ts +65 -4
package/src/__tests__/client-tui-refresh.test.ts +50 -6
package/src/__tests__/guardian-token.test.ts +130 -4
package/src/__tests__/message.test.ts +86 -0
package/src/__tests__/teleport.test.ts +1 -0
package/src/__tests__/tui-midsession-refresh.test.ts +68 -9
package/src/commands/client.ts +100 -58
package/src/commands/hatch.ts +14 -4
package/src/commands/message.ts +109 -19
package/src/commands/teleport.ts +2 -0
package/src/components/DefaultMainScreen.tsx +27 -2
package/src/lib/assistant-client.ts +31 -13
package/src/lib/docker.ts +5 -0
package/src/lib/flag-args.test.ts +89 -0
package/src/lib/flag-args.ts +74 -0
package/src/lib/guardian-token.ts +54 -0
package/src/lib/hatch-local.ts +2 -0
package/src/lib/local.ts +6 -1
package/src/lib/runtime-url.ts +90 -0
package/src/lib/statefulset.ts +9 -0

package/src/components/DefaultMainScreen.tsx CHANGED Viewed

@@ -12,7 +12,12 @@ import { Box, render as inkRender, Text, useInput, useStdout } from "ink";
 import { SPECIES_CONFIG, type Species } from "../lib/constants";
 import { lookupAssistantByIdentifier } from "../lib/assistant-config";
 import { checkHealth } from "../lib/health-check";
-import { loadGuardianToken, refreshGuardianToken } from "../lib/guardian-token";
+import {
+  guardianTokenDueForRenewal,
+  loadGuardianToken,
+  refreshGuardianToken,
+} from "../lib/guardian-token";
+import { trustedRefreshUrl } from "../lib/runtime-url";
 import { appendHistory, loadHistory } from "../lib/input-history";
 import { tuiLog } from "../lib/tui-log";
 import { segmentsToPlainText } from "../lib/segments-to-plain-text";
@@ -193,6 +198,16 @@ function friendlyErrorMessage(status: number, body: string): string {
  * and access-only tokens. Because the TUI threads one shared `auth` object by
  * reference, mutating it here propagates to every later request and the SSE
  * reconnect — no callback threading needed.
+ *
+ * SECURITY: the refresh is bound to the paired entry's persisted runtime URL.
+ * `vellum client` lets `--url`/`-u` override the runtime URL while still using
+ * the selected paired entry's stored guardian token, so a victim pointed at an
+ * attacker-controlled (or poisoned/redirected) URL that returns 401 must NOT
+ * cause us to POST the long-lived refreshToken + deviceId to that origin. We
+ * therefore (a) refuse to refresh unless `baseUrl` normalizes to one of the
+ * entry's persisted URLs, and (b) send the refresh to the persisted URL rather
+ * than the caller-supplied `baseUrl` — defense in depth if the gate is ever
+ * bypassed.
  */
 export async function maybeRefreshAuthHeaders(
   baseUrl: string,
@@ -210,11 +225,21 @@ export async function maybeRefreshAuthHeaders(
     return false;
   }
+  // Bind the refresh origin to the persisted paired entry: refuse (and never
+  // leak credentials) if `baseUrl` was overridden via --url or poisoned to an
+  // origin that isn't one of the entry's persisted URLs. `refreshUrl` is the
+  // trusted persisted URL we actually send to.
+  const refreshUrl = trustedRefreshUrl(lookup.entry, baseUrl);
+  if (!refreshUrl) return false;
   const stored = loadGuardianToken(assistantId);
   if (!stored || stored.accessToken !== bearer || !stored.refreshToken) {
     return false;
   }
-  const refreshed = await refreshGuardianToken(baseUrl, assistantId);
+  // Only refresh once the token is actually due for renewal, so a forged 401
+  // on a still-valid token can't coax out the long-lived refresh credential.
+  if (!guardianTokenDueForRenewal(stored)) return false;
+  const refreshed = await refreshGuardianToken(refreshUrl, assistantId);
   if (!refreshed?.accessToken) return false;
   auth["Authorization"] = `Bearer ${refreshed.accessToken}`;
   return true;

package/src/lib/assistant-client.ts CHANGED Viewed

@@ -14,7 +14,11 @@
 import { resolveAssistant } from "./assistant-config.js";
 import { GATEWAY_PORT } from "./constants.js";
-import { loadGuardianToken, refreshGuardianToken } from "./guardian-token.js";
+import {
+  loadGuardianToken,
+  refreshGuardianToken,
+  guardianTokenDueForRenewal,
+} from "./guardian-token.js";
 const DEFAULT_TIMEOUT_MS = 30_000;
 const FALLBACK_RUNTIME_URL = `http://127.0.0.1:${GATEWAY_PORT}`;
@@ -219,21 +223,35 @@ export class AssistantClient {
     const response = await doFetch();
-    // Reactive auto-refresh: a paired/local guardian access token that has
-    // expired comes back 401. Refresh it once via the stored refresh credential
-    // and retry. Self-gating — refreshGuardianToken returns null unless a usable
-    // refresh token is stored, so ephemeral (`--token`) and access-only sessions
-    // just see the original 401. The platform session-auth path is never
-    // refreshed here (its token is managed by the Vellum platform).
+    // Reactive auto-refresh on a 401 for the guardian (non-session) path.
+    // Ephemeral (`--token`) and access-only sessions have no stored refresh
+    // credential and just see the original 401; the platform session-auth path
+    // is never refreshed here (its token is managed by the Vellum platform).
     if (response.status === 401 && !this.isSessionAuth) {
-      const refreshed = await refreshGuardianToken(
-        this.runtimeUrl,
-        this._assistantId,
-      );
-      if (refreshed?.accessToken) {
-        this.token = refreshed.accessToken;
+      const stored = loadGuardianToken(this._assistantId);
+      // Another process may have already rotated and persisted a fresh access
+      // token (e.g. a concurrent `vellum events`). Adopt it and retry — this
+      // sends no refresh credential, just picks up the newer local token.
+      if (stored?.accessToken && stored.accessToken !== this.token) {
+        this.token = stored.accessToken;
         return doFetch();
       }
+      // Otherwise only disclose the long-lived refresh token when our access
+      // token is actually due for renewal. A 401 on a still-valid token (e.g. a
+      // forged 401 from an impostor endpoint trying to coax out the refresh
+      // credential) is surfaced as-is, not refreshed.
+      if (stored?.refreshToken && guardianTokenDueForRenewal(stored)) {
+        const refreshed = await refreshGuardianToken(
+          this.runtimeUrl,
+          this._assistantId,
+        );
+        if (refreshed?.accessToken) {
+          this.token = refreshed.accessToken;
+          return doFetch();
+        }
+      }
     }
     return response;

package/src/lib/docker.ts CHANGED Viewed

@@ -662,6 +662,7 @@ export async function startContainers(
     bootstrapSecret?: string;
     cesServiceToken?: string;
     extraAssistantEnv?: Record<string, string>;
+    extraGatewayEnv?: Record<string, string>;
     gatewayPort: number;
     imageTags: Record<ServiceName, string>;
     instanceName: string;
@@ -1042,6 +1043,7 @@ export async function hatchDocker(
   name: string | null,
   watch: boolean = false,
   configValues: Record<string, string> = {},
+  flagEnvVars: Record<string, string> = {},
   options: HatchDockerOptions = {},
 ): Promise<void> {
   resetLogFile("hatch.log");
@@ -1321,12 +1323,15 @@ export async function hatchDocker(
       : ownSecret;
     emitProgress(4, 6, "Starting containers...");
+    const extraGatewayEnv =
+      Object.keys(flagEnvVars).length > 0 ? flagEnvVars : undefined;
     await startContainers(
       {
         signingKey,
         bootstrapSecret,
         cesServiceToken,
         extraAssistantEnv,
+        extraGatewayEnv,
         gatewayPort,
         imageTags,
         instanceName,

package/src/lib/flag-args.test.ts ADDED Viewed

@@ -0,0 +1,89 @@
+import { describe, expect, test, spyOn } from "bun:test";
+import { parseFeatureFlagArgs } from "./flag-args";
+describe("parseFeatureFlagArgs", () => {
+  test("single flag produces env var and empty remaining", () => {
+    const result = parseFeatureFlagArgs(["--flag", "voice-mode=true"]);
+    expect(result).toEqual({
+      envVars: { VELLUM_FLAG_VOICE_MODE: "true" },
+      remaining: [],
+    });
+  });
+  test("multiple flags produce multiple env vars", () => {
+    const result = parseFeatureFlagArgs([
+      "--flag",
+      "a=1",
+      "--flag",
+      "b=0",
+    ]);
+    expect(result).toEqual({
+      envVars: { VELLUM_FLAG_A: "1", VELLUM_FLAG_B: "0" },
+      remaining: [],
+    });
+  });
+  test("flags mixed with other args preserves remaining", () => {
+    const result = parseFeatureFlagArgs([
+      "--watch",
+      "--flag",
+      "x=y",
+      "--name",
+      "foo",
+    ]);
+    expect(result).toEqual({
+      envVars: { VELLUM_FLAG_X: "y" },
+      remaining: ["--watch", "--name", "foo"],
+    });
+  });
+  test("exits with error when --flag has no following argument", () => {
+    const exitSpy = spyOn(process, "exit").mockImplementation(() => {
+      throw new Error("process.exit");
+    });
+    const errorSpy = spyOn(console, "error").mockImplementation(() => {});
+    expect(() => parseFeatureFlagArgs(["--flag"])).toThrow("process.exit");
+    expect(errorSpy).toHaveBeenCalledWith(
+      "Error: --flag requires a key=value argument",
+    );
+    exitSpy.mockRestore();
+    errorSpy.mockRestore();
+  });
+  test("exits with error when value has no equals sign", () => {
+    const exitSpy = spyOn(process, "exit").mockImplementation(() => {
+      throw new Error("process.exit");
+    });
+    const errorSpy = spyOn(console, "error").mockImplementation(() => {});
+    expect(() => parseFeatureFlagArgs(["--flag", "noequals"])).toThrow(
+      "process.exit",
+    );
+    expect(errorSpy).toHaveBeenCalledWith(
+      'Error: --flag value must be in key=value format, got "noequals"',
+    );
+    exitSpy.mockRestore();
+    errorSpy.mockRestore();
+  });
+  test("exits with error when key is not kebab-case", () => {
+    const exitSpy = spyOn(process, "exit").mockImplementation(() => {
+      throw new Error("process.exit");
+    });
+    const errorSpy = spyOn(console, "error").mockImplementation(() => {});
+    expect(() => parseFeatureFlagArgs(["--flag", "UPPER=true"])).toThrow(
+      "process.exit",
+    );
+    expect(errorSpy).toHaveBeenCalledWith(
+      'Error: invalid flag key "UPPER". Keys must be kebab-case (e.g. "voice-mode")',
+    );
+    exitSpy.mockRestore();
+    errorSpy.mockRestore();
+  });
+});

package/src/lib/flag-args.ts ADDED Viewed

@@ -0,0 +1,74 @@
+/** Only allow simple kebab-case keys (e.g. "voice-mode", "ces-tools"). */
+const ALLOWED_KEY_RE = /^[a-z0-9][a-z0-9-]*$/;
+/**
+ * Extract repeatable `--flag key=value` pairs from a CLI arg list.
+ *
+ * Each `--flag` consumes the next argument as `key=value`. Keys are validated
+ * against a kebab-case pattern, then converted to env var names of the form
+ * `VELLUM_FLAG_<UPPER_SNAKE>`. All `--flag` pairs are stripped from the
+ * returned `remaining` array so downstream parsers never see them.
+ */
+export function parseFeatureFlagArgs(args: string[]): {
+  envVars: Record<string, string>;
+  remaining: string[];
+} {
+  const envVars: Record<string, string> = {};
+  const remaining: string[] = [];
+  let i = 0;
+  while (i < args.length) {
+    if (args[i] === "--flag") {
+      if (i + 1 >= args.length) {
+        console.error("Error: --flag requires a key=value argument");
+        process.exit(1);
+      }
+      const pair = args[i + 1]!;
+      const eqIdx = pair.indexOf("=");
+      if (eqIdx === -1) {
+        console.error(
+          `Error: --flag value must be in key=value format, got "${pair}"`,
+        );
+        process.exit(1);
+      }
+      const key = pair.slice(0, eqIdx);
+      const value = pair.slice(eqIdx + 1);
+      if (!ALLOWED_KEY_RE.test(key)) {
+        console.error(
+          `Error: invalid flag key "${key}". Keys must be kebab-case (e.g. "voice-mode")`,
+        );
+        process.exit(1);
+      }
+      const envName = `VELLUM_FLAG_${key.toUpperCase().replace(/-/g, "_")}`;
+      envVars[envName] = value;
+      i += 2;
+    } else {
+      remaining.push(args[i]!);
+      i += 1;
+    }
+  }
+  return { envVars, remaining };
+}
+const ENV_FLAG_PREFIX = "VELLUM_FLAG_";
+/**
+ * Scan `process.env` for ambient `VELLUM_FLAG_*` entries.
+ * Returns them as-is (same `Record<string, string>` shape as
+ * `parseFeatureFlagArgs().envVars`) so callers can merge both
+ * sources with `--flag` args winning over ambient env vars.
+ */
+export function readAmbientFlagEnvVars(): Record<string, string> {
+  const vars: Record<string, string> = {};
+  for (const [key, value] of Object.entries(process.env)) {
+    if (key.startsWith(ENV_FLAG_PREFIX) && value !== undefined) {
+      vars[key] = value;
+    }
+  }
+  return vars;
+}

package/src/lib/guardian-token.ts CHANGED Viewed

@@ -254,10 +254,64 @@ function releaseRefreshLock(lockPath: string): void {
  * process already rotated it while we waited, we return that fresh token
  * instead of replaying our now-stale refresh token.
  */
+/**
+ * The guardian refresh token is long-lived and replayable, so we only transmit
+ * it over a confidential channel: HTTPS, or a loopback host (local dev, or a
+ * same-host reverse proxy / tunnel agent). Refreshing against a non-loopback
+ * plaintext `http://` URL is refused — an on-path attacker could otherwise
+ * capture the refresh token and rotate it into fresh credentials.
+ *
+ * A user-chosen malicious `https://` destination is intentionally out of scope:
+ * HTTPS protects the channel, and the access token already goes wherever the
+ * configured URL points. This guard targets the plaintext-interception vector.
+ */
+function isLoopbackHostname(hostname: string): boolean {
+  const h = hostname.toLowerCase();
+  return (
+    h === "localhost" ||
+    h === "::1" ||
+    h === "[::1]" ||
+    h === "0:0:0:0:0:0:0:1" ||
+    /^127(?:\.\d{1,3}){3}$/.test(h)
+  );
+}
+function isConfidentialRefreshUrl(gatewayUrl: string): boolean {
+  try {
+    const url = new URL(gatewayUrl);
+    return url.protocol === "https:" || isLoopbackHostname(url.hostname);
+  } catch {
+    return false;
+  }
+}
+/**
+ * True when a stored guardian token has reached its renewal point — now is
+ * at/after `refreshAfter` (preferred) or `accessTokenExpiresAt`. Used to gate
+ * refresh so a forged/synthetic 401 on a still-valid token can't coax out the
+ * long-lived refresh credential. Unparseable timestamps → not due.
+ */
+export function guardianTokenDueForRenewal(token: GuardianTokenData): boolean {
+  const raw = token.refreshAfter || token.accessTokenExpiresAt;
+  const at = new Date(raw).getTime();
+  if (!Number.isFinite(at)) return false;
+  return at <= Date.now();
+}
 export async function refreshGuardianToken(
   gatewayUrl: string,
   assistantId: string,
 ): Promise<GuardianTokenData | null> {
+  // Never send the long-lived refresh token over a non-loopback plaintext URL.
+  if (!isConfidentialRefreshUrl(gatewayUrl)) {
+    console.warn(
+      `Refusing to refresh the guardian token over an insecure URL (${gatewayUrl}). ` +
+        "The refresh token is only sent over https or a loopback address — " +
+        "use an https URL (e.g. a tunnel) or connect over loopback.",
+    );
+    return null;
+  }
   const before = loadGuardianToken(assistantId);
   if (!before) return null;

package/src/lib/hatch-local.ts CHANGED Viewed

@@ -164,6 +164,7 @@ export async function hatchLocal(
   watch: boolean = false,
   keepAlive: boolean = false,
   configValues: Record<string, string> = {},
+  flagEnvVars: Record<string, string> = {},
   options: HatchLocalOptions = {},
 ): Promise<HatchLocalResult> {
   const reporter = options.reporter ?? consoleLifecycleReporter;
@@ -234,6 +235,7 @@ export async function hatchLocal(
     runtimeUrl = await startGateway(watch, resources, {
       signingKey,
       bootstrapSecret,
+      envOverrides: flagEnvVars,
     });
   } catch (error) {
     // Gateway failed — stop the daemon we just started so we don't leave

package/src/lib/local.ts CHANGED Viewed

@@ -1057,7 +1057,11 @@ export async function startLocalDaemon(
 export async function startGateway(
   watch: boolean = false,
   resources?: LocalInstanceResources,
-  options?: { signingKey?: string; bootstrapSecret?: string },
+  options?: {
+    signingKey?: string;
+    bootstrapSecret?: string;
+    envOverrides?: Record<string, string>;
+  },
 ): Promise<string> {
   const effectiveGatewayPort = resources?.gatewayPort ?? GATEWAY_PORT;
@@ -1083,6 +1087,7 @@ export async function startGateway(
   const gatewayEnv: Record<string, string> = {
     ...(process.env as Record<string, string>),
+    ...options?.envOverrides,
     RUNTIME_HTTP_PORT: String(effectiveDaemonPort),
     GATEWAY_PORT: String(effectiveGatewayPort),
     // Pass gateway operational settings via env vars so the CLI does not

package/src/lib/runtime-url.ts CHANGED Viewed

@@ -1,3 +1,6 @@
+import { hostname } from "node:os";
+import { getLocalLanIPv4 } from "./local";
 import type { AssistantEntry } from "./assistant-config.js";
 /**
@@ -50,3 +53,90 @@ export function resolveRuntimeUrl(
   }
   return `${entry.runtimeUrl}/v1/${subpath}`;
 }
+/**
+ * If the hostname in `url` matches this machine's local DNS name, LAN IP, or
+ * raw hostname, replace it with 127.0.0.1 so the client avoids mDNS round-trips
+ * when talking to an assistant running on the same machine. Trailing slashes are
+ * stripped on a swap. Returns the input unchanged if it doesn't parse as a URL.
+ */
+function maybeSwapToLocalhost(url: string): string {
+  let parsed: URL;
+  try {
+    parsed = new URL(url);
+  } catch {
+    return url;
+  }
+  const urlHost = parsed.hostname.toLowerCase();
+  const localNames: string[] = [];
+  const host = hostname();
+  if (host) {
+    localNames.push(host.toLowerCase());
+    // Also consider the bare name without .local suffix
+    if (host.toLowerCase().endsWith(".local")) {
+      localNames.push(host.toLowerCase().slice(0, -".local".length));
+    }
+  }
+  const lanIp = getLocalLanIPv4();
+  if (lanIp) {
+    localNames.push(lanIp);
+  }
+  if (localNames.includes(urlHost)) {
+    parsed.hostname = "127.0.0.1";
+    return parsed.toString().replace(/\/+$/, "");
+  }
+  return url;
+}
+/**
+ * Canonical form of a runtime/base URL used throughout the CLI: trailing
+ * slashes stripped, then localhost-swapped. This is exactly the transform
+ * `vellum client` applies to the runtime URL it hands the TUI, so comparing two
+ * URLs after passing both through this function is a like-for-like comparison.
+ */
+export function normalizeRuntimeUrl(url: string): string {
+  return maybeSwapToLocalhost(url.replace(/\/+$/, ""));
+}
+/**
+ * SECURITY: decide whether a guardian-token refresh may be sent to
+ * `candidateUrl`, and to which URL it should actually go.
+ *
+ * `vellum client` lets `--url`/`-u` override the runtime URL while still reusing
+ * the selected entry's stored guardian token, so a victim pointed at an
+ * attacker-controlled (or poisoned/redirected) URL must NOT cause us to POST the
+ * long-lived refreshToken + deviceId there. Refresh is permitted only when
+ * `candidateUrl` normalizes to one of the entry's persisted URLs (`localUrl`,
+ * which the CLI prefers when present, or `runtimeUrl`).
+ *
+ * Returns the persisted URL that the candidate matched — never the
+ * caller-supplied `candidateUrl` verbatim — so credentials only ever reach a
+ * trusted origin even if a caller forgets to use this return value. The matched
+ * URL is preferred over always returning `runtimeUrl` so the refresh stays on
+ * the same interface the session is using: e.g. a local entry may persist both a
+ * loopback `localUrl` (which `vellum client` defaults to) and an externally
+ * discovered `runtimeUrl`, and refreshing the loopback session against the
+ * external address could be unreachable or needlessly cross the public
+ * interface. Returns `null` when the candidate is untrusted (caller must skip
+ * the refresh).
+ */
+export function trustedRefreshUrl(
+  entry: Pick<AssistantEntry, "runtimeUrl" | "localUrl">,
+  candidateUrl: string,
+): string | null {
+  const candidate = normalizeRuntimeUrl(candidateUrl);
+  // localUrl first: it's what the CLI prefers when present, so the candidate is
+  // most likely to match it, and we want to keep the refresh on that interface.
+  for (const persisted of [entry.localUrl, entry.runtimeUrl]) {
+    if (persisted && normalizeRuntimeUrl(persisted) === candidate) {
+      return persisted;
+    }
+  }
+  return null;
+}

package/src/lib/statefulset.ts CHANGED Viewed

@@ -257,6 +257,7 @@ export interface BuildServiceRunArgsOpts extends DockerRunSecrets {
   instanceName: string;
   res: DockerResourceNames;
   extraAssistantEnv?: Record<string, string>;
+  extraGatewayEnv?: Record<string, string>;
   /** Avatar device path, if available. Injected by `docker.ts` after resolving. */
   avatarDevicePath?: string;
 }
@@ -285,6 +286,7 @@ export function buildServiceRunArgs(
     instanceName,
     res,
     extraAssistantEnv,
+    extraGatewayEnv,
     avatarDevicePath,
   } = opts;
@@ -346,6 +348,13 @@ export function buildServiceRunArgs(
         }
       }
+      // Gateway-only additions (e.g. feature flag env overrides)
+      if (svc === "gateway" && extraGatewayEnv) {
+        for (const [k, v] of Object.entries(extraGatewayEnv)) {
+          args.push("-e", `${k}=${v}`);
+        }
+      }
       // Assistant-only computed / optional additions
       if (svc === "assistant") {
         args.push(