@vellumai/cli 0.6.4 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ import {
5
5
  existsSync,
6
6
  mkdirSync,
7
7
  readFileSync,
8
+ statSync,
8
9
  writeFileSync,
9
10
  } from "fs";
10
11
  import { platform } from "os";
@@ -12,6 +13,7 @@ import { dirname, join } from "path";
12
13
 
13
14
  import { getConfigDir } from "./environments/paths.js";
14
15
  import { getCurrentEnvironment } from "./environments/resolve.js";
16
+ import { SEEDS } from "./environments/seeds.js";
15
17
 
16
18
  const DEVICE_ID_SALT = "vellum-assistant-host-id";
17
19
 
@@ -200,3 +202,64 @@ export async function leaseGuardianToken(
200
202
  saveGuardianToken(assistantId, tokenData);
201
203
  return tokenData;
202
204
  }
205
+
206
/**
 * Seed the current environment's guardian token from a sibling environment.
 *
 * Config directories (`~/.config/vellum{-env}/`) are scoped by
 * VELLUM_ENVIRONMENT. Lockfiles are cross-written at hatch time, but the
 * guardian token only lands under the environment the assistant was hatched
 * in. Waking the same assistant under another environment (e.g. a freshly
 * built desktop app shipping VELLUM_ENVIRONMENT=local after a hatch under
 * dev) would otherwise leave the app without a bearer token and push it into
 * a 401 → auth-rate-limit → 429 cascade against the local gateway.
 *
 * Sibling tokens are tried most-recently-modified first; the first one whose
 * refresh token has not expired is copied byte-for-byte.
 *
 * @param assistantId Assistant whose guardian token should be seeded.
 * @returns true if a token was seeded, false if a token was already present
 *   or no sibling env had one to copy.
 */
export function seedGuardianTokenFromSiblingEnv(assistantId: string): boolean {
  // Nothing to do when this environment already holds a token.
  if (loadGuardianToken(assistantId) !== null) {
    return false;
  }

  const activeEnvName = getCurrentEnvironment().name;
  const targetPath = getGuardianTokenPath(assistantId);

  // Collect every sibling token file that exists, newest first.
  const siblingTokens = Object.values(SEEDS)
    .filter((env) => env.name !== activeEnvName)
    .flatMap((env) => {
      const tokenPath = join(
        getConfigDir(env),
        "assistants",
        assistantId,
        "guardian-token.json",
      );
      try {
        return [{ path: tokenPath, mtimeMs: statSync(tokenPath).mtimeMs }];
      } catch {
        // No token (or not stat-able) in this sibling environment.
        return [];
      }
    })
    .sort((a, b) => b.mtimeMs - a.mtimeMs);

  const nowMs = Date.now();
  for (const candidate of siblingTokens) {
    try {
      const contents = readFileSync(candidate.path);
      const token = JSON.parse(contents.toString("utf-8")) as GuardianTokenData;
      const expiresAtMs = Date.parse(token.refreshTokenExpiresAt);
      const stillValid = Number.isFinite(expiresAtMs) && expiresAtMs > nowMs;
      if (!stillValid) continue;

      const targetDir = dirname(targetPath);
      if (!existsSync(targetDir)) {
        mkdirSync(targetDir, { recursive: true, mode: 0o700 });
      }
      // Copy the raw bytes, then chmod explicitly after the write.
      writeFileSync(targetPath, contents, { mode: 0o600 });
      chmodSync(targetPath, 0o600);
      return true;
    } catch {
      // Unreadable, malformed, or unwritable — fall through to the next candidate.
    }
  }
  return false;
}
@@ -305,10 +305,26 @@ export async function hatchLocal(
305
305
  // IP which the daemon rejects as non-loopback.
306
306
  emitProgress(6, 7, "Securing connection...");
307
307
  const loopbackUrl = `http://127.0.0.1:${resources.gatewayPort}`;
308
- try {
309
- await leaseGuardianToken(loopbackUrl, instanceName);
310
- } catch (err) {
311
- console.error(`⚠️ Guardian token lease failed: ${err}`);
308
+ const maxLeaseAttempts = 3;
309
+ for (let attempt = 1; attempt <= maxLeaseAttempts; attempt++) {
310
+ try {
311
+ await leaseGuardianToken(loopbackUrl, instanceName);
312
+ break;
313
+ } catch (err) {
314
+ if (attempt < maxLeaseAttempts) {
315
+ const delayMs = 2000 * 2 ** (attempt - 1);
316
+ console.error(
317
+ `⚠️ Guardian token lease attempt ${attempt}/${maxLeaseAttempts} failed — retrying in ${delayMs / 1000}s: ${err}`,
318
+ );
319
+ await new Promise((r) => setTimeout(r, delayMs));
320
+ } else {
321
+ console.error(
322
+ `⚠️ Guardian token lease failed after ${maxLeaseAttempts} attempts: ${err}\n` +
323
+ ` The assistant is running but guardian-token.json was not written.\n` +
324
+ ` If the desktop app loses its stored credentials, re-hatch to recover.`,
325
+ );
326
+ }
327
+ }
312
328
  }
313
329
 
314
330
  // Auto-start ngrok if webhook integrations (e.g. Telegram, Twilio) are configured.
package/src/lib/local.ts CHANGED
@@ -1076,6 +1076,7 @@ export async function startGateway(
1076
1076
  // (mirrors the daemon env setup).
1077
1077
  ...(resources
1078
1078
  ? {
1079
+ BASE_DATA_DIR: resources.instanceDir,
1079
1080
  VELLUM_WORKSPACE_DIR: join(
1080
1081
  resources.instanceDir,
1081
1082
  ".vellum",
@@ -222,6 +222,140 @@ export async function ensureSelfHostedLocalRegistration(
222
222
  return (await response.json()) as EnsureRegistrationResponse;
223
223
  }
224
224
 
225
+ // ---------------------------------------------------------------------------
226
+ // API key reprovisioning
227
+ // ---------------------------------------------------------------------------
228
+
229
/** JSON body returned by the reprovision-api-key endpoint. */
export interface ReprovisionApiKeyResponse {
  /** Provisioning payload carrying the newly issued key. */
  provisioning: {
    /** The fresh assistant API key. */
    assistant_api_key: string;
  };
}
234
+
235
/**
 * Rotate the API key of a self-hosted local assistant.
 *
 * Issues `POST /v1/assistants/self-hosted-local/reprovision-api-key/` against
 * the platform. The platform responds with a fresh key and revokes the
 * previous one server-side.
 *
 * @param token Session token, sent as `X-Session-Token`.
 * @param organizationId Sent as `Vellum-Organization-Id`.
 * @param clientInstallationId Sent as `client_installation_id` in the body.
 * @param runtimeAssistantId Sent as `runtime_assistant_id` in the body.
 * @param clientPlatform Sent as `client_platform` in the body.
 * @param assistantVersion Optional; sent as `assistant_version` when non-empty.
 * @param platformUrl Optional base URL; falls back to `getPlatformUrl()`.
 * @returns The parsed response body containing the new API key.
 * @throws Error on 401/403 (authentication required) or any other non-2xx
 *   status (message includes the status and response detail).
 */
export async function reprovisionAssistantApiKey(
  token: string,
  organizationId: string,
  clientInstallationId: string,
  runtimeAssistantId: string,
  clientPlatform: string,
  assistantVersion?: string,
  platformUrl?: string,
): Promise<ReprovisionApiKeyResponse> {
  const baseUrl = platformUrl || getPlatformUrl();
  const endpoint = `${baseUrl}/v1/assistants/self-hosted-local/reprovision-api-key/`;

  // `assistant_version` is only included when the caller supplied one.
  const payload: Record<string, string> = {
    client_installation_id: clientInstallationId,
    runtime_assistant_id: runtimeAssistantId,
    client_platform: clientPlatform,
    ...(assistantVersion ? { assistant_version: assistantVersion } : {}),
  };

  const res = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Accept: "application/json",
      "X-Session-Token": token,
      "Vellum-Organization-Id": organizationId,
    },
    body: JSON.stringify(payload),
  });

  if (res.status === 401 || res.status === 403) {
    throw new Error("Authentication required for API key reprovisioning.");
  }

  if (res.ok) {
    return (await res.json()) as ReprovisionApiKeyResponse;
  }

  // Prefer the response body as the failure detail; fall back to the status text.
  const detail = await res.text().catch(() => "");
  throw new Error(
    `API key reprovisioning failed (${res.status}): ${detail || res.statusText}`,
  );
}
287
+
288
+ // ---------------------------------------------------------------------------
289
+ // Credential reading from running assistant via gateway
290
+ // ---------------------------------------------------------------------------
291
+
292
/** Outcome of reading a credential through the gateway. */
export interface GatewayCredentialResult {
  /** The credential value when one was found; otherwise null. */
  value: string | null;
  /**
   * True when the gateway/daemon could not be reached (network error,
   * timeout, etc.), as opposed to the credential simply not being found.
   */
  unreachable: boolean;
}
298
+
299
/**
 * Read an existing credential from the assistant's secret store via the
 * gateway-proxied `POST /v1/secrets/read` endpoint (with `reveal: true`).
 *
 * The result distinguishes "key not found" (`value: null,
 * unreachable: false`) from "could not determine" (`value: null,
 * unreachable: true`). Callers should only reprovision when the gateway
 * answered and the key is genuinely missing — reprovisioning otherwise would
 * revoke the old key server-side without being able to inject the
 * replacement.
 *
 * `unreachable: true` is reported for:
 * - network errors, timeouts (10 s), or an unparseable response body;
 * - 5xx responses (gateway/daemon backend down);
 * - 401/403/408/429 responses — the request was rejected or throttled before
 *   the secret store was consulted, so nothing is known about the key;
 * - a 2xx body carrying `unreachable: true` (credential backend unavailable).
 *
 * Any other non-2xx status is reported as "not found".
 *
 * @param gatewayUrl Base URL of the gateway (no trailing slash).
 * @param name Credential name to look up.
 * @param bearerToken Optional token sent as `Authorization: Bearer …`.
 * @returns The credential value (non-empty string) or null, plus the
 *   reachability flag described above.
 *
 * Never throws.
 */
export async function readGatewayCredential(
  gatewayUrl: string,
  name: string,
  bearerToken?: string,
): Promise<GatewayCredentialResult> {
  // 4xx statuses that carry no information about whether the credential
  // exists: auth rejected, request timed out, or rate limited.
  const inconclusiveClientStatuses = new Set([401, 403, 408, 429]);

  try {
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
      Accept: "application/json",
    };
    if (bearerToken) {
      headers["Authorization"] = `Bearer ${bearerToken}`;
    }

    const response = await fetch(`${gatewayUrl}/v1/secrets/read`, {
      method: "POST",
      headers,
      body: JSON.stringify({ type: "credential", name, reveal: true }),
      signal: AbortSignal.timeout(10_000),
    });

    if (!response.ok) {
      // Only report "not found" when the failure actually says something
      // about the key; otherwise callers might revoke a still-valid key.
      const inconclusive =
        response.status >= 500 ||
        inconclusiveClientStatuses.has(response.status);
      return { value: null, unreachable: inconclusive };
    }

    const json = (await response.json()) as {
      found?: unknown;
      value?: unknown;
      unreachable?: unknown;
    };
    // The daemon's /v1/secrets/read returns `unreachable: true` when the
    // credential backend (CES) can't be reached. Respect that signal.
    if (json.unreachable) {
      return { value: null, unreachable: true };
    }
    // Accept only a non-empty string so a malformed body cannot leak a
    // non-string through the declared `string | null` type.
    const value =
      json.found && typeof json.value === "string" && json.value !== ""
        ? json.value
        : null;
    return { value, unreachable: false };
  } catch {
    // Network error, timeout, gateway down, or unparseable body.
    return { value: null, unreachable: true };
  }
}
358
+
225
359
  // ---------------------------------------------------------------------------
226
360
  // Credential injection into running assistant via gateway
227
361
  // ---------------------------------------------------------------------------
@@ -1,13 +1,17 @@
1
1
  import { createConnection } from "net";
2
2
  import { existsSync } from "fs";
3
3
 
4
- import type { AssistantEntry } from "../lib/assistant-config";
4
+ import type { AssistantEntry } from "./assistant-config";
5
5
 
6
6
  /**
7
7
  * Connect to an Apple Container assistant via its management socket.
8
8
  * Sends a JSON handshake then relays stdin/stdout in raw mode.
9
9
  */
10
- export async function sshAppleContainer(entry: AssistantEntry): Promise<void> {
10
+ export async function sshAppleContainer(
11
+ entry: AssistantEntry,
12
+ command?: string[],
13
+ service?: string,
14
+ ): Promise<void> {
11
15
  const mgmtSocket = entry.mgmtSocket as string | undefined;
12
16
  if (!mgmtSocket) {
13
17
  console.error(
@@ -34,8 +38,8 @@ export async function sshAppleContainer(entry: AssistantEntry): Promise<void> {
34
38
 
35
39
  const handshake =
36
40
  JSON.stringify({
37
- command: ["/bin/bash"],
38
- service: "vellum-assistant",
41
+ command: command && command.length > 0 ? command : ["/bin/bash"],
42
+ service: service || "vellum-assistant",
39
43
  cols,
40
44
  rows,
41
45
  }) + "\n";
@@ -1,19 +1,43 @@
1
1
  /**
2
2
  * Provider API key environment variable names, keyed by provider ID.
3
3
  *
4
- * Keep in sync with:
5
- * - assistant/src/shared/provider-env-vars.ts
6
- * - meta/provider-env-vars.json (consumed by the macOS client build)
4
+ * Two sources are merged into a single combined map:
7
5
  *
8
- * Once a consolidated shared package exists in packages/, all three
9
- * copies can be replaced by a single import.
6
+ * 1. Search-provider env vars sourced from `meta/provider-env-vars.json`
7
+ * (single source of truth, also bundled into the macOS client).
8
+ * 2. LLM-provider env vars — sourced from `PROVIDER_CATALOG` in
9
+ * `assistant/src/providers/model-catalog.ts` via a locally-maintained
10
+ * mirror (the CLI does not import from `assistant/src/`; drift is caught
11
+ * by `cli/src/__tests__/llm-provider-env-var-parity.test.ts`).
12
+ *
13
+ * The combined map is what cloud-infra code (docker.ts, aws.ts, gcp.ts)
14
+ * iterates to forward provider API keys from the caller's environment into
15
+ * containers / VMs. Keeping both kinds of provider env vars in one map means
16
+ * the infra call sites don't need to know which kind is which — they just
17
+ * forward every value whose env var is set.
10
18
  */
11
/**
 * Env var names for LLM providers, keyed by provider ID. Locally maintained
 * mirror of the `PROVIDER_CATALOG` entries that declare an `envVar`.
 */
export const LLM_PROVIDER_ENV_VAR_NAMES: Record<string, string> = {
  anthropic: "ANTHROPIC_API_KEY",
  openai: "OPENAI_API_KEY",
  gemini: "GEMINI_API_KEY",
  fireworks: "FIREWORKS_API_KEY",
  openrouter: "OPENROUTER_API_KEY",
};

/**
 * Env var names for search providers, keyed by provider ID. Mirrors
 * `meta/provider-env-vars.json`.
 */
export const SEARCH_PROVIDER_ENV_VAR_NAMES: Record<string, string> = {
  brave: "BRAVE_API_KEY",
  perplexity: "PERPLEXITY_API_KEY",
};

/**
 * Union of the LLM and search provider maps (LLM entries first). The
 * cloud-infra flows (docker/aws/gcp) iterate this to forward every supported
 * provider API key from the caller's environment.
 */
export const PROVIDER_ENV_VAR_NAMES: Record<string, string> = Object.assign(
  {},
  LLM_PROVIDER_ENV_VAR_NAMES,
  SEARCH_PROVIDER_ENV_VAR_NAMES,
);