@vellumai/cli 0.6.3 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/AGENTS.md +12 -2
  2. package/README.md +3 -3
  3. package/bun.lock +17 -17
  4. package/bunfig.toml +6 -0
  5. package/package.json +18 -18
  6. package/src/__tests__/assistant-config.test.ts +124 -0
  7. package/src/__tests__/env-drift.test.ts +87 -0
  8. package/src/__tests__/guardian-token.test.ts +225 -0
  9. package/src/__tests__/llm-provider-env-var-parity.test.ts +64 -0
  10. package/src/__tests__/multi-local.test.ts +90 -13
  11. package/src/__tests__/orphan-detection.test.ts +214 -0
  12. package/src/__tests__/platform-client.test.ts +204 -0
  13. package/src/__tests__/preload.ts +27 -0
  14. package/src/__tests__/ssh-user-guard.test.ts +28 -0
  15. package/src/__tests__/teleport.test.ts +1073 -56
  16. package/src/commands/backup.ts +8 -0
  17. package/src/commands/exec.ts +186 -0
  18. package/src/commands/hatch.ts +1 -1
  19. package/src/commands/login.ts +209 -9
  20. package/src/commands/logs.ts +652 -0
  21. package/src/commands/pair.ts +9 -1
  22. package/src/commands/ps.ts +37 -7
  23. package/src/commands/recover.ts +8 -4
  24. package/src/commands/restore.ts +8 -0
  25. package/src/commands/retire.ts +16 -9
  26. package/src/commands/rollback.ts +32 -33
  27. package/src/commands/ssh.ts +7 -0
  28. package/src/commands/teleport.ts +253 -1
  29. package/src/commands/upgrade.ts +43 -52
  30. package/src/commands/wake.ts +25 -10
  31. package/src/components/DefaultMainScreen.tsx +7 -1
  32. package/src/index.ts +6 -0
  33. package/src/lib/__tests__/docker.test.ts +168 -0
  34. package/src/lib/assistant-config.ts +82 -108
  35. package/src/lib/aws.ts +12 -1
  36. package/src/lib/config-utils.ts +4 -4
  37. package/src/lib/constants.ts +0 -10
  38. package/src/lib/docker.ts +158 -8
  39. package/src/lib/environments/__tests__/paths.test.ts +228 -0
  40. package/src/lib/environments/__tests__/resolve.test.ts +226 -0
  41. package/src/lib/environments/__tests__/seeds.test.ts +72 -0
  42. package/src/lib/environments/paths.ts +109 -0
  43. package/src/lib/environments/resolve.ts +96 -0
  44. package/src/lib/environments/seeds.ts +74 -0
  45. package/src/lib/environments/types.ts +60 -0
  46. package/src/lib/exec-apple-container.ts +122 -0
  47. package/src/lib/gcp.ts +12 -1
  48. package/src/lib/guardian-token.ts +71 -10
  49. package/src/lib/hatch-local.ts +44 -23
  50. package/src/lib/local.ts +47 -5
  51. package/src/lib/orphan-detection.ts +28 -12
  52. package/src/lib/platform-client.ts +354 -24
  53. package/src/lib/retire-apple-container.ts +102 -0
  54. package/src/lib/ssh-apple-container.ts +166 -0
  55. package/src/lib/upgrade-lifecycle.ts +101 -28
  56. package/src/shared/provider-env-vars.ts +30 -6
@@ -9,11 +9,13 @@ import type { AssistantEntry } from "../lib/assistant-config.js";
9
9
  import {
10
10
  loadGuardianToken,
11
11
  leaseGuardianToken,
12
+ computeDeviceId,
12
13
  } from "../lib/guardian-token.js";
13
14
  import {
14
15
  readPlatformToken,
15
16
  getPlatformUrl,
16
17
  hatchAssistant,
18
+ checkExistingPlatformAssistant,
17
19
  platformInitiateExport,
18
20
  platformPollExportStatus,
19
21
  platformDownloadExport,
@@ -24,6 +26,12 @@ import {
24
26
  platformImportPreflightFromGcs,
25
27
  platformImportBundleFromGcs,
26
28
  platformPollImportStatus,
29
+ ensureSelfHostedLocalRegistration,
30
+ readGatewayCredential,
31
+ reprovisionAssistantApiKey,
32
+ injectCredentialsIntoAssistant,
33
+ fetchCurrentUser,
34
+ fetchOrganizationId,
27
35
  } from "../lib/platform-client.js";
28
36
  import {
29
37
  hatchDocker,
@@ -35,6 +43,8 @@ import { hatchLocal } from "../lib/hatch-local.js";
35
43
  import { retireLocal } from "../lib/retire-local.js";
36
44
  import { validateAssistantName } from "../lib/retire-archive.js";
37
45
  import { stopProcessByPidFile } from "../lib/process.js";
46
+ import { fetchCurrentVersion } from "../lib/upgrade-lifecycle.js";
47
+ import { compareVersions } from "../lib/version-compat.js";
38
48
  import { join } from "node:path";
39
49
 
40
50
  function printHelp(): void {
@@ -606,6 +616,13 @@ interface ImportResponse {
606
616
  files_skipped: number;
607
617
  backups_created: number;
608
618
  };
619
+ credentialsImported?: {
620
+ total: number;
621
+ succeeded: number;
622
+ failed: number;
623
+ failedAccounts: string[];
624
+ skippedPlatform?: number;
625
+ };
609
626
  }
610
627
 
611
628
  async function importToAssistant(
@@ -895,7 +912,29 @@ export async function resolveOrHatchTarget(
895
912
  process.exit(1);
896
913
  }
897
914
 
898
- const result = await hatchAssistant(token);
915
+ const { assistant: result, reusedExisting } = await hatchAssistant(token);
916
+
917
+ // Defensive safety net — should not happen because of the pre-check in
918
+ // teleport(), but guards against a TOCTOU race between the pre-check and
919
+ // hatch (e.g. another client hatches in the GCS-upload window).
920
+ if (reusedExisting) {
921
+ const entry: AssistantEntry = {
922
+ assistantId: result.id,
923
+ runtimeUrl: getPlatformUrl(),
924
+ cloud: "vellum",
925
+ species: "vellum",
926
+ hatchedAt: new Date().toISOString(),
927
+ };
928
+ saveAssistantEntry(entry);
929
+ console.error(
930
+ `Error: You already have a platform assistant '${result.id}'.`,
931
+ );
932
+ console.error(
933
+ `Retire it first with 'vellum retire ${result.id}', then retry the teleport.`,
934
+ );
935
+ process.exit(1);
936
+ }
937
+
899
938
  const entry: AssistantEntry = {
900
939
  assistantId: result.id,
901
940
  runtimeUrl: getPlatformUrl(),
@@ -1048,6 +1087,20 @@ function printImportSummary(result: ImportResponse): void {
1048
1087
  console.log(` Files skipped: ${summary.files_skipped}`);
1049
1088
  console.log(` Backups created: ${summary.backups_created}`);
1050
1089
 
1090
+ const creds = result.credentialsImported;
1091
+ if (creds) {
1092
+ console.log(` Credentials imported: ${creds.succeeded}/${creds.total}`);
1093
+ if (creds.skippedPlatform) {
1094
+ console.log(` Platform credentials skipped: ${creds.skippedPlatform}`);
1095
+ }
1096
+ if (creds.failed > 0) {
1097
+ console.log(` Credentials failed: ${creds.failed}`);
1098
+ for (const account of creds.failedAccounts) {
1099
+ console.log(` - ${account}`);
1100
+ }
1101
+ }
1102
+ }
1103
+
1051
1104
  const warnings = result.warnings ?? [];
1052
1105
  if (warnings.length > 0) {
1053
1106
  console.log("");
@@ -1058,6 +1111,79 @@ function printImportSummary(result: ImportResponse): void {
1058
1111
  }
1059
1112
  }
1060
1113
 
1114
+ /**
1115
+ * After teleporting to a local/docker target, register the assistant with
1116
+ * the platform and inject fresh platform credentials — mirroring the
1117
+ * login flow. Non-fatal: failures are logged as warnings.
1118
+ */
1119
+ async function tryInjectPlatformCredentials(
1120
+ entry: AssistantEntry,
1121
+ ): Promise<void> {
1122
+ const token = readPlatformToken();
1123
+ if (!token) {
1124
+ console.log(" Skipped platform credential injection (not logged in).");
1125
+ return;
1126
+ }
1127
+
1128
+ try {
1129
+ const user = await fetchCurrentUser(token);
1130
+ const orgId = await fetchOrganizationId(token);
1131
+ const clientInstallationId = computeDeviceId();
1132
+ const registration = await ensureSelfHostedLocalRegistration(
1133
+ token,
1134
+ orgId,
1135
+ clientInstallationId,
1136
+ entry.assistantId,
1137
+ "cli",
1138
+ );
1139
+
1140
+ // Resolve the API key: 1) fresh from registration, 2) existing from
1141
+ // daemon credential store, 3) reprovision as last resort (revokes old key).
1142
+ // Only reprovision when the gateway confirms no key exists — not when
1143
+ // the gateway is merely unreachable (would revoke without injecting).
1144
+ let assistantApiKey = registration.assistant_api_key;
1145
+ if (!assistantApiKey) {
1146
+ const cached = await readGatewayCredential(
1147
+ entry.runtimeUrl,
1148
+ "vellum:assistant_api_key",
1149
+ entry.bearerToken,
1150
+ );
1151
+ if (cached.value) {
1152
+ assistantApiKey = cached.value;
1153
+ } else if (!cached.unreachable) {
1154
+ const reprovision = await reprovisionAssistantApiKey(
1155
+ token,
1156
+ orgId,
1157
+ clientInstallationId,
1158
+ entry.assistantId,
1159
+ "cli",
1160
+ );
1161
+ assistantApiKey = reprovision.provisioning.assistant_api_key;
1162
+ }
1163
+ }
1164
+
1165
+ const allInjected = await injectCredentialsIntoAssistant({
1166
+ gatewayUrl: entry.runtimeUrl,
1167
+ bearerToken: entry.bearerToken,
1168
+ assistantApiKey,
1169
+ platformAssistantId: registration.assistant.id,
1170
+ platformBaseUrl: getPlatformUrl(),
1171
+ organizationId: orgId,
1172
+ userId: user.id,
1173
+ webhookSecret: registration.webhook_secret,
1174
+ });
1175
+
1176
+ if (allInjected) {
1177
+ console.log(" Platform credentials injected.");
1178
+ } else {
1179
+ console.warn(" Some platform credentials could not be injected.");
1180
+ }
1181
+ } catch (err) {
1182
+ const msg = err instanceof Error ? err.message : String(err);
1183
+ console.warn(` Platform credential injection skipped: ${msg}`);
1184
+ }
1185
+ }
1186
+
1061
1187
  // ---------------------------------------------------------------------------
1062
1188
  // Main entry point
1063
1189
  // ---------------------------------------------------------------------------
@@ -1104,6 +1230,13 @@ export async function teleport(): Promise<void> {
1104
1230
 
1105
1231
  const fromCloud = resolveCloud(fromEntry);
1106
1232
 
1233
+ if (fromCloud === "apple-container") {
1234
+ console.error(
1235
+ `Error: '${from}' uses the Apple Containers runtime. Teleport is not yet supported for this topology.`,
1236
+ );
1237
+ process.exit(1);
1238
+ }
1239
+
1107
1240
  // Early same-environment guard — compare source cloud against the CLI flag
1108
1241
  // BEFORE exporting or hatching, to avoid creating orphaned assistants.
1109
1242
  const normalizedSourceEnv = fromCloud === "vellum" ? "platform" : fromCloud;
@@ -1137,6 +1270,28 @@ export async function teleport(): Promise<void> {
1137
1270
  process.exit(1);
1138
1271
  }
1139
1272
 
1273
+ // Version guard: block platform→non-platform when target is behind
1274
+ if (fromCloud === "vellum" && toCloud !== "vellum") {
1275
+ const [sourceVersion, targetVersion] = await Promise.all([
1276
+ fetchCurrentVersion(fromEntry.runtimeUrl),
1277
+ fetchCurrentVersion(existingTarget.runtimeUrl),
1278
+ ]);
1279
+ const cmp =
1280
+ sourceVersion && targetVersion
1281
+ ? compareVersions(targetVersion, sourceVersion)
1282
+ : null;
1283
+ if (cmp !== null && cmp < 0) {
1284
+ console.error(
1285
+ `Error: Target assistant '${existingTarget.assistantId}' is running ${targetVersion}, ` +
1286
+ `but the platform source is on ${sourceVersion}.`,
1287
+ );
1288
+ console.error(
1289
+ `Upgrade your ${toCloud} assistant first: vellum upgrade ${existingTarget.assistantId}`,
1290
+ );
1291
+ process.exit(1);
1292
+ }
1293
+ }
1294
+
1140
1295
  console.log(`Exporting from ${from} (${fromCloud})...`);
1141
1296
  const bundleData = await exportFromAssistant(fromEntry, fromCloud);
1142
1297
  console.log(`Importing to ${existingTarget.assistantId} (${toCloud})...`);
@@ -1196,6 +1351,31 @@ export async function teleport(): Promise<void> {
1196
1351
  // and import hit the same instance.
1197
1352
  const targetPlatformUrl = existingTarget?.runtimeUrl;
1198
1353
 
1354
+ // Step B2 — Pre-check: block if the user already has a platform assistant.
1355
+ // This runs BEFORE the expensive GCS upload so we don't waste bandwidth.
1356
+ if (!existingTarget) {
1357
+ const existing = await checkExistingPlatformAssistant(
1358
+ token,
1359
+ targetPlatformUrl,
1360
+ );
1361
+ if (existing) {
1362
+ saveAssistantEntry({
1363
+ assistantId: existing.id,
1364
+ runtimeUrl: getPlatformUrl(),
1365
+ cloud: "vellum",
1366
+ species: "vellum",
1367
+ hatchedAt: new Date().toISOString(),
1368
+ });
1369
+ console.error(
1370
+ `Error: You already have a platform assistant '${existing.id}'.`,
1371
+ );
1372
+ console.error(
1373
+ `Retire it first with 'vellum retire ${existing.id}', then retry the teleport.`,
1374
+ );
1375
+ process.exit(1);
1376
+ }
1377
+ }
1378
+
1199
1379
  // Step C — Upload to GCS
1200
1380
  // bundleKey: string = uploaded successfully, null = tried but unavailable,
1201
1381
  // undefined would mean "never tried" (not used here).
@@ -1238,6 +1418,36 @@ export async function teleport(): Promise<void> {
1238
1418
  // fails, the user can recover by running `vellum wake <source>`.
1239
1419
  const sourceIsLocalOrDocker = fromCloud === "local" || fromCloud === "docker";
1240
1420
  const targetIsLocalOrDocker = targetEnv === "local" || targetEnv === "docker";
1421
+
1422
+ // Version guard (pre-hatch): for existing targets, check BEFORE hatching
1423
+ // to avoid creating orphaned assistants when the version check would fail.
1424
+ let versionGuardPassed = false;
1425
+ if (fromCloud === "vellum" && targetIsLocalOrDocker && targetName) {
1426
+ const existingTarget = findAssistantByName(targetName);
1427
+ if (existingTarget) {
1428
+ const [sourceVersion, existingVersion] = await Promise.all([
1429
+ fetchCurrentVersion(fromEntry.runtimeUrl),
1430
+ fetchCurrentVersion(existingTarget.runtimeUrl),
1431
+ ]);
1432
+ const cmp =
1433
+ sourceVersion && existingVersion
1434
+ ? compareVersions(existingVersion, sourceVersion)
1435
+ : null;
1436
+ if (cmp !== null && cmp < 0) {
1437
+ console.error(
1438
+ `Error: Target assistant '${existingTarget.assistantId}' is running ${existingVersion}, ` +
1439
+ `but the platform source is on ${sourceVersion}.`,
1440
+ );
1441
+ console.error(
1442
+ `Upgrade your ${targetEnv} assistant first: vellum upgrade ${existingTarget.assistantId}`,
1443
+ );
1444
+ process.exit(1);
1445
+ }
1446
+ // Pre-hatch check passed (or was best-effort skipped) — skip post-hatch
1447
+ versionGuardPassed = true;
1448
+ }
1449
+ }
1450
+
1241
1451
  if (sourceIsLocalOrDocker && targetIsLocalOrDocker && !keepSource) {
1242
1452
  console.log(`Stopping source assistant '${from}' to free ports...`);
1243
1453
  if (fromCloud === "docker") {
@@ -1268,10 +1478,52 @@ export async function teleport(): Promise<void> {
1268
1478
  process.exit(1);
1269
1479
  }
1270
1480
 
1481
+ // Version guard (post-hatch): for newly hatched targets we must check after
1482
+ // hatch because the assistant doesn't exist yet before. If it fails, clean
1483
+ // up the freshly hatched assistant to avoid orphans.
1484
+ // Skip if the pre-hatch guard already ran for an existing target.
1485
+ if (!versionGuardPassed && fromCloud === "vellum" && toCloud !== "vellum") {
1486
+ const [sourceVersion, targetVersion] = await Promise.all([
1487
+ fetchCurrentVersion(fromEntry.runtimeUrl),
1488
+ fetchCurrentVersion(toEntry.runtimeUrl),
1489
+ ]);
1490
+ const cmp =
1491
+ sourceVersion && targetVersion
1492
+ ? compareVersions(targetVersion, sourceVersion)
1493
+ : null;
1494
+ if (cmp !== null && cmp < 0) {
1495
+ // Clean up the freshly hatched assistant to avoid orphans
1496
+ console.error(
1497
+ `Cleaning up newly hatched assistant '${toEntry.assistantId}'...`,
1498
+ );
1499
+ if (toCloud === "docker") {
1500
+ await retireDocker(toEntry.assistantId);
1501
+ } else {
1502
+ await retireLocal(toEntry.assistantId, toEntry);
1503
+ }
1504
+ removeAssistantEntry(toEntry.assistantId);
1505
+ console.error(
1506
+ `Error: Target assistant '${toEntry.assistantId}' was running ${targetVersion}, ` +
1507
+ `but the platform source is on ${sourceVersion}.`,
1508
+ );
1509
+ console.error(
1510
+ `Upgrade your ${toCloud} environment first, then retry the teleport.`,
1511
+ );
1512
+ process.exit(1);
1513
+ }
1514
+ }
1515
+
1271
1516
  // Import to target
1272
1517
  console.log(`Importing to ${toEntry.assistantId} (${toCloud})...`);
1273
1518
  await importToAssistant(toEntry, toCloud, bundleData, false);
1274
1519
 
1520
+ // After successful import, inject fresh platform credentials if the
1521
+ // user is logged in — replaces the source's stale vellum:* credentials
1522
+ // that were filtered during import.
1523
+ if (fromCloud === "vellum") {
1524
+ await tryInjectPlatformCredentials(toEntry);
1525
+ }
1526
+
1275
1527
  // Retire source after successful import
1276
1528
  if (sourceIsLocalOrDocker && targetIsLocalOrDocker) {
1277
1529
  if (!keepSource) {
@@ -189,18 +189,9 @@ async function upgradeDocker(
189
189
  const versionTag =
190
190
  version ?? (cliPkg.version ? `v${cliPkg.version}` : "latest");
191
191
 
192
- // Reject downgrades `vellum upgrade` only handles forward version changes.
193
- // Users should use `vellum rollback --version <version>` for downgrades.
194
- const currentVersion = entry.serviceGroupVersion;
195
- if (currentVersion && versionTag) {
196
- const cmp = compareVersions(versionTag, currentVersion);
197
- if (cmp !== null && cmp < 0) {
198
- const msg = `Cannot upgrade to an older version (${versionTag} < ${currentVersion}). Use \`vellum rollback --version ${versionTag}\` instead.`;
199
- console.error(msg);
200
- emitCliError("VERSION_DIRECTION", msg);
201
- process.exit(1);
202
- }
203
- }
192
+ // Fetch the current running version from the health endpoint.
193
+ // This is used for logging, commit messages, and version-direction guards.
194
+ let currentVersion: string | undefined;
204
195
 
205
196
  console.log("🔍 Resolving image references...");
206
197
  const { imageTags } = await resolveImageRefs(versionTag);
@@ -225,7 +216,7 @@ async function upgradeDocker(
225
216
  );
226
217
  }
227
218
 
228
- // Capture current migration state for rollback targeting.
219
+ // Capture current migration state and running version for rollback targeting.
229
220
  // Must happen while daemon is still running (before containers are stopped).
230
221
  let preMigrationState: {
231
222
  dbVersion?: number;
@@ -240,26 +231,47 @@ async function upgradeDocker(
240
231
  );
241
232
  if (healthResp.ok) {
242
233
  const health = (await healthResp.json()) as {
234
+ version?: string;
243
235
  migrations?: { dbVersion?: number; lastWorkspaceMigrationId?: string };
244
236
  };
245
237
  preMigrationState = health.migrations ?? {};
238
+ currentVersion = health.version;
246
239
  }
247
240
  } catch {
248
241
  // Best-effort — if we can't get migration state, rollback will skip migration reversal
249
242
  }
250
243
 
244
+ // Reject downgrades — `vellum upgrade` only handles forward version changes.
245
+ // Users should use `vellum rollback --version <version>` for downgrades.
246
+ if (!currentVersion && versionTag) {
247
+ console.warn(
248
+ "⚠️ Could not determine current version from health endpoint — skipping version-direction check.\n",
249
+ );
250
+ }
251
+ if (currentVersion && versionTag) {
252
+ const cmp = compareVersions(versionTag, currentVersion);
253
+ if (cmp !== null && cmp < 0) {
254
+ const msg = `Cannot upgrade to an older version (${versionTag} < ${currentVersion}). Use \`vellum rollback --version ${versionTag}\` instead.`;
255
+ console.error(msg);
256
+ emitCliError("VERSION_DIRECTION", msg);
257
+ process.exit(1);
258
+ }
259
+ }
260
+
251
261
  // Persist rollback state to lockfile BEFORE any destructive changes.
252
262
  // This enables the `vellum rollback` command to restore the previous version.
253
- if (entry.serviceGroupVersion && entry.containerInfo) {
263
+ if (entry.containerInfo) {
254
264
  const rollbackEntry: AssistantEntry = {
255
265
  ...entry,
256
- previousServiceGroupVersion: entry.serviceGroupVersion,
257
266
  previousContainerInfo: { ...entry.containerInfo },
267
+ previousVersion: currentVersion,
258
268
  previousDbMigrationVersion: preMigrationState.dbVersion,
259
269
  previousWorkspaceMigrationId: preMigrationState.lastWorkspaceMigrationId,
260
270
  };
261
271
  saveAssistantEntry(rollbackEntry);
262
- console.log(` Saved rollback state: ${entry.serviceGroupVersion}\n`);
272
+ if (currentVersion) {
273
+ console.log(` Saved rollback state: ${currentVersion}\n`);
274
+ }
263
275
  }
264
276
 
265
277
  // Record version transition start in workspace git history
@@ -269,7 +281,7 @@ async function upgradeDocker(
269
281
  buildUpgradeCommitMessage({
270
282
  action: "upgrade",
271
283
  phase: "starting",
272
- from: entry.serviceGroupVersion ?? "unknown",
284
+ from: currentVersion ?? "unknown",
273
285
  to: versionTag,
274
286
  topology: "docker",
275
287
  assistantId: entry.assistantId,
@@ -321,7 +333,7 @@ async function upgradeDocker(
321
333
  await broadcastUpgradeEvent(
322
334
  entry.runtimeUrl,
323
335
  entry.assistantId,
324
- buildCompleteEvent(entry.serviceGroupVersion ?? "unknown", false),
336
+ buildCompleteEvent(currentVersion ?? "unknown", false),
325
337
  );
326
338
  emitCliError("IMAGE_PULL_FAILED", "Failed to pull Docker images", detail);
327
339
  process.exit(1);
@@ -361,7 +373,7 @@ async function upgradeDocker(
361
373
  console.log("📦 Creating pre-upgrade backup...");
362
374
  const backupPath = await createBackup(entry.runtimeUrl, entry.assistantId, {
363
375
  prefix: `${entry.assistantId}-pre-upgrade`,
364
- description: `Pre-upgrade snapshot before ${entry.serviceGroupVersion ?? "unknown"} → ${versionTag}`,
376
+ description: `Pre-upgrade snapshot before ${currentVersion ?? "unknown"} → ${versionTag}`,
365
377
  });
366
378
  if (backupPath) {
367
379
  console.log(` Backup saved: ${backupPath}\n`);
@@ -434,7 +446,6 @@ async function upgradeDocker(
434
446
  const newDigests = await captureImageRefs(res);
435
447
  const updatedEntry: AssistantEntry = {
436
448
  ...entry,
437
- serviceGroupVersion: versionTag,
438
449
  containerInfo: {
439
450
  assistantImage: imageTags.assistant,
440
451
  gatewayImage: imageTags.gateway,
@@ -444,7 +455,6 @@ async function upgradeDocker(
444
455
  cesDigest: newDigests?.["credential-executor"],
445
456
  networkName: res.network,
446
457
  },
447
- previousServiceGroupVersion: entry.serviceGroupVersion,
448
458
  previousContainerInfo: entry.containerInfo,
449
459
  previousDbMigrationVersion: preMigrationState.dbVersion,
450
460
  previousWorkspaceMigrationId: preMigrationState.lastWorkspaceMigrationId,
@@ -467,7 +477,7 @@ async function upgradeDocker(
467
477
  buildUpgradeCommitMessage({
468
478
  action: "upgrade",
469
479
  phase: "complete",
470
- from: entry.serviceGroupVersion ?? "unknown",
480
+ from: currentVersion ?? "unknown",
471
481
  to: versionTag,
472
482
  topology: "docker",
473
483
  assistantId: entry.assistantId,
@@ -584,7 +594,6 @@ async function upgradeDocker(
584
594
  previousImageRefs["credential-executor"],
585
595
  networkName: res.network,
586
596
  },
587
- previousServiceGroupVersion: undefined,
588
597
  previousContainerInfo: undefined,
589
598
  previousDbMigrationVersion: undefined,
590
599
  previousWorkspaceMigrationId: undefined,
@@ -598,9 +607,9 @@ async function upgradeDocker(
598
607
  entry.runtimeUrl,
599
608
  entry.assistantId,
600
609
  buildCompleteEvent(
601
- entry.serviceGroupVersion ?? "unknown",
610
+ currentVersion ?? "unknown",
602
611
  false,
603
- entry.serviceGroupVersion,
612
+ currentVersion,
604
613
  ),
605
614
  );
606
615
 
@@ -621,7 +630,7 @@ async function upgradeDocker(
621
630
  await broadcastUpgradeEvent(
622
631
  entry.runtimeUrl,
623
632
  entry.assistantId,
624
- buildCompleteEvent(entry.serviceGroupVersion ?? "unknown", false),
633
+ buildCompleteEvent(currentVersion ?? "unknown", false),
625
634
  );
626
635
  emitCliError(
627
636
  "ROLLBACK_FAILED",
@@ -641,7 +650,7 @@ async function upgradeDocker(
641
650
  await broadcastUpgradeEvent(
642
651
  entry.runtimeUrl,
643
652
  entry.assistantId,
644
- buildCompleteEvent(entry.serviceGroupVersion ?? "unknown", false),
653
+ buildCompleteEvent(currentVersion ?? "unknown", false),
645
654
  );
646
655
  emitCliError(
647
656
  "ROLLBACK_FAILED",
@@ -657,7 +666,7 @@ async function upgradeDocker(
657
666
  await broadcastUpgradeEvent(
658
667
  entry.runtimeUrl,
659
668
  entry.assistantId,
660
- buildCompleteEvent(entry.serviceGroupVersion ?? "unknown", false),
669
+ buildCompleteEvent(currentVersion ?? "unknown", false),
661
670
  );
662
671
  emitCliError(
663
672
  "ROLLBACK_NO_STATE",
@@ -678,22 +687,6 @@ async function upgradePlatform(
678
687
  entry: AssistantEntry,
679
688
  version: string | null,
680
689
  ): Promise<void> {
681
- // Reject downgrades — `vellum upgrade` only handles forward version changes.
682
- // Users should use `vellum rollback --version <version>` for downgrades.
683
- // Only enforce this guard when the user explicitly passed `--version`.
684
- // When version is null the platform API decides the actual target, so
685
- // we must not block the request based on the local CLI version.
686
- const currentVersion = entry.serviceGroupVersion;
687
- if (version && currentVersion) {
688
- const cmp = compareVersions(version, currentVersion);
689
- if (cmp !== null && cmp < 0) {
690
- const msg = `Cannot upgrade to an older version (${version} < ${currentVersion}). Use \`vellum rollback --version ${version}\` instead.`;
691
- console.error(msg);
692
- emitCliError("VERSION_DIRECTION", msg);
693
- process.exit(1);
694
- }
695
- }
696
-
697
690
  console.log(
698
691
  `🔄 Upgrading platform-hosted assistant '${entry.assistantId}'...\n`,
699
692
  );
@@ -733,7 +726,7 @@ async function upgradePlatform(
733
726
  await broadcastUpgradeEvent(
734
727
  entry.runtimeUrl,
735
728
  entry.assistantId,
736
- buildCompleteEvent(entry.serviceGroupVersion ?? "unknown", false),
729
+ buildCompleteEvent("unknown", false),
737
730
  );
738
731
  } catch {
739
732
  // Best-effort — broadcast may fail if the assistant is unreachable
@@ -755,7 +748,7 @@ async function upgradePlatform(
755
748
  await broadcastUpgradeEvent(
756
749
  entry.runtimeUrl,
757
750
  entry.assistantId,
758
- buildCompleteEvent(entry.serviceGroupVersion ?? "unknown", false),
751
+ buildCompleteEvent("unknown", false),
759
752
  );
760
753
  } catch {
761
754
  // Best-effort — broadcast may fail if the assistant is unreachable
@@ -788,8 +781,8 @@ async function upgradePrepare(
788
781
  entry: AssistantEntry,
789
782
  version: string | null,
790
783
  ): Promise<void> {
791
- const targetVersion = version ?? entry.serviceGroupVersion ?? "unknown";
792
- const currentVersion = entry.serviceGroupVersion ?? "unknown";
784
+ const targetVersion = version ?? "unknown";
785
+ const currentVersion = "unknown";
793
786
 
794
787
  // 1. Broadcast "starting" so the UI shows the progress spinner
795
788
  await broadcastUpgradeEvent(
@@ -857,9 +850,7 @@ async function upgradeFinalize(
857
850
  }
858
851
 
859
852
  const fromVersion = version;
860
- const currentVersion = cliPkg.version
861
- ? `v${cliPkg.version}`
862
- : (entry.serviceGroupVersion ?? "unknown");
853
+ const currentVersion = cliPkg.version ? `v${cliPkg.version}` : "unknown";
863
854
 
864
855
  // 1. Broadcast "complete" so the UI clears the progress spinner
865
856
  await broadcastUpgradeEvent(
@@ -911,7 +902,7 @@ export async function upgrade(): Promise<void> {
911
902
  await broadcastUpgradeEvent(
912
903
  entry.runtimeUrl,
913
904
  entry.assistantId,
914
- buildCompleteEvent(entry.serviceGroupVersion ?? "unknown", false),
905
+ buildCompleteEvent("unknown", false),
915
906
  );
916
907
  emitCliError(categorizeUpgradeError(err), "Upgrade failed", detail);
917
908
  process.exit(1);
@@ -6,6 +6,7 @@ import {
6
6
  saveAssistantEntry,
7
7
  } from "../lib/assistant-config.js";
8
8
  import { dockerResourceNames, wakeContainers } from "../lib/docker.js";
9
+ import { seedGuardianTokenFromSiblingEnv } from "../lib/guardian-token.js";
9
10
  import { isProcessAlive, stopProcessByPidFile } from "../lib/process";
10
11
  import {
11
12
  generateLocalSigningKey,
@@ -182,19 +183,33 @@ export async function wake(): Promise<void> {
182
183
  }
183
184
  }
184
185
 
186
+ // Self-heal the guardian token when the current environment's config dir
187
+ // is missing it. Hatch cross-writes the lockfile across env dirs but the
188
+ // guardian token is only persisted under the hatch-time env, so a desktop
189
+ // app built under a different VELLUM_ENVIRONMENT can't find a bearer and
190
+ // cascades into 401 → auth-rate-limit → 429. A sibling env copy is cheap
191
+ // and strictly additive.
192
+ if (seedGuardianTokenFromSiblingEnv(entry.assistantId)) {
193
+ console.log(" Seeded guardian token from sibling environment.");
194
+ }
195
+
185
196
  // Auto-start ngrok if webhook integrations (e.g. Telegram) are configured.
186
- // Set BASE_DATA_DIR so ngrok reads the correct instance config.
197
+ // Scope BASE_DATA_DIR to the woken instance so ngrok reads the correct
198
+ // instance config, then restore on any exit path.
187
199
  const prevBaseDataDir = process.env.BASE_DATA_DIR;
188
200
  process.env.BASE_DATA_DIR = resources.instanceDir;
189
- const ngrokChild = await maybeStartNgrokTunnel(resources.gatewayPort);
190
- if (ngrokChild?.pid) {
191
- const ngrokPidFile = join(resources.instanceDir, ".vellum", "ngrok.pid");
192
- writeFileSync(ngrokPidFile, String(ngrokChild.pid));
193
- }
194
- if (prevBaseDataDir !== undefined) {
195
- process.env.BASE_DATA_DIR = prevBaseDataDir;
196
- } else {
197
- delete process.env.BASE_DATA_DIR;
201
+ try {
202
+ const ngrokChild = await maybeStartNgrokTunnel(resources.gatewayPort);
203
+ if (ngrokChild?.pid) {
204
+ const ngrokPidFile = join(resources.instanceDir, ".vellum", "ngrok.pid");
205
+ writeFileSync(ngrokPidFile, String(ngrokChild.pid));
206
+ }
207
+ } finally {
208
+ if (prevBaseDataDir !== undefined) {
209
+ process.env.BASE_DATA_DIR = prevBaseDataDir;
210
+ } else {
211
+ delete process.env.BASE_DATA_DIR;
212
+ }
198
213
  }
199
214
 
200
215
  console.log("Wake complete.");
@@ -1939,8 +1939,14 @@ function ChatApp({
1939
1939
  );
1940
1940
  }
1941
1941
 
1942
+ let username: string;
1943
+ try {
1944
+ username = userInfo().username;
1945
+ } catch {
1946
+ username = "";
1947
+ }
1942
1948
  const hostId = createHash("sha256")
1943
- .update(hostname() + userInfo().username)
1949
+ .update(hostname() + username)
1944
1950
  .digest("hex");
1945
1951
  const payload = JSON.stringify({
1946
1952
  type: "vellum-assistant",