@vellumai/cli 0.5.6 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,9 +11,6 @@ import {
11
11
  import type { AssistantEntry } from "../lib/assistant-config";
12
12
  import {
13
13
  captureImageRefs,
14
- clearSigningKeyBootstrapLock,
15
- DOCKERHUB_IMAGES,
16
- DOCKER_READY_TIMEOUT_MS,
17
14
  GATEWAY_INTERNAL_PORT,
18
15
  dockerResourceNames,
19
16
  migrateCesSecurityFiles,
@@ -21,31 +18,55 @@ import {
21
18
  startContainers,
22
19
  stopContainers,
23
20
  } from "../lib/docker";
24
- import type { ServiceName } from "../lib/docker";
21
+ import { resolveImageRefs } from "../lib/platform-releases";
25
22
  import {
26
23
  fetchOrganizationId,
27
24
  getPlatformUrl,
28
25
  readPlatformToken,
29
26
  } from "../lib/platform-client";
30
- import { loadBootstrapSecret, loadGuardianToken } from "../lib/guardian-token";
31
- import { exec, execOutput } from "../lib/step-runner";
27
+ import {
28
+ createBackup,
29
+ pruneOldBackups,
30
+ restoreBackup,
31
+ } from "../lib/backup-ops.js";
32
+ import { emitCliError, categorizeUpgradeError } from "../lib/cli-error.js";
33
+ import { exec } from "../lib/step-runner.js";
34
+ import {
35
+ broadcastUpgradeEvent,
36
+ buildCompleteEvent,
37
+ buildProgressEvent,
38
+ buildStartingEvent,
39
+ buildUpgradeCommitMessage,
40
+ captureContainerEnv,
41
+ commitWorkspaceViaGateway,
42
+ CONTAINER_ENV_EXCLUDE_KEYS,
43
+ rollbackMigrations,
44
+ UPGRADE_PROGRESS,
45
+ waitForReady,
46
+ } from "../lib/upgrade-lifecycle.js";
47
+ import { parseVersion } from "../lib/version-compat.js";
32
48
 
33
49
  interface UpgradeArgs {
34
50
  name: string | null;
35
51
  version: string | null;
52
+ prepare: boolean;
53
+ finalize: boolean;
36
54
  }
37
55
 
38
56
  function parseArgs(): UpgradeArgs {
39
57
  const args = process.argv.slice(3);
40
58
  let name: string | null = null;
41
59
  let version: string | null = null;
60
+ let prepare = false;
61
+ let finalize = false;
42
62
 
43
63
  for (let i = 0; i < args.length; i++) {
44
64
  const arg = args[i];
45
65
  if (arg === "--help" || arg === "-h") {
46
66
  console.log("Usage: vellum upgrade [<name>] [options]");
47
67
  console.log("");
48
- console.log("Upgrade an assistant to the latest version.");
68
+ console.log("Upgrade an assistant to a newer version.");
69
+ console.log("To roll back to a previous version, use `vellum rollback`.");
49
70
  console.log("");
50
71
  console.log("Arguments:");
51
72
  console.log(
@@ -56,6 +77,12 @@ function parseArgs(): UpgradeArgs {
56
77
  console.log(
57
78
  " --version <version> Target version to upgrade to (default: latest)",
58
79
  );
80
+ console.log(
81
+ " --prepare Run pre-upgrade steps only (backup, notify) without swapping versions",
82
+ );
83
+ console.log(
84
+ " --finalize Run post-upgrade steps only (broadcast complete, workspace commit)",
85
+ );
59
86
  console.log("");
60
87
  console.log("Examples:");
61
88
  console.log(
@@ -72,19 +99,31 @@ function parseArgs(): UpgradeArgs {
72
99
  const next = args[i + 1];
73
100
  if (!next || next.startsWith("-")) {
74
101
  console.error("Error: --version requires a value");
102
+ emitCliError("UNKNOWN", "--version requires a value");
75
103
  process.exit(1);
76
104
  }
77
105
  version = next;
78
106
  i++;
107
+ } else if (arg === "--prepare") {
108
+ prepare = true;
109
+ } else if (arg === "--finalize") {
110
+ finalize = true;
79
111
  } else if (!arg.startsWith("-")) {
80
112
  name = arg;
81
113
  } else {
82
114
  console.error(`Error: Unknown option '${arg}'.`);
115
+ emitCliError("UNKNOWN", `Unknown option '${arg}'`);
83
116
  process.exit(1);
84
117
  }
85
118
  }
86
119
 
87
- return { name, version };
120
+ if (prepare && finalize) {
121
+ console.error("Error: --prepare and --finalize are mutually exclusive.");
122
+ emitCliError("UNKNOWN", "--prepare and --finalize are mutually exclusive");
123
+ process.exit(1);
124
+ }
125
+
126
+ return { name, version, prepare, finalize };
88
127
  }
89
128
 
90
129
  function resolveCloud(entry: AssistantEntry): string {
@@ -111,6 +150,10 @@ function resolveTargetAssistant(nameArg: string | null): AssistantEntry {
111
150
  const entry = findAssistantByName(nameArg);
112
151
  if (!entry) {
113
152
  console.error(`No assistant found with name '${nameArg}'.`);
153
+ emitCliError(
154
+ "ASSISTANT_NOT_FOUND",
155
+ `No assistant found with name '${nameArg}'.`,
156
+ );
114
157
  process.exit(1);
115
158
  }
116
159
  return entry;
@@ -126,110 +169,18 @@ function resolveTargetAssistant(nameArg: string | null): AssistantEntry {
126
169
  if (all.length === 1) return all[0];
127
170
 
128
171
  if (all.length === 0) {
129
- console.error("No assistants found. Run 'vellum hatch' first.");
172
+ const msg = "No assistants found. Run 'vellum hatch' first.";
173
+ console.error(msg);
174
+ emitCliError("ASSISTANT_NOT_FOUND", msg);
130
175
  } else {
131
- console.error(
132
- "Multiple assistants found. Specify a name or set an active assistant with 'vellum use <name>'.",
133
- );
176
+ const msg =
177
+ "Multiple assistants found. Specify a name or set an active assistant with 'vellum use <name>'.";
178
+ console.error(msg);
179
+ emitCliError("ASSISTANT_NOT_FOUND", msg);
134
180
  }
135
181
  process.exit(1);
136
182
  }
137
183
 
138
- /**
139
- * Capture environment variables from a running Docker container so they
140
- * can be replayed onto the replacement container after upgrade.
141
- */
142
- export async function captureContainerEnv(
143
- containerName: string,
144
- ): Promise<Record<string, string>> {
145
- const captured: Record<string, string> = {};
146
- try {
147
- const raw = await execOutput("docker", [
148
- "inspect",
149
- "--format",
150
- "{{json .Config.Env}}",
151
- containerName,
152
- ]);
153
- const entries = JSON.parse(raw) as string[];
154
- for (const entry of entries) {
155
- const eqIdx = entry.indexOf("=");
156
- if (eqIdx > 0) {
157
- captured[entry.slice(0, eqIdx)] = entry.slice(eqIdx + 1);
158
- }
159
- }
160
- } catch {
161
- // Container may not exist or not be inspectable
162
- }
163
- return captured;
164
- }
165
-
166
- /**
167
- * Poll the gateway `/readyz` endpoint until it returns 200 or the timeout
168
- * elapses. Returns whether the assistant became ready.
169
- */
170
- export async function waitForReady(runtimeUrl: string): Promise<boolean> {
171
- const readyUrl = `${runtimeUrl}/readyz`;
172
- const start = Date.now();
173
-
174
- while (Date.now() - start < DOCKER_READY_TIMEOUT_MS) {
175
- try {
176
- const resp = await fetch(readyUrl, {
177
- signal: AbortSignal.timeout(5000),
178
- });
179
- if (resp.ok) {
180
- const elapsedSec = ((Date.now() - start) / 1000).toFixed(1);
181
- console.log(`Assistant ready after ${elapsedSec}s`);
182
- return true;
183
- }
184
- let detail = "";
185
- try {
186
- const body = await resp.text();
187
- const json = JSON.parse(body);
188
- const parts = [json.status];
189
- if (json.upstream != null) parts.push(`upstream=${json.upstream}`);
190
- detail = ` — ${parts.join(", ")}`;
191
- } catch {
192
- // ignore parse errors
193
- }
194
- console.log(`Readiness check: ${resp.status}${detail} (retrying...)`);
195
- } catch {
196
- // Connection refused / timeout — not up yet
197
- }
198
- await new Promise((r) => setTimeout(r, 1000));
199
- }
200
-
201
- return false;
202
- }
203
-
204
- /**
205
- * Best-effort broadcast of an upgrade lifecycle event to connected clients
206
- * via the gateway's upgrade-broadcast proxy. Uses guardian token auth.
207
- * Failures are logged but never block the upgrade flow.
208
- */
209
- export async function broadcastUpgradeEvent(
210
- gatewayUrl: string,
211
- assistantId: string,
212
- event: Record<string, unknown>,
213
- ): Promise<void> {
214
- try {
215
- const token = loadGuardianToken(assistantId);
216
- const headers: Record<string, string> = {
217
- "Content-Type": "application/json",
218
- };
219
- if (token?.accessToken) {
220
- headers["Authorization"] = `Bearer ${token.accessToken}`;
221
- }
222
- await fetch(`${gatewayUrl}/v1/admin/upgrade-broadcast`, {
223
- method: "POST",
224
- headers,
225
- body: JSON.stringify(event),
226
- signal: AbortSignal.timeout(3000),
227
- });
228
- } catch {
229
- // Best-effort — gateway/daemon may already be shutting down or not yet ready
230
- }
231
- }
232
-
233
184
  async function upgradeDocker(
234
185
  entry: AssistantEntry,
235
186
  version: string | null,
@@ -239,11 +190,31 @@ async function upgradeDocker(
239
190
 
240
191
  const versionTag =
241
192
  version ?? (cliPkg.version ? `v${cliPkg.version}` : "latest");
242
- const imageTags: Record<ServiceName, string> = {
243
- assistant: `${DOCKERHUB_IMAGES.assistant}:${versionTag}`,
244
- "credential-executor": `${DOCKERHUB_IMAGES["credential-executor"]}:${versionTag}`,
245
- gateway: `${DOCKERHUB_IMAGES.gateway}:${versionTag}`,
246
- };
193
+
194
+ // Reject downgrades — `vellum upgrade` only handles forward version changes.
195
+ // Users should use `vellum rollback --version <version>` for downgrades.
196
+ const currentVersion = entry.serviceGroupVersion;
197
+ if (currentVersion && versionTag) {
198
+ const current = parseVersion(currentVersion);
199
+ const target = parseVersion(versionTag);
200
+ if (current && target) {
201
+ const isOlder =
202
+ target.major < current.major ||
203
+ (target.major === current.major && target.minor < current.minor) ||
204
+ (target.major === current.major &&
205
+ target.minor === current.minor &&
206
+ target.patch < current.patch);
207
+ if (isOlder) {
208
+ const msg = `Cannot upgrade to an older version (${versionTag} < ${currentVersion}). Use \`vellum rollback --version ${versionTag}\` instead.`;
209
+ console.error(msg);
210
+ emitCliError("VERSION_DIRECTION", msg);
211
+ process.exit(1);
212
+ }
213
+ }
214
+ }
215
+
216
+ console.log("🔍 Resolving image references...");
217
+ const { imageTags } = await resolveImageRefs(versionTag);
247
218
 
248
219
  console.log(
249
220
  `🔄 Upgrading Docker assistant '${instanceName}' to ${versionTag}...\n`,
@@ -265,6 +236,29 @@ async function upgradeDocker(
265
236
  );
266
237
  }
267
238
 
239
+ // Capture current migration state for rollback targeting.
240
+ // Must happen while daemon is still running (before containers are stopped).
241
+ let preMigrationState: {
242
+ dbVersion?: number;
243
+ lastWorkspaceMigrationId?: string;
244
+ } = {};
245
+ try {
246
+ const healthResp = await fetch(
247
+ `${entry.runtimeUrl}/healthz?include=migrations`,
248
+ {
249
+ signal: AbortSignal.timeout(5000),
250
+ },
251
+ );
252
+ if (healthResp.ok) {
253
+ const health = (await healthResp.json()) as {
254
+ migrations?: { dbVersion?: number; lastWorkspaceMigrationId?: string };
255
+ };
256
+ preMigrationState = health.migrations ?? {};
257
+ }
258
+ } catch {
259
+ // Best-effort — if we can't get migration state, rollback will skip migration reversal
260
+ }
261
+
268
262
  // Persist rollback state to lockfile BEFORE any destructive changes.
269
263
  // This enables the `vellum rollback` command to restore the previous version.
270
264
  if (entry.serviceGroupVersion && entry.containerInfo) {
@@ -272,36 +266,73 @@ async function upgradeDocker(
272
266
  ...entry,
273
267
  previousServiceGroupVersion: entry.serviceGroupVersion,
274
268
  previousContainerInfo: { ...entry.containerInfo },
269
+ previousDbMigrationVersion: preMigrationState.dbVersion,
270
+ previousWorkspaceMigrationId: preMigrationState.lastWorkspaceMigrationId,
275
271
  };
276
272
  saveAssistantEntry(rollbackEntry);
277
273
  console.log(` Saved rollback state: ${entry.serviceGroupVersion}\n`);
278
274
  }
279
275
 
276
+ // Record version transition start in workspace git history
277
+ await commitWorkspaceViaGateway(
278
+ entry.runtimeUrl,
279
+ entry.assistantId,
280
+ buildUpgradeCommitMessage({
281
+ action: "upgrade",
282
+ phase: "starting",
283
+ from: entry.serviceGroupVersion ?? "unknown",
284
+ to: versionTag,
285
+ topology: "docker",
286
+ assistantId: entry.assistantId,
287
+ }),
288
+ );
289
+
280
290
  console.log("💾 Capturing existing container environment...");
281
291
  const capturedEnv = await captureContainerEnv(res.assistantContainer);
282
292
  console.log(
283
293
  ` Captured ${Object.keys(capturedEnv).length} env var(s) from ${res.assistantContainer}\n`,
284
294
  );
285
295
 
286
- console.log("📦 Pulling new Docker images...");
287
- await exec("docker", ["pull", imageTags.assistant]);
288
- await exec("docker", ["pull", imageTags.gateway]);
289
- await exec("docker", ["pull", imageTags["credential-executor"]]);
290
- console.log("✅ Docker images pulled\n");
291
-
292
296
  // Notify connected clients that an upgrade is about to begin.
297
+ // This must fire BEFORE any progress broadcasts so the UI sets
298
+ // isUpdateInProgress = true and starts displaying status messages.
293
299
  console.log("📢 Notifying connected clients...");
294
- await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
295
- type: "starting",
296
- targetVersion: versionTag,
297
- expectedDowntimeSeconds: 60,
298
- });
299
- // Brief pause to allow SSE delivery before containers stop.
300
+ await broadcastUpgradeEvent(
301
+ entry.runtimeUrl,
302
+ entry.assistantId,
303
+ buildStartingEvent(versionTag),
304
+ );
305
+ // Brief pause to allow SSE delivery before progress events.
300
306
  await new Promise((r) => setTimeout(r, 500));
301
307
 
302
- console.log("🛑 Stopping existing containers...");
303
- await stopContainers(res);
304
- console.log("✅ Containers stopped\n");
308
+ await broadcastUpgradeEvent(
309
+ entry.runtimeUrl,
310
+ entry.assistantId,
311
+ buildProgressEvent(UPGRADE_PROGRESS.DOWNLOADING),
312
+ );
313
+ console.log("📦 Pulling new Docker images...");
314
+ const pullImages: Array<[string, string]> = [
315
+ ["assistant", imageTags.assistant],
316
+ ["gateway", imageTags.gateway],
317
+ ["credential-executor", imageTags["credential-executor"]],
318
+ ];
319
+ try {
320
+ for (const [service, image] of pullImages) {
321
+ console.log(` Pulling ${service}: ${image}`);
322
+ await exec("docker", ["pull", image]);
323
+ }
324
+ } catch (pullErr) {
325
+ const detail = pullErr instanceof Error ? pullErr.message : String(pullErr);
326
+ console.error(`\n❌ Failed to pull Docker images: ${detail}`);
327
+ await broadcastUpgradeEvent(
328
+ entry.runtimeUrl,
329
+ entry.assistantId,
330
+ buildCompleteEvent(entry.serviceGroupVersion ?? "unknown", false),
331
+ );
332
+ emitCliError("IMAGE_PULL_FAILED", "Failed to pull Docker images", detail);
333
+ process.exit(1);
334
+ }
335
+ console.log("✅ Docker images pulled\n");
305
336
 
306
337
  // Parse gateway port from entry's runtimeUrl, fall back to default
307
338
  let gatewayPort = GATEWAY_INTERNAL_PORT;
@@ -322,20 +353,57 @@ async function upgradeDocker(
322
353
  const cesServiceToken =
323
354
  capturedEnv["CES_SERVICE_TOKEN"] || randomBytes(32).toString("hex");
324
355
 
325
- // Retrieve or generate a bootstrap secret for the gateway. The secret was
326
- // persisted to disk during hatch; older instances won't have one yet.
327
- const bootstrapSecret =
328
- loadBootstrapSecret(instanceName) || randomBytes(32).toString("hex");
356
+ // Extract or generate the shared JWT signing key. Pre-env-var instances
357
+ // won't have it in capturedEnv, so generate fresh in that case.
358
+ const signingKey =
359
+ capturedEnv["ACTOR_TOKEN_SIGNING_KEY"] || randomBytes(32).toString("hex");
360
+
361
+ // Create pre-upgrade backup (best-effort, daemon must be running)
362
+ await broadcastUpgradeEvent(
363
+ entry.runtimeUrl,
364
+ entry.assistantId,
365
+ buildProgressEvent(UPGRADE_PROGRESS.BACKING_UP),
366
+ );
367
+ console.log("📦 Creating pre-upgrade backup...");
368
+ const backupPath = await createBackup(entry.runtimeUrl, entry.assistantId, {
369
+ prefix: `${entry.assistantId}-pre-upgrade`,
370
+ description: `Pre-upgrade snapshot before ${entry.serviceGroupVersion ?? "unknown"} → ${versionTag}`,
371
+ });
372
+ if (backupPath) {
373
+ console.log(` Backup saved: ${backupPath}\n`);
374
+ // Clean up old pre-upgrade backups, keep last 3
375
+ pruneOldBackups(entry.assistantId, 3);
376
+ } else {
377
+ console.warn("⚠️ Pre-upgrade backup failed (continuing with upgrade)\n");
378
+ }
379
+
380
+ // Persist the backup path so `vellum rollback` can restore the exact backup
381
+ // created for this upgrade attempt — never a stale backup from a prior cycle.
382
+ // Re-read the entry to pick up the rollback state saved earlier.
383
+ {
384
+ const current = findAssistantByName(entry.assistantId);
385
+ if (current) {
386
+ saveAssistantEntry({
387
+ ...current,
388
+ preUpgradeBackupPath: backupPath ?? undefined,
389
+ });
390
+ }
391
+ }
392
+
393
+ await broadcastUpgradeEvent(
394
+ entry.runtimeUrl,
395
+ entry.assistantId,
396
+ buildProgressEvent(UPGRADE_PROGRESS.INSTALLING),
397
+ );
398
+
399
+ console.log("🛑 Stopping existing containers...");
400
+ await stopContainers(res);
401
+ console.log("✅ Containers stopped\n");
329
402
 
330
403
  // Build the set of extra env vars to replay on the new assistant container.
331
404
  // Captured env vars serve as the base; keys already managed by
332
405
  // serviceDockerRunArgs are excluded to avoid duplicates.
333
- const envKeysSetByRunArgs = new Set([
334
- "CES_SERVICE_TOKEN",
335
- "VELLUM_ASSISTANT_NAME",
336
- "RUNTIME_HTTP_HOST",
337
- "PATH",
338
- ]);
406
+ const envKeysSetByRunArgs = new Set(CONTAINER_ENV_EXCLUDE_KEYS);
339
407
  // Only exclude keys that serviceDockerRunArgs will actually set
340
408
  for (const envVar of ["ANTHROPIC_API_KEY", "VELLUM_PLATFORM_URL"]) {
341
409
  if (process.env[envVar]) {
@@ -355,13 +423,10 @@ async function upgradeDocker(
355
423
  console.log("🔄 Migrating credential files to CES security volume...");
356
424
  await migrateCesSecurityFiles(res, (msg) => console.log(msg));
357
425
 
358
- console.log("🔑 Clearing signing key bootstrap lock...");
359
- await clearSigningKeyBootstrapLock(res);
360
-
361
426
  console.log("🚀 Starting upgraded containers...");
362
427
  await startContainers(
363
428
  {
364
- bootstrapSecret,
429
+ signingKey,
365
430
  cesServiceToken,
366
431
  extraAssistantEnv,
367
432
  gatewayPort,
@@ -392,15 +457,34 @@ async function upgradeDocker(
392
457
  },
393
458
  previousServiceGroupVersion: entry.serviceGroupVersion,
394
459
  previousContainerInfo: entry.containerInfo,
460
+ previousDbMigrationVersion: preMigrationState.dbVersion,
461
+ previousWorkspaceMigrationId: preMigrationState.lastWorkspaceMigrationId,
462
+ // Preserve the backup path so `vellum rollback` can restore it later
463
+ preUpgradeBackupPath: backupPath ?? undefined,
395
464
  };
396
465
  saveAssistantEntry(updatedEntry);
397
466
 
398
467
  // Notify clients on the new service group that the upgrade succeeded.
399
- await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
400
- type: "complete",
401
- installedVersion: versionTag,
402
- success: true,
403
- });
468
+ await broadcastUpgradeEvent(
469
+ entry.runtimeUrl,
470
+ entry.assistantId,
471
+ buildCompleteEvent(versionTag, true),
472
+ );
473
+
474
+ // Record successful upgrade in workspace git history
475
+ await commitWorkspaceViaGateway(
476
+ entry.runtimeUrl,
477
+ entry.assistantId,
478
+ buildUpgradeCommitMessage({
479
+ action: "upgrade",
480
+ phase: "complete",
481
+ from: entry.serviceGroupVersion ?? "unknown",
482
+ to: versionTag,
483
+ topology: "docker",
484
+ assistantId: entry.assistantId,
485
+ result: "success",
486
+ }),
487
+ );
404
488
 
405
489
  console.log(
406
490
  `\n✅ Docker assistant '${instanceName}' upgraded to ${versionTag}.`,
@@ -409,13 +493,41 @@ async function upgradeDocker(
409
493
  console.error(`\n❌ Containers failed to become ready within the timeout.`);
410
494
 
411
495
  if (previousImageRefs) {
496
+ await broadcastUpgradeEvent(
497
+ entry.runtimeUrl,
498
+ entry.assistantId,
499
+ buildProgressEvent(UPGRADE_PROGRESS.REVERTING),
500
+ );
412
501
  console.log(`\n🔄 Rolling back to previous images...`);
413
502
  try {
503
+ // Attempt to roll back migrations before swapping containers.
504
+ // The new daemon may be partially up — try best-effort.
505
+ if (
506
+ preMigrationState.dbVersion !== undefined ||
507
+ preMigrationState.lastWorkspaceMigrationId !== undefined
508
+ ) {
509
+ console.log("🔄 Reverting database changes...");
510
+ await broadcastUpgradeEvent(
511
+ entry.runtimeUrl,
512
+ entry.assistantId,
513
+ buildProgressEvent(UPGRADE_PROGRESS.REVERTING_MIGRATIONS),
514
+ );
515
+ await rollbackMigrations(
516
+ entry.runtimeUrl,
517
+ entry.assistantId,
518
+ preMigrationState.dbVersion,
519
+ preMigrationState.lastWorkspaceMigrationId,
520
+ );
521
+ }
522
+
414
523
  await stopContainers(res);
415
524
 
525
+ await migrateGatewaySecurityFiles(res, (msg) => console.log(msg));
526
+ await migrateCesSecurityFiles(res, (msg) => console.log(msg));
527
+
416
528
  await startContainers(
417
529
  {
418
- bootstrapSecret,
530
+ signingKey,
419
531
  cesServiceToken,
420
532
  extraAssistantEnv,
421
533
  gatewayPort,
@@ -428,46 +540,90 @@ async function upgradeDocker(
428
540
 
429
541
  const rollbackReady = await waitForReady(entry.runtimeUrl);
430
542
  if (rollbackReady) {
431
- // Restore previous container info in lockfile after rollback.
432
- // previousImageRefs contains sha256 digests from `docker inspect
433
- // --format {{.Image}}`. The *Image fields should hold
434
- // human-readable image:tag names, so prefer the pre-upgrade
435
- // containerInfo values and store digests in the *Digest fields.
436
- if (previousImageRefs) {
437
- const rolledBackEntry: AssistantEntry = {
438
- ...entry,
439
- containerInfo: {
440
- assistantImage:
441
- entry.containerInfo?.assistantImage ??
442
- previousImageRefs.assistant,
443
- gatewayImage:
444
- entry.containerInfo?.gatewayImage ??
445
- previousImageRefs.gateway,
446
- cesImage:
447
- entry.containerInfo?.cesImage ??
448
- previousImageRefs["credential-executor"],
449
- assistantDigest: previousImageRefs.assistant,
450
- gatewayDigest: previousImageRefs.gateway,
451
- cesDigest: previousImageRefs["credential-executor"],
452
- networkName: res.network,
453
- },
454
- previousServiceGroupVersion: undefined,
455
- previousContainerInfo: undefined,
456
- };
457
- saveAssistantEntry(rolledBackEntry);
543
+ // Restore data from the backup created for THIS upgrade attempt.
544
+ // Only use the specific backupPath — never scan for the latest
545
+ // backup on disk, which could be from a previous upgrade cycle
546
+ // and contain stale data.
547
+ if (backupPath) {
548
+ await broadcastUpgradeEvent(
549
+ entry.runtimeUrl,
550
+ entry.assistantId,
551
+ buildProgressEvent(UPGRADE_PROGRESS.RESTORING),
552
+ );
553
+ console.log(`📦 Restoring data from pre-upgrade backup...`);
554
+ console.log(` Source: ${backupPath}`);
555
+ const restored = await restoreBackup(
556
+ entry.runtimeUrl,
557
+ entry.assistantId,
558
+ backupPath,
559
+ );
560
+ if (restored) {
561
+ console.log(" ✅ Data restored successfully\n");
562
+ } else {
563
+ console.warn(
564
+ " ⚠️ Data restore failed (rollback continues without data restoration)\n",
565
+ );
566
+ }
567
+ } else {
568
+ console.log(
569
+ "ℹ️ No pre-upgrade backup was created for this attempt, skipping data restoration\n",
570
+ );
458
571
  }
459
572
 
573
+ // Capture fresh digests from the now-running rolled-back containers.
574
+ const rollbackDigests = await captureImageRefs(res);
575
+
576
+ // Restore previous container info in lockfile after rollback.
577
+ // The *Image fields hold human-readable image:tag names from the
578
+ // pre-upgrade containerInfo; *Digest fields get fresh values from
579
+ // the running containers (or fall back to previousImageRefs).
580
+ const rolledBackEntry: AssistantEntry = {
581
+ ...entry,
582
+ containerInfo: {
583
+ assistantImage:
584
+ entry.containerInfo?.assistantImage ??
585
+ previousImageRefs.assistant,
586
+ gatewayImage:
587
+ entry.containerInfo?.gatewayImage ?? previousImageRefs.gateway,
588
+ cesImage:
589
+ entry.containerInfo?.cesImage ??
590
+ previousImageRefs["credential-executor"],
591
+ assistantDigest:
592
+ rollbackDigests?.assistant ?? previousImageRefs.assistant,
593
+ gatewayDigest:
594
+ rollbackDigests?.gateway ?? previousImageRefs.gateway,
595
+ cesDigest:
596
+ rollbackDigests?.["credential-executor"] ??
597
+ previousImageRefs["credential-executor"],
598
+ networkName: res.network,
599
+ },
600
+ previousServiceGroupVersion: undefined,
601
+ previousContainerInfo: undefined,
602
+ previousDbMigrationVersion: undefined,
603
+ previousWorkspaceMigrationId: undefined,
604
+ // Clear the backup path — the upgrade that created it just failed
605
+ preUpgradeBackupPath: undefined,
606
+ };
607
+ saveAssistantEntry(rolledBackEntry);
608
+
460
609
  // Notify clients that the upgrade failed and rolled back.
461
- await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
462
- type: "complete",
463
- installedVersion: entry.serviceGroupVersion ?? "unknown",
464
- success: false,
465
- rolledBackToVersion: entry.serviceGroupVersion,
466
- });
610
+ await broadcastUpgradeEvent(
611
+ entry.runtimeUrl,
612
+ entry.assistantId,
613
+ buildCompleteEvent(
614
+ entry.serviceGroupVersion ?? "unknown",
615
+ false,
616
+ entry.serviceGroupVersion,
617
+ ),
618
+ );
467
619
 
468
620
  console.log(
469
621
  `\n⚠️ Rolled back to previous version. Upgrade to ${versionTag} failed.`,
470
622
  );
623
+ emitCliError(
624
+ "READINESS_TIMEOUT",
625
+ `Upgrade to ${versionTag} failed: containers did not become ready. Rolled back to previous version.`,
626
+ );
471
627
  } else {
472
628
  console.error(
473
629
  `\n❌ Rollback also failed. Manual intervention required.`,
@@ -475,21 +631,51 @@ async function upgradeDocker(
475
631
  console.log(
476
632
  ` Check logs with: docker logs -f ${res.assistantContainer}`,
477
633
  );
634
+ await broadcastUpgradeEvent(
635
+ entry.runtimeUrl,
636
+ entry.assistantId,
637
+ buildCompleteEvent(entry.serviceGroupVersion ?? "unknown", false),
638
+ );
639
+ emitCliError(
640
+ "ROLLBACK_FAILED",
641
+ "Rollback also failed after readiness timeout. Manual intervention required.",
642
+ );
478
643
  }
479
644
  } catch (rollbackErr) {
480
- console.error(
481
- `\n❌ Rollback failed: ${rollbackErr instanceof Error ? rollbackErr.message : String(rollbackErr)}`,
482
- );
645
+ const rollbackDetail =
646
+ rollbackErr instanceof Error
647
+ ? rollbackErr.message
648
+ : String(rollbackErr);
649
+ console.error(`\n❌ Rollback failed: ${rollbackDetail}`);
483
650
  console.error(` Manual intervention required.`);
484
651
  console.log(
485
652
  ` Check logs with: docker logs -f ${res.assistantContainer}`,
486
653
  );
654
+ await broadcastUpgradeEvent(
655
+ entry.runtimeUrl,
656
+ entry.assistantId,
657
+ buildCompleteEvent(entry.serviceGroupVersion ?? "unknown", false),
658
+ );
659
+ emitCliError(
660
+ "ROLLBACK_FAILED",
661
+ "Auto-rollback failed after readiness timeout. Manual intervention required.",
662
+ rollbackDetail,
663
+ );
487
664
  }
488
665
  } else {
489
666
  console.log(` No previous images available for rollback.`);
490
667
  console.log(
491
668
  ` Check logs with: docker logs -f ${res.assistantContainer}`,
492
669
  );
670
+ await broadcastUpgradeEvent(
671
+ entry.runtimeUrl,
672
+ entry.assistantId,
673
+ buildCompleteEvent(entry.serviceGroupVersion ?? "unknown", false),
674
+ );
675
+ emitCliError(
676
+ "ROLLBACK_NO_STATE",
677
+ "Containers failed to become ready and no previous images available for rollback.",
678
+ );
493
679
  }
494
680
 
495
681
  process.exit(1);
@@ -505,15 +691,55 @@ async function upgradePlatform(
505
691
  entry: AssistantEntry,
506
692
  version: string | null,
507
693
  ): Promise<void> {
694
+ // Reject downgrades — `vellum upgrade` only handles forward version changes.
695
+ // Users should use `vellum rollback --version <version>` for downgrades.
696
+ // Only enforce this guard when the user explicitly passed `--version`.
697
+ // When version is null the platform API decides the actual target, so
698
+ // we must not block the request based on the local CLI version.
699
+ const currentVersion = entry.serviceGroupVersion;
700
+ if (version && currentVersion) {
701
+ const current = parseVersion(currentVersion);
702
+ const target = parseVersion(version);
703
+ if (current && target) {
704
+ const isOlder =
705
+ target.major < current.major ||
706
+ (target.major === current.major && target.minor < current.minor) ||
707
+ (target.major === current.major &&
708
+ target.minor === current.minor &&
709
+ target.patch < current.patch);
710
+ if (isOlder) {
711
+ const msg = `Cannot upgrade to an older version (${version} < ${currentVersion}). Use \`vellum rollback --version ${version}\` instead.`;
712
+ console.error(msg);
713
+ emitCliError("VERSION_DIRECTION", msg);
714
+ process.exit(1);
715
+ }
716
+ }
717
+ }
718
+
719
+ // Record version transition start in workspace git history
720
+ await commitWorkspaceViaGateway(
721
+ entry.runtimeUrl,
722
+ entry.assistantId,
723
+ buildUpgradeCommitMessage({
724
+ action: "upgrade",
725
+ phase: "starting",
726
+ from: entry.serviceGroupVersion ?? "unknown",
727
+ to: version ?? "latest",
728
+ topology: "managed",
729
+ assistantId: entry.assistantId,
730
+ }),
731
+ );
732
+
508
733
  console.log(
509
734
  `🔄 Upgrading platform-hosted assistant '${entry.assistantId}'...\n`,
510
735
  );
511
736
 
512
737
  const token = readPlatformToken();
513
738
  if (!token) {
514
- console.error(
515
- "Error: Not logged in. Run `vellum login --token <token>` first.",
516
- );
739
+ const msg =
740
+ "Error: Not logged in. Run `vellum login --token <token>` first.";
741
+ console.error(msg);
742
+ emitCliError("AUTH_FAILED", msg);
517
743
  process.exit(1);
518
744
  }
519
745
 
@@ -530,11 +756,11 @@ async function upgradePlatform(
530
756
  // Notify connected clients that an upgrade is about to begin.
531
757
  const targetVersion = version ?? `v${cliPkg.version}`;
532
758
  console.log("📢 Notifying connected clients...");
533
- await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
534
- type: "starting",
535
- targetVersion,
536
- expectedDowntimeSeconds: 90,
537
- });
759
+ await broadcastUpgradeEvent(
760
+ entry.runtimeUrl,
761
+ entry.assistantId,
762
+ buildStartingEvent(targetVersion, 90),
763
+ );
538
764
 
539
765
  const response = await fetch(url, {
540
766
  method: "POST",
@@ -551,11 +777,16 @@ async function upgradePlatform(
551
777
  console.error(
552
778
  `Error: Platform upgrade failed (${response.status}): ${text}`,
553
779
  );
554
- await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
555
- type: "complete",
556
- installedVersion: entry.serviceGroupVersion ?? "unknown",
557
- success: false,
558
- });
780
+ emitCliError(
781
+ "PLATFORM_API_ERROR",
782
+ `Platform upgrade failed (${response.status})`,
783
+ text,
784
+ );
785
+ await broadcastUpgradeEvent(
786
+ entry.runtimeUrl,
787
+ entry.assistantId,
788
+ buildCompleteEvent(entry.serviceGroupVersion ?? "unknown", false),
789
+ );
559
790
  process.exit(1);
560
791
  }
561
792
 
@@ -568,29 +799,168 @@ async function upgradePlatform(
568
799
  // version-change detection (DaemonConnection.swift) once the new
569
800
  // version actually appears after the platform restarts the service group.
570
801
 
802
+ // Record successful upgrade in workspace git history
803
+ await commitWorkspaceViaGateway(
804
+ entry.runtimeUrl,
805
+ entry.assistantId,
806
+ buildUpgradeCommitMessage({
807
+ action: "upgrade",
808
+ phase: "complete",
809
+ from: entry.serviceGroupVersion ?? "unknown",
810
+ to: version ?? "latest",
811
+ topology: "managed",
812
+ assistantId: entry.assistantId,
813
+ result: "success",
814
+ }),
815
+ );
816
+
571
817
  console.log(`✅ ${result.detail}`);
572
818
  if (result.version) {
573
819
  console.log(` Version: ${result.version}`);
574
820
  }
575
821
  }
576
822
 
823
/**
 * Pre-update half of the Sparkle (macOS app) upgrade lifecycle.
 *
 * Nothing is swapped here; this only performs the orchestration that has to
 * happen before the update is installed, in this order:
 *   1. broadcast a "starting" event,
 *   2. record the pre-update state as a workspace commit,
 *   3. broadcast a "saving backup" progress event,
 *   4. create a backup,
 *   5. prune old backups (only when a backup path was returned),
 *   6. broadcast the "installing" progress event,
 *   7. print `BACKUP_PATH:<path>` on stdout (only when a backup path was
 *      returned) so the macOS app can parse it.
 *
 * @param entry   Assistant whose runtime receives the events and commit.
 * @param version Target version; when null, the entry's current
 *                `serviceGroupVersion` (or "unknown") is used instead.
 */
async function upgradePrepare(
  entry: AssistantEntry,
  version: string | null,
): Promise<void> {
  const { runtimeUrl, assistantId } = entry;
  const fromVersion = entry.serviceGroupVersion ?? "unknown";
  const toVersion = version ?? fromVersion;

  // Every broadcast in this function targets the same runtime/assistant pair.
  const notify = (event: Parameters<typeof broadcastUpgradeEvent>[2]) =>
    broadcastUpgradeEvent(runtimeUrl, assistantId, event);

  // Tell the UI an upgrade is starting so it can show its progress spinner.
  await notify(buildStartingEvent(toVersion, 30));

  // Snapshot the workspace state as it is before the update.
  await commitWorkspaceViaGateway(
    runtimeUrl,
    assistantId,
    `[sparkle-update] Starting: ${fromVersion} → ${toVersion}`,
  );

  await notify(buildProgressEvent("Saving a backup of your data…"));

  const backupPath = await createBackup(runtimeUrl, assistantId, {
    prefix: `${assistantId}-pre-upgrade`,
    description: `Pre-upgrade snapshot before ${fromVersion} → ${toVersion}`,
  });

  // Only prune once a new backup exists; keeps the 3 most recent.
  if (backupPath) {
    pruneOldBackups(assistantId, 3);
  }

  await notify(buildProgressEvent(UPGRADE_PROGRESS.INSTALLING));

  // Machine-readable line consumed by the macOS app.
  if (backupPath) {
    console.log(`BACKUP_PATH:${backupPath}`);
  }
}
880
+
881
/**
 * Post-update half of the Sparkle (macOS app) upgrade lifecycle.
 *
 * Intended to run after the app has been replaced and the daemon is back up.
 * Broadcasts a successful "complete" event and then records the transition
 * as a workspace commit.
 *
 * @param entry   Assistant whose runtime receives the event and commit.
 * @param version The version that was upgraded *from*. Required: when it is
 *                missing (null or empty) an error is reported and the
 *                process exits with code 1.
 */
async function upgradeFinalize(
  entry: AssistantEntry,
  version: string | null,
): Promise<void> {
  if (!version) {
    const reason =
      "--finalize requires --version <from-version> to record the transition";
    console.error(`Error: ${reason}.`);
    emitCliError("UNKNOWN", reason);
    process.exit(1);
  }

  const previousVersion = version;

  // Prefer the CLI package's own version as the installed version; fall back
  // to whatever the assistant entry recorded.
  const installedVersion = cliPkg.version
    ? `v${cliPkg.version}`
    : (entry.serviceGroupVersion ?? "unknown");

  // Lets the UI clear its progress spinner.
  await broadcastUpgradeEvent(
    entry.runtimeUrl,
    entry.assistantId,
    buildCompleteEvent(installedVersion, true),
  );

  // Record the successful update in the workspace history.
  const commitMessage = `[sparkle-update] Complete: ${previousVersion} → ${installedVersion}\n\nresult: success`;
  await commitWorkspaceViaGateway(
    entry.runtimeUrl,
    entry.assistantId,
    commitMessage,
  );
}
920
+
577
921
  export async function upgrade(): Promise<void> {
578
- const { name, version } = parseArgs();
922
+ const { name, version, prepare, finalize } = parseArgs();
579
923
  const entry = resolveTargetAssistant(name);
580
- const cloud = resolveCloud(entry);
581
924
 
582
- if (cloud === "docker") {
583
- await upgradeDocker(entry, version);
925
+ if (prepare) {
926
+ await upgradePrepare(entry, version);
584
927
  return;
585
928
  }
586
929
 
587
- if (cloud === "vellum") {
588
- await upgradePlatform(entry, version);
930
+ if (finalize) {
931
+ await upgradeFinalize(entry, version);
589
932
  return;
590
933
  }
591
934
 
592
- console.error(
593
- `Error: Upgrade is not supported for '${cloud}' assistants. Only 'docker' and 'vellum' assistants can be upgraded via the CLI.`,
594
- );
935
+ const cloud = resolveCloud(entry);
936
+
937
+ try {
938
+ if (cloud === "docker") {
939
+ await upgradeDocker(entry, version);
940
+ return;
941
+ }
942
+
943
+ if (cloud === "vellum") {
944
+ await upgradePlatform(entry, version);
945
+ return;
946
+ }
947
+ } catch (err) {
948
+ const detail = err instanceof Error ? err.message : String(err);
949
+ console.error(`\n❌ Upgrade failed: ${detail}`);
950
+ // Best-effort: notify connected clients that the upgrade failed.
951
+ // A `starting` event may have been sent inside upgradeDocker/upgradePlatform
952
+ // before the error was thrown, so we must close with `complete`.
953
+ await broadcastUpgradeEvent(
954
+ entry.runtimeUrl,
955
+ entry.assistantId,
956
+ buildCompleteEvent(entry.serviceGroupVersion ?? "unknown", false),
957
+ );
958
+ emitCliError(categorizeUpgradeError(err), "Upgrade failed", detail);
959
+ process.exit(1);
960
+ }
961
+
962
+ const msg = `Error: Upgrade is not supported for '${cloud}' assistants. Only 'docker' and 'vellum' assistants can be upgraded via the CLI.`;
963
+ console.error(msg);
964
+ emitCliError("UNSUPPORTED_TOPOLOGY", msg);
595
965
  process.exit(1);
596
966
  }