@vellumai/cli 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,443 @@
1
+ import { randomBytes } from "crypto";
2
+ import { join } from "path";
3
+
4
+ import {
5
+ findAssistantByName,
6
+ getActiveAssistant,
7
+ loadAllAssistants,
8
+ saveAssistantEntry,
9
+ } from "../lib/assistant-config";
10
+ import type { AssistantEntry } from "../lib/assistant-config";
11
+ import {
12
+ captureImageRefs,
13
+ GATEWAY_INTERNAL_PORT,
14
+ dockerResourceNames,
15
+ migrateCesSecurityFiles,
16
+ migrateGatewaySecurityFiles,
17
+ startContainers,
18
+ stopContainers,
19
+ } from "../lib/docker";
20
+ import type { ServiceName } from "../lib/docker";
21
+ import {
22
+ loadBootstrapSecret,
23
+ saveBootstrapSecret,
24
+ } from "../lib/guardian-token";
25
+ import { restoreBackup } from "../lib/backup-ops.js";
26
+ import { emitCliError, categorizeUpgradeError } from "../lib/cli-error.js";
27
+ import {
28
+ broadcastUpgradeEvent,
29
+ buildCompleteEvent,
30
+ buildProgressEvent,
31
+ buildStartingEvent,
32
+ buildUpgradeCommitMessage,
33
+ captureContainerEnv,
34
+ CONTAINER_ENV_EXCLUDE_KEYS,
35
+ rollbackMigrations,
36
+ UPGRADE_PROGRESS,
37
+ waitForReady,
38
+ } from "../lib/upgrade-lifecycle.js";
39
+ import { commitWorkspaceState } from "../lib/workspace-git.js";
40
+
41
/**
 * Parse the command-line arguments of `vellum rollback`.
 *
 * Looks at everything in `process.argv` from index 3 onwards:
 * - `--help` / `-h` prints the usage text and exits with status 0.
 * - Any other argument starting with `-` is reported as an unknown option
 *   (both on stderr and through `emitCliError`) and exits with status 1.
 * - Any remaining argument is taken as the assistant name; when several are
 *   supplied the last one wins.
 *
 * @returns The requested assistant name, or `null` when none was given.
 */
function parseArgs(): { name: string | null } {
  const usageLines = [
    "Usage: vellum rollback [<name>]",
    "",
    "Roll back a Docker assistant to the previous version.",
    "",
    "Arguments:",
    " <name> Name of the assistant to roll back (default: active or only assistant)",
    "",
    "Examples:",
    " vellum rollback # Roll back the active assistant",
    " vellum rollback my-assistant # Roll back a specific assistant by name",
  ];

  let name: string | null = null;

  for (const token of process.argv.slice(3)) {
    const wantsHelp = token === "--help" || token === "-h";
    if (wantsHelp) {
      for (const line of usageLines) {
        console.log(line);
      }
      process.exit(0);
    } else if (token.startsWith("-")) {
      console.error(`Error: Unknown option '${token}'.`);
      emitCliError("UNKNOWN", `Unknown option '${token}'`);
      process.exit(1);
    } else {
      // Positional argument: the assistant to roll back.
      name = token;
    }
  }

  return { name };
}
76
+
77
/**
 * Determine the hosting topology of an assistant entry.
 *
 * A non-empty `cloud` value on the entry is returned as-is. Otherwise the
 * topology is inferred from the other fields: an entry with a `project` is
 * "gcp", one with an `sshUser` is "custom", and everything else is "local".
 *
 * @param entry - The assistant configuration entry to classify.
 * @returns The explicit or inferred topology name.
 */
function resolveCloud(entry: AssistantEntry): string {
  const inferred = entry.project ? "gcp" : entry.sshUser ? "custom" : "local";
  return entry.cloud || inferred;
}
89
+
90
/**
 * Pick the assistant that the rollback command should operate on.
 *
 * Lookup order:
 *   1. The name passed on the command line (exits with an error if unknown)
 *   2. The active assistant selected via `vellum use`, if it still resolves
 *   3. The only configured assistant, when exactly one exists
 *
 * If none of these yields an entry, the problem is printed to stderr,
 * reported through `emitCliError`, and the process exits with status 1.
 *
 * @param nameArg - Assistant name from the command line, or `null` if omitted.
 * @returns The resolved assistant entry.
 */
function resolveTargetAssistant(nameArg: string | null): AssistantEntry {
  if (nameArg) {
    const named = findAssistantByName(nameArg);
    if (!named) {
      const notFound = `No assistant found with name '${nameArg}'.`;
      console.error(notFound);
      emitCliError("ASSISTANT_NOT_FOUND", notFound);
      process.exit(1);
    }
    return named;
  }

  const activeName = getActiveAssistant();
  if (activeName) {
    const activeEntry = findAssistantByName(activeName);
    if (activeEntry) {
      return activeEntry;
    }
  }

  const assistants = loadAllAssistants();
  if (assistants.length === 1) {
    return assistants[0];
  }

  // Zero or several assistants and nothing to disambiguate: explain and bail.
  const failure =
    assistants.length === 0
      ? "No assistants found. Run 'vellum hatch' first."
      : "Multiple assistants found. Specify a name or set an active assistant with 'vellum use <name>'.";
  console.error(failure);
  emitCliError("ASSISTANT_NOT_FOUND", failure);
  process.exit(1);
}
131
+
132
/**
 * Extract the gateway port from an assistant's runtime URL.
 * Returns `fallback` when the URL cannot be parsed or has no numeric port.
 */
function parseGatewayPort(runtimeUrl: string, fallback: number): number {
  try {
    const port = parseInt(new URL(runtimeUrl).port, 10);
    return isNaN(port) ? fallback : port;
  } catch {
    return fallback;
  }
}

/**
 * Select the captured container env vars that should be passed through as
 * extra assistant env. Keys listed in `CONTAINER_ENV_EXCLUDE_KEYS` are dropped,
 * as are `ANTHROPIC_API_KEY` / `VELLUM_PLATFORM_URL` when they are set in the
 * CLI's own environment.
 */
function collectExtraAssistantEnv(
  capturedEnv: Record<string, string>,
): Record<string, string> {
  const envKeysSetByRunArgs = new Set(CONTAINER_ENV_EXCLUDE_KEYS);
  for (const envVar of ["ANTHROPIC_API_KEY", "VELLUM_PLATFORM_URL"]) {
    if (process.env[envVar]) {
      envKeysSetByRunArgs.add(envVar);
    }
  }

  const extraAssistantEnv: Record<string, string> = {};
  for (const [key, value] of Object.entries(capturedEnv)) {
    if (!envKeysSetByRunArgs.has(key)) {
      extraAssistantEnv[key] = value;
    }
  }
  return extraAssistantEnv;
}

/**
 * Entry point of `vellum rollback`: restart a Docker assistant on the images
 * recorded before its last upgrade.
 *
 * Flow, in order:
 *   1. Resolve the target assistant and validate that it is a Docker
 *      assistant with complete rollback state (previous version plus all
 *      three previous image digests). Any failure exits with status 1.
 *   2. Commit the workspace state (best-effort) and capture the running
 *      assistant container's environment, reusing its service token and
 *      signing key when present and generating fresh ones otherwise.
 *   3. Broadcast a "starting" event, revert migrations if a previous
 *      migration marker was recorded, then stop the containers.
 *   4. Run the gateway/CES security-file migrations and start containers
 *      from the previous image refs.
 *   5. Once the assistant is ready: restore the persisted pre-upgrade backup
 *      (if any), swap current/previous state in the saved entry so the
 *      rollback can itself be rolled back, broadcast completion, and commit
 *      the workspace state again.
 *
 * If readiness times out or any step throws, a failed "complete" event is
 * broadcast, a CLI error is emitted, and the process exits with status 1.
 */
export async function rollback(): Promise<void> {
  const { name } = parseArgs();
  const entry = resolveTargetAssistant(name);
  const cloud = resolveCloud(entry);

  // Only Docker assistants support rollback
  if (cloud !== "docker") {
    const msg =
      "Rollback is only supported for Docker assistants. For managed assistants, use the version picker to upgrade to the previous version.";
    console.error(msg);
    emitCliError("UNSUPPORTED_TOPOLOGY", msg);
    process.exit(1);
  }

  // Verify rollback state exists
  if (!entry.previousServiceGroupVersion || !entry.previousContainerInfo) {
    const msg =
      "No rollback state available. Run `vellum upgrade` first to create a rollback point.";
    console.error(msg);
    emitCliError("ROLLBACK_NO_STATE", msg);
    process.exit(1);
  }

  // Verify all three digest fields are present
  const prev = entry.previousContainerInfo;
  if (!prev.assistantDigest || !prev.gatewayDigest || !prev.cesDigest) {
    const msg =
      "Incomplete rollback state. Previous container digests are missing.";
    console.error(msg);
    emitCliError("ROLLBACK_NO_STATE", msg);
    process.exit(1);
  }

  // Version we are rolling back to (validated above) and the one we leave.
  const targetVersion = entry.previousServiceGroupVersion;
  const currentVersion = entry.serviceGroupVersion ?? "unknown";

  // Build image refs from the previous digests
  const previousImageRefs: Record<ServiceName, string> = {
    assistant: prev.assistantDigest,
    "credential-executor": prev.cesDigest,
    gateway: prev.gatewayDigest,
  };

  const instanceName = entry.assistantId;
  const res = dockerResourceNames(instanceName);

  try {
    const workspaceDir = entry.resources
      ? join(entry.resources.instanceDir, ".vellum", "workspace")
      : undefined;

    // Record rollback start in workspace git history
    if (workspaceDir) {
      try {
        await commitWorkspaceState(
          workspaceDir,
          buildUpgradeCommitMessage({
            action: "rollback",
            phase: "starting",
            from: currentVersion,
            to: targetVersion,
            topology: "docker",
            assistantId: entry.assistantId,
          }),
        );
      } catch (err) {
        // A failed bookkeeping commit must not block the rollback itself.
        console.warn(
          `⚠️ Failed to create pre-rollback workspace commit: ${err instanceof Error ? err.message : String(err)}`,
        );
      }
    }

    console.log(
      `🔄 Rolling back Docker assistant '${instanceName}' to ${targetVersion}...\n`,
    );

    // Capture current container env
    console.log("💾 Capturing existing container environment...");
    const capturedEnv = await captureContainerEnv(res.assistantContainer);
    console.log(
      ` Captured ${Object.keys(capturedEnv).length} env var(s) from ${res.assistantContainer}\n`,
    );

    // Extract CES_SERVICE_TOKEN from captured env, or generate fresh one
    const cesServiceToken =
      capturedEnv["CES_SERVICE_TOKEN"] || randomBytes(32).toString("hex");

    // Retrieve or generate a bootstrap secret for the gateway.
    const loadedSecret = loadBootstrapSecret(instanceName);
    const bootstrapSecret = loadedSecret || randomBytes(32).toString("hex");
    if (!loadedSecret) {
      saveBootstrapSecret(instanceName, bootstrapSecret);
    }

    // Extract or generate the shared JWT signing key.
    const signingKey =
      capturedEnv["ACTOR_TOKEN_SIGNING_KEY"] || randomBytes(32).toString("hex");

    // Build extra env vars, excluding keys managed by serviceDockerRunArgs
    const extraAssistantEnv = collectExtraAssistantEnv(capturedEnv);

    // Parse gateway port from entry's runtimeUrl, fall back to default
    const gatewayPort = parseGatewayPort(
      entry.runtimeUrl,
      GATEWAY_INTERNAL_PORT,
    );

    // Notify connected clients that a rollback is about to begin (best-effort)
    console.log("📢 Notifying connected clients...");
    await broadcastUpgradeEvent(
      entry.runtimeUrl,
      entry.assistantId,
      buildStartingEvent(targetVersion),
    );
    // Brief pause to allow SSE delivery before containers stop.
    await new Promise((r) => setTimeout(r, 500));

    // Roll back migrations to pre-upgrade state (must happen before containers stop)
    if (
      entry.previousDbMigrationVersion !== undefined ||
      entry.previousWorkspaceMigrationId !== undefined
    ) {
      console.log("🔄 Reverting database changes...");
      await broadcastUpgradeEvent(
        entry.runtimeUrl,
        entry.assistantId,
        buildProgressEvent(UPGRADE_PROGRESS.REVERTING_MIGRATIONS),
      );
      await rollbackMigrations(
        entry.runtimeUrl,
        entry.assistantId,
        entry.previousDbMigrationVersion,
        entry.previousWorkspaceMigrationId,
      );
    }

    // Progress: switching version (must be sent BEFORE stopContainers)
    await broadcastUpgradeEvent(
      entry.runtimeUrl,
      entry.assistantId,
      buildProgressEvent(UPGRADE_PROGRESS.SWITCHING),
    );

    console.log("🛑 Stopping existing containers...");
    await stopContainers(res);
    console.log("✅ Containers stopped\n");

    // Run security file migrations and signing key cleanup
    console.log("🔄 Migrating security files to gateway volume...");
    await migrateGatewaySecurityFiles(res, (msg) => console.log(msg));

    console.log("🔄 Migrating credential files to CES security volume...");
    await migrateCesSecurityFiles(res, (msg) => console.log(msg));

    console.log("🚀 Starting containers with previous version...");
    await startContainers(
      {
        signingKey,
        bootstrapSecret,
        cesServiceToken,
        extraAssistantEnv,
        gatewayPort,
        imageTags: previousImageRefs,
        instanceName,
        res,
      },
      (msg) => console.log(msg),
    );
    console.log("✅ Containers started\n");

    console.log("Waiting for assistant to become ready...");
    const ready = await waitForReady(entry.runtimeUrl);

    if (ready) {
      // Restore data from the backup created for the specific upgrade being
      // rolled back. We use the persisted preUpgradeBackupPath rather than
      // scanning for the latest backup on disk — if the most recent upgrade's
      // backup failed, a global scan would find a stale backup from a prior
      // cycle and overwrite newer user data.
      const backupPath = entry.preUpgradeBackupPath as string | undefined;
      if (backupPath) {
        // Progress: restoring data (gateway is back up at this point)
        await broadcastUpgradeEvent(
          entry.runtimeUrl,
          entry.assistantId,
          buildProgressEvent(UPGRADE_PROGRESS.RESTORING),
        );

        console.log(`📦 Restoring data from pre-upgrade backup...`);
        console.log(` Source: ${backupPath}`);
        const restored = await restoreBackup(
          entry.runtimeUrl,
          entry.assistantId,
          backupPath,
        );
        if (restored) {
          console.log(" ✅ Data restored successfully\n");
        } else {
          console.warn(
            " ⚠️ Data restore failed (rollback continues without data restoration)\n",
          );
        }
      } else {
        console.log(
          "ℹ️ No pre-upgrade backup was created for this upgrade, skipping data restoration\n",
        );
      }

      // Capture new digests from the rolled-back containers
      const newDigests = await captureImageRefs(res);

      // Swap current/previous state to enable "rollback the rollback"
      const updatedEntry: AssistantEntry = {
        ...entry,
        serviceGroupVersion: targetVersion,
        containerInfo: {
          assistantImage: prev.assistantImage ?? previousImageRefs.assistant,
          gatewayImage: prev.gatewayImage ?? previousImageRefs.gateway,
          cesImage: prev.cesImage ?? previousImageRefs["credential-executor"],
          // If the digests cannot be captured, keep the refs the containers
          // were just started from rather than discarding known state.
          assistantDigest: newDigests?.assistant ?? prev.assistantDigest,
          gatewayDigest: newDigests?.gateway ?? prev.gatewayDigest,
          cesDigest: newDigests?.["credential-executor"] ?? prev.cesDigest,
          networkName: res.network,
        },
        previousServiceGroupVersion: entry.serviceGroupVersion,
        previousContainerInfo: entry.containerInfo,
        // Clear the backup path — it belonged to the upgrade we just rolled back
        preUpgradeBackupPath: undefined,
        previousDbMigrationVersion: undefined,
        previousWorkspaceMigrationId: undefined,
      };
      saveAssistantEntry(updatedEntry);

      // Notify clients that the rollback succeeded
      await broadcastUpgradeEvent(
        entry.runtimeUrl,
        entry.assistantId,
        buildCompleteEvent(targetVersion, true),
      );

      // Record successful rollback in workspace git history
      if (workspaceDir) {
        try {
          await commitWorkspaceState(
            workspaceDir,
            buildUpgradeCommitMessage({
              action: "rollback",
              phase: "complete",
              from: currentVersion,
              to: targetVersion,
              topology: "docker",
              assistantId: entry.assistantId,
              result: "success",
            }),
          );
        } catch (err) {
          console.warn(
            `⚠️ Failed to create post-rollback workspace commit: ${err instanceof Error ? err.message : String(err)}`,
          );
        }
      }

      console.log(
        `\n✅ Docker assistant '${instanceName}' rolled back to ${targetVersion}.`,
      );
    } else {
      console.error(
        `\n❌ Containers failed to become ready within the timeout.`,
      );
      console.log(
        ` Check logs with: docker logs -f ${res.assistantContainer}`,
      );
      await broadcastUpgradeEvent(
        entry.runtimeUrl,
        entry.assistantId,
        buildCompleteEvent(targetVersion, false),
      );
      emitCliError(
        "READINESS_TIMEOUT",
        "Rolled-back containers failed to become ready within the timeout.",
      );
      process.exit(1);
    }
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    console.error(`\n❌ Rollback failed: ${detail}`);
    // Report the version the assistant was on when the rollback started.
    await broadcastUpgradeEvent(
      entry.runtimeUrl,
      entry.assistantId,
      buildCompleteEvent(currentVersion, false),
    );
    emitCliError(categorizeUpgradeError(err), "Rollback failed", detail);
    process.exit(1);
  }
}