@vellumai/cli 0.5.6 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,844 @@
1
+ import { randomBytes } from "crypto";
2
+
3
+ import type { AssistantEntry } from "./assistant-config.js";
4
+ import { saveAssistantEntry } from "./assistant-config.js";
5
+ import { createBackup, pruneOldBackups, restoreBackup } from "./backup-ops.js";
6
+ import { emitCliError } from "./cli-error.js";
7
+ import {
8
+ captureImageRefs,
9
+ DOCKER_READY_TIMEOUT_MS,
10
+ dockerResourceNames,
11
+ GATEWAY_INTERNAL_PORT,
12
+ migrateCesSecurityFiles,
13
+ migrateGatewaySecurityFiles,
14
+ startContainers,
15
+ stopContainers,
16
+ } from "./docker.js";
17
+ import { loadGuardianToken } from "./guardian-token.js";
18
+ import { getPlatformUrl } from "./platform-client.js";
19
+ import { resolveImageRefs } from "./platform-releases.js";
20
+ import { exec, execOutput } from "./step-runner.js";
21
+ import { parseVersion } from "./version-compat.js";
22
+
23
+ // ---------------------------------------------------------------------------
24
+ // Shared constants & builders for upgrade / rollback lifecycle events
25
+ // ---------------------------------------------------------------------------
26
+
27
/**
 * Status strings shown to users while an upgrade or rollback is in flight.
 * Both flows read from this single table so their wording stays aligned.
 */
export const UPGRADE_PROGRESS = {
  // Forward-progress steps
  DOWNLOADING: "Downloading…",
  BACKING_UP: "Saving a backup of your data…",
  INSTALLING: "Installing…",

  // Recovery / rollback steps
  REVERTING: "Something went wrong. Reverting to the previous version…",
  REVERTING_MIGRATIONS: "Reverting database changes…",
  RESTORING: "Restoring your data…",
  SWITCHING: "Switching to the previous version…",
} as const;
37
+
38
/**
 * Build the lifecycle event announcing that a version change is about to begin.
 *
 * @param targetVersion - Version the assistant is moving to.
 * @param expectedDowntimeSeconds - Downtime estimate carried in the event; defaults to 60.
 * @returns A plain object tagged `type: "starting"`.
 */
export function buildStartingEvent(
  targetVersion: string,
  expectedDowntimeSeconds = 60,
) {
  const type = "starting" as const;
  return { type, targetVersion, expectedDowntimeSeconds };
}
44
+
45
/**
 * Build a lifecycle event carrying a human-readable progress message.
 *
 * @param statusMessage - Text to surface to connected clients.
 * @returns A plain object tagged `type: "progress"`.
 */
export function buildProgressEvent(statusMessage: string) {
  const type = "progress" as const;
  return { type, statusMessage };
}
48
+
49
/**
 * Build the lifecycle event that closes an upgrade or rollback attempt.
 *
 * @param installedVersion - Version reported as running once the attempt ended.
 * @param success - Whether the attempt reached its target.
 * @param rolledBackToVersion - Version the flow fell back to, if any. The key is
 *   left out of the event entirely when this is undefined or an empty string.
 * @returns A plain object tagged `type: "complete"`.
 */
export function buildCompleteEvent(
  installedVersion: string,
  success: boolean,
  rolledBackToVersion?: string,
) {
  // Resolve the optional field first so the event literal below stays flat.
  const fallbackField: { rolledBackToVersion?: string } = rolledBackToVersion
    ? { rolledBackToVersion }
    : {};
  return {
    type: "complete" as const,
    installedVersion,
    success,
    ...fallbackField,
  };
}
61
+
62
/**
 * Compose the commit message used when recording an upgrade or rollback
 * step in the workspace git history.
 *
 * The message is a header line, a blank line, then `key: value` lines for
 * assistant, from, to, an optional result, and topology, in that order.
 *
 * @param options - Describes the action, its phase, the versions involved,
 *   the deployment topology, the assistant id, and optionally the outcome.
 * @returns The newline-joined commit message.
 */
export function buildUpgradeCommitMessage(options: {
  action: "upgrade" | "rollback";
  phase: "starting" | "complete";
  from: string;
  to: string;
  topology: "docker" | "managed";
  assistantId: string;
  result?: "success" | "failure";
}): string {
  const { action, phase, from, to, topology, assistantId, result } = options;

  let header: string;
  if (phase === "starting") {
    header = `[${action}] Starting: ${from} → ${to}`;
  } else {
    header = `[${action}] Complete: ${from} → ${to}`;
  }

  // The result line, when present, sits between `to` and `topology`.
  const resultLines = result ? [`result: ${result}`] : [];

  return [
    header,
    "",
    `assistant: ${assistantId}`,
    `from: ${from}`,
    `to: ${to}`,
    ...resultLines,
    `topology: ${topology}`,
  ].join("\n");
}
87
+
88
/**
 * Names of environment variables that the CLI supplies itself through run
 * arguments. Values captured from an existing container under these names
 * must not be replayed onto the replacement container during an upgrade or
 * rollback. Shared between upgrade.ts and rollback.ts.
 */
export const CONTAINER_ENV_EXCLUDE_KEYS: ReadonlySet<string> = new Set<string>(
  [
    "CES_SERVICE_TOKEN",
    "VELLUM_ASSISTANT_NAME",
    "RUNTIME_HTTP_HOST",
    "PATH",
    "ACTOR_TOKEN_SIGNING_KEY",
  ],
);
100
+
101
/**
 * Read the environment of a Docker container via `docker inspect` and
 * return it as a key/value map, so it can be replayed onto a replacement
 * container.
 *
 * Entries are split on their first `=`. Entries with no `=`, or with an
 * empty name, are skipped. Any failure (inspect error, unparsable output)
 * is swallowed and whatever was collected so far is returned, which may be
 * an empty object.
 *
 * @param containerName - Name of the container to inspect.
 * @returns Map of environment variable names to values.
 */
export async function captureContainerEnv(
  containerName: string,
): Promise<Record<string, string>> {
  const env: Record<string, string> = {};
  try {
    const inspectArgs = [
      "inspect",
      "--format",
      "{{json .Config.Env}}",
      containerName,
    ];
    const inspectOutput = await execOutput("docker", inspectArgs);
    const pairs = JSON.parse(inspectOutput) as string[];
    for (const pair of pairs) {
      const separatorAt = pair.indexOf("=");
      if (separatorAt <= 0) {
        continue;
      }
      const name = pair.slice(0, separatorAt);
      env[name] = pair.slice(separatorAt + 1);
    }
  } catch {
    // Container may not exist or not be inspectable
  }
  return env;
}
128
+
129
/**
 * Poll `${runtimeUrl}/readyz` roughly once per second until it answers
 * with an OK status or `DOCKER_READY_TIMEOUT_MS` has elapsed.
 *
 * Each probe is capped at 5 seconds. A non-OK answer is logged together
 * with the `status` / `upstream` fields of its JSON body when the body can
 * be parsed. Connection errors and probe timeouts are retried silently.
 *
 * @param runtimeUrl - Base URL of the gateway to probe.
 * @returns `true` once the endpoint reports ready, `false` on timeout.
 */
export async function waitForReady(runtimeUrl: string): Promise<boolean> {
  const endpoint = `${runtimeUrl}/readyz`;
  const startedAt = Date.now();
  const elapsedMs = () => Date.now() - startedAt;

  while (elapsedMs() < DOCKER_READY_TIMEOUT_MS) {
    try {
      const response = await fetch(endpoint, {
        signal: AbortSignal.timeout(5000),
      });

      if (response.ok) {
        const seconds = (elapsedMs() / 1000).toFixed(1);
        console.log(`Assistant ready after ${seconds}s`);
        return true;
      }

      // Not ready yet: try to pull a short reason out of the response body.
      let reason = "";
      try {
        const payload = JSON.parse(await response.text());
        const fields = [payload.status];
        if (payload.upstream != null) {
          fields.push(`upstream=${payload.upstream}`);
        }
        reason = ` — ${fields.join(", ")}`;
      } catch {
        // ignore parse errors
      }
      console.log(`Readiness check: ${response.status}${reason} (retrying...)`);
    } catch {
      // Connection refused / timeout — not up yet
    }

    // Pause between probes, whether the probe answered or threw.
    await new Promise((wake) => setTimeout(wake, 1000));
  }

  return false;
}
166
+
167
/**
 * Best-effort POST of an upgrade lifecycle event to the gateway's
 * `/v1/admin/upgrade-broadcast` endpoint, for relay to connected clients.
 *
 * A bearer token is attached when `loadGuardianToken` yields one. The
 * request is capped at 3 seconds. The response status is not inspected and
 * any thrown error is swallowed silently, so this never blocks or fails
 * the calling flow.
 *
 * @param gatewayUrl - Base URL of the gateway.
 * @param assistantId - Assistant whose guardian token is used for auth.
 * @param event - Event payload, sent as the JSON request body.
 */
export async function broadcastUpgradeEvent(
  gatewayUrl: string,
  assistantId: string,
  event: Record<string, unknown>,
): Promise<void> {
  try {
    const accessToken = loadGuardianToken(assistantId)?.accessToken;
    const headers: Record<string, string> = accessToken
      ? {
          "Content-Type": "application/json",
          Authorization: `Bearer ${accessToken}`,
        }
      : { "Content-Type": "application/json" };

    const endpoint = `${gatewayUrl}/v1/admin/upgrade-broadcast`;
    await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify(event),
      signal: AbortSignal.timeout(3000),
    });
  } catch {
    // Best-effort — gateway/daemon may already be shutting down or not yet ready
  }
}
195
+
196
/**
 * Best-effort request for a workspace git commit through the gateway's
 * `/v1/admin/workspace-commit` endpoint.
 *
 * A bearer token is attached when `loadGuardianToken` yields one. The
 * request is capped at 10 seconds. The response status is not inspected
 * and any thrown error is swallowed silently, so upgrade and rollback
 * flows are never blocked by it.
 *
 * @param gatewayUrl - Base URL of the gateway.
 * @param assistantId - Assistant whose guardian token is used for auth.
 * @param message - Commit message, sent as `{ message }` in the JSON body.
 */
export async function commitWorkspaceViaGateway(
  gatewayUrl: string,
  assistantId: string,
  message: string,
): Promise<void> {
  try {
    const accessToken = loadGuardianToken(assistantId)?.accessToken;
    const authHeader: Record<string, string> = accessToken
      ? { Authorization: `Bearer ${accessToken}` }
      : {};
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
      ...authHeader,
    };

    const endpoint = `${gatewayUrl}/v1/admin/workspace-commit`;
    const payload = JSON.stringify({ message });
    await fetch(endpoint, {
      method: "POST",
      headers,
      body: payload,
      signal: AbortSignal.timeout(10_000),
    });
  } catch {
    // Best-effort — gateway/daemon may already be shutting down or not yet ready
  }
}
224
+
225
/**
 * Ask the gateway (`/v1/admin/rollback-migrations`) to roll DB and
 * workspace migrations back to a target state.
 *
 * Returns `false` without making a request when no target is given and
 * `rollbackToRegistryCeiling` is not set. Otherwise the request is capped
 * at 120 seconds. A non-OK response or a thrown error is reported with
 * `console.warn` and yields `false`; this function does not throw.
 *
 * @param gatewayUrl - Base URL of the gateway.
 * @param assistantId - Assistant whose guardian token is used for auth.
 * @param targetDbVersion - DB migration version to roll back to, if any.
 * @param targetWorkspaceMigrationId - Workspace migration id to roll back to, if any.
 * @param rollbackToRegistryCeiling - When truthy, sends `rollbackToRegistryCeiling: true`
 *   in the request body.
 * @returns `true` when the gateway answered with an OK status, else `false`.
 */
export async function rollbackMigrations(
  gatewayUrl: string,
  assistantId: string,
  targetDbVersion?: number,
  targetWorkspaceMigrationId?: string,
  rollbackToRegistryCeiling?: boolean,
): Promise<boolean> {
  const hasExplicitTarget =
    targetDbVersion !== undefined || targetWorkspaceMigrationId !== undefined;
  if (!(rollbackToRegistryCeiling || hasExplicitTarget)) {
    // Nothing was requested.
    return false;
  }

  try {
    const accessToken = loadGuardianToken(assistantId)?.accessToken;
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
    };
    if (accessToken) {
      headers["Authorization"] = `Bearer ${accessToken}`;
    }

    // Only the fields that were actually supplied go into the request body.
    const payload: Record<string, unknown> = {
      ...(targetDbVersion !== undefined ? { targetDbVersion } : {}),
      ...(targetWorkspaceMigrationId !== undefined
        ? { targetWorkspaceMigrationId }
        : {}),
      ...(rollbackToRegistryCeiling ? { rollbackToRegistryCeiling: true } : {}),
    };

    const endpoint = `${gatewayUrl}/v1/admin/rollback-migrations`;
    const response = await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(120_000),
    });

    if (!response.ok) {
      const errorBody = await response.text();
      console.warn(
        `⚠️ Migration rollback failed (${response.status}): ${errorBody}`,
      );
      return false;
    }

    const summary = (await response.json()) as {
      rolledBack?: { db?: string[]; workspace?: string[] };
    };
    const dbCount = summary.rolledBack?.db?.length ?? 0;
    const wsCount = summary.rolledBack?.workspace?.length ?? 0;
    const anythingRolledBack = dbCount > 0 || wsCount > 0;
    if (anythingRolledBack) {
      console.log(
        ` Rolled back ${dbCount} DB migration(s) and ${wsCount} workspace migration(s)`,
      );
    }
    return true;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`⚠️ Migration rollback failed: ${reason}`);
    return false;
  }
}
285
+
286
+ // ---------------------------------------------------------------------------
287
+ // Shared Docker rollback orchestration
288
+ // ---------------------------------------------------------------------------
289
+
290
/** Options accepted by `performDockerRollback`. */
export interface PerformDockerRollbackOptions {
  /**
   * Version to roll back to. Typed as optional, but
   * `performDockerRollback` throws when it is missing.
   */
  targetVersion?: string;
}
294
+
295
+ /**
296
+ * Perform a Docker rollback to a target version. Reusable by both `rollback.ts`
297
+ * (targeted version rollback) and `restore.ts` (version + data restore).
298
+ *
299
+ * This function handles the full lifecycle:
300
+ * - Version validation (target must be older than current)
301
+ * - Image resolution and pulling
302
+ * - Migration ceiling lookup and pre-swap rollback
303
+ * - Container stop/start with target images
304
+ * - Readiness check
305
+ * - Lockfile update with rollback state
306
+ * - Auto-rollback on failure
307
+ */
308
+ export async function performDockerRollback(
309
+ entry: AssistantEntry,
310
+ options: PerformDockerRollbackOptions,
311
+ ): Promise<void> {
// Failure contract: throws when `targetVersion` is missing. Otherwise the
// unrecoverable paths below end the process with process.exit(1): a target
// that is newer than or equal to the current version, an image pull
// failure, and every case where the swapped containers never become ready
// (including when the automatic revert to the original images succeeds).
312
+ const { targetVersion } = options;
313
+
314
+ if (!targetVersion) {
315
+ throw new Error("targetVersion is required for performDockerRollback");
316
+ }
317
+
318
+ const currentVersion = entry.serviceGroupVersion;
319
+
320
+ // Validate target version < current version
// The check is skipped when the entry has no recorded version, or when
// parseVersion returns a falsy value for either version string.
321
+ if (currentVersion) {
322
+ const current = parseVersion(currentVersion);
323
+ const target = parseVersion(targetVersion);
324
+ if (current && target) {
// Compare major, then minor, then patch; only these three fields are used.
325
+ const isNewer = (() => {
326
+ if (target.major !== current.major) return target.major > current.major;
327
+ if (target.minor !== current.minor) return target.minor > current.minor;
328
+ return target.patch > current.patch;
329
+ })();
330
+ if (isNewer) {
331
+ const msg =
332
+ "Cannot roll back to a newer version. Use `vellum upgrade` instead.";
333
+ console.error(msg);
334
+ emitCliError("VERSION_DIRECTION", msg);
335
+ process.exit(1);
336
+ }
337
+ const isSame =
338
+ target.major === current.major &&
339
+ target.minor === current.minor &&
340
+ target.patch === current.patch;
341
+ if (isSame) {
342
+ const msg = `Already on version ${targetVersion}. Nothing to roll back to.`;
343
+ console.error(msg);
344
+ emitCliError("VERSION_DIRECTION", msg);
345
+ process.exit(1);
346
+ }
347
+ }
348
+ }
349
+
350
+ const instanceName = entry.assistantId;
351
+ const res = dockerResourceNames(instanceName);
352
+
353
+ // Resolve Docker image refs for the target version
354
+ console.log("🔍 Resolving image references...");
355
+ const { imageTags: targetImageTags } = await resolveImageRefs(targetVersion);
356
+
357
+ // Fetch target migration ceiling from releases API
// Stays empty when the request fails, the target is not in the returned
// stable release list, or the release carries neither ceiling field. An
// empty ceiling skips the pre-swap rollback and triggers the post-swap
// fallback further down.
358
+ let targetMigrationCeiling: {
359
+ dbVersion?: number;
360
+ workspaceMigrationId?: string;
361
+ } = {};
362
+ try {
363
+ const platformUrl = getPlatformUrl();
364
+ const releasesResp = await fetch(
365
+ `${platformUrl}/v1/releases/?stable=true`,
366
+ { signal: AbortSignal.timeout(10000) },
367
+ );
368
+ if (releasesResp.ok) {
369
+ const releases = (await releasesResp.json()) as Array<{
370
+ version: string;
371
+ db_migration_version?: number | null;
372
+ last_workspace_migration_id?: string;
373
+ }>;
// Match versions with any leading "v" stripped from both sides.
374
+ const normalizedTag = targetVersion.replace(/^v/, "");
375
+ const targetRelease = releases.find(
376
+ (r) => r.version?.replace(/^v/, "") === normalizedTag,
377
+ );
378
+ if (
379
+ targetRelease?.db_migration_version != null ||
380
+ targetRelease?.last_workspace_migration_id
381
+ ) {
// null db version and empty-string migration id are both normalised to undefined.
382
+ targetMigrationCeiling = {
383
+ dbVersion: targetRelease.db_migration_version ?? undefined,
384
+ workspaceMigrationId:
385
+ targetRelease.last_workspace_migration_id || undefined,
386
+ };
387
+ }
388
+ }
389
+ } catch {
390
+ // Best-effort — fall back to rollbackToRegistryCeiling post-swap
391
+ }
392
+
393
+ // Capture current image digests for auto-rollback on failure
394
+ console.log("📸 Capturing current image references for rollback...");
395
+ const currentImageRefs = await captureImageRefs(res);
396
+
397
+ // Capture current migration state for rollback targeting
// Read from the running assistant's /healthz endpoint; stays empty if the
// request fails or the response has no `migrations` field.
398
+ let preMigrationState: {
399
+ dbVersion?: number;
400
+ lastWorkspaceMigrationId?: string;
401
+ } = {};
402
+ try {
403
+ const healthResp = await fetch(
404
+ `${entry.runtimeUrl}/healthz?include=migrations`,
405
+ { signal: AbortSignal.timeout(5000) },
406
+ );
407
+ if (healthResp.ok) {
408
+ const health = (await healthResp.json()) as {
409
+ migrations?: { dbVersion?: number; lastWorkspaceMigrationId?: string };
410
+ };
411
+ preMigrationState = health.migrations ?? {};
412
+ }
413
+ } catch {
414
+ // Best-effort
415
+ }
416
+
417
+ // Persist rollback state to lockfile BEFORE any destructive changes
// Skipped when the entry has no recorded version or no container info.
418
+ if (entry.serviceGroupVersion && entry.containerInfo) {
419
+ const rollbackEntry: AssistantEntry = {
420
+ ...entry,
421
+ previousServiceGroupVersion: entry.serviceGroupVersion,
422
+ previousContainerInfo: { ...entry.containerInfo },
423
+ previousDbMigrationVersion: preMigrationState.dbVersion,
424
+ previousWorkspaceMigrationId: preMigrationState.lastWorkspaceMigrationId,
425
+ };
426
+ saveAssistantEntry(rollbackEntry);
427
+ console.log(` Saved rollback state: ${entry.serviceGroupVersion}\n`);
428
+ }
429
+
430
+ // Record rollback start in workspace git history
431
+ await commitWorkspaceViaGateway(
432
+ entry.runtimeUrl,
433
+ entry.assistantId,
434
+ buildUpgradeCommitMessage({
435
+ action: "rollback",
436
+ phase: "starting",
437
+ from: currentVersion ?? "unknown",
438
+ to: targetVersion,
439
+ topology: "docker",
440
+ assistantId: entry.assistantId,
441
+ }),
442
+ );
443
+
444
+ console.log(
445
+ `🔄 Rolling back Docker assistant '${instanceName}' to ${targetVersion}...\n`,
446
+ );
447
+
448
+ // Create a pre-rollback backup as a safety net
449
+ console.log("📦 Creating pre-rollback backup...");
// NOTE(review): the prefix says "pre-upgrade" although this is a rollback
// backup — presumably so it shares naming (and pruning) with upgrade
// backups; confirm against backup-ops.
450
+ const preRollbackBackupPath = await createBackup(
451
+ entry.runtimeUrl,
452
+ entry.assistantId,
453
+ {
454
+ prefix: `${entry.assistantId}-pre-upgrade`,
455
+ description: `Pre-rollback snapshot before ${currentVersion ?? "unknown"} → ${targetVersion}`,
456
+ },
457
+ );
// A falsy path means the backup failed; the rollback proceeds regardless.
458
+ if (preRollbackBackupPath) {
459
+ console.log(` Backup saved: ${preRollbackBackupPath}\n`);
460
+ pruneOldBackups(entry.assistantId, 3);
461
+ } else {
462
+ console.warn("⚠️ Pre-rollback backup failed (continuing with rollback)\n");
463
+ }
464
+
465
+ // Capture container env, extract secrets
466
+ console.log("💾 Capturing existing container environment...");
467
+ const capturedEnv = await captureContainerEnv(res.assistantContainer);
468
+ console.log(
469
+ ` Captured ${Object.keys(capturedEnv).length} env var(s) from ${res.assistantContainer}\n`,
470
+ );
471
+
// Reuse the secrets of the running container; generate fresh random ones
// only when the captured environment has none (or an empty value).
472
+ const cesServiceToken =
473
+ capturedEnv["CES_SERVICE_TOKEN"] || randomBytes(32).toString("hex");
474
+
475
+ const signingKey =
476
+ capturedEnv["ACTOR_TOKEN_SIGNING_KEY"] || randomBytes(32).toString("hex");
477
+
478
+ // Build extra env vars, excluding keys managed by serviceDockerRunArgs
// These two keys are additionally excluded from replay when they are set
// in this process's own environment.
479
+ const envKeysSetByRunArgs = new Set(CONTAINER_ENV_EXCLUDE_KEYS);
480
+ for (const envVar of ["ANTHROPIC_API_KEY", "VELLUM_PLATFORM_URL"]) {
481
+ if (process.env[envVar]) {
482
+ envKeysSetByRunArgs.add(envVar);
483
+ }
484
+ }
485
+ const extraAssistantEnv: Record<string, string> = {};
486
+ for (const [key, value] of Object.entries(capturedEnv)) {
487
+ if (!envKeysSetByRunArgs.has(key)) {
488
+ extraAssistantEnv[key] = value;
489
+ }
490
+ }
491
+
492
+ // Parse gateway port from entry's runtimeUrl
// A URL without an explicit port (parseInt yields NaN) or an unparsable
// URL keeps the GATEWAY_INTERNAL_PORT default.
493
+ let gatewayPort = GATEWAY_INTERNAL_PORT;
494
+ try {
495
+ const parsed = new URL(entry.runtimeUrl);
496
+ const port = parseInt(parsed.port, 10);
497
+ if (!isNaN(port)) {
498
+ gatewayPort = port;
499
+ }
500
+ } catch {
501
+ // use default
502
+ }
503
+
504
+ // Broadcast SSE "starting" event
505
+ console.log("📢 Notifying connected clients...");
506
+ await broadcastUpgradeEvent(
507
+ entry.runtimeUrl,
508
+ entry.assistantId,
509
+ buildStartingEvent(targetVersion),
510
+ );
511
+ // Brief pause for SSE delivery
512
+ await new Promise((r) => setTimeout(r, 500));
513
+
514
+ // Pull target version Docker images
515
+ await broadcastUpgradeEvent(
516
+ entry.runtimeUrl,
517
+ entry.assistantId,
518
+ buildProgressEvent(UPGRADE_PROGRESS.DOWNLOADING),
519
+ );
520
+ console.log("📦 Pulling target Docker images...");
521
+ const pullImages: Array<[string, string]> = [
522
+ ["assistant", targetImageTags.assistant],
523
+ ["gateway", targetImageTags.gateway],
524
+ ["credential-executor", targetImageTags["credential-executor"]],
525
+ ];
// Images are pulled one after another; the first failure aborts the whole
// rollback before any container has been stopped.
526
+ try {
527
+ for (const [service, image] of pullImages) {
528
+ console.log(` Pulling ${service}: ${image}`);
529
+ await exec("docker", ["pull", image]);
530
+ }
531
+ } catch (pullErr) {
532
+ const detail = pullErr instanceof Error ? pullErr.message : String(pullErr);
533
+ console.error(`\n❌ Failed to pull Docker images: ${detail}`);
534
+ await broadcastUpgradeEvent(
535
+ entry.runtimeUrl,
536
+ entry.assistantId,
537
+ buildCompleteEvent(currentVersion ?? "unknown", false),
538
+ );
539
+ emitCliError("IMAGE_PULL_FAILED", "Failed to pull Docker images", detail);
540
+ process.exit(1);
541
+ }
542
+ console.log("✅ Docker images pulled\n");
543
+
544
+ // Pre-swap migration rollback to target ceiling on the CURRENT (newer) daemon
// rollbackMigrations returns false on failure; that result is what makes
// the success path below retry with rollbackToRegistryCeiling.
545
+ let preSwapRollbackOk = true;
546
+ if (
547
+ targetMigrationCeiling.dbVersion !== undefined ||
548
+ targetMigrationCeiling.workspaceMigrationId !== undefined
549
+ ) {
550
+ console.log("🔄 Reverting database changes...");
551
+ await broadcastUpgradeEvent(
552
+ entry.runtimeUrl,
553
+ entry.assistantId,
554
+ buildProgressEvent(UPGRADE_PROGRESS.REVERTING_MIGRATIONS),
555
+ );
556
+ preSwapRollbackOk = await rollbackMigrations(
557
+ entry.runtimeUrl,
558
+ entry.assistantId,
559
+ targetMigrationCeiling.dbVersion,
560
+ targetMigrationCeiling.workspaceMigrationId,
561
+ );
562
+ }
563
+
564
+ // Progress: switching version
565
+ await broadcastUpgradeEvent(
566
+ entry.runtimeUrl,
567
+ entry.assistantId,
568
+ buildProgressEvent(UPGRADE_PROGRESS.SWITCHING),
569
+ );
570
+
571
+ // Stop containers, migrate security files, start with target images
572
+ console.log("🛑 Stopping existing containers...");
573
+ await stopContainers(res);
574
+ console.log("✅ Containers stopped\n");
575
+
576
+ console.log("🔄 Migrating security files to gateway volume...");
577
+ await migrateGatewaySecurityFiles(res, (msg) => console.log(msg));
578
+
579
+ console.log("🔄 Migrating credential files to CES security volume...");
580
+ await migrateCesSecurityFiles(res, (msg) => console.log(msg));
581
+
582
+ console.log("🚀 Starting containers with target version...");
583
+ await startContainers(
584
+ {
585
+ signingKey,
586
+ cesServiceToken,
587
+ extraAssistantEnv,
588
+ gatewayPort,
589
+ imageTags: targetImageTags,
590
+ instanceName,
591
+ res,
592
+ },
593
+ (msg) => console.log(msg),
594
+ );
595
+ console.log("✅ Containers started\n");
596
+
597
+ // Wait for readiness
598
+ console.log("Waiting for assistant to become ready...");
599
+ const ready = await waitForReady(entry.runtimeUrl);
600
+
601
+ if (ready) {
602
+ // Success path
603
+
604
+ // Post-swap migration rollback fallback: if pre-swap rollback failed
605
+ // or no ceiling metadata was available, ask the now-running old daemon
606
+ // to roll back migrations above its own registry ceiling.
607
+ if (
608
+ !preSwapRollbackOk ||
609
+ (targetMigrationCeiling.dbVersion === undefined &&
610
+ targetMigrationCeiling.workspaceMigrationId === undefined)
611
+ ) {
// The result is not checked here: a failed fallback does not fail the rollback.
612
+ await rollbackMigrations(
613
+ entry.runtimeUrl,
614
+ entry.assistantId,
615
+ undefined,
616
+ undefined,
617
+ true,
618
+ );
619
+ }
620
+
621
+ // Capture new digests from the rolled-back containers
622
+ const newDigests = await captureImageRefs(res);
623
+
624
+ // Swap current/previous state to enable "rollback the rollback"
625
+ const updatedEntry: AssistantEntry = {
626
+ ...entry,
627
+ serviceGroupVersion: targetVersion,
628
+ containerInfo: {
629
+ assistantImage: targetImageTags.assistant,
630
+ gatewayImage: targetImageTags.gateway,
631
+ cesImage: targetImageTags["credential-executor"],
632
+ assistantDigest: newDigests?.assistant,
633
+ gatewayDigest: newDigests?.gateway,
634
+ cesDigest: newDigests?.["credential-executor"],
635
+ networkName: res.network,
636
+ },
637
+ previousServiceGroupVersion: entry.serviceGroupVersion,
638
+ previousContainerInfo: entry.containerInfo,
639
+ previousDbMigrationVersion: preMigrationState.dbVersion,
640
+ previousWorkspaceMigrationId: preMigrationState.lastWorkspaceMigrationId,
641
+ preUpgradeBackupPath: undefined,
642
+ };
643
+ saveAssistantEntry(updatedEntry);
644
+
645
+ // Notify clients that the rollback succeeded
646
+ await broadcastUpgradeEvent(
647
+ entry.runtimeUrl,
648
+ entry.assistantId,
649
+ buildCompleteEvent(targetVersion, true),
650
+ );
651
+
652
+ // Record successful rollback in workspace git history
653
+ await commitWorkspaceViaGateway(
654
+ entry.runtimeUrl,
655
+ entry.assistantId,
656
+ buildUpgradeCommitMessage({
657
+ action: "rollback",
658
+ phase: "complete",
659
+ from: currentVersion ?? "unknown",
660
+ to: targetVersion,
661
+ topology: "docker",
662
+ assistantId: entry.assistantId,
663
+ result: "success",
664
+ }),
665
+ );
666
+
667
+ console.log(
668
+ `\n✅ Docker assistant '${instanceName}' rolled back to ${targetVersion}.`,
669
+ );
670
+ } else {
671
+ // Failure path — attempt auto-rollback to original version
672
+ console.error(`\n❌ Containers failed to become ready within the timeout.`);
673
+
// NOTE(review): presumably captureImageRefs returns a falsy value when the
// original image refs could not be captured — confirm in docker.ts.
674
+ if (currentImageRefs) {
675
+ await broadcastUpgradeEvent(
676
+ entry.runtimeUrl,
677
+ entry.assistantId,
678
+ buildProgressEvent(UPGRADE_PROGRESS.REVERTING),
679
+ );
680
+ console.log(`\n🔄 Rolling back to original version...`);
681
+ try {
682
+ // Attempt to roll back migrations before reverting containers
683
+ if (
684
+ preMigrationState.dbVersion !== undefined ||
685
+ preMigrationState.lastWorkspaceMigrationId !== undefined
686
+ ) {
687
+ console.log("🔄 Reverting database changes...");
688
+ await broadcastUpgradeEvent(
689
+ entry.runtimeUrl,
690
+ entry.assistantId,
691
+ buildProgressEvent(UPGRADE_PROGRESS.REVERTING_MIGRATIONS),
692
+ );
693
+ await rollbackMigrations(
694
+ entry.runtimeUrl,
695
+ entry.assistantId,
696
+ preMigrationState.dbVersion,
697
+ preMigrationState.lastWorkspaceMigrationId,
698
+ );
699
+ }
700
+
// Same stop / migrate / start sequence as the forward swap, but using the
// image refs captured before the swap and the same secrets and env.
701
+ await stopContainers(res);
702
+
703
+ await migrateGatewaySecurityFiles(res, (msg) => console.log(msg));
704
+ await migrateCesSecurityFiles(res, (msg) => console.log(msg));
705
+
706
+ await startContainers(
707
+ {
708
+ signingKey,
709
+ cesServiceToken,
710
+ extraAssistantEnv,
711
+ gatewayPort,
712
+ imageTags: currentImageRefs,
713
+ instanceName,
714
+ res,
715
+ },
716
+ (msg) => console.log(msg),
717
+ );
718
+
719
+ const revertReady = await waitForReady(entry.runtimeUrl);
720
+ if (revertReady) {
721
+ // Restore from pre-rollback backup on failure
722
+ if (preRollbackBackupPath) {
723
+ await broadcastUpgradeEvent(
724
+ entry.runtimeUrl,
725
+ entry.assistantId,
726
+ buildProgressEvent(UPGRADE_PROGRESS.RESTORING),
727
+ );
728
+ console.log(`📦 Restoring data from pre-rollback backup...`);
729
+ console.log(` Source: ${preRollbackBackupPath}`);
730
+ const restored = await restoreBackup(
731
+ entry.runtimeUrl,
732
+ entry.assistantId,
733
+ preRollbackBackupPath,
734
+ );
735
+ if (restored) {
736
+ console.log(" ✅ Data restored successfully\n");
737
+ } else {
738
+ console.warn(
739
+ " ⚠️ Data restore failed (auto-rollback continues without data restoration)\n",
740
+ );
741
+ }
742
+ }
743
+
744
+ // Restore lockfile state
// Image names prefer what the entry already recorded; digests prefer a
// fresh capture. Both fall back to the refs captured before the swap.
// The previous* fields are cleared so no stale rollback target remains.
745
+ const revertDigests = await captureImageRefs(res);
746
+ const revertedEntry: AssistantEntry = {
747
+ ...entry,
748
+ containerInfo: {
749
+ assistantImage:
750
+ entry.containerInfo?.assistantImage ??
751
+ currentImageRefs.assistant,
752
+ gatewayImage:
753
+ entry.containerInfo?.gatewayImage ?? currentImageRefs.gateway,
754
+ cesImage:
755
+ entry.containerInfo?.cesImage ??
756
+ currentImageRefs["credential-executor"],
757
+ assistantDigest:
758
+ revertDigests?.assistant ?? currentImageRefs.assistant,
759
+ gatewayDigest: revertDigests?.gateway ?? currentImageRefs.gateway,
760
+ cesDigest:
761
+ revertDigests?.["credential-executor"] ??
762
+ currentImageRefs["credential-executor"],
763
+ networkName: res.network,
764
+ },
765
+ previousServiceGroupVersion: undefined,
766
+ previousContainerInfo: undefined,
767
+ previousDbMigrationVersion: undefined,
768
+ previousWorkspaceMigrationId: undefined,
769
+ preUpgradeBackupPath: undefined,
770
+ };
771
+ saveAssistantEntry(revertedEntry);
772
+
773
+ await broadcastUpgradeEvent(
774
+ entry.runtimeUrl,
775
+ entry.assistantId,
776
+ buildCompleteEvent(
777
+ currentVersion ?? "unknown",
778
+ false,
779
+ currentVersion,
780
+ ),
781
+ );
782
+
783
+ console.log(
784
+ `\n⚠️ Rolled back to original version. Rollback to ${targetVersion} failed.`,
785
+ );
786
+ emitCliError(
787
+ "READINESS_TIMEOUT",
788
+ `Rollback to ${targetVersion} failed: containers did not become ready. Rolled back to original version.`,
789
+ );
790
+ } else {
// The reverted containers never became ready either; the lockfile is left untouched.
791
+ console.error(
792
+ `\n❌ Auto-rollback also failed. Manual intervention required.`,
793
+ );
794
+ console.log(
795
+ ` Check logs with: docker logs -f ${res.assistantContainer}`,
796
+ );
797
+ await broadcastUpgradeEvent(
798
+ entry.runtimeUrl,
799
+ entry.assistantId,
800
+ buildCompleteEvent(currentVersion ?? "unknown", false),
801
+ );
802
+ emitCliError(
803
+ "ROLLBACK_FAILED",
804
+ "Auto-rollback also failed after readiness timeout. Manual intervention required.",
805
+ );
806
+ }
807
+ } catch (revertErr) {
// Any exception thrown during the revert sequence above lands here.
808
+ const revertDetail =
809
+ revertErr instanceof Error ? revertErr.message : String(revertErr);
810
+ console.error(`\n❌ Auto-rollback failed: ${revertDetail}`);
811
+ console.error(` Manual intervention required.`);
812
+ console.log(
813
+ ` Check logs with: docker logs -f ${res.assistantContainer}`,
814
+ );
815
+ await broadcastUpgradeEvent(
816
+ entry.runtimeUrl,
817
+ entry.assistantId,
818
+ buildCompleteEvent(currentVersion ?? "unknown", false),
819
+ );
820
+ emitCliError(
821
+ "ROLLBACK_FAILED",
822
+ "Auto-rollback failed after readiness timeout. Manual intervention required.",
823
+ revertDetail,
824
+ );
825
+ }
826
+ } else {
827
+ console.log(` No previous images available for auto-rollback.`);
828
+ console.log(
829
+ ` Check logs with: docker logs -f ${res.assistantContainer}`,
830
+ );
831
+ await broadcastUpgradeEvent(
832
+ entry.runtimeUrl,
833
+ entry.assistantId,
834
+ buildCompleteEvent(currentVersion ?? "unknown", false),
835
+ );
836
+ emitCliError(
837
+ "ROLLBACK_NO_STATE",
838
+ "Containers failed to become ready and no previous images available for auto-rollback.",
839
+ );
840
+ }
841
+
// Every not-ready outcome exits non-zero, even when the auto-revert succeeded.
842
+ process.exit(1);
843
+ }
844
+ }