@vellumai/cli 0.5.5 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/knip.json CHANGED
@@ -2,7 +2,8 @@
2
2
  "entry": [
3
3
  "src/**/*.test.ts",
4
4
  "src/**/__tests__/**/*.ts",
5
- "src/adapters/openclaw-http-server.ts"
5
+ "src/adapters/openclaw-http-server.ts",
6
+ "src/lib/version-compat.ts"
6
7
  ],
7
8
  "project": ["src/**/*.ts", "src/**/*.tsx"]
8
9
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vellumai/cli",
3
- "version": "0.5.5",
3
+ "version": "0.5.6",
4
4
  "description": "CLI tools for vellum-assistant",
5
5
  "type": "module",
6
6
  "exports": {
@@ -10,6 +10,7 @@ describe("checkHealth", () => {
10
10
  test("returns unreachable for non-existent host", async () => {
11
11
  const result = await checkHealth("http://127.0.0.1:1");
12
12
  expect(["unreachable", "timeout"]).toContain(result.status);
13
+ expect(result.version).toBeUndefined();
13
14
  });
14
15
 
15
16
  test("returns healthy for a mock healthy endpoint", async () => {
@@ -24,6 +25,24 @@ describe("checkHealth", () => {
24
25
  const result = await checkHealth(`http://localhost:${server.port}`);
25
26
  expect(result.status).toBe("healthy");
26
27
  expect(result.detail).toBeNull();
28
+ expect(result.version).toBeUndefined();
29
+ } finally {
30
+ server.stop(true);
31
+ }
32
+ });
33
+
34
+ test("returns version when present in response", async () => {
35
+ const server = Bun.serve({
36
+ port: 0,
37
+ fetch() {
38
+ return Response.json({ status: "healthy", version: "1.2.3" });
39
+ },
40
+ });
41
+
42
+ try {
43
+ const result = await checkHealth(`http://localhost:${server.port}`);
44
+ expect(result.status).toBe("healthy");
45
+ expect(result.version).toBe("1.2.3");
27
46
  } finally {
28
47
  server.stop(true);
29
48
  }
@@ -33,7 +52,11 @@ describe("checkHealth", () => {
33
52
  const server = Bun.serve({
34
53
  port: 0,
35
54
  fetch() {
36
- return Response.json({ status: "degraded", message: "high latency" });
55
+ return Response.json({
56
+ status: "degraded",
57
+ message: "high latency",
58
+ version: "0.9.0",
59
+ });
37
60
  },
38
61
  });
39
62
 
@@ -41,6 +64,7 @@ describe("checkHealth", () => {
41
64
  const result = await checkHealth(`http://localhost:${server.port}`);
42
65
  expect(result.status).toBe("degraded");
43
66
  expect(result.detail).toBe("high latency");
67
+ expect(result.version).toBe("0.9.0");
44
68
  } finally {
45
69
  server.stop(true);
46
70
  }
@@ -57,6 +81,7 @@ describe("checkHealth", () => {
57
81
  try {
58
82
  const result = await checkHealth(`http://localhost:${server.port}`);
59
83
  expect(result.status).toBe("error (500)");
84
+ expect(result.version).toBeUndefined();
60
85
  } finally {
61
86
  server.stop(true);
62
87
  }
@@ -4,6 +4,7 @@ import {
4
4
  findAssistantByName,
5
5
  getActiveAssistant,
6
6
  loadAllAssistants,
7
+ updateServiceGroupVersion,
7
8
  type AssistantEntry,
8
9
  } from "../lib/assistant-config";
9
10
  import { loadGuardianToken } from "../lib/guardian-token";
@@ -424,7 +425,7 @@ async function listAllAssistants(): Promise<void> {
424
425
  // hitting the health endpoint. If the PID file is missing or the
425
426
  // process isn't running, the assistant is sleeping — skip the
426
427
  // network health check to avoid a misleading "unreachable" status.
427
- let health: { status: string; detail: string | null };
428
+ let health: { status: string; detail: string | null; version?: string };
428
429
  const resources = a.resources;
429
430
  if (a.cloud === "local" && resources) {
430
431
  const pid = readPidFile(resources.pidFile);
@@ -451,6 +452,10 @@ async function listAllAssistants(): Promise<void> {
451
452
  health = await checkHealth(a.localUrl ?? a.runtimeUrl, token);
452
453
  }
453
454
 
455
+ if (health.status === "healthy" && health.version) {
456
+ updateServiceGroupVersion(a.assistantId, health.version);
457
+ }
458
+
454
459
  const infoParts = [a.runtimeUrl];
455
460
  if (a.cloud) infoParts.push(`cloud: ${a.cloud}`);
456
461
  if (a.species) infoParts.push(`species: ${a.species}`);
@@ -89,28 +89,39 @@ interface PreflightFileEntry {
89
89
  action: string;
90
90
  }
91
91
 
92
+ interface StructuredError {
93
+ code: string;
94
+ message: string;
95
+ path?: string;
96
+ }
97
+
92
98
  interface PreflightResponse {
93
99
  can_import: boolean;
94
- errors?: string[];
100
+ validation?: {
101
+ is_valid: false;
102
+ errors: StructuredError[];
103
+ };
95
104
  files?: PreflightFileEntry[];
96
105
  summary?: {
97
- create: number;
98
- overwrite: number;
99
- unchanged: number;
100
- total: number;
106
+ files_to_create: number;
107
+ files_to_overwrite: number;
108
+ files_unchanged: number;
109
+ total_files: number;
101
110
  };
102
- conflicts?: string[];
111
+ conflicts?: StructuredError[];
103
112
  }
104
113
 
105
114
  interface ImportResponse {
106
115
  success: boolean;
107
116
  reason?: string;
108
- errors?: string[];
117
+ errors?: StructuredError[];
118
+ message?: string;
109
119
  warnings?: string[];
110
120
  summary?: {
111
- created: number;
112
- overwritten: number;
113
- skipped: number;
121
+ total_files: number;
122
+ files_created: number;
123
+ files_overwritten: number;
124
+ files_skipped: number;
114
125
  backups_created: number;
115
126
  };
116
127
  }
@@ -201,30 +212,38 @@ export async function restore(): Promise<void> {
201
212
  const result = (await response.json()) as PreflightResponse;
202
213
 
203
214
  if (!result.can_import) {
204
- console.error("Import blocked by validation errors:");
205
- for (const err of result.errors ?? []) {
206
- console.error(` - ${err}`);
215
+ if (result.validation?.errors?.length) {
216
+ console.error("Import blocked by validation errors:");
217
+ for (const err of result.validation.errors) {
218
+ console.error(` - ${err.message}${err.path ? ` (${err.path})` : ""}`);
219
+ }
220
+ }
221
+ if (result.conflicts?.length) {
222
+ console.error("Import blocked by conflicts:");
223
+ for (const conflict of result.conflicts) {
224
+ console.error(` - ${conflict.message}${conflict.path ? ` (${conflict.path})` : ""}`);
225
+ }
207
226
  }
208
227
  process.exit(1);
209
228
  }
210
229
 
211
230
  // Print summary table
212
231
  const summary = result.summary ?? {
213
- create: 0,
214
- overwrite: 0,
215
- unchanged: 0,
216
- total: 0,
232
+ files_to_create: 0,
233
+ files_to_overwrite: 0,
234
+ files_unchanged: 0,
235
+ total_files: 0,
217
236
  };
218
237
  console.log("Preflight analysis:");
219
- console.log(` Files to create: ${summary.create}`);
220
- console.log(` Files to overwrite: ${summary.overwrite}`);
221
- console.log(` Files unchanged: ${summary.unchanged}`);
222
- console.log(` Total: ${summary.total}`);
238
+ console.log(` Files to create: ${summary.files_to_create}`);
239
+ console.log(` Files to overwrite: ${summary.files_to_overwrite}`);
240
+ console.log(` Files unchanged: ${summary.files_unchanged}`);
241
+ console.log(` Total: ${summary.total_files}`);
223
242
  console.log("");
224
243
 
225
244
  const conflicts = result.conflicts ?? [];
226
245
  console.log(
227
- `Conflicts: ${conflicts.length > 0 ? conflicts.join(", ") : "none"}`,
246
+ `Conflicts: ${conflicts.length > 0 ? conflicts.map((c) => c.message).join(", ") : "none"}`,
228
247
  );
229
248
 
230
249
  // List individual files with their action
@@ -276,25 +295,26 @@ export async function restore(): Promise<void> {
276
295
 
277
296
  if (!result.success) {
278
297
  console.error(
279
- `Error: Import failed — ${result.reason ?? "unknown reason"}`,
298
+ `Error: Import failed — ${result.message ?? result.reason ?? "unknown reason"}`,
280
299
  );
281
300
  for (const err of result.errors ?? []) {
282
- console.error(` - ${err}`);
301
+ console.error(` - ${err.message}${err.path ? ` (${err.path})` : ""}`);
283
302
  }
284
303
  process.exit(1);
285
304
  }
286
305
 
287
306
  // Print import report
288
307
  const summary = result.summary ?? {
289
- created: 0,
290
- overwritten: 0,
291
- skipped: 0,
308
+ total_files: 0,
309
+ files_created: 0,
310
+ files_overwritten: 0,
311
+ files_skipped: 0,
292
312
  backups_created: 0,
293
313
  };
294
314
  console.log("✅ Restore complete.");
295
- console.log(` Files created: ${summary.created}`);
296
- console.log(` Files overwritten: ${summary.overwritten}`);
297
- console.log(` Files skipped: ${summary.skipped}`);
315
+ console.log(` Files created: ${summary.files_created}`);
316
+ console.log(` Files overwritten: ${summary.files_overwritten}`);
317
+ console.log(` Files skipped: ${summary.files_skipped}`);
298
318
  console.log(` Backups created: ${summary.backups_created}`);
299
319
 
300
320
  // Print warnings if any
@@ -0,0 +1,280 @@
1
+ import { randomBytes } from "crypto";
2
+
3
+ import {
4
+ findAssistantByName,
5
+ getActiveAssistant,
6
+ loadAllAssistants,
7
+ saveAssistantEntry,
8
+ } from "../lib/assistant-config";
9
+ import type { AssistantEntry } from "../lib/assistant-config";
10
+ import {
11
+ captureImageRefs,
12
+ clearSigningKeyBootstrapLock,
13
+ GATEWAY_INTERNAL_PORT,
14
+ dockerResourceNames,
15
+ migrateCesSecurityFiles,
16
+ migrateGatewaySecurityFiles,
17
+ startContainers,
18
+ stopContainers,
19
+ } from "../lib/docker";
20
+ import type { ServiceName } from "../lib/docker";
21
+ import { loadBootstrapSecret } from "../lib/guardian-token";
22
+ import {
23
+ broadcastUpgradeEvent,
24
+ captureContainerEnv,
25
+ waitForReady,
26
+ } from "./upgrade";
27
+
28
+ function parseArgs(): { name: string | null } {
29
+ const args = process.argv.slice(3);
30
+ let name: string | null = null;
31
+
32
+ for (let i = 0; i < args.length; i++) {
33
+ const arg = args[i];
34
+ if (arg === "--help" || arg === "-h") {
35
+ console.log("Usage: vellum rollback [<name>]");
36
+ console.log("");
37
+ console.log("Roll back a Docker assistant to the previous version.");
38
+ console.log("");
39
+ console.log("Arguments:");
40
+ console.log(
41
+ " <name> Name of the assistant to roll back (default: active or only assistant)",
42
+ );
43
+ console.log("");
44
+ console.log("Examples:");
45
+ console.log(
46
+ " vellum rollback # Roll back the active assistant",
47
+ );
48
+ console.log(
49
+ " vellum rollback my-assistant # Roll back a specific assistant by name",
50
+ );
51
+ process.exit(0);
52
+ } else if (!arg.startsWith("-")) {
53
+ name = arg;
54
+ } else {
55
+ console.error(`Error: Unknown option '${arg}'.`);
56
+ process.exit(1);
57
+ }
58
+ }
59
+
60
+ return { name };
61
+ }
62
+
63
+ function resolveCloud(entry: AssistantEntry): string {
64
+ if (entry.cloud) {
65
+ return entry.cloud;
66
+ }
67
+ if (entry.project) {
68
+ return "gcp";
69
+ }
70
+ if (entry.sshUser) {
71
+ return "custom";
72
+ }
73
+ return "local";
74
+ }
75
+
76
+ /**
77
+ * Resolve which assistant to target for the rollback command. Priority:
78
+ * 1. Explicit name argument
79
+ * 2. Active assistant set via `vellum use`
80
+ * 3. Sole assistant (when exactly one exists)
81
+ */
82
+ function resolveTargetAssistant(nameArg: string | null): AssistantEntry {
83
+ if (nameArg) {
84
+ const entry = findAssistantByName(nameArg);
85
+ if (!entry) {
86
+ console.error(`No assistant found with name '${nameArg}'.`);
87
+ process.exit(1);
88
+ }
89
+ return entry;
90
+ }
91
+
92
+ const active = getActiveAssistant();
93
+ if (active) {
94
+ const entry = findAssistantByName(active);
95
+ if (entry) return entry;
96
+ }
97
+
98
+ const all = loadAllAssistants();
99
+ if (all.length === 1) return all[0];
100
+
101
+ if (all.length === 0) {
102
+ console.error("No assistants found. Run 'vellum hatch' first.");
103
+ } else {
104
+ console.error(
105
+ "Multiple assistants found. Specify a name or set an active assistant with 'vellum use <name>'.",
106
+ );
107
+ }
108
+ process.exit(1);
109
+ }
110
+
111
+ export async function rollback(): Promise<void> {
112
+ const { name } = parseArgs();
113
+ const entry = resolveTargetAssistant(name);
114
+ const cloud = resolveCloud(entry);
115
+
116
+ // Only Docker assistants support rollback
117
+ if (cloud !== "docker") {
118
+ console.error(
119
+ "Rollback is only supported for Docker assistants. For managed assistants, use the version picker to upgrade to the previous version.",
120
+ );
121
+ process.exit(1);
122
+ }
123
+
124
+ // Verify rollback state exists
125
+ if (!entry.previousServiceGroupVersion || !entry.previousContainerInfo) {
126
+ console.error(
127
+ "No rollback state available. Run `vellum upgrade` first to create a rollback point.",
128
+ );
129
+ process.exit(1);
130
+ }
131
+
132
+ // Verify all three digest fields are present
133
+ const prev = entry.previousContainerInfo;
134
+ if (!prev.assistantDigest || !prev.gatewayDigest || !prev.cesDigest) {
135
+ console.error(
136
+ "Incomplete rollback state. Previous container digests are missing.",
137
+ );
138
+ process.exit(1);
139
+ }
140
+
141
+ // Build image refs from the previous digests
142
+ const previousImageRefs: Record<ServiceName, string> = {
143
+ assistant: prev.assistantDigest,
144
+ "credential-executor": prev.cesDigest,
145
+ gateway: prev.gatewayDigest,
146
+ };
147
+
148
+ const instanceName = entry.assistantId;
149
+ const res = dockerResourceNames(instanceName);
150
+
151
+ console.log(
152
+ `🔄 Rolling back Docker assistant '${instanceName}' to ${entry.previousServiceGroupVersion}...\n`,
153
+ );
154
+
155
+ // Capture current container env
156
+ console.log("💾 Capturing existing container environment...");
157
+ const capturedEnv = await captureContainerEnv(res.assistantContainer);
158
+ console.log(
159
+ ` Captured ${Object.keys(capturedEnv).length} env var(s) from ${res.assistantContainer}\n`,
160
+ );
161
+
162
+ // Extract CES_SERVICE_TOKEN from captured env, or generate fresh one
163
+ const cesServiceToken =
164
+ capturedEnv["CES_SERVICE_TOKEN"] || randomBytes(32).toString("hex");
165
+
166
+ // Retrieve or generate a bootstrap secret for the gateway.
167
+ const bootstrapSecret =
168
+ loadBootstrapSecret(instanceName) || randomBytes(32).toString("hex");
169
+
170
+ // Build extra env vars, excluding keys managed by serviceDockerRunArgs
171
+ const envKeysSetByRunArgs = new Set([
172
+ "CES_SERVICE_TOKEN",
173
+ "VELLUM_ASSISTANT_NAME",
174
+ "RUNTIME_HTTP_HOST",
175
+ "PATH",
176
+ ]);
177
+ for (const envVar of ["ANTHROPIC_API_KEY", "VELLUM_PLATFORM_URL"]) {
178
+ if (process.env[envVar]) {
179
+ envKeysSetByRunArgs.add(envVar);
180
+ }
181
+ }
182
+ const extraAssistantEnv: Record<string, string> = {};
183
+ for (const [key, value] of Object.entries(capturedEnv)) {
184
+ if (!envKeysSetByRunArgs.has(key)) {
185
+ extraAssistantEnv[key] = value;
186
+ }
187
+ }
188
+
189
+ // Parse gateway port from entry's runtimeUrl, fall back to default
190
+ let gatewayPort = GATEWAY_INTERNAL_PORT;
191
+ try {
192
+ const parsed = new URL(entry.runtimeUrl);
193
+ const port = parseInt(parsed.port, 10);
194
+ if (!isNaN(port)) {
195
+ gatewayPort = port;
196
+ }
197
+ } catch {
198
+ // use default
199
+ }
200
+
201
+ // Notify connected clients that a rollback is about to begin (best-effort)
202
+ console.log("📢 Notifying connected clients...");
203
+ await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
204
+ type: "starting",
205
+ targetVersion: entry.previousServiceGroupVersion,
206
+ expectedDowntimeSeconds: 60,
207
+ });
208
+ // Brief pause to allow SSE delivery before containers stop.
209
+ await new Promise((r) => setTimeout(r, 500));
210
+
211
+ console.log("🛑 Stopping existing containers...");
212
+ await stopContainers(res);
213
+ console.log("✅ Containers stopped\n");
214
+
215
+ // Run security file migrations and signing key cleanup
216
+ console.log("🔄 Migrating security files to gateway volume...");
217
+ await migrateGatewaySecurityFiles(res, (msg) => console.log(msg));
218
+
219
+ console.log("🔄 Migrating credential files to CES security volume...");
220
+ await migrateCesSecurityFiles(res, (msg) => console.log(msg));
221
+
222
+ console.log("🔑 Clearing signing key bootstrap lock...");
223
+ await clearSigningKeyBootstrapLock(res);
224
+
225
+ console.log("🚀 Starting containers with previous version...");
226
+ await startContainers(
227
+ {
228
+ bootstrapSecret,
229
+ cesServiceToken,
230
+ extraAssistantEnv,
231
+ gatewayPort,
232
+ imageTags: previousImageRefs,
233
+ instanceName,
234
+ res,
235
+ },
236
+ (msg) => console.log(msg),
237
+ );
238
+ console.log("✅ Containers started\n");
239
+
240
+ console.log("Waiting for assistant to become ready...");
241
+ const ready = await waitForReady(entry.runtimeUrl);
242
+
243
+ if (ready) {
244
+ // Capture new digests from the rolled-back containers
245
+ const newDigests = await captureImageRefs(res);
246
+
247
+ // Swap current/previous state to enable "rollback the rollback"
248
+ const updatedEntry: AssistantEntry = {
249
+ ...entry,
250
+ serviceGroupVersion: entry.previousServiceGroupVersion,
251
+ containerInfo: {
252
+ assistantImage: prev.assistantImage ?? previousImageRefs.assistant,
253
+ gatewayImage: prev.gatewayImage ?? previousImageRefs.gateway,
254
+ cesImage: prev.cesImage ?? previousImageRefs["credential-executor"],
255
+ assistantDigest: newDigests?.assistant,
256
+ gatewayDigest: newDigests?.gateway,
257
+ cesDigest: newDigests?.["credential-executor"],
258
+ networkName: res.network,
259
+ },
260
+ previousServiceGroupVersion: entry.serviceGroupVersion,
261
+ previousContainerInfo: entry.containerInfo,
262
+ };
263
+ saveAssistantEntry(updatedEntry);
264
+
265
+ // Notify clients that the rollback succeeded
266
+ await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
267
+ type: "complete",
268
+ installedVersion: entry.previousServiceGroupVersion,
269
+ success: true,
270
+ });
271
+
272
+ console.log(
273
+ `\n✅ Docker assistant '${instanceName}' rolled back to ${entry.previousServiceGroupVersion}.`,
274
+ );
275
+ } else {
276
+ console.error(`\n❌ Containers failed to become ready within the timeout.`);
277
+ console.log(` Check logs with: docker logs -f ${res.assistantContainer}`);
278
+ process.exit(1);
279
+ }
280
+ }
@@ -11,6 +11,7 @@ import {
11
11
  import type { AssistantEntry } from "../lib/assistant-config";
12
12
  import {
13
13
  captureImageRefs,
14
+ clearSigningKeyBootstrapLock,
14
15
  DOCKERHUB_IMAGES,
15
16
  DOCKER_READY_TIMEOUT_MS,
16
17
  GATEWAY_INTERNAL_PORT,
@@ -26,6 +27,7 @@ import {
26
27
  getPlatformUrl,
27
28
  readPlatformToken,
28
29
  } from "../lib/platform-client";
30
+ import { loadBootstrapSecret, loadGuardianToken } from "../lib/guardian-token";
29
31
  import { exec, execOutput } from "../lib/step-runner";
30
32
 
31
33
  interface UpgradeArgs {
@@ -137,7 +139,7 @@ function resolveTargetAssistant(nameArg: string | null): AssistantEntry {
137
139
  * Capture environment variables from a running Docker container so they
138
140
  * can be replayed onto the replacement container after upgrade.
139
141
  */
140
- async function captureContainerEnv(
142
+ export async function captureContainerEnv(
141
143
  containerName: string,
142
144
  ): Promise<Record<string, string>> {
143
145
  const captured: Record<string, string> = {};
@@ -165,7 +167,7 @@ async function captureContainerEnv(
165
167
  * Poll the gateway `/readyz` endpoint until it returns 200 or the timeout
166
168
  * elapses. Returns whether the assistant became ready.
167
169
  */
168
- async function waitForReady(runtimeUrl: string): Promise<boolean> {
170
+ export async function waitForReady(runtimeUrl: string): Promise<boolean> {
169
171
  const readyUrl = `${runtimeUrl}/readyz`;
170
172
  const start = Date.now();
171
173
 
@@ -199,6 +201,35 @@ async function waitForReady(runtimeUrl: string): Promise<boolean> {
199
201
  return false;
200
202
  }
201
203
 
204
+ /**
205
+ * Best-effort broadcast of an upgrade lifecycle event to connected clients
206
+ * via the gateway's upgrade-broadcast proxy. Uses guardian token auth.
207
+ * Failures are logged but never block the upgrade flow.
208
+ */
209
+ export async function broadcastUpgradeEvent(
210
+ gatewayUrl: string,
211
+ assistantId: string,
212
+ event: Record<string, unknown>,
213
+ ): Promise<void> {
214
+ try {
215
+ const token = loadGuardianToken(assistantId);
216
+ const headers: Record<string, string> = {
217
+ "Content-Type": "application/json",
218
+ };
219
+ if (token?.accessToken) {
220
+ headers["Authorization"] = `Bearer ${token.accessToken}`;
221
+ }
222
+ await fetch(`${gatewayUrl}/v1/admin/upgrade-broadcast`, {
223
+ method: "POST",
224
+ headers,
225
+ body: JSON.stringify(event),
226
+ signal: AbortSignal.timeout(3000),
227
+ });
228
+ } catch {
229
+ // Best-effort — gateway/daemon may already be shutting down or not yet ready
230
+ }
231
+ }
232
+
202
233
  async function upgradeDocker(
203
234
  entry: AssistantEntry,
204
235
  version: string | null,
@@ -234,6 +265,18 @@ async function upgradeDocker(
234
265
  );
235
266
  }
236
267
 
268
+ // Persist rollback state to lockfile BEFORE any destructive changes.
269
+ // This enables the `vellum rollback` command to restore the previous version.
270
+ if (entry.serviceGroupVersion && entry.containerInfo) {
271
+ const rollbackEntry: AssistantEntry = {
272
+ ...entry,
273
+ previousServiceGroupVersion: entry.serviceGroupVersion,
274
+ previousContainerInfo: { ...entry.containerInfo },
275
+ };
276
+ saveAssistantEntry(rollbackEntry);
277
+ console.log(` Saved rollback state: ${entry.serviceGroupVersion}\n`);
278
+ }
279
+
237
280
  console.log("💾 Capturing existing container environment...");
238
281
  const capturedEnv = await captureContainerEnv(res.assistantContainer);
239
282
  console.log(
@@ -246,6 +289,16 @@ async function upgradeDocker(
246
289
  await exec("docker", ["pull", imageTags["credential-executor"]]);
247
290
  console.log("✅ Docker images pulled\n");
248
291
 
292
+ // Notify connected clients that an upgrade is about to begin.
293
+ console.log("📢 Notifying connected clients...");
294
+ await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
295
+ type: "starting",
296
+ targetVersion: versionTag,
297
+ expectedDowntimeSeconds: 60,
298
+ });
299
+ // Brief pause to allow SSE delivery before containers stop.
300
+ await new Promise((r) => setTimeout(r, 500));
301
+
249
302
  console.log("🛑 Stopping existing containers...");
250
303
  await stopContainers(res);
251
304
  console.log("✅ Containers stopped\n");
@@ -269,6 +322,11 @@ async function upgradeDocker(
269
322
  const cesServiceToken =
270
323
  capturedEnv["CES_SERVICE_TOKEN"] || randomBytes(32).toString("hex");
271
324
 
325
+ // Retrieve or generate a bootstrap secret for the gateway. The secret was
326
+ // persisted to disk during hatch; older instances won't have one yet.
327
+ const bootstrapSecret =
328
+ loadBootstrapSecret(instanceName) || randomBytes(32).toString("hex");
329
+
272
330
  // Build the set of extra env vars to replay on the new assistant container.
273
331
  // Captured env vars serve as the base; keys already managed by
274
332
  // serviceDockerRunArgs are excluded to avoid duplicates.
@@ -297,9 +355,13 @@ async function upgradeDocker(
297
355
  console.log("🔄 Migrating credential files to CES security volume...");
298
356
  await migrateCesSecurityFiles(res, (msg) => console.log(msg));
299
357
 
358
+ console.log("🔑 Clearing signing key bootstrap lock...");
359
+ await clearSigningKeyBootstrapLock(res);
360
+
300
361
  console.log("🚀 Starting upgraded containers...");
301
362
  await startContainers(
302
363
  {
364
+ bootstrapSecret,
303
365
  cesServiceToken,
304
366
  extraAssistantEnv,
305
367
  gatewayPort,
@@ -328,9 +390,18 @@ async function upgradeDocker(
328
390
  cesDigest: newDigests?.["credential-executor"],
329
391
  networkName: res.network,
330
392
  },
393
+ previousServiceGroupVersion: entry.serviceGroupVersion,
394
+ previousContainerInfo: entry.containerInfo,
331
395
  };
332
396
  saveAssistantEntry(updatedEntry);
333
397
 
398
+ // Notify clients on the new service group that the upgrade succeeded.
399
+ await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
400
+ type: "complete",
401
+ installedVersion: versionTag,
402
+ success: true,
403
+ });
404
+
334
405
  console.log(
335
406
  `\n✅ Docker assistant '${instanceName}' upgraded to ${versionTag}.`,
336
407
  );
@@ -344,6 +415,7 @@ async function upgradeDocker(
344
415
 
345
416
  await startContainers(
346
417
  {
418
+ bootstrapSecret,
347
419
  cesServiceToken,
348
420
  extraAssistantEnv,
349
421
  gatewayPort,
@@ -356,19 +428,43 @@ async function upgradeDocker(
356
428
 
357
429
  const rollbackReady = await waitForReady(entry.runtimeUrl);
358
430
  if (rollbackReady) {
359
- // Restore previous container info in lockfile after rollback
431
+ // Restore previous container info in lockfile after rollback.
432
+ // previousImageRefs contains sha256 digests from `docker inspect
433
+ // --format {{.Image}}`. The *Image fields should hold
434
+ // human-readable image:tag names, so prefer the pre-upgrade
435
+ // containerInfo values and store digests in the *Digest fields.
360
436
  if (previousImageRefs) {
361
437
  const rolledBackEntry: AssistantEntry = {
362
438
  ...entry,
363
439
  containerInfo: {
364
- assistantImage: previousImageRefs.assistant,
365
- gatewayImage: previousImageRefs.gateway,
366
- cesImage: previousImageRefs["credential-executor"],
440
+ assistantImage:
441
+ entry.containerInfo?.assistantImage ??
442
+ previousImageRefs.assistant,
443
+ gatewayImage:
444
+ entry.containerInfo?.gatewayImage ??
445
+ previousImageRefs.gateway,
446
+ cesImage:
447
+ entry.containerInfo?.cesImage ??
448
+ previousImageRefs["credential-executor"],
449
+ assistantDigest: previousImageRefs.assistant,
450
+ gatewayDigest: previousImageRefs.gateway,
451
+ cesDigest: previousImageRefs["credential-executor"],
367
452
  networkName: res.network,
368
453
  },
454
+ previousServiceGroupVersion: undefined,
455
+ previousContainerInfo: undefined,
369
456
  };
370
457
  saveAssistantEntry(rolledBackEntry);
371
458
  }
459
+
460
+ // Notify clients that the upgrade failed and rolled back.
461
+ await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
462
+ type: "complete",
463
+ installedVersion: entry.serviceGroupVersion ?? "unknown",
464
+ success: false,
465
+ rolledBackToVersion: entry.serviceGroupVersion,
466
+ });
467
+
372
468
  console.log(
373
469
  `\n⚠️ Rolled back to previous version. Upgrade to ${versionTag} failed.`,
374
470
  );
@@ -431,6 +527,15 @@ async function upgradePlatform(
431
527
  body.version = version;
432
528
  }
433
529
 
530
+ // Notify connected clients that an upgrade is about to begin.
531
+ const targetVersion = version ?? `v${cliPkg.version}`;
532
+ console.log("📢 Notifying connected clients...");
533
+ await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
534
+ type: "starting",
535
+ targetVersion,
536
+ expectedDowntimeSeconds: 90,
537
+ });
538
+
434
539
  const response = await fetch(url, {
435
540
  method: "POST",
436
541
  headers: {
@@ -446,10 +551,23 @@ async function upgradePlatform(
446
551
  console.error(
447
552
  `Error: Platform upgrade failed (${response.status}): ${text}`,
448
553
  );
554
+ await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
555
+ type: "complete",
556
+ installedVersion: entry.serviceGroupVersion ?? "unknown",
557
+ success: false,
558
+ });
449
559
  process.exit(1);
450
560
  }
451
561
 
452
562
  const result = (await response.json()) as UpgradeApiResponse;
563
+
564
+ // NOTE: We intentionally do NOT broadcast a "complete" event here.
565
+ // The platform API returning 200 only means "upgrade request accepted" —
566
+ // the service group has not yet restarted with the new version. The
567
+ // completion signal will come from the client's health-check
568
+ // version-change detection (DaemonConnection.swift) once the new
569
+ // version actually appears after the platform restarts the service group.
570
+
453
571
  console.log(`✅ ${result.detail}`);
454
572
  if (result.version) {
455
573
  console.log(` Version: ${result.version}`);
@@ -40,6 +40,14 @@ export async function wake(): Promise<void> {
40
40
  const entry = resolveTargetAssistant(nameArg);
41
41
 
42
42
  if (entry.cloud === "docker") {
43
+ if (watch || foreground) {
44
+ const ignored = [watch && "--watch", foreground && "--foreground"]
45
+ .filter(Boolean)
46
+ .join(" and ");
47
+ console.warn(
48
+ `Warning: ${ignored} ignored for Docker instances (not supported).`,
49
+ );
50
+ }
43
51
  const res = dockerResourceNames(entry.assistantId);
44
52
  await wakeContainers(res);
45
53
  console.log("Docker containers started.");
package/src/index.ts CHANGED
@@ -11,6 +11,7 @@ import { ps } from "./commands/ps";
11
11
  import { recover } from "./commands/recover";
12
12
  import { restore } from "./commands/restore";
13
13
  import { retire } from "./commands/retire";
14
+ import { rollback } from "./commands/rollback";
14
15
  import { setup } from "./commands/setup";
15
16
  import { sleep } from "./commands/sleep";
16
17
  import { ssh } from "./commands/ssh";
@@ -39,6 +40,7 @@ const commands = {
39
40
  recover,
40
41
  restore,
41
42
  retire,
43
+ rollback,
42
44
  setup,
43
45
  sleep,
44
46
  ssh,
@@ -68,6 +70,9 @@ function printHelp(): void {
68
70
  console.log(" recover Restore a previously retired local assistant");
69
71
  console.log(" restore Restore a .vbundle backup into a running assistant");
70
72
  console.log(" retire Delete an assistant instance");
73
+ console.log(
74
+ " rollback Roll back a Docker assistant to the previous version",
75
+ );
71
76
  console.log(" setup Configure API keys interactively");
72
77
  console.log(" sleep Stop the assistant process");
73
78
  console.log(" ssh SSH into a remote assistant instance");
@@ -78,6 +78,10 @@ export interface AssistantEntry {
78
78
  serviceGroupVersion?: string;
79
79
  /** Docker image metadata for rollback. Only present for docker topology entries. */
80
80
  containerInfo?: ContainerInfo;
81
+ /** The service group version that was running before the last upgrade. */
82
+ previousServiceGroupVersion?: string;
83
+ /** Docker image metadata from before the last upgrade. Enables rollback to the prior version. */
84
+ previousContainerInfo?: ContainerInfo;
81
85
  [key: string]: unknown;
82
86
  }
83
87
 
@@ -360,6 +364,23 @@ export function saveAssistantEntry(entry: AssistantEntry): void {
360
364
  writeAssistants(entries);
361
365
  }
362
366
 
367
+ /**
368
+ * Update just the serviceGroupVersion field on a lockfile entry.
369
+ * Reads the current entry, updates the version if changed, and writes back.
370
+ * No-op if the entry doesn't exist or the version hasn't changed.
371
+ */
372
+ export function updateServiceGroupVersion(
373
+ assistantId: string,
374
+ version: string,
375
+ ): void {
376
+ const entries = readAssistants();
377
+ const entry = entries.find((e) => e.assistantId === assistantId);
378
+ if (!entry) return;
379
+ if (entry.serviceGroupVersion === version) return;
380
+ entry.serviceGroupVersion = version;
381
+ writeAssistants(entries);
382
+ }
383
+
363
384
  /**
364
385
  * Scan upward from `basePort` to find an available port. A port is considered
365
386
  * available when `probePort()` returns false (nothing listening). Scans up to
@@ -426,6 +447,7 @@ export async function allocateLocalResources(
426
447
  entry.resources.daemonPort,
427
448
  entry.resources.gatewayPort,
428
449
  entry.resources.qdrantPort,
450
+ entry.resources.cesPort,
429
451
  );
430
452
  }
431
453
  }
@@ -445,13 +467,19 @@ export async function allocateLocalResources(
445
467
  daemonPort,
446
468
  gatewayPort,
447
469
  ]);
470
+ const cesPort = await findAvailablePort(DEFAULT_CES_PORT, [
471
+ ...reservedPorts,
472
+ daemonPort,
473
+ gatewayPort,
474
+ qdrantPort,
475
+ ]);
448
476
 
449
477
  return {
450
478
  instanceDir,
451
479
  daemonPort,
452
480
  gatewayPort,
453
481
  qdrantPort,
454
- cesPort: DEFAULT_CES_PORT,
482
+ cesPort,
455
483
  pidFile: join(instanceDir, ".vellum", "vellum.pid"),
456
484
  };
457
485
  }
package/src/lib/docker.ts CHANGED
@@ -14,7 +14,7 @@ import {
14
14
  import type { AssistantEntry } from "./assistant-config";
15
15
  import { DEFAULT_GATEWAY_PORT, PROVIDER_ENV_VAR_NAMES } from "./constants";
16
16
  import type { Species } from "./constants";
17
- import { leaseGuardianToken } from "./guardian-token";
17
+ import { leaseGuardianToken, saveBootstrapSecret } from "./guardian-token";
18
18
  import { isVellumProcess, stopProcess } from "./process";
19
19
  import { generateInstanceName } from "./random-name";
20
20
  import { exec, execOutput } from "./step-runner";
@@ -464,6 +464,7 @@ async function buildAllImages(
464
464
  * can be restarted independently.
465
465
  */
466
466
  export function serviceDockerRunArgs(opts: {
467
+ bootstrapSecret?: string;
467
468
  cesServiceToken?: string;
468
469
  extraAssistantEnv?: Record<string, string>;
469
470
  gatewayPort: number;
@@ -552,6 +553,9 @@ export function serviceDockerRunArgs(opts: {
552
553
  ...(cesServiceToken
553
554
  ? ["-e", `CES_SERVICE_TOKEN=${cesServiceToken}`]
554
555
  : []),
556
+ ...(opts.bootstrapSecret
557
+ ? ["-e", `GUARDIAN_BOOTSTRAP_SECRET=${opts.bootstrapSecret}`]
558
+ : []),
555
559
  imageTags.gateway,
556
560
  ],
557
561
  "credential-executor": () => [
@@ -735,6 +739,7 @@ export const SERVICE_START_ORDER: ServiceName[] = [
735
739
  /** Start all three containers in dependency order. */
736
740
  export async function startContainers(
737
741
  opts: {
742
+ bootstrapSecret?: string;
738
743
  cesServiceToken?: string;
739
744
  extraAssistantEnv?: Record<string, string>;
740
745
  gatewayPort: number;
@@ -760,6 +765,27 @@ export async function stopContainers(
760
765
  await removeContainer(res.assistantContainer);
761
766
  }
762
767
 
/**
 * Delete `signing-key-bootstrap.lock` from the gateway security volume.
 *
 * With the lockfile gone the daemon can fetch the signing key from the
 * gateway again on its next startup, which is needed during upgrades where
 * the gateway may generate a new key.
 *
 * The removal runs in a throwaway `busybox` container (`docker run --rm`)
 * that mounts the volume at `/gateway-security`; `rm -f` is used so that a
 * missing lockfile is not treated as an error.
 *
 * @param res - Docker resource names; only `gatewaySecurityVolume` is read.
 */
export async function clearSigningKeyBootstrapLock(
  res: ReturnType<typeof dockerResourceNames>,
): Promise<void> {
  const volumeMount = `${res.gatewaySecurityVolume}:/gateway-security`;
  const removeLockCommand = [
    "rm",
    "-f",
    "/gateway-security/signing-key-bootstrap.lock",
  ];

  await exec("docker", [
    "run",
    "--rm",
    "-v",
    volumeMount,
    "busybox",
    ...removeLockCommand,
  ]);
}
788
+
763
789
  /** Stop containers without removing them (preserves state for `docker start`). */
764
790
  export async function sleepContainers(
765
791
  res: ReturnType<typeof dockerResourceNames>,
@@ -771,8 +797,14 @@ export async function sleepContainers(
771
797
  ]) {
772
798
  try {
773
799
  await exec("docker", ["stop", container]);
774
- } catch {
775
- // container may not exist or already stopped
800
+ } catch (err) {
801
+ const msg =
802
+ err instanceof Error ? err.message.toLowerCase() : String(err);
803
+ if (msg.includes("no such container") || msg.includes("is not running")) {
804
+ // container doesn't exist or already stopped — expected, skip
805
+ continue;
806
+ }
807
+ throw err;
776
808
  }
777
809
  }
778
810
  }
@@ -1071,9 +1103,36 @@ export async function hatchDocker(
1071
1103
  await exec("docker", ["volume", "create", res.cesSecurityVolume]);
1072
1104
  await exec("docker", ["volume", "create", res.gatewaySecurityVolume]);
1073
1105
 
1106
+ // Set workspace volume ownership so non-root containers (UID 1001) can write.
1107
+ await exec("docker", [
1108
+ "run",
1109
+ "--rm",
1110
+ "-v",
1111
+ `${res.workspaceVolume}:/workspace`,
1112
+ "busybox",
1113
+ "chown",
1114
+ "1001:1001",
1115
+ "/workspace",
1116
+ ]);
1117
+
1118
+ // Clear any stale signing-key bootstrap lockfile so the daemon can
1119
+ // fetch the key from the gateway on first startup.
1120
+ await exec("docker", [
1121
+ "run",
1122
+ "--rm",
1123
+ "-v",
1124
+ `${res.gatewaySecurityVolume}:/gateway-security`,
1125
+ "busybox",
1126
+ "rm",
1127
+ "-f",
1128
+ "/gateway-security/signing-key-bootstrap.lock",
1129
+ ]);
1130
+
1074
1131
  const cesServiceToken = randomBytes(32).toString("hex");
1132
+ const bootstrapSecret = randomBytes(32).toString("hex");
1133
+ saveBootstrapSecret(instanceName, bootstrapSecret);
1075
1134
  await startContainers(
1076
- { cesServiceToken, gatewayPort, imageTags, instanceName, res },
1135
+ { bootstrapSecret, cesServiceToken, gatewayPort, imageTags, instanceName, res },
1077
1136
  log,
1078
1137
  );
1079
1138
 
@@ -1252,7 +1311,9 @@ async function waitForGatewayAndLease(opts: {
1252
1311
  // Log periodically so the user knows we're still trying
1253
1312
  const elapsed = ((Date.now() - leaseStart) / 1000).toFixed(0);
1254
1313
  log(
1255
- `Guardian token lease: attempt failed after ${elapsed}s (${lastLeaseError.split("\n")[0]}), retrying...`,
1314
+ `Guardian token lease: attempt failed after ${elapsed}s (${
1315
+ lastLeaseError.split("\n")[0]
1316
+ }), retrying...`,
1256
1317
  );
1257
1318
  }
1258
1319
  await new Promise((r) => setTimeout(r, 2000));
@@ -1260,7 +1321,10 @@ async function waitForGatewayAndLease(opts: {
1260
1321
 
1261
1322
  if (!leaseSuccess) {
1262
1323
  log(
1263
- `\u26a0\ufe0f Guardian token lease: FAILED after ${((Date.now() - leaseStart) / 1000).toFixed(1)}s — ${lastLeaseError ?? "unknown error"}`,
1324
+ `\u26a0\ufe0f Guardian token lease: FAILED after ${(
1325
+ (Date.now() - leaseStart) /
1326
+ 1000
1327
+ ).toFixed(1)}s — ${lastLeaseError ?? "unknown error"}`,
1264
1328
  );
1265
1329
  }
1266
1330
 
@@ -42,6 +42,46 @@ function getPersistedDeviceIdPath(): string {
42
42
  return join(getXdgConfigHome(), "vellum", "device-id");
43
43
  }
44
44
 
/**
 * Build the path of the per-assistant bootstrap secret file:
 * `<xdg-config-home>/vellum/assistants/<assistantId>/bootstrap-secret`.
 */
function getBootstrapSecretPath(assistantId: string): string {
  const segments = [
    getXdgConfigHome(),
    "vellum",
    "assistants",
    assistantId,
    "bootstrap-secret",
  ];
  return join(...segments);
}
54
+
/**
 * Read the bootstrap secret previously stored for `assistantId`.
 *
 * @returns The file contents with surrounding whitespace trimmed, or `null`
 *   when the file cannot be read or contains nothing but whitespace.
 */
export function loadBootstrapSecret(assistantId: string): string | null {
  let contents: string;
  try {
    contents = readFileSync(getBootstrapSecretPath(assistantId), "utf-8");
  } catch {
    // Missing or unreadable file: treat as "no secret saved".
    return null;
  }

  const secret = contents.trim();
  return secret.length === 0 ? null : secret;
}
67
+
/**
 * Write the bootstrap secret for `assistantId` to disk so that the desktop
 * client and the upgrade/rollback paths can read it back later.
 *
 * The parent directory is created with mode 0700 when it does not exist, and
 * the secret file ends up with mode 0600.
 *
 * @param assistantId - Assistant whose secret file is written.
 * @param secret - Secret value; stored followed by a trailing newline.
 */
export function saveBootstrapSecret(
  assistantId: string,
  secret: string,
): void {
  const secretPath = getBootstrapSecretPath(assistantId);
  const parentDir = dirname(secretPath);

  const parentMissing = !existsSync(parentDir);
  if (parentMissing) {
    mkdirSync(parentDir, { recursive: true, mode: 0o700 });
  }

  writeFileSync(secretPath, `${secret}\n`, { mode: 0o600 });
  // The `mode` option only takes effect when the file is newly created, so
  // chmod explicitly to tighten permissions on a pre-existing file as well.
  chmodSync(secretPath, 0o600);
}
84
+
45
85
  function hashWithSalt(input: string): string {
46
86
  return createHash("sha256")
47
87
  .update(input + DEVICE_ID_SALT)
@@ -168,9 +208,14 @@ export async function leaseGuardianToken(
168
208
  assistantId: string,
169
209
  ): Promise<GuardianTokenData> {
170
210
  const deviceId = computeDeviceId();
211
+ const headers: Record<string, string> = { "Content-Type": "application/json" };
212
+ const bootstrapSecret = loadBootstrapSecret(assistantId);
213
+ if (bootstrapSecret) {
214
+ headers["x-bootstrap-secret"] = bootstrapSecret;
215
+ }
171
216
  const response = await fetch(`${gatewayUrl}/v1/guardian/init`, {
172
217
  method: "POST",
173
- headers: { "Content-Type": "application/json" },
218
+ headers,
174
219
  body: JSON.stringify({ platform: "cli", deviceId }),
175
220
  });
176
221
 
@@ -3,11 +3,13 @@ export const HEALTH_CHECK_TIMEOUT_MS = 1500;
/** JSON body returned by an assistant health endpoint. */
interface HealthResponse {
  /** Version string reported by the service, when it includes one. */
  version?: string;
  /** Optional message; surfaced as the result detail for non-healthy statuses. */
  message?: string;
  /** Reported status, e.g. "healthy". */
  status: string;
}
7
8
 
/** Outcome of a health probe as returned to callers of the health checks. */
export interface HealthCheckResult {
  /** Version reported by the probed service; absent when none was returned. */
  version?: string;
  /** Extra detail for a non-healthy status, or `null` when there is none. */
  detail: string | null;
  /** Status label, e.g. "healthy", "unreachable" or "timeout". */
  status: string;
}
12
14
 
13
15
  export async function checkManagedHealth(
@@ -63,6 +65,7 @@ export async function checkManagedHealth(
63
65
  return {
64
66
  status,
65
67
  detail: status !== "healthy" ? (data.message ?? null) : null,
68
+ version: data.version,
66
69
  };
67
70
  } catch (error) {
68
71
  const status =
@@ -108,6 +111,7 @@ export async function checkHealth(
108
111
  return {
109
112
  status,
110
113
  detail: status !== "healthy" ? (data.message ?? null) : null,
114
+ version: data.version,
111
115
  };
112
116
  } catch (error) {
113
117
  const status =
@@ -60,14 +60,15 @@ interface OrganizationListResponse {
60
60
  }
61
61
 
62
62
  export async function fetchOrganizationId(token: string): Promise<string> {
63
- const url = `${getPlatformUrl()}/v1/organizations/`;
63
+ const platformUrl = getPlatformUrl();
64
+ const url = `${platformUrl}/v1/organizations/`;
64
65
  const response = await fetch(url, {
65
66
  headers: { "X-Session-Token": token },
66
67
  });
67
68
 
68
69
  if (!response.ok) {
69
70
  throw new Error(
70
- `Failed to fetch organizations (${response.status}). Try logging in again.`,
71
+ `Failed to fetch organizations from ${platformUrl} (${response.status}). Try logging in again.`,
71
72
  );
72
73
  }
73
74
 
@@ -0,0 +1,45 @@
/**
 * Parse a version string into its numeric `{ major, minor, patch }` parts.
 *
 * Accepted forms (surrounding whitespace is ignored):
 * - an optional single `v`/`V` prefix: "v1.2.3" or "1.2.3"
 * - a missing patch component, which defaults to 0: "1.2"
 * - a pre-release / build suffix introduced by `-` or `+`: "1.2.3-beta.1",
 *   "1.2.3+build5", "1.2-rc1"
 * - extra dotted numeric components after the patch, which are ignored:
 *   "1.2.3.4"
 *
 * Every numeric component must consist of digits only. Inputs such as
 * "1foo.2.3", "1.2.3rc" or "-1.2.3" are rejected instead of being silently
 * truncated to their leading digits.
 *
 * @param version - Version string to parse.
 * @returns The parsed components, or `null` if the string is not a version.
 */
export function parseVersion(
  version: string,
): { major: number; minor: number; patch: number } | null {
  // major.minor are mandatory; then either ".patch" with an optional suffix,
  // or a "-"/"+" suffix directly after the minor component.
  const match = /^[vV]?(\d+)\.(\d+)(?:\.(\d+)(?:[-+.].*)?|[-+].*)?$/.exec(
    version.trim(),
  );
  if (match === null) {
    return null;
  }

  const patchText = match[3];
  return {
    major: Number(match[1]),
    minor: Number(match[2]),
    patch: patchText === undefined ? 0 : Number(patchText),
  };
}
26
+
/**
 * Decide whether a client version and a service group version can work
 * together.
 *
 * Two versions are compatible when both parse successfully and share the
 * same major and minor numbers; the patch number is ignored.
 *
 * @param clientVersion - Version string of the client.
 * @param serviceGroupVersion - Version string of the service group.
 * @returns `true` when major and minor match, `false` otherwise (including
 *   when either string cannot be parsed).
 */
export function isVersionCompatible(
  clientVersion: string,
  serviceGroupVersion: string,
): boolean {
  const client = parseVersion(clientVersion);
  const service = parseVersion(serviceGroupVersion);

  if (client !== null && service !== null) {
    const sameMajor = client.major === service.major;
    const sameMinor = client.minor === service.minor;
    return sameMajor && sameMinor;
  }

  return false;
}