@vellumai/cli 0.5.5 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/knip.json +2 -1
- package/package.json +1 -1
- package/src/__tests__/health-check.test.ts +26 -1
- package/src/commands/ps.ts +6 -1
- package/src/commands/restore.ts +50 -30
- package/src/commands/rollback.ts +280 -0
- package/src/commands/upgrade.ts +124 -6
- package/src/commands/wake.ts +8 -0
- package/src/index.ts +5 -0
- package/src/lib/assistant-config.ts +29 -1
- package/src/lib/docker.ts +70 -6
- package/src/lib/guardian-token.ts +46 -1
- package/src/lib/health-check.ts +4 -0
- package/src/lib/platform-client.ts +3 -2
- package/src/lib/version-compat.ts +45 -0
package/knip.json
CHANGED
package/package.json
CHANGED
|
@@ -10,6 +10,7 @@ describe("checkHealth", () => {
|
|
|
10
10
|
test("returns unreachable for non-existent host", async () => {
|
|
11
11
|
const result = await checkHealth("http://127.0.0.1:1");
|
|
12
12
|
expect(["unreachable", "timeout"]).toContain(result.status);
|
|
13
|
+
expect(result.version).toBeUndefined();
|
|
13
14
|
});
|
|
14
15
|
|
|
15
16
|
test("returns healthy for a mock healthy endpoint", async () => {
|
|
@@ -24,6 +25,24 @@ describe("checkHealth", () => {
|
|
|
24
25
|
const result = await checkHealth(`http://localhost:${server.port}`);
|
|
25
26
|
expect(result.status).toBe("healthy");
|
|
26
27
|
expect(result.detail).toBeNull();
|
|
28
|
+
expect(result.version).toBeUndefined();
|
|
29
|
+
} finally {
|
|
30
|
+
server.stop(true);
|
|
31
|
+
}
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
test("returns version when present in response", async () => {
|
|
35
|
+
const server = Bun.serve({
|
|
36
|
+
port: 0,
|
|
37
|
+
fetch() {
|
|
38
|
+
return Response.json({ status: "healthy", version: "1.2.3" });
|
|
39
|
+
},
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
try {
|
|
43
|
+
const result = await checkHealth(`http://localhost:${server.port}`);
|
|
44
|
+
expect(result.status).toBe("healthy");
|
|
45
|
+
expect(result.version).toBe("1.2.3");
|
|
27
46
|
} finally {
|
|
28
47
|
server.stop(true);
|
|
29
48
|
}
|
|
@@ -33,7 +52,11 @@ describe("checkHealth", () => {
|
|
|
33
52
|
const server = Bun.serve({
|
|
34
53
|
port: 0,
|
|
35
54
|
fetch() {
|
|
36
|
-
return Response.json({
|
|
55
|
+
return Response.json({
|
|
56
|
+
status: "degraded",
|
|
57
|
+
message: "high latency",
|
|
58
|
+
version: "0.9.0",
|
|
59
|
+
});
|
|
37
60
|
},
|
|
38
61
|
});
|
|
39
62
|
|
|
@@ -41,6 +64,7 @@ describe("checkHealth", () => {
|
|
|
41
64
|
const result = await checkHealth(`http://localhost:${server.port}`);
|
|
42
65
|
expect(result.status).toBe("degraded");
|
|
43
66
|
expect(result.detail).toBe("high latency");
|
|
67
|
+
expect(result.version).toBe("0.9.0");
|
|
44
68
|
} finally {
|
|
45
69
|
server.stop(true);
|
|
46
70
|
}
|
|
@@ -57,6 +81,7 @@ describe("checkHealth", () => {
|
|
|
57
81
|
try {
|
|
58
82
|
const result = await checkHealth(`http://localhost:${server.port}`);
|
|
59
83
|
expect(result.status).toBe("error (500)");
|
|
84
|
+
expect(result.version).toBeUndefined();
|
|
60
85
|
} finally {
|
|
61
86
|
server.stop(true);
|
|
62
87
|
}
|
package/src/commands/ps.ts
CHANGED
|
@@ -4,6 +4,7 @@ import {
|
|
|
4
4
|
findAssistantByName,
|
|
5
5
|
getActiveAssistant,
|
|
6
6
|
loadAllAssistants,
|
|
7
|
+
updateServiceGroupVersion,
|
|
7
8
|
type AssistantEntry,
|
|
8
9
|
} from "../lib/assistant-config";
|
|
9
10
|
import { loadGuardianToken } from "../lib/guardian-token";
|
|
@@ -424,7 +425,7 @@ async function listAllAssistants(): Promise<void> {
|
|
|
424
425
|
// hitting the health endpoint. If the PID file is missing or the
|
|
425
426
|
// process isn't running, the assistant is sleeping — skip the
|
|
426
427
|
// network health check to avoid a misleading "unreachable" status.
|
|
427
|
-
let health: { status: string; detail: string | null };
|
|
428
|
+
let health: { status: string; detail: string | null; version?: string };
|
|
428
429
|
const resources = a.resources;
|
|
429
430
|
if (a.cloud === "local" && resources) {
|
|
430
431
|
const pid = readPidFile(resources.pidFile);
|
|
@@ -451,6 +452,10 @@ async function listAllAssistants(): Promise<void> {
|
|
|
451
452
|
health = await checkHealth(a.localUrl ?? a.runtimeUrl, token);
|
|
452
453
|
}
|
|
453
454
|
|
|
455
|
+
if (health.status === "healthy" && health.version) {
|
|
456
|
+
updateServiceGroupVersion(a.assistantId, health.version);
|
|
457
|
+
}
|
|
458
|
+
|
|
454
459
|
const infoParts = [a.runtimeUrl];
|
|
455
460
|
if (a.cloud) infoParts.push(`cloud: ${a.cloud}`);
|
|
456
461
|
if (a.species) infoParts.push(`species: ${a.species}`);
|
package/src/commands/restore.ts
CHANGED
|
@@ -89,28 +89,39 @@ interface PreflightFileEntry {
|
|
|
89
89
|
action: string;
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
+
/**
 * One error/conflict item as returned by the import endpoints.
 *
 * Used for preflight validation errors, preflight conflicts and import
 * errors; each item is printed to the user as `message`, followed by
 * ` (path)` when `path` is set.
 */
interface StructuredError {
  /** Error code string. Not read by the CLI code visible here — presumably a machine-readable identifier (TODO confirm). */
  code: string;
  /** Text shown to the user for this error. */
  message: string;
  /** Optional path the error relates to; appended in parentheses when present. */
  path?: string;
}
|
|
97
|
+
|
|
92
98
|
interface PreflightResponse {
|
|
93
99
|
can_import: boolean;
|
|
94
|
-
|
|
100
|
+
validation?: {
|
|
101
|
+
is_valid: false;
|
|
102
|
+
errors: StructuredError[];
|
|
103
|
+
};
|
|
95
104
|
files?: PreflightFileEntry[];
|
|
96
105
|
summary?: {
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
106
|
+
files_to_create: number;
|
|
107
|
+
files_to_overwrite: number;
|
|
108
|
+
files_unchanged: number;
|
|
109
|
+
total_files: number;
|
|
101
110
|
};
|
|
102
|
-
conflicts?:
|
|
111
|
+
conflicts?: StructuredError[];
|
|
103
112
|
}
|
|
104
113
|
|
|
105
114
|
interface ImportResponse {
|
|
106
115
|
success: boolean;
|
|
107
116
|
reason?: string;
|
|
108
|
-
errors?:
|
|
117
|
+
errors?: StructuredError[];
|
|
118
|
+
message?: string;
|
|
109
119
|
warnings?: string[];
|
|
110
120
|
summary?: {
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
121
|
+
total_files: number;
|
|
122
|
+
files_created: number;
|
|
123
|
+
files_overwritten: number;
|
|
124
|
+
files_skipped: number;
|
|
114
125
|
backups_created: number;
|
|
115
126
|
};
|
|
116
127
|
}
|
|
@@ -201,30 +212,38 @@ export async function restore(): Promise<void> {
|
|
|
201
212
|
const result = (await response.json()) as PreflightResponse;
|
|
202
213
|
|
|
203
214
|
if (!result.can_import) {
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
215
|
+
if (result.validation?.errors?.length) {
|
|
216
|
+
console.error("Import blocked by validation errors:");
|
|
217
|
+
for (const err of result.validation.errors) {
|
|
218
|
+
console.error(` - ${err.message}${err.path ? ` (${err.path})` : ""}`);
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
if (result.conflicts?.length) {
|
|
222
|
+
console.error("Import blocked by conflicts:");
|
|
223
|
+
for (const conflict of result.conflicts) {
|
|
224
|
+
console.error(` - ${conflict.message}${conflict.path ? ` (${conflict.path})` : ""}`);
|
|
225
|
+
}
|
|
207
226
|
}
|
|
208
227
|
process.exit(1);
|
|
209
228
|
}
|
|
210
229
|
|
|
211
230
|
// Print summary table
|
|
212
231
|
const summary = result.summary ?? {
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
232
|
+
files_to_create: 0,
|
|
233
|
+
files_to_overwrite: 0,
|
|
234
|
+
files_unchanged: 0,
|
|
235
|
+
total_files: 0,
|
|
217
236
|
};
|
|
218
237
|
console.log("Preflight analysis:");
|
|
219
|
-
console.log(` Files to create: ${summary.
|
|
220
|
-
console.log(` Files to overwrite: ${summary.
|
|
221
|
-
console.log(` Files unchanged: ${summary.
|
|
222
|
-
console.log(` Total: ${summary.
|
|
238
|
+
console.log(` Files to create: ${summary.files_to_create}`);
|
|
239
|
+
console.log(` Files to overwrite: ${summary.files_to_overwrite}`);
|
|
240
|
+
console.log(` Files unchanged: ${summary.files_unchanged}`);
|
|
241
|
+
console.log(` Total: ${summary.total_files}`);
|
|
223
242
|
console.log("");
|
|
224
243
|
|
|
225
244
|
const conflicts = result.conflicts ?? [];
|
|
226
245
|
console.log(
|
|
227
|
-
`Conflicts: ${conflicts.length > 0 ? conflicts.join(", ") : "none"}`,
|
|
246
|
+
`Conflicts: ${conflicts.length > 0 ? conflicts.map((c) => c.message).join(", ") : "none"}`,
|
|
228
247
|
);
|
|
229
248
|
|
|
230
249
|
// List individual files with their action
|
|
@@ -276,25 +295,26 @@ export async function restore(): Promise<void> {
|
|
|
276
295
|
|
|
277
296
|
if (!result.success) {
|
|
278
297
|
console.error(
|
|
279
|
-
`Error: Import failed — ${result.reason ?? "unknown reason"}`,
|
|
298
|
+
`Error: Import failed — ${result.message ?? result.reason ?? "unknown reason"}`,
|
|
280
299
|
);
|
|
281
300
|
for (const err of result.errors ?? []) {
|
|
282
|
-
console.error(` - ${err}`);
|
|
301
|
+
console.error(` - ${err.message}${err.path ? ` (${err.path})` : ""}`);
|
|
283
302
|
}
|
|
284
303
|
process.exit(1);
|
|
285
304
|
}
|
|
286
305
|
|
|
287
306
|
// Print import report
|
|
288
307
|
const summary = result.summary ?? {
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
308
|
+
total_files: 0,
|
|
309
|
+
files_created: 0,
|
|
310
|
+
files_overwritten: 0,
|
|
311
|
+
files_skipped: 0,
|
|
292
312
|
backups_created: 0,
|
|
293
313
|
};
|
|
294
314
|
console.log("✅ Restore complete.");
|
|
295
|
-
console.log(` Files created: ${summary.
|
|
296
|
-
console.log(` Files overwritten: ${summary.
|
|
297
|
-
console.log(` Files skipped: ${summary.
|
|
315
|
+
console.log(` Files created: ${summary.files_created}`);
|
|
316
|
+
console.log(` Files overwritten: ${summary.files_overwritten}`);
|
|
317
|
+
console.log(` Files skipped: ${summary.files_skipped}`);
|
|
298
318
|
console.log(` Backups created: ${summary.backups_created}`);
|
|
299
319
|
|
|
300
320
|
// Print warnings if any
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
import { randomBytes } from "crypto";
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
findAssistantByName,
|
|
5
|
+
getActiveAssistant,
|
|
6
|
+
loadAllAssistants,
|
|
7
|
+
saveAssistantEntry,
|
|
8
|
+
} from "../lib/assistant-config";
|
|
9
|
+
import type { AssistantEntry } from "../lib/assistant-config";
|
|
10
|
+
import {
|
|
11
|
+
captureImageRefs,
|
|
12
|
+
clearSigningKeyBootstrapLock,
|
|
13
|
+
GATEWAY_INTERNAL_PORT,
|
|
14
|
+
dockerResourceNames,
|
|
15
|
+
migrateCesSecurityFiles,
|
|
16
|
+
migrateGatewaySecurityFiles,
|
|
17
|
+
startContainers,
|
|
18
|
+
stopContainers,
|
|
19
|
+
} from "../lib/docker";
|
|
20
|
+
import type { ServiceName } from "../lib/docker";
|
|
21
|
+
import { loadBootstrapSecret } from "../lib/guardian-token";
|
|
22
|
+
import {
|
|
23
|
+
broadcastUpgradeEvent,
|
|
24
|
+
captureContainerEnv,
|
|
25
|
+
waitForReady,
|
|
26
|
+
} from "./upgrade";
|
|
27
|
+
|
|
28
|
+
/**
 * Parse the arguments that follow `vellum rollback` on the command line.
 *
 * Reads `process.argv` from index 3 onward:
 *   - `--help` / `-h` prints usage and exits with status 0.
 *   - Any other token starting with `-` is reported as an unknown option and
 *     exits with status 1.
 *   - A token that does not start with `-` is taken as the assistant name.
 *
 * At most one name is accepted. A second positional argument is rejected with
 * status 1 rather than silently replacing the first, because rollback stops
 * and restarts containers and must not act on an assistant the user did not
 * clearly choose.
 *
 * @returns The assistant name, or `null` when none was given.
 */
function parseArgs(): { name: string | null } {
  const args = process.argv.slice(3);
  let name: string | null = null;

  for (const arg of args) {
    if (arg === "--help" || arg === "-h") {
      console.log("Usage: vellum rollback [<name>]");
      console.log("");
      console.log("Roll back a Docker assistant to the previous version.");
      console.log("");
      console.log("Arguments:");
      console.log(
        " <name> Name of the assistant to roll back (default: active or only assistant)",
      );
      console.log("");
      console.log("Examples:");
      console.log(
        " vellum rollback # Roll back the active assistant",
      );
      console.log(
        " vellum rollback my-assistant # Roll back a specific assistant by name",
      );
      process.exit(0);
    } else if (!arg.startsWith("-")) {
      if (name !== null) {
        // A name was already captured; refuse to guess which one was meant.
        console.error(
          `Error: Unexpected extra argument '${arg}'. Only one assistant name may be given.`,
        );
        process.exit(1);
      }
      name = arg;
    } else {
      console.error(`Error: Unknown option '${arg}'.`);
      process.exit(1);
    }
  }

  return { name };
}
|
|
62
|
+
|
|
63
|
+
/**
 * Work out which cloud/backend an assistant entry belongs to.
 *
 * An explicit, non-empty `cloud` value always wins. Without one the backend
 * is inferred from which fields are set: `project` means "gcp", otherwise
 * `sshUser` means "custom", otherwise the assistant is "local".
 */
function resolveCloud(entry: AssistantEntry): string {
  return (
    entry.cloud ||
    (entry.project ? "gcp" : entry.sshUser ? "custom" : "local")
  );
}
|
|
75
|
+
|
|
76
|
+
/**
 * Pick the assistant the rollback command should operate on.
 *
 * Lookup order:
 *   1. The name given on the command line — exits with status 1 when no
 *      assistant has that name.
 *   2. The active assistant (set via `vellum use`), if it can still be found.
 *   3. The only configured assistant, when exactly one exists.
 *
 * When none of these yields an entry, an explanation is printed and the
 * process exits with status 1.
 */
function resolveTargetAssistant(nameArg: string | null): AssistantEntry {
  if (nameArg) {
    const named = findAssistantByName(nameArg);
    if (named) return named;
    console.error(`No assistant found with name '${nameArg}'.`);
    process.exit(1);
  }

  // An active name that no longer resolves is not an error; fall through.
  const activeName = getActiveAssistant();
  const activeEntry = activeName ? findAssistantByName(activeName) : null;
  if (activeEntry) return activeEntry;

  const assistants = loadAllAssistants();
  if (assistants.length === 1) return assistants[0];

  const problem =
    assistants.length === 0
      ? "No assistants found. Run 'vellum hatch' first."
      : "Multiple assistants found. Specify a name or set an active assistant with 'vellum use <name>'.";
  console.error(problem);
  process.exit(1);
}
|
|
110
|
+
|
|
111
|
+
/**
 * `vellum rollback [<name>]` — restart a Docker assistant on the images it
 * was running before its last upgrade.
 *
 * Flow:
 *   1. Resolve the target assistant and verify that it is a Docker assistant
 *      with complete rollback state (a previous version plus all three
 *      previous image digests).
 *   2. Capture the running assistant container's environment so it can be
 *      replayed onto the replacement container.
 *   3. Announce the rollback to connected clients, stop the containers, run
 *      the security-file migrations, clear the signing key bootstrap lock and
 *      start containers from the previous digests.
 *   4. Wait for readiness. On success the saved entry's current and previous
 *      state are swapped (so the rollback can itself be rolled back) and a
 *      "complete" event is broadcast.
 *
 * Exits the process with status 1 when a precondition fails or the
 * containers do not become ready in time.
 */
export async function rollback(): Promise<void> {
  const { name } = parseArgs();
  const entry = resolveTargetAssistant(name);
  const cloud = resolveCloud(entry);

  // Only Docker assistants support rollback
  if (cloud !== "docker") {
    console.error(
      "Rollback is only supported for Docker assistants. For managed assistants, use the version picker to upgrade to the previous version.",
    );
    process.exit(1);
  }

  // Verify rollback state exists
  if (!entry.previousServiceGroupVersion || !entry.previousContainerInfo) {
    console.error(
      "No rollback state available. Run `vellum upgrade` first to create a rollback point.",
    );
    process.exit(1);
  }
  const targetVersion = entry.previousServiceGroupVersion;

  // Verify all three digest fields are present
  const prev = entry.previousContainerInfo;
  if (!prev.assistantDigest || !prev.gatewayDigest || !prev.cesDigest) {
    console.error(
      "Incomplete rollback state. Previous container digests are missing.",
    );
    process.exit(1);
  }

  // Build image refs from the previous digests
  const previousImageRefs: Record<ServiceName, string> = {
    assistant: prev.assistantDigest,
    "credential-executor": prev.cesDigest,
    gateway: prev.gatewayDigest,
  };

  const instanceName = entry.assistantId;
  const res = dockerResourceNames(instanceName);

  console.log(
    `🔄 Rolling back Docker assistant '${instanceName}' to ${targetVersion}...\n`,
  );

  // Capture current container env
  console.log("💾 Capturing existing container environment...");
  const capturedEnv = await captureContainerEnv(res.assistantContainer);
  console.log(
    ` Captured ${Object.keys(capturedEnv).length} env var(s) from ${res.assistantContainer}\n`,
  );

  // Extract CES_SERVICE_TOKEN from captured env, or generate fresh one
  const cesServiceToken =
    capturedEnv["CES_SERVICE_TOKEN"] || randomBytes(32).toString("hex");

  // Retrieve or generate a bootstrap secret for the gateway.
  const bootstrapSecret =
    loadBootstrapSecret(instanceName) || randomBytes(32).toString("hex");

  // Build extra env vars, excluding keys managed by serviceDockerRunArgs
  const envKeysSetByRunArgs = new Set([
    "CES_SERVICE_TOKEN",
    "VELLUM_ASSISTANT_NAME",
    "RUNTIME_HTTP_HOST",
    "PATH",
  ]);
  // These two are also excluded, but only when set in the CLI's own environment.
  for (const envVar of ["ANTHROPIC_API_KEY", "VELLUM_PLATFORM_URL"]) {
    if (process.env[envVar]) {
      envKeysSetByRunArgs.add(envVar);
    }
  }
  const extraAssistantEnv: Record<string, string> = {};
  for (const [key, value] of Object.entries(capturedEnv)) {
    if (!envKeysSetByRunArgs.has(key)) {
      extraAssistantEnv[key] = value;
    }
  }

  // Parse gateway port from entry's runtimeUrl, fall back to default
  let gatewayPort = GATEWAY_INTERNAL_PORT;
  try {
    const parsed = new URL(entry.runtimeUrl);
    const port = parseInt(parsed.port, 10);
    // A URL without an explicit port has an empty `port`, which parses to NaN.
    if (!isNaN(port)) {
      gatewayPort = port;
    }
  } catch {
    // use default
  }

  // Notify connected clients that a rollback is about to begin (best-effort)
  console.log("📢 Notifying connected clients...");
  await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
    type: "starting",
    targetVersion,
    expectedDowntimeSeconds: 60,
  });
  // Brief pause to allow SSE delivery before containers stop.
  await new Promise((r) => setTimeout(r, 500));

  console.log("🛑 Stopping existing containers...");
  await stopContainers(res);
  console.log("✅ Containers stopped\n");

  // Run security file migrations and signing key cleanup
  console.log("🔄 Migrating security files to gateway volume...");
  await migrateGatewaySecurityFiles(res, (msg) => console.log(msg));

  console.log("🔄 Migrating credential files to CES security volume...");
  await migrateCesSecurityFiles(res, (msg) => console.log(msg));

  console.log("🔑 Clearing signing key bootstrap lock...");
  await clearSigningKeyBootstrapLock(res);

  console.log("🚀 Starting containers with previous version...");
  await startContainers(
    {
      bootstrapSecret,
      cesServiceToken,
      extraAssistantEnv,
      gatewayPort,
      imageTags: previousImageRefs,
      instanceName,
      res,
    },
    (msg) => console.log(msg),
  );
  console.log("✅ Containers started\n");

  console.log("Waiting for assistant to become ready...");
  const ready = await waitForReady(entry.runtimeUrl);

  if (!ready) {
    console.error(`\n❌ Containers failed to become ready within the timeout.`);
    console.log(` Check logs with: docker logs -f ${res.assistantContainer}`);
    process.exit(1);
  }

  // Capture new digests from the rolled-back containers
  const newDigests = await captureImageRefs(res);

  // Swap current/previous state to enable "rollback the rollback".
  // The containers were started from exactly the previous digests, so when
  // inspection yields nothing those digests are still the correct record of
  // what is running; falling back to them avoids saving `undefined` digests.
  const updatedEntry: AssistantEntry = {
    ...entry,
    serviceGroupVersion: targetVersion,
    containerInfo: {
      assistantImage: prev.assistantImage ?? previousImageRefs.assistant,
      gatewayImage: prev.gatewayImage ?? previousImageRefs.gateway,
      cesImage: prev.cesImage ?? previousImageRefs["credential-executor"],
      assistantDigest: newDigests?.assistant ?? prev.assistantDigest,
      gatewayDigest: newDigests?.gateway ?? prev.gatewayDigest,
      cesDigest: newDigests?.["credential-executor"] ?? prev.cesDigest,
      networkName: res.network,
    },
    previousServiceGroupVersion: entry.serviceGroupVersion,
    previousContainerInfo: entry.containerInfo,
  };
  saveAssistantEntry(updatedEntry);

  // Notify clients that the rollback succeeded
  await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
    type: "complete",
    installedVersion: targetVersion,
    success: true,
  });

  console.log(
    `\n✅ Docker assistant '${instanceName}' rolled back to ${targetVersion}.`,
  );
}
|
package/src/commands/upgrade.ts
CHANGED
|
@@ -11,6 +11,7 @@ import {
|
|
|
11
11
|
import type { AssistantEntry } from "../lib/assistant-config";
|
|
12
12
|
import {
|
|
13
13
|
captureImageRefs,
|
|
14
|
+
clearSigningKeyBootstrapLock,
|
|
14
15
|
DOCKERHUB_IMAGES,
|
|
15
16
|
DOCKER_READY_TIMEOUT_MS,
|
|
16
17
|
GATEWAY_INTERNAL_PORT,
|
|
@@ -26,6 +27,7 @@ import {
|
|
|
26
27
|
getPlatformUrl,
|
|
27
28
|
readPlatformToken,
|
|
28
29
|
} from "../lib/platform-client";
|
|
30
|
+
import { loadBootstrapSecret, loadGuardianToken } from "../lib/guardian-token";
|
|
29
31
|
import { exec, execOutput } from "../lib/step-runner";
|
|
30
32
|
|
|
31
33
|
interface UpgradeArgs {
|
|
@@ -137,7 +139,7 @@ function resolveTargetAssistant(nameArg: string | null): AssistantEntry {
|
|
|
137
139
|
* Capture environment variables from a running Docker container so they
|
|
138
140
|
* can be replayed onto the replacement container after upgrade.
|
|
139
141
|
*/
|
|
140
|
-
async function captureContainerEnv(
|
|
142
|
+
export async function captureContainerEnv(
|
|
141
143
|
containerName: string,
|
|
142
144
|
): Promise<Record<string, string>> {
|
|
143
145
|
const captured: Record<string, string> = {};
|
|
@@ -165,7 +167,7 @@ async function captureContainerEnv(
|
|
|
165
167
|
* Poll the gateway `/readyz` endpoint until it returns 200 or the timeout
|
|
166
168
|
* elapses. Returns whether the assistant became ready.
|
|
167
169
|
*/
|
|
168
|
-
async function waitForReady(runtimeUrl: string): Promise<boolean> {
|
|
170
|
+
export async function waitForReady(runtimeUrl: string): Promise<boolean> {
|
|
169
171
|
const readyUrl = `${runtimeUrl}/readyz`;
|
|
170
172
|
const start = Date.now();
|
|
171
173
|
|
|
@@ -199,6 +201,35 @@ async function waitForReady(runtimeUrl: string): Promise<boolean> {
|
|
|
199
201
|
return false;
|
|
200
202
|
}
|
|
201
203
|
|
|
204
|
+
/**
 * Best-effort broadcast of an upgrade lifecycle event to connected clients
 * via the gateway's upgrade-broadcast proxy.
 *
 * POSTs `event` as JSON to `${gatewayUrl}/v1/admin/upgrade-broadcast`. When a
 * guardian token with an access token is available for `assistantId`, it is
 * sent as a Bearer `Authorization` header. The request is aborted after
 * 3 seconds.
 *
 * This function never rejects: any failure (token lookup, network error,
 * timeout, or a non-2xx response) is logged as a warning and otherwise
 * ignored, so it can never block the upgrade flow.
 *
 * @param gatewayUrl Base URL of the gateway to post to.
 * @param assistantId Assistant whose guardian token is used for auth.
 * @param event Event payload, sent as the JSON request body.
 */
export async function broadcastUpgradeEvent(
  gatewayUrl: string,
  assistantId: string,
  event: Record<string, unknown>,
): Promise<void> {
  try {
    const token = loadGuardianToken(assistantId);
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
    };
    if (token?.accessToken) {
      headers["Authorization"] = `Bearer ${token.accessToken}`;
    }
    const response = await fetch(`${gatewayUrl}/v1/admin/upgrade-broadcast`, {
      method: "POST",
      headers,
      body: JSON.stringify(event),
      signal: AbortSignal.timeout(3000),
    });
    if (!response.ok) {
      // The gateway answered but did not accept the broadcast.
      console.warn(
        `Warning: could not notify connected clients (HTTP ${response.status}).`,
      );
    }
  } catch (err: unknown) {
    // Best-effort — gateway/daemon may already be shutting down or not yet ready
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`Warning: could not notify connected clients (${reason}).`);
  }
}
|
|
232
|
+
|
|
202
233
|
async function upgradeDocker(
|
|
203
234
|
entry: AssistantEntry,
|
|
204
235
|
version: string | null,
|
|
@@ -234,6 +265,18 @@ async function upgradeDocker(
|
|
|
234
265
|
);
|
|
235
266
|
}
|
|
236
267
|
|
|
268
|
+
// Persist rollback state to lockfile BEFORE any destructive changes.
|
|
269
|
+
// This enables the `vellum rollback` command to restore the previous version.
|
|
270
|
+
if (entry.serviceGroupVersion && entry.containerInfo) {
|
|
271
|
+
const rollbackEntry: AssistantEntry = {
|
|
272
|
+
...entry,
|
|
273
|
+
previousServiceGroupVersion: entry.serviceGroupVersion,
|
|
274
|
+
previousContainerInfo: { ...entry.containerInfo },
|
|
275
|
+
};
|
|
276
|
+
saveAssistantEntry(rollbackEntry);
|
|
277
|
+
console.log(` Saved rollback state: ${entry.serviceGroupVersion}\n`);
|
|
278
|
+
}
|
|
279
|
+
|
|
237
280
|
console.log("💾 Capturing existing container environment...");
|
|
238
281
|
const capturedEnv = await captureContainerEnv(res.assistantContainer);
|
|
239
282
|
console.log(
|
|
@@ -246,6 +289,16 @@ async function upgradeDocker(
|
|
|
246
289
|
await exec("docker", ["pull", imageTags["credential-executor"]]);
|
|
247
290
|
console.log("✅ Docker images pulled\n");
|
|
248
291
|
|
|
292
|
+
// Notify connected clients that an upgrade is about to begin.
|
|
293
|
+
console.log("📢 Notifying connected clients...");
|
|
294
|
+
await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
|
|
295
|
+
type: "starting",
|
|
296
|
+
targetVersion: versionTag,
|
|
297
|
+
expectedDowntimeSeconds: 60,
|
|
298
|
+
});
|
|
299
|
+
// Brief pause to allow SSE delivery before containers stop.
|
|
300
|
+
await new Promise((r) => setTimeout(r, 500));
|
|
301
|
+
|
|
249
302
|
console.log("🛑 Stopping existing containers...");
|
|
250
303
|
await stopContainers(res);
|
|
251
304
|
console.log("✅ Containers stopped\n");
|
|
@@ -269,6 +322,11 @@ async function upgradeDocker(
|
|
|
269
322
|
const cesServiceToken =
|
|
270
323
|
capturedEnv["CES_SERVICE_TOKEN"] || randomBytes(32).toString("hex");
|
|
271
324
|
|
|
325
|
+
// Retrieve or generate a bootstrap secret for the gateway. The secret was
|
|
326
|
+
// persisted to disk during hatch; older instances won't have one yet.
|
|
327
|
+
const bootstrapSecret =
|
|
328
|
+
loadBootstrapSecret(instanceName) || randomBytes(32).toString("hex");
|
|
329
|
+
|
|
272
330
|
// Build the set of extra env vars to replay on the new assistant container.
|
|
273
331
|
// Captured env vars serve as the base; keys already managed by
|
|
274
332
|
// serviceDockerRunArgs are excluded to avoid duplicates.
|
|
@@ -297,9 +355,13 @@ async function upgradeDocker(
|
|
|
297
355
|
console.log("🔄 Migrating credential files to CES security volume...");
|
|
298
356
|
await migrateCesSecurityFiles(res, (msg) => console.log(msg));
|
|
299
357
|
|
|
358
|
+
console.log("🔑 Clearing signing key bootstrap lock...");
|
|
359
|
+
await clearSigningKeyBootstrapLock(res);
|
|
360
|
+
|
|
300
361
|
console.log("🚀 Starting upgraded containers...");
|
|
301
362
|
await startContainers(
|
|
302
363
|
{
|
|
364
|
+
bootstrapSecret,
|
|
303
365
|
cesServiceToken,
|
|
304
366
|
extraAssistantEnv,
|
|
305
367
|
gatewayPort,
|
|
@@ -328,9 +390,18 @@ async function upgradeDocker(
|
|
|
328
390
|
cesDigest: newDigests?.["credential-executor"],
|
|
329
391
|
networkName: res.network,
|
|
330
392
|
},
|
|
393
|
+
previousServiceGroupVersion: entry.serviceGroupVersion,
|
|
394
|
+
previousContainerInfo: entry.containerInfo,
|
|
331
395
|
};
|
|
332
396
|
saveAssistantEntry(updatedEntry);
|
|
333
397
|
|
|
398
|
+
// Notify clients on the new service group that the upgrade succeeded.
|
|
399
|
+
await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
|
|
400
|
+
type: "complete",
|
|
401
|
+
installedVersion: versionTag,
|
|
402
|
+
success: true,
|
|
403
|
+
});
|
|
404
|
+
|
|
334
405
|
console.log(
|
|
335
406
|
`\n✅ Docker assistant '${instanceName}' upgraded to ${versionTag}.`,
|
|
336
407
|
);
|
|
@@ -344,6 +415,7 @@ async function upgradeDocker(
|
|
|
344
415
|
|
|
345
416
|
await startContainers(
|
|
346
417
|
{
|
|
418
|
+
bootstrapSecret,
|
|
347
419
|
cesServiceToken,
|
|
348
420
|
extraAssistantEnv,
|
|
349
421
|
gatewayPort,
|
|
@@ -356,19 +428,43 @@ async function upgradeDocker(
|
|
|
356
428
|
|
|
357
429
|
const rollbackReady = await waitForReady(entry.runtimeUrl);
|
|
358
430
|
if (rollbackReady) {
|
|
359
|
-
// Restore previous container info in lockfile after rollback
|
|
431
|
+
// Restore previous container info in lockfile after rollback.
|
|
432
|
+
// previousImageRefs contains sha256 digests from `docker inspect
|
|
433
|
+
// --format {{.Image}}`. The *Image fields should hold
|
|
434
|
+
// human-readable image:tag names, so prefer the pre-upgrade
|
|
435
|
+
// containerInfo values and store digests in the *Digest fields.
|
|
360
436
|
if (previousImageRefs) {
|
|
361
437
|
const rolledBackEntry: AssistantEntry = {
|
|
362
438
|
...entry,
|
|
363
439
|
containerInfo: {
|
|
364
|
-
assistantImage:
|
|
365
|
-
|
|
366
|
-
|
|
440
|
+
assistantImage:
|
|
441
|
+
entry.containerInfo?.assistantImage ??
|
|
442
|
+
previousImageRefs.assistant,
|
|
443
|
+
gatewayImage:
|
|
444
|
+
entry.containerInfo?.gatewayImage ??
|
|
445
|
+
previousImageRefs.gateway,
|
|
446
|
+
cesImage:
|
|
447
|
+
entry.containerInfo?.cesImage ??
|
|
448
|
+
previousImageRefs["credential-executor"],
|
|
449
|
+
assistantDigest: previousImageRefs.assistant,
|
|
450
|
+
gatewayDigest: previousImageRefs.gateway,
|
|
451
|
+
cesDigest: previousImageRefs["credential-executor"],
|
|
367
452
|
networkName: res.network,
|
|
368
453
|
},
|
|
454
|
+
previousServiceGroupVersion: undefined,
|
|
455
|
+
previousContainerInfo: undefined,
|
|
369
456
|
};
|
|
370
457
|
saveAssistantEntry(rolledBackEntry);
|
|
371
458
|
}
|
|
459
|
+
|
|
460
|
+
// Notify clients that the upgrade failed and rolled back.
|
|
461
|
+
await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
|
|
462
|
+
type: "complete",
|
|
463
|
+
installedVersion: entry.serviceGroupVersion ?? "unknown",
|
|
464
|
+
success: false,
|
|
465
|
+
rolledBackToVersion: entry.serviceGroupVersion,
|
|
466
|
+
});
|
|
467
|
+
|
|
372
468
|
console.log(
|
|
373
469
|
`\n⚠️ Rolled back to previous version. Upgrade to ${versionTag} failed.`,
|
|
374
470
|
);
|
|
@@ -431,6 +527,15 @@ async function upgradePlatform(
|
|
|
431
527
|
body.version = version;
|
|
432
528
|
}
|
|
433
529
|
|
|
530
|
+
// Notify connected clients that an upgrade is about to begin.
|
|
531
|
+
const targetVersion = version ?? `v${cliPkg.version}`;
|
|
532
|
+
console.log("📢 Notifying connected clients...");
|
|
533
|
+
await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
|
|
534
|
+
type: "starting",
|
|
535
|
+
targetVersion,
|
|
536
|
+
expectedDowntimeSeconds: 90,
|
|
537
|
+
});
|
|
538
|
+
|
|
434
539
|
const response = await fetch(url, {
|
|
435
540
|
method: "POST",
|
|
436
541
|
headers: {
|
|
@@ -446,10 +551,23 @@ async function upgradePlatform(
|
|
|
446
551
|
console.error(
|
|
447
552
|
`Error: Platform upgrade failed (${response.status}): ${text}`,
|
|
448
553
|
);
|
|
554
|
+
await broadcastUpgradeEvent(entry.runtimeUrl, entry.assistantId, {
|
|
555
|
+
type: "complete",
|
|
556
|
+
installedVersion: entry.serviceGroupVersion ?? "unknown",
|
|
557
|
+
success: false,
|
|
558
|
+
});
|
|
449
559
|
process.exit(1);
|
|
450
560
|
}
|
|
451
561
|
|
|
452
562
|
const result = (await response.json()) as UpgradeApiResponse;
|
|
563
|
+
|
|
564
|
+
// NOTE: We intentionally do NOT broadcast a "complete" event here.
|
|
565
|
+
// The platform API returning 200 only means "upgrade request accepted" —
|
|
566
|
+
// the service group has not yet restarted with the new version. The
|
|
567
|
+
// completion signal will come from the client's health-check
|
|
568
|
+
// version-change detection (DaemonConnection.swift) once the new
|
|
569
|
+
// version actually appears after the platform restarts the service group.
|
|
570
|
+
|
|
453
571
|
console.log(`✅ ${result.detail}`);
|
|
454
572
|
if (result.version) {
|
|
455
573
|
console.log(` Version: ${result.version}`);
|
package/src/commands/wake.ts
CHANGED
|
@@ -40,6 +40,14 @@ export async function wake(): Promise<void> {
|
|
|
40
40
|
const entry = resolveTargetAssistant(nameArg);
|
|
41
41
|
|
|
42
42
|
if (entry.cloud === "docker") {
|
|
43
|
+
if (watch || foreground) {
|
|
44
|
+
const ignored = [watch && "--watch", foreground && "--foreground"]
|
|
45
|
+
.filter(Boolean)
|
|
46
|
+
.join(" and ");
|
|
47
|
+
console.warn(
|
|
48
|
+
`Warning: ${ignored} ignored for Docker instances (not supported).`,
|
|
49
|
+
);
|
|
50
|
+
}
|
|
43
51
|
const res = dockerResourceNames(entry.assistantId);
|
|
44
52
|
await wakeContainers(res);
|
|
45
53
|
console.log("Docker containers started.");
|
package/src/index.ts
CHANGED
|
@@ -11,6 +11,7 @@ import { ps } from "./commands/ps";
|
|
|
11
11
|
import { recover } from "./commands/recover";
|
|
12
12
|
import { restore } from "./commands/restore";
|
|
13
13
|
import { retire } from "./commands/retire";
|
|
14
|
+
import { rollback } from "./commands/rollback";
|
|
14
15
|
import { setup } from "./commands/setup";
|
|
15
16
|
import { sleep } from "./commands/sleep";
|
|
16
17
|
import { ssh } from "./commands/ssh";
|
|
@@ -39,6 +40,7 @@ const commands = {
|
|
|
39
40
|
recover,
|
|
40
41
|
restore,
|
|
41
42
|
retire,
|
|
43
|
+
rollback,
|
|
42
44
|
setup,
|
|
43
45
|
sleep,
|
|
44
46
|
ssh,
|
|
@@ -68,6 +70,9 @@ function printHelp(): void {
|
|
|
68
70
|
console.log(" recover Restore a previously retired local assistant");
|
|
69
71
|
console.log(" restore Restore a .vbundle backup into a running assistant");
|
|
70
72
|
console.log(" retire Delete an assistant instance");
|
|
73
|
+
console.log(
|
|
74
|
+
" rollback Roll back a Docker assistant to the previous version",
|
|
75
|
+
);
|
|
71
76
|
console.log(" setup Configure API keys interactively");
|
|
72
77
|
console.log(" sleep Stop the assistant process");
|
|
73
78
|
console.log(" ssh SSH into a remote assistant instance");
|
|
@@ -78,6 +78,10 @@ export interface AssistantEntry {
|
|
|
78
78
|
serviceGroupVersion?: string;
|
|
79
79
|
/** Docker image metadata for rollback. Only present for docker topology entries. */
|
|
80
80
|
containerInfo?: ContainerInfo;
|
|
81
|
+
/** The service group version that was running before the last upgrade. */
|
|
82
|
+
previousServiceGroupVersion?: string;
|
|
83
|
+
/** Docker image metadata from before the last upgrade. Enables rollback to the prior version. */
|
|
84
|
+
previousContainerInfo?: ContainerInfo;
|
|
81
85
|
[key: string]: unknown;
|
|
82
86
|
}
|
|
83
87
|
|
|
@@ -360,6 +364,23 @@ export function saveAssistantEntry(entry: AssistantEntry): void {
|
|
|
360
364
|
writeAssistants(entries);
|
|
361
365
|
}
|
|
362
366
|
|
|
367
|
+
/**
 * Record a new `serviceGroupVersion` on one lockfile entry.
 *
 * Loads every entry, looks up the one whose `assistantId` matches, and writes
 * the full list back only when the stored version differs from `version`.
 * Nothing is written when no entry matches or the version is already current.
 *
 * @param assistantId - Identifier of the entry to update.
 * @param version - Version string to store on that entry.
 */
export function updateServiceGroupVersion(
  assistantId: string,
  version: string,
): void {
  const allEntries = readAssistants();
  const target = allEntries.find(
    (candidate) => candidate.assistantId === assistantId,
  );

  // Skip the write entirely when there is nothing to change.
  if (target === undefined || target.serviceGroupVersion === version) {
    return;
  }

  target.serviceGroupVersion = version;
  writeAssistants(allEntries);
}
|
|
383
|
+
|
|
363
384
|
/**
|
|
364
385
|
* Scan upward from `basePort` to find an available port. A port is considered
|
|
365
386
|
* available when `probePort()` returns false (nothing listening). Scans up to
|
|
@@ -426,6 +447,7 @@ export async function allocateLocalResources(
|
|
|
426
447
|
entry.resources.daemonPort,
|
|
427
448
|
entry.resources.gatewayPort,
|
|
428
449
|
entry.resources.qdrantPort,
|
|
450
|
+
entry.resources.cesPort,
|
|
429
451
|
);
|
|
430
452
|
}
|
|
431
453
|
}
|
|
@@ -445,13 +467,19 @@ export async function allocateLocalResources(
|
|
|
445
467
|
daemonPort,
|
|
446
468
|
gatewayPort,
|
|
447
469
|
]);
|
|
470
|
+
const cesPort = await findAvailablePort(DEFAULT_CES_PORT, [
|
|
471
|
+
...reservedPorts,
|
|
472
|
+
daemonPort,
|
|
473
|
+
gatewayPort,
|
|
474
|
+
qdrantPort,
|
|
475
|
+
]);
|
|
448
476
|
|
|
449
477
|
return {
|
|
450
478
|
instanceDir,
|
|
451
479
|
daemonPort,
|
|
452
480
|
gatewayPort,
|
|
453
481
|
qdrantPort,
|
|
454
|
-
cesPort
|
|
482
|
+
cesPort,
|
|
455
483
|
pidFile: join(instanceDir, ".vellum", "vellum.pid"),
|
|
456
484
|
};
|
|
457
485
|
}
|
package/src/lib/docker.ts
CHANGED
|
@@ -14,7 +14,7 @@ import {
|
|
|
14
14
|
import type { AssistantEntry } from "./assistant-config";
|
|
15
15
|
import { DEFAULT_GATEWAY_PORT, PROVIDER_ENV_VAR_NAMES } from "./constants";
|
|
16
16
|
import type { Species } from "./constants";
|
|
17
|
-
import { leaseGuardianToken } from "./guardian-token";
|
|
17
|
+
import { leaseGuardianToken, saveBootstrapSecret } from "./guardian-token";
|
|
18
18
|
import { isVellumProcess, stopProcess } from "./process";
|
|
19
19
|
import { generateInstanceName } from "./random-name";
|
|
20
20
|
import { exec, execOutput } from "./step-runner";
|
|
@@ -464,6 +464,7 @@ async function buildAllImages(
|
|
|
464
464
|
* can be restarted independently.
|
|
465
465
|
*/
|
|
466
466
|
export function serviceDockerRunArgs(opts: {
|
|
467
|
+
bootstrapSecret?: string;
|
|
467
468
|
cesServiceToken?: string;
|
|
468
469
|
extraAssistantEnv?: Record<string, string>;
|
|
469
470
|
gatewayPort: number;
|
|
@@ -552,6 +553,9 @@ export function serviceDockerRunArgs(opts: {
|
|
|
552
553
|
...(cesServiceToken
|
|
553
554
|
? ["-e", `CES_SERVICE_TOKEN=${cesServiceToken}`]
|
|
554
555
|
: []),
|
|
556
|
+
...(opts.bootstrapSecret
|
|
557
|
+
? ["-e", `GUARDIAN_BOOTSTRAP_SECRET=${opts.bootstrapSecret}`]
|
|
558
|
+
: []),
|
|
555
559
|
imageTags.gateway,
|
|
556
560
|
],
|
|
557
561
|
"credential-executor": () => [
|
|
@@ -735,6 +739,7 @@ export const SERVICE_START_ORDER: ServiceName[] = [
|
|
|
735
739
|
/** Start all three containers in dependency order. */
|
|
736
740
|
export async function startContainers(
|
|
737
741
|
opts: {
|
|
742
|
+
bootstrapSecret?: string;
|
|
738
743
|
cesServiceToken?: string;
|
|
739
744
|
extraAssistantEnv?: Record<string, string>;
|
|
740
745
|
gatewayPort: number;
|
|
@@ -760,6 +765,27 @@ export async function stopContainers(
|
|
|
760
765
|
await removeContainer(res.assistantContainer);
|
|
761
766
|
}
|
|
762
767
|
|
|
768
|
+
/**
 * Delete `signing-key-bootstrap.lock` from the gateway security volume.
 *
 * Runs a throwaway `busybox` container with the volume mounted at
 * `/gateway-security` and force-removes the lockfile (`rm -f`, so a missing
 * file is not an error). With the lock gone the daemon can re-fetch the
 * signing key from the gateway on its next startup, which is needed during
 * upgrades where the gateway may generate a new key.
 *
 * @param res - Docker resource names for the instance; only
 *   `gatewaySecurityVolume` is read here.
 */
export async function clearSigningKeyBootstrapLock(
  res: ReturnType<typeof dockerResourceNames>,
): Promise<void> {
  const volumeMount = `${res.gatewaySecurityVolume}:/gateway-security`;
  const dockerArgs = [
    "run",
    "--rm",
    "-v",
    volumeMount,
    "busybox",
    "rm",
    "-f",
    "/gateway-security/signing-key-bootstrap.lock",
  ];
  await exec("docker", dockerArgs);
}
|
|
788
|
+
|
|
763
789
|
/** Stop containers without removing them (preserves state for `docker start`). */
|
|
764
790
|
export async function sleepContainers(
|
|
765
791
|
res: ReturnType<typeof dockerResourceNames>,
|
|
@@ -771,8 +797,14 @@ export async function sleepContainers(
|
|
|
771
797
|
]) {
|
|
772
798
|
try {
|
|
773
799
|
await exec("docker", ["stop", container]);
|
|
774
|
-
} catch {
|
|
775
|
-
|
|
800
|
+
} catch (err) {
|
|
801
|
+
const msg =
|
|
802
|
+
err instanceof Error ? err.message.toLowerCase() : String(err);
|
|
803
|
+
if (msg.includes("no such container") || msg.includes("is not running")) {
|
|
804
|
+
// container doesn't exist or already stopped — expected, skip
|
|
805
|
+
continue;
|
|
806
|
+
}
|
|
807
|
+
throw err;
|
|
776
808
|
}
|
|
777
809
|
}
|
|
778
810
|
}
|
|
@@ -1071,9 +1103,36 @@ export async function hatchDocker(
|
|
|
1071
1103
|
await exec("docker", ["volume", "create", res.cesSecurityVolume]);
|
|
1072
1104
|
await exec("docker", ["volume", "create", res.gatewaySecurityVolume]);
|
|
1073
1105
|
|
|
1106
|
+
// Set workspace volume ownership so non-root containers (UID 1001) can write.
|
|
1107
|
+
await exec("docker", [
|
|
1108
|
+
"run",
|
|
1109
|
+
"--rm",
|
|
1110
|
+
"-v",
|
|
1111
|
+
`${res.workspaceVolume}:/workspace`,
|
|
1112
|
+
"busybox",
|
|
1113
|
+
"chown",
|
|
1114
|
+
"1001:1001",
|
|
1115
|
+
"/workspace",
|
|
1116
|
+
]);
|
|
1117
|
+
|
|
1118
|
+
// Clear any stale signing-key bootstrap lockfile so the daemon can
|
|
1119
|
+
// fetch the key from the gateway on first startup.
|
|
1120
|
+
await exec("docker", [
|
|
1121
|
+
"run",
|
|
1122
|
+
"--rm",
|
|
1123
|
+
"-v",
|
|
1124
|
+
`${res.gatewaySecurityVolume}:/gateway-security`,
|
|
1125
|
+
"busybox",
|
|
1126
|
+
"rm",
|
|
1127
|
+
"-f",
|
|
1128
|
+
"/gateway-security/signing-key-bootstrap.lock",
|
|
1129
|
+
]);
|
|
1130
|
+
|
|
1074
1131
|
const cesServiceToken = randomBytes(32).toString("hex");
|
|
1132
|
+
const bootstrapSecret = randomBytes(32).toString("hex");
|
|
1133
|
+
saveBootstrapSecret(instanceName, bootstrapSecret);
|
|
1075
1134
|
await startContainers(
|
|
1076
|
-
{ cesServiceToken, gatewayPort, imageTags, instanceName, res },
|
|
1135
|
+
{ bootstrapSecret, cesServiceToken, gatewayPort, imageTags, instanceName, res },
|
|
1077
1136
|
log,
|
|
1078
1137
|
);
|
|
1079
1138
|
|
|
@@ -1252,7 +1311,9 @@ async function waitForGatewayAndLease(opts: {
|
|
|
1252
1311
|
// Log periodically so the user knows we're still trying
|
|
1253
1312
|
const elapsed = ((Date.now() - leaseStart) / 1000).toFixed(0);
|
|
1254
1313
|
log(
|
|
1255
|
-
`Guardian token lease: attempt failed after ${elapsed}s (${
|
|
1314
|
+
`Guardian token lease: attempt failed after ${elapsed}s (${
|
|
1315
|
+
lastLeaseError.split("\n")[0]
|
|
1316
|
+
}), retrying...`,
|
|
1256
1317
|
);
|
|
1257
1318
|
}
|
|
1258
1319
|
await new Promise((r) => setTimeout(r, 2000));
|
|
@@ -1260,7 +1321,10 @@ async function waitForGatewayAndLease(opts: {
|
|
|
1260
1321
|
|
|
1261
1322
|
if (!leaseSuccess) {
|
|
1262
1323
|
log(
|
|
1263
|
-
`\u26a0\ufe0f Guardian token lease: FAILED after ${(
|
|
1324
|
+
`\u26a0\ufe0f Guardian token lease: FAILED after ${(
|
|
1325
|
+
(Date.now() - leaseStart) /
|
|
1326
|
+
1000
|
|
1327
|
+
).toFixed(1)}s — ${lastLeaseError ?? "unknown error"}`,
|
|
1264
1328
|
);
|
|
1265
1329
|
}
|
|
1266
1330
|
|
|
@@ -42,6 +42,46 @@ function getPersistedDeviceIdPath(): string {
|
|
|
42
42
|
return join(getXdgConfigHome(), "vellum", "device-id");
|
|
43
43
|
}
|
|
44
44
|
|
|
45
|
+
/**
 * Build the path of the bootstrap-secret file for one assistant:
 * `<xdg-config-home>/vellum/assistants/<assistantId>/bootstrap-secret`.
 */
function getBootstrapSecretPath(assistantId: string): string {
  const pathParts = [
    getXdgConfigHome(),
    "vellum",
    "assistants",
    assistantId,
    "bootstrap-secret",
  ];
  return join(...pathParts);
}
|
|
54
|
+
|
|
55
|
+
/**
 * Read the bootstrap secret previously saved for `assistantId`.
 *
 * @returns The secret with surrounding whitespace trimmed, or `null` when the
 *   file is missing, unreadable, or contains only whitespace.
 */
export function loadBootstrapSecret(assistantId: string): string | null {
  let contents: string;
  try {
    contents = readFileSync(getBootstrapSecretPath(assistantId), "utf-8");
  } catch {
    // Any read failure is reported as "no secret".
    return null;
  }

  const secret = contents.trim();
  return secret.length === 0 ? null : secret;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Persist a bootstrap secret for the given assistant so that the desktop
 * client and upgrade/rollback paths can retrieve it later.
 *
 * The containing directory is created on demand with mode 0o700 and the file
 * is written with mode 0o600, followed by a newline.
 *
 * @param assistantId - Assistant whose secret file is written.
 * @param secret - Secret value to store.
 * @throws Propagates any filesystem error from creating the directory or
 *   writing/chmod-ing the file.
 */
export function saveBootstrapSecret(
  assistantId: string,
  secret: string,
): void {
  const path = getBootstrapSecretPath(assistantId);

  // A recursive mkdir is already a no-op when the directory exists, so no
  // separate existence check is needed (and none can race with other writers).
  mkdirSync(dirname(path), { recursive: true, mode: 0o700 });

  writeFileSync(path, secret + "\n", { mode: 0o600 });
  // The `mode` option above only applies when the file is newly created;
  // chmod tightens permissions on a pre-existing file as well.
  chmodSync(path, 0o600);
}
|
|
84
|
+
|
|
45
85
|
function hashWithSalt(input: string): string {
|
|
46
86
|
return createHash("sha256")
|
|
47
87
|
.update(input + DEVICE_ID_SALT)
|
|
@@ -168,9 +208,14 @@ export async function leaseGuardianToken(
|
|
|
168
208
|
assistantId: string,
|
|
169
209
|
): Promise<GuardianTokenData> {
|
|
170
210
|
const deviceId = computeDeviceId();
|
|
211
|
+
const headers: Record<string, string> = { "Content-Type": "application/json" };
|
|
212
|
+
const bootstrapSecret = loadBootstrapSecret(assistantId);
|
|
213
|
+
if (bootstrapSecret) {
|
|
214
|
+
headers["x-bootstrap-secret"] = bootstrapSecret;
|
|
215
|
+
}
|
|
171
216
|
const response = await fetch(`${gatewayUrl}/v1/guardian/init`, {
|
|
172
217
|
method: "POST",
|
|
173
|
-
headers
|
|
218
|
+
headers,
|
|
174
219
|
body: JSON.stringify({ platform: "cli", deviceId }),
|
|
175
220
|
});
|
|
176
221
|
|
package/src/lib/health-check.ts
CHANGED
|
@@ -3,11 +3,13 @@ export const HEALTH_CHECK_TIMEOUT_MS = 1500;
|
|
|
3
3
|
interface HealthResponse {
|
|
4
4
|
status: string;
|
|
5
5
|
message?: string;
|
|
6
|
+
version?: string;
|
|
6
7
|
}
|
|
7
8
|
|
|
8
9
|
export interface HealthCheckResult {
|
|
9
10
|
status: string;
|
|
10
11
|
detail: string | null;
|
|
12
|
+
version?: string;
|
|
11
13
|
}
|
|
12
14
|
|
|
13
15
|
export async function checkManagedHealth(
|
|
@@ -63,6 +65,7 @@ export async function checkManagedHealth(
|
|
|
63
65
|
return {
|
|
64
66
|
status,
|
|
65
67
|
detail: status !== "healthy" ? (data.message ?? null) : null,
|
|
68
|
+
version: data.version,
|
|
66
69
|
};
|
|
67
70
|
} catch (error) {
|
|
68
71
|
const status =
|
|
@@ -108,6 +111,7 @@ export async function checkHealth(
|
|
|
108
111
|
return {
|
|
109
112
|
status,
|
|
110
113
|
detail: status !== "healthy" ? (data.message ?? null) : null,
|
|
114
|
+
version: data.version,
|
|
111
115
|
};
|
|
112
116
|
} catch (error) {
|
|
113
117
|
const status =
|
|
@@ -60,14 +60,15 @@ interface OrganizationListResponse {
|
|
|
60
60
|
}
|
|
61
61
|
|
|
62
62
|
export async function fetchOrganizationId(token: string): Promise<string> {
|
|
63
|
-
const
|
|
63
|
+
const platformUrl = getPlatformUrl();
|
|
64
|
+
const url = `${platformUrl}/v1/organizations/`;
|
|
64
65
|
const response = await fetch(url, {
|
|
65
66
|
headers: { "X-Session-Token": token },
|
|
66
67
|
});
|
|
67
68
|
|
|
68
69
|
if (!response.ok) {
|
|
69
70
|
throw new Error(
|
|
70
|
-
`Failed to fetch organizations (${response.status}). Try logging in again.`,
|
|
71
|
+
`Failed to fetch organizations from ${platformUrl} (${response.status}). Try logging in again.`,
|
|
71
72
|
);
|
|
72
73
|
}
|
|
73
74
|
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
 * Parse a version string into { major, minor, patch } components.
 *
 * Accepts an optional `v`/`V` prefix ("v1.2.3" or "1.2.3") and an optional
 * pre-release or build suffix ("1.2.3-beta.1", "1.2.3+sha"), which is ignored.
 * A missing patch component defaults to 0 ("1.2" parses as 1.2.0). Components
 * beyond the third are ignored.
 *
 * @param version - Version string to parse; surrounding whitespace is ignored.
 * @returns The numeric components, or null when the string has fewer than two
 *   components or any of major/minor/patch is not a plain run of digits.
 */
export function parseVersion(
  version: string,
): { major: number; minor: number; patch: number } | null {
  const stripped = version.trim().replace(/^[vV]/, "");

  // Cut off any pre-release / build metadata before splitting on dots, so an
  // identifier such as the "3" in "1.2-beta.3" is never mistaken for a patch.
  const core = stripped.split(/[-+]/, 1)[0] ?? "";

  const [majorText, minorText, patchText = "0"] = core.split(".");
  if (majorText === undefined || minorText === undefined) {
    return null;
  }

  // Require digits only: parseInt alone would accept signs, leading
  // whitespace, and trailing junk ("2abc" -> 2).
  const isDigits = (text: string): boolean => /^\d+$/.test(text);
  if (!isDigits(majorText) || !isDigits(minorText) || !isDigits(patchText)) {
    return null;
  }

  return {
    major: parseInt(majorText, 10),
    minor: parseInt(minorText, 10),
    patch: parseInt(patchText, 10),
  };
}
|
|
26
|
+
|
|
27
|
+
/**
 * Decide whether a client version and a service group version are compatible.
 *
 * Two versions are compatible when their major and minor components are both
 * equal; the patch component is not compared.
 *
 * @param clientVersion - Version string of the client.
 * @param serviceGroupVersion - Version string of the service group.
 * @returns `true` when major and minor match, `false` otherwise — including
 *   when either string cannot be parsed.
 */
export function isVersionCompatible(
  clientVersion: string,
  serviceGroupVersion: string,
): boolean {
  const client = parseVersion(clientVersion);
  const service = parseVersion(serviceGroupVersion);

  if (!client || !service) {
    return false;
  }

  const sameMajor = client.major === service.major;
  const sameMinor = client.minor === service.minor;
  return sameMajor && sameMinor;
}
|