@vellumai/cli 0.8.10-dev.202606102253.fbea648 → 0.8.10-dev.202606110059.319a8d3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -63,6 +63,8 @@ The CLI must **never** read from or write to the `.vellum/` directory (e.g. `~/.
63
63
 
64
64
  For example, the signing key used for JWT auth between the daemon and gateway is persisted in the lockfile (`resources.signingKey`) so that client actor tokens survive daemon/gateway restarts. On first start (or when the key is missing), the CLI generates a new key via `generateLocalSigningKey()` in `lib/local.ts`, saves it to the lockfile entry, and passes it to both `startLocalDaemon` and `startGateway` as the `ACTOR_TOKEN_SIGNING_KEY` env var. The CLI does **not** read from or write to the `.vellum/` directory for signing keys — it uses the lockfile instead.
65
65
 
66
+ **Exception: `~/.vellum/device.json`.** That file is the machine-wide shared device-identity file, co-owned by the Swift clients, the Electron main process, the host-mode assistant, and the CLI (see `clients/shared/App/Auth/DeviceIdStore.swift` and `apps/macos/src/main/device-id.ts`). The boundary rule covers daemon/gateway-internal state (e.g. `~/.vellum/protected/`, instance dirs), not this file.
67
+
66
68
  ## Process liveness
67
69
 
68
70
  Use `resolveProcessState()` from `lib/process.ts` when checking whether a daemon or gateway should be (re)started. It combines PID existence with an HTTP `/healthz` probe, a readiness grace period, and an [`isVellumProcess()`](https://man7.org/linux/man-pages/man1/ps.1.html) guard against PID reuse — see the function's JSDoc for the full flow.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vellumai/cli",
3
- "version": "0.8.10-dev.202606102253.fbea648",
3
+ "version": "0.8.10-dev.202606110059.319a8d3",
4
4
  "description": "CLI tools for vellum-assistant",
5
5
  "type": "module",
6
6
  "exports": {
@@ -0,0 +1,167 @@
1
+ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
2
+ import {
3
+ existsSync,
4
+ mkdirSync,
5
+ mkdtempSync,
6
+ readFileSync,
7
+ rmSync,
8
+ statSync,
9
+ writeFileSync,
10
+ } from "fs";
11
+ import { tmpdir } from "os";
12
+ import { join } from "path";
13
+
14
+ // Bun's os.homedir() ignores runtime HOME changes, so mock it (same pattern
15
+ // as multi-local.test.ts) to keep production-path tests off the real ~/.vellum.
16
+ let fakeHome: string | undefined;
17
+ const realOs = await import("node:os");
18
+ const osMock = () => ({
19
+ ...realOs,
20
+ homedir: () => fakeHome ?? realOs.homedir(),
21
+ });
22
+ mock.module("node:os", osMock);
23
+ mock.module("os", osMock);
24
+
25
+ import {
26
+ getOrCreateHostDeviceId,
27
+ resetHostDeviceIdCache,
28
+ } from "../lib/device-id.js";
29
+ import { snapshotEnv } from "./helpers/env.js";
30
+
31
+ const UUID_RE =
32
+ /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/;
33
+
34
+ const restoreEnv = snapshotEnv([
35
+ "XDG_CONFIG_HOME",
36
+ "VELLUM_ENVIRONMENT",
37
+ "VELLUM_DEVICE_ID",
38
+ ]);
39
+
40
+ describe("getOrCreateHostDeviceId", () => {
41
+ let tempHome: string;
42
+ let deviceFile: string;
43
+
44
+ beforeEach(() => {
45
+ delete process.env.VELLUM_DEVICE_ID;
46
+ tempHome = mkdtempSync(join(tmpdir(), "cli-device-id-test-"));
47
+ process.env.XDG_CONFIG_HOME = tempHome;
48
+ // Non-prod so the resolver targets $XDG_CONFIG_HOME/vellum-dev/
49
+ // instead of the real ~/.config/vellum/.
50
+ process.env.VELLUM_ENVIRONMENT = "dev";
51
+ deviceFile = join(tempHome, "vellum-dev", "device.json");
52
+ resetHostDeviceIdCache();
53
+ });
54
+
55
+ afterEach(() => {
56
+ restoreEnv();
57
+ rmSync(tempHome, { recursive: true, force: true });
58
+ resetHostDeviceIdCache();
59
+ });
60
+
61
+ test("creates device.json with a UUID when missing", () => {
62
+ const id = getOrCreateHostDeviceId();
63
+
64
+ expect(id).toMatch(UUID_RE);
65
+ expect(existsSync(deviceFile)).toBe(true);
66
+ const parsed = JSON.parse(readFileSync(deviceFile, "utf-8"));
67
+ expect(parsed.deviceId).toBe(id);
68
+ expect(readFileSync(deviceFile, "utf-8").endsWith("\n")).toBe(true);
69
+ });
70
+
71
+ test("returns the existing deviceId without rewriting the file", () => {
72
+ mkdirSync(join(tempHome, "vellum-dev"), { recursive: true });
73
+ writeFileSync(deviceFile, JSON.stringify({ deviceId: "existing-id" }));
74
+ const before = statSync(deviceFile).mtimeMs;
75
+
76
+ expect(getOrCreateHostDeviceId()).toBe("existing-id");
77
+ expect(statSync(deviceFile).mtimeMs).toBe(before);
78
+ expect(readFileSync(deviceFile, "utf-8")).toBe(
79
+ JSON.stringify({ deviceId: "existing-id" }),
80
+ );
81
+ });
82
+
83
+ test("caches the resolved id until reset", () => {
84
+ const first = getOrCreateHostDeviceId();
85
+ rmSync(deviceFile);
86
+
87
+ expect(getOrCreateHostDeviceId()).toBe(first);
88
+
89
+ resetHostDeviceIdCache();
90
+ const second = getOrCreateHostDeviceId();
91
+ expect(second).toMatch(UUID_RE);
92
+ expect(second).not.toBe(first);
93
+ });
94
+
95
+ test("preserves unrelated fields when adding deviceId", () => {
96
+ mkdirSync(join(tempHome, "vellum-dev"), { recursive: true });
97
+ writeFileSync(deviceFile, JSON.stringify({ other: "kept", deviceId: "" }));
98
+
99
+ const id = getOrCreateHostDeviceId();
100
+
101
+ expect(id).toMatch(UUID_RE);
102
+ const parsed = JSON.parse(readFileSync(deviceFile, "utf-8"));
103
+ expect(parsed.other).toBe("kept");
104
+ expect(parsed.deviceId).toBe(id);
105
+ });
106
+
107
+ test("VELLUM_DEVICE_ID env var wins and skips file access", () => {
108
+ process.env.VELLUM_DEVICE_ID = "env-device-id";
109
+
110
+ expect(getOrCreateHostDeviceId()).toBe("env-device-id");
111
+ expect(existsSync(deviceFile)).toBe(false);
112
+ });
113
+
114
+ test("malformed JSON regenerates without throwing", () => {
115
+ mkdirSync(join(tempHome, "vellum-dev"), { recursive: true });
116
+ writeFileSync(deviceFile, "{not json");
117
+
118
+ const id = getOrCreateHostDeviceId();
119
+
120
+ expect(id).toMatch(UUID_RE);
121
+ const parsed = JSON.parse(readFileSync(deviceFile, "utf-8"));
122
+ expect(parsed).toEqual({ deviceId: id });
123
+ });
124
+ });
125
+
126
+ describe("getOrCreateHostDeviceId (production)", () => {
127
+ let tempHome: string;
128
+ let deviceFile: string;
129
+
130
+ beforeEach(() => {
131
+ delete process.env.VELLUM_DEVICE_ID;
132
+ tempHome = mkdtempSync(join(tmpdir(), "cli-device-id-prod-test-"));
133
+ fakeHome = tempHome;
134
+ process.env.XDG_CONFIG_HOME = join(tempHome, ".config");
135
+ process.env.VELLUM_ENVIRONMENT = "production";
136
+ deviceFile = join(tempHome, ".vellum", "device.json");
137
+ resetHostDeviceIdCache();
138
+ });
139
+
140
+ afterEach(() => {
141
+ fakeHome = undefined;
142
+ restoreEnv();
143
+ rmSync(tempHome, { recursive: true, force: true });
144
+ resetHostDeviceIdCache();
145
+ });
146
+
147
+ test("creates device.json in the shared ~/.vellum dir", () => {
148
+ const id = getOrCreateHostDeviceId();
149
+
150
+ expect(id).toMatch(UUID_RE);
151
+ expect(existsSync(deviceFile)).toBe(true);
152
+ expect(JSON.parse(readFileSync(deviceFile, "utf-8")).deviceId).toBe(id);
153
+ });
154
+
155
+ test("reuses an existing ~/.vellum/device.json", () => {
156
+ mkdirSync(join(tempHome, ".vellum"), { recursive: true });
157
+ writeFileSync(
158
+ deviceFile,
159
+ JSON.stringify({ deviceId: "shared-prod-id" }),
160
+ );
161
+
162
+ expect(getOrCreateHostDeviceId()).toBe("shared-prod-id");
163
+ expect(readFileSync(deviceFile, "utf-8")).toBe(
164
+ JSON.stringify({ deviceId: "shared-prod-id" }),
165
+ );
166
+ });
167
+ });
@@ -0,0 +1,19 @@
1
/**
 * Capture the current values of the given environment variables and return a
 * function that puts them back.
 *
 * The values are read once, when `snapshotEnv` is called. The returned
 * function is suitable for `afterEach`: each captured variable is reset to the
 * value it had at snapshot time, or deleted if it was unset at that point.
 * The restore function may be called any number of times.
 *
 * @param keys Names of the `process.env` variables to capture.
 * @returns A function that restores every captured variable.
 */
export function snapshotEnv(keys: readonly string[]): () => void {
  // A Map (instead of a plain object) keeps arbitrary variable names from
  // colliding with Object.prototype members, and copying the names into it
  // up front means later mutation of the caller's array cannot change which
  // variables get restored (or wrongly deleted).
  const saved = new Map<string, string | undefined>();
  for (const key of keys) {
    saved.set(key, process.env[key]);
  }

  return () => {
    for (const [key, value] of saved) {
      if (value === undefined) {
        // Unset at snapshot time: remove it entirely rather than assigning
        // undefined, which process.env would stringify.
        delete process.env[key];
      } else {
        process.env[key] = value;
      }
    }
  };
}
@@ -0,0 +1,149 @@
1
+ import { describe, expect, test } from "bun:test";
2
+
3
+ import {
4
+ buildServiceRunArgs,
5
+ getBuilderManagedEnvKeys,
6
+ type BuildServiceRunArgsOpts,
7
+ type DockerStatefulSetSpec,
8
+ type ServiceName,
9
+ } from "../lib/statefulset.js";
10
+ import { PROVIDER_ENV_VAR_NAMES } from "../shared/provider-env-vars.js";
11
+
12
+ const SECRET_KEYS = [
13
+ "CES_SERVICE_TOKEN",
14
+ "ACTOR_TOKEN_SIGNING_KEY",
15
+ "GUARDIAN_BOOTSTRAP_SECRET",
16
+ ];
17
+
18
+ describe("getBuilderManagedEnvKeys", () => {
19
+ test("gateway always-set keys cover spec static + secret entries and PATH", () => {
20
+ const { always } = getBuilderManagedEnvKeys("gateway");
21
+
22
+ const expected = [
23
+ "VELLUM_WORKSPACE_DIR",
24
+ "GATEWAY_SECURITY_DIR",
25
+ "ASSISTANT_HOST",
26
+ "CES_CREDENTIAL_URL",
27
+ "GATEWAY_IPC_SOCKET_DIR",
28
+ "ASSISTANT_IPC_SOCKET_DIR",
29
+ "GATEWAY_PORT",
30
+ "RUNTIME_HTTP_PORT",
31
+ ...SECRET_KEYS,
32
+ "PATH",
33
+ ];
34
+ for (const key of expected) {
35
+ expect(always.has(key)).toBe(true);
36
+ }
37
+
38
+ expect(always.has("VELLUM_DISABLE_PLATFORM")).toBe(false);
39
+ expect(always.has("VELLUM_DEVICE_ID")).toBe(false);
40
+ });
41
+
42
+ test("assistant always-set keys include secrets and builder-computed extras", () => {
43
+ const { always } = getBuilderManagedEnvKeys("assistant");
44
+
45
+ const expected = [
46
+ ...SECRET_KEYS,
47
+ "VELLUM_ASSISTANT_NAME",
48
+ "GATEWAY_INTERNAL_URL",
49
+ "RUNTIME_HTTP_HOST",
50
+ "PATH",
51
+ ];
52
+ for (const key of expected) {
53
+ expect(always.has(key)).toBe(true);
54
+ }
55
+ });
56
+
57
+ test("gateway hostForwarded equals the three spec host entries", () => {
58
+ const { hostForwarded } = getBuilderManagedEnvKeys("gateway");
59
+ const sorted = [...hostForwarded].sort((a, b) => a.name.localeCompare(b.name));
60
+ expect(sorted).toEqual([
61
+ { name: "VELAY_BASE_URL", hostVar: "VELAY_BASE_URL" },
62
+ { name: "VELLUM_ENVIRONMENT", hostVar: "VELLUM_ENVIRONMENT" },
63
+ { name: "VELLUM_PLATFORM_URL", hostVar: "VELLUM_PLATFORM_URL" },
64
+ ]);
65
+ });
66
+
67
+ test("assistant hostForwarded includes provider keys and platform URL", () => {
68
+ const { hostForwarded } = getBuilderManagedEnvKeys("assistant");
69
+ expect(hostForwarded).toContainEqual({
70
+ name: "ANTHROPIC_API_KEY",
71
+ hostVar: "ANTHROPIC_API_KEY",
72
+ });
73
+ for (const envVar of Object.values(PROVIDER_ENV_VAR_NAMES)) {
74
+ expect(hostForwarded).toContainEqual({ name: envVar, hostVar: envVar });
75
+ }
76
+ expect(hostForwarded).toContainEqual({
77
+ name: "VELLUM_PLATFORM_URL",
78
+ hostVar: "VELLUM_PLATFORM_URL",
79
+ });
80
+ });
81
+
82
+ test("hostForwarded keeps container name when hostVar differs", () => {
83
+ const spec: DockerStatefulSetSpec = {
84
+ startOrder: ["gateway"],
85
+ readiness: { endpoint: "/readyz", timeoutMs: 1, intervalMs: 1 },
86
+ volumeClaimTemplates: [],
87
+ containers: [
88
+ {
89
+ name: "gateway-sidecar",
90
+ internalName: "gateway",
91
+ network: "container",
92
+ env: [{ kind: "host", name: "CONTAINER_NAME", hostVar: "HOST_NAME" }],
93
+ volumeMounts: [],
94
+ },
95
+ ],
96
+ };
97
+
98
+ const { hostForwarded } = getBuilderManagedEnvKeys("gateway", spec);
99
+ expect(hostForwarded).toEqual([
100
+ { name: "CONTAINER_NAME", hostVar: "HOST_NAME" },
101
+ ]);
102
+ });
103
+
104
+ test("throws on unknown service name", () => {
105
+ expect(() => getBuilderManagedEnvKeys("bogus" as ServiceName)).toThrow(
106
+ 'docker-statefulset: unknown service "bogus"',
107
+ );
108
+ });
109
+ });
110
+
111
+ describe("buildServiceRunArgs extra env routing", () => {
112
+ const opts: BuildServiceRunArgsOpts = {
113
+ gatewayPort: 18080,
114
+ imageTags: {
115
+ assistant: "assistant:test",
116
+ gateway: "gateway:test",
117
+ "credential-executor": "ces:test",
118
+ },
119
+ instanceName: "test-instance",
120
+ res: {
121
+ assistantContainer: "test-assistant",
122
+ cesContainer: "test-ces",
123
+ gatewayContainer: "test-gateway",
124
+ network: "test-net",
125
+ },
126
+ extraGatewayEnv: { VELLUM_DISABLE_PLATFORM: "1" },
127
+ extraAssistantEnv: { FOO: "bar" },
128
+ };
129
+
130
+ const runArgs = buildServiceRunArgs(opts);
131
+
132
+ test("extraGatewayEnv lands only in gateway args", () => {
133
+ const gatewayArgs = runArgs.gateway();
134
+ expect(gatewayArgs).toContain("VELLUM_DISABLE_PLATFORM=1");
135
+ expect(gatewayArgs).not.toContain("FOO=bar");
136
+ });
137
+
138
+ test("extraAssistantEnv lands only in assistant args", () => {
139
+ const assistantArgs = runArgs.assistant();
140
+ expect(assistantArgs).toContain("FOO=bar");
141
+ expect(assistantArgs).not.toContain("VELLUM_DISABLE_PLATFORM=1");
142
+ });
143
+
144
+ test("credential-executor args get neither extra env map", () => {
145
+ const cesArgs = runArgs["credential-executor"]();
146
+ expect(cesArgs).not.toContain("VELLUM_DISABLE_PLATFORM=1");
147
+ expect(cesArgs).not.toContain("FOO=bar");
148
+ });
149
+ });
@@ -0,0 +1,149 @@
1
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
2
+
3
+ import { resetHostDeviceIdCache } from "../lib/device-id.js";
4
+ import type { DockerStatefulSetSpec } from "../lib/statefulset.js";
5
+ import { buildReplayEnv, buildReplayState } from "../lib/upgrade-lifecycle.js";
6
+ import { snapshotEnv } from "./helpers/env.js";
7
+
8
+ const restoreEnv = snapshotEnv([
9
+ "VELLUM_PLATFORM_URL",
10
+ "ANTHROPIC_API_KEY",
11
+ "VELLUM_DEVICE_ID",
12
+ ]);
13
+
14
+ afterEach(() => {
15
+ restoreEnv();
16
+ resetHostDeviceIdCache();
17
+ });
18
+
19
+ describe("buildReplayEnv", () => {
20
+ test("gateway: drops secrets, statics, and PATH; keeps flag overrides", () => {
21
+ const captured = {
22
+ GUARDIAN_BOOTSTRAP_SECRET: "s1",
23
+ CES_SERVICE_TOKEN: "s2",
24
+ ACTOR_TOKEN_SIGNING_KEY: "s3",
25
+ PATH: "/usr/bin",
26
+ GATEWAY_PORT: "18080",
27
+ VELLUM_DISABLE_PLATFORM: "1",
28
+ VELLUM_DEVICE_ID: "abc",
29
+ };
30
+
31
+ expect(buildReplayEnv(captured, "gateway")).toEqual({
32
+ VELLUM_DISABLE_PLATFORM: "1",
33
+ VELLUM_DEVICE_ID: "abc",
34
+ });
35
+ });
36
+
37
+ test("gateway: captured VELLUM_PLATFORM_URL dropped when set on host", () => {
38
+ process.env.VELLUM_PLATFORM_URL = "https://host.example.com";
39
+ const replay = buildReplayEnv(
40
+ { VELLUM_PLATFORM_URL: "https://stale.example.com" },
41
+ "gateway",
42
+ );
43
+ expect(replay).toEqual({});
44
+ });
45
+
46
+ test("gateway: captured VELLUM_PLATFORM_URL kept when unset on host", () => {
47
+ delete process.env.VELLUM_PLATFORM_URL;
48
+ const replay = buildReplayEnv(
49
+ { VELLUM_PLATFORM_URL: "https://stale.example.com" },
50
+ "gateway",
51
+ );
52
+ expect(replay).toEqual({
53
+ VELLUM_PLATFORM_URL: "https://stale.example.com",
54
+ });
55
+ });
56
+
57
+ test("assistant: drops builder-computed extras, secrets, and PATH; keeps custom flags", () => {
58
+ delete process.env.ANTHROPIC_API_KEY;
59
+ const captured = {
60
+ VELLUM_ASSISTANT_NAME: "my-assistant",
61
+ GATEWAY_INTERNAL_URL: "http://localhost:8080",
62
+ GUARDIAN_BOOTSTRAP_SECRET: "s1",
63
+ CES_SERVICE_TOKEN: "s2",
64
+ ACTOR_TOKEN_SIGNING_KEY: "s3",
65
+ PATH: "/usr/bin",
66
+ MY_CUSTOM_FLAG: "yes",
67
+ ANTHROPIC_API_KEY: "sk-captured",
68
+ };
69
+
70
+ expect(buildReplayEnv(captured, "assistant")).toEqual({
71
+ MY_CUSTOM_FLAG: "yes",
72
+ ANTHROPIC_API_KEY: "sk-captured",
73
+ });
74
+ });
75
+
76
+ test("assistant: captured ANTHROPIC_API_KEY dropped when set on host", () => {
77
+ process.env.ANTHROPIC_API_KEY = "sk-host";
78
+ const replay = buildReplayEnv(
79
+ { ANTHROPIC_API_KEY: "sk-captured", MY_CUSTOM_FLAG: "yes" },
80
+ "assistant",
81
+ );
82
+ expect(replay).toEqual({ MY_CUSTOM_FLAG: "yes" });
83
+ });
84
+
85
+ test("a secret added to the spec is auto-excluded with no code change", () => {
86
+ const spec: DockerStatefulSetSpec = {
87
+ startOrder: ["gateway"],
88
+ readiness: { endpoint: "/readyz", timeoutMs: 1, intervalMs: 1 },
89
+ volumeClaimTemplates: [],
90
+ containers: [
91
+ {
92
+ name: "gateway-sidecar",
93
+ internalName: "gateway",
94
+ network: "container",
95
+ env: [
96
+ { kind: "secret", name: "FUTURE_SECRET", secret: "signingKey" },
97
+ ],
98
+ volumeMounts: [],
99
+ },
100
+ ],
101
+ };
102
+
103
+ const replay = buildReplayEnv(
104
+ { FUTURE_SECRET: "leaky", VELLUM_DEVICE_ID: "abc" },
105
+ "gateway",
106
+ spec,
107
+ );
108
+ expect(replay).toEqual({ VELLUM_DEVICE_ID: "abc" });
109
+ });
110
+ });
111
+
112
+ describe("buildReplayState", () => {
113
+ beforeEach(() => {
114
+ // VELLUM_DEVICE_ID env precedence keeps getOrCreateHostDeviceId off the
115
+ // filesystem in tests.
116
+ process.env.VELLUM_DEVICE_ID = "host-device-id";
117
+ resetHostDeviceIdCache();
118
+ });
119
+
120
+ test("backfills VELLUM_DEVICE_ID on gateway replay env when absent", () => {
121
+ const state = buildReplayState({}, { VELLUM_DISABLE_PLATFORM: "1" });
122
+ expect(state.extraGatewayEnv).toEqual({
123
+ VELLUM_DISABLE_PLATFORM: "1",
124
+ VELLUM_DEVICE_ID: "host-device-id",
125
+ });
126
+ });
127
+
128
+ test("captured VELLUM_DEVICE_ID wins over host-derived id", () => {
129
+ const state = buildReplayState({}, { VELLUM_DEVICE_ID: "existing" });
130
+ expect(state.extraGatewayEnv.VELLUM_DEVICE_ID).toBe("existing");
131
+ });
132
+
133
+ test("plucks secrets from the captured envs", () => {
134
+ const state = buildReplayState(
135
+ { CES_SERVICE_TOKEN: "ces-token", ACTOR_TOKEN_SIGNING_KEY: "sign-key" },
136
+ { GUARDIAN_BOOTSTRAP_SECRET: "bootstrap" },
137
+ );
138
+ expect(state.bootstrapSecret).toBe("bootstrap");
139
+ expect(state.cesServiceToken).toBe("ces-token");
140
+ expect(state.signingKey).toBe("sign-key");
141
+ });
142
+
143
+ test("generates fresh secrets when missing from captured env", () => {
144
+ const state = buildReplayState({}, {});
145
+ expect(state.bootstrapSecret).toBeUndefined();
146
+ expect(state.cesServiceToken).toMatch(/^[0-9a-f]{64}$/);
147
+ expect(state.signingKey).toMatch(/^[0-9a-f]{64}$/);
148
+ });
149
+ });
@@ -1,5 +1,3 @@
1
- import { randomBytes } from "crypto";
2
-
3
1
  import {
4
2
  findAssistantByName,
5
3
  getActiveAssistant,
@@ -27,9 +25,8 @@ import {
27
25
  buildProgressEvent,
28
26
  buildStartingEvent,
29
27
  buildUpgradeCommitMessage,
30
- captureContainerEnv,
28
+ captureReplayState,
31
29
  commitWorkspaceViaGateway,
32
- CONTAINER_ENV_EXCLUDE_KEYS,
33
30
  fetchCurrentVersion,
34
31
  fetchPreviousVersion,
35
32
  performDockerRollback,
@@ -308,39 +305,13 @@ export async function rollback(): Promise<void> {
308
305
  `🔄 Rolling back Docker assistant '${instanceName}' to ${previousVersion}...\n`,
309
306
  );
310
307
 
311
- // Capture current container env
312
- console.log("💾 Capturing existing container environment...");
313
- const capturedEnv = await captureContainerEnv(res.assistantContainer);
314
- console.log(
315
- ` Captured ${Object.keys(capturedEnv).length} env var(s) from ${res.assistantContainer}\n`,
316
- );
317
-
318
- // Capture GUARDIAN_BOOTSTRAP_SECRET from the gateway container (it is only
319
- // set on gateway, not assistant) so it persists across container restarts.
320
- const gatewayEnv = await captureContainerEnv(res.gatewayContainer);
321
- const bootstrapSecret = gatewayEnv["GUARDIAN_BOOTSTRAP_SECRET"];
322
-
323
- // Extract CES_SERVICE_TOKEN from captured env, or generate fresh one
324
- const cesServiceToken =
325
- capturedEnv["CES_SERVICE_TOKEN"] || randomBytes(32).toString("hex");
326
-
327
- // Extract or generate the shared JWT signing key.
328
- const signingKey =
329
- capturedEnv["ACTOR_TOKEN_SIGNING_KEY"] || randomBytes(32).toString("hex");
330
-
331
- // Build extra env vars, excluding keys managed by buildServiceRunArgs
332
- const envKeysSetByRunArgs = new Set(CONTAINER_ENV_EXCLUDE_KEYS);
333
- for (const envVar of ["ANTHROPIC_API_KEY", "VELLUM_PLATFORM_URL"]) {
334
- if (process.env[envVar]) {
335
- envKeysSetByRunArgs.add(envVar);
336
- }
337
- }
338
- const extraAssistantEnv: Record<string, string> = {};
339
- for (const [key, value] of Object.entries(capturedEnv)) {
340
- if (!envKeysSetByRunArgs.has(key)) {
341
- extraAssistantEnv[key] = value;
342
- }
343
- }
308
+ const {
309
+ bootstrapSecret,
310
+ cesServiceToken,
311
+ signingKey,
312
+ extraAssistantEnv,
313
+ extraGatewayEnv,
314
+ } = await captureReplayState(res);
344
315
 
345
316
  // Parse gateway port from entry's runtimeUrl, fall back to default
346
317
  let gatewayPort = GATEWAY_INTERNAL_PORT;
@@ -401,6 +372,7 @@ export async function rollback(): Promise<void> {
401
372
  bootstrapSecret,
402
373
  cesServiceToken,
403
374
  extraAssistantEnv,
375
+ extraGatewayEnv,
404
376
  gatewayPort,
405
377
  imageTags: previousImageRefs,
406
378
  instanceName,
@@ -1,4 +1,3 @@
1
- import { randomBytes } from "crypto";
2
1
  import { spawnSync } from "child_process";
3
2
 
4
3
  import cliPkg from "../../package.json";
@@ -40,10 +39,9 @@ import {
40
39
  buildProgressEvent,
41
40
  buildStartingEvent,
42
41
  buildUpgradeCommitMessage,
43
- captureContainerEnv,
42
+ captureReplayState,
44
43
  captureUpgradeFailureLogs,
45
44
  commitWorkspaceViaGateway,
46
- CONTAINER_ENV_EXCLUDE_KEYS,
47
45
  rollbackMigrations,
48
46
  UPGRADE_PROGRESS,
49
47
  waitForReady,
@@ -297,16 +295,13 @@ async function upgradeDocker(
297
295
  }),
298
296
  );
299
297
 
300
- console.log("💾 Capturing existing container environment...");
301
- const capturedEnv = await captureContainerEnv(res.assistantContainer);
302
- console.log(
303
- ` Captured ${Object.keys(capturedEnv).length} env var(s) from ${res.assistantContainer}\n`,
304
- );
305
-
306
- // Capture GUARDIAN_BOOTSTRAP_SECRET from the gateway container (it is only
307
- // set on gateway, not assistant) so it persists across container restarts.
308
- const gatewayEnv = await captureContainerEnv(res.gatewayContainer);
309
- const bootstrapSecret = gatewayEnv["GUARDIAN_BOOTSTRAP_SECRET"];
298
+ const {
299
+ bootstrapSecret,
300
+ cesServiceToken,
301
+ signingKey,
302
+ extraAssistantEnv,
303
+ extraGatewayEnv,
304
+ } = await captureReplayState(res);
310
305
 
311
306
  // Notify connected clients that an upgrade is about to begin.
312
307
  // This must fire BEFORE any progress broadcasts so the UI sets
@@ -361,18 +356,6 @@ async function upgradeDocker(
361
356
  // use default
362
357
  }
363
358
 
364
- // Extract CES_SERVICE_TOKEN from the captured env so it can be passed via
365
- // the dedicated cesServiceToken parameter (which propagates it to all three
366
- // containers). If the old instance predates CES_SERVICE_TOKEN, generate a
367
- // fresh one so gateway and CES can authenticate.
368
- const cesServiceToken =
369
- capturedEnv["CES_SERVICE_TOKEN"] || randomBytes(32).toString("hex");
370
-
371
- // Extract or generate the shared JWT signing key. Pre-env-var instances
372
- // won't have it in capturedEnv, so generate fresh in that case.
373
- const signingKey =
374
- capturedEnv["ACTOR_TOKEN_SIGNING_KEY"] || randomBytes(32).toString("hex");
375
-
376
359
  // Create pre-upgrade backup (best-effort, daemon must be running)
377
360
  await broadcastUpgradeEvent(
378
361
  entry.runtimeUrl,
@@ -415,23 +398,6 @@ async function upgradeDocker(
415
398
  await stopContainers(res);
416
399
  console.log("✅ Containers stopped\n");
417
400
 
418
- // Build the set of extra env vars to replay on the new assistant container.
419
- // Captured env vars serve as the base; keys already managed by
420
- // buildServiceRunArgs are excluded to avoid duplicates.
421
- const envKeysSetByRunArgs = new Set(CONTAINER_ENV_EXCLUDE_KEYS);
422
- // Only exclude keys that buildServiceRunArgs will actually set
423
- for (const envVar of ["ANTHROPIC_API_KEY", "VELLUM_PLATFORM_URL"]) {
424
- if (process.env[envVar]) {
425
- envKeysSetByRunArgs.add(envVar);
426
- }
427
- }
428
- const extraAssistantEnv: Record<string, string> = {};
429
- for (const [key, value] of Object.entries(capturedEnv)) {
430
- if (!envKeysSetByRunArgs.has(key)) {
431
- extraAssistantEnv[key] = value;
432
- }
433
- }
434
-
435
401
  console.log("🚀 Starting upgraded containers...");
436
402
  await startContainers(
437
403
  {
@@ -439,6 +405,7 @@ async function upgradeDocker(
439
405
  bootstrapSecret,
440
406
  cesServiceToken,
441
407
  extraAssistantEnv,
408
+ extraGatewayEnv,
442
409
  gatewayPort,
443
410
  imageTags,
444
411
  instanceName,
@@ -544,6 +511,7 @@ async function upgradeDocker(
544
511
  bootstrapSecret,
545
512
  cesServiceToken,
546
513
  extraAssistantEnv,
514
+ extraGatewayEnv,
547
515
  gatewayPort,
548
516
  imageTags: previousImageRefs,
549
517
  instanceName,
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Host device ID resolver. Resolution order: `VELLUM_DEVICE_ID` env var,
3
+ * then `device.json`. Production uses the machine-wide shared
4
+ * `~/.vellum/device.json`, matching Electron (`apps/macos/src/main/device-id.ts`)
5
+ * and Swift (`VellumPaths.deviceIdFile`); non-production uses
6
+ * `<configDir>/device.json`.
7
+ *
8
+ * Not to be confused with `guardian-token.ts`'s salted-hash Guardian
9
+ * identity (`computeDeviceId` / `getOrCreatePersistedDeviceId`) — do not
10
+ * merge the two.
11
+ */
12
+
13
+ import { randomUUID } from "crypto";
14
+ import { mkdirSync, readFileSync, writeFileSync } from "fs";
15
+ import { homedir } from "os";
16
+ import { join } from "path";
17
+
18
+ import { getConfigDir } from "./environments/paths.js";
19
+ import { getCurrentEnvironment } from "./environments/resolve.js";
20
+
21
+ let cached: string | undefined;
22
+
23
/**
 * Work out where `device.json` lives for the current environment.
 *
 * Production uses `.vellum` under the user's home directory; every other
 * environment uses that environment's config directory.
 *
 * @returns The containing directory and the full path of `device.json`.
 */
function resolveDeviceIdPaths(): { dir: string; file: string } {
  const environment = getCurrentEnvironment();

  let dir: string;
  if (environment.name === "production") {
    dir = join(homedir(), ".vellum");
  } else {
    dir = getConfigDir(environment);
  }

  const file = join(dir, "device.json");
  return { dir, file };
}
31
+
32
/**
 * Get the stable device ID for this host machine, creating and persisting
 * one in `device.json` if absent.
 *
 * Resolution order:
 * 1. The value cached by an earlier call in this process.
 * 2. `VELLUM_DEVICE_ID` (trimmed; ignored when empty) — no file access.
 * 3. A non-empty string `deviceId` in the existing `device.json`.
 * 4. A freshly generated UUID, written back to `device.json` alongside any
 *    other fields the file already contained.
 *
 * Never throws: if the file location cannot be resolved, or the file cannot
 * be read or written, the generated UUID is still cached and returned for
 * the process lifetime.
 *
 * @returns The resolved device ID.
 */
export function getOrCreateHostDeviceId(): string {
  if (cached !== undefined) {
    return cached;
  }

  const fromEnv = process.env.VELLUM_DEVICE_ID?.trim();
  if (fromEnv) {
    cached = fromEnv;
    return fromEnv;
  }

  // Resolving the paths consults the environment config and the home
  // directory. Guard it so an error there cannot break the never-throws
  // contract; without a location we simply skip persistence.
  let paths: { dir: string; file: string } | undefined;
  try {
    paths = resolveDeviceIdPaths();
  } catch {
    // Location unknown — fall through to an in-memory ID.
  }

  // Preserve unrelated fields from any existing JSON object.
  let existing: Record<string, unknown> = {};
  if (paths !== undefined) {
    try {
      const raw: unknown = JSON.parse(readFileSync(paths.file, "utf-8"));
      if (raw && typeof raw === "object" && !Array.isArray(raw)) {
        existing = raw as Record<string, unknown>;
      }
    } catch {
      // Missing, unreadable, or malformed — start fresh.
    }
  }

  const persisted = existing.deviceId;
  if (typeof persisted === "string" && persisted.length > 0) {
    cached = persisted;
    return persisted;
  }

  const generated = randomUUID();
  if (paths !== undefined) {
    try {
      mkdirSync(paths.dir, { recursive: true });
      existing.deviceId = generated;
      // Pretty-printed with a trailing newline.
      writeFileSync(paths.file, JSON.stringify(existing, null, 2) + "\n", {
        mode: 0o644,
      });
    } catch {
      // Write failure — use the generated ID in-memory only.
    }
  }

  cached = generated;
  return generated;
}

/** Reset the cached device ID. Used by tests to force re-resolution. */
export function resetHostDeviceIdCache(): void {
  cached = undefined;
}
package/src/lib/docker.ts CHANGED
@@ -22,6 +22,7 @@ import type { AssistantEntry } from "./assistant-config";
22
22
  import { buildHatchConfigValues, writeInitialConfig } from "./config-utils";
23
23
  import { buildServiceRunArgs } from "./statefulset.js";
24
24
  import type { Species } from "./constants";
25
+ import { getOrCreateHostDeviceId } from "./device-id.js";
25
26
  import { getDefaultPorts } from "./environments/paths.js";
26
27
  import { getCurrentEnvironment } from "./environments/resolve.js";
27
28
  import { leaseGuardianToken } from "./guardian-token";
@@ -882,6 +883,8 @@ function startFileWatcher(opts: {
882
883
  signingKey?: string;
883
884
  bootstrapSecret?: string;
884
885
  cesServiceToken?: string;
886
+ extraAssistantEnv?: Record<string, string>;
887
+ extraGatewayEnv?: Record<string, string>;
885
888
  gatewayPort: number;
886
889
  imageTags: Record<ServiceName, string>;
887
890
  instanceName: string;
@@ -901,6 +904,8 @@ function startFileWatcher(opts: {
901
904
  signingKey: opts.signingKey,
902
905
  bootstrapSecret: opts.bootstrapSecret,
903
906
  cesServiceToken: opts.cesServiceToken,
907
+ extraAssistantEnv: opts.extraAssistantEnv,
908
+ extraGatewayEnv: opts.extraGatewayEnv,
904
909
  gatewayPort,
905
910
  imageTags,
906
911
  instanceName,
@@ -1327,8 +1332,10 @@ export async function hatchDocker(
1327
1332
  extraAssistantEnv.VELLUM_DISABLE_PLATFORM =
1328
1333
  flagEnvVars.VELLUM_DISABLE_PLATFORM;
1329
1334
  }
1330
- const extraGatewayEnv =
1331
- Object.keys(flagEnvVars).length > 0 ? flagEnvVars : undefined;
1335
+ const extraGatewayEnv = {
1336
+ ...flagEnvVars,
1337
+ VELLUM_DEVICE_ID: getOrCreateHostDeviceId(),
1338
+ };
1332
1339
  await startContainers(
1333
1340
  {
1334
1341
  signingKey,
@@ -1430,6 +1437,8 @@ export async function hatchDocker(
1430
1437
  signingKey,
1431
1438
  bootstrapSecret,
1432
1439
  cesServiceToken,
1440
+ extraAssistantEnv,
1441
+ extraGatewayEnv,
1433
1442
  gatewayPort,
1434
1443
  imageTags,
1435
1444
  instanceName,
@@ -262,6 +262,49 @@ export interface BuildServiceRunArgsOpts extends DockerRunSecrets {
262
262
  avatarDevicePath?: string;
263
263
  }
264
264
 
265
interface BuilderManagedEnvKeys {
  /**
   * Keys `buildServiceRunArgs` sets unconditionally: the spec's non-host
   * entries, the builder-computed extras, and the image-baked `PATH`. These
   * must never be replayed from a captured environment.
   */
  always: ReadonlySet<string>;
  /**
   * The spec's host-forwarded entries. `name` is the env key as seen inside
   * the container (what docker inspect captures); `hostVar` is the host
   * `process.env` variable `buildServiceRunArgs` reads. A captured `name`
   * should be excluded from replay only when `process.env[hostVar]` is set.
   */
  hostForwarded: ReadonlyArray<{ name: string; hostVar: string }>;
}

/**
 * List the env var names that `buildServiceRunArgs` manages for one service.
 * The result is derived from the spec's env entries, so entries added to the
 * spec later are picked up without touching this function.
 *
 * @param service Internal name of the service to look up in the spec.
 * @param spec Spec to inspect; defaults to `DOCKER_STATEFUL_SET_SPEC`.
 * @returns The always-set keys and the host-forwarded name/hostVar pairs.
 * @throws Error when no container in the spec has `internalName === service`.
 */
export function getBuilderManagedEnvKeys(
  service: ServiceName,
  spec = DOCKER_STATEFUL_SET_SPEC,
): BuilderManagedEnvKeys {
  const match = spec.containers.find(
    (candidate) => candidate.internalName === service,
  );
  if (!match) {
    throw new Error(`docker-statefulset: unknown service "${service}"`);
  }

  // Host entries keep their container-side name; hostVar falls back to it.
  const hostForwarded = match.env.flatMap((entry) =>
    entry.kind === "host"
      ? [{ name: entry.name, hostVar: entry.hostVar ?? entry.name }]
      : [],
  );

  // Everything that is not host-forwarded is set on every run.
  const specManaged = match.env.flatMap((entry) =>
    entry.kind === "host" ? [] : [entry.name],
  );

  // Builder-computed extras added outside the spec env arrays
  const builderExtras =
    service === "assistant"
      ? ["VELLUM_ASSISTANT_NAME", "GATEWAY_INTERNAL_URL", AVATAR_DEVICE_ENV_VAR]
      : [];

  const always = new Set<string>(["PATH", ...specManaged, ...builderExtras]);
  return { always, hostForwarded };
}
307
+
265
308
  function resolveVolume(
266
309
  spec: DockerStatefulSetSpec,
267
310
  instanceName: string,
@@ -7,6 +7,7 @@ import type { AssistantEntry } from "./assistant-config.js";
7
7
  import { saveAssistantEntry } from "./assistant-config.js";
8
8
  import { createBackup, pruneOldBackups, restoreBackup } from "./backup-ops.js";
9
9
  import { emitCliError } from "./cli-error.js";
10
+ import { getOrCreateHostDeviceId } from "./device-id.js";
10
11
  import {
11
12
  captureImageRefs,
12
13
  DOCKER_READY_TIMEOUT_MS,
@@ -19,6 +20,11 @@ import { getStateDir } from "./environments/paths.js";
19
20
  import { getCurrentEnvironment } from "./environments/resolve.js";
20
21
  import { loadGuardianToken } from "./guardian-token.js";
21
22
  import { resolveImageRefs } from "./platform-releases.js";
23
+ import {
24
+ getBuilderManagedEnvKeys,
25
+ type DockerStatefulSetSpec,
26
+ type ServiceName,
27
+ } from "./statefulset.js";
22
28
  import { exec, execOutput } from "./step-runner.js";
23
29
  import { compareVersions } from "./version-compat.js";
24
30
 
@@ -141,20 +147,6 @@ export function buildUpgradeCommitMessage(options: {
141
147
  return lines.join("\n");
142
148
  }
143
149
 
144
- /**
145
- * Environment variable keys that are set by CLI run arguments and should
146
- * not be replayed from a captured container environment during upgrades
147
- * or rollbacks. Shared between upgrade.ts and rollback.ts.
148
- */
149
- export const CONTAINER_ENV_EXCLUDE_KEYS: ReadonlySet<string> = new Set([
150
- "CES_SERVICE_TOKEN",
151
- "GUARDIAN_BOOTSTRAP_SECRET",
152
- "VELLUM_ASSISTANT_NAME",
153
- "RUNTIME_HTTP_HOST",
154
- "PATH",
155
- "ACTOR_TOKEN_SIGNING_KEY",
156
- ]);
157
-
158
150
  /**
159
151
  * Capture environment variables from a running Docker container so they
160
152
  * can be replayed onto the replacement container after upgrade.
@@ -183,6 +175,93 @@ export async function captureContainerEnv(
183
175
  return captured;
184
176
  }
185
177
 
178
/**
 * Reduce a captured container env to the entries that are safe to replay
 * onto a replacement container.
 *
 * Every key in the `always` set from `getBuilderManagedEnvKeys` is removed.
 * A host-forwarded key is removed only when its host variable is currently
 * set (truthy) in `process.env`, so a fresh host value takes precedence over
 * a stale captured one; otherwise the captured value is kept.
 *
 * Security contract: the returned env is memory-only — never persist it to
 * disk, and log counts only, never values.
 *
 * @param capturedEnv - Env captured from the outgoing container.
 * @param service - Service whose builder-managed keys should be excluded.
 * @param spec - Optional spec override, passed through to `getBuilderManagedEnvKeys`.
 * @returns A new object holding only the replayable entries.
 */
export function buildReplayEnv(
  capturedEnv: Record<string, string>,
  service: ServiceName,
  spec?: DockerStatefulSetSpec,
): Record<string, string> {
  const managed = getBuilderManagedEnvKeys(service, spec);

  // Container-side names whose host variable is set right now.
  const freshFromHost = new Set<string>();
  for (const { name, hostVar } of managed.hostForwarded) {
    if (process.env[hostVar]) {
      freshFromHost.add(name);
    }
  }

  const replayable = Object.entries(capturedEnv).filter(
    ([key]) => !(managed.always.has(key) || freshFromHost.has(key)),
  );
  return Object.fromEntries(replayable);
}
207
+
208
/**
 * Secrets and replay env carried over from the outgoing containers to their
 * replacements.
 */
interface ReplayState {
  /** Gateway's GUARDIAN_BOOTSTRAP_SECRET; `undefined` when it was not captured. */
  bootstrapSecret: string | undefined;
  /** CES_SERVICE_TOKEN to hand to the replacement containers. */
  cesServiceToken: string;
  /** ACTOR_TOKEN_SIGNING_KEY to hand to the replacement containers. */
  signingKey: string;
  /** Replayable env entries captured from the assistant container. */
  extraAssistantEnv: Record<string, string>;
  /** Replayable env entries captured from the gateway container. */
  extraGatewayEnv: Record<string, string>;
}
216
+
217
/**
 * Build the replay state for replacement containers from envs that have
 * already been captured.
 *
 * - GUARDIAN_BOOTSTRAP_SECRET is read from the gateway env (it is only set
 *   on the gateway).
 * - CES_SERVICE_TOKEN and ACTOR_TOKEN_SIGNING_KEY are taken from the
 *   assistant env; when missing or empty, a fresh 32-byte hex value is
 *   generated for instances that predate them.
 * - VELLUM_DEVICE_ID in the gateway replay env is backfilled from the host
 *   for gateways hatched before device-id injection. A captured value wins
 *   because it was itself host-derived.
 *
 * @param capturedEnv - Env captured from the assistant container.
 * @param gatewayEnv - Env captured from the gateway container.
 * @returns Secrets plus the filtered assistant and gateway replay envs.
 */
export function buildReplayState(
  capturedEnv: Record<string, string>,
  gatewayEnv: Record<string, string>,
): ReplayState {
  const freshSecret = (): string => randomBytes(32).toString("hex");

  const extraGatewayEnv = buildReplayEnv(gatewayEnv, "gateway");
  // Only consult the host device id when the gateway did not already carry one.
  if (extraGatewayEnv["VELLUM_DEVICE_ID"] == null) {
    extraGatewayEnv["VELLUM_DEVICE_ID"] = getOrCreateHostDeviceId();
  }

  const cesServiceToken = capturedEnv["CES_SERVICE_TOKEN"] || freshSecret();
  const signingKey = capturedEnv["ACTOR_TOKEN_SIGNING_KEY"] || freshSecret();
  const extraAssistantEnv = buildReplayEnv(capturedEnv, "assistant");

  return {
    bootstrapSecret: gatewayEnv["GUARDIAN_BOOTSTRAP_SECRET"],
    cesServiceToken,
    signingKey,
    extraAssistantEnv,
    extraGatewayEnv,
  };
}
242
+
243
/**
 * Capture the envs of the assistant and gateway containers and turn them
 * into the replay state for their replacements.
 *
 * Both captures are started together and awaited as a pair. Only the number
 * of assistant env vars is logged, never their values (security contract on
 * `buildReplayEnv`).
 *
 * @param res - Resource names holding the assistant and gateway container names.
 * @returns The replay state produced by `buildReplayState`.
 */
export async function captureReplayState(
  res: Pick<
    ReturnType<typeof dockerResourceNames>,
    "assistantContainer" | "gatewayContainer"
  >,
): Promise<ReplayState> {
  const { assistantContainer, gatewayContainer } = res;

  console.log("💾 Capturing existing container environment...");

  const assistantCapture = captureContainerEnv(assistantContainer);
  const gatewayCapture = captureContainerEnv(gatewayContainer);
  const [assistantEnv, gatewayEnv] = await Promise.all([
    assistantCapture,
    gatewayCapture,
  ]);

  const capturedCount = Object.keys(assistantEnv).length;
  console.log(
    ` Captured ${capturedCount} env var(s) from ${assistantContainer}\n`,
  );

  return buildReplayState(assistantEnv, gatewayEnv);
}
264
+
186
265
  /**
187
266
  * Best-effort fetch of the running service group version from the gateway
188
267
  * `/healthz` endpoint. Returns `undefined` when the endpoint is
@@ -581,37 +660,13 @@ export async function performDockerRollback(
581
660
  console.warn("⚠️ Pre-rollback backup failed (continuing with rollback)\n");
582
661
  }
583
662
 
584
- // Capture container env, extract secrets
585
- console.log("💾 Capturing existing container environment...");
586
- const capturedEnv = await captureContainerEnv(res.assistantContainer);
587
- console.log(
588
- ` Captured ${Object.keys(capturedEnv).length} env var(s) from ${res.assistantContainer}\n`,
589
- );
590
-
591
- // Capture GUARDIAN_BOOTSTRAP_SECRET from the gateway container (it is only
592
- // set on gateway, not assistant) so it persists across container restarts.
593
- const gatewayEnv = await captureContainerEnv(res.gatewayContainer);
594
- const bootstrapSecret = gatewayEnv["GUARDIAN_BOOTSTRAP_SECRET"];
595
-
596
- const cesServiceToken =
597
- capturedEnv["CES_SERVICE_TOKEN"] || randomBytes(32).toString("hex");
598
-
599
- const signingKey =
600
- capturedEnv["ACTOR_TOKEN_SIGNING_KEY"] || randomBytes(32).toString("hex");
601
-
602
- // Build extra env vars, excluding keys managed by buildServiceRunArgs
603
- const envKeysSetByRunArgs = new Set(CONTAINER_ENV_EXCLUDE_KEYS);
604
- for (const envVar of ["ANTHROPIC_API_KEY", "VELLUM_PLATFORM_URL"]) {
605
- if (process.env[envVar]) {
606
- envKeysSetByRunArgs.add(envVar);
607
- }
608
- }
609
- const extraAssistantEnv: Record<string, string> = {};
610
- for (const [key, value] of Object.entries(capturedEnv)) {
611
- if (!envKeysSetByRunArgs.has(key)) {
612
- extraAssistantEnv[key] = value;
613
- }
614
- }
663
+ const {
664
+ bootstrapSecret,
665
+ cesServiceToken,
666
+ signingKey,
667
+ extraAssistantEnv,
668
+ extraGatewayEnv,
669
+ } = await captureReplayState(res);
615
670
 
616
671
  // Parse gateway port from entry's runtimeUrl
617
672
  let gatewayPort = GATEWAY_INTERNAL_PORT;
@@ -684,6 +739,7 @@ export async function performDockerRollback(
684
739
  bootstrapSecret,
685
740
  cesServiceToken,
686
741
  extraAssistantEnv,
742
+ extraGatewayEnv,
687
743
  gatewayPort,
688
744
  imageTags: targetImageTags,
689
745
  instanceName,
@@ -801,6 +857,7 @@ export async function performDockerRollback(
801
857
  bootstrapSecret,
802
858
  cesServiceToken,
803
859
  extraAssistantEnv,
860
+ extraGatewayEnv,
804
861
  gatewayPort,
805
862
  imageTags: currentImageRefs,
806
863
  instanceName,