@vellumai/cli 0.8.10-dev.202606102242.5285563 → 0.8.10-dev.202606102342.319a8d3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +2 -0
- package/package.json +1 -1
- package/src/__tests__/device-id.test.ts +167 -0
- package/src/__tests__/helpers/env.ts +19 -0
- package/src/__tests__/statefulset.test.ts +149 -0
- package/src/__tests__/upgrade-replay-env.test.ts +149 -0
- package/src/commands/rollback.ts +9 -37
- package/src/commands/upgrade.ts +10 -42
- package/src/lib/device-id.ts +85 -0
- package/src/lib/docker.ts +11 -2
- package/src/lib/statefulset.ts +43 -0
- package/src/lib/upgrade-lifecycle.ts +102 -45
package/AGENTS.md
CHANGED
|
@@ -63,6 +63,8 @@ The CLI must **never** read from or write to the `.vellum/` directory (e.g. `~/.
|
|
|
63
63
|
|
|
64
64
|
For example, the signing key used for JWT auth between the daemon and gateway is persisted in the lockfile (`resources.signingKey`) so that client actor tokens survive daemon/gateway restarts. On first start (or when the key is missing), the CLI generates a new key via `generateLocalSigningKey()` in `lib/local.ts`, saves it to the lockfile entry, and passes it to both `startLocalDaemon` and `startGateway` as the `ACTOR_TOKEN_SIGNING_KEY` env var. The CLI does **not** read or write to the `.vellum/` directory for signing keys — it uses the lockfile instead.
|
|
65
65
|
|
|
66
|
+
**Exception: `~/.vellum/device.json`.** That file is the machine-wide shared device-identity file, co-owned by the Swift clients, the Electron main process, the host-mode assistant, and the CLI (see `clients/shared/App/Auth/DeviceIdStore.swift` and `apps/macos/src/main/device-id.ts`). The boundary rule covers daemon/gateway-internal state (e.g. `~/.vellum/protected/`, instance dirs), not this file.
|
|
67
|
+
|
|
66
68
|
## Process liveness
|
|
67
69
|
|
|
68
70
|
Use `resolveProcessState()` from `lib/process.ts` when checking whether a daemon or gateway should be (re)started. It combines PID existence with an HTTP `/healthz` probe, a readiness grace period, and an [`isVellumProcess()`](https://man7.org/linux/man-pages/man1/ps.1.html) guard against PID reuse — see the function's JSDoc for the full flow.
|
package/package.json
CHANGED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
2
|
+
import {
|
|
3
|
+
existsSync,
|
|
4
|
+
mkdirSync,
|
|
5
|
+
mkdtempSync,
|
|
6
|
+
readFileSync,
|
|
7
|
+
rmSync,
|
|
8
|
+
statSync,
|
|
9
|
+
writeFileSync,
|
|
10
|
+
} from "fs";
|
|
11
|
+
import { tmpdir } from "os";
|
|
12
|
+
import { join } from "path";
|
|
13
|
+
|
|
14
|
+
// os.homedir() under Bun does not pick up HOME changes made at runtime, so the
// module itself is mocked (multi-local.test.ts uses the same pattern). This
// keeps the production-path tests away from the real ~/.vellum.
let fakeHome: string | undefined;
const realOs = await import("node:os");

// Factory for the mocked module: everything from the real `os`, except that
// homedir() returns `fakeHome` whenever a test has set it.
function osMock() {
  return {
    ...realOs,
    homedir: () => fakeHome ?? realOs.homedir(),
  };
}

// Register under both specifiers so either import style sees the mock.
mock.module("node:os", osMock);
mock.module("os", osMock);
|
|
24
|
+
|
|
25
|
+
import {
|
|
26
|
+
getOrCreateHostDeviceId,
|
|
27
|
+
resetHostDeviceIdCache,
|
|
28
|
+
} from "../lib/device-id.js";
|
|
29
|
+
import { snapshotEnv } from "./helpers/env.js";
|
|
30
|
+
|
|
31
|
+
// Lowercase hyphenated 8-4-4-4-12 hex string; generated IDs must match this.
const UUID_RE =
  /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/;

// Env vars the suites below mutate. The snapshot is taken once at module load
// and re-applied after every test.
const restoreEnv = snapshotEnv([
  "XDG_CONFIG_HOME",
  "VELLUM_ENVIRONMENT",
  "VELLUM_DEVICE_ID",
]);
|
|
39
|
+
|
|
40
|
+
// Non-production resolution: device.json is expected under
// $XDG_CONFIG_HOME/vellum-dev/.
describe("getOrCreateHostDeviceId", () => {
  let tempHome: string;
  let deviceFile: string;

  // Writes `raw` verbatim to the device file, creating its directory first.
  const seedDeviceFile = (raw: string): void => {
    mkdirSync(join(tempHome, "vellum-dev"), { recursive: true });
    writeFileSync(deviceFile, raw);
  };

  // Current on-disk contents of the device file.
  const readDeviceFile = (): string => readFileSync(deviceFile, "utf-8");

  beforeEach(() => {
    delete process.env.VELLUM_DEVICE_ID;
    tempHome = mkdtempSync(join(tmpdir(), "cli-device-id-test-"));
    process.env.XDG_CONFIG_HOME = tempHome;
    // A non-production environment makes the resolver use
    // $XDG_CONFIG_HOME/vellum-dev/ rather than the real ~/.config/vellum/.
    process.env.VELLUM_ENVIRONMENT = "dev";
    deviceFile = join(tempHome, "vellum-dev", "device.json");
    resetHostDeviceIdCache();
  });

  afterEach(() => {
    restoreEnv();
    rmSync(tempHome, { recursive: true, force: true });
    resetHostDeviceIdCache();
  });

  test("creates device.json with a UUID when missing", () => {
    const id = getOrCreateHostDeviceId();

    expect(id).toMatch(UUID_RE);
    expect(existsSync(deviceFile)).toBe(true);

    const onDisk = readDeviceFile();
    expect(JSON.parse(onDisk).deviceId).toBe(id);
    expect(onDisk.endsWith("\n")).toBe(true);
  });

  test("returns the existing deviceId without rewriting the file", () => {
    const payload = JSON.stringify({ deviceId: "existing-id" });
    seedDeviceFile(payload);
    const mtimeBefore = statSync(deviceFile).mtimeMs;

    expect(getOrCreateHostDeviceId()).toBe("existing-id");
    expect(statSync(deviceFile).mtimeMs).toBe(mtimeBefore);
    expect(readDeviceFile()).toBe(payload);
  });

  test("caches the resolved id until reset", () => {
    const first = getOrCreateHostDeviceId();
    rmSync(deviceFile);

    // Still served from the cache even though the file is gone.
    expect(getOrCreateHostDeviceId()).toBe(first);

    resetHostDeviceIdCache();
    const second = getOrCreateHostDeviceId();
    expect(second).toMatch(UUID_RE);
    expect(second).not.toBe(first);
  });

  test("preserves unrelated fields when adding deviceId", () => {
    seedDeviceFile(JSON.stringify({ other: "kept", deviceId: "" }));

    const id = getOrCreateHostDeviceId();

    expect(id).toMatch(UUID_RE);
    const rewritten = JSON.parse(readDeviceFile());
    expect(rewritten.other).toBe("kept");
    expect(rewritten.deviceId).toBe(id);
  });

  test("VELLUM_DEVICE_ID env var wins and skips file access", () => {
    process.env.VELLUM_DEVICE_ID = "env-device-id";

    expect(getOrCreateHostDeviceId()).toBe("env-device-id");
    expect(existsSync(deviceFile)).toBe(false);
  });

  test("malformed JSON regenerates without throwing", () => {
    seedDeviceFile("{not json");

    const id = getOrCreateHostDeviceId();

    expect(id).toMatch(UUID_RE);
    expect(JSON.parse(readDeviceFile())).toEqual({ deviceId: id });
  });
});
|
|
125
|
+
|
|
126
|
+
// Production resolution: device.json is expected under <home>/.vellum/, with
// the home directory redirected to a temp dir through the mocked os.homedir().
describe("getOrCreateHostDeviceId (production)", () => {
  let tempHome: string;
  let deviceFile: string;

  beforeEach(() => {
    delete process.env.VELLUM_DEVICE_ID;
    tempHome = mkdtempSync(join(tmpdir(), "cli-device-id-prod-test-"));
    fakeHome = tempHome;
    process.env.XDG_CONFIG_HOME = join(tempHome, ".config");
    process.env.VELLUM_ENVIRONMENT = "production";
    deviceFile = join(tempHome, ".vellum", "device.json");
    resetHostDeviceIdCache();
  });

  afterEach(() => {
    fakeHome = undefined;
    restoreEnv();
    rmSync(tempHome, { recursive: true, force: true });
    resetHostDeviceIdCache();
  });

  test("creates device.json in the shared ~/.vellum dir", () => {
    const id = getOrCreateHostDeviceId();

    expect(id).toMatch(UUID_RE);
    expect(existsSync(deviceFile)).toBe(true);

    const persisted = JSON.parse(readFileSync(deviceFile, "utf-8"));
    expect(persisted.deviceId).toBe(id);
  });

  test("reuses an existing ~/.vellum/device.json", () => {
    const payload = JSON.stringify({ deviceId: "shared-prod-id" });
    mkdirSync(join(tempHome, ".vellum"), { recursive: true });
    writeFileSync(deviceFile, payload);

    expect(getOrCreateHostDeviceId()).toBe("shared-prod-id");
    // The file must be left exactly as it was found.
    expect(readFileSync(deviceFile, "utf-8")).toBe(payload);
  });
});
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
 * Capture the current `process.env` values of `keys` and hand back a restorer.
 *
 * Calling the restorer (typically from `afterEach`) puts every captured
 * variable back to its snapshot value, deleting those that were unset when the
 * snapshot was taken. The restorer can be called any number of times.
 *
 * @param keys Names of the environment variables to capture.
 * @returns A function that re-applies the captured values.
 */
export function snapshotEnv(keys: readonly string[]): () => void {
  const snapshot: Record<string, string | undefined> = {};
  keys.forEach((name) => {
    snapshot[name] = process.env[name];
  });

  const restore = (): void => {
    keys.forEach((name) => {
      const original = snapshot[name];
      if (original !== undefined) {
        process.env[name] = original;
        return;
      }
      // The variable did not exist at snapshot time — remove it again.
      delete process.env[name];
    });
  };

  return restore;
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
buildServiceRunArgs,
|
|
5
|
+
getBuilderManagedEnvKeys,
|
|
6
|
+
type BuildServiceRunArgsOpts,
|
|
7
|
+
type DockerStatefulSetSpec,
|
|
8
|
+
type ServiceName,
|
|
9
|
+
} from "../lib/statefulset.js";
|
|
10
|
+
import { PROVIDER_ENV_VAR_NAMES } from "../shared/provider-env-vars.js";
|
|
11
|
+
|
|
12
|
+
// Secret env var names that the tests below expect among the always-set keys
// of both the gateway and the assistant.
const SECRET_KEYS = [
  "CES_SERVICE_TOKEN",
  "ACTOR_TOKEN_SIGNING_KEY",
  "GUARDIAN_BOOTSTRAP_SECRET",
];
|
|
17
|
+
|
|
18
|
+
describe("getBuilderManagedEnvKeys", () => {
  // Asserts that every name in `keys` is a member of `set`.
  const expectAllPresent = (
    set: { has(key: string): boolean },
    keys: readonly string[],
  ): void => {
    keys.forEach((key) => {
      expect(set.has(key)).toBe(true);
    });
  };

  test("gateway always-set keys cover spec static + secret entries and PATH", () => {
    const { always } = getBuilderManagedEnvKeys("gateway");

    expectAllPresent(always, [
      "VELLUM_WORKSPACE_DIR",
      "GATEWAY_SECURITY_DIR",
      "ASSISTANT_HOST",
      "CES_CREDENTIAL_URL",
      "GATEWAY_IPC_SOCKET_DIR",
      "ASSISTANT_IPC_SOCKET_DIR",
      "GATEWAY_PORT",
      "RUNTIME_HTTP_PORT",
      ...SECRET_KEYS,
      "PATH",
    ]);

    // These two must not be in the always-set group.
    expect(always.has("VELLUM_DISABLE_PLATFORM")).toBe(false);
    expect(always.has("VELLUM_DEVICE_ID")).toBe(false);
  });

  test("assistant always-set keys include secrets and builder-computed extras", () => {
    const { always } = getBuilderManagedEnvKeys("assistant");

    expectAllPresent(always, [
      ...SECRET_KEYS,
      "VELLUM_ASSISTANT_NAME",
      "GATEWAY_INTERNAL_URL",
      "RUNTIME_HTTP_HOST",
      "PATH",
    ]);
  });

  test("gateway hostForwarded equals the three spec host entries", () => {
    const { hostForwarded } = getBuilderManagedEnvKeys("gateway");

    // Sort a copy by name so the comparison does not depend on spec order.
    const byName = [...hostForwarded].sort((left, right) =>
      left.name.localeCompare(right.name),
    );

    expect(byName).toEqual([
      { name: "VELAY_BASE_URL", hostVar: "VELAY_BASE_URL" },
      { name: "VELLUM_ENVIRONMENT", hostVar: "VELLUM_ENVIRONMENT" },
      { name: "VELLUM_PLATFORM_URL", hostVar: "VELLUM_PLATFORM_URL" },
    ]);
  });

  test("assistant hostForwarded includes provider keys and platform URL", () => {
    const { hostForwarded } = getBuilderManagedEnvKeys("assistant");

    expect(hostForwarded).toContainEqual({
      name: "ANTHROPIC_API_KEY",
      hostVar: "ANTHROPIC_API_KEY",
    });
    Object.values(PROVIDER_ENV_VAR_NAMES).forEach((envVar) => {
      expect(hostForwarded).toContainEqual({ name: envVar, hostVar: envVar });
    });
    expect(hostForwarded).toContainEqual({
      name: "VELLUM_PLATFORM_URL",
      hostVar: "VELLUM_PLATFORM_URL",
    });
  });

  test("hostForwarded keeps container name when hostVar differs", () => {
    // Minimal custom spec with one host entry whose in-container name differs
    // from the host variable it is read from.
    const customSpec: DockerStatefulSetSpec = {
      startOrder: ["gateway"],
      readiness: { endpoint: "/readyz", timeoutMs: 1, intervalMs: 1 },
      volumeClaimTemplates: [],
      containers: [
        {
          name: "gateway-sidecar",
          internalName: "gateway",
          network: "container",
          env: [{ kind: "host", name: "CONTAINER_NAME", hostVar: "HOST_NAME" }],
          volumeMounts: [],
        },
      ],
    };

    const { hostForwarded } = getBuilderManagedEnvKeys("gateway", customSpec);

    expect(hostForwarded).toEqual([
      { name: "CONTAINER_NAME", hostVar: "HOST_NAME" },
    ]);
  });

  test("throws on unknown service name", () => {
    const callWithBogusService = (): unknown =>
      getBuilderManagedEnvKeys("bogus" as ServiceName);

    expect(callWithBogusService).toThrow(
      'docker-statefulset: unknown service "bogus"',
    );
  });
});
|
|
110
|
+
|
|
111
|
+
// Each extra-env map must reach only the service it is named after.
describe("buildServiceRunArgs extra env routing", () => {
  const opts: BuildServiceRunArgsOpts = {
    gatewayPort: 18080,
    imageTags: {
      assistant: "assistant:test",
      gateway: "gateway:test",
      "credential-executor": "ces:test",
    },
    instanceName: "test-instance",
    res: {
      assistantContainer: "test-assistant",
      cesContainer: "test-ces",
      gatewayContainer: "test-gateway",
      network: "test-net",
    },
    extraGatewayEnv: { VELLUM_DISABLE_PLATFORM: "1" },
    extraAssistantEnv: { FOO: "bar" },
  };

  const runArgs = buildServiceRunArgs(opts);

  // Rendered `KEY=value` forms of the two extra-env entries above.
  const GATEWAY_ONLY = "VELLUM_DISABLE_PLATFORM=1";
  const ASSISTANT_ONLY = "FOO=bar";

  test("extraGatewayEnv lands only in gateway args", () => {
    const args = runArgs.gateway();
    expect(args).toContain(GATEWAY_ONLY);
    expect(args).not.toContain(ASSISTANT_ONLY);
  });

  test("extraAssistantEnv lands only in assistant args", () => {
    const args = runArgs.assistant();
    expect(args).toContain(ASSISTANT_ONLY);
    expect(args).not.toContain(GATEWAY_ONLY);
  });

  test("credential-executor args get neither extra env map", () => {
    const args = runArgs["credential-executor"]();
    expect(args).not.toContain(GATEWAY_ONLY);
    expect(args).not.toContain(ASSISTANT_ONLY);
  });
});
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
+
|
|
3
|
+
import { resetHostDeviceIdCache } from "../lib/device-id.js";
|
|
4
|
+
import type { DockerStatefulSetSpec } from "../lib/statefulset.js";
|
|
5
|
+
import { buildReplayEnv, buildReplayState } from "../lib/upgrade-lifecycle.js";
|
|
6
|
+
import { snapshotEnv } from "./helpers/env.js";
|
|
7
|
+
|
|
8
|
+
// Host env vars that the suites in this file set or delete. The snapshot is
// taken once at module load.
const restoreEnv = snapshotEnv([
  "VELLUM_PLATFORM_URL",
  "ANTHROPIC_API_KEY",
  "VELLUM_DEVICE_ID",
]);

// File-wide cleanup: put the env back, then drop the cached device ID so the
// next test resolves it again.
afterEach(() => {
  restoreEnv();
  resetHostDeviceIdCache();
});
|
|
18
|
+
|
|
19
|
+
describe("buildReplayEnv", () => {
  test("gateway: drops secrets, statics, and PATH; keeps flag overrides", () => {
    const capturedGatewayEnv = {
      GUARDIAN_BOOTSTRAP_SECRET: "s1",
      CES_SERVICE_TOKEN: "s2",
      ACTOR_TOKEN_SIGNING_KEY: "s3",
      PATH: "/usr/bin",
      GATEWAY_PORT: "18080",
      VELLUM_DISABLE_PLATFORM: "1",
      VELLUM_DEVICE_ID: "abc",
    };

    const replay = buildReplayEnv(capturedGatewayEnv, "gateway");

    expect(replay).toEqual({
      VELLUM_DISABLE_PLATFORM: "1",
      VELLUM_DEVICE_ID: "abc",
    });
  });

  test("gateway: captured VELLUM_PLATFORM_URL dropped when set on host", () => {
    process.env.VELLUM_PLATFORM_URL = "https://host.example.com";

    const capturedGatewayEnv = {
      VELLUM_PLATFORM_URL: "https://stale.example.com",
    };

    expect(buildReplayEnv(capturedGatewayEnv, "gateway")).toEqual({});
  });

  test("gateway: captured VELLUM_PLATFORM_URL kept when unset on host", () => {
    delete process.env.VELLUM_PLATFORM_URL;

    const capturedGatewayEnv = {
      VELLUM_PLATFORM_URL: "https://stale.example.com",
    };

    expect(buildReplayEnv(capturedGatewayEnv, "gateway")).toEqual({
      VELLUM_PLATFORM_URL: "https://stale.example.com",
    });
  });

  test("assistant: drops builder-computed extras, secrets, and PATH; keeps custom flags", () => {
    // With the host variable unset, the captured provider key is expected to
    // be replayed.
    delete process.env.ANTHROPIC_API_KEY;

    const capturedAssistantEnv = {
      VELLUM_ASSISTANT_NAME: "my-assistant",
      GATEWAY_INTERNAL_URL: "http://localhost:8080",
      GUARDIAN_BOOTSTRAP_SECRET: "s1",
      CES_SERVICE_TOKEN: "s2",
      ACTOR_TOKEN_SIGNING_KEY: "s3",
      PATH: "/usr/bin",
      MY_CUSTOM_FLAG: "yes",
      ANTHROPIC_API_KEY: "sk-captured",
    };

    const replay = buildReplayEnv(capturedAssistantEnv, "assistant");

    expect(replay).toEqual({
      MY_CUSTOM_FLAG: "yes",
      ANTHROPIC_API_KEY: "sk-captured",
    });
  });

  test("assistant: captured ANTHROPIC_API_KEY dropped when set on host", () => {
    process.env.ANTHROPIC_API_KEY = "sk-host";

    const capturedAssistantEnv = {
      ANTHROPIC_API_KEY: "sk-captured",
      MY_CUSTOM_FLAG: "yes",
    };

    expect(buildReplayEnv(capturedAssistantEnv, "assistant")).toEqual({
      MY_CUSTOM_FLAG: "yes",
    });
  });

  test("a secret added to the spec is auto-excluded with no code change", () => {
    // Custom spec declaring a secret entry that no hard-coded list mentions.
    const specWithNewSecret: DockerStatefulSetSpec = {
      startOrder: ["gateway"],
      readiness: { endpoint: "/readyz", timeoutMs: 1, intervalMs: 1 },
      volumeClaimTemplates: [],
      containers: [
        {
          name: "gateway-sidecar",
          internalName: "gateway",
          network: "container",
          env: [
            { kind: "secret", name: "FUTURE_SECRET", secret: "signingKey" },
          ],
          volumeMounts: [],
        },
      ],
    };

    const capturedGatewayEnv = {
      FUTURE_SECRET: "leaky",
      VELLUM_DEVICE_ID: "abc",
    };

    const replay = buildReplayEnv(
      capturedGatewayEnv,
      "gateway",
      specWithNewSecret,
    );

    expect(replay).toEqual({ VELLUM_DEVICE_ID: "abc" });
  });
});
|
|
111
|
+
|
|
112
|
+
describe("buildReplayState", () => {
  beforeEach(() => {
    // Setting VELLUM_DEVICE_ID makes getOrCreateHostDeviceId resolve from the
    // env var, so these tests never touch the filesystem.
    process.env.VELLUM_DEVICE_ID = "host-device-id";
    resetHostDeviceIdCache();
  });

  test("backfills VELLUM_DEVICE_ID on gateway replay env when absent", () => {
    const capturedGatewayEnv = { VELLUM_DISABLE_PLATFORM: "1" };

    const { extraGatewayEnv } = buildReplayState({}, capturedGatewayEnv);

    expect(extraGatewayEnv).toEqual({
      VELLUM_DISABLE_PLATFORM: "1",
      VELLUM_DEVICE_ID: "host-device-id",
    });
  });

  test("captured VELLUM_DEVICE_ID wins over host-derived id", () => {
    const capturedGatewayEnv = { VELLUM_DEVICE_ID: "existing" };

    const { extraGatewayEnv } = buildReplayState({}, capturedGatewayEnv);

    expect(extraGatewayEnv.VELLUM_DEVICE_ID).toBe("existing");
  });

  test("plucks secrets from the captured envs", () => {
    const firstCapturedEnv = {
      CES_SERVICE_TOKEN: "ces-token",
      ACTOR_TOKEN_SIGNING_KEY: "sign-key",
    };
    const secondCapturedEnv = { GUARDIAN_BOOTSTRAP_SECRET: "bootstrap" };

    const replayState = buildReplayState(firstCapturedEnv, secondCapturedEnv);

    expect(replayState.bootstrapSecret).toBe("bootstrap");
    expect(replayState.cesServiceToken).toBe("ces-token");
    expect(replayState.signingKey).toBe("sign-key");
  });

  test("generates fresh secrets when missing from captured env", () => {
    const HEX_64 = /^[0-9a-f]{64}$/;

    const replayState = buildReplayState({}, {});

    // The bootstrap secret is not regenerated; the other two are.
    expect(replayState.bootstrapSecret).toBeUndefined();
    expect(replayState.cesServiceToken).toMatch(HEX_64);
    expect(replayState.signingKey).toMatch(HEX_64);
  });
});
|
package/src/commands/rollback.ts
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import { randomBytes } from "crypto";
|
|
2
|
-
|
|
3
1
|
import {
|
|
4
2
|
findAssistantByName,
|
|
5
3
|
getActiveAssistant,
|
|
@@ -27,9 +25,8 @@ import {
|
|
|
27
25
|
buildProgressEvent,
|
|
28
26
|
buildStartingEvent,
|
|
29
27
|
buildUpgradeCommitMessage,
|
|
30
|
-
|
|
28
|
+
captureReplayState,
|
|
31
29
|
commitWorkspaceViaGateway,
|
|
32
|
-
CONTAINER_ENV_EXCLUDE_KEYS,
|
|
33
30
|
fetchCurrentVersion,
|
|
34
31
|
fetchPreviousVersion,
|
|
35
32
|
performDockerRollback,
|
|
@@ -308,39 +305,13 @@ export async function rollback(): Promise<void> {
|
|
|
308
305
|
`🔄 Rolling back Docker assistant '${instanceName}' to ${previousVersion}...\n`,
|
|
309
306
|
);
|
|
310
307
|
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
// Capture GUARDIAN_BOOTSTRAP_SECRET from the gateway container (it is only
|
|
319
|
-
// set on gateway, not assistant) so it persists across container restarts.
|
|
320
|
-
const gatewayEnv = await captureContainerEnv(res.gatewayContainer);
|
|
321
|
-
const bootstrapSecret = gatewayEnv["GUARDIAN_BOOTSTRAP_SECRET"];
|
|
322
|
-
|
|
323
|
-
// Extract CES_SERVICE_TOKEN from captured env, or generate fresh one
|
|
324
|
-
const cesServiceToken =
|
|
325
|
-
capturedEnv["CES_SERVICE_TOKEN"] || randomBytes(32).toString("hex");
|
|
326
|
-
|
|
327
|
-
// Extract or generate the shared JWT signing key.
|
|
328
|
-
const signingKey =
|
|
329
|
-
capturedEnv["ACTOR_TOKEN_SIGNING_KEY"] || randomBytes(32).toString("hex");
|
|
330
|
-
|
|
331
|
-
// Build extra env vars, excluding keys managed by buildServiceRunArgs
|
|
332
|
-
const envKeysSetByRunArgs = new Set(CONTAINER_ENV_EXCLUDE_KEYS);
|
|
333
|
-
for (const envVar of ["ANTHROPIC_API_KEY", "VELLUM_PLATFORM_URL"]) {
|
|
334
|
-
if (process.env[envVar]) {
|
|
335
|
-
envKeysSetByRunArgs.add(envVar);
|
|
336
|
-
}
|
|
337
|
-
}
|
|
338
|
-
const extraAssistantEnv: Record<string, string> = {};
|
|
339
|
-
for (const [key, value] of Object.entries(capturedEnv)) {
|
|
340
|
-
if (!envKeysSetByRunArgs.has(key)) {
|
|
341
|
-
extraAssistantEnv[key] = value;
|
|
342
|
-
}
|
|
343
|
-
}
|
|
308
|
+
const {
|
|
309
|
+
bootstrapSecret,
|
|
310
|
+
cesServiceToken,
|
|
311
|
+
signingKey,
|
|
312
|
+
extraAssistantEnv,
|
|
313
|
+
extraGatewayEnv,
|
|
314
|
+
} = await captureReplayState(res);
|
|
344
315
|
|
|
345
316
|
// Parse gateway port from entry's runtimeUrl, fall back to default
|
|
346
317
|
let gatewayPort = GATEWAY_INTERNAL_PORT;
|
|
@@ -401,6 +372,7 @@ export async function rollback(): Promise<void> {
|
|
|
401
372
|
bootstrapSecret,
|
|
402
373
|
cesServiceToken,
|
|
403
374
|
extraAssistantEnv,
|
|
375
|
+
extraGatewayEnv,
|
|
404
376
|
gatewayPort,
|
|
405
377
|
imageTags: previousImageRefs,
|
|
406
378
|
instanceName,
|
package/src/commands/upgrade.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { randomBytes } from "crypto";
|
|
2
1
|
import { spawnSync } from "child_process";
|
|
3
2
|
|
|
4
3
|
import cliPkg from "../../package.json";
|
|
@@ -40,10 +39,9 @@ import {
|
|
|
40
39
|
buildProgressEvent,
|
|
41
40
|
buildStartingEvent,
|
|
42
41
|
buildUpgradeCommitMessage,
|
|
43
|
-
|
|
42
|
+
captureReplayState,
|
|
44
43
|
captureUpgradeFailureLogs,
|
|
45
44
|
commitWorkspaceViaGateway,
|
|
46
|
-
CONTAINER_ENV_EXCLUDE_KEYS,
|
|
47
45
|
rollbackMigrations,
|
|
48
46
|
UPGRADE_PROGRESS,
|
|
49
47
|
waitForReady,
|
|
@@ -297,16 +295,13 @@ async function upgradeDocker(
|
|
|
297
295
|
}),
|
|
298
296
|
);
|
|
299
297
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
// set on gateway, not assistant) so it persists across container restarts.
|
|
308
|
-
const gatewayEnv = await captureContainerEnv(res.gatewayContainer);
|
|
309
|
-
const bootstrapSecret = gatewayEnv["GUARDIAN_BOOTSTRAP_SECRET"];
|
|
298
|
+
const {
|
|
299
|
+
bootstrapSecret,
|
|
300
|
+
cesServiceToken,
|
|
301
|
+
signingKey,
|
|
302
|
+
extraAssistantEnv,
|
|
303
|
+
extraGatewayEnv,
|
|
304
|
+
} = await captureReplayState(res);
|
|
310
305
|
|
|
311
306
|
// Notify connected clients that an upgrade is about to begin.
|
|
312
307
|
// This must fire BEFORE any progress broadcasts so the UI sets
|
|
@@ -361,18 +356,6 @@ async function upgradeDocker(
|
|
|
361
356
|
// use default
|
|
362
357
|
}
|
|
363
358
|
|
|
364
|
-
// Extract CES_SERVICE_TOKEN from the captured env so it can be passed via
|
|
365
|
-
// the dedicated cesServiceToken parameter (which propagates it to all three
|
|
366
|
-
// containers). If the old instance predates CES_SERVICE_TOKEN, generate a
|
|
367
|
-
// fresh one so gateway and CES can authenticate.
|
|
368
|
-
const cesServiceToken =
|
|
369
|
-
capturedEnv["CES_SERVICE_TOKEN"] || randomBytes(32).toString("hex");
|
|
370
|
-
|
|
371
|
-
// Extract or generate the shared JWT signing key. Pre-env-var instances
|
|
372
|
-
// won't have it in capturedEnv, so generate fresh in that case.
|
|
373
|
-
const signingKey =
|
|
374
|
-
capturedEnv["ACTOR_TOKEN_SIGNING_KEY"] || randomBytes(32).toString("hex");
|
|
375
|
-
|
|
376
359
|
// Create pre-upgrade backup (best-effort, daemon must be running)
|
|
377
360
|
await broadcastUpgradeEvent(
|
|
378
361
|
entry.runtimeUrl,
|
|
@@ -415,23 +398,6 @@ async function upgradeDocker(
|
|
|
415
398
|
await stopContainers(res);
|
|
416
399
|
console.log("✅ Containers stopped\n");
|
|
417
400
|
|
|
418
|
-
// Build the set of extra env vars to replay on the new assistant container.
|
|
419
|
-
// Captured env vars serve as the base; keys already managed by
|
|
420
|
-
// buildServiceRunArgs are excluded to avoid duplicates.
|
|
421
|
-
const envKeysSetByRunArgs = new Set(CONTAINER_ENV_EXCLUDE_KEYS);
|
|
422
|
-
// Only exclude keys that buildServiceRunArgs will actually set
|
|
423
|
-
for (const envVar of ["ANTHROPIC_API_KEY", "VELLUM_PLATFORM_URL"]) {
|
|
424
|
-
if (process.env[envVar]) {
|
|
425
|
-
envKeysSetByRunArgs.add(envVar);
|
|
426
|
-
}
|
|
427
|
-
}
|
|
428
|
-
const extraAssistantEnv: Record<string, string> = {};
|
|
429
|
-
for (const [key, value] of Object.entries(capturedEnv)) {
|
|
430
|
-
if (!envKeysSetByRunArgs.has(key)) {
|
|
431
|
-
extraAssistantEnv[key] = value;
|
|
432
|
-
}
|
|
433
|
-
}
|
|
434
|
-
|
|
435
401
|
console.log("🚀 Starting upgraded containers...");
|
|
436
402
|
await startContainers(
|
|
437
403
|
{
|
|
@@ -439,6 +405,7 @@ async function upgradeDocker(
|
|
|
439
405
|
bootstrapSecret,
|
|
440
406
|
cesServiceToken,
|
|
441
407
|
extraAssistantEnv,
|
|
408
|
+
extraGatewayEnv,
|
|
442
409
|
gatewayPort,
|
|
443
410
|
imageTags,
|
|
444
411
|
instanceName,
|
|
@@ -544,6 +511,7 @@ async function upgradeDocker(
|
|
|
544
511
|
bootstrapSecret,
|
|
545
512
|
cesServiceToken,
|
|
546
513
|
extraAssistantEnv,
|
|
514
|
+
extraGatewayEnv,
|
|
547
515
|
gatewayPort,
|
|
548
516
|
imageTags: previousImageRefs,
|
|
549
517
|
instanceName,
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Host device ID resolver. Resolution order: `VELLUM_DEVICE_ID` env var,
|
|
3
|
+
* then `device.json`. Production uses the machine-wide shared
|
|
4
|
+
* `~/.vellum/device.json`, matching Electron (`apps/macos/src/main/device-id.ts`)
|
|
5
|
+
* and Swift (`VellumPaths.deviceIdFile`); non-production uses
|
|
6
|
+
* `<configDir>/device.json`.
|
|
7
|
+
*
|
|
8
|
+
* Not to be confused with `guardian-token.ts`'s salted-hash Guardian
|
|
9
|
+
* identity (`computeDeviceId` / `getOrCreatePersistedDeviceId`) — do not
|
|
10
|
+
* merge the two.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { randomUUID } from "crypto";
|
|
14
|
+
import { mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
15
|
+
import { homedir } from "os";
|
|
16
|
+
import { join } from "path";
|
|
17
|
+
|
|
18
|
+
import { getConfigDir } from "./environments/paths.js";
|
|
19
|
+
import { getCurrentEnvironment } from "./environments/resolve.js";
|
|
20
|
+
|
|
21
|
+
// Memoized device ID for this process: `undefined` until
// getOrCreateHostDeviceId() resolves one, and again after
// resetHostDeviceIdCache() clears it.
let cached: string | undefined;
|
|
22
|
+
|
|
23
|
+
/**
 * Work out where `device.json` lives for the current environment.
 *
 * The environment named "production" maps to `<home>/.vellum`; any other
 * environment maps to whatever `getConfigDir(env)` returns.
 *
 * @returns The containing directory and the full path of `device.json`.
 */
function resolveDeviceIdPaths(): { dir: string; file: string } {
  const environment = getCurrentEnvironment();

  let dir: string;
  if (environment.name === "production") {
    dir = join(homedir(), ".vellum");
  } else {
    dir = getConfigDir(environment);
  }

  const file = join(dir, "device.json");
  return { dir, file };
}
|
|
31
|
+
|
|
32
|
+
/**
 * Return the stable device ID for this host, generating and persisting one in
 * `device.json` when none is stored yet.
 *
 * Sources are consulted in order: the in-process cache, a non-blank
 * `VELLUM_DEVICE_ID` env var (trimmed), then a non-empty string `deviceId` in
 * `device.json`. When a new ID has to be written, any other fields found in an
 * existing JSON object are carried over.
 *
 * Read, parse, and write failures are swallowed: a failed write still leaves
 * the generated UUID cached and returned for the rest of the process.
 * NOTE(review): path resolution runs outside the try blocks — this assumes
 * `resolveDeviceIdPaths()` itself does not throw; confirm.
 *
 * @returns The resolved (or newly generated) device ID.
 */
export function getOrCreateHostDeviceId(): string {
  if (cached !== undefined) {
    return cached;
  }

  const envOverride = process.env.VELLUM_DEVICE_ID?.trim();
  if (envOverride) {
    cached = envOverride;
    return envOverride;
  }

  const paths = resolveDeviceIdPaths();

  // Whatever JSON object is already on disk; its other fields survive a rewrite.
  let contents: Record<string, unknown> = {};
  try {
    const parsed: unknown = JSON.parse(readFileSync(paths.file, "utf-8"));
    const isJsonObject =
      typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
    if (isJsonObject) {
      contents = parsed as Record<string, unknown>;
    }
  } catch {
    // File is missing, unreadable, or not valid JSON — treat it as empty.
  }

  const stored = contents.deviceId;
  if (typeof stored === "string" && stored !== "") {
    cached = stored;
    return stored;
  }

  const freshId = randomUUID();
  try {
    mkdirSync(paths.dir, { recursive: true });
    contents.deviceId = freshId;
    const serialized = `${JSON.stringify(contents, null, 2)}\n`;
    writeFileSync(paths.file, serialized, { mode: 0o644 });
  } catch {
    // Could not persist — the ID lives only in memory for this process.
  }

  cached = freshId;
  return freshId;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Forget the memoized device ID so the next `getOrCreateHostDeviceId()` call
 * resolves it from scratch. Intended for tests.
 */
export function resetHostDeviceIdCache(): void {
  cached = undefined;
}
|
package/src/lib/docker.ts
CHANGED
|
@@ -22,6 +22,7 @@ import type { AssistantEntry } from "./assistant-config";
|
|
|
22
22
|
import { buildHatchConfigValues, writeInitialConfig } from "./config-utils";
|
|
23
23
|
import { buildServiceRunArgs } from "./statefulset.js";
|
|
24
24
|
import type { Species } from "./constants";
|
|
25
|
+
import { getOrCreateHostDeviceId } from "./device-id.js";
|
|
25
26
|
import { getDefaultPorts } from "./environments/paths.js";
|
|
26
27
|
import { getCurrentEnvironment } from "./environments/resolve.js";
|
|
27
28
|
import { leaseGuardianToken } from "./guardian-token";
|
|
@@ -882,6 +883,8 @@ function startFileWatcher(opts: {
|
|
|
882
883
|
signingKey?: string;
|
|
883
884
|
bootstrapSecret?: string;
|
|
884
885
|
cesServiceToken?: string;
|
|
886
|
+
extraAssistantEnv?: Record<string, string>;
|
|
887
|
+
extraGatewayEnv?: Record<string, string>;
|
|
885
888
|
gatewayPort: number;
|
|
886
889
|
imageTags: Record<ServiceName, string>;
|
|
887
890
|
instanceName: string;
|
|
@@ -901,6 +904,8 @@ function startFileWatcher(opts: {
|
|
|
901
904
|
signingKey: opts.signingKey,
|
|
902
905
|
bootstrapSecret: opts.bootstrapSecret,
|
|
903
906
|
cesServiceToken: opts.cesServiceToken,
|
|
907
|
+
extraAssistantEnv: opts.extraAssistantEnv,
|
|
908
|
+
extraGatewayEnv: opts.extraGatewayEnv,
|
|
904
909
|
gatewayPort,
|
|
905
910
|
imageTags,
|
|
906
911
|
instanceName,
|
|
@@ -1327,8 +1332,10 @@ export async function hatchDocker(
|
|
|
1327
1332
|
extraAssistantEnv.VELLUM_DISABLE_PLATFORM =
|
|
1328
1333
|
flagEnvVars.VELLUM_DISABLE_PLATFORM;
|
|
1329
1334
|
}
|
|
1330
|
-
const extraGatewayEnv =
|
|
1331
|
-
|
|
1335
|
+
const extraGatewayEnv = {
|
|
1336
|
+
...flagEnvVars,
|
|
1337
|
+
VELLUM_DEVICE_ID: getOrCreateHostDeviceId(),
|
|
1338
|
+
};
|
|
1332
1339
|
await startContainers(
|
|
1333
1340
|
{
|
|
1334
1341
|
signingKey,
|
|
@@ -1430,6 +1437,8 @@ export async function hatchDocker(
|
|
|
1430
1437
|
signingKey,
|
|
1431
1438
|
bootstrapSecret,
|
|
1432
1439
|
cesServiceToken,
|
|
1440
|
+
extraAssistantEnv,
|
|
1441
|
+
extraGatewayEnv,
|
|
1433
1442
|
gatewayPort,
|
|
1434
1443
|
imageTags,
|
|
1435
1444
|
instanceName,
|
package/src/lib/statefulset.ts
CHANGED
|
@@ -262,6 +262,49 @@ export interface BuildServiceRunArgsOpts extends DockerRunSecrets {
|
|
|
262
262
|
avatarDevicePath?: string;
|
|
263
263
|
}
|
|
264
264
|
|
|
265
|
+
/**
 * Env keys that `buildServiceRunArgs` manages for one service, grouped by
 * how a captured-env replay has to treat them.
 */
interface BuilderManagedEnvKeys {
  /**
   * Keys `buildServiceRunArgs` sets on every run: spec static/secret
   * entries, builder-computed extras, and the image-baked PATH. These must
   * never be replayed from a captured container env.
   */
  always: ReadonlySet<string>;
  /**
   * Spec entries forwarded from the host. `name` is the key as it appears
   * inside the container (the one docker inspect captures); `hostVar` is the
   * host `process.env` variable `buildServiceRunArgs` reads. A captured
   * `name` is excluded from replay only while `process.env[hostVar]` is set.
   */
  hostForwarded: readonly { name: string; hostVar: string }[];
}
|
|
276
|
+
|
|
277
|
+
/**
 * Collect the env var names that `buildServiceRunArgs` manages for a
 * service. The names are read from the spec's container env entries, so
 * entries added to the spec later are picked up without touching this
 * function.
 *
 * @param service Internal name of the container to look up in the spec.
 * @param spec Spec to read from; defaults to `DOCKER_STATEFUL_SET_SPEC`.
 * @returns The always-managed key set and the host-forwarded entries.
 * @throws Error when the spec has no container whose `internalName` is
 *   `service`.
 */
export function getBuilderManagedEnvKeys(
  service: ServiceName,
  spec = DOCKER_STATEFUL_SET_SPEC,
): BuilderManagedEnvKeys {
  const container = spec.containers.find((c) => c.internalName === service);
  if (container === undefined) {
    throw new Error(`docker-statefulset: unknown service "${service}"`);
  }

  // Host-kind entries keep both the container-side name and the host
  // variable it is read from (same name when no explicit hostVar is given).
  const hostForwarded = container.env.flatMap((entry) =>
    entry.kind === "host"
      ? [{ name: entry.name, hostVar: entry.hostVar ?? entry.name }]
      : [],
  );

  // PATH is always treated as managed; every non-host spec entry joins it.
  const always = new Set<string>([
    "PATH",
    ...container.env
      .filter((entry) => entry.kind !== "host")
      .map((entry) => entry.name),
  ]);

  // Builder-computed extras added outside the spec env arrays.
  if (service === "assistant") {
    for (const extra of [
      "VELLUM_ASSISTANT_NAME",
      "GATEWAY_INTERNAL_URL",
      AVATAR_DEVICE_ENV_VAR,
    ]) {
      always.add(extra);
    }
  }

  return { always, hostForwarded };
}
|
|
307
|
+
|
|
265
308
|
function resolveVolume(
|
|
266
309
|
spec: DockerStatefulSetSpec,
|
|
267
310
|
instanceName: string,
|
package/src/lib/upgrade-lifecycle.ts
CHANGED
|
@@ -7,6 +7,7 @@ import type { AssistantEntry } from "./assistant-config.js";
|
|
|
7
7
|
import { saveAssistantEntry } from "./assistant-config.js";
|
|
8
8
|
import { createBackup, pruneOldBackups, restoreBackup } from "./backup-ops.js";
|
|
9
9
|
import { emitCliError } from "./cli-error.js";
|
|
10
|
+
import { getOrCreateHostDeviceId } from "./device-id.js";
|
|
10
11
|
import {
|
|
11
12
|
captureImageRefs,
|
|
12
13
|
DOCKER_READY_TIMEOUT_MS,
|
|
@@ -19,6 +20,11 @@ import { getStateDir } from "./environments/paths.js";
|
|
|
19
20
|
import { getCurrentEnvironment } from "./environments/resolve.js";
|
|
20
21
|
import { loadGuardianToken } from "./guardian-token.js";
|
|
21
22
|
import { resolveImageRefs } from "./platform-releases.js";
|
|
23
|
+
import {
|
|
24
|
+
getBuilderManagedEnvKeys,
|
|
25
|
+
type DockerStatefulSetSpec,
|
|
26
|
+
type ServiceName,
|
|
27
|
+
} from "./statefulset.js";
|
|
22
28
|
import { exec, execOutput } from "./step-runner.js";
|
|
23
29
|
import { compareVersions } from "./version-compat.js";
|
|
24
30
|
|
|
@@ -141,20 +147,6 @@ export function buildUpgradeCommitMessage(options: {
|
|
|
141
147
|
return lines.join("\n");
|
|
142
148
|
}
|
|
143
149
|
|
|
144
|
-
/**
|
|
145
|
-
* Environment variable keys that are set by CLI run arguments and should
|
|
146
|
-
* not be replayed from a captured container environment during upgrades
|
|
147
|
-
* or rollbacks. Shared between upgrade.ts and rollback.ts.
|
|
148
|
-
*/
|
|
149
|
-
export const CONTAINER_ENV_EXCLUDE_KEYS: ReadonlySet<string> = new Set([
|
|
150
|
-
"CES_SERVICE_TOKEN",
|
|
151
|
-
"GUARDIAN_BOOTSTRAP_SECRET",
|
|
152
|
-
"VELLUM_ASSISTANT_NAME",
|
|
153
|
-
"RUNTIME_HTTP_HOST",
|
|
154
|
-
"PATH",
|
|
155
|
-
"ACTOR_TOKEN_SIGNING_KEY",
|
|
156
|
-
]);
|
|
157
|
-
|
|
158
150
|
/**
|
|
159
151
|
* Capture environment variables from a running Docker container so they
|
|
160
152
|
* can be replayed onto the replacement container after upgrade.
|
|
@@ -183,6 +175,93 @@ export async function captureContainerEnv(
|
|
|
183
175
|
return captured;
|
|
184
176
|
}
|
|
185
177
|
|
|
178
|
+
/**
 * Reduce a captured container env to the entries that are safe to replay
 * onto a replacement container.
 *
 * Two groups of keys are removed:
 *  - every key `buildServiceRunArgs` sets itself (spec static/secret
 *    entries, builder-computed extras, PATH). Spec-managed secrets come back
 *    in through the dedicated `DockerRunSecrets` path, so adding a secret to
 *    the spec excludes it here automatically;
 *  - spec host-forwarded keys whose host variable is currently set, so the
 *    fresh host value wins over the stale captured one.
 *
 * Security contract: the returned env is memory-only — never persist it to
 * disk, and log counts only, never values.
 *
 * @param capturedEnv Env captured from the outgoing container.
 * @param service Service whose builder-managed keys should be excluded.
 * @param spec Optional spec override passed to `getBuilderManagedEnvKeys`.
 * @returns A new object holding only the replayable entries.
 */
export function buildReplayEnv(
  capturedEnv: Record<string, string>,
  service: ServiceName,
  spec?: DockerStatefulSetSpec,
): Record<string, string> {
  const managed = getBuilderManagedEnvKeys(service, spec);

  // Container-side names whose host variable is set right now.
  const freshFromHost = new Set<string>();
  for (const { name, hostVar } of managed.hostForwarded) {
    if (process.env[hostVar]) {
      freshFromHost.add(name);
    }
  }

  const replayable = Object.entries(capturedEnv).filter(
    ([key]) => !(managed.always.has(key) || freshFromHost.has(key)),
  );
  return Object.fromEntries(replayable);
}
|
|
207
|
+
|
|
208
|
+
/**
 * Everything carried over from the outgoing containers to their
 * replacements: the secrets plus the filtered per-service replay env.
 */
interface ReplayState {
  /** `GUARDIAN_BOOTSTRAP_SECRET` as captured from the gateway env, if any. */
  bootstrapSecret: string | undefined;
  /** Captured `CES_SERVICE_TOKEN`, or a freshly generated value. */
  cesServiceToken: string;
  /** Captured `ACTOR_TOKEN_SIGNING_KEY`, or a freshly generated value. */
  signingKey: string;
  /** Replayable entries from the assistant container env. */
  extraAssistantEnv: Record<string, string>;
  /** Replayable entries from the gateway container env. */
  extraGatewayEnv: Record<string, string>;
}
|
|
216
|
+
|
|
217
|
+
/**
 * Derive the secrets and replay env for replacement containers from
 * already-captured assistant/gateway envs.
 *
 * - `GUARDIAN_BOOTSTRAP_SECRET` is read from the gateway env only (it is not
 *   set on the assistant).
 * - `CES_SERVICE_TOKEN` and `ACTOR_TOKEN_SIGNING_KEY` fall back to fresh
 *   random values for instances that predate them.
 * - `VELLUM_DEVICE_ID` is backfilled from the host for gateways hatched
 *   before device-id injection. A non-blank captured value wins (it was
 *   itself host-derived); a missing, empty, or whitespace-only captured
 *   value is replaced, matching how `getOrCreateHostDeviceId` treats a blank
 *   `VELLUM_DEVICE_ID` as unset.
 *
 * @param capturedEnv Env captured from the assistant container.
 * @param gatewayEnv Env captured from the gateway container.
 * @returns Secrets and per-service replay env for the new containers.
 */
export function buildReplayState(
  capturedEnv: Record<string, string>,
  gatewayEnv: Record<string, string>,
): ReplayState {
  const extraGatewayEnv = buildReplayEnv(gatewayEnv, "gateway");
  // `??=` would keep a captured empty string and replay a blank device ID;
  // treat blank the same as absent so the host ID is injected instead.
  if (!extraGatewayEnv.VELLUM_DEVICE_ID?.trim()) {
    extraGatewayEnv.VELLUM_DEVICE_ID = getOrCreateHostDeviceId();
  }

  return {
    bootstrapSecret: gatewayEnv["GUARDIAN_BOOTSTRAP_SECRET"],
    cesServiceToken:
      capturedEnv["CES_SERVICE_TOKEN"] || randomBytes(32).toString("hex"),
    signingKey:
      capturedEnv["ACTOR_TOKEN_SIGNING_KEY"] || randomBytes(32).toString("hex"),
    extraAssistantEnv: buildReplayEnv(capturedEnv, "assistant"),
    extraGatewayEnv,
  };
}
|
|
242
|
+
|
|
243
|
+
/**
 * Read the env of the running assistant and gateway containers (in
 * parallel) and turn them into the replay state for their replacements.
 *
 * Only the number of assistant env vars is logged, never their values, in
 * keeping with the security contract on `buildReplayEnv`.
 *
 * @param res Container names of the assistant and gateway to capture from.
 * @returns The replay state built by `buildReplayState`.
 */
export async function captureReplayState(
  res: Pick<
    ReturnType<typeof dockerResourceNames>,
    "assistantContainer" | "gatewayContainer"
  >,
): Promise<ReplayState> {
  console.log("💾 Capturing existing container environment...");

  const { assistantContainer, gatewayContainer } = res;
  const [assistantEnv, gatewayEnv] = await Promise.all([
    captureContainerEnv(assistantContainer),
    captureContainerEnv(gatewayContainer),
  ]);

  const capturedCount = Object.keys(assistantEnv).length;
  console.log(
    ` Captured ${capturedCount} env var(s) from ${assistantContainer}\n`,
  );

  return buildReplayState(assistantEnv, gatewayEnv);
}
|
|
264
|
+
|
|
186
265
|
/**
|
|
187
266
|
* Best-effort fetch of the running service group version from the gateway
|
|
188
267
|
* `/healthz` endpoint. Returns `undefined` when the endpoint is
|
|
@@ -581,37 +660,13 @@ export async function performDockerRollback(
|
|
|
581
660
|
console.warn("⚠️ Pre-rollback backup failed (continuing with rollback)\n");
|
|
582
661
|
}
|
|
583
662
|
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
// Capture GUARDIAN_BOOTSTRAP_SECRET from the gateway container (it is only
|
|
592
|
-
// set on gateway, not assistant) so it persists across container restarts.
|
|
593
|
-
const gatewayEnv = await captureContainerEnv(res.gatewayContainer);
|
|
594
|
-
const bootstrapSecret = gatewayEnv["GUARDIAN_BOOTSTRAP_SECRET"];
|
|
595
|
-
|
|
596
|
-
const cesServiceToken =
|
|
597
|
-
capturedEnv["CES_SERVICE_TOKEN"] || randomBytes(32).toString("hex");
|
|
598
|
-
|
|
599
|
-
const signingKey =
|
|
600
|
-
capturedEnv["ACTOR_TOKEN_SIGNING_KEY"] || randomBytes(32).toString("hex");
|
|
601
|
-
|
|
602
|
-
// Build extra env vars, excluding keys managed by buildServiceRunArgs
|
|
603
|
-
const envKeysSetByRunArgs = new Set(CONTAINER_ENV_EXCLUDE_KEYS);
|
|
604
|
-
for (const envVar of ["ANTHROPIC_API_KEY", "VELLUM_PLATFORM_URL"]) {
|
|
605
|
-
if (process.env[envVar]) {
|
|
606
|
-
envKeysSetByRunArgs.add(envVar);
|
|
607
|
-
}
|
|
608
|
-
}
|
|
609
|
-
const extraAssistantEnv: Record<string, string> = {};
|
|
610
|
-
for (const [key, value] of Object.entries(capturedEnv)) {
|
|
611
|
-
if (!envKeysSetByRunArgs.has(key)) {
|
|
612
|
-
extraAssistantEnv[key] = value;
|
|
613
|
-
}
|
|
614
|
-
}
|
|
663
|
+
const {
|
|
664
|
+
bootstrapSecret,
|
|
665
|
+
cesServiceToken,
|
|
666
|
+
signingKey,
|
|
667
|
+
extraAssistantEnv,
|
|
668
|
+
extraGatewayEnv,
|
|
669
|
+
} = await captureReplayState(res);
|
|
615
670
|
|
|
616
671
|
// Parse gateway port from entry's runtimeUrl
|
|
617
672
|
let gatewayPort = GATEWAY_INTERNAL_PORT;
|
|
@@ -684,6 +739,7 @@ export async function performDockerRollback(
|
|
|
684
739
|
bootstrapSecret,
|
|
685
740
|
cesServiceToken,
|
|
686
741
|
extraAssistantEnv,
|
|
742
|
+
extraGatewayEnv,
|
|
687
743
|
gatewayPort,
|
|
688
744
|
imageTags: targetImageTags,
|
|
689
745
|
instanceName,
|
|
@@ -801,6 +857,7 @@ export async function performDockerRollback(
|
|
|
801
857
|
bootstrapSecret,
|
|
802
858
|
cesServiceToken,
|
|
803
859
|
extraAssistantEnv,
|
|
860
|
+
extraGatewayEnv,
|
|
804
861
|
gatewayPort,
|
|
805
862
|
imageTags: currentImageRefs,
|
|
806
863
|
instanceName,
|