@vellumai/cli 0.6.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/AGENTS.md +12 -2
  2. package/README.md +3 -3
  3. package/bunfig.toml +6 -0
  4. package/package.json +1 -1
  5. package/src/__tests__/assistant-config.test.ts +124 -0
  6. package/src/__tests__/env-drift.test.ts +87 -0
  7. package/src/__tests__/guardian-token.test.ts +172 -0
  8. package/src/__tests__/multi-local.test.ts +61 -14
  9. package/src/__tests__/orphan-detection.test.ts +214 -0
  10. package/src/__tests__/platform-client.test.ts +204 -0
  11. package/src/__tests__/preload.ts +27 -0
  12. package/src/__tests__/ssh-user-guard.test.ts +28 -0
  13. package/src/__tests__/teleport.test.ts +1073 -56
  14. package/src/commands/backup.ts +8 -0
  15. package/src/commands/hatch.ts +1 -1
  16. package/src/commands/login.ts +178 -9
  17. package/src/commands/logs.ts +652 -0
  18. package/src/commands/pair.ts +9 -1
  19. package/src/commands/ps.ts +37 -7
  20. package/src/commands/recover.ts +8 -4
  21. package/src/commands/restore.ts +8 -0
  22. package/src/commands/retire.ts +16 -9
  23. package/src/commands/rollback.ts +32 -33
  24. package/src/commands/ssh-apple-container.ts +162 -0
  25. package/src/commands/ssh.ts +7 -0
  26. package/src/commands/teleport.ts +226 -1
  27. package/src/commands/upgrade.ts +43 -52
  28. package/src/commands/wake.ts +14 -10
  29. package/src/components/DefaultMainScreen.tsx +7 -1
  30. package/src/index.ts +3 -0
  31. package/src/lib/__tests__/docker.test.ts +78 -0
  32. package/src/lib/assistant-config.ts +48 -87
  33. package/src/lib/aws.ts +12 -1
  34. package/src/lib/constants.ts +0 -10
  35. package/src/lib/docker.ts +70 -4
  36. package/src/lib/environments/__tests__/paths.test.ts +234 -0
  37. package/src/lib/environments/__tests__/resolve.test.ts +226 -0
  38. package/src/lib/environments/paths.ts +110 -0
  39. package/src/lib/environments/resolve.ts +96 -0
  40. package/src/lib/environments/seeds.ts +46 -0
  41. package/src/lib/environments/types.ts +60 -0
  42. package/src/lib/gcp.ts +12 -1
  43. package/src/lib/guardian-token.ts +8 -10
  44. package/src/lib/hatch-local.ts +24 -19
  45. package/src/lib/local.ts +46 -5
  46. package/src/lib/orphan-detection.ts +28 -12
  47. package/src/lib/platform-client.ts +220 -24
  48. package/src/lib/retire-apple-container.ts +102 -0
  49. package/src/lib/upgrade-lifecycle.ts +101 -28
package/src/lib/__tests__/docker.test.ts ADDED
@@ -0,0 +1,78 @@
1
+ import { describe, test, expect } from "bun:test";
2
+ import {
3
+ ASSISTANT_INTERNAL_PORT,
4
+ dockerResourceNames,
5
+ serviceDockerRunArgs,
6
+ type ServiceName,
7
+ } from "../docker.js";
8
+
9
+ const instanceName = "test-instance";
10
+ const imageTags: Record<ServiceName, string> = {
11
+ assistant: "vellumai/vellum-assistant:test",
12
+ "credential-executor": "vellumai/vellum-credential-executor:test",
13
+ gateway: "vellumai/vellum-gateway:test",
14
+ };
15
+
16
+ function buildAssistantArgs(): string[] {
17
+ const res = dockerResourceNames(instanceName);
18
+ const builders = serviceDockerRunArgs({
19
+ gatewayPort: 7830,
20
+ imageTags,
21
+ instanceName,
22
+ res,
23
+ });
24
+ return builders.assistant();
25
+ }
26
+
27
+ describe("serviceDockerRunArgs — assistant", () => {
28
+ test("runs privileged so the inner dockerd can manage cgroups/iptables/overlayfs", () => {
29
+ const args = buildAssistantArgs();
30
+ expect(args).toContain("--privileged");
31
+ });
32
+
33
+ test("mounts a dedicated named volume at /var/lib/docker for the inner dockerd data store", () => {
34
+ const args = buildAssistantArgs();
35
+ const spec = `${instanceName}-dockerd-data:/var/lib/docker`;
36
+ const mountIndex = args.indexOf(spec);
37
+ expect(mountIndex).toBeGreaterThan(0);
38
+ expect(args[mountIndex - 1]).toBe("-v");
39
+ });
40
+
41
+ test("does NOT bind-mount the host Docker socket (DinD replaces host-socket access)", () => {
42
+ const args = buildAssistantArgs();
43
+ expect(args).not.toContain("/var/run/docker.sock:/var/run/docker.sock");
44
+ });
45
+
46
+ test("does NOT set VELLUM_WORKSPACE_VOLUME_NAME (legacy Phase 1.8 hint, no longer needed in DinD)", () => {
47
+ const args = buildAssistantArgs();
48
+ expect(
49
+ args.some((a) => a.startsWith("VELLUM_WORKSPACE_VOLUME_NAME=")),
50
+ ).toBe(false);
51
+ });
52
+
53
+ test("keeps existing workspace and socket volume mounts intact", () => {
54
+ const args = buildAssistantArgs();
55
+ expect(args).toContain(`${instanceName}-workspace:/workspace`);
56
+ expect(args).toContain(`${instanceName}-socket:/run/ces-bootstrap`);
57
+ });
58
+
59
+ test("preserves existing required env vars", () => {
60
+ const args = buildAssistantArgs();
61
+ expect(args).toContain("IS_CONTAINERIZED=true");
62
+ expect(args).toContain("VELLUM_WORKSPACE_DIR=/workspace");
63
+ expect(args).toContain(`VELLUM_ASSISTANT_NAME=${instanceName}`);
64
+ });
65
+
66
+ test("publishes the assistant HTTP port on all host interfaces so sibling bot containers can reach the daemon via host.docker.internal on both Docker Desktop and Linux", () => {
67
+ const args = buildAssistantArgs();
68
+ // The port mapping is expressed as two adjacent args: "-p" then the spec.
69
+ // Bound to all interfaces (no `127.0.0.1:` prefix) because on vanilla
70
+ // Linux Docker, host.docker.internal:host-gateway resolves to the Docker
71
+ // bridge gateway IP — packets arrive at the bridge interface, not
72
+ // loopback, so a 127.0.0.1 DNAT rule would not match.
73
+ const portSpec = `${ASSISTANT_INTERNAL_PORT}:${ASSISTANT_INTERNAL_PORT}`;
74
+ const portIndex = args.indexOf(portSpec);
75
+ expect(portIndex).toBeGreaterThan(0);
76
+ expect(args[portIndex - 1]).toBe("-p");
77
+ });
78
+ });
package/src/lib/assistant-config.ts CHANGED
@@ -8,7 +8,7 @@ import {
8
8
  writeFileSync,
9
9
  } from "fs";
10
10
  import { homedir } from "os";
11
- import { join } from "path";
11
+ import { dirname, join } from "path";
12
12
 
13
13
  import {
14
14
  DAEMON_INTERNAL_ASSISTANT_ID,
@@ -16,8 +16,13 @@ import {
16
16
  DEFAULT_DAEMON_PORT,
17
17
  DEFAULT_GATEWAY_PORT,
18
18
  DEFAULT_QDRANT_PORT,
19
- LOCKFILE_NAMES,
20
19
  } from "./constants.js";
20
+ import {
21
+ getLockfilePath,
22
+ getLockfilePaths,
23
+ getMultiInstanceDir,
24
+ } from "./environments/paths.js";
25
+ import { getCurrentEnvironment } from "./environments/resolve.js";
21
26
  import { probePort } from "./port-probe.js";
22
27
 
23
28
  /**
@@ -27,10 +32,11 @@ import { probePort } from "./port-probe.js";
27
32
  */
28
33
  export interface LocalInstanceResources {
29
34
  /**
30
- * Instance-specific data root. The first local assistant uses `~` (home
31
- * directory) with default ports. Subsequent instances are placed under
32
- * `~/.local/share/vellum/assistants/<name>/`.
33
- * The daemon's `.vellum/` directory lives inside it.
35
+ * Instance-specific data root. New local assistants are placed under
36
+ * `$XDG_DATA_HOME/vellum{-env}/assistants/<name>/`. Legacy entries
37
+ * (pre env-data-layout) may still point at `~` — the read path honors
38
+ * whatever `instanceDir` is stored. The daemon's `.vellum/` directory
39
+ * lives inside it.
34
40
  */
35
41
  instanceDir: string;
36
42
  /** HTTP port for the daemon runtime server */
@@ -84,18 +90,17 @@ export interface AssistantEntry {
84
90
  resources?: LocalInstanceResources;
85
91
  /** PID of the file watcher process for docker instances hatched with --watch. */
86
92
  watcherPid?: number;
87
- /** Last-known version of the service group, populated at hatch and updated by health checks. */
88
- serviceGroupVersion?: string;
89
93
  /** Docker image metadata for rollback. Only present for docker topology entries. */
90
94
  containerInfo?: ContainerInfo;
91
- /** The service group version that was running before the last upgrade. */
92
- previousServiceGroupVersion?: string;
93
95
  /** Docker image metadata from before the last upgrade. Enables rollback to the prior version. */
94
96
  previousContainerInfo?: ContainerInfo;
95
97
  /** Path to the .vbundle backup created for the most recent upgrade. Used by rollback to restore
96
98
  * only the backup from the specific upgrade being rolled back — never a stale backup from a
97
99
  * previous upgrade cycle. */
98
100
  preUpgradeBackupPath?: string;
101
+ /** Running version of the service group at the time of the last upgrade, as reported by
102
+ * the health endpoint. Used by saved-state rollback for logging / broadcast events. */
103
+ previousVersion?: string;
99
104
  /** Pre-upgrade DB migration version — used by rollback to know how far back to revert. */
100
105
  previousDbMigrationVersion?: number;
101
106
  /** Pre-upgrade workspace migration ID — used by rollback to know how far back to revert. */
@@ -114,15 +119,8 @@ export function getBaseDir(): string {
114
119
  return process.env.BASE_DATA_DIR?.trim() || homedir();
115
120
  }
116
121
 
117
- /** The lockfile always lives under the home directory. */
118
- function getLockfileDir(): string {
119
- return process.env.VELLUM_LOCKFILE_DIR?.trim() || homedir();
120
- }
121
-
122
122
  function readLockfile(): LockfileData {
123
- const base = getLockfileDir();
124
- const candidates = LOCKFILE_NAMES.map((name) => join(base, name));
125
- for (const lockfilePath of candidates) {
123
+ for (const lockfilePath of getLockfilePaths(getCurrentEnvironment())) {
126
124
  if (!existsSync(lockfilePath)) continue;
127
125
  try {
128
126
  const raw = readFileSync(lockfilePath, "utf-8");
@@ -138,7 +136,8 @@ function readLockfile(): LockfileData {
138
136
  }
139
137
 
140
138
  function writeLockfile(data: LockfileData): void {
141
- const lockfilePath = join(getLockfileDir(), LOCKFILE_NAMES[0]);
139
+ const lockfilePath = getLockfilePath(getCurrentEnvironment());
140
+ mkdirSync(dirname(lockfilePath), { recursive: true });
142
141
  const tmpPath = `${lockfilePath}.${randomBytes(4).toString("hex")}.tmp`;
143
142
  try {
144
143
  writeFileSync(tmpPath, JSON.stringify(data, null, 2) + "\n");
@@ -187,6 +186,7 @@ export function migrateLegacyEntry(raw: Record<string, unknown>): boolean {
187
186
  return false;
188
187
  }
189
188
 
189
+ const env = getCurrentEnvironment();
190
190
  let mutated = false;
191
191
 
192
192
  // Migrate top-level `baseDataDir` → `resources.instanceDir`
@@ -208,11 +208,7 @@ export function migrateLegacyEntry(raw: Record<string, unknown>): boolean {
208
208
  const gatewayPort =
209
209
  parsePortFromUrl(raw.runtimeUrl) ?? DEFAULT_GATEWAY_PORT;
210
210
  const instanceDir = join(
211
- homedir(),
212
- ".local",
213
- "share",
214
- "vellum",
215
- "assistants",
211
+ getMultiInstanceDir(env),
216
212
  typeof raw.assistantId === "string"
217
213
  ? raw.assistantId
218
214
  : DAEMON_INTERNAL_ASSISTANT_ID,
@@ -231,11 +227,7 @@ export function migrateLegacyEntry(raw: Record<string, unknown>): boolean {
231
227
  const res = raw.resources as Record<string, unknown>;
232
228
  if (!res.instanceDir) {
233
229
  res.instanceDir = join(
234
- homedir(),
235
- ".local",
236
- "share",
237
- "vellum",
238
- "assistants",
230
+ getMultiInstanceDir(env),
239
231
  typeof raw.assistantId === "string"
240
232
  ? raw.assistantId
241
233
  : DAEMON_INTERNAL_ASSISTANT_ID,
@@ -394,23 +386,6 @@ export function saveAssistantEntry(entry: AssistantEntry): void {
394
386
  writeAssistants(entries);
395
387
  }
396
388
 
397
- /**
398
- * Update just the serviceGroupVersion field on a lockfile entry.
399
- * Reads the current entry, updates the version if changed, and writes back.
400
- * No-op if the entry doesn't exist or the version hasn't changed.
401
- */
402
- export function updateServiceGroupVersion(
403
- assistantId: string,
404
- version: string,
405
- ): void {
406
- const entries = readAssistants();
407
- const entry = entries.find((e) => e.assistantId === assistantId);
408
- if (!entry) return;
409
- if (entry.serviceGroupVersion === version) return;
410
- entry.serviceGroupVersion = version;
411
- writeAssistants(entries);
412
- }
413
-
414
389
  /**
415
390
  * Scan upward from `basePort` to find an available port. A port is considered
416
391
  * available when `probePort()` returns false (nothing listening). Scans up to
@@ -434,58 +409,32 @@ async function findAvailablePort(
434
409
 
435
410
  /**
436
411
  * Allocate an isolated set of resources for a named local instance.
437
- * The first local assistant uses the home directory with default ports.
438
- * Subsequent assistants are placed under
439
- * `~/.local/share/vellum/assistants/<name>/` with scanned ports.
412
+ * Every new local assistant is allocated under
413
+ * `$XDG_DATA_HOME/vellum{-env}/assistants/<name>/`. The legacy `~/.vellum/`
414
+ * path is only reached via existing lockfile entries from before this change
415
+ * — the read path honors whatever `resources.instanceDir` is stored, so
416
+ * production users' existing first-local assistants keep their `~/.vellum/`
417
+ * roots unchanged.
440
418
  */
441
419
  export async function allocateLocalResources(
442
420
  instanceName: string,
443
421
  ): Promise<LocalInstanceResources> {
444
- // First local assistant gets the home directory with default ports.
445
- // Respect BASE_DATA_DIR when set (e.g. in e2e tests) so the daemon,
446
- // gateway, and credential store all resolve paths under the same root.
447
- const existingLocals = loadAllAssistants().filter((e) => e.cloud === "local");
448
- if (existingLocals.length === 0) {
449
- const baseDir = getBaseDir();
450
- const vellumDir = join(baseDir, ".vellum");
451
- return {
452
- instanceDir: baseDir,
453
- daemonPort: DEFAULT_DAEMON_PORT,
454
- gatewayPort: DEFAULT_GATEWAY_PORT,
455
- qdrantPort: DEFAULT_QDRANT_PORT,
456
- cesPort: DEFAULT_CES_PORT,
457
- pidFile: join(vellumDir, "vellum.pid"),
458
- };
459
- }
460
-
461
- const instanceDir = join(
462
- homedir(),
463
- ".local",
464
- "share",
465
- "vellum",
466
- "assistants",
467
- instanceName,
468
- );
422
+ const env = getCurrentEnvironment();
423
+ const instanceDir = join(getMultiInstanceDir(env), instanceName);
469
424
  mkdirSync(instanceDir, { recursive: true });
470
425
 
471
426
  // Collect ports already assigned to other local instances in the lockfile.
472
- // Even if those instances are stopped, we must avoid reusing their ports
473
- // to prevent binding collisions when both are woken.
474
427
  const reservedPorts: number[] = [];
475
428
  for (const entry of loadAllAssistants()) {
476
- if (entry.cloud !== "local") continue;
477
- if (entry.resources) {
478
- reservedPorts.push(
479
- entry.resources.daemonPort,
480
- entry.resources.gatewayPort,
481
- entry.resources.qdrantPort,
482
- entry.resources.cesPort,
483
- );
484
- }
429
+ if (entry.cloud !== "local" || !entry.resources) continue;
430
+ reservedPorts.push(
431
+ entry.resources.daemonPort,
432
+ entry.resources.gatewayPort,
433
+ entry.resources.qdrantPort,
434
+ entry.resources.cesPort,
435
+ );
485
436
  }
486
437
 
487
- // Allocate ports sequentially to avoid overlapping ranges assigning the
488
- // same port to multiple services (e.g. daemon 7821-7920 overlaps gateway 7830-7929).
489
438
  const daemonPort = await findAvailablePort(
490
439
  DEFAULT_DAEMON_PORT,
491
440
  reservedPorts,
@@ -516,6 +465,18 @@ export async function allocateLocalResources(
516
465
  };
517
466
  }
518
467
 
468
+ /**
469
+ * Return `platformBaseUrl` from the lockfile, if set. This is the value
470
+ * persisted by {@link syncConfigToLockfile} the last time the active
471
+ * assistant was hatched/waked, and is the source of truth for "which
472
+ * platform does the currently-active assistant target".
473
+ */
474
+ export function getLockfilePlatformBaseUrl(): string | undefined {
475
+ const url = readLockfile().platformBaseUrl;
476
+ if (typeof url === "string" && url.trim()) return url.trim();
477
+ return undefined;
478
+ }
479
+
519
480
  /**
520
481
  * Read the assistant config file and sync client-relevant values to the
521
482
  * lockfile. This lets external tools (e.g. vel) discover the platform URL
package/src/lib/aws.ts CHANGED
@@ -411,7 +411,18 @@ export async function hatchAws(
411
411
  }
412
412
  }
413
413
 
414
- const sshUser = userInfo().username;
414
+ let sshUser: string;
415
+ try {
416
+ sshUser = userInfo().username;
417
+ } catch {
418
+ sshUser = process.env.USER ?? "";
419
+ }
420
+ if (!sshUser) {
421
+ console.error(
422
+ "Error: Could not determine SSH username. Set the USER environment variable and try again.",
423
+ );
424
+ process.exit(1);
425
+ }
415
426
  const hatchedBy = process.env.VELLUM_HATCHED_BY;
416
427
  const providerApiKeys: Record<string, string> = {};
417
428
  for (const [, envVar] of Object.entries(PROVIDER_ENV_VAR_NAMES)) {
package/src/lib/constants.ts CHANGED
@@ -16,16 +16,6 @@ export const DEFAULT_GATEWAY_PORT = 7830;
16
16
  export const DEFAULT_QDRANT_PORT = 6333;
17
17
  export const DEFAULT_CES_PORT = 8090;
18
18
 
19
- /**
20
- * Lockfile candidate filenames, checked in priority order.
21
- * `.vellum.lock.json` is the current name; `.vellum.lockfile.json` is the
22
- * legacy name kept for backwards compatibility with older installs.
23
- */
24
- export const LOCKFILE_NAMES = [
25
- ".vellum.lock.json",
26
- ".vellum.lockfile.json",
27
- ] as const;
28
-
29
19
  export const VALID_REMOTE_HOSTS = [
30
20
  "local",
31
21
  "gcp",
package/src/lib/docker.ts CHANGED
@@ -39,13 +39,17 @@ export const DOCKERHUB_IMAGES: Record<ServiceName, string> = {
39
39
  };
40
40
 
41
41
  /** Internal ports exposed by each service's Dockerfile. */
42
- export const ASSISTANT_INTERNAL_PORT = 3001;
42
+ export const ASSISTANT_INTERNAL_PORT = 7821;
43
43
  export const GATEWAY_INTERNAL_PORT = 7830;
44
44
 
45
45
  /** Max time to wait for the assistant container to emit the readiness sentinel. */
46
46
  export const DOCKER_READY_TIMEOUT_MS = 3 * 60 * 1000;
47
47
 
48
+ /** Default memory (GiB) allocated to the Colima VM. */
49
+ const COLIMA_DEFAULT_MEMORY_GIB = 8;
50
+
48
51
  /** Directory for user-local binary installs (no sudo required). */
52
+
49
53
  const LOCAL_BIN_DIR = join(
50
54
  process.env.HOME || process.env.USERPROFILE || ".",
51
55
  ".local",
@@ -294,7 +298,11 @@ async function ensureDockerInstalled(): Promise<void> {
294
298
 
295
299
  console.log("🚀 Docker daemon not running. Starting Colima...");
296
300
  try {
297
- await exec("colima", ["start"]);
301
+ await exec("colima", [
302
+ "start",
303
+ "--memory",
304
+ String(COLIMA_DEFAULT_MEMORY_GIB),
305
+ ]);
298
306
  } catch {
299
307
  // Colima may fail if a previous VM instance is in a corrupt state.
300
308
  // Attempt to delete the stale instance and retry once.
@@ -311,7 +319,11 @@ async function ensureDockerInstalled(): Promise<void> {
311
319
 
312
320
  try {
313
321
  console.log("🔄 Retrying colima start...");
314
- await exec("colima", ["start"]);
322
+ await exec("colima", [
323
+ "start",
324
+ "--memory",
325
+ String(COLIMA_DEFAULT_MEMORY_GIB),
326
+ ]);
315
327
  } catch (retryErr) {
316
328
  const message =
317
329
  retryErr instanceof Error ? retryErr.message : String(retryErr);
@@ -329,6 +341,7 @@ export function dockerResourceNames(instanceName: string) {
329
341
  assistantContainer: `${instanceName}-assistant`,
330
342
  cesContainer: `${instanceName}-credential-executor`,
331
343
  cesSecurityVolume: `${instanceName}-ces-sec`,
344
+ dockerdDataVolume: `${instanceName}-dockerd-data`,
332
345
  gatewayContainer: `${instanceName}-gateway`,
333
346
  gatewaySecurityVolume: `${instanceName}-gateway-sec`,
334
347
  network: `${instanceName}-net`,
@@ -388,6 +401,7 @@ export async function retireDocker(name: string): Promise<void> {
388
401
  res.workspaceVolume,
389
402
  res.cesSecurityVolume,
390
403
  res.gatewaySecurityVolume,
404
+ res.dockerdDataVolume,
391
405
  ]) {
392
406
  try {
393
407
  await exec("docker", ["volume", "rm", vol]);
@@ -551,19 +565,53 @@ export function serviceDockerRunArgs(opts: {
551
565
  } = opts;
552
566
  return {
553
567
  assistant: () => {
568
+ // Run the assistant container in Docker-in-Docker (DinD) mode: the
569
+ // container runs its own `dockerd` so the Meet subsystem can spawn
570
+ // sibling meet-bot containers without needing access to the host's
571
+ // Docker engine. This requires:
572
+ // - `--privileged` so the inner dockerd can manage cgroups, iptables,
573
+ // overlayfs mounts, etc.
574
+ // - A dedicated named volume mounted at `/var/lib/docker` so the
575
+ // inner Docker image cache and container state survive restarts of
576
+ // the assistant container.
577
+ // The host's `/var/run/docker.sock` is intentionally NOT mounted — all
578
+ // Meet-bot spawning happens against the inner dockerd.
554
579
  const args: string[] = [
555
580
  "run",
556
581
  "--init",
557
582
  "-d",
583
+ "--privileged",
558
584
  "--name",
559
585
  res.assistantContainer,
560
586
  `--network=${res.network}`,
561
587
  "-p",
562
588
  `${gatewayPort}:${GATEWAY_INTERNAL_PORT}`,
589
+ // Published so the Meet subsystem's sibling bot containers can reach
590
+ // the daemon's internal HTTP API at host.docker.internal:<port>.
591
+ //
592
+ // Published on all host interfaces (no `127.0.0.1:` prefix) because on
593
+ // vanilla Linux Docker, `host.docker.internal:host-gateway` resolves
594
+ // to the Docker bridge gateway IP (e.g. 172.17.0.1), not loopback.
595
+ // Packets from sibling containers arrive at the host's bridge
596
+ // interface, and an iptables DNAT rule keyed on dest=127.0.0.1 would
597
+ // not match — causing connection refused. Docker Desktop (macOS/
598
+ // Windows) still works because its VM proxy forwards to the same
599
+ // published port regardless of the binding address.
600
+ //
601
+ // Security tradeoff: the daemon HTTP API is now reachable from the
602
+ // host's LAN (any device that can hit the host IP on this port).
603
+ // This matches the gateway port's existing posture and is acceptable
604
+ // for single-user self-hosted Docker mode per the Phase 1.8 security
605
+ // note. Managed/multi-tenant deployments are out of scope and would
606
+ // require a different design.
607
+ "-p",
608
+ `${ASSISTANT_INTERNAL_PORT}:${ASSISTANT_INTERNAL_PORT}`,
563
609
  "-v",
564
610
  `${res.workspaceVolume}:/workspace`,
565
611
  "-v",
566
612
  `${res.socketVolume}:/run/ces-bootstrap`,
613
+ "-v",
614
+ `${res.dockerdDataVolume}:/var/lib/docker`,
567
615
  "-e",
568
616
  "IS_CONTAINERIZED=true",
569
617
  "-e",
@@ -575,6 +623,10 @@ export function serviceDockerRunArgs(opts: {
575
623
  "-e",
576
624
  "VELLUM_WORKSPACE_DIR=/workspace",
577
625
  "-e",
626
+ "VELLUM_BACKUP_DIR=/workspace/.backups",
627
+ "-e",
628
+ "VELLUM_BACKUP_KEY_PATH=/workspace/.backup.key",
629
+ "-e",
578
630
  "CES_CREDENTIAL_URL=http://localhost:8090",
579
631
  "-e",
580
632
  `GATEWAY_INTERNAL_URL=http://localhost:${GATEWAY_INTERNAL_PORT}`,
@@ -596,6 +648,7 @@ export function serviceDockerRunArgs(opts: {
596
648
  }
597
649
  for (const envVar of [
598
650
  ...Object.values(PROVIDER_ENV_VAR_NAMES),
651
+ "VELLUM_ENVIRONMENT",
599
652
  "VELLUM_PLATFORM_URL",
600
653
  ]) {
601
654
  if (process.env[envVar]) {
@@ -644,6 +697,9 @@ export function serviceDockerRunArgs(opts: {
644
697
  ...(opts.bootstrapSecret
645
698
  ? ["-e", `GUARDIAN_BOOTSTRAP_SECRET=${opts.bootstrapSecret}`]
646
699
  : []),
700
+ ...(process.env.VELLUM_ENVIRONMENT
701
+ ? ["-e", `VELLUM_ENVIRONMENT=${process.env.VELLUM_ENVIRONMENT}`]
702
+ : []),
647
703
  ...(process.env.VELLUM_PLATFORM_URL
648
704
  ? ["-e", `VELLUM_PLATFORM_URL=${process.env.VELLUM_PLATFORM_URL}`]
649
705
  : []),
@@ -700,6 +756,16 @@ export async function startContainers(
700
756
  },
701
757
  log: (msg: string) => void,
702
758
  ): Promise<void> {
759
+ // Ensure the inner dockerd's data volume exists before mounting it.
760
+ // For instances hatched on Phase 1.10+, this is created in hatchDocker and
761
+ // is a no-op here. For instances that pre-date Phase 1.10 (DinD) and are
762
+ // upgrading in place, Docker would otherwise auto-create the volume on
763
+ // first `-v` mount without our standard ownership/labeling. Creating it
764
+ // explicitly keeps volume provenance consistent across fresh and upgraded
765
+ // instances. `docker volume create` is idempotent for an existing volume
766
+ // of the same name, so this is safe to run on every start.
767
+ await exec("docker", ["volume", "create", opts.res.dockerdDataVolume]);
768
+
703
769
  const runArgs = serviceDockerRunArgs(opts);
704
770
  for (const service of SERVICE_START_ORDER) {
705
771
  log(`🚀 Starting ${service} container...`);
@@ -1110,6 +1176,7 @@ export async function hatchDocker(
1110
1176
  await exec("docker", ["volume", "create", res.workspaceVolume]);
1111
1177
  await exec("docker", ["volume", "create", res.cesSecurityVolume]);
1112
1178
  await exec("docker", ["volume", "create", res.gatewaySecurityVolume]);
1179
+ await exec("docker", ["volume", "create", res.dockerdDataVolume]);
1113
1180
 
1114
1181
  // Set workspace volume ownership so non-root containers (UID 1001) can write.
1115
1182
  await exec("docker", [
@@ -1165,7 +1232,6 @@ export async function hatchDocker(
1165
1232
  cloud: "docker",
1166
1233
  species,
1167
1234
  hatchedAt: new Date().toISOString(),
1168
- serviceGroupVersion: cliPkg.version ? `v${cliPkg.version}` : undefined,
1169
1235
  containerInfo: {
1170
1236
  assistantImage: imageTags.assistant,
1171
1237
  gatewayImage: imageTags.gateway,