npm - @openpalm/lib - Versions diffs - 0.11.0 → 0.11.2-rc.1 - Mend

@openpalm/lib 0.11.0 → 0.11.2-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/package.json +1 -1
package/src/control-plane/config-persistence.ts +37 -3
package/src/control-plane/docker.test.ts +61 -0
package/src/control-plane/docker.ts +92 -1
package/src/control-plane/hardware-detect.ts +146 -0
package/src/control-plane/lifecycle.ts +68 -18
package/src/control-plane/registry.ts +10 -1
package/src/control-plane/setup-recommendation.test.ts +146 -0
package/src/control-plane/setup-recommendation.ts +127 -0
package/src/control-plane/upgrade-path.test.ts +113 -0
package/src/index.ts +17 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@openpalm/lib",
-  "version": "0.11.0",
+  "version": "0.11.2-rc.1",
   "license": "MPL-2.0",
   "type": "module",
   "description": "Shared control-plane library for OpenPalm — lifecycle, staging, secrets, channels, connections, scheduler",

package/src/control-plane/config-persistence.ts CHANGED Viewed

@@ -5,15 +5,16 @@
  * Files are validated in-place before writing; rollback is handled by
  * the rollback module (snapshot to OP_HOME/data/rollback/).
  */
-import { mkdirSync, writeFileSync, readFileSync, existsSync, chmodSync } from "node:fs";
+import { mkdirSync, writeFileSync, readFileSync, existsSync, chmodSync, chownSync } from "node:fs";
 import { dirname, resolve as resolvePath } from "node:path";
 import { parse as yamlParse } from "yaml";
+import { createLogger } from "../logger.js";
 import { parseEnvContent, parseEnvFile, mergeEnvContent, expandEnvVars } from './env.js';
 import { assertNoSecretLikeStackEnvKeys, isSecretLikeStackEnvKey } from './secrets.js';
 import { ensureSecret } from './secrets-files.js';
 import type { ControlPlaneState, ArtifactMeta } from "./types.js";
 import { listEnabledAddonIds } from "./registry.js";
-import { resolveOperatorIds, hasUsableOperatorId } from "./operator-ids.js";
+import { resolveOperatorIds, hasUsableOperatorId, type OperatorIds } from "./operator-ids.js";
 import { SPEC_DEFAULTS } from "./defaults.js";
 import { CURRENT_LAYOUT_VERSION } from "./migrations.js";
@@ -26,6 +27,8 @@ import { sha256, randomHex } from "./crypto.js";
 const DEFAULT_IMAGE_TAG = "latest";
+const logger = createLogger("config-persistence");
 // ── Env File Management ──────────────────────────────────────────────
 /**
@@ -223,6 +226,13 @@ export function ensureComposeVolumeTargets(state: ControlPlaneState): void {
   const composeFiles = discoverStackOverlays(state.stackDir, state.homeDir);
   if (composeFiles.length === 0) return;
+  // Resolve the operator UID/GID compose runs containers as (`user:`), so we
+  // can chown the dirs we pre-create to match. Without this, dirs created by
+  // a root-running install (or a host UID that differs from the forced
+  // container UID) are unwritable inside the non-root container — on OrbStack
+  // real UIDs are preserved, so e.g. ollama's mkdir is denied (issue #452).
+  const operatorIds = resolveOperatorIds(state.homeDir);
   const envVars: Record<string, string> = {
     ...(process.env as Record<string, string>),
     ...parseEnvFile(`${state.stashDir}/env/stack.env`),
@@ -264,16 +274,40 @@ export function ensureComposeVolumeTargets(state: ControlPlaneState): void {
         const isFile = basename.includes('.');
         if (isFile) {
-          mkdirSync(dirname(resolvedHostPath), { recursive: true });
+          const parent = dirname(resolvedHostPath);
+          mkdirSync(parent, { recursive: true });
           writeFileSync(resolvedHostPath, '');
+          chownVolumeTarget(parent, operatorIds);
+          chownVolumeTarget(resolvedHostPath, operatorIds);
         } else {
           mkdirSync(resolvedHostPath, { recursive: true });
+          chownVolumeTarget(resolvedHostPath, operatorIds);
         }
       }
     }
   }
 }
+/**
+ * Hand a freshly created bind-mount target over to the operator UID/GID so
+ * the non-root container (`user: ${OP_UID}:${OP_GID}`) is able to write to it.
+ *
+ * Does nothing on Windows (chown is meaningless there) or when `operatorIds`
+ * is null. If chownSync throws (e.g. the caller is not the owner) the failure
+ * is logged as a warning and swallowed: the mkdir already succeeded and Docker
+ * Desktop's gRPC-FUSE masks ownership anyway, so a chown failure must not
+ * abort the install.
+ *
+ * @param path        Host path of the directory or file that was just created.
+ * @param operatorIds Resolved operator ids, or null when none could be resolved.
+ */
+function chownVolumeTarget(path: string, operatorIds: OperatorIds | null): void {
+  if (!operatorIds) return;
+  if (process.platform === "win32") return;
+  try {
+    chownSync(path, operatorIds.uid, operatorIds.gid);
+  } catch (error) {
+    const reason = error instanceof Error ? error.message : String(error);
+    logger.warn(
+      `Could not chown volume target ${path} to ${operatorIds.uid}:${operatorIds.gid}: ${reason}`
+    );
+  }
+}
 // ── Persistence (direct-write to live paths) ────────────────────────
 export function writeRuntimeFiles(

package/src/control-plane/docker.test.ts ADDED Viewed

@@ -0,0 +1,61 @@
+import { afterEach, describe, expect, it } from "bun:test";
+import {
+  detectExistingProject,
+  isProjectOurs,
+  resolveComposeProjectName,
+} from "./docker.js";
+describe("isProjectOurs (ours-vs-foreign decision)", () => { // pure string checks — no Docker daemon is touched
+  it("treats a matching working_dir as ours", () => {
+    expect(isProjectOurs("/home/me/.openpalm", "/home/me/.openpalm")).toBe(true);
+  });
+  it("treats a different working_dir as foreign", () => {
+    expect(isProjectOurs("/home/other/.openpalm", "/home/me/.openpalm")).toBe(false);
+  });
+  it("treats an empty/unknown working_dir as ours (reconcile, don't refuse)", () => {
+    expect(isProjectOurs("", "/home/me/.openpalm")).toBe(true);
+    expect(isProjectOurs("   ", "/home/me/.openpalm")).toBe(true); // whitespace-only trims down to the empty label
+  });
+  it("ignores surrounding whitespace on the label", () => {
+    expect(isProjectOurs("  /home/me/.openpalm \n", "/home/me/.openpalm")).toBe(true); // label is trimmed before comparing
+  });
+});
+describe("detectExistingProject", () => {
+  // Use a project name that cannot possibly match any running container so the
+  // result is deterministic whether or not a docker daemon is present:
+  //  - docker error (no daemon)      → { exists:false }
+  //  - docker ok, no matching label  → { exists:false }
+  const ghostName = `openpalm-detect-test-${Date.now()}-${Math.random().toString(36).slice(2)}`; // timestamp + random suffix keeps the name unique per run
+  it("returns exists:false when no project matches (or docker is unavailable)", async () => {
+    const result = await detectExistingProject({
+      projectName: ghostName,
+      expectedWorkingDir: "/nonexistent/op_home",
+    });
+    expect(result.exists).toBe(false);
+    expect(result.isOurs).toBe(false); // the "nothing found" result never claims ownership
+    expect(result.workingDir).toBe(""); // and carries no working_dir label
+  });
+});
+describe("resolveComposeProjectName", () => {
+  // Snapshot EVERY env var the cases below mutate. The "defaults" case deletes
+  // COMPOSE_PROJECT_NAME as well as OP_PROJECT_NAME; restoring only one of them
+  // would leak the deletion into whatever suite runs next in this process.
+  const mutatedKeys = ["OP_PROJECT_NAME", "COMPOSE_PROJECT_NAME"] as const;
+  const saved = new Map<string, string | undefined>(
+    mutatedKeys.map((key): [string, string | undefined] => [key, process.env[key]]),
+  );
+  afterEach(() => {
+    for (const [key, value] of saved) {
+      if (value === undefined) delete process.env[key];
+      else process.env[key] = value;
+    }
+  });
+  it("defaults to openpalm", () => {
+    delete process.env.OP_PROJECT_NAME;
+    delete process.env.COMPOSE_PROJECT_NAME;
+    expect(resolveComposeProjectName({})).toBe("openpalm");
+  });
+  it("honors OP_PROJECT_NAME from overrides first", () => {
+    expect(resolveComposeProjectName({ OP_PROJECT_NAME: "openpalm-dev" })).toBe("openpalm-dev");
+  });
+});

package/src/control-plane/docker.ts CHANGED Viewed

@@ -51,6 +51,83 @@ export function resolveComposeProjectName(envOverrides: Record<string, string> =
   );
 }
+/**
+ * Result of probing the Docker daemon for an existing compose project that
+ * shares our project name.
+ *
+ * - `exists`   — at least one running container carries the project label
+ *                (the probe lists containers with `docker ps`, without `-a`).
+ * - `isOurs`   — those containers were launched from THIS install's working
+ *                dir (compose working_dir label === expectedWorkingDir). When
+ *                true the caller should reconcile in place (up --force-recreate).
+ *                When false a DIFFERENT OpenPalm install (e.g. dev vs host) owns
+ *                the name and the caller must refuse. Always false when
+ *                `exists` is false, so check `exists` first.
+ * - `workingDir` — the working_dir label read off the first container, for
+ *                error messages. Empty string when unknown.
+ */
+export type ExistingProject = {
+  exists: boolean;
+  isOurs: boolean;
+  workingDir: string;
+};
+/**
+ * Decide whether a running compose project (identified by its
+ * `com.docker.compose.project.working_dir` label) is OURS — i.e. was launched
+ * from this install's working dir. An empty/unknown label can't prove foreign,
+ * so it counts as ours (reconcile rather than wrongly refuse a redeploy).
+ *
+ * Both paths are compared after trimming surrounding whitespace and dropping
+ * trailing path separators, so `/home/me/.openpalm/` and `/home/me/.openpalm`
+ * are recognised as the same install instead of being refused as foreign.
+ * A bare root (`/`) is kept as-is and is never mistaken for "unknown".
+ *
+ * Pure decision split out from detectExistingProject so the ours-vs-foreign
+ * rule is unit-testable without a Docker daemon.
+ *
+ * @param workingDirLabel    Label value read off a container; may be empty.
+ * @param expectedWorkingDir This install's working dir (OP_HOME / compose context).
+ * @returns true when the label is empty/unknown or names the expected dir.
+ */
+export function isProjectOurs(workingDirLabel: string, expectedWorkingDir: string): boolean {
+  // Canonical form used for comparison only: trimmed, no trailing "/" or "\".
+  const canonical = (dir: string): string => {
+    const trimmed = dir.trim();
+    const stripped = trimmed.replace(/[\\/]+$/, "");
+    // A path made only of separators (e.g. "/") must stay a root, not become "".
+    return stripped === "" ? trimmed.slice(0, 1) : stripped;
+  };
+  const label = canonical(workingDirLabel);
+  return label === "" || label === canonical(expectedWorkingDir);
+}
+/**
+ * Probe the Docker daemon for a running compose project that shares
+ * `projectName`. Decides ours-vs-foreign by comparing the project's
+ * `com.docker.compose.project.working_dir` label against `expectedWorkingDir`
+ * (the install's OP_HOME / compose context).
+ *
+ * Returns `{ exists:false }` when the container listing fails (daemon down, no
+ * permission) or finds nothing — detection is best-effort and never blocks the
+ * caller; a real failure surfaces later through composeUp.
+ *
+ * If the listing succeeds but the follow-up `docker inspect` fails (for
+ * example the container exited between the two calls), the label is simply
+ * unknown. It is then passed through isProjectOurs as an empty label, so the
+ * unknown case follows the same rule as everywhere else instead of being
+ * reported as a foreign install that the caller would refuse.
+ *
+ * @param opts.projectName        Compose project name to look for.
+ * @param opts.expectedWorkingDir Working dir this install would launch from.
+ * @returns Never rejects; see ExistingProject for the field meanings.
+ */
+export async function detectExistingProject(opts: {
+  projectName: string;
+  expectedWorkingDir: string;
+}): Promise<ExistingProject> {
+  const none: ExistingProject = { exists: false, isOurs: false, workingDir: "" };
+  // One docker CLI call: trimmed stdout on success, null on any error/timeout.
+  const docker = (args: string[]): Promise<string | null> =>
+    new Promise((resolve) => {
+      execFile("docker", args, { timeout: 10_000 }, (err, stdout) => {
+        resolve(err ? null : stdout.toString().trim());
+      });
+    });
+
+  const listing = await docker([
+    "ps",
+    "-q",
+    "--filter",
+    `label=com.docker.compose.project=${opts.projectName}`,
+  ]);
+  if (listing === null) return none;
+  // Only the first container is inspected; all containers of one compose
+  // project are expected to share the same working_dir label.
+  const firstId = listing.split(/\s+/).find(Boolean);
+  if (firstId === undefined) return none;
+
+  const label = await docker([
+    "inspect",
+    "--format",
+    '{{ index .Config.Labels "com.docker.compose.project.working_dir" }}',
+    firstId,
+  ]);
+  // null => inspect failed => label unknown => "" (isProjectOurs treats as ours).
+  const workingDir = label ?? "";
+  return { exists: true, isOurs: isProjectOurs(workingDir, opts.expectedWorkingDir), workingDir };
+}
 /** Check if Docker is available */
 export async function checkDocker(): Promise<DockerResult> {
   return new Promise((resolve) => {
@@ -172,7 +249,21 @@ export async function composeUp(
   if (options.forceRecreate) args.push("--force-recreate");
   if (options.removeOrphans) args.push("--remove-orphans");
   if (options.services?.length) args.push(...options.services);
-  return run(args, undefined, 300_000, collectEnvOverrides(options.envFiles));
+  return run(args, undefined, composeUpTimeoutMs(), collectEnvOverrides(options.envFiles));
+}
+/**
+ * Timeout budget for `compose up`. A first install extracts multi-GB images
+ * (voice CUDA ~7.6 GB) onto slow disks; the previous hard 5-minute cap
+ * SIGTERM-killed the start mid-extraction and surfaced as an empty/opaque
+ * error. Default 30 min, override with OP_COMPOSE_UP_TIMEOUT_MS. Kept bounded
+ * (never removed) so a genuinely hung start still eventually fails.
+ *
+ * The override is sanitised before use:
+ *  - unset, blank, non-numeric, non-finite, or < 1 ms  → the 30-minute default;
+ *  - fractional values are floored, because Node's child_process `timeout`
+ *    option only accepts integers;
+ *  - values above 2^31-1 ms are clamped, because larger timer delays overflow
+ *    and fire almost immediately instead of "practically never".
+ *
+ * @returns A positive integer number of milliseconds.
+ */
+function composeUpTimeoutMs(): number {
+  const defaultMs = 30 * 60_000;
+  const maxTimerMs = 2_147_483_647; // largest delay a Node timer can represent
+  const raw = process.env.OP_COMPOSE_UP_TIMEOUT_MS?.trim();
+  if (!raw) return defaultMs;
+  const parsed = Math.floor(Number(raw));
+  if (!Number.isFinite(parsed) || parsed <= 0) return defaultMs;
+  return Math.min(parsed, maxTimerMs);
 }
 /**

package/src/control-plane/hardware-detect.ts ADDED Viewed

@@ -0,0 +1,146 @@
+// Host GPU / VRAM detection for setup recommendations.
+//
+// Data-driven on purpose: each entry in GPU_PROBES is a vendor + a command to
+// run + a pure parser. Adding a new accelerator (Intel Arc, Apple Metal, a new
+// rocm/CUDA query, etc.) is a one-entry change here — nothing downstream needs to
+// know. detectGpu() runs every probe, ignores the ones whose tool is absent, and
+// returns the single best (highest-VRAM) result, or null when no GPU is found.
+import { execFile } from "node:child_process";
+import { createLogger } from "../logger.js";
+const logger = createLogger("hardware-detect");
+export type GpuVendor = "nvidia" | "amd" | "apple" | "unknown"; // the probes in this file only ever emit nvidia, amd or apple
+export type GpuInfo = {
+  vendor: GpuVendor;
+  /** Human-readable adapter name, e.g. "NVIDIA GeForce RTX 4090". */
+  name: string;
+  /**
+   * Total VRAM in MiB. 0 when the tool reported the GPU but not its memory.
+   * For vendor "apple" this is the machine's unified memory (hw.memsize)
+   * shared between CPU and GPU, not discrete VRAM.
+   */
+  vramMb: number;
+};
+type GpuProbe = { // one row of GPU_PROBES: a CLI invocation plus the parser for its stdout
+  vendor: GpuVendor; // used by detectGpu only to label a parse-failure log entry
+  command: string; // executable name, run via execFile (no shell)
+  args: string[];
+  /** Pure parser: tool stdout -> detected GPUs. Must not throw. */
+  parse: (stdout: string) => GpuInfo[];
+  /** Optional gate — when present and false, the probe is skipped entirely. */
+  enabled?: boolean; // omitted (undefined) means the probe runs
+};
+/**
+ * Parse the output of
+ * `nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits`.
+ *
+ * Each non-blank line is expected to look like
+ * `NVIDIA GeForce RTX 4090, 24564`. The line is split on its LAST comma, so
+ * the text before it becomes the name and the text after it is read as an
+ * integer MiB count. Lines without a comma, with an empty name, or with a
+ * non-numeric memory field are dropped.
+ *
+ * @param stdout Raw tool output.
+ * @returns One GpuInfo per well-formed line, in input order.
+ */
+export function parseNvidiaSmi(stdout: string): GpuInfo[] {
+  const gpus: GpuInfo[] = [];
+  for (const rawLine of stdout.split("\n")) {
+    const line = rawLine.trim();
+    if (line === "") continue;
+    const comma = line.lastIndexOf(",");
+    if (comma === -1) continue;
+    const name = line.slice(0, comma).trim();
+    const vramMb = Number.parseInt(line.slice(comma + 1).trim(), 10);
+    if (name === "" || !Number.isFinite(vramMb)) continue;
+    gpus.push({ vendor: "nvidia", name, vramMb });
+  }
+  return gpus;
+}
+/**
+ * Parse `rocm-smi --showmeminfo vram --showproductname --json`.
+ *
+ * The document is expected to be an object whose values are per-card objects.
+ * Every object-valued entry yields one GpuInfo: the VRAM field (a byte count)
+ * is converted to MiB, or reported as 0 when missing/unparseable, and the name
+ * falls back to "AMD GPU" when no name-like key is present.
+ *
+ * Never throws: invalid JSON and JSON that is not an object (e.g. `null`, a
+ * number, a string) both produce an empty list.
+ *
+ * @param stdout Raw tool output.
+ * @returns Detected AMD adapters, in document order.
+ */
+export function parseRocmSmi(stdout: string): GpuInfo[] {
+  let doc: unknown;
+  try {
+    doc = JSON.parse(stdout);
+  } catch {
+    return [];
+  }
+  // JSON.parse("null") succeeds and returns null; Object.values(null) would throw.
+  if (doc === null || typeof doc !== "object") return [];
+  const out: GpuInfo[] = [];
+  for (const card of Object.values(doc as Record<string, unknown>)) {
+    if (!card || typeof card !== "object") continue;
+    const fields = card as Record<string, unknown>;
+    const keys = Object.keys(fields);
+    // rocm-smi key names drift across versions — match loosely.
+    const vramKey = keys.find((k) => /vram total memory/i.test(k));
+    const nameKey = keys.find((k) => /product name|card series|gfx/i.test(k));
+    const bytes = vramKey ? Number.parseInt(String(fields[vramKey]).trim(), 10) : NaN;
+    const vramMb = Number.isFinite(bytes) ? Math.round(bytes / (1024 * 1024)) : 0;
+    out.push({ vendor: "amd", name: nameKey ? String(fields[nameKey]).trim() : "AMD GPU", vramMb });
+  }
+  return out;
+}
+/**
+ * Turn the output of `sysctl -n hw.memsize hw.model` into an Apple-Silicon
+ * GpuInfo. The first non-blank line is the total memory in bytes, the second
+ * (optional) one is the model id; when the model is missing the name falls
+ * back to "arm64".
+ *
+ * `hw.memsize` is UNIFIED memory shared between CPU and GPU. It is carried in
+ * `vramMb` for informational display only — callers must NOT treat it like
+ * discrete VRAM (see setup-recommendation).
+ *
+ * Pure; never throws. Returns an empty list when there is no output or the
+ * first line is not a positive integer.
+ */
+export function parseAppleSilicon(stdout: string): GpuInfo[] {
+  const [memsizeLine, modelLine] = stdout
+    .split("\n")
+    .map((l) => l.trim())
+    .filter((l) => l.length > 0);
+  if (memsizeLine === undefined) return [];
+  const totalBytes = Number.parseInt(memsizeLine, 10);
+  if (!(Number.isFinite(totalBytes) && totalBytes > 0)) return [];
+  const model = modelLine ? modelLine : "arm64";
+  const vramMb = Math.round(totalBytes / (1024 * 1024));
+  return [{ vendor: "apple", name: `Apple Silicon (${model})`, vramMb }];
+}
+const GPU_PROBES: GpuProbe[] = [ // every entry not gated off by `enabled: false` is run by detectGpu
+  {
+    vendor: "nvidia",
+    command: "nvidia-smi",
+    args: ["--query-gpu=name,memory.total", "--format=csv,noheader,nounits"], // one "name, MiB" line per GPU
+    parse: parseNvidiaSmi,
+  },
+  {
+    vendor: "amd",
+    command: "rocm-smi",
+    args: ["--showmeminfo", "vram", "--showproductname", "--json"], // JSON keyed per card; VRAM reported in bytes
+    parse: parseRocmSmi,
+  },
+  {
+    // Apple Silicon Macs expose no nvidia-smi/rocm-smi. Probe macOS sysctl for
+    // unified-memory size + model id. Gated to darwin/arm64 so it never runs (and
+    // never spawns a missing binary) on Linux/Intel.
+    vendor: "apple",
+    command: "sysctl",
+    args: ["-n", "hw.memsize", "hw.model"], // -n: values only, one per line
+    parse: parseAppleSilicon,
+    enabled: process.platform === "darwin" && process.arch === "arm64", // evaluated once, at module load
+  },
+];
+/**
+ * Execute a probe command and capture its stdout.
+ *
+ * @param command   Executable to launch (no shell is involved).
+ * @param args      Arguments passed to the executable.
+ * @param timeoutMs Kill the child after this many milliseconds (default 3000).
+ * @returns The child's stdout as a string, or null when execFile reports any
+ *          error — ENOENT (tool not installed), a non-zero exit, or a timeout.
+ *          The promise never rejects.
+ */
+function run(command: string, args: string[], timeoutMs = 3_000): Promise<string | null> {
+  return new Promise<string | null>((resolve) => {
+    execFile(command, args, { timeout: timeoutMs }, (err, stdout) => {
+      if (err) {
+        resolve(null);
+        return;
+      }
+      resolve(stdout?.toString() ?? "");
+    });
+  });
+}
+/**
+ * Detect the host's best GPU. Returns the highest-VRAM adapter across all probes,
+ * or null when none is found. Never throws.
+ *
+ * All enabled probes run concurrently, but their results are gathered in
+ * GPU_PROBES order (not completion order). That makes the outcome
+ * deterministic: when two adapters report the same VRAM, the one from the
+ * earlier probe — and, within a probe, the earlier adapter — wins.
+ *
+ * A probe contributes nothing when it is gated off (`enabled === false`),
+ * when its command could not be run, or when its parser throws (the latter is
+ * logged at debug level).
+ */
+export async function detectGpu(): Promise<GpuInfo | null> {
+  const perProbe = await Promise.all(
+    GPU_PROBES.map(async (probe): Promise<GpuInfo[]> => {
+      if (probe.enabled === false) return [];
+      const stdout = await run(probe.command, probe.args);
+      if (stdout === null) return [];
+      try {
+        return [...probe.parse(stdout)];
+      } catch (error) {
+        // Parsers promise not to throw; this is a belt-and-braces guard.
+        logger.debug("gpu probe parse failed", { vendor: probe.vendor, error: String(error) });
+        return [];
+      }
+    }),
+  );
+  const found = perProbe.flat();
+  if (found.length === 0) return null;
+  // Strict ">" keeps the earliest candidate on a tie.
+  return found.reduce((best, g) => (g.vramMb > best.vramMb ? g : best));
+}

package/src/control-plane/lifecycle.ts CHANGED Viewed

@@ -172,10 +172,7 @@ function resolveNewestDockerTag(payload: unknown): string | null {
   return fallback;
 }
-export async function updateStackEnvToLatestImageTag(state: ControlPlaneState): Promise<{
-  namespace: string;
-  tag: string;
-}> {
+function resolveImageNamespace(state: ControlPlaneState): string {
   const systemEnvPath = `${state.stashDir}/env/stack.env`;
   const parsed = parseEnvFile(systemEnvPath);
   const namespace = (parsed.OP_IMAGE_NAMESPACE ?? process.env.OP_IMAGE_NAMESPACE ?? "openpalm").trim().toLowerCase();
@@ -183,11 +180,21 @@ export async function updateStackEnvToLatestImageTag(state: ControlPlaneState):
   if (!IMAGE_NAMESPACE_RE.test(namespace)) {
     throw new Error(`Invalid image namespace in system.env: ${namespace}`);
   }
+  return namespace;
+}
-  // `assistant` is the version-of-record image: all platform images
-  // (assistant, guardian, channel, voice) are published in lockstep under the
-  // same OP_IMAGE_TAG, so its newest tag is the canonical platform version.
+/**
+ * Resolve the newest published platform tag from the Docker registry.
+ *
+ * `assistant` is the version-of-record image: all platform images
+ * (assistant, guardian, channel, voice) are published in lockstep under the
+ * same OP_IMAGE_TAG, so its newest tag is the canonical platform version.
+ *
+ * Used both to auto-detect during "Update now" and to resolve a requested
+ * `latest` selection into a concrete release tag before fetching stack assets
+ * (GitHub has no asset tree at a `latest` ref).
+ */
+export async function resolveLatestPlatformTag(namespace: string): Promise<string> {
   let response: Response;
   try {
     response = await fetch(
@@ -207,6 +214,16 @@ export async function updateStackEnvToLatestImageTag(state: ControlPlaneState):
   if (!latestTag) {
     throw new Error("No usable Docker image tag found");
   }
+  return latestTag;
+}
+export async function updateStackEnvToLatestImageTag(state: ControlPlaneState): Promise<{
+  namespace: string;
+  tag: string;
+}> {
+  const systemEnvPath = `${state.stashDir}/env/stack.env`;
+  const namespace = resolveImageNamespace(state);
+  const latestTag = await resolveLatestPlatformTag(namespace);
   const currentContent = existsSync(systemEnvPath) ? readFileSync(systemEnvPath, "utf-8") : "";
   const updatedContent = mergeEnvContent(currentContent, { OP_IMAGE_TAG: latestTag }, { uncomment: true });
@@ -288,9 +305,14 @@ export async function performUpgrade(state: ControlPlaneState): Promise<UpgradeR
     throw new Error(`Failed to pull images: ${pullResult.stderr}`);
   }
-  // 4. Recreate containers (includes profiles for voice addon)
+  // 4. Recreate containers (includes profiles for voice addon).
+  // forceRecreate is REQUIRED: channel adapters are installed at container
+  // startup from npm dist-tags (CHANNEL_PACKAGE, e.g. @openpalm/channel-discord@latest),
+  // so an unchanged compose config would leave those containers running on the
+  // old adapter. --force-recreate guarantees guardian + channel containers
+  // restart and re-resolve their dist-tag adapters (issue #450).
   const services = await buildManagedServices(state);
-  const upResult = await composeUp({ ...composeOpts, services, removeOrphans: true });
+  const upResult = await composeUp({ ...composeOpts, services, forceRecreate: true, removeOrphans: true });
   if (!upResult.ok) {
     throw new Error(`Images pulled but failed to recreate containers: ${upResult.stderr}`);
   }
@@ -309,13 +331,34 @@ export async function performUpgrade(state: ControlPlaneState): Promise<UpgradeR
  * Used by the admin "set version" action — skips the auto-detect step in performUpgrade.
  */
 export async function applyTagChange(state: ControlPlaneState, tag: string): Promise<UpgradeResult> {
+  const namespace = resolveImageNamespace(state);
+  // "latest" (or an empty selection) is not a real GitHub ref — there are no
+  // `.openpalm/...` stack assets at a `latest` tag, so refreshCoreAssets would
+  // fail with a raw download error. Resolve it to the concrete newest published
+  // platform tag BEFORE writing the env or fetching assets, so images and
+  // stack assets stay in lockstep on a real release tag.
+  const requested = tag.trim();
+  let resolvedTag = requested;
+  if (requested === "" || requested.toLowerCase() === "latest") {
+    try {
+      resolvedTag = await resolveLatestPlatformTag(namespace);
+    } catch (e) {
+      const msg = e instanceof Error ? e.message : String(e);
+      throw new Error(
+        `Cannot resolve "latest" to a concrete release: ${msg}. ` +
+        "Check your network connection or select a specific version."
+      );
+    }
+  }
   const stackEnvPath = `${state.stashDir}/env/stack.env`;
   const currentContent = existsSync(stackEnvPath) ? readFileSync(stackEnvPath, "utf-8") : "";
-  writeFileSync(stackEnvPath, mergeEnvContent(currentContent, { OP_IMAGE_TAG: tag }, { uncomment: true }));
-  const upgradeResult = await applyUpgrade(state, tag);
+  writeFileSync(stackEnvPath, mergeEnvContent(currentContent, { OP_IMAGE_TAG: resolvedTag }, { uncomment: true }));
+  const upgradeResult = await applyUpgrade(state, resolvedTag);
   return {
-    imageTag: tag,
-    namespace: "openpalm",
+    imageTag: resolvedTag,
+    namespace,
     backupDir: upgradeResult.backupDir,
     assetsUpdated: upgradeResult.updated,
     restarted: upgradeResult.restarted,
@@ -329,20 +372,27 @@ export function buildComposeFileList(state: ControlPlaneState): string[] {
 export async function buildManagedServices(state: ControlPlaneState): Promise<string[]> {
   const composeOpts = buildComposeOptions(state);
+  // Always force-recreate the core services (assistant + guardian) on upgrade,
+  // regardless of how the service set is discovered. getAddonServiceNames
+  // deliberately EXCLUDES guardian, so a fallback that relied on it alone would
+  // drop guardian from the recreated set when channel profiles are active —
+  // leaving guardian on stale state (issue #450).
+  const services = new Set<string>(CORE_SERVICES);
   // Prefer compose-derived service list when Docker is available
   if (composeOpts.files.length > 0 && !process.env.OP_SKIP_COMPOSE_PREFLIGHT) {
     const result = await composeConfigServices(composeOpts);
     if (result.ok && result.services.length > 0) {
-      return result.services;
+      for (const s of result.services) services.add(s);
+      return [...services];
     }
   }
   // Fallback: static inference from CORE_SERVICES + active addon overlays
-  const services: string[] = [...CORE_SERVICES];
   for (const addon of listEnabledAddonIds(state.homeDir)) {
-    services.push(...getAddonServiceNames(state.homeDir, addon));
+    for (const s of getAddonServiceNames(state.homeDir, addon)) services.add(s);
   }
-  return services;
+  return [...services];
 }

package/src/control-plane/registry.ts CHANGED Viewed

@@ -12,7 +12,7 @@ import { tmpdir } from 'node:os';
 import { parse as parseYaml } from 'yaml';
 import { createLogger } from '../logger.js';
 import { resolveLocalOpenpalmDir } from './ui-assets.js';
-import { ensureChannelSecret } from './config-persistence.js';
+import { ensureChannelSecret, ensureComposeVolumeTargets } from './config-persistence.js';
 import { patchSecretsEnvFile, readStackEnv } from './secrets.js';
 import { readBundledStackAsset } from './core-assets.js';
 import { canonicalAddonProfileSelection, resolveHardwareProfileVariant } from './profile-ids.js';
@@ -907,6 +907,15 @@ export function setAddonEnabled(homeDir: string, stackDir: string, name: string,
         ensureChannelSecret(stackDir, channel);
       }
     }
+    // Pre-create (and chown) any host-side bind-mount targets the newly
+    // enabled addon declares — e.g. ollama's data dir. Matches the install
+    // path (applyInstall → ensureComposeVolumeTargets) so enabling an addon
+    // post-install isn't more exposed than enabling it at install time
+    // (issue #452). Guarded on `state` since callers may omit it.
+    if (state) {
+      ensureComposeVolumeTargets(state);
+    }
   }

package/src/control-plane/setup-recommendation.test.ts ADDED Viewed

@@ -0,0 +1,146 @@
+import { test, expect, describe } from "bun:test";
+import {
+  recommendSetup,
+  gpuToProfileVariant,
+  MIN_LOCAL_GPU_VRAM_MB,
+  type SetupRecommendationInput,
+} from "./setup-recommendation.js";
+import { parseNvidiaSmi, parseRocmSmi, parseAppleSilicon, type GpuInfo } from "./hardware-detect.js";
+const base: SetupRecommendationInput = { cloudProviders: [], hostProviders: [], gpu: null }; // "nothing detected" baseline; cases spread it and override fields
+const gpu = (vendor: GpuInfo["vendor"], vramMb: number, name = "Test GPU"): GpuInfo => ({ vendor, name, vramMb }); // terse GpuInfo factory for the cases below
+describe("recommendSetup", () => { // cases pin the rule order: cloud > host providers > capable GPU > connect-manually
+  test("cloud provider connected -> use-cloud (wins over everything)", () => {
+    const r = recommendSetup({
+      cloudProviders: ["openai"],
+      hostProviders: [{ provider: "ollama", url: "x" }],
+      gpu: gpu("nvidia", 24576),
+    });
+    expect(r.action).toBe("use-cloud");
+  });
+  test("no cloud, host provider running -> use-host-providers", () => {
+    const r = recommendSetup({ ...base, hostProviders: [{ provider: "ollama", url: "http://host:11434" }], gpu: gpu("nvidia", 24576) });
+    expect(r.action).toBe("use-host-providers");
+    if (r.action === "use-host-providers") expect(r.alert).toContain("ollama"); // the action check narrows the result before `alert` is read
+  });
+  test("host providers win over GPU enable-ollama", () => {
+    const r = recommendSetup({ ...base, hostProviders: [{ provider: "lmstudio", url: "x" }], gpu: gpu("nvidia", 24576) });
+    expect(r.action).toBe("use-host-providers");
+  });
+  test("no cloud, no host, capable nvidia GPU -> enable-ollama cuda", () => {
+    const r = recommendSetup({ ...base, gpu: gpu("nvidia", 12288) });
+    expect(r.action).toBe("enable-ollama");
+    if (r.action === "enable-ollama") expect(r.profileVariant).toBe("cuda");
+  });
+  test("capable amd GPU -> enable-ollama rocm", () => {
+    const r = recommendSetup({ ...base, gpu: gpu("amd", 16384) });
+    if (r.action === "enable-ollama") expect(r.profileVariant).toBe("rocm");
+    else throw new Error("expected enable-ollama"); // fail loudly instead of silently skipping the assertion
+  });
+  test("VRAM exactly at threshold -> enable-ollama", () => {
+    const r = recommendSetup({ ...base, gpu: gpu("nvidia", MIN_LOCAL_GPU_VRAM_MB) }); // threshold is inclusive
+    expect(r.action).toBe("enable-ollama");
+  });
+  test("VRAM just under threshold -> connect-manually", () => {
+    const r = recommendSetup({ ...base, gpu: gpu("nvidia", MIN_LOCAL_GPU_VRAM_MB - 1) });
+    expect(r.action).toBe("connect-manually");
+  });
+  test("darwin + apple GPU + no provider -> connect-manually (NOT enable-ollama), Mac-tailored alert", () => {
+    const r = recommendSetup({ ...base, platform: "darwin", gpu: gpu("apple", 65536, "Apple Silicon (Mac15,7)") });
+    expect(r.action).toBe("connect-manually");
+    expect(r.action).not.toBe("enable-ollama");
+    if (r.action === "connect-manually") {
+      expect(r.alert).toContain("macOS");
+      expect(r.alert).toContain("Metal");
+      expect(r.alert.toLowerCase()).toContain("ollama"); // case-insensitive: the alert may spell it "Ollama"
+    }
+  });
+  test("darwin + apple GPU never selects cuda/rocm (no in-stack enable)", () => {
+    // Even with huge unified memory, darwin+apple must not enable in-stack ollama.
+    const r = recommendSetup({ ...base, platform: "darwin", gpu: gpu("apple", 131072) });
+    expect(r.action).not.toBe("enable-ollama");
+  });
+  test("darwin + host ollama running -> still use-host-providers (wins over apple guidance)", () => {
+    const r = recommendSetup({
+      ...base,
+      platform: "darwin",
+      hostProviders: [{ provider: "ollama", url: "http://localhost:11434" }],
+      gpu: gpu("apple", 65536),
+    });
+    expect(r.action).toBe("use-host-providers");
+  });
+  test("linux + nvidia >= threshold -> still enable-ollama cuda (unchanged)", () => {
+    const r = recommendSetup({ ...base, platform: "linux", gpu: gpu("nvidia", 24576) });
+    expect(r.action).toBe("enable-ollama");
+    if (r.action === "enable-ollama") expect(r.profileVariant).toBe("cuda");
+  });
+  test("no cloud, no host, no GPU -> connect-manually", () => {
+    const r = recommendSetup(base);
+    expect(r.action).toBe("connect-manually");
+    if (r.action === "connect-manually") expect(r.alert).toContain("custom OpenAI-compatible");
+  });
+});
+describe("gpuToProfileVariant", () => { // one assertion per GpuVendor member
+  test("nvidia->cuda, amd->rocm, apple->cpu, unknown->cpu", () => {
+    expect(gpuToProfileVariant(gpu("nvidia", 8192))).toBe("cuda");
+    expect(gpuToProfileVariant(gpu("amd", 8192))).toBe("rocm");
+    expect(gpuToProfileVariant(gpu("apple", 65536))).toBe("cpu"); // no Metal inside the Linux container, so CPU only
+    expect(gpuToProfileVariant(gpu("unknown", 8192))).toBe("cpu");
+  });
+});
+describe("parseAppleSilicon", () => { // input mimics `sysctl -n hw.memsize hw.model`: bytes line, then model line
+  test("parses hw.memsize bytes -> MiB + vendor apple + model name", () => {
+    const stdout = `${16 * 1024 * 1024 * 1024}\nMac15,7\n`; // 16 GiB expressed in bytes
+    const out = parseAppleSilicon(stdout);
+    expect(out).toEqual([{ vendor: "apple", name: "Apple Silicon (Mac15,7)", vramMb: 16384 }]);
+  });
+  test("missing model line -> falls back to arm64", () => {
+    const out = parseAppleSilicon(`${8 * 1024 * 1024 * 1024}\n`);
+    expect(out[0]?.vendor).toBe("apple");
+    expect(out[0]?.name).toBe("Apple Silicon (arm64)");
+    expect(out[0]?.vramMb).toBe(8192);
+  });
+  test("garbage / empty -> []", () => {
+    expect(parseAppleSilicon("")).toEqual([]);
+    expect(parseAppleSilicon("not-a-number\nMac15,7")).toEqual([]); // first line must parse as a positive integer
+  });
+});
+describe("parseNvidiaSmi", () => {
+  test("parses name + VRAM (MiB), handles commas in name", () => {
+    // The third line carries a comma INSIDE the adapter name — the case the
+    // title promises. The parser splits on the last comma, so everything before
+    // it must survive as the name.
+    const out = parseNvidiaSmi(
+      "NVIDIA GeForce RTX 4090, 24564\nNVIDIA A100, 81920\nNVIDIA RTX 6000, Ada Generation, 49140\n",
+    );
+    expect(out).toEqual([
+      { vendor: "nvidia", name: "NVIDIA GeForce RTX 4090", vramMb: 24564 },
+      { vendor: "nvidia", name: "NVIDIA A100", vramMb: 81920 },
+      { vendor: "nvidia", name: "NVIDIA RTX 6000, Ada Generation", vramMb: 49140 },
+    ]);
+  });
+  test("ignores blank/garbage lines", () => {
+    expect(parseNvidiaSmi("\n  \nbadline\n")).toEqual([]);
+  });
+});
+describe("parseRocmSmi", () => {
+  test("parses VRAM bytes -> MiB", () => {
+    const json = JSON.stringify({ card0: { "VRAM Total Memory (B)": String(16 * 1024 * 1024 * 1024), "Card Series": "Radeon RX 7900 XTX" } });
+    const out = parseRocmSmi(json);
+    // Exactly one card in, exactly one adapter out.
+    expect(out.length).toBe(1);
+    expect(out[0]?.vendor).toBe("amd");
+    // The "Card Series" key is one of the loosely matched name keys.
+    expect(out[0]?.name).toBe("Radeon RX 7900 XTX");
+    expect(out[0]?.vramMb).toBe(16384);
+  });
+  test("invalid json -> []", () => {
+    expect(parseRocmSmi("not json")).toEqual([]);
+  });
+});

package/src/control-plane/setup-recommendation.ts ADDED Viewed

@@ -0,0 +1,127 @@
+// Pure decision engine for "what should setup do about AI providers?".
+//
+// Inputs are gathered by the caller (detected cloud providers, host-local
+// providers, GPU). This module makes the call and produces a recommendation +
+// user-facing alert. It is intentionally pure and free of I/O so it is trivially
+// unit-testable and easy to evolve as new hardware/providers/models ship — the
+// only things to edit are the constants at the top and the ordered rules in
+// recommendSetup().
+import type { GpuInfo, GpuVendor } from "./hardware-detect.js";
+export type { GpuInfo, GpuVendor } from "./hardware-detect.js";
+/**
+ * Minimum VRAM to auto-enable in-stack Ollama for local models. Edit freely.
+ *
+ * Uses the same unit as `GpuInfo.vramMb`: recommendSetup() enables Ollama only
+ * when `gpu.vramMb >= MIN_LOCAL_GPU_VRAM_MB` (the comparison is inclusive).
+ */
+export const MIN_LOCAL_GPU_VRAM_MB = 8 * 1024;
+/**
+ * Lookup table: GPU vendor -> Ollama hardware-profile variant.
+ * Add one entry here whenever a new vendor is introduced.
+ */
+const VENDOR_PROFILE_VARIANT: Record<GpuVendor, "cuda" | "rocm" | "cpu"> = {
+  nvidia: "cuda",
+  amd: "rocm",
+  // On a Mac the in-stack Ollama runs inside a Linux container that has no
+  // Metal access, which leaves CPU as the only option. (recommendSetup sends
+  // darwin + apple GPUs to host-Ollama guidance, so they never reach
+  // enable-ollama.)
+  apple: "cpu",
+  unknown: "cpu",
+};
+/**
+ * Resolve the Ollama profile variant for a detected GPU.
+ *
+ * @param gpu Detected GPU; only its `vendor` field is consulted.
+ * @returns The variant listed for that vendor, or "cpu" when the vendor has no entry.
+ */
+export function gpuToProfileVariant(gpu: GpuInfo): "cuda" | "rocm" | "cpu" {
+  const { vendor } = gpu;
+  const mapped = VENDOR_PROFILE_VARIANT[vendor];
+  return mapped ?? "cpu";
+}
+/**
+ * A local AI provider found on the host. `provider` is the name shown to the
+ * user in alerts; `url` is presumably the endpoint it was reached at — confirm
+ * against the detector that builds these values.
+ */
+export type DetectedHostProvider = { provider: string; url: string };
+/** Everything recommendSetup() needs to decide; the caller gathers it up front. */
+export type SetupRecommendationInput = {
+  /** Cloud providers already connected (api-key / oauth / env). */
+  cloudProviders: string[];
+  /** Local providers reachable on the host (e.g. ollama, lmstudio), available only. */
+  hostProviders: DetectedHostProvider[];
+  /** Best detected GPU, or null. */
+  gpu: GpuInfo | null;
+  /**
+   * Host platform. Defaults to `process.platform` when omitted, but the decision
+   * logic only reads this field (never `process.*`) so the function stays pure.
+   * On darwin the in-stack Linux Ollama can't reach the Mac's Metal GPU, so an
+   * apple GPU is routed to host-Ollama guidance instead of enable-ollama.
+   */
+  platform?: NodeJS.Platform;
+};
+/**
+ * Result of recommendSetup(): a union discriminated by `action`. Every variant
+ * except "use-cloud" carries a user-facing `alert` string.
+ */
+export type SetupRecommendation =
+  // A cloud provider is connected — nothing to auto-configure; proceed normally.
+  | { action: "use-cloud"; cloudProviders: string[] }
+  // No cloud, but local providers are running on the host — add them and proceed
+  // to model detection.
+  | { action: "use-host-providers"; hostProviders: DetectedHostProvider[]; alert: string }
+  // No provider at all, but a capable GPU exists — enable in-stack Ollama.
+  | { action: "enable-ollama"; profileVariant: "cuda" | "rocm" | "cpu"; gpu: GpuInfo; alert: string }
+  // No provider and no capable GPU — the user must connect one manually.
+  | { action: "connect-manually"; alert: string };
+/**
+ * Format a MiB amount as a GiB figure for user-facing alerts.
+ *
+ * The value is rounded to one decimal place and a trailing ".0" is dropped, so
+ * exact multiples (8192 -> "8") and values that merely round to a whole number
+ * (24564 -> "24") both render without a meaningless decimal, while genuine
+ * fractions keep theirs (12800 -> "12.5").
+ */
+const fmtGb = (mb: number): string => {
+  const rounded = (mb / 1024).toFixed(1);
+  return rounded.endsWith(".0") ? rounded.slice(0, -2) : rounded;
+};
+/**
+ * Join provider names into a human-readable list for the setup alert.
+ *
+ * One name is returned as-is, two are joined with " and ", and three or more
+ * become a comma-separated list ending in ", and <last>" so the sentence does
+ * not read "a and b and c". An empty input yields an empty string.
+ */
+const labelHostProviders = (h: DetectedHostProvider[]): string => {
+  const names = h.map((p) => p.provider);
+  if (names.length <= 2) return names.join(" and ");
+  const leading = names.slice(0, -1).join(", ");
+  return `${leading}, and ${names[names.length - 1]}`;
+};
+/**
+ * Pick the setup action for the detected providers and hardware.
+ *
+ * Rules are checked top to bottom; the first one that applies is returned:
+ *  1. any connected cloud provider             -> "use-cloud".
+ *  2. any host-local provider                  -> "use-host-providers".
+ *  3. darwin host with an apple GPU            -> "connect-manually" with HOST Ollama guidance.
+ *  4. GPU with vramMb >= MIN_LOCAL_GPU_VRAM_MB -> "enable-ollama".
+ *  5. nothing matched                          -> "connect-manually".
+ *
+ * @param input Detected providers, best GPU (or null) and optional platform;
+ *   `process.platform` is used when `platform` is omitted.
+ * @returns The recommendation, including a user-facing alert for every action
+ *   except "use-cloud".
+ */
+export function recommendSetup(input: SetupRecommendationInput): SetupRecommendation {
+  const hostPlatform = input.platform ?? process.platform;
+  const detectedGpu = input.gpu;
+  // Rule 1: a connected cloud provider needs no auto-configuration.
+  if (input.cloudProviders.length > 0) {
+    return { action: "use-cloud", cloudProviders: input.cloudProviders };
+  }
+  // Rule 2: local providers already running on the host are added as-is.
+  if (input.hostProviders.length > 0) {
+    const names = labelHostProviders(input.hostProviders);
+    const verb = input.hostProviders.length > 1 ? "are" : "is";
+    return {
+      action: "use-host-providers",
+      hostProviders: input.hostProviders,
+      alert: `No cloud AI provider was detected, but ${names} ${verb} running on your computer — added automatically. Pick your models on the next step.`,
+    };
+  }
+  // Rule 3 (macOS): the in-stack Ollama is a Linux container that cannot reach
+  // the Mac's Metal GPU, so turning it on would quietly run on slow CPU. Point
+  // the user at a native host Ollama instead; "connect-manually" is reused so
+  // the wizard needs no extra branch.
+  if (detectedGpu?.vendor === "apple" && hostPlatform === "darwin") {
+    const guidance = [
+      "No AI provider was detected. On macOS, fast local models need Ollama running ",
+      "natively (it uses your Apple Silicon / Metal GPU) — the bundled in-stack Ollama ",
+      "runs in Linux and cannot reach Metal. Install Ollama for macOS (https://ollama.com/download), ",
+      "or connect a provider on the next step.",
+    ].join("");
+    return { action: "connect-manually", alert: guidance };
+  }
+  // Rule 4: a GPU at or above the VRAM threshold gets in-stack Ollama.
+  if (detectedGpu && detectedGpu.vramMb >= MIN_LOCAL_GPU_VRAM_MB) {
+    const sizeGb = fmtGb(detectedGpu.vramMb);
+    return {
+      action: "enable-ollama",
+      profileVariant: gpuToProfileVariant(detectedGpu),
+      gpu: detectedGpu,
+      alert: `No AI provider was detected, but a capable GPU was found (${detectedGpu.name}, ${sizeGb} GB). Local models via Ollama have been enabled for you.`,
+    };
+  }
+  // Rule 5: nothing usable was found — the user has to connect a provider.
+  const manualAlert =
+    "No AI provider was detected and no GPU with enough memory for local models was found. " +
+    "Connect a provider to continue — sign in to a provider on the next step, or add a custom OpenAI-compatible endpoint and key.";
+  return { action: "connect-manually", alert: manualAlert };
+}

package/src/control-plane/upgrade-path.test.ts ADDED Viewed

@@ -0,0 +1,113 @@
+/**
+ * Upgrade-path regression tests.
+ *
+ * #449 — Check-up "latest" install: a `latest` (or empty) tag selection must be
+ * resolved to the concrete newest published platform tag BEFORE fetching stack
+ * assets. GitHub has no `.openpalm/...` asset tree at a `latest` ref, so passing
+ * `latest` straight through used to fail with a raw download error.
+ *
+ * #450 — "Update now" must force-recreate guardian + channel containers so they
+ * re-resolve their npm dist-tag adapters; guardian must never fall out of the
+ * recreated service set.
+ */
+import { describe, test, expect, afterEach } from "bun:test";
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+import { mkdtempSync, writeFileSync, mkdirSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { resolveLatestPlatformTag, applyTagChange } from "./lifecycle.js";
+import type { ControlPlaneState } from "./types.js";
+// Directory containing this test file; the #450 tests read lifecycle.ts from it.
+const LIB_CONTROL_PLANE_DIR = join(import.meta.dir);
+// Keep the original fetch so each test can swap in a stub and have it undone.
+const realFetch = globalThis.fetch;
+afterEach(() => {
+  // Undo whatever fetch stub the test installed.
+  globalThis.fetch = realFetch;
+});
+/**
+ * Build a fake registry tag listing: a 200 JSON response whose body is
+ * `{ results: [{ name }, ...] }`, one entry per tag name in the given order.
+ */
+function dockerTagsResponse(names: string[]): Response {
+  const results = names.map((name) => ({ name }));
+  const payload = JSON.stringify({ results });
+  const headers = { "content-type": "application/json" };
+  return new Response(payload, { status: 200, headers });
+}
+// ── #449: latest-tag resolution ──────────────────────────────────────────
+describe("resolveLatestPlatformTag (#449)", () => {
+  // Swap global fetch for a stub that answers every request with these tag names.
+  const stubRegistryTags = (names: string[]): void => {
+    globalThis.fetch = (async () => dockerTagsResponse(names)) as typeof fetch;
+  };
+  test("returns the newest semver tag from the Docker registry", async () => {
+    stubRegistryTags(["latest", "v0.11.0", "edge"]);
+    const resolved = await resolveLatestPlatformTag("openpalm");
+    expect(resolved).toBe("v0.11.0");
+  });
+  test("throws when the registry yields no usable tag", async () => {
+    stubRegistryTags(["latest"]);
+    const pending = resolveLatestPlatformTag("openpalm");
+    await expect(pending).rejects.toThrow(/No usable Docker image tag/);
+  });
+});
+describe("applyTagChange latest resolution (#449)", () => {
+  const UNRESOLVED_LATEST = /Cannot resolve "latest" to a concrete release/;
+  // Create a fresh temp home containing only knowledge/env/stack.env and
+  // describe it as a ControlPlaneState.
+  // NOTE(review): nothing visible in this file removes the temp directory afterwards.
+  function makeState(): ControlPlaneState {
+    const home = mkdtempSync(join(tmpdir(), "openpalm-upgrade-test-"));
+    const envDir = join(home, "knowledge", "env");
+    mkdirSync(envDir, { recursive: true });
+    writeFileSync(join(envDir, "stack.env"), "OP_IMAGE_NAMESPACE=openpalm\n");
+    const configDir = join(home, "config");
+    return {
+      homeDir: home,
+      configDir,
+      stashDir: join(home, "knowledge"),
+      workspaceDir: join(home, "workspace"),
+      dataDir: join(home, "data"),
+      stackDir: join(configDir, "stack"),
+      services: {},
+      artifacts: { compose: "" },
+      artifactMeta: [],
+    };
+  }
+  // Make every fetch reject, simulating an unreachable registry.
+  const failAllFetches = (): void => {
+    globalThis.fetch = (async () => {
+      throw new Error("network down");
+    }) as typeof fetch;
+  };
+  test('a "latest" selection that cannot be resolved fails with a clear validation error, not a raw download error', async () => {
+    failAllFetches();
+    const state = makeState();
+    // The tag is resolved before any asset is downloaded, so the rejection has
+    // to carry the resolution message rather than GitHub's "Failed to download ...".
+    const attempt = applyTagChange(state, "latest");
+    await expect(attempt).rejects.toThrow(UNRESOLVED_LATEST);
+  });
+  test('an empty selection is treated like "latest" and resolved (not passed through as a blank ref)', async () => {
+    failAllFetches();
+    const state = makeState();
+    const attempt = applyTagChange(state, "   ");
+    await expect(attempt).rejects.toThrow(UNRESOLVED_LATEST);
+  });
+});
+// ── #450: upgrade recreates guardian + channel containers ─────────────────
+describe("performUpgrade force-recreates managed services (#450)", () => {
+  // Both checks are textual: they inspect the lifecycle.ts source next to this file.
+  const readLifecycleSource = (): string =>
+    readFileSync(join(LIB_CONTROL_PLANE_DIR, "lifecycle.ts"), "utf-8");
+  test("performUpgrade passes forceRecreate to composeUp", () => {
+    const lifecycleSource = readLifecycleSource();
+    // After pulling, composeUp has to force-recreate so the channel containers
+    // re-resolve their dist-tag adapters.
+    const forceRecreateCall = /composeUp\(\{[^}]*forceRecreate:\s*true/;
+    expect(lifecycleSource).toMatch(forceRecreateCall);
+  });
+  test("buildManagedServices always includes the core services (guardian)", () => {
+    const lifecycleSource = readLifecycleSource();
+    // Guardian is part of CORE_SERVICES, which must seed the set no matter how
+    // the remaining services are discovered.
+    expect(lifecycleSource).toContain("new Set<string>(CORE_SERVICES)");
+  });
+});

package/src/index.ts CHANGED Viewed

@@ -252,6 +252,7 @@ export {
   applyUpgrade,
   performUpgrade,
   applyTagChange,
+  resolveLatestPlatformTag,
   updateStackEnvToLatestImageTag,
   buildComposeFileList,
   buildManagedServices,
@@ -259,10 +260,11 @@ export {
 } from "./control-plane/lifecycle.js";
 // ── Docker ──────────────────────────────────────────────────────────────
-export type { DockerResult } from "./control-plane/docker.js";
+export type { DockerResult, ExistingProject } from "./control-plane/docker.js";
 export {
   checkDocker,
   checkDockerCompose,
+  detectExistingProject,
   resolveComposeProjectName,
   composePreflight,
   composeUp,
@@ -296,6 +298,20 @@ export {
 export type { LocalProviderDetection } from "./control-plane/model-runner.js";
 export { detectLocalProviders } from "./control-plane/model-runner.js";
+// ── Hardware detection + setup recommendation ───────────────────────────
+export type { GpuInfo, GpuVendor } from "./control-plane/hardware-detect.js";
+export { detectGpu, parseNvidiaSmi, parseRocmSmi, parseAppleSilicon } from "./control-plane/hardware-detect.js";
+export type {
+  DetectedHostProvider,
+  SetupRecommendation,
+  SetupRecommendationInput,
+} from "./control-plane/setup-recommendation.js";
+export {
+  recommendSetup,
+  gpuToProfileVariant,
+  MIN_LOCAL_GPU_VRAM_MB,
+} from "./control-plane/setup-recommendation.js";
 // ── Compose Arguments ────────────────────────────────────────────────────
 export {
   buildComposeOptions,