@openpalm/lib 0.11.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openpalm/lib",
3
- "version": "0.11.0",
3
+ "version": "0.11.1",
4
4
  "license": "MPL-2.0",
5
5
  "type": "module",
6
6
  "description": "Shared control-plane library for OpenPalm — lifecycle, staging, secrets, channels, connections, scheduler",
@@ -5,15 +5,16 @@
5
5
  * Files are validated in-place before writing; rollback is handled by
6
6
  * the rollback module (snapshot to OP_HOME/data/rollback/).
7
7
  */
8
- import { mkdirSync, writeFileSync, readFileSync, existsSync, chmodSync } from "node:fs";
8
+ import { mkdirSync, writeFileSync, readFileSync, existsSync, chmodSync, chownSync } from "node:fs";
9
9
  import { dirname, resolve as resolvePath } from "node:path";
10
10
  import { parse as yamlParse } from "yaml";
11
+ import { createLogger } from "../logger.js";
11
12
  import { parseEnvContent, parseEnvFile, mergeEnvContent, expandEnvVars } from './env.js';
12
13
  import { assertNoSecretLikeStackEnvKeys, isSecretLikeStackEnvKey } from './secrets.js';
13
14
  import { ensureSecret } from './secrets-files.js';
14
15
  import type { ControlPlaneState, ArtifactMeta } from "./types.js";
15
16
  import { listEnabledAddonIds } from "./registry.js";
16
- import { resolveOperatorIds, hasUsableOperatorId } from "./operator-ids.js";
17
+ import { resolveOperatorIds, hasUsableOperatorId, type OperatorIds } from "./operator-ids.js";
17
18
  import { SPEC_DEFAULTS } from "./defaults.js";
18
19
  import { CURRENT_LAYOUT_VERSION } from "./migrations.js";
19
20
 
@@ -26,6 +27,8 @@ import { sha256, randomHex } from "./crypto.js";
26
27
 
27
28
  const DEFAULT_IMAGE_TAG = "latest";
28
29
 
30
+ const logger = createLogger("config-persistence");
31
+
29
32
  // ── Env File Management ──────────────────────────────────────────────
30
33
 
31
34
  /**
@@ -223,6 +226,13 @@ export function ensureComposeVolumeTargets(state: ControlPlaneState): void {
223
226
  const composeFiles = discoverStackOverlays(state.stackDir, state.homeDir);
224
227
  if (composeFiles.length === 0) return;
225
228
 
229
+ // Resolve the operator UID/GID compose runs containers as (`user:`), so we
230
+ // can chown the dirs we pre-create to match. Without this, dirs created by
231
+ // a root-running install (or a host UID that differs from the forced
232
+ // container UID) are unwritable inside the non-root container — on OrbStack
233
+ // real UIDs are preserved, so e.g. ollama's mkdir is denied (issue #452).
234
+ const operatorIds = resolveOperatorIds(state.homeDir);
235
+
226
236
  const envVars: Record<string, string> = {
227
237
  ...(process.env as Record<string, string>),
228
238
  ...parseEnvFile(`${state.stashDir}/env/stack.env`),
@@ -264,16 +274,40 @@ export function ensureComposeVolumeTargets(state: ControlPlaneState): void {
264
274
  const isFile = basename.includes('.');
265
275
 
266
276
  if (isFile) {
267
- mkdirSync(dirname(resolvedHostPath), { recursive: true });
277
+ const parent = dirname(resolvedHostPath);
278
+ mkdirSync(parent, { recursive: true });
268
279
  writeFileSync(resolvedHostPath, '');
280
+ chownVolumeTarget(parent, operatorIds);
281
+ chownVolumeTarget(resolvedHostPath, operatorIds);
269
282
  } else {
270
283
  mkdirSync(resolvedHostPath, { recursive: true });
284
+ chownVolumeTarget(resolvedHostPath, operatorIds);
271
285
  }
272
286
  }
273
287
  }
274
288
  }
275
289
  }
276
290
 
291
/**
 * Hand ownership of a freshly created bind-mount target to the operator
 * UID/GID, so the non-root container (`user: ${OP_UID}:${OP_GID}`) is able
 * to write into it.
 *
 * Does nothing on Windows or when no operator IDs are available. The call is
 * best-effort: if chown fails (for example because the current user is not
 * the owner) the failure is logged as a warning and otherwise ignored, so it
 * can never abort the caller.
 *
 * @param path - Host path of the directory or file to re-own.
 * @param operatorIds - Operator UID/GID pair, or null when none was resolved.
 */
function chownVolumeTarget(path: string, operatorIds: OperatorIds | null): void {
  if (!operatorIds) return;
  if (process.platform === "win32") return;

  try {
    chownSync(path, operatorIds.uid, operatorIds.gid);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.warn(
      `Could not chown volume target ${path} to ${operatorIds.uid}:${operatorIds.gid}: ${reason}`
    );
  }
}
310
+
277
311
  // ── Persistence (direct-write to live paths) ────────────────────────
278
312
 
279
313
  export function writeRuntimeFiles(
@@ -0,0 +1,114 @@
1
+ // Host GPU / VRAM detection for setup recommendations.
2
+ //
3
+ // Data-driven on purpose: each entry in GPU_PROBES is a vendor + a command to
4
+ // run + a pure parser. Adding a new accelerator (Intel Arc, Apple Metal, a new
5
+ // rocm/CUDA query, etc.) is a one-entry change here — nothing downstream needs to
6
+ // know. detectGpu() runs every probe, ignores the ones whose tool is absent, and
7
+ // returns the single best (highest-VRAM) result, or null when no GPU is found.
8
+
9
+ import { execFile } from "node:child_process";
10
+ import { createLogger } from "../logger.js";
11
+
12
+ const logger = createLogger("hardware-detect");
13
+
14
+ export type GpuVendor = "nvidia" | "amd" | "unknown";
15
+
16
+ export type GpuInfo = {
17
+ vendor: GpuVendor;
18
+ /** Human-readable adapter name, e.g. "NVIDIA GeForce RTX 4090". */
19
+ name: string;
20
+ /** Total VRAM in MiB. 0 when the tool reported the GPU but not its memory. */
21
+ vramMb: number;
22
+ };
23
+
24
+ type GpuProbe = {
25
+ vendor: GpuVendor;
26
+ command: string;
27
+ args: string[];
28
+ /** Pure parser: tool stdout -> detected GPUs. Must not throw. */
29
+ parse: (stdout: string) => GpuInfo[];
30
+ };
31
+
32
/**
 * Parse `nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits`.
 *
 * Each non-blank line is expected to look like
 * "NVIDIA GeForce RTX 4090, 24564". The LAST comma separates the adapter name
 * from the memory figure, so names that themselves contain commas survive.
 * Lines without a comma, with an empty name, or with a memory field that does
 * not parse to a finite integer are skipped.
 *
 * @param stdout - Raw stdout of the nvidia-smi query.
 * @returns One entry per successfully parsed line, in input order.
 */
export function parseNvidiaSmi(stdout: string): GpuInfo[] {
  const gpus: GpuInfo[] = [];
  for (const rawLine of stdout.split("\n")) {
    const row = rawLine.trim();
    if (!row) continue;

    const comma = row.lastIndexOf(",");
    if (comma === -1) continue;

    const name = row.slice(0, comma).trim();
    const vramMb = Number.parseInt(row.slice(comma + 1).trim(), 10);
    if (!name || !Number.isFinite(vramMb)) continue;

    gpus.push({ vendor: "nvidia", name, vramMb });
  }
  return gpus;
}
49
+
50
/**
 * Parse `rocm-smi --showmeminfo vram --showproductname --json`.
 *
 * The expected document is a JSON object whose values are per-card objects.
 * For each card the VRAM total (reported in bytes) is converted to MiB; when
 * no VRAM key is found or its value does not parse, `vramMb` is 0. When no
 * name-like key is found the name falls back to "AMD GPU".
 *
 * Never throws: malformed JSON, or a JSON root that is not a plain object
 * (`null`, an array, a primitive), yields `[]`.
 *
 * @param stdout - Raw stdout of the rocm-smi query.
 * @returns One entry per card object found in the document.
 */
export function parseRocmSmi(stdout: string): GpuInfo[] {
  let doc: unknown;
  try {
    doc = JSON.parse(stdout);
  } catch {
    return [];
  }
  // JSON.parse happily returns null, arrays and primitives. Object.values(null)
  // would throw, and an array root is not a card map — reject both up front.
  if (doc === null || typeof doc !== "object" || Array.isArray(doc)) return [];

  const out: GpuInfo[] = [];
  for (const card of Object.values(doc as Record<string, unknown>)) {
    if (!card || typeof card !== "object") continue;
    const fields = card as Record<string, unknown>;
    const keys = Object.keys(fields);
    // rocm-smi key names drift across versions — match loosely.
    const vramKey = keys.find((k) => /vram total memory/i.test(k));
    const nameKey = keys.find((k) => /product name|card series|gfx/i.test(k));
    const bytes = vramKey ? Number.parseInt(String(fields[vramKey]).trim(), 10) : NaN;
    const vramMb = Number.isFinite(bytes) ? Math.round(bytes / (1024 * 1024)) : 0;
    out.push({ vendor: "amd", name: nameKey ? String(fields[nameKey]).trim() : "AMD GPU", vramMb });
  }
  return out;
}
70
+
71
+ const GPU_PROBES: GpuProbe[] = [
72
+ {
73
+ vendor: "nvidia",
74
+ command: "nvidia-smi",
75
+ args: ["--query-gpu=name,memory.total", "--format=csv,noheader,nounits"],
76
+ parse: parseNvidiaSmi,
77
+ },
78
+ {
79
+ vendor: "amd",
80
+ command: "rocm-smi",
81
+ args: ["--showmeminfo", "vram", "--showproductname", "--json"],
82
+ parse: parseRocmSmi,
83
+ },
84
+ ];
85
+
86
/**
 * Execute a command and resolve with its stdout.
 *
 * The returned promise never rejects. Any failure reported by `execFile`
 * (the tool is not installed, it exits non-zero, or it exceeds the timeout)
 * resolves to `null`, meaning "not available".
 *
 * @param command - Executable to run.
 * @param args - Arguments passed to the executable.
 * @param timeoutMs - Timeout handed to `execFile`, in milliseconds.
 * @returns The command's stdout as a string, or `null` on any failure.
 */
function run(command: string, args: string[], timeoutMs = 3_000): Promise<string | null> {
  return new Promise((settle) => {
    execFile(command, args, { timeout: timeoutMs }, (failure, stdout) => {
      if (failure) {
        // ENOENT (tool not installed) and any non-zero exit -> not available.
        settle(null);
        return;
      }
      settle(stdout?.toString() ?? "");
    });
  });
}
94
+
95
/**
 * Detect the host's best GPU. Returns the highest-VRAM adapter across all probes,
 * or null when none is found. Never throws.
 *
 * All probes run concurrently. Their results are gathered in the order the
 * probes are declared in GPU_PROBES (not the order in which the tools happen
 * to finish), so when two adapters report the same VRAM the winner is
 * deterministic: the earliest one in probe order.
 */
export async function detectGpu(): Promise<GpuInfo | null> {
  const perProbe = await Promise.all(
    GPU_PROBES.map(async (probe): Promise<GpuInfo[]> => {
      const stdout = await run(probe.command, probe.args);
      // null means the tool is missing or failed — contribute nothing.
      if (stdout === null) return [];
      try {
        return probe.parse(stdout);
      } catch (error) {
        // Parsers are meant not to throw; stay defensive so one bad probe
        // cannot take down detection as a whole.
        logger.debug("gpu probe parse failed", { vendor: probe.vendor, error: String(error) });
        return [];
      }
    }),
  );

  const found = perProbe.flat();
  if (found.length === 0) return null;
  // Strict ">" keeps the earliest adapter on a VRAM tie.
  return found.reduce((best, g) => (g.vramMb > best.vramMb ? g : best));
}
@@ -172,10 +172,7 @@ function resolveNewestDockerTag(payload: unknown): string | null {
172
172
  return fallback;
173
173
  }
174
174
 
175
- export async function updateStackEnvToLatestImageTag(state: ControlPlaneState): Promise<{
176
- namespace: string;
177
- tag: string;
178
- }> {
175
+ function resolveImageNamespace(state: ControlPlaneState): string {
179
176
  const systemEnvPath = `${state.stashDir}/env/stack.env`;
180
177
  const parsed = parseEnvFile(systemEnvPath);
181
178
  const namespace = (parsed.OP_IMAGE_NAMESPACE ?? process.env.OP_IMAGE_NAMESPACE ?? "openpalm").trim().toLowerCase();
@@ -183,11 +180,21 @@ export async function updateStackEnvToLatestImageTag(state: ControlPlaneState):
183
180
  if (!IMAGE_NAMESPACE_RE.test(namespace)) {
184
181
  throw new Error(`Invalid image namespace in system.env: ${namespace}`);
185
182
  }
183
+ return namespace;
184
+ }
186
185
 
187
- // `assistant` is the version-of-record image: all platform images
188
- // (assistant, guardian, channel, voice) are published in lockstep under the
189
- // same OP_IMAGE_TAG, so its newest tag is the canonical platform version.
190
-
186
+ /**
187
+ * Resolve the newest published platform tag from the Docker registry.
188
+ *
189
+ * `assistant` is the version-of-record image: all platform images
190
+ * (assistant, guardian, channel, voice) are published in lockstep under the
191
+ * same OP_IMAGE_TAG, so its newest tag is the canonical platform version.
192
+ *
193
+ * Used both to auto-detect during "Update now" and to resolve a requested
194
+ * `latest` selection into a concrete release tag before fetching stack assets
195
+ * (GitHub has no asset tree at a `latest` ref).
196
+ */
197
+ export async function resolveLatestPlatformTag(namespace: string): Promise<string> {
191
198
  let response: Response;
192
199
  try {
193
200
  response = await fetch(
@@ -207,6 +214,16 @@ export async function updateStackEnvToLatestImageTag(state: ControlPlaneState):
207
214
  if (!latestTag) {
208
215
  throw new Error("No usable Docker image tag found");
209
216
  }
217
+ return latestTag;
218
+ }
219
+
220
+ export async function updateStackEnvToLatestImageTag(state: ControlPlaneState): Promise<{
221
+ namespace: string;
222
+ tag: string;
223
+ }> {
224
+ const systemEnvPath = `${state.stashDir}/env/stack.env`;
225
+ const namespace = resolveImageNamespace(state);
226
+ const latestTag = await resolveLatestPlatformTag(namespace);
210
227
 
211
228
  const currentContent = existsSync(systemEnvPath) ? readFileSync(systemEnvPath, "utf-8") : "";
212
229
  const updatedContent = mergeEnvContent(currentContent, { OP_IMAGE_TAG: latestTag }, { uncomment: true });
@@ -288,9 +305,14 @@ export async function performUpgrade(state: ControlPlaneState): Promise<UpgradeR
288
305
  throw new Error(`Failed to pull images: ${pullResult.stderr}`);
289
306
  }
290
307
 
291
- // 4. Recreate containers (includes profiles for voice addon)
308
+ // 4. Recreate containers (includes profiles for voice addon).
309
+ // forceRecreate is REQUIRED: channel adapters are installed at container
310
+ // startup from npm dist-tags (CHANNEL_PACKAGE, e.g. @openpalm/channel-discord@latest),
311
+ // so an unchanged compose config would leave those containers running on the
312
+ // old adapter. --force-recreate guarantees guardian + channel containers
313
+ // restart and re-resolve their dist-tag adapters (issue #450).
292
314
  const services = await buildManagedServices(state);
293
- const upResult = await composeUp({ ...composeOpts, services, removeOrphans: true });
315
+ const upResult = await composeUp({ ...composeOpts, services, forceRecreate: true, removeOrphans: true });
294
316
  if (!upResult.ok) {
295
317
  throw new Error(`Images pulled but failed to recreate containers: ${upResult.stderr}`);
296
318
  }
@@ -309,13 +331,34 @@ export async function performUpgrade(state: ControlPlaneState): Promise<UpgradeR
309
331
  * Used by the admin "set version" action — skips the auto-detect step in performUpgrade.
310
332
  */
311
333
  export async function applyTagChange(state: ControlPlaneState, tag: string): Promise<UpgradeResult> {
334
+ const namespace = resolveImageNamespace(state);
335
+
336
+ // "latest" (or an empty selection) is not a real GitHub ref — there are no
337
+ // `.openpalm/...` stack assets at a `latest` tag, so refreshCoreAssets would
338
+ // fail with a raw download error. Resolve it to the concrete newest published
339
+ // platform tag BEFORE writing the env or fetching assets, so images and
340
+ // stack assets stay in lockstep on a real release tag.
341
+ const requested = tag.trim();
342
+ let resolvedTag = requested;
343
+ if (requested === "" || requested.toLowerCase() === "latest") {
344
+ try {
345
+ resolvedTag = await resolveLatestPlatformTag(namespace);
346
+ } catch (e) {
347
+ const msg = e instanceof Error ? e.message : String(e);
348
+ throw new Error(
349
+ `Cannot resolve "latest" to a concrete release: ${msg}. ` +
350
+ "Check your network connection or select a specific version."
351
+ );
352
+ }
353
+ }
354
+
312
355
  const stackEnvPath = `${state.stashDir}/env/stack.env`;
313
356
  const currentContent = existsSync(stackEnvPath) ? readFileSync(stackEnvPath, "utf-8") : "";
314
- writeFileSync(stackEnvPath, mergeEnvContent(currentContent, { OP_IMAGE_TAG: tag }, { uncomment: true }));
315
- const upgradeResult = await applyUpgrade(state, tag);
357
+ writeFileSync(stackEnvPath, mergeEnvContent(currentContent, { OP_IMAGE_TAG: resolvedTag }, { uncomment: true }));
358
+ const upgradeResult = await applyUpgrade(state, resolvedTag);
316
359
  return {
317
- imageTag: tag,
318
- namespace: "openpalm",
360
+ imageTag: resolvedTag,
361
+ namespace,
319
362
  backupDir: upgradeResult.backupDir,
320
363
  assetsUpdated: upgradeResult.updated,
321
364
  restarted: upgradeResult.restarted,
@@ -329,20 +372,27 @@ export function buildComposeFileList(state: ControlPlaneState): string[] {
329
372
  export async function buildManagedServices(state: ControlPlaneState): Promise<string[]> {
330
373
  const composeOpts = buildComposeOptions(state);
331
374
 
375
+ // Always force-recreate the core services (assistant + guardian) on upgrade,
376
+ // regardless of how the service set is discovered. getAddonServiceNames
377
+ // deliberately EXCLUDES guardian, so a fallback that relied on it alone would
378
+ // drop guardian from the recreated set when channel profiles are active —
379
+ // leaving guardian on stale state (issue #450).
380
+ const services = new Set<string>(CORE_SERVICES);
381
+
332
382
  // Prefer compose-derived service list when Docker is available
333
383
  if (composeOpts.files.length > 0 && !process.env.OP_SKIP_COMPOSE_PREFLIGHT) {
334
384
  const result = await composeConfigServices(composeOpts);
335
385
  if (result.ok && result.services.length > 0) {
336
- return result.services;
386
+ for (const s of result.services) services.add(s);
387
+ return [...services];
337
388
  }
338
389
  }
339
390
 
340
391
  // Fallback: static inference from CORE_SERVICES + active addon overlays
341
- const services: string[] = [...CORE_SERVICES];
342
392
  for (const addon of listEnabledAddonIds(state.homeDir)) {
343
- services.push(...getAddonServiceNames(state.homeDir, addon));
393
+ for (const s of getAddonServiceNames(state.homeDir, addon)) services.add(s);
344
394
  }
345
- return services;
395
+ return [...services];
346
396
  }
347
397
 
348
398
 
@@ -12,7 +12,7 @@ import { tmpdir } from 'node:os';
12
12
  import { parse as parseYaml } from 'yaml';
13
13
  import { createLogger } from '../logger.js';
14
14
  import { resolveLocalOpenpalmDir } from './ui-assets.js';
15
- import { ensureChannelSecret } from './config-persistence.js';
15
+ import { ensureChannelSecret, ensureComposeVolumeTargets } from './config-persistence.js';
16
16
  import { patchSecretsEnvFile, readStackEnv } from './secrets.js';
17
17
  import { readBundledStackAsset } from './core-assets.js';
18
18
  import { canonicalAddonProfileSelection, resolveHardwareProfileVariant } from './profile-ids.js';
@@ -907,6 +907,15 @@ export function setAddonEnabled(homeDir: string, stackDir: string, name: string,
907
907
  ensureChannelSecret(stackDir, channel);
908
908
  }
909
909
  }
910
+
911
+ // Pre-create (and chown) any host-side bind-mount targets the newly
912
+ // enabled addon declares — e.g. ollama's data dir. Matches the install
913
+ // path (applyInstall → ensureComposeVolumeTargets) so enabling an addon
914
+ // post-install isn't more exposed than enabling it at install time
915
+ // (issue #452). Guarded on `state` since callers may omit it.
916
+ if (state) {
917
+ ensureComposeVolumeTargets(state);
918
+ }
910
919
  }
911
920
 
912
921
 
@@ -0,0 +1,94 @@
1
+ import { test, expect, describe } from "bun:test";
2
+ import {
3
+ recommendSetup,
4
+ gpuToProfileVariant,
5
+ MIN_LOCAL_GPU_VRAM_MB,
6
+ type SetupRecommendationInput,
7
+ } from "./setup-recommendation.js";
8
+ import { parseNvidiaSmi, parseRocmSmi, type GpuInfo } from "./hardware-detect.js";
9
+
10
+ const base: SetupRecommendationInput = { cloudProviders: [], hostProviders: [], gpu: null };
11
+ const gpu = (vendor: GpuInfo["vendor"], vramMb: number, name = "Test GPU"): GpuInfo => ({ vendor, name, vramMb });
12
+
13
+ describe("recommendSetup", () => {
14
+ test("cloud provider connected -> use-cloud (wins over everything)", () => {
15
+ const r = recommendSetup({
16
+ cloudProviders: ["openai"],
17
+ hostProviders: [{ provider: "ollama", url: "x" }],
18
+ gpu: gpu("nvidia", 24576),
19
+ });
20
+ expect(r.action).toBe("use-cloud");
21
+ });
22
+
23
+ test("no cloud, host provider running -> use-host-providers", () => {
24
+ const r = recommendSetup({ ...base, hostProviders: [{ provider: "ollama", url: "http://host:11434" }], gpu: gpu("nvidia", 24576) });
25
+ expect(r.action).toBe("use-host-providers");
26
+ if (r.action === "use-host-providers") expect(r.alert).toContain("ollama");
27
+ });
28
+
29
+ test("host providers win over GPU enable-ollama", () => {
30
+ const r = recommendSetup({ ...base, hostProviders: [{ provider: "lmstudio", url: "x" }], gpu: gpu("nvidia", 24576) });
31
+ expect(r.action).toBe("use-host-providers");
32
+ });
33
+
34
+ test("no cloud, no host, capable nvidia GPU -> enable-ollama cuda", () => {
35
+ const r = recommendSetup({ ...base, gpu: gpu("nvidia", 12288) });
36
+ expect(r.action).toBe("enable-ollama");
37
+ if (r.action === "enable-ollama") expect(r.profileVariant).toBe("cuda");
38
+ });
39
+
40
+ test("capable amd GPU -> enable-ollama rocm", () => {
41
+ const r = recommendSetup({ ...base, gpu: gpu("amd", 16384) });
42
+ if (r.action === "enable-ollama") expect(r.profileVariant).toBe("rocm");
43
+ else throw new Error("expected enable-ollama");
44
+ });
45
+
46
+ test("VRAM exactly at threshold -> enable-ollama", () => {
47
+ const r = recommendSetup({ ...base, gpu: gpu("nvidia", MIN_LOCAL_GPU_VRAM_MB) });
48
+ expect(r.action).toBe("enable-ollama");
49
+ });
50
+
51
+ test("VRAM just under threshold -> connect-manually", () => {
52
+ const r = recommendSetup({ ...base, gpu: gpu("nvidia", MIN_LOCAL_GPU_VRAM_MB - 1) });
53
+ expect(r.action).toBe("connect-manually");
54
+ });
55
+
56
+ test("no cloud, no host, no GPU -> connect-manually", () => {
57
+ const r = recommendSetup(base);
58
+ expect(r.action).toBe("connect-manually");
59
+ if (r.action === "connect-manually") expect(r.alert).toContain("custom OpenAI-compatible");
60
+ });
61
+ });
62
+
63
+ describe("gpuToProfileVariant", () => {
64
+ test("nvidia->cuda, amd->rocm, unknown->cpu", () => {
65
+ expect(gpuToProfileVariant(gpu("nvidia", 8192))).toBe("cuda");
66
+ expect(gpuToProfileVariant(gpu("amd", 8192))).toBe("rocm");
67
+ expect(gpuToProfileVariant(gpu("unknown", 8192))).toBe("cpu");
68
+ });
69
+ });
70
+
71
// Unit tests for the nvidia-smi CSV parser.
describe("parseNvidiaSmi", () => {
  test("parses name + VRAM (MiB), handles commas in name", () => {
    // The third line has a comma inside the adapter name: only the LAST comma
    // may be treated as the name/VRAM separator.
    const out = parseNvidiaSmi(
      "NVIDIA GeForce RTX 4090, 24564\nNVIDIA A100, 81920\nNVIDIA RTX 6000, Ada Generation, 49140\n",
    );
    expect(out).toEqual([
      { vendor: "nvidia", name: "NVIDIA GeForce RTX 4090", vramMb: 24564 },
      { vendor: "nvidia", name: "NVIDIA A100", vramMb: 81920 },
      { vendor: "nvidia", name: "NVIDIA RTX 6000, Ada Generation", vramMb: 49140 },
    ]);
  });
  test("ignores blank/garbage lines", () => {
    expect(parseNvidiaSmi("\n \nbadline\n")).toEqual([]);
  });
});
83
+
84
+ describe("parseRocmSmi", () => {
85
+ test("parses VRAM bytes -> MiB", () => {
86
+ const json = JSON.stringify({ card0: { "VRAM Total Memory (B)": String(16 * 1024 * 1024 * 1024), "Card Series": "Radeon RX 7900 XTX" } });
87
+ const out = parseRocmSmi(json);
88
+ expect(out[0]?.vendor).toBe("amd");
89
+ expect(out[0]?.vramMb).toBe(16384);
90
+ });
91
+ test("invalid json -> []", () => {
92
+ expect(parseRocmSmi("not json")).toEqual([]);
93
+ });
94
+ });
@@ -0,0 +1,98 @@
1
+ // Pure decision engine for "what should setup do about AI providers?".
2
+ //
3
+ // Inputs are gathered by the caller (detected cloud providers, host-local
4
+ // providers, GPU). This module makes the call and produces a recommendation +
5
+ // user-facing alert. It is intentionally pure and free of I/O so it is trivially
6
+ // unit-testable and easy to evolve as new hardware/providers/models ship — the
7
+ // only things to edit are the constants at the top and the ordered rules in
8
+ // recommendSetup().
9
+
10
+ import type { GpuInfo, GpuVendor } from "./hardware-detect.js";
11
+
12
+ export type { GpuInfo, GpuVendor } from "./hardware-detect.js";
13
+
14
+ /** Minimum VRAM to auto-enable in-stack Ollama for local models. Edit freely. */
15
+ export const MIN_LOCAL_GPU_VRAM_MB = 8 * 1024;
16
+
17
/**
 * Lookup table: GPU vendor -> Ollama hardware-profile variant.
 * Supporting a new vendor means adding one entry here.
 */
const VENDOR_PROFILE_VARIANT: Record<GpuVendor, "cuda" | "rocm" | "cpu"> = {
  nvidia: "cuda",
  amd: "rocm",
  unknown: "cpu",
};

/**
 * Map a detected GPU to the Ollama hardware-profile variant for its vendor.
 *
 * @param gpu - The detected adapter; only `vendor` is consulted.
 * @returns The variant from the vendor table, or "cpu" when the table has no
 *   entry for the vendor.
 */
export function gpuToProfileVariant(gpu: GpuInfo): "cuda" | "rocm" | "cpu" {
  const variant = VENDOR_PROFILE_VARIANT[gpu.vendor];
  return variant ?? "cpu";
}
27
+
28
+ export type DetectedHostProvider = { provider: string; url: string };
29
+
30
+ export type SetupRecommendationInput = {
31
+ /** Cloud providers already connected (api-key / oauth / env). */
32
+ cloudProviders: string[];
33
+ /** Local providers reachable on the host (e.g. ollama, lmstudio), available only. */
34
+ hostProviders: DetectedHostProvider[];
35
+ /** Best detected GPU, or null. */
36
+ gpu: GpuInfo | null;
37
+ };
38
+
39
+ export type SetupRecommendation =
40
+ // A cloud provider is connected — nothing to auto-configure; proceed normally.
41
+ | { action: "use-cloud"; cloudProviders: string[] }
42
+ // No cloud, but local providers are running on the host — add them and proceed
43
+ // to model detection.
44
+ | { action: "use-host-providers"; hostProviders: DetectedHostProvider[]; alert: string }
45
+ // No provider at all, but a capable GPU exists — enable in-stack Ollama.
46
+ | { action: "enable-ollama"; profileVariant: "cuda" | "rocm" | "cpu"; gpu: GpuInfo; alert: string }
47
+ // No provider and no capable GPU — the user must connect one manually.
48
+ | { action: "connect-manually"; alert: string };
49
+
50
/**
 * Format a MiB amount as a GB figure for user-facing text, with at most one
 * decimal place. A value that rounds to a whole number is printed without a
 * trailing ".0" (24564 MiB -> "24", not "24.0"), so it reads the same as an
 * exact multiple of 1024 (8192 MiB -> "8").
 */
const fmtGb = (mb: number): string => {
  const gb = (mb / 1024).toFixed(1);
  return gb.endsWith(".0") ? gb.slice(0, -2) : gb;
};
51
+
52
/**
 * Build a human-readable list of provider names for alerts:
 * "ollama", "ollama and lmstudio", "ollama, lmstudio and llamacpp".
 * An empty input yields an empty string.
 */
const labelHostProviders = (h: DetectedHostProvider[]): string => {
  const names = h.map((p) => p.provider);
  if (names.length <= 2) return names.join(" and ");
  // Three or more: comma-separate all but the last, then join it with "and".
  return `${names.slice(0, -1).join(", ")} and ${names[names.length - 1]}`;
};
54
+
55
/**
 * Pick the setup action for the detected providers and hardware.
 *
 * Rules are evaluated top to bottom; the first one that applies is returned:
 *   1. at least one cloud provider is connected     -> "use-cloud"
 *   2. at least one host-local provider is running  -> "use-host-providers"
 *   3. a GPU with VRAM >= MIN_LOCAL_GPU_VRAM_MB      -> "enable-ollama"
 *   4. none of the above                            -> "connect-manually"
 *
 * @param input - Detected cloud providers, host providers and best GPU.
 * @returns The recommendation; every variant except "use-cloud" carries a
 *   user-facing `alert`.
 */
export function recommendSetup(input: SetupRecommendationInput): SetupRecommendation {
  const { cloudProviders, hostProviders, gpu } = input;

  // Rule 1: a connected cloud provider wins over everything else.
  if (cloudProviders.length > 0) {
    return { action: "use-cloud", cloudProviders };
  }

  // Rule 2: host-local providers win over enabling in-stack Ollama.
  if (hostProviders.length > 0) {
    const names = labelHostProviders(hostProviders);
    const verb = hostProviders.length > 1 ? "are" : "is";
    const alert = `No cloud AI provider was detected, but ${names} ${verb} running on your computer — added automatically. Pick your models on the next step.`;
    return { action: "use-host-providers", hostProviders, alert };
  }

  // Rule 4 (checked first as a guard): no GPU, or VRAM below the threshold.
  if (!gpu || !(gpu.vramMb >= MIN_LOCAL_GPU_VRAM_MB)) {
    return {
      action: "connect-manually",
      alert:
        "No AI provider was detected and no GPU with enough memory for local models was found. " +
        "Connect a provider to continue — sign in to a provider on the next step, or add a custom OpenAI-compatible endpoint and key.",
    };
  }

  // Rule 3: a capable GPU is present — enable in-stack Ollama.
  const vramGb = fmtGb(gpu.vramMb);
  return {
    action: "enable-ollama",
    profileVariant: gpuToProfileVariant(gpu),
    gpu,
    alert: `No AI provider was detected, but a capable GPU was found (${gpu.name}, ${vramGb} GB). Local models via Ollama have been enabled for you.`,
  };
}
@@ -0,0 +1,113 @@
1
+ /**
2
+ * Upgrade-path regression tests.
3
+ *
4
+ * #449 — Check-up "latest" install: a `latest` (or empty) tag selection must be
5
+ * resolved to the concrete newest published platform tag BEFORE fetching stack
6
+ * assets. GitHub has no `.openpalm/...` asset tree at a `latest` ref, so passing
7
+ * `latest` straight through used to fail with a raw download error.
8
+ *
9
+ * #450 — "Update now" must force-recreate guardian + channel containers so they
10
+ * re-resolve their npm dist-tag adapters; guardian must never fall out of the
11
+ * recreated service set.
12
+ */
13
+ import { describe, test, expect, afterEach } from "bun:test";
14
+ import { readFileSync } from "node:fs";
15
+ import { join } from "node:path";
16
+ import { mkdtempSync, writeFileSync, mkdirSync } from "node:fs";
17
+ import { tmpdir } from "node:os";
18
+ import { resolveLatestPlatformTag, applyTagChange } from "./lifecycle.js";
19
+ import type { ControlPlaneState } from "./types.js";
20
+
21
+ const LIB_CONTROL_PLANE_DIR = join(import.meta.dir);
22
+
23
+ const realFetch = globalThis.fetch;
24
+ afterEach(() => {
25
+ globalThis.fetch = realFetch;
26
+ });
27
+
28
+ function dockerTagsResponse(names: string[]): Response {
29
+ return new Response(
30
+ JSON.stringify({ results: names.map((name) => ({ name })) }),
31
+ { status: 200, headers: { "content-type": "application/json" } },
32
+ );
33
+ }
34
+
35
+ // ── #449: latest-tag resolution ──────────────────────────────────────────
36
+
37
+ describe("resolveLatestPlatformTag (#449)", () => {
38
+ test("returns the newest semver tag from the Docker registry", async () => {
39
+ globalThis.fetch = (async () =>
40
+ dockerTagsResponse(["latest", "v0.11.0", "edge"])) as typeof fetch;
41
+
42
+ const tag = await resolveLatestPlatformTag("openpalm");
43
+ expect(tag).toBe("v0.11.0");
44
+ });
45
+
46
+ test("throws when the registry yields no usable tag", async () => {
47
+ globalThis.fetch = (async () => dockerTagsResponse(["latest"])) as typeof fetch;
48
+ await expect(resolveLatestPlatformTag("openpalm")).rejects.toThrow(
49
+ /No usable Docker image tag/,
50
+ );
51
+ });
52
+ });
53
+
54
+ describe("applyTagChange latest resolution (#449)", () => {
55
+ function makeState(): ControlPlaneState {
56
+ const home = mkdtempSync(join(tmpdir(), "openpalm-upgrade-test-"));
57
+ mkdirSync(join(home, "knowledge", "env"), { recursive: true });
58
+ writeFileSync(join(home, "knowledge", "env", "stack.env"), "OP_IMAGE_NAMESPACE=openpalm\n");
59
+ return {
60
+ homeDir: home,
61
+ configDir: join(home, "config"),
62
+ stashDir: join(home, "knowledge"),
63
+ workspaceDir: join(home, "workspace"),
64
+ dataDir: join(home, "data"),
65
+ stackDir: join(home, "config", "stack"),
66
+ services: {},
67
+ artifacts: { compose: "" },
68
+ artifactMeta: [],
69
+ };
70
+ }
71
+
72
+ test('a "latest" selection that cannot be resolved fails with a clear validation error, not a raw download error', async () => {
73
+ globalThis.fetch = (async () => {
74
+ throw new Error("network down");
75
+ }) as typeof fetch;
76
+
77
+ const state = makeState();
78
+ // Resolution happens BEFORE any asset download, so the error must be the
79
+ // resolution message — never the GitHub "Failed to download ..." error.
80
+ await expect(applyTagChange(state, "latest")).rejects.toThrow(
81
+ /Cannot resolve "latest" to a concrete release/,
82
+ );
83
+ });
84
+
85
+ test('an empty selection is treated like "latest" and resolved (not passed through as a blank ref)', async () => {
86
+ globalThis.fetch = (async () => {
87
+ throw new Error("network down");
88
+ }) as typeof fetch;
89
+
90
+ const state = makeState();
91
+ await expect(applyTagChange(state, " ")).rejects.toThrow(
92
+ /Cannot resolve "latest" to a concrete release/,
93
+ );
94
+ });
95
+ });
96
+
97
+ // ── #450: upgrade recreates guardian + channel containers ─────────────────
98
+
99
+ describe("performUpgrade force-recreates managed services (#450)", () => {
100
+ test("performUpgrade passes forceRecreate to composeUp", () => {
101
+ const src = readFileSync(join(LIB_CONTROL_PLANE_DIR, "lifecycle.ts"), "utf-8");
102
+ // The post-pull composeUp in performUpgrade must force-recreate so channel
103
+ // containers re-resolve their dist-tag adapters.
104
+ expect(src).toMatch(/composeUp\(\{[^}]*forceRecreate:\s*true/);
105
+ });
106
+
107
+ test("buildManagedServices always includes the core services (guardian)", () => {
108
+ const src = readFileSync(join(LIB_CONTROL_PLANE_DIR, "lifecycle.ts"), "utf-8");
109
+ // Guardian comes from CORE_SERVICES and must be seeded into the set
110
+ // regardless of how the rest of the service list is discovered.
111
+ expect(src).toContain("new Set<string>(CORE_SERVICES)");
112
+ });
113
+ });
package/src/index.ts CHANGED
@@ -252,6 +252,7 @@ export {
252
252
  applyUpgrade,
253
253
  performUpgrade,
254
254
  applyTagChange,
255
+ resolveLatestPlatformTag,
255
256
  updateStackEnvToLatestImageTag,
256
257
  buildComposeFileList,
257
258
  buildManagedServices,
@@ -296,6 +297,20 @@ export {
296
297
  export type { LocalProviderDetection } from "./control-plane/model-runner.js";
297
298
  export { detectLocalProviders } from "./control-plane/model-runner.js";
298
299
 
300
+ // ── Hardware detection + setup recommendation ───────────────────────────
301
+ export type { GpuInfo, GpuVendor } from "./control-plane/hardware-detect.js";
302
+ export { detectGpu, parseNvidiaSmi, parseRocmSmi } from "./control-plane/hardware-detect.js";
303
+ export type {
304
+ DetectedHostProvider,
305
+ SetupRecommendation,
306
+ SetupRecommendationInput,
307
+ } from "./control-plane/setup-recommendation.js";
308
+ export {
309
+ recommendSetup,
310
+ gpuToProfileVariant,
311
+ MIN_LOCAL_GPU_VRAM_MB,
312
+ } from "./control-plane/setup-recommendation.js";
313
+
299
314
  // ── Compose Arguments ────────────────────────────────────────────────────
300
315
  export {
301
316
  buildComposeOptions,