@openpalm/lib 0.11.0 → 0.11.2-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/control-plane/config-persistence.ts +37 -3
- package/src/control-plane/docker.test.ts +61 -0
- package/src/control-plane/docker.ts +92 -1
- package/src/control-plane/hardware-detect.ts +146 -0
- package/src/control-plane/lifecycle.ts +68 -18
- package/src/control-plane/registry.ts +10 -1
- package/src/control-plane/setup-recommendation.test.ts +146 -0
- package/src/control-plane/setup-recommendation.ts +127 -0
- package/src/control-plane/upgrade-path.test.ts +113 -0
- package/src/index.ts +17 -1
package/package.json
CHANGED
|
@@ -5,15 +5,16 @@
|
|
|
5
5
|
* Files are validated in-place before writing; rollback is handled by
|
|
6
6
|
* the rollback module (snapshot to OP_HOME/data/rollback/).
|
|
7
7
|
*/
|
|
8
|
-
import { mkdirSync, writeFileSync, readFileSync, existsSync, chmodSync } from "node:fs";
|
|
8
|
+
import { mkdirSync, writeFileSync, readFileSync, existsSync, chmodSync, chownSync } from "node:fs";
|
|
9
9
|
import { dirname, resolve as resolvePath } from "node:path";
|
|
10
10
|
import { parse as yamlParse } from "yaml";
|
|
11
|
+
import { createLogger } from "../logger.js";
|
|
11
12
|
import { parseEnvContent, parseEnvFile, mergeEnvContent, expandEnvVars } from './env.js';
|
|
12
13
|
import { assertNoSecretLikeStackEnvKeys, isSecretLikeStackEnvKey } from './secrets.js';
|
|
13
14
|
import { ensureSecret } from './secrets-files.js';
|
|
14
15
|
import type { ControlPlaneState, ArtifactMeta } from "./types.js";
|
|
15
16
|
import { listEnabledAddonIds } from "./registry.js";
|
|
16
|
-
import { resolveOperatorIds, hasUsableOperatorId } from "./operator-ids.js";
|
|
17
|
+
import { resolveOperatorIds, hasUsableOperatorId, type OperatorIds } from "./operator-ids.js";
|
|
17
18
|
import { SPEC_DEFAULTS } from "./defaults.js";
|
|
18
19
|
import { CURRENT_LAYOUT_VERSION } from "./migrations.js";
|
|
19
20
|
|
|
@@ -26,6 +27,8 @@ import { sha256, randomHex } from "./crypto.js";
|
|
|
26
27
|
|
|
27
28
|
const DEFAULT_IMAGE_TAG = "latest";
|
|
28
29
|
|
|
30
|
+
const logger = createLogger("config-persistence");
|
|
31
|
+
|
|
29
32
|
// ── Env File Management ──────────────────────────────────────────────
|
|
30
33
|
|
|
31
34
|
/**
|
|
@@ -223,6 +226,13 @@ export function ensureComposeVolumeTargets(state: ControlPlaneState): void {
|
|
|
223
226
|
const composeFiles = discoverStackOverlays(state.stackDir, state.homeDir);
|
|
224
227
|
if (composeFiles.length === 0) return;
|
|
225
228
|
|
|
229
|
+
// Resolve the operator UID/GID compose runs containers as (`user:`), so we
|
|
230
|
+
// can chown the dirs we pre-create to match. Without this, dirs created by
|
|
231
|
+
// a root-running install (or a host UID that differs from the forced
|
|
232
|
+
// container UID) are unwritable inside the non-root container — on OrbStack
|
|
233
|
+
// real UIDs are preserved, so e.g. ollama's mkdir is denied (issue #452).
|
|
234
|
+
const operatorIds = resolveOperatorIds(state.homeDir);
|
|
235
|
+
|
|
226
236
|
const envVars: Record<string, string> = {
|
|
227
237
|
...(process.env as Record<string, string>),
|
|
228
238
|
...parseEnvFile(`${state.stashDir}/env/stack.env`),
|
|
@@ -264,16 +274,40 @@ export function ensureComposeVolumeTargets(state: ControlPlaneState): void {
|
|
|
264
274
|
const isFile = basename.includes('.');
|
|
265
275
|
|
|
266
276
|
if (isFile) {
|
|
267
|
-
|
|
277
|
+
const parent = dirname(resolvedHostPath);
|
|
278
|
+
mkdirSync(parent, { recursive: true });
|
|
268
279
|
writeFileSync(resolvedHostPath, '');
|
|
280
|
+
chownVolumeTarget(parent, operatorIds);
|
|
281
|
+
chownVolumeTarget(resolvedHostPath, operatorIds);
|
|
269
282
|
} else {
|
|
270
283
|
mkdirSync(resolvedHostPath, { recursive: true });
|
|
284
|
+
chownVolumeTarget(resolvedHostPath, operatorIds);
|
|
271
285
|
}
|
|
272
286
|
}
|
|
273
287
|
}
|
|
274
288
|
}
|
|
275
289
|
}
|
|
276
290
|
|
|
291
|
+
/**
 * Hand ownership of a freshly created bind-mount target to the operator
 * UID/GID so that the non-root container (`user: ${OP_UID}:${OP_GID}`) is able
 * to write into it.
 *
 * Does nothing on Windows and nothing when `operatorIds` is null. A chown
 * failure (for example, the caller is not the owner) is reported as a warning
 * and otherwise ignored: the target already exists at this point, so a failed
 * ownership change must not abort the install.
 *
 * @param path - Host path of the file or directory to re-own.
 * @param operatorIds - UID/GID pair to assign, or null when none was resolved.
 */
function chownVolumeTarget(path: string, operatorIds: OperatorIds | null): void {
  const onWindows = process.platform === "win32";
  if (onWindows || !operatorIds) return;

  const { uid, gid } = operatorIds;
  try {
    chownSync(path, uid, gid);
  } catch (error) {
    // Best-effort by design: log the reason and carry on.
    const reason = error instanceof Error ? error.message : String(error);
    logger.warn(`Could not chown volume target ${path} to ${uid}:${gid}: ${reason}`);
  }
}
|
|
310
|
+
|
|
277
311
|
// ── Persistence (direct-write to live paths) ────────────────────────
|
|
278
312
|
|
|
279
313
|
export function writeRuntimeFiles(
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { afterEach, describe, expect, it } from "bun:test";
|
|
2
|
+
import {
|
|
3
|
+
detectExistingProject,
|
|
4
|
+
isProjectOurs,
|
|
5
|
+
resolveComposeProjectName,
|
|
6
|
+
} from "./docker.js";
|
|
7
|
+
|
|
8
|
+
describe("isProjectOurs (ours-vs-foreign decision)", () => {
  // Working dir of the install under test; every case compares a label to it.
  const ourHome = "/home/me/.openpalm";

  it("treats a matching working_dir as ours", () => {
    const verdict = isProjectOurs("/home/me/.openpalm", ourHome);
    expect(verdict).toBe(true);
  });

  it("treats a different working_dir as foreign", () => {
    const verdict = isProjectOurs("/home/other/.openpalm", ourHome);
    expect(verdict).toBe(false);
  });

  it("treats an empty/unknown working_dir as ours (reconcile, don't refuse)", () => {
    // Both the empty label and a whitespace-only label count as "unknown".
    for (const unknownLabel of ["", "   "]) {
      expect(isProjectOurs(unknownLabel, ourHome)).toBe(true);
    }
  });

  it("ignores surrounding whitespace on the label", () => {
    const paddedLabel = "  /home/me/.openpalm  \n";
    expect(isProjectOurs(paddedLabel, ourHome)).toBe(true);
  });
});
|
|
26
|
+
|
|
27
|
+
describe("detectExistingProject", () => {
  // The project name is randomised so it cannot match any running container.
  // That keeps the outcome the same with or without a docker daemon:
  //   - docker error (no daemon)        → { exists:false }
  //   - docker ok, no matching label    → { exists:false }
  const randomSuffix = Math.random().toString(36).slice(2);
  const ghostName = `openpalm-detect-test-${Date.now()}-${randomSuffix}`;

  it("returns exists:false when no project matches (or docker is unavailable)", async () => {
    const { exists, isOurs, workingDir } = await detectExistingProject({
      projectName: ghostName,
      expectedWorkingDir: "/nonexistent/op_home",
    });

    expect(exists).toBe(false);
    expect(isOurs).toBe(false);
    expect(workingDir).toBe("");
  });
});
|
|
44
|
+
|
|
45
|
+
describe("resolveComposeProjectName", () => {
  // Every env var the tests below mutate is snapshotted here and restored
  // after each test. Restoring only OP_PROJECT_NAME would leave a deleted
  // COMPOSE_PROJECT_NAME missing for any test that runs afterwards.
  const touchedEnvKeys = ["OP_PROJECT_NAME", "COMPOSE_PROJECT_NAME"] as const;
  const saved = new Map<string, string | undefined>(
    touchedEnvKeys.map((key) => [key, process.env[key]]),
  );

  afterEach(() => {
    for (const [key, value] of saved) {
      if (value === undefined) delete process.env[key];
      else process.env[key] = value;
    }
  });

  it("defaults to openpalm", () => {
    delete process.env.OP_PROJECT_NAME;
    delete process.env.COMPOSE_PROJECT_NAME;
    expect(resolveComposeProjectName({})).toBe("openpalm");
  });

  it("honors OP_PROJECT_NAME from overrides first", () => {
    expect(resolveComposeProjectName({ OP_PROJECT_NAME: "openpalm-dev" })).toBe("openpalm-dev");
  });
});
|
|
@@ -51,6 +51,83 @@ export function resolveComposeProjectName(envOverrides: Record<string, string> =
|
|
|
51
51
|
);
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
+
/**
 * Outcome of asking the Docker daemon whether a compose project with our
 * project name is already present.
 */
export type ExistingProject = {
  /** True when at least one running container carries the project label. */
  exists: boolean;
  /**
   * True when those containers were launched from THIS install's working dir
   * (compose working_dir label === expectedWorkingDir); the caller should then
   * reconcile in place (up --force-recreate). False means a DIFFERENT OpenPalm
   * install (e.g. dev vs host) owns the name and the caller must refuse.
   */
  isOurs: boolean;
  /**
   * The working_dir label read off the first container, for error messages.
   * Empty string when unknown.
   */
  workingDir: string;
};
|
|
72
|
+
|
|
73
|
+
/**
 * Drop trailing `/` or `\` separators from a path string so that `/x` and
 * `/x/` compare equal. A string made only of separators (e.g. the root `/`)
 * is returned trimmed but otherwise untouched.
 */
function stripTrailingPathSeparators(path: string): string {
  const trimmed = path.trim();
  const stripped = trimmed.replace(/[\\/]+$/, "");
  return stripped === "" ? trimmed : stripped;
}

/**
 * Decide whether a running compose project (identified by its
 * `com.docker.compose.project.working_dir` label) is OURS — i.e. was launched
 * from this install's working dir. An empty/unknown label can't prove foreign,
 * so it counts as ours (reconcile rather than wrongly refuse a redeploy).
 *
 * Surrounding whitespace and trailing path separators are ignored on both
 * sides, so an expected dir written as `/home/me/.openpalm/` still matches the
 * label `/home/me/.openpalm`. No other normalisation (case, symlinks, `..`)
 * is attempted.
 *
 * Pure decision split out from detectExistingProject so the ours-vs-foreign
 * rule is unit-testable without a Docker daemon.
 *
 * @param workingDirLabel - Raw value of the container's working_dir label.
 * @param expectedWorkingDir - Working dir of the current install.
 * @returns true when the label is empty or names the same directory.
 */
export function isProjectOurs(workingDirLabel: string, expectedWorkingDir: string): boolean {
  const label = workingDirLabel.trim();
  if (label === "") return true;
  return stripTrailingPathSeparators(label) === stripTrailingPathSeparators(expectedWorkingDir);
}
|
|
86
|
+
|
|
87
|
+
/**
 * Probe the Docker daemon for a running compose project that shares
 * `projectName`. Decides ours-vs-foreign by comparing the project's
 * `com.docker.compose.project.working_dir` label against `expectedWorkingDir`
 * (the install's OP_HOME / compose context).
 *
 * Returns `{ exists:false }` on any docker error (daemon down, no permission,
 * or the follow-up `docker inspect` failing) — detection is best-effort and
 * never blocks the caller; a real failure surfaces later through composeUp.
 *
 * @param opts.projectName - Compose project name to look for.
 * @param opts.expectedWorkingDir - Working dir of the current install.
 * @returns A promise that always resolves (never rejects) with the probe result.
 */
export function detectExistingProject(opts: {
  projectName: string;
  expectedWorkingDir: string;
}): Promise<ExistingProject> {
  const none: ExistingProject = { exists: false, isOurs: false, workingDir: "" };
  const execOptions = { timeout: 10_000 };

  return new Promise((resolve) => {
    execFile(
      "docker",
      ["ps", "-q", "--filter", `label=com.docker.compose.project=${opts.projectName}`],
      execOptions,
      (psError, psStdout) => {
        if (psError) return resolve(none);

        // `docker ps -q` prints one container id per line; only the first is inspected.
        const [firstId] = psStdout.toString().trim().split(/\s+/).filter(Boolean);
        if (firstId === undefined) return resolve(none);

        execFile(
          "docker",
          [
            "inspect",
            "--format",
            '{{ index .Config.Labels "com.docker.compose.project.working_dir" }}',
            firstId,
          ],
          execOptions,
          (inspectError, inspectStdout) => {
            // An inspect failure is a docker error like any other. Reporting
            // `exists:true, isOurs:false` here would make the caller refuse
            // the install, which contradicts the best-effort contract above.
            if (inspectError) return resolve(none);

            const workingDir = inspectStdout.toString().trim();
            resolve({
              exists: true,
              isOurs: isProjectOurs(workingDir, opts.expectedWorkingDir),
              workingDir,
            });
          },
        );
      },
    );
  });
}
|
|
130
|
+
|
|
54
131
|
/** Check if Docker is available */
|
|
55
132
|
export async function checkDocker(): Promise<DockerResult> {
|
|
56
133
|
return new Promise((resolve) => {
|
|
@@ -172,7 +249,21 @@ export async function composeUp(
|
|
|
172
249
|
if (options.forceRecreate) args.push("--force-recreate");
|
|
173
250
|
if (options.removeOrphans) args.push("--remove-orphans");
|
|
174
251
|
if (options.services?.length) args.push(...options.services);
|
|
175
|
-
return run(args, undefined,
|
|
252
|
+
return run(args, undefined, composeUpTimeoutMs(), collectEnvOverrides(options.envFiles));
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
/**
 * Timeout budget for `compose up`. A first install extracts multi-GB images
 * (voice CUDA ~7.6 GB) onto slow disks; the previous hard 5-minute cap
 * SIGTERM-killed the start mid-extraction and surfaced as an empty/opaque
 * error. Default 30 min, override with OP_COMPOSE_UP_TIMEOUT_MS. Kept bounded
 * (never removed) so a genuinely hung start still eventually fails.
 *
 * The override is sanitised before use:
 *  - unset, blank, non-numeric, non-finite, zero or negative → the default;
 *  - fractional values are rounded up to a whole millisecond;
 *  - values above 2^31-1 are clamped, because Node timers treat a larger
 *    delay as 1 ms, which would kill the start almost immediately instead of
 *    giving it a long budget.
 *
 * @returns The timeout in whole milliseconds, in the range [1, 2147483647].
 */
function composeUpTimeoutMs(): number {
  const DEFAULT_TIMEOUT_MS = 30 * 60_000;
  // Largest delay a Node timer honours (signed 32-bit milliseconds).
  const MAX_TIMER_DELAY_MS = 2_147_483_647;

  const raw = process.env.OP_COMPOSE_UP_TIMEOUT_MS?.trim();
  if (!raw) return DEFAULT_TIMEOUT_MS;

  const parsed = Number(raw);
  if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_TIMEOUT_MS;

  return Math.min(Math.ceil(parsed), MAX_TIMER_DELAY_MS);
}
|
|
177
268
|
|
|
178
269
|
/**
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
// Host GPU / VRAM detection for setup recommendations.
|
|
2
|
+
//
|
|
3
|
+
// Data-driven on purpose: each entry in GPU_PROBES is a vendor + a command to
|
|
4
|
+
// run + a pure parser. Adding a new accelerator (Intel Arc, Apple Metal, a new
|
|
5
|
+
// rocm/CUDA query, etc.) is a one-entry change here — nothing downstream needs to
|
|
6
|
+
// know. detectGpu() runs every probe, ignores the ones whose tool is absent, and
|
|
7
|
+
// returns the single best (highest-VRAM) result, or null when no GPU is found.
|
|
8
|
+
|
|
9
|
+
import { execFile } from "node:child_process";
|
|
10
|
+
import { createLogger } from "../logger.js";
|
|
11
|
+
|
|
12
|
+
const logger = createLogger("hardware-detect");
|
|
13
|
+
|
|
14
|
+
/** Vendor tag attached to every detected adapter. */
export type GpuVendor = "nvidia" | "amd" | "apple" | "unknown";

/** One detected GPU adapter. */
export type GpuInfo = {
  vendor: GpuVendor;
  /** Human-readable adapter name, e.g. "NVIDIA GeForce RTX 4090". */
  name: string;
  /** Total VRAM in MiB. 0 when the tool reported the GPU but not its memory. */
  vramMb: number;
};

/** One row of the probe table: a command to run plus the parser for its output. */
type GpuProbe = {
  vendor: GpuVendor;
  /** Executable to spawn. */
  command: string;
  /** Arguments passed to `command`. */
  args: string[];
  /** Pure parser: tool stdout -> detected GPUs. Must not throw. */
  parse: (stdout: string) => GpuInfo[];
  /** Optional gate — when present and false, the probe is skipped entirely. */
  enabled?: boolean;
};
|
|
33
|
+
|
|
34
|
+
/**
 * Parse the output of
 * `nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits`.
 *
 * Each non-blank line is expected to look like
 * `NVIDIA GeForce RTX 4090, 24564`. The line is split at its LAST comma, so
 * commas inside the adapter name are kept. Lines without a comma, with an
 * empty name, or whose memory field does not start with an integer are
 * dropped.
 *
 * @param stdout - Raw tool output.
 * @returns One entry per well-formed line, in input order.
 */
export function parseNvidiaSmi(stdout: string): GpuInfo[] {
  const gpus: GpuInfo[] = [];

  for (const rawLine of stdout.split("\n")) {
    const line = rawLine.trim();
    if (line === "") continue;

    const comma = line.lastIndexOf(",");
    if (comma < 0) continue;

    const name = line.slice(0, comma).trim();
    const vramMb = Number.parseInt(line.slice(comma + 1).trim(), 10);
    if (name === "" || !Number.isFinite(vramMb)) continue;

    gpus.push({ vendor: "nvidia", name, vramMb });
  }

  return gpus;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Parse the output of `rocm-smi --showmeminfo vram --showproductname --json`.
 *
 * The document is treated as a map whose object-valued entries are cards.
 * For each card the VRAM size is read from the first key matching
 * "vram total memory" (interpreted as bytes and converted to MiB), and the
 * name from the first key matching "product name", "card series" or "gfx".
 * A card with no usable VRAM field gets `vramMb: 0`; a card with no name
 * field is called "AMD GPU".
 *
 * Never throws: invalid JSON, and JSON that is not an object (`null`, a
 * number, a string), all yield an empty list.
 *
 * @param stdout - Raw tool output.
 * @returns One entry per object-valued top-level entry.
 */
export function parseRocmSmi(stdout: string): GpuInfo[] {
  let doc: unknown;
  try {
    doc = JSON.parse(stdout);
  } catch {
    return [];
  }
  // JSON.parse happily returns null or a primitive; Object.values(null) would
  // throw and break the "must not throw" parser contract.
  if (doc === null || typeof doc !== "object") return [];

  const out: GpuInfo[] = [];
  for (const card of Object.values(doc)) {
    if (!card || typeof card !== "object") continue;
    const fields = card as Record<string, unknown>;
    const keys = Object.keys(fields);

    // rocm-smi key names drift across versions — match loosely.
    const vramKey = keys.find((k) => /vram total memory/i.test(k));
    const nameKey = keys.find((k) => /product name|card series|gfx/i.test(k));

    const bytes = vramKey ? Number.parseInt(String(fields[vramKey]).trim(), 10) : NaN;
    const vramMb = Number.isFinite(bytes) ? Math.round(bytes / (1024 * 1024)) : 0;
    const name = nameKey ? String(fields[nameKey]).trim() : "AMD GPU";

    out.push({ vendor: "amd", name, vramMb });
  }
  return out;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Turn the output of `sysctl -n hw.memsize hw.model` into an Apple-Silicon
 * GpuInfo. The first non-blank line is the total memory in bytes, the second
 * (optional) one the model id; without it the name falls back to "arm64".
 *
 * `hw.memsize` is UNIFIED memory shared between CPU and GPU. It is carried in
 * `vramMb` for informational display only — callers must NOT treat it like
 * discrete VRAM (see setup-recommendation).
 *
 * Pure; never throws. Returns an empty list when there is no output or the
 * first line is not a positive integer.
 *
 * @param stdout - Raw tool output.
 * @returns Zero or one entry.
 */
export function parseAppleSilicon(stdout: string): GpuInfo[] {
  const [memsizeLine, modelLine] = stdout
    .split("\n")
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);
  if (memsizeLine === undefined) return [];

  const totalBytes = Number.parseInt(memsizeLine, 10);
  const usable = Number.isFinite(totalBytes) && totalBytes > 0;
  if (!usable) return [];

  const model = modelLine ? modelLine : "arm64";
  return [
    {
      vendor: "apple",
      name: `Apple Silicon (${model})`,
      vramMb: Math.round(totalBytes / (1024 * 1024)),
    },
  ];
}
|
|
91
|
+
|
|
92
|
+
// Probe table consumed by detectGpu(): one row per accelerator tool.
const GPU_PROBES: GpuProbe[] = [
  {
    vendor: "nvidia",
    parse: parseNvidiaSmi,
    command: "nvidia-smi",
    args: ["--query-gpu=name,memory.total", "--format=csv,noheader,nounits"],
  },
  {
    vendor: "amd",
    parse: parseRocmSmi,
    command: "rocm-smi",
    args: ["--showmeminfo", "vram", "--showproductname", "--json"],
  },
  {
    // Apple Silicon Macs ship neither nvidia-smi nor rocm-smi, so macOS sysctl
    // is queried for unified-memory size + model id instead. The row is gated
    // to darwin/arm64, so it is skipped on every other platform/architecture.
    vendor: "apple",
    parse: parseAppleSilicon,
    command: "sysctl",
    args: ["-n", "hw.memsize", "hw.model"],
    enabled: process.platform === "darwin" && process.arch === "arm64",
  },
];
|
|
116
|
+
|
|
117
|
+
/**
 * Spawn `command` with `args` and capture its stdout.
 *
 * @param command - Executable to run.
 * @param args - Arguments for the executable.
 * @param timeoutMs - Passed to execFile as its `timeout` option (default 3000).
 * @returns The captured stdout as a string, or null when execFile reports any
 *   error (tool not installed, non-zero exit, timeout). Never rejects.
 */
function run(command: string, args: string[], timeoutMs = 3_000): Promise<string | null> {
  return new Promise<string | null>((settle) => {
    execFile(command, args, { timeout: timeoutMs }, (failure, output) => {
      // ENOENT (tool not installed) and any non-zero exit -> not available.
      if (failure) {
        settle(null);
        return;
      }
      settle(output?.toString() ?? "");
    });
  });
}
|
|
125
|
+
|
|
126
|
+
/**
 * Detect the host's best GPU. Returns the highest-VRAM adapter across all
 * probes, or null when none is found. Never throws.
 *
 * Probes run concurrently, but their results are combined in GPU_PROBES
 * order. When two adapters report the same VRAM the one from the earlier
 * probe (and earlier in that probe's output) wins, so the answer does not
 * depend on which tool happened to finish first.
 *
 * @returns The selected adapter, or null when every probe was skipped,
 *   unavailable, or produced nothing.
 */
export async function detectGpu(): Promise<GpuInfo | null> {
  const perProbe = await Promise.all(
    GPU_PROBES.map(async (probe): Promise<GpuInfo[]> => {
      if (probe.enabled === false) return [];
      const stdout = await run(probe.command, probe.args);
      if (stdout === null) return [];
      try {
        return probe.parse(stdout);
      } catch (error) {
        // Parsers are meant to be total; a throwing parser only costs its own probe.
        logger.debug("gpu probe parse failed", { vendor: probe.vendor, error: String(error) });
        return [];
      }
    }),
  );

  let best: GpuInfo | null = null;
  for (const candidate of perProbe.flat()) {
    // Strict ">" keeps the earliest candidate on ties.
    if (best === null || candidate.vramMb > best.vramMb) best = candidate;
  }
  return best;
}
|
|
@@ -172,10 +172,7 @@ function resolveNewestDockerTag(payload: unknown): string | null {
|
|
|
172
172
|
return fallback;
|
|
173
173
|
}
|
|
174
174
|
|
|
175
|
-
|
|
176
|
-
namespace: string;
|
|
177
|
-
tag: string;
|
|
178
|
-
}> {
|
|
175
|
+
function resolveImageNamespace(state: ControlPlaneState): string {
|
|
179
176
|
const systemEnvPath = `${state.stashDir}/env/stack.env`;
|
|
180
177
|
const parsed = parseEnvFile(systemEnvPath);
|
|
181
178
|
const namespace = (parsed.OP_IMAGE_NAMESPACE ?? process.env.OP_IMAGE_NAMESPACE ?? "openpalm").trim().toLowerCase();
|
|
@@ -183,11 +180,21 @@ export async function updateStackEnvToLatestImageTag(state: ControlPlaneState):
|
|
|
183
180
|
if (!IMAGE_NAMESPACE_RE.test(namespace)) {
|
|
184
181
|
throw new Error(`Invalid image namespace in system.env: ${namespace}`);
|
|
185
182
|
}
|
|
183
|
+
return namespace;
|
|
184
|
+
}
|
|
186
185
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
186
|
+
/**
|
|
187
|
+
* Resolve the newest published platform tag from the Docker registry.
|
|
188
|
+
*
|
|
189
|
+
* `assistant` is the version-of-record image: all platform images
|
|
190
|
+
* (assistant, guardian, channel, voice) are published in lockstep under the
|
|
191
|
+
* same OP_IMAGE_TAG, so its newest tag is the canonical platform version.
|
|
192
|
+
*
|
|
193
|
+
* Used both to auto-detect during "Update now" and to resolve a requested
|
|
194
|
+
* `latest` selection into a concrete release tag before fetching stack assets
|
|
195
|
+
* (GitHub has no asset tree at a `latest` ref).
|
|
196
|
+
*/
|
|
197
|
+
export async function resolveLatestPlatformTag(namespace: string): Promise<string> {
|
|
191
198
|
let response: Response;
|
|
192
199
|
try {
|
|
193
200
|
response = await fetch(
|
|
@@ -207,6 +214,16 @@ export async function updateStackEnvToLatestImageTag(state: ControlPlaneState):
|
|
|
207
214
|
if (!latestTag) {
|
|
208
215
|
throw new Error("No usable Docker image tag found");
|
|
209
216
|
}
|
|
217
|
+
return latestTag;
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
export async function updateStackEnvToLatestImageTag(state: ControlPlaneState): Promise<{
|
|
221
|
+
namespace: string;
|
|
222
|
+
tag: string;
|
|
223
|
+
}> {
|
|
224
|
+
const systemEnvPath = `${state.stashDir}/env/stack.env`;
|
|
225
|
+
const namespace = resolveImageNamespace(state);
|
|
226
|
+
const latestTag = await resolveLatestPlatformTag(namespace);
|
|
210
227
|
|
|
211
228
|
const currentContent = existsSync(systemEnvPath) ? readFileSync(systemEnvPath, "utf-8") : "";
|
|
212
229
|
const updatedContent = mergeEnvContent(currentContent, { OP_IMAGE_TAG: latestTag }, { uncomment: true });
|
|
@@ -288,9 +305,14 @@ export async function performUpgrade(state: ControlPlaneState): Promise<UpgradeR
|
|
|
288
305
|
throw new Error(`Failed to pull images: ${pullResult.stderr}`);
|
|
289
306
|
}
|
|
290
307
|
|
|
291
|
-
// 4. Recreate containers (includes profiles for voice addon)
|
|
308
|
+
// 4. Recreate containers (includes profiles for voice addon).
|
|
309
|
+
// forceRecreate is REQUIRED: channel adapters are installed at container
|
|
310
|
+
// startup from npm dist-tags (CHANNEL_PACKAGE, e.g. @openpalm/channel-discord@latest),
|
|
311
|
+
// so an unchanged compose config would leave those containers running on the
|
|
312
|
+
// old adapter. --force-recreate guarantees guardian + channel containers
|
|
313
|
+
// restart and re-resolve their dist-tag adapters (issue #450).
|
|
292
314
|
const services = await buildManagedServices(state);
|
|
293
|
-
const upResult = await composeUp({ ...composeOpts, services, removeOrphans: true });
|
|
315
|
+
const upResult = await composeUp({ ...composeOpts, services, forceRecreate: true, removeOrphans: true });
|
|
294
316
|
if (!upResult.ok) {
|
|
295
317
|
throw new Error(`Images pulled but failed to recreate containers: ${upResult.stderr}`);
|
|
296
318
|
}
|
|
@@ -309,13 +331,34 @@ export async function performUpgrade(state: ControlPlaneState): Promise<UpgradeR
|
|
|
309
331
|
* Used by the admin "set version" action — skips the auto-detect step in performUpgrade.
|
|
310
332
|
*/
|
|
311
333
|
export async function applyTagChange(state: ControlPlaneState, tag: string): Promise<UpgradeResult> {
|
|
334
|
+
const namespace = resolveImageNamespace(state);
|
|
335
|
+
|
|
336
|
+
// "latest" (or an empty selection) is not a real GitHub ref — there are no
|
|
337
|
+
// `.openpalm/...` stack assets at a `latest` tag, so refreshCoreAssets would
|
|
338
|
+
// fail with a raw download error. Resolve it to the concrete newest published
|
|
339
|
+
// platform tag BEFORE writing the env or fetching assets, so images and
|
|
340
|
+
// stack assets stay in lockstep on a real release tag.
|
|
341
|
+
const requested = tag.trim();
|
|
342
|
+
let resolvedTag = requested;
|
|
343
|
+
if (requested === "" || requested.toLowerCase() === "latest") {
|
|
344
|
+
try {
|
|
345
|
+
resolvedTag = await resolveLatestPlatformTag(namespace);
|
|
346
|
+
} catch (e) {
|
|
347
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
348
|
+
throw new Error(
|
|
349
|
+
`Cannot resolve "latest" to a concrete release: ${msg}. ` +
|
|
350
|
+
"Check your network connection or select a specific version."
|
|
351
|
+
);
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
|
|
312
355
|
const stackEnvPath = `${state.stashDir}/env/stack.env`;
|
|
313
356
|
const currentContent = existsSync(stackEnvPath) ? readFileSync(stackEnvPath, "utf-8") : "";
|
|
314
|
-
writeFileSync(stackEnvPath, mergeEnvContent(currentContent, { OP_IMAGE_TAG:
|
|
315
|
-
const upgradeResult = await applyUpgrade(state,
|
|
357
|
+
writeFileSync(stackEnvPath, mergeEnvContent(currentContent, { OP_IMAGE_TAG: resolvedTag }, { uncomment: true }));
|
|
358
|
+
const upgradeResult = await applyUpgrade(state, resolvedTag);
|
|
316
359
|
return {
|
|
317
|
-
imageTag:
|
|
318
|
-
namespace
|
|
360
|
+
imageTag: resolvedTag,
|
|
361
|
+
namespace,
|
|
319
362
|
backupDir: upgradeResult.backupDir,
|
|
320
363
|
assetsUpdated: upgradeResult.updated,
|
|
321
364
|
restarted: upgradeResult.restarted,
|
|
@@ -329,20 +372,27 @@ export function buildComposeFileList(state: ControlPlaneState): string[] {
|
|
|
329
372
|
/**
 * Work out which compose services an upgrade should (re)create.
 *
 * The result always contains CORE_SERVICES. On top of that it adds either the
 * service list reported by `composeConfigServices` (when compose files exist,
 * OP_SKIP_COMPOSE_PREFLIGHT is unset, and the call succeeds with a non-empty
 * list) or, as a fallback, the service names of every enabled addon.
 *
 * @param state - Control-plane state used to build compose options and to
 *   look up enabled addons.
 * @returns De-duplicated service names, core services first.
 */
export async function buildManagedServices(state: ControlPlaneState): Promise<string[]> {
  const composeOpts = buildComposeOptions(state);

  // Seeding with the core services guarantees they are in the result however
  // the rest of the set is discovered. getAddonServiceNames deliberately
  // EXCLUDES guardian, so relying on it alone would drop guardian from the
  // recreated set when channel profiles are active (issue #450).
  const managed = new Set<string>(CORE_SERVICES);

  // Preferred source: the service list compose itself derives, when available.
  const preflightAllowed = !process.env.OP_SKIP_COMPOSE_PREFLIGHT;
  if (composeOpts.files.length > 0 && preflightAllowed) {
    const discovered = await composeConfigServices(composeOpts);
    if (discovered.ok && discovered.services.length > 0) {
      for (const serviceName of discovered.services) {
        managed.add(serviceName);
      }
      return Array.from(managed);
    }
  }

  // Fallback: static inference from CORE_SERVICES + active addon overlays.
  for (const addonId of listEnabledAddonIds(state.homeDir)) {
    for (const serviceName of getAddonServiceNames(state.homeDir, addonId)) {
      managed.add(serviceName);
    }
  }
  return Array.from(managed);
}
|
|
347
397
|
|
|
348
398
|
|
|
@@ -12,7 +12,7 @@ import { tmpdir } from 'node:os';
|
|
|
12
12
|
import { parse as parseYaml } from 'yaml';
|
|
13
13
|
import { createLogger } from '../logger.js';
|
|
14
14
|
import { resolveLocalOpenpalmDir } from './ui-assets.js';
|
|
15
|
-
import { ensureChannelSecret } from './config-persistence.js';
|
|
15
|
+
import { ensureChannelSecret, ensureComposeVolumeTargets } from './config-persistence.js';
|
|
16
16
|
import { patchSecretsEnvFile, readStackEnv } from './secrets.js';
|
|
17
17
|
import { readBundledStackAsset } from './core-assets.js';
|
|
18
18
|
import { canonicalAddonProfileSelection, resolveHardwareProfileVariant } from './profile-ids.js';
|
|
@@ -907,6 +907,15 @@ export function setAddonEnabled(homeDir: string, stackDir: string, name: string,
|
|
|
907
907
|
ensureChannelSecret(stackDir, channel);
|
|
908
908
|
}
|
|
909
909
|
}
|
|
910
|
+
|
|
911
|
+
// Pre-create (and chown) any host-side bind-mount targets the newly
|
|
912
|
+
// enabled addon declares — e.g. ollama's data dir. Matches the install
|
|
913
|
+
// path (applyInstall → ensureComposeVolumeTargets) so enabling an addon
|
|
914
|
+
// post-install isn't more exposed than enabling it at install time
|
|
915
|
+
// (issue #452). Guarded on `state` since callers may omit it.
|
|
916
|
+
if (state) {
|
|
917
|
+
ensureComposeVolumeTargets(state);
|
|
918
|
+
}
|
|
910
919
|
}
|
|
911
920
|
|
|
912
921
|
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import { test, expect, describe } from "bun:test";
|
|
2
|
+
import {
|
|
3
|
+
recommendSetup,
|
|
4
|
+
gpuToProfileVariant,
|
|
5
|
+
MIN_LOCAL_GPU_VRAM_MB,
|
|
6
|
+
type SetupRecommendationInput,
|
|
7
|
+
} from "./setup-recommendation.js";
|
|
8
|
+
import { parseNvidiaSmi, parseRocmSmi, parseAppleSilicon, type GpuInfo } from "./hardware-detect.js";
|
|
9
|
+
|
|
10
|
+
const base: SetupRecommendationInput = { cloudProviders: [], hostProviders: [], gpu: null };
|
|
11
|
+
const gpu = (vendor: GpuInfo["vendor"], vramMb: number, name = "Test GPU"): GpuInfo => ({ vendor, name, vramMb });
|
|
12
|
+
|
|
13
|
+
// Rule-ordering suite for recommendSetup: each case feeds one combination of
// detected cloud providers, host providers, GPU and platform, and pins the action
// (plus, where relevant, the profile variant or alert wording) that comes back.
describe("recommendSetup", () => {
  test("cloud provider connected -> use-cloud (wins over everything)", () => {
    const rec = recommendSetup({
      cloudProviders: ["openai"],
      hostProviders: [{ provider: "ollama", url: "x" }],
      gpu: gpu("nvidia", 24576),
    });

    expect(rec.action).toBe("use-cloud");
  });

  test("no cloud, host provider running -> use-host-providers", () => {
    const rec = recommendSetup({
      ...base,
      hostProviders: [{ provider: "ollama", url: "http://host:11434" }],
      gpu: gpu("nvidia", 24576),
    });

    expect(rec.action).toBe("use-host-providers");
    if (rec.action === "use-host-providers") {
      expect(rec.alert).toContain("ollama");
    }
  });

  test("host providers win over GPU enable-ollama", () => {
    const rec = recommendSetup({
      ...base,
      hostProviders: [{ provider: "lmstudio", url: "x" }],
      gpu: gpu("nvidia", 24576),
    });

    expect(rec.action).toBe("use-host-providers");
  });

  test("no cloud, no host, capable nvidia GPU -> enable-ollama cuda", () => {
    const rec = recommendSetup({ ...base, gpu: gpu("nvidia", 12288) });

    expect(rec.action).toBe("enable-ollama");
    if (rec.action === "enable-ollama") {
      expect(rec.profileVariant).toBe("cuda");
    }
  });

  test("capable amd GPU -> enable-ollama rocm", () => {
    const rec = recommendSetup({ ...base, gpu: gpu("amd", 16384) });

    // Fail loudly on any other action so the variant check below can never be skipped.
    if (rec.action !== "enable-ollama") {
      throw new Error("expected enable-ollama");
    }
    expect(rec.profileVariant).toBe("rocm");
  });

  test("VRAM exactly at threshold -> enable-ollama", () => {
    const rec = recommendSetup({ ...base, gpu: gpu("nvidia", MIN_LOCAL_GPU_VRAM_MB) });

    expect(rec.action).toBe("enable-ollama");
  });

  test("VRAM just under threshold -> connect-manually", () => {
    const rec = recommendSetup({ ...base, gpu: gpu("nvidia", MIN_LOCAL_GPU_VRAM_MB - 1) });

    expect(rec.action).toBe("connect-manually");
  });

  test("darwin + apple GPU + no provider -> connect-manually (NOT enable-ollama), Mac-tailored alert", () => {
    const rec = recommendSetup({
      ...base,
      platform: "darwin",
      gpu: gpu("apple", 65536, "Apple Silicon (Mac15,7)"),
    });

    expect(rec.action).toBe("connect-manually");
    expect(rec.action).not.toBe("enable-ollama");
    if (rec.action === "connect-manually") {
      expect(rec.alert).toContain("macOS");
      expect(rec.alert).toContain("Metal");
      expect(rec.alert.toLowerCase()).toContain("ollama");
    }
  });

  test("darwin + apple GPU never selects cuda/rocm (no in-stack enable)", () => {
    // 128 GiB of unified memory is far above the threshold; the action must still not be enable-ollama.
    const rec = recommendSetup({ ...base, platform: "darwin", gpu: gpu("apple", 131072) });

    expect(rec.action).not.toBe("enable-ollama");
  });

  test("darwin + host ollama running -> still use-host-providers (wins over apple guidance)", () => {
    const rec = recommendSetup({
      ...base,
      platform: "darwin",
      hostProviders: [{ provider: "ollama", url: "http://localhost:11434" }],
      gpu: gpu("apple", 65536),
    });

    expect(rec.action).toBe("use-host-providers");
  });

  test("linux + nvidia >= threshold -> still enable-ollama cuda (unchanged)", () => {
    const rec = recommendSetup({ ...base, platform: "linux", gpu: gpu("nvidia", 24576) });

    expect(rec.action).toBe("enable-ollama");
    if (rec.action === "enable-ollama") {
      expect(rec.profileVariant).toBe("cuda");
    }
  });

  test("no cloud, no host, no GPU -> connect-manually", () => {
    const rec = recommendSetup(base);

    expect(rec.action).toBe("connect-manually");
    if (rec.action === "connect-manually") {
      expect(rec.alert).toContain("custom OpenAI-compatible");
    }
  });
});
|
|
95
|
+
|
|
96
|
+
// Vendor -> Ollama profile-variant mapping, checked as a table in the original order.
describe("gpuToProfileVariant", () => {
  test("nvidia->cuda, amd->rocm, apple->cpu, unknown->cpu", () => {
    const expectations = [
      [gpu("nvidia", 8192), "cuda"],
      [gpu("amd", 8192), "rocm"],
      [gpu("apple", 65536), "cpu"],
      [gpu("unknown", 8192), "cpu"],
    ] as const;

    for (const [info, variant] of expectations) {
      expect(gpuToProfileVariant(info)).toBe(variant);
    }
  });
});
|
|
104
|
+
|
|
105
|
+
// parseAppleSilicon takes two-line text: a byte count, then a model identifier.
describe("parseAppleSilicon", () => {
  const BYTES_PER_GIB = 1024 * 1024 * 1024;

  test("parses hw.memsize bytes -> MiB + vendor apple + model name", () => {
    const stdout = `${16 * BYTES_PER_GIB}\nMac15,7\n`;

    expect(parseAppleSilicon(stdout)).toEqual([
      { vendor: "apple", name: "Apple Silicon (Mac15,7)", vramMb: 16384 },
    ]);
  });

  test("missing model line -> falls back to arm64", () => {
    const [first] = parseAppleSilicon(`${8 * BYTES_PER_GIB}\n`);

    expect(first?.vendor).toBe("apple");
    expect(first?.name).toBe("Apple Silicon (arm64)");
    expect(first?.vramMb).toBe(8192);
  });

  test("garbage / empty -> []", () => {
    for (const input of ["", "not-a-number\nMac15,7"]) {
      expect(parseAppleSilicon(input)).toEqual([]);
    }
  });
});
|
|
122
|
+
|
|
123
|
+
// parseNvidiaSmi takes one "name, MiB" record per line.
describe("parseNvidiaSmi", () => {
  test("parses name + VRAM (MiB), handles commas in name", () => {
    // The trailing empty entry reproduces the final newline of the raw output.
    const stdout = ["NVIDIA GeForce RTX 4090, 24564", "NVIDIA A100, 81920", ""].join("\n");
    const parsed = parseNvidiaSmi(stdout);

    expect(parsed).toEqual([
      { vendor: "nvidia", name: "NVIDIA GeForce RTX 4090", vramMb: 24564 },
      { vendor: "nvidia", name: "NVIDIA A100", vramMb: 81920 },
    ]);
  });

  test("ignores blank/garbage lines", () => {
    const parsed = parseNvidiaSmi("\n \nbadline\n");

    expect(parsed).toEqual([]);
  });
});
|
|
135
|
+
|
|
136
|
+
// parseRocmSmi takes a JSON document keyed by card id.
describe("parseRocmSmi", () => {
  test("parses VRAM bytes -> MiB", () => {
    const sixteenGiBInBytes = 16 * 1024 * 1024 * 1024;
    const report = {
      card0: {
        "VRAM Total Memory (B)": String(sixteenGiBInBytes),
        "Card Series": "Radeon RX 7900 XTX",
      },
    };

    const [first] = parseRocmSmi(JSON.stringify(report));

    expect(first?.vendor).toBe("amd");
    expect(first?.vramMb).toBe(16384);
  });

  test("invalid json -> []", () => {
    const parsed = parseRocmSmi("not json");

    expect(parsed).toEqual([]);
  });
});
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
// Pure decision engine for "what should setup do about AI providers?".
|
|
2
|
+
//
|
|
3
|
+
// Inputs are gathered by the caller (detected cloud providers, host-local
|
|
4
|
+
// providers, GPU). This module makes the call and produces a recommendation +
|
|
5
|
+
// user-facing alert. It is intentionally pure and free of I/O so it is trivially
|
|
6
|
+
// unit-testable and easy to evolve as new hardware/providers/models ship — the
|
|
7
|
+
// only things to edit are the constants at the top and the ordered rules in
|
|
8
|
+
// recommendSetup().
|
|
9
|
+
|
|
10
|
+
import type { GpuInfo, GpuVendor } from "./hardware-detect.js";
|
|
11
|
+
|
|
12
|
+
export type { GpuInfo, GpuVendor } from "./hardware-detect.js";
|
|
13
|
+
|
|
14
|
+
/**
 * VRAM floor, in MiB, at which recommendSetup() auto-enables in-stack Ollama
 * (8 GiB). The comparison is inclusive. Tune freely.
 */
export const MIN_LOCAL_GPU_VRAM_MB = 1024 * 8;
|
|
16
|
+
|
|
17
|
+
/** Ollama hardware-profile variant chosen per GPU vendor. Extend per new vendor. */
const VENDOR_PROFILE_VARIANT: Record<GpuVendor, "cuda" | "rocm" | "cpu"> = {
  nvidia: "cuda",
  amd: "rocm",
  // The in-stack Ollama container on a Mac is a Linux container with no Metal
  // access, so it can only ever run CPU. (On darwin apple GPUs are routed to
  // host-Ollama guidance and never reach enable-ollama — see recommendSetup.)
  apple: "cpu",
  unknown: "cpu",
};

/**
 * Map a detected GPU to the Ollama hardware-profile variant to run.
 *
 * @param gpu Detected GPU; only `gpu.vendor` is read.
 * @returns The variant registered for the vendor, or `"cpu"` for any vendor
 *   value that has no entry in the table.
 */
export function gpuToProfileVariant(gpu: GpuInfo): "cuda" | "rocm" | "cpu" {
  // Own-property check: a bare index lookup would resolve inherited keys such
  // as "constructor" or "toString" to Object.prototype members, which are not
  // nullish and would therefore slip past a `?? "cpu"` fallback.
  const isKnownVendor = Object.prototype.hasOwnProperty.call(VENDOR_PROFILE_VARIANT, gpu.vendor);
  return isKnownVendor ? VENDOR_PROFILE_VARIANT[gpu.vendor] : "cpu";
}
|
|
31
|
+
|
|
32
|
+
/** One local provider found on the host: its provider id and the URL it was detected at. */
export type DetectedHostProvider = {
  provider: string;
  url: string;
};

/** Everything recommendSetup() needs; the caller gathers it, the engine only reads it. */
export type SetupRecommendationInput = {
  /** Already-connected cloud providers (api-key / oauth / env). */
  cloudProviders: string[];

  /** Host-local providers that are reachable right now (e.g. ollama, lmstudio). */
  hostProviders: DetectedHostProvider[];

  /** The best GPU that was detected, or null when there is none. */
  gpu: GpuInfo | null;

  /**
   * Platform of the host. When omitted, `process.platform` is used as the
   * default; apart from that default the decision logic reads only this field.
   * It matters on darwin: the in-stack Ollama runs in Linux and cannot reach
   * the Mac's Metal GPU, so an apple GPU leads to host-Ollama guidance rather
   * than enable-ollama.
   */
  platform?: NodeJS.Platform;
};

/** Outcome of recommendSetup(), discriminated by `action`. */
export type SetupRecommendation =
  // At least one cloud provider is connected: nothing to auto-configure.
  | {
      action: "use-cloud";
      cloudProviders: string[];
    }
  // No cloud provider, but host-local providers are running: add them and
  // continue to model detection.
  | {
      action: "use-host-providers";
      hostProviders: DetectedHostProvider[];
      alert: string;
    }
  // No provider of any kind, but the GPU is capable: enable in-stack Ollama.
  | {
      action: "enable-ollama";
      profileVariant: "cuda" | "rocm" | "cpu";
      gpu: GpuInfo;
      alert: string;
    }
  // No provider and no capable GPU: the user has to connect one manually.
  | {
      action: "connect-manually";
      alert: string;
    };
|
|
60
|
+
|
|
61
|
+
/**
 * Render a MiB amount as a GiB figure for display: exact multiples of 1024
 * print with no decimals, everything else with one decimal place.
 */
const fmtGb = (mb: number): string => {
  const fractionDigits = mb % 1024 === 0 ? 0 : 1;
  const gib = mb / 1024;
  return gib.toFixed(fractionDigits);
};
|
|
62
|
+
|
|
63
|
+
/**
 * Join the provider ids into a phrase for user-facing alerts.
 *
 * One or two providers read "a" / "a and b"; three or more are
 * comma-separated with a final "and" ("a, b and c") rather than chaining
 * "and" between every pair. An empty list yields an empty string.
 */
const labelHostProviders = (h: DetectedHostProvider[]): string => {
  const names = h.map((p) => p.provider);
  if (names.length <= 2) {
    return names.join(" and ");
  }
  const leading = names.slice(0, -1).join(", ");
  const last = names[names.length - 1];
  return `${leading} and ${last}`;
};
|
|
65
|
+
|
|
66
|
+
/**
 * Choose the setup action for the detected providers and hardware.
 *
 * Rules are checked top to bottom and the first one that applies is returned:
 *   1. a cloud provider is connected         -> "use-cloud"
 *   2. a host-local provider is running      -> "use-host-providers"
 *   3. darwin with an apple-vendor GPU       -> "connect-manually" with
 *                                               host-Ollama (Metal) guidance
 *   4. GPU with vramMb >= MIN_LOCAL_GPU_VRAM_MB -> "enable-ollama"
 *   5. anything else                         -> "connect-manually"
 *
 * `input.platform` falls back to `process.platform` when it is omitted.
 */
export function recommendSetup(input: SetupRecommendationInput): SetupRecommendation {
  const platform = input.platform ?? process.platform;
  const detectedGpu = input.gpu;

  // Rule 1: a connected cloud provider beats every local option.
  if (input.cloudProviders.length > 0) {
    return { action: "use-cloud", cloudProviders: input.cloudProviders };
  }

  // Rule 2: providers already running on the host are added as-is.
  if (input.hostProviders.length > 0) {
    const providerLabel = labelHostProviders(input.hostProviders);
    const verb = input.hostProviders.length > 1 ? "are" : "is";
    return {
      action: "use-host-providers",
      hostProviders: input.hostProviders,
      alert: `No cloud AI provider was detected, but ${providerLabel} ${verb} running on your computer — added automatically. Pick your models on the next step.`,
    };
  }

  // Rule 3: on macOS the in-stack Ollama is a Linux container that cannot
  // reach the Mac's Metal GPU, so enabling it would silently run on slow CPU.
  // Point the user at a native host Ollama instead; this reuses the existing
  // connect-manually action rather than introducing a new wizard branch.
  const isAppleGpuOnMac = platform === "darwin" && detectedGpu?.vendor === "apple";
  if (isAppleGpuOnMac) {
    const macAlert =
      "No AI provider was detected. On macOS, fast local models need Ollama running " +
      "natively (it uses your Apple Silicon / Metal GPU) — the bundled in-stack Ollama " +
      "runs in Linux and cannot reach Metal. Install Ollama for macOS (https://ollama.com/download), " +
      "or connect a provider on the next step.";
    return { action: "connect-manually", alert: macAlert };
  }

  // Rule 4: a GPU at or above the VRAM threshold gets in-stack Ollama.
  if (detectedGpu && detectedGpu.vramMb >= MIN_LOCAL_GPU_VRAM_MB) {
    const vramGb = fmtGb(detectedGpu.vramMb);
    return {
      action: "enable-ollama",
      profileVariant: gpuToProfileVariant(detectedGpu),
      gpu: detectedGpu,
      alert: `No AI provider was detected, but a capable GPU was found (${detectedGpu.name}, ${vramGb} GB). Local models via Ollama have been enabled for you.`,
    };
  }

  // Rule 5: nothing usable was found; the user has to connect a provider.
  const fallbackAlert =
    "No AI provider was detected and no GPU with enough memory for local models was found. " +
    "Connect a provider to continue — sign in to a provider on the next step, or add a custom OpenAI-compatible endpoint and key.";
  return { action: "connect-manually", alert: fallbackAlert };
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Upgrade-path regression tests.
|
|
3
|
+
*
|
|
4
|
+
* #449 — Check-up "latest" install: a `latest` (or empty) tag selection must be
|
|
5
|
+
* resolved to the concrete newest published platform tag BEFORE fetching stack
|
|
6
|
+
* assets. GitHub has no `.openpalm/...` asset tree at a `latest` ref, so passing
|
|
7
|
+
* `latest` straight through used to fail with a raw download error.
|
|
8
|
+
*
|
|
9
|
+
* #450 — "Update now" must force-recreate guardian + channel containers so they
|
|
10
|
+
* re-resolve their npm dist-tag adapters; guardian must never fall out of the
|
|
11
|
+
* recreated service set.
|
|
12
|
+
*/
|
|
13
|
+
import { describe, test, expect, afterEach } from "bun:test";
|
|
14
|
+
import { readFileSync } from "node:fs";
|
|
15
|
+
import { join } from "node:path";
|
|
16
|
+
import { mkdtempSync, writeFileSync, mkdirSync } from "node:fs";
|
|
17
|
+
import { tmpdir } from "node:os";
|
|
18
|
+
import { resolveLatestPlatformTag, applyTagChange } from "./lifecycle.js";
|
|
19
|
+
import type { ControlPlaneState } from "./types.js";
|
|
20
|
+
|
|
21
|
+
// Directory containing this test file; the #450 tests read lifecycle.ts from it.
const LIB_CONTROL_PLANE_DIR = join(import.meta.dir);

// Tests below swap globalThis.fetch for stubs. Capture the original once and
// reinstate it after every test so a stub never leaks into the next case.
const originalFetch = globalThis.fetch;

afterEach(() => {
  globalThis.fetch = originalFetch;
});
|
|
27
|
+
|
|
28
|
+
/**
 * Build a 200 JSON Response whose body is `{ results: [{ name }, ...] }`,
 * with one entry per tag name, for use as a fetch stub result.
 */
function dockerTagsResponse(names: string[]): Response {
  const results = names.map((name) => ({ name }));
  const body = JSON.stringify({ results });
  const init = {
    status: 200,
    headers: { "content-type": "application/json" },
  };
  return new Response(body, init);
}
|
|
34
|
+
|
|
35
|
+
// ── #449: latest-tag resolution ──────────────────────────────────────────
|
|
36
|
+
|
|
37
|
+
// #449: resolveLatestPlatformTag against a stubbed registry tag listing.
describe("resolveLatestPlatformTag (#449)", () => {
  test("returns the newest semver tag from the Docker registry", async () => {
    const registryStub = async () => dockerTagsResponse(["latest", "v0.11.0", "edge"]);
    globalThis.fetch = registryStub as typeof fetch;

    const resolved = await resolveLatestPlatformTag("openpalm");

    expect(resolved).toBe("v0.11.0");
  });

  test("throws when the registry yields no usable tag", async () => {
    // Only the floating "latest" tag is listed, so there is nothing concrete to pick.
    const registryStub = async () => dockerTagsResponse(["latest"]);
    globalThis.fetch = registryStub as typeof fetch;

    const pending = resolveLatestPlatformTag("openpalm");

    await expect(pending).rejects.toThrow(/No usable Docker image tag/);
  });
});
|
|
53
|
+
|
|
54
|
+
// #449: applyTagChange must resolve "latest" (or a blank selection) before it
// downloads anything, so a resolution failure surfaces as the resolution error.
describe("applyTagChange latest resolution (#449)", () => {
  /** Create a throwaway OP_HOME with a minimal stack.env and return a state pointing at it. */
  function makeState(): ControlPlaneState {
    const root = mkdtempSync(join(tmpdir(), "openpalm-upgrade-test-"));
    const envDir = join(root, "knowledge", "env");
    mkdirSync(envDir, { recursive: true });
    writeFileSync(join(envDir, "stack.env"), "OP_IMAGE_NAMESPACE=openpalm\n");

    return {
      homeDir: root,
      configDir: join(root, "config"),
      stashDir: join(root, "knowledge"),
      workspaceDir: join(root, "workspace"),
      dataDir: join(root, "data"),
      stackDir: join(root, "config", "stack"),
      services: {},
      artifacts: { compose: "" },
      artifactMeta: [],
    };
  }

  /** Replace fetch with a stub that rejects on every call. */
  function stubFetchNetworkDown(): void {
    const failing = async () => {
      throw new Error("network down");
    };
    globalThis.fetch = failing as typeof fetch;
  }

  test('a "latest" selection that cannot be resolved fails with a clear validation error, not a raw download error', async () => {
    stubFetchNetworkDown();
    const state = makeState();

    // Resolution runs BEFORE any asset download, so the failure must be the
    // resolution message and never the GitHub "Failed to download ..." error.
    const pending = applyTagChange(state, "latest");

    await expect(pending).rejects.toThrow(/Cannot resolve "latest" to a concrete release/);
  });

  test('an empty selection is treated like "latest" and resolved (not passed through as a blank ref)', async () => {
    stubFetchNetworkDown();
    const state = makeState();

    const pending = applyTagChange(state, " ");

    await expect(pending).rejects.toThrow(/Cannot resolve "latest" to a concrete release/);
  });
});
|
|
96
|
+
|
|
97
|
+
// ── #450: upgrade recreates guardian + channel containers ─────────────────
|
|
98
|
+
|
|
99
|
+
// #450: source-level guards. These read lifecycle.ts as text and assert on its
// contents rather than executing an upgrade.
describe("performUpgrade force-recreates managed services (#450)", () => {
  /** Load lifecycle.ts, which sits next to this test file, as UTF-8 text. */
  const readLifecycleSource = (): string =>
    readFileSync(join(LIB_CONTROL_PLANE_DIR, "lifecycle.ts"), "utf-8");

  test("performUpgrade passes forceRecreate to composeUp", () => {
    const lifecycleSource = readLifecycleSource();

    // The post-pull composeUp in performUpgrade must force-recreate so channel
    // containers re-resolve their dist-tag adapters.
    expect(lifecycleSource).toMatch(/composeUp\(\{[^}]*forceRecreate:\s*true/);
  });

  test("buildManagedServices always includes the core services (guardian)", () => {
    const lifecycleSource = readLifecycleSource();

    // Guardian comes from CORE_SERVICES and must be seeded into the set
    // regardless of how the rest of the service list is discovered.
    expect(lifecycleSource).toContain("new Set<string>(CORE_SERVICES)");
  });
});
|
package/src/index.ts
CHANGED
|
@@ -252,6 +252,7 @@ export {
|
|
|
252
252
|
applyUpgrade,
|
|
253
253
|
performUpgrade,
|
|
254
254
|
applyTagChange,
|
|
255
|
+
resolveLatestPlatformTag,
|
|
255
256
|
updateStackEnvToLatestImageTag,
|
|
256
257
|
buildComposeFileList,
|
|
257
258
|
buildManagedServices,
|
|
@@ -259,10 +260,11 @@ export {
|
|
|
259
260
|
} from "./control-plane/lifecycle.js";
|
|
260
261
|
|
|
261
262
|
// ── Docker ──────────────────────────────────────────────────────────────
|
|
262
|
-
export type { DockerResult } from "./control-plane/docker.js";
|
|
263
|
+
export type { DockerResult, ExistingProject } from "./control-plane/docker.js";
|
|
263
264
|
export {
|
|
264
265
|
checkDocker,
|
|
265
266
|
checkDockerCompose,
|
|
267
|
+
detectExistingProject,
|
|
266
268
|
resolveComposeProjectName,
|
|
267
269
|
composePreflight,
|
|
268
270
|
composeUp,
|
|
@@ -296,6 +298,20 @@ export {
|
|
|
296
298
|
export type { LocalProviderDetection } from "./control-plane/model-runner.js";
|
|
297
299
|
export { detectLocalProviders } from "./control-plane/model-runner.js";
|
|
298
300
|
|
|
301
|
+
// ── Hardware detection + setup recommendation ───────────────────────────
|
|
302
|
+
export type { GpuInfo, GpuVendor } from "./control-plane/hardware-detect.js";
|
|
303
|
+
export { detectGpu, parseNvidiaSmi, parseRocmSmi, parseAppleSilicon } from "./control-plane/hardware-detect.js";
|
|
304
|
+
export type {
|
|
305
|
+
DetectedHostProvider,
|
|
306
|
+
SetupRecommendation,
|
|
307
|
+
SetupRecommendationInput,
|
|
308
|
+
} from "./control-plane/setup-recommendation.js";
|
|
309
|
+
export {
|
|
310
|
+
recommendSetup,
|
|
311
|
+
gpuToProfileVariant,
|
|
312
|
+
MIN_LOCAL_GPU_VRAM_MB,
|
|
313
|
+
} from "./control-plane/setup-recommendation.js";
|
|
314
|
+
|
|
299
315
|
// ── Compose Arguments ────────────────────────────────────────────────────
|
|
300
316
|
export {
|
|
301
317
|
buildComposeOptions,
|