@meshxdata/fops 0.1.37 → 0.1.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +185 -0
- package/package.json +1 -1
- package/src/agent/llm.js +2 -0
- package/src/auth/azure.js +92 -0
- package/src/auth/cloudflare.js +125 -0
- package/src/auth/index.js +2 -0
- package/src/commands/index.js +8 -4
- package/src/commands/lifecycle.js +31 -10
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks.js +130 -2
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-auth.js +47 -4
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops.js +24 -25
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-provision.js +66 -26
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-shared-cache.js +1 -1
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-sync.js +4 -4
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/infra-cmds.js +4 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/vm-cmds.js +4 -3
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/dai-backend.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/dai-frontend.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-backend.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-frontend.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-hive.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-kafka.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-meltano.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-mlflow.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-opa.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-processor.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-scheduler.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-storage-engine.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-trino.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-watcher.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/config/repository.yaml +66 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/kustomization.yaml +30 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/acr-webhook-controller.yaml +63 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/externalsecrets.yaml +15 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/istio.yaml +42 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/kafka.yaml +15 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/kube-reflector.yaml +33 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/kubecost.yaml +12 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/nats-server.yaml +15 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/prometheus-agent.yaml +34 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/reloader.yaml +12 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/spark.yaml +112 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/tailscale.yaml +67 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/vertical-pod-autoscaler.yaml +15 -0
- package/src/plugins/bundled/fops-plugin-foundation/index.js +26 -6
- package/src/plugins/loader.js +23 -6
|
@@ -13,7 +13,7 @@ export function hashContent(text) {
|
|
|
13
13
|
}
|
|
14
14
|
|
|
15
15
|
/**
|
|
16
|
-
* Resolve Foundation credentials from env → .
|
|
16
|
+
* Resolve Foundation credentials from env → ~/.fops.json → .env files.
|
|
17
17
|
* Returns { bearerToken } or { user, password } or null.
|
|
18
18
|
*/
|
|
19
19
|
export function resolveFoundationCreds() {
|
|
@@ -26,6 +26,25 @@ export function resolveFoundationCreds() {
|
|
|
26
26
|
if (cfg.bearerToken?.trim()) return { bearerToken: cfg.bearerToken.trim() };
|
|
27
27
|
if (cfg.user?.trim() && cfg.password) return { user: cfg.user.trim(), password: cfg.password };
|
|
28
28
|
} catch { /* no fops.json */ }
|
|
29
|
+
|
|
30
|
+
// Fall back to .env files for credentials
|
|
31
|
+
const envCandidates = [pathMod.resolve(".env"), pathMod.resolve("..", ".env")];
|
|
32
|
+
try {
|
|
33
|
+
const raw = JSON.parse(fs.readFileSync(pathMod.join(os.homedir(), ".fops.json"), "utf8"));
|
|
34
|
+
if (raw?.projectRoot) envCandidates.unshift(pathMod.join(raw.projectRoot, ".env"));
|
|
35
|
+
} catch { /* ignore */ }
|
|
36
|
+
for (const ep of envCandidates) {
|
|
37
|
+
try {
|
|
38
|
+
const lines = fs.readFileSync(ep, "utf8").split("\n");
|
|
39
|
+
const get = (k) => {
|
|
40
|
+
const ln = lines.find((l) => l.startsWith(`${k}=`));
|
|
41
|
+
return ln ? ln.slice(k.length + 1).trim().replace(/^["']|["']$/g, "") : "";
|
|
42
|
+
};
|
|
43
|
+
const user = get("QA_USERNAME") || get("FOUNDATION_USERNAME");
|
|
44
|
+
const pass = get("QA_PASSWORD") || get("FOUNDATION_PASSWORD");
|
|
45
|
+
if (user && pass) return { user, password: pass };
|
|
46
|
+
} catch { /* try next */ }
|
|
47
|
+
}
|
|
29
48
|
return null;
|
|
30
49
|
}
|
|
31
50
|
|
|
@@ -162,7 +181,7 @@ export async function resolveRemoteAuth(opts = {}) {
|
|
|
162
181
|
|
|
163
182
|
const creds = resolveFoundationCreds();
|
|
164
183
|
let qaUser = creds?.user || process.env.QA_USERNAME || process.env.FOUNDATION_USERNAME || "operator@local";
|
|
165
|
-
let qaPass = creds?.password || process.env.QA_PASSWORD || "";
|
|
184
|
+
let qaPass = creds?.password || process.env.QA_PASSWORD || process.env.FOUNDATION_PASSWORD || "";
|
|
166
185
|
let bearerToken = creds?.bearerToken || "";
|
|
167
186
|
|
|
168
187
|
// 1) Use local bearer if it's a valid JWT
|
|
@@ -224,8 +243,32 @@ export async function resolveRemoteAuth(opts = {}) {
|
|
|
224
243
|
if (resp.ok) {
|
|
225
244
|
const data = await resp.json();
|
|
226
245
|
if (data.access_token) {
|
|
227
|
-
|
|
228
|
-
|
|
246
|
+
// Validate the token against the target API before committing to it.
|
|
247
|
+
// Local Auth0 config may have a different audience than the remote VM expects.
|
|
248
|
+
let tokenValid = true;
|
|
249
|
+
if (apiUrl) {
|
|
250
|
+
try {
|
|
251
|
+
const prev = process.env.NODE_TLS_REJECT_UNAUTHORIZED;
|
|
252
|
+
process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";
|
|
253
|
+
try {
|
|
254
|
+
const check = await fetch(`${apiUrl}/iam/me`, {
|
|
255
|
+
headers: { Authorization: `Bearer ${data.access_token}` },
|
|
256
|
+
signal: AbortSignal.timeout(8_000),
|
|
257
|
+
});
|
|
258
|
+
if (check.status === 401 || check.status === 403) {
|
|
259
|
+
tokenValid = false;
|
|
260
|
+
log(chalk.dim(` Auth0 token rejected by API (wrong audience) — trying SSH fallback…`));
|
|
261
|
+
}
|
|
262
|
+
} finally {
|
|
263
|
+
if (prev === undefined) delete process.env.NODE_TLS_REJECT_UNAUTHORIZED;
|
|
264
|
+
else process.env.NODE_TLS_REJECT_UNAUTHORIZED = prev;
|
|
265
|
+
}
|
|
266
|
+
} catch { /* network error — assume token is OK */ }
|
|
267
|
+
}
|
|
268
|
+
if (tokenValid) {
|
|
269
|
+
log(chalk.green(` ✓ Authenticated as ${qaUser} via Auth0`));
|
|
270
|
+
return { bearerToken: data.access_token, qaUser, qaPass, useTokenMode: true };
|
|
271
|
+
}
|
|
229
272
|
}
|
|
230
273
|
} else {
|
|
231
274
|
log(chalk.dim(` Auth0 rejected: HTTP ${resp.status}`));
|
|
@@ -2369,31 +2369,6 @@ export async function azureList(opts = {}) {
|
|
|
2369
2369
|
let aksClusters = (fullState.azure || {}).clusters || {};
|
|
2370
2370
|
let hasAks = Object.keys(aksClusters).length > 0;
|
|
2371
2371
|
|
|
2372
|
-
// Always try to discover VMs from Azure (tag managed=fops) so we re-add any that were
|
|
2373
|
-
// lost from local state (e.g. state file reset or edited).
|
|
2374
|
-
try {
|
|
2375
|
-
const execa = await lazyExeca();
|
|
2376
|
-
await ensureAzCli(execa);
|
|
2377
|
-
await ensureAzAuth(execa, { subscription: opts.subscription });
|
|
2378
|
-
const found = await discoverVmsFromAzure(execa, { quiet: true, subscription: opts.subscription });
|
|
2379
|
-
if (found > 0) {
|
|
2380
|
-
console.log(OK(` ✓ Re-discovered ${found} VM(s) from Azure`) + DIM(" (tag managed=fops)\n"));
|
|
2381
|
-
({ activeVm, vms } = listVms());
|
|
2382
|
-
vmNames = Object.keys(vms);
|
|
2383
|
-
fullState = readState();
|
|
2384
|
-
aksClusters = (fullState.azure || {}).clusters || {};
|
|
2385
|
-
hasAks = Object.keys(aksClusters).length > 0;
|
|
2386
|
-
}
|
|
2387
|
-
} catch { /* az not available or not authenticated */ }
|
|
2388
|
-
|
|
2389
|
-
if (vmNames.length === 0 && !hasAks) {
|
|
2390
|
-
banner("Azure VMs");
|
|
2391
|
-
hint("No VMs or clusters found in Azure.");
|
|
2392
|
-
hint("Create a VM: fops azure up <name>");
|
|
2393
|
-
hint("Create a cluster: fops azure aks up <name>\n");
|
|
2394
|
-
return;
|
|
2395
|
-
}
|
|
2396
|
-
|
|
2397
2372
|
// Use cache if fresh, otherwise try shared tags, then fall back to full sync
|
|
2398
2373
|
const forceLive = opts.live;
|
|
2399
2374
|
let cache = readCache();
|
|
@@ -2414,13 +2389,37 @@ export async function azureList(opts = {}) {
|
|
|
2414
2389
|
} catch { /* tag read failed, fall through to full sync */ }
|
|
2415
2390
|
}
|
|
2416
2391
|
|
|
2392
|
+
// Discovery + full sync only when all caches are stale
|
|
2417
2393
|
if (!fresh) {
|
|
2394
|
+
try {
|
|
2395
|
+
const execa = await lazyExeca();
|
|
2396
|
+
await ensureAzCli(execa);
|
|
2397
|
+
await ensureAzAuth(execa, { subscription: opts.subscription });
|
|
2398
|
+
const found = await discoverVmsFromAzure(execa, { quiet: true, subscription: opts.subscription });
|
|
2399
|
+
if (found > 0) {
|
|
2400
|
+
console.log(OK(` ✓ Re-discovered ${found} VM(s) from Azure`) + DIM(" (tag managed=fops)\n"));
|
|
2401
|
+
({ activeVm, vms } = listVms());
|
|
2402
|
+
vmNames = Object.keys(vms);
|
|
2403
|
+
fullState = readState();
|
|
2404
|
+
aksClusters = (fullState.azure || {}).clusters || {};
|
|
2405
|
+
hasAks = Object.keys(aksClusters).length > 0;
|
|
2406
|
+
}
|
|
2407
|
+
} catch { /* az not available or not authenticated */ }
|
|
2408
|
+
|
|
2418
2409
|
await azureSync({ quiet: !opts.verbose });
|
|
2419
2410
|
cache = readCache();
|
|
2420
2411
|
cacheSource = "live";
|
|
2421
2412
|
}
|
|
2422
2413
|
}
|
|
2423
2414
|
|
|
2415
|
+
if (vmNames.length === 0 && !hasAks) {
|
|
2416
|
+
banner("Azure VMs");
|
|
2417
|
+
hint("No VMs or clusters found in Azure.");
|
|
2418
|
+
hint("Create a VM: fops azure up <name>");
|
|
2419
|
+
hint("Create a cluster: fops azure aks up <name>\n");
|
|
2420
|
+
return;
|
|
2421
|
+
}
|
|
2422
|
+
|
|
2424
2423
|
const cachedVms = cache?.vms || {};
|
|
2425
2424
|
const cachedClusters = cache?.clusters || {};
|
|
2426
2425
|
const cacheTime = cache?.updatedAt;
|
|
@@ -80,7 +80,7 @@ export async function configureVm(execa, ip, user, publicUrl, { githubToken, k3s
|
|
|
80
80
|
|
|
81
81
|
console.log(chalk.dim(" Configuring VM..."));
|
|
82
82
|
|
|
83
|
-
// Batch: sshd tuning + docker group + ownership
|
|
83
|
+
// Batch: sshd tuning + docker group + ownership — single SSH round-trip
|
|
84
84
|
const setupBatch = [
|
|
85
85
|
// Speed up SSH: accept forwarded env vars, disable DNS reverse lookup
|
|
86
86
|
`sudo grep -q '^AcceptEnv.*BEARER_TOKEN' /etc/ssh/sshd_config 2>/dev/null || {`,
|
|
@@ -91,22 +91,10 @@ export async function configureVm(execa, ip, user, publicUrl, { githubToken, k3s
|
|
|
91
91
|
`}`,
|
|
92
92
|
`sudo usermod -aG docker ${user} 2>/dev/null; true`,
|
|
93
93
|
"sudo chown -R azureuser:azureuser /opt/foundation-compose 2>/dev/null; true",
|
|
94
|
-
|
|
95
|
-
`
|
|
96
|
-
`if grep -q '^FOUNDATION_PUBLIC_URL=' .env 2>/dev/null; then`,
|
|
97
|
-
` sed -i 's|^FOUNDATION_PUBLIC_URL=.*|FOUNDATION_PUBLIC_URL=${publicUrl}|' .env;`,
|
|
98
|
-
`else`,
|
|
99
|
-
` echo 'FOUNDATION_PUBLIC_URL=${publicUrl}' >> .env;`,
|
|
100
|
-
`fi`,
|
|
101
|
-
// Persist COMPOSE_PROFILES so k3s/watcher/traefik start on manual restarts too
|
|
102
|
-
`if grep -q '^COMPOSE_PROFILES=' .env 2>/dev/null; then`,
|
|
103
|
-
` sed -i 's|^COMPOSE_PROFILES=.*|COMPOSE_PROFILES=k3s,traefik${dai ? ",dai" : ""}|' .env;`,
|
|
104
|
-
`else`,
|
|
105
|
-
` echo 'COMPOSE_PROFILES=k3s,traefik${dai ? ",dai" : ""}' >> .env;`,
|
|
106
|
-
`fi`,
|
|
94
|
+
// Only inject FOUNDATION_PUBLIC_URL if not already set — never overwrite
|
|
95
|
+
`cd /opt/foundation-compose && grep -q '^FOUNDATION_PUBLIC_URL=' .env 2>/dev/null || echo 'FOUNDATION_PUBLIC_URL=${publicUrl}' >> .env`,
|
|
107
96
|
].join("\n");
|
|
108
97
|
await ssh(setupBatch);
|
|
109
|
-
console.log(chalk.green(` ✓ FOUNDATION_PUBLIC_URL=${publicUrl}`));
|
|
110
98
|
|
|
111
99
|
let ghcrOk = false;
|
|
112
100
|
if (githubToken) {
|
|
@@ -1057,8 +1045,33 @@ async function removeSshBypassViaRunCommand(execa, rg, vmName, sourceCidr, sub)
|
|
|
1057
1045
|
// ── Step: SSH reachability ───────────────────────────────────────────────────
|
|
1058
1046
|
|
|
1059
1047
|
async function vmReconcileSsh(ctx) {
|
|
1060
|
-
const { execa, ip, adminUser,
|
|
1048
|
+
const { execa, ip, adminUser, port, desiredUrl, vmName, rg, sub } = ctx;
|
|
1061
1049
|
console.log(chalk.dim(" Checking SSH..."));
|
|
1050
|
+
|
|
1051
|
+
// Always fetch the knock sequence fresh from the Azure VM tag — local state can drift
|
|
1052
|
+
// after state recovery or manual changes, causing knocks with the wrong sequence.
|
|
1053
|
+
let knockSequence = ctx.knockSequence;
|
|
1054
|
+
if (rg) {
|
|
1055
|
+
try {
|
|
1056
|
+
const { stdout: tagRaw } = await execa("az", [
|
|
1057
|
+
"vm", "show", "-g", rg, "-n", vmName,
|
|
1058
|
+
"--query", "tags.fopsKnock", "-o", "tsv", ...subArgs(sub),
|
|
1059
|
+
], { timeout: 15000, reject: false });
|
|
1060
|
+
const raw = (tagRaw || "").trim().replace(/[()]/g, "");
|
|
1061
|
+
if (raw) {
|
|
1062
|
+
const fresh = raw.split(/[-,]/).map((s) => parseInt(s.trim(), 10)).filter((n) => Number.isInteger(n) && n > 0);
|
|
1063
|
+
if (fresh.length >= 2) {
|
|
1064
|
+
if (!knockSequence?.length || fresh.some((v, i) => v !== knockSequence[i])) {
|
|
1065
|
+
console.log(chalk.dim(" Syncing knock sequence from Azure tag..."));
|
|
1066
|
+
knockSequence = fresh;
|
|
1067
|
+
ctx.knockSequence = fresh;
|
|
1068
|
+
writeVmState(vmName, { knockSequence: fresh });
|
|
1069
|
+
}
|
|
1070
|
+
}
|
|
1071
|
+
}
|
|
1072
|
+
} catch { /* best-effort */ }
|
|
1073
|
+
}
|
|
1074
|
+
|
|
1062
1075
|
// Close any stale mux master before probing. ControlPersist=600 keeps the SSH master
|
|
1063
1076
|
// alive for 10 min, but if the VM rebooted its underlying TCP is broken — every probe
|
|
1064
1077
|
// through the stale mux fails even after a successful knock. A fresh connect fixes it.
|
|
@@ -1094,15 +1107,44 @@ async function vmReconcileSsh(ctx) {
|
|
|
1094
1107
|
}
|
|
1095
1108
|
}
|
|
1096
1109
|
if (!sshReady) {
|
|
1097
|
-
|
|
1098
|
-
if (
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1110
|
+
// Verify actual VM power state via Azure CLI — the earlier instance-view may be stale
|
|
1111
|
+
if (rg) {
|
|
1112
|
+
try {
|
|
1113
|
+
const { stdout: showJson } = await execa("az", [
|
|
1114
|
+
"vm", "show", "--resource-group", rg, "--name", vmName,
|
|
1115
|
+
"--show-details", "--output", "json", ...subArgs(sub),
|
|
1116
|
+
], { timeout: 20000, reject: false });
|
|
1117
|
+
const vmDetails = JSON.parse(showJson || "{}");
|
|
1118
|
+
const powerState = vmDetails?.powerState || "";
|
|
1119
|
+
if (powerState && !powerState.includes("running")) {
|
|
1120
|
+
console.log(chalk.yellow(` ⚠ Azure reports VM power state: ${chalk.bold(powerState)} — starting VM...`));
|
|
1121
|
+
await execa("az", [
|
|
1122
|
+
"vm", "start", "--resource-group", rg, "--name", vmName, "--output", "none",
|
|
1123
|
+
...subArgs(sub),
|
|
1124
|
+
], { timeout: 300000 });
|
|
1125
|
+
reconcileOk("VM", "started");
|
|
1126
|
+
// Give the VM a moment then retry SSH once more
|
|
1127
|
+
await new Promise((r) => setTimeout(r, 10000));
|
|
1128
|
+
if (knockSequence?.length) await performKnock(ip, knockSequence, { quiet: true });
|
|
1129
|
+
sshReady = await waitForSsh(execa, ip, adminUser, 60000);
|
|
1130
|
+
} else if (powerState) {
|
|
1131
|
+
console.log(chalk.dim(` Azure VM power state: ${powerState}`));
|
|
1132
|
+
}
|
|
1133
|
+
} catch {
|
|
1134
|
+
// best-effort
|
|
1135
|
+
}
|
|
1136
|
+
}
|
|
1137
|
+
if (!sshReady) {
|
|
1138
|
+
console.log(chalk.yellow(" ⚠ SSH not reachable — VM may still be booting. Skipping in-guest checks."));
|
|
1139
|
+
if (knockSequence?.length) {
|
|
1140
|
+
console.log(chalk.dim(" If knock is enabled, the stored sequence may not match the VM (e.g. after state recovery)."));
|
|
1141
|
+
console.log(chalk.dim(" Try: fops azure knock open " + ctx.vmName + " then immediately fops azure ssh " + ctx.vmName));
|
|
1142
|
+
console.log(chalk.dim(" Or remove knock: fops azure knock disable " + ctx.vmName));
|
|
1143
|
+
}
|
|
1144
|
+
ctx.publicUrl = desiredUrl || buildPublicUrl(ip, port);
|
|
1145
|
+
ctx.done = true;
|
|
1146
|
+
return;
|
|
1102
1147
|
}
|
|
1103
|
-
ctx.publicUrl = desiredUrl || buildPublicUrl(ip, port);
|
|
1104
|
-
ctx.done = true;
|
|
1105
|
-
return;
|
|
1106
1148
|
}
|
|
1107
1149
|
reconcileOk("SSH", "reachable");
|
|
1108
1150
|
}
|
|
@@ -1644,10 +1686,8 @@ export async function provisionVm(execa, ip, adminUser, { githubToken, branch =
|
|
|
1644
1686
|
}
|
|
1645
1687
|
|
|
1646
1688
|
await runScript("Cloning foundation-compose", [
|
|
1647
|
-
"cp /opt/foundation-compose/.env /tmp/.env.fops-backup 2>/dev/null || true",
|
|
1648
1689
|
"rm -rf /opt/foundation-compose",
|
|
1649
1690
|
`git clone --branch ${branch} --depth 1 --recurse-submodules https://github.com/meshxdata/foundation-compose.git /opt/foundation-compose`,
|
|
1650
|
-
"if [ -f /tmp/.env.fops-backup ]; then cp /tmp/.env.fops-backup /opt/foundation-compose/.env; rm -f /tmp/.env.fops-backup; else cp /opt/foundation-compose/.env.example /opt/foundation-compose/.env; fi",
|
|
1651
1691
|
"mkdir -p /opt/foundation-compose/credentials",
|
|
1652
1692
|
"touch /opt/foundation-compose/credentials/kubeconfig.yaml",
|
|
1653
1693
|
`chown -R ${adminUser}:${adminUser} /opt/foundation-compose`,
|
|
@@ -22,7 +22,7 @@ import { readState, listVms } from "./azure-state.js";
|
|
|
22
22
|
// fops_by = alessio (who synced)
|
|
23
23
|
|
|
24
24
|
const TAG_PREFIX = "fops_";
|
|
25
|
-
const TAG_MAX_AGE_MS =
|
|
25
|
+
const TAG_MAX_AGE_MS = 20 * 60 * 1000; // 20 minutes — tags are cheaper to check
|
|
26
26
|
|
|
27
27
|
// ── Write: publish probe results as tags on a VM ─────────────────────────────
|
|
28
28
|
|
|
@@ -12,7 +12,7 @@ import {
|
|
|
12
12
|
// Stored in ~/.fops.json under azure.cache:
|
|
13
13
|
// { updatedAt, vms: { <name>: { ... } }, clusters: { <name>: { ... } } }
|
|
14
14
|
|
|
15
|
-
const CACHE_MAX_AGE_MS =
|
|
15
|
+
const CACHE_MAX_AGE_MS = 15 * 60 * 1000; // 15 minutes
|
|
16
16
|
|
|
17
17
|
// Short keys for the 6 tracked Foundation services
|
|
18
18
|
const SVC_MAP = {
|
|
@@ -169,16 +169,16 @@ async function syncVms(execa) {
|
|
|
169
169
|
|
|
170
170
|
// After a knock, iptables rule needs a moment to propagate; first SSH needs full handshake.
|
|
171
171
|
// Brief delay then retry once to avoid false "unreachable" (e.g. uaenorth latency).
|
|
172
|
-
await new Promise((r) => setTimeout(r,
|
|
172
|
+
await new Promise((r) => setTimeout(r, 400));
|
|
173
173
|
let sshOk = false;
|
|
174
174
|
for (let attempt = 0; attempt < 2; attempt++) {
|
|
175
175
|
const { exitCode: sshCode } = await execa("ssh", [
|
|
176
176
|
...MUX_OPTS(vm.publicIp, DEFAULTS.adminUser),
|
|
177
177
|
"-o", "BatchMode=yes",
|
|
178
178
|
`${DEFAULTS.adminUser}@${vm.publicIp}`, "echo ok",
|
|
179
|
-
], { timeout:
|
|
179
|
+
], { timeout: 8000, reject: false }).catch(() => ({ exitCode: 1 }));
|
|
180
180
|
if (sshCode === 0) { sshOk = true; break; }
|
|
181
|
-
if (attempt === 0) await new Promise((r) => setTimeout(r,
|
|
181
|
+
if (attempt === 0) await new Promise((r) => setTimeout(r, 1000));
|
|
182
182
|
}
|
|
183
183
|
|
|
184
184
|
if (!sshOk) {
|
|
@@ -508,6 +508,8 @@ export function registerInfraCommands(azure) {
|
|
|
508
508
|
.option("--github-token <token>", "GitHub PAT for Flux + GHCR pull (default: $GITHUB_TOKEN)")
|
|
509
509
|
.option("--no-flux", "Skip Flux bootstrap")
|
|
510
510
|
.option("--no-postgres", "Skip Postgres Flexible Server provisioning")
|
|
511
|
+
.option("--flux-local-repo <path>", "Path to local flux repo clone (auto-detected if omitted)")
|
|
512
|
+
.option("--overlay <name>", "App overlay name in flux repo (default: demo-azure)")
|
|
511
513
|
.option("--dai", "Include DAI (Dashboards AI) workloads")
|
|
512
514
|
.action(async (name, opts) => {
|
|
513
515
|
const { aksUp } = await import("../azure-aks.js");
|
|
@@ -524,6 +526,8 @@ export function registerInfraCommands(azure) {
|
|
|
524
526
|
githubToken: opts.githubToken,
|
|
525
527
|
noFlux: opts.flux === false,
|
|
526
528
|
noPostgres: opts.postgres === false,
|
|
529
|
+
fluxLocalRepo: opts.fluxLocalRepo,
|
|
530
|
+
overlay: opts.overlay,
|
|
527
531
|
dai: opts.dai === true,
|
|
528
532
|
});
|
|
529
533
|
});
|
|
@@ -468,12 +468,13 @@ export function registerVmCommands(azure, api, registry) {
|
|
|
468
468
|
.description("Perform port-knock sequence to temporarily open SSH");
|
|
469
469
|
|
|
470
470
|
knock
|
|
471
|
-
.command("open [
|
|
471
|
+
.command("open [names...]", { isDefault: true })
|
|
472
472
|
.description("Send the knock sequence — opens SSH for ~5 min")
|
|
473
473
|
.option("--vm-name <name>", "Target VM (default: active VM)")
|
|
474
|
-
.action(async (
|
|
474
|
+
.action(async (names, opts) => {
|
|
475
475
|
const { azureKnock } = await import("../azure.js");
|
|
476
|
-
|
|
476
|
+
const targets = opts.vmName ? [opts.vmName] : (names.length ? names : [undefined]);
|
|
477
|
+
for (const vmName of targets) await azureKnock({ vmName });
|
|
477
478
|
});
|
|
478
479
|
|
|
479
480
|
knock
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: Kustomization
|
|
4
|
+
metadata:
|
|
5
|
+
name: dai-backend
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
interval: 1m
|
|
9
|
+
sourceRef:
|
|
10
|
+
kind: GitRepository
|
|
11
|
+
name: flux-system
|
|
12
|
+
path: ./apps/dai/backend/overlays/meshx/{{OVERLAY}}
|
|
13
|
+
prune: true
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: Kustomization
|
|
4
|
+
metadata:
|
|
5
|
+
name: dai-frontend
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
interval: 1m
|
|
9
|
+
sourceRef:
|
|
10
|
+
kind: GitRepository
|
|
11
|
+
name: flux-system
|
|
12
|
+
path: ./apps/dai/frontend/overlays/meshx/{{OVERLAY}}
|
|
13
|
+
prune: true
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: Kustomization
|
|
4
|
+
metadata:
|
|
5
|
+
name: foundation-backend
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
interval: 1m
|
|
9
|
+
sourceRef:
|
|
10
|
+
kind: GitRepository
|
|
11
|
+
name: flux-system
|
|
12
|
+
path: ./apps/foundation/backend/overlays/meshx/{{OVERLAY}}
|
|
13
|
+
prune: true
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: Kustomization
|
|
4
|
+
metadata:
|
|
5
|
+
name: foundation-frontend
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
interval: 1m
|
|
9
|
+
sourceRef:
|
|
10
|
+
kind: GitRepository
|
|
11
|
+
name: flux-system
|
|
12
|
+
path: ./apps/foundation/frontend/overlays/meshx/{{OVERLAY}}
|
|
13
|
+
prune: true
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: Kustomization
|
|
4
|
+
metadata:
|
|
5
|
+
name: foundation-hive
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
interval: 1m
|
|
9
|
+
sourceRef:
|
|
10
|
+
kind: GitRepository
|
|
11
|
+
name: flux-system
|
|
12
|
+
path: ./apps/foundation/hive/overlays/meshx/{{OVERLAY}}
|
|
13
|
+
prune: true
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: Kustomization
|
|
4
|
+
metadata:
|
|
5
|
+
name: foundation-kafka
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
interval: 1m
|
|
9
|
+
sourceRef:
|
|
10
|
+
kind: GitRepository
|
|
11
|
+
name: flux-system
|
|
12
|
+
path: ./apps/foundation/kafka/overlays/meshx/{{OVERLAY}}
|
|
13
|
+
prune: true
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: Kustomization
|
|
4
|
+
metadata:
|
|
5
|
+
name: foundation-meltano
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
interval: 1m
|
|
9
|
+
sourceRef:
|
|
10
|
+
kind: GitRepository
|
|
11
|
+
name: flux-system
|
|
12
|
+
path: ./apps/foundation/meltano/overlays/meshx/{{OVERLAY}}
|
|
13
|
+
prune: true
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: Kustomization
|
|
4
|
+
metadata:
|
|
5
|
+
name: foundation-mlflow
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
interval: 1m
|
|
9
|
+
sourceRef:
|
|
10
|
+
kind: GitRepository
|
|
11
|
+
name: flux-system
|
|
12
|
+
path: ./apps/foundation/mlflow/overlays/meshx/{{OVERLAY}}
|
|
13
|
+
prune: true
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: Kustomization
|
|
4
|
+
metadata:
|
|
5
|
+
name: foundation-opa
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
interval: 1m
|
|
9
|
+
sourceRef:
|
|
10
|
+
kind: GitRepository
|
|
11
|
+
name: flux-system
|
|
12
|
+
path: ./apps/foundation/opa/overlays/meshx/{{OVERLAY}}
|
|
13
|
+
prune: true
|
package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-processor.yaml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: Kustomization
|
|
4
|
+
metadata:
|
|
5
|
+
name: foundation-processor
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
interval: 1m
|
|
9
|
+
sourceRef:
|
|
10
|
+
kind: GitRepository
|
|
11
|
+
name: flux-system
|
|
12
|
+
path: ./apps/foundation/processor/overlays/meshx/{{OVERLAY}}
|
|
13
|
+
prune: true
|
package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-scheduler.yaml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: Kustomization
|
|
4
|
+
metadata:
|
|
5
|
+
name: foundation-scheduler
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
interval: 1m
|
|
9
|
+
sourceRef:
|
|
10
|
+
kind: GitRepository
|
|
11
|
+
name: flux-system
|
|
12
|
+
path: ./apps/foundation/scheduler/overlays/meshx/{{OVERLAY}}
|
|
13
|
+
prune: true
|
package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-storage-engine.yaml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: Kustomization
|
|
4
|
+
metadata:
|
|
5
|
+
name: foundation-storage-engine
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
interval: 1m
|
|
9
|
+
sourceRef:
|
|
10
|
+
kind: GitRepository
|
|
11
|
+
name: flux-system
|
|
12
|
+
path: ./apps/foundation/storage-engine/overlays/meshx/{{OVERLAY}}
|
|
13
|
+
prune: true
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: Kustomization
|
|
4
|
+
metadata:
|
|
5
|
+
name: foundation-trino
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
interval: 1m
|
|
9
|
+
sourceRef:
|
|
10
|
+
kind: GitRepository
|
|
11
|
+
name: flux-system
|
|
12
|
+
path: ./apps/foundation/trino/overlays/meshx/{{OVERLAY}}
|
|
13
|
+
prune: true
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: Kustomization
|
|
4
|
+
metadata:
|
|
5
|
+
name: foundation-watcher
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
interval: 1m
|
|
9
|
+
sourceRef:
|
|
10
|
+
kind: GitRepository
|
|
11
|
+
name: flux-system
|
|
12
|
+
path: ./apps/foundation/watcher/overlays/meshx/{{OVERLAY}}
|
|
13
|
+
prune: true
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
---
|
|
2
|
+
apiVersion: source.toolkit.fluxcd.io/v1
|
|
3
|
+
kind: HelmRepository
|
|
4
|
+
metadata:
|
|
5
|
+
name: foundation
|
|
6
|
+
namespace: flux-system
|
|
7
|
+
spec:
|
|
8
|
+
type: oci
|
|
9
|
+
interval: 1m0s
|
|
10
|
+
url: oci://meshxregistry.azurecr.io/helm
|
|
11
|
+
secretRef:
|
|
12
|
+
name: meshxregistry-helm-secret
|
|
13
|
+
---
|
|
14
|
+
apiVersion: source.toolkit.fluxcd.io/v1
|
|
15
|
+
kind: HelmRepository
|
|
16
|
+
metadata:
|
|
17
|
+
name: foundation
|
|
18
|
+
namespace: velora
|
|
19
|
+
spec:
|
|
20
|
+
type: oci
|
|
21
|
+
interval: 1m0s
|
|
22
|
+
url: oci://meshxregistry.azurecr.io/helm
|
|
23
|
+
secretRef:
|
|
24
|
+
name: meshxregistry-helm-secret
|
|
25
|
+
---
|
|
26
|
+
apiVersion: source.toolkit.fluxcd.io/v1
|
|
27
|
+
kind: HelmRepository
|
|
28
|
+
metadata:
|
|
29
|
+
name: dai
|
|
30
|
+
namespace: velora
|
|
31
|
+
spec:
|
|
32
|
+
type: oci
|
|
33
|
+
interval: 1m0s
|
|
34
|
+
url: oci://meshxregistry.azurecr.io/helm/dai
|
|
35
|
+
secretRef:
|
|
36
|
+
name: meshxregistry-helm-secret
|
|
37
|
+
---
|
|
38
|
+
apiVersion: source.toolkit.fluxcd.io/v1
|
|
39
|
+
kind: HelmRepository
|
|
40
|
+
metadata:
|
|
41
|
+
name: hive-metastore
|
|
42
|
+
namespace: flux-system
|
|
43
|
+
spec:
|
|
44
|
+
type: oci
|
|
45
|
+
interval: 1m0s
|
|
46
|
+
url: oci://meshxregistry.azurecr.io/helm
|
|
47
|
+
secretRef:
|
|
48
|
+
name: meshxregistry-helm-secret
|
|
49
|
+
---
|
|
50
|
+
apiVersion: source.toolkit.fluxcd.io/v1
|
|
51
|
+
kind: HelmRepository
|
|
52
|
+
metadata:
|
|
53
|
+
name: trinodb
|
|
54
|
+
namespace: flux-system
|
|
55
|
+
spec:
|
|
56
|
+
interval: 30m
|
|
57
|
+
url: https://trinodb.github.io/charts
|
|
58
|
+
---
|
|
59
|
+
apiVersion: source.toolkit.fluxcd.io/v1
|
|
60
|
+
kind: HelmRepository
|
|
61
|
+
metadata:
|
|
62
|
+
name: mlflow
|
|
63
|
+
namespace: flux-system
|
|
64
|
+
spec:
|
|
65
|
+
interval: 1m0s
|
|
66
|
+
url: https://community-charts.github.io/helm-charts
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
apiVersion: kustomize.config.k8s.io/v1beta1
|
|
2
|
+
kind: Kustomization
|
|
3
|
+
resources:
|
|
4
|
+
- config/repository.yaml
|
|
5
|
+
- operator/acr-webhook-controller.yaml
|
|
6
|
+
- operator/kube-reflector.yaml
|
|
7
|
+
- operator/externalsecrets.yaml
|
|
8
|
+
- operator/istio.yaml
|
|
9
|
+
- operator/kafka.yaml
|
|
10
|
+
- operator/kubecost.yaml
|
|
11
|
+
- operator/spark.yaml
|
|
12
|
+
- operator/vertical-pod-autoscaler.yaml
|
|
13
|
+
- operator/prometheus-agent.yaml
|
|
14
|
+
- operator/tailscale.yaml
|
|
15
|
+
- operator/nats-server.yaml
|
|
16
|
+
- operator/reloader.yaml
|
|
17
|
+
- apps/foundation-backend.yaml
|
|
18
|
+
- apps/foundation-frontend.yaml
|
|
19
|
+
- apps/foundation-scheduler.yaml
|
|
20
|
+
- apps/foundation-processor.yaml
|
|
21
|
+
- apps/foundation-watcher.yaml
|
|
22
|
+
- apps/foundation-opa.yaml
|
|
23
|
+
- apps/foundation-meltano.yaml
|
|
24
|
+
- apps/foundation-kafka.yaml
|
|
25
|
+
- apps/foundation-hive.yaml
|
|
26
|
+
- apps/foundation-storage-engine.yaml
|
|
27
|
+
- apps/foundation-trino.yaml
|
|
28
|
+
- apps/foundation-mlflow.yaml
|
|
29
|
+
- apps/dai-backend.yaml
|
|
30
|
+
- apps/dai-frontend.yaml
|