@meshxdata/fops 0.1.40 → 0.1.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/CHANGELOG.md +5 -190
  2. package/package.json +1 -1
  3. package/src/agent/llm.js +0 -2
  4. package/src/doctor.js +21 -93
  5. package/src/plugins/bundled/fops-plugin-1password/index.js +1 -13
  6. package/src/plugins/bundled/fops-plugin-azure/index.js +2 -4
  7. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks.js +2 -130
  8. package/src/plugins/bundled/fops-plugin-azure/lib/azure-auth.js +39 -71
  9. package/src/plugins/bundled/fops-plugin-azure/lib/azure-helpers.js +2 -64
  10. package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops.js +28 -36
  11. package/src/plugins/bundled/fops-plugin-azure/lib/azure-shared-cache.js +1 -1
  12. package/src/plugins/bundled/fops-plugin-azure/lib/azure-sync.js +4 -4
  13. package/src/plugins/bundled/fops-plugin-azure/lib/azure-vm-lifecycle.js +10 -3
  14. package/src/plugins/bundled/fops-plugin-azure/lib/commands/infra-cmds.js +0 -4
  15. package/src/plugins/bundled/fops-plugin-azure/lib/commands/test-cmds.js +10 -31
  16. package/src/plugins/bundled/fops-plugin-azure/lib/commands/vm-cmds.js +30 -0
  17. package/src/plugins/bundled/fops-plugin-foundation/index.js +1 -18
  18. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/dai-backend.yaml +0 -13
  19. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/dai-frontend.yaml +0 -13
  20. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-backend.yaml +0 -13
  21. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-frontend.yaml +0 -13
  22. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-hive.yaml +0 -13
  23. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-kafka.yaml +0 -13
  24. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-meltano.yaml +0 -13
  25. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-mlflow.yaml +0 -13
  26. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-opa.yaml +0 -13
  27. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-processor.yaml +0 -13
  28. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-scheduler.yaml +0 -13
  29. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-storage-engine.yaml +0 -13
  30. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-trino.yaml +0 -13
  31. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-watcher.yaml +0 -13
  32. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/config/repository.yaml +0 -66
  33. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/kustomization.yaml +0 -30
  34. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/acr-webhook-controller.yaml +0 -63
  35. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/externalsecrets.yaml +0 -15
  36. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/istio.yaml +0 -42
  37. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/kafka.yaml +0 -15
  38. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/kube-reflector.yaml +0 -33
  39. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/kubecost.yaml +0 -12
  40. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/nats-server.yaml +0 -15
  41. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/prometheus-agent.yaml +0 -34
  42. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/reloader.yaml +0 -12
  43. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/spark.yaml +0 -112
  44. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/tailscale.yaml +0 -67
  45. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/vertical-pod-autoscaler.yaml +0 -15
@@ -13,7 +13,7 @@ export function hashContent(text) {
13
13
  }
14
14
 
15
15
  /**
16
- * Resolve Foundation credentials from env → ~/.fops.json → .env files.
16
+ * Resolve Foundation credentials from env → ~/.fops.json.
17
17
  * Returns { bearerToken } or { user, password } or null.
18
18
  */
19
19
  export function resolveFoundationCreds() {
@@ -26,25 +26,6 @@ export function resolveFoundationCreds() {
26
26
  if (cfg.bearerToken?.trim()) return { bearerToken: cfg.bearerToken.trim() };
27
27
  if (cfg.user?.trim() && cfg.password) return { user: cfg.user.trim(), password: cfg.password };
28
28
  } catch { /* no fops.json */ }
29
-
30
- // Fall back to .env files for credentials
31
- const envCandidates = [pathMod.resolve(".env"), pathMod.resolve("..", ".env")];
32
- try {
33
- const raw = JSON.parse(fs.readFileSync(pathMod.join(os.homedir(), ".fops.json"), "utf8"));
34
- if (raw?.projectRoot) envCandidates.unshift(pathMod.join(raw.projectRoot, ".env"));
35
- } catch { /* ignore */ }
36
- for (const ep of envCandidates) {
37
- try {
38
- const lines = fs.readFileSync(ep, "utf8").split("\n");
39
- const get = (k) => {
40
- const ln = lines.find((l) => l.startsWith(`${k}=`));
41
- return ln ? ln.slice(k.length + 1).trim().replace(/^["']|["']$/g, "") : "";
42
- };
43
- const user = get("QA_USERNAME") || get("FOUNDATION_USERNAME");
44
- const pass = get("QA_PASSWORD") || get("FOUNDATION_PASSWORD");
45
- if (user && pass) return { user, password: pass };
46
- } catch { /* try next */ }
47
- }
48
29
  return null;
49
30
  }
50
31
 
@@ -60,44 +41,12 @@ export function suppressTlsWarning() {
60
41
  };
61
42
  }
62
43
 
63
- /**
64
- * Resolve Cloudflare Access service-token headers from env or .env files.
65
- * Returns { "CF-Access-Client-Id": ..., "CF-Access-Client-Secret": ... } or {}.
66
- */
67
- let _cfAccessHeaders;
68
- export function resolveCfAccessHeaders() {
69
- if (_cfAccessHeaders !== undefined) return _cfAccessHeaders;
70
- let id = process.env.CF_ACCESS_CLIENT_ID || "";
71
- let secret = process.env.CF_ACCESS_CLIENT_SECRET || "";
72
- if (!id) {
73
- // Try .env files
74
- const candidates = [pathMod.resolve(".env"), pathMod.resolve("..", ".env")];
75
- try {
76
- const raw = JSON.parse(fs.readFileSync(pathMod.join(os.homedir(), ".fops.json"), "utf8"));
77
- if (raw?.projectRoot) candidates.unshift(pathMod.join(raw.projectRoot, ".env"));
78
- } catch {}
79
- for (const ep of candidates) {
80
- try {
81
- const lines = fs.readFileSync(ep, "utf8").split("\n");
82
- const get = (k) => { const ln = lines.find((l) => l.startsWith(`${k}=`)); return ln ? ln.slice(k.length + 1).trim().replace(/^["']|["']$/g, "") : ""; };
83
- id = id || get("CF_ACCESS_CLIENT_ID");
84
- secret = secret || get("CF_ACCESS_CLIENT_SECRET");
85
- if (id && secret) break;
86
- } catch {}
87
- }
88
- }
89
- _cfAccessHeaders = id && secret ? { "CF-Access-Client-Id": id, "CF-Access-Client-Secret": secret } : {};
90
- return _cfAccessHeaders;
91
- }
92
-
93
44
  export async function vmFetch(url, opts = {}) {
94
45
  suppressTlsWarning();
95
46
  const prev = process.env.NODE_TLS_REJECT_UNAUTHORIZED;
96
47
  process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";
97
48
  try {
98
- const cfHeaders = resolveCfAccessHeaders();
99
- const headers = { ...cfHeaders, ...(opts.headers || {}) };
100
- return await fetch(url, { signal: AbortSignal.timeout(10_000), ...opts, headers });
49
+ return await fetch(url, { signal: AbortSignal.timeout(10_000), ...opts });
101
50
  } finally {
102
51
  if (prev === undefined) delete process.env.NODE_TLS_REJECT_UNAUTHORIZED;
103
52
  else process.env.NODE_TLS_REJECT_UNAUTHORIZED = prev;
@@ -143,7 +92,7 @@ export function resolveAuth0Config() {
143
92
  * Tries the backend /iam/login first, then falls back to Auth0 ROPC.
144
93
  */
145
94
  export async function authenticateVm(vmUrl, ip, creds) {
146
- if (creds.bearerToken) return creds.bearerToken;
95
+ if (creds.bearerToken && !isJwtExpired(creds.bearerToken)) return creds.bearerToken;
147
96
 
148
97
  const hasDomain = vmUrl && !vmUrl.match(/^https?:\/\/\d+\.\d+\.\d+\.\d+/);
149
98
  const apiUrls = hasDomain
@@ -199,6 +148,28 @@ export function isJwt(token) {
199
148
  return token && token.split(".").length === 3;
200
149
  }
201
150
 
151
+ /**
152
+ * Decode a JWT payload without verification (for expiry checks only).
153
+ * Returns the parsed payload or null on failure.
154
+ */
155
+ function decodeJwtPayload(token) {
156
+ try {
157
+ const parts = token.split(".");
158
+ if (parts.length !== 3) return null;
159
+ const payload = Buffer.from(parts[1], "base64url").toString("utf8");
160
+ return JSON.parse(payload);
161
+ } catch { return null; }
162
+ }
163
+
164
+ /**
165
+ * Check if a JWT is expired (with 60s grace buffer).
166
+ */
167
+ export function isJwtExpired(token) {
168
+ const payload = decodeJwtPayload(token);
169
+ if (!payload?.exp) return false; // no exp claim → assume valid
170
+ return Date.now() / 1000 > payload.exp - 60;
171
+ }
172
+
202
173
  /**
203
174
  * Resolve a valid JWT bearer token for a remote VM/cluster.
204
175
  * Auth chain: local bearer → pre-auth /iam/login → Auth0 ROPC → SSH fetch from VM.
@@ -213,23 +184,19 @@ export async function resolveRemoteAuth(opts = {}) {
213
184
 
214
185
  const creds = resolveFoundationCreds();
215
186
  let qaUser = creds?.user || process.env.QA_USERNAME || process.env.FOUNDATION_USERNAME || "operator@local";
216
- let qaPass = creds?.password || process.env.QA_PASSWORD || process.env.FOUNDATION_PASSWORD || "";
187
+ let qaPass = creds?.password || process.env.QA_PASSWORD || "";
217
188
  let bearerToken = creds?.bearerToken || "";
218
189
 
219
- // 1) Use local bearer if it's a valid JWT
190
+ // 1) Use local bearer if it's a valid, non-expired JWT
220
191
  if (bearerToken && isJwt(bearerToken)) {
221
- return { bearerToken, qaUser, qaPass, useTokenMode: true };
192
+ if (!isJwtExpired(bearerToken)) {
193
+ return { bearerToken, qaUser, qaPass, useTokenMode: true };
194
+ }
195
+ log(chalk.dim(" Local bearer token expired — refreshing…"));
222
196
  }
223
197
  bearerToken = "";
224
198
 
225
199
  // 2) Pre-auth against the backend /iam/login
226
- const cfHeaders = resolveCfAccessHeaders();
227
- const cfKeys = Object.keys(cfHeaders);
228
- if (cfKeys.length) {
229
- log(chalk.dim(` CF Access headers: ${cfKeys.join(", ")} (id=${cfHeaders["CF-Access-Client-Id"]?.slice(0, 8)}…)`));
230
- } else {
231
- log(chalk.yellow(" ⚠ No CF Access service token found (set CF_ACCESS_CLIENT_ID + CF_ACCESS_CLIENT_SECRET)"));
232
- }
233
200
  if (qaUser && qaPass && apiUrl) {
234
201
  try {
235
202
  if (suppressTls) suppressTls();
@@ -238,8 +205,8 @@ export async function resolveRemoteAuth(opts = {}) {
238
205
  try {
239
206
  const resp = await fetch(`${apiUrl}/iam/login`, {
240
207
  method: "POST",
241
- headers: { "Content-Type": "application/json", ...cfHeaders },
242
- body: JSON.stringify({ user: qaUser, password: qaPass }),
208
+ headers: { "Content-Type": "application/json" },
209
+ body: JSON.stringify({ username: qaUser, password: qaPass }),
243
210
  signal: AbortSignal.timeout(10_000),
244
211
  });
245
212
  if (resp.ok) {
@@ -250,9 +217,7 @@ export async function resolveRemoteAuth(opts = {}) {
250
217
  return { bearerToken, qaUser, qaPass, useTokenMode: true };
251
218
  }
252
219
  } else {
253
- const body = await resp.text().catch(() => "");
254
- log(chalk.dim(` Local creds rejected: HTTP ${resp.status} (user=${qaUser})`));
255
- if (body) log(chalk.dim(` Response: ${body.slice(0, 200)}`));
220
+ log(chalk.dim(` Local creds rejected: HTTP ${resp.status}`));
256
221
  }
257
222
  } finally {
258
223
  if (prev === undefined) delete process.env.NODE_TLS_REJECT_UNAUTHORIZED;
@@ -315,8 +280,11 @@ export async function resolveRemoteAuth(opts = {}) {
315
280
 
316
281
  const remoteToken = remoteEnv.BEARER_TOKEN;
317
282
  if (remoteToken && isJwt(remoteToken)) {
318
- log(chalk.green(" ✓ Got JWT bearer token from VM"));
319
- return { bearerToken: remoteToken, qaUser, qaPass, useTokenMode: true };
283
+ if (!isJwtExpired(remoteToken)) {
284
+ log(chalk.green(" ✓ Got JWT bearer token from VM"));
285
+ return { bearerToken: remoteToken, qaUser, qaPass, useTokenMode: true };
286
+ }
287
+ log(chalk.dim(" Remote bearer token expired — trying VM Auth0…"));
320
288
  }
321
289
 
322
290
  if (remoteEnv.MX_AUTH0_DOMAIN && remoteEnv.MX_AUTH0_CLIENT_ID) {
@@ -275,15 +275,7 @@ export async function ensureAzAuth(execa, { subscription, throwOnMissing = false
275
275
  if (subscription) args.push("--subscription", subscription);
276
276
  const { stdout } = await execa("az", args, { timeout: 15000 });
277
277
  return JSON.parse(stdout);
278
- } catch (err) {
279
- if (isAzSessionExpiredError(err)) {
280
- const { suggested } = parseAzReloginHint(err);
281
- const msg = `Azure session expired (MFA). Run:\n ${suggested.replace(/\n/g, "\n ")}`;
282
- if (throwOnMissing) throw new Error(msg);
283
- console.error(chalk.yellow(`\n Azure session expired (MFA or token refresh required).`));
284
- console.error(chalk.cyan(` Run: ${suggested.split("\n")[0]}\n`));
285
- process.exit(1);
286
- }
278
+ } catch {
287
279
  const msg = "Not logged in to Azure. Run: az login";
288
280
  if (throwOnMissing) throw new Error(msg);
289
281
  console.error(chalk.red("\n Not logged in to Azure. Run: az login\n"));
@@ -455,61 +447,7 @@ async function refreshTokenViaGh(execa, missingScopes) {
455
447
  }
456
448
 
457
449
  export async function verifyGithubToken(token) {
458
- if (!token) {
459
- // No token anywhere — try gh CLI auth
460
- const execa = await lazyExeca();
461
- try {
462
- const { stdout: ghToken, exitCode } = await execa("gh", ["auth", "token", "-h", "github.com"], { timeout: 10000, reject: false });
463
- const existing = (ghToken || "").trim();
464
- if (exitCode === 0 && existing) {
465
- console.log(chalk.cyan(" No token in env/netrc — using gh CLI token"));
466
- token = existing;
467
- }
468
- } catch { /* gh not installed or not authed */ }
469
-
470
- if (!token) {
471
- // Still no token — offer interactive gh auth login
472
- console.log(chalk.yellow("\n ⚠ No GitHub token found (checked --github-token, $GITHUB_TOKEN, ~/.netrc, gh CLI)"));
473
- try {
474
- const { exitCode: ghExists } = await execa("which", ["gh"], { reject: false, timeout: 5000 });
475
- if (ghExists === 0) {
476
- console.log(chalk.cyan(" ▶ Running gh auth login…\n"));
477
- const { exitCode: loginExit } = await execa("gh", ["auth", "login", "-h", "github.com", "-s", "write:packages,repo"], { stdio: "inherit", reject: false, timeout: 300000 });
478
- if (loginExit === 0) {
479
- const { stdout: newToken } = await execa("gh", ["auth", "token", "-h", "github.com"], { timeout: 10000 });
480
- token = (newToken || "").trim();
481
- if (token) {
482
- // Sync to .netrc for future use
483
- const netrcPath = path.join(os.homedir(), ".netrc");
484
- const entry = `machine github.com login x-access-token password ${token}`;
485
- try {
486
- let content = "";
487
- try { content = fs.readFileSync(netrcPath, "utf8"); } catch {}
488
- if (/^machine\s+github\.com\b/m.test(content)) {
489
- content = content.replace(
490
- /machine\s+github\.com\b[^\n]*(\n\s*(login|password)\s+[^\n]*)*/gm,
491
- entry,
492
- );
493
- } else {
494
- content = content.trimEnd() + (content ? "\n" : "") + entry + "\n";
495
- }
496
- fs.writeFileSync(netrcPath, content, { mode: 0o600 });
497
- console.log(chalk.green(" ✓ ~/.netrc updated"));
498
- } catch {}
499
- }
500
- }
501
- } else {
502
- console.log(chalk.dim(" Install gh CLI to authenticate: https://cli.github.com"));
503
- }
504
- } catch {}
505
-
506
- if (!token) {
507
- console.error(chalk.red(" ✗ GitHub authentication required — GHCR pulls will fail without a token."));
508
- console.error(chalk.dim(" Set $GITHUB_TOKEN, run gh auth login, or pass --github-token.\n"));
509
- process.exit(1);
510
- }
511
- }
512
- }
450
+ if (!token) return { token, login: undefined };
513
451
  const execa = await lazyExeca();
514
452
  try {
515
453
  let res = await fetch("https://api.github.com/user", {
@@ -2369,6 +2369,31 @@ export async function azureList(opts = {}) {
2369
2369
  let aksClusters = (fullState.azure || {}).clusters || {};
2370
2370
  let hasAks = Object.keys(aksClusters).length > 0;
2371
2371
 
2372
+ // Always try to discover VMs from Azure (tag managed=fops) so we re-add any that were
2373
+ // lost from local state (e.g. state file reset or edited).
2374
+ try {
2375
+ const execa = await lazyExeca();
2376
+ await ensureAzCli(execa);
2377
+ await ensureAzAuth(execa, { subscription: opts.subscription });
2378
+ const found = await discoverVmsFromAzure(execa, { quiet: true, subscription: opts.subscription });
2379
+ if (found > 0) {
2380
+ console.log(OK(` ✓ Re-discovered ${found} VM(s) from Azure`) + DIM(" (tag managed=fops)\n"));
2381
+ ({ activeVm, vms } = listVms());
2382
+ vmNames = Object.keys(vms);
2383
+ fullState = readState();
2384
+ aksClusters = (fullState.azure || {}).clusters || {};
2385
+ hasAks = Object.keys(aksClusters).length > 0;
2386
+ }
2387
+ } catch { /* az not available or not authenticated */ }
2388
+
2389
+ if (vmNames.length === 0 && !hasAks) {
2390
+ banner("Azure VMs");
2391
+ hint("No VMs or clusters found in Azure.");
2392
+ hint("Create a VM: fops azure up <name>");
2393
+ hint("Create a cluster: fops azure aks up <name>\n");
2394
+ return;
2395
+ }
2396
+
2372
2397
  // Use cache if fresh, otherwise try shared tags, then fall back to full sync
2373
2398
  const forceLive = opts.live;
2374
2399
  let cache = readCache();
@@ -2389,37 +2414,13 @@ export async function azureList(opts = {}) {
2389
2414
  } catch { /* tag read failed, fall through to full sync */ }
2390
2415
  }
2391
2416
 
2392
- // Discovery + full sync only when all caches are stale
2393
2417
  if (!fresh) {
2394
- try {
2395
- const execa = await lazyExeca();
2396
- await ensureAzCli(execa);
2397
- await ensureAzAuth(execa, { subscription: opts.subscription });
2398
- const found = await discoverVmsFromAzure(execa, { quiet: true, subscription: opts.subscription });
2399
- if (found > 0) {
2400
- console.log(OK(` ✓ Re-discovered ${found} VM(s) from Azure`) + DIM(" (tag managed=fops)\n"));
2401
- ({ activeVm, vms } = listVms());
2402
- vmNames = Object.keys(vms);
2403
- fullState = readState();
2404
- aksClusters = (fullState.azure || {}).clusters || {};
2405
- hasAks = Object.keys(aksClusters).length > 0;
2406
- }
2407
- } catch { /* az not available or not authenticated */ }
2408
-
2409
2418
  await azureSync({ quiet: !opts.verbose });
2410
2419
  cache = readCache();
2411
2420
  cacheSource = "live";
2412
2421
  }
2413
2422
  }
2414
2423
 
2415
- if (vmNames.length === 0 && !hasAks) {
2416
- banner("Azure VMs");
2417
- hint("No VMs or clusters found in Azure.");
2418
- hint("Create a VM: fops azure up <name>");
2419
- hint("Create a cluster: fops azure aks up <name>\n");
2420
- return;
2421
- }
2422
-
2423
2424
  const cachedVms = cache?.vms || {};
2424
2425
  const cachedClusters = cache?.clusters || {};
2425
2426
  const cacheTime = cache?.updatedAt;
@@ -3273,7 +3274,7 @@ export async function azureSshWhitelistMe(opts = {}) {
3273
3274
 
3274
3275
  const merged = [...new Set([...currentSources.filter(s => s && s !== "*" && s !== "Internet"), myCidr])];
3275
3276
  console.log(chalk.yellow(` ↻ Adding ${myCidr} to SSH rule on ${nsgName} (${currentSources.length} existing)...`));
3276
- const { exitCode: updateCode, stderr: updateStderr } = await execa("az", [
3277
+ const { exitCode: updateCode } = await execa("az", [
3277
3278
  "network", "nsg", "rule", "create", "-g", rg, "--nsg-name", nsgName,
3278
3279
  "-n", sshRule?.name || "allow-ssh", "--priority", String(sshRule?.priority || 1000),
3279
3280
  "--destination-port-ranges", "22", "--access", "Allow",
@@ -3283,17 +3284,8 @@ export async function azureSshWhitelistMe(opts = {}) {
3283
3284
  ], { reject: false, timeout: 30000 });
3284
3285
 
3285
3286
  if (updateCode !== 0) {
3286
- console.error(ERR(`\n Failed to update NSG rule on ${nsgName}`));
3287
- const msg = (updateStderr || "").trim();
3288
- if (msg.includes("AADSTS") || msg.includes("Interactive authentication")) {
3289
- console.error(ERR(" Azure session expired — run: az login"));
3290
- } else if (msg.includes("AuthorizationFailed")) {
3291
- console.error(ERR(" Insufficient permissions to update NSG rules in this subscription."));
3292
- } else if (msg) {
3293
- console.error(DIM(` ${msg.split("\n")[0]}`));
3294
- }
3295
- console.error("");
3296
- return;
3287
+ console.error(ERR(`\n Failed to update NSG rule on ${nsgName}\n`));
3288
+ process.exit(1);
3297
3289
  }
3298
3290
  console.log(OK(`\n ✓ SSH (22) whitelisted for ${myCidr} on ${vmName} (${nsgName})\n`));
3299
3291
  console.log(` Sources: ${merged.join(", ")}\n`);
@@ -22,7 +22,7 @@ import { readState, listVms } from "./azure-state.js";
22
22
  // fops_by = alessio (who synced)
23
23
 
24
24
  const TAG_PREFIX = "fops_";
25
- const TAG_MAX_AGE_MS = 20 * 60 * 1000; // 20 minutes — tags are cheaper to check
25
+ const TAG_MAX_AGE_MS = 10 * 60 * 1000; // 10 minutes — tags are cheaper to check
26
26
 
27
27
  // ── Write: publish probe results as tags on a VM ─────────────────────────────
28
28
 
@@ -12,7 +12,7 @@ import {
12
12
  // Stored in ~/.fops.json under azure.cache:
13
13
  // { updatedAt, vms: { <name>: { ... } }, clusters: { <name>: { ... } } }
14
14
 
15
- const CACHE_MAX_AGE_MS = 15 * 60 * 1000; // 15 minutes
15
+ const CACHE_MAX_AGE_MS = 5 * 60 * 1000; // 5 minutes
16
16
 
17
17
  // Short keys for the 6 tracked Foundation services
18
18
  const SVC_MAP = {
@@ -169,16 +169,16 @@ async function syncVms(execa) {
169
169
 
170
170
  // After a knock, iptables rule needs a moment to propagate; first SSH needs full handshake.
171
171
  // Brief delay then retry once to avoid false "unreachable" (e.g. uaenorth latency).
172
- await new Promise((r) => setTimeout(r, 400));
172
+ await new Promise((r) => setTimeout(r, 800));
173
173
  let sshOk = false;
174
174
  for (let attempt = 0; attempt < 2; attempt++) {
175
175
  const { exitCode: sshCode } = await execa("ssh", [
176
176
  ...MUX_OPTS(vm.publicIp, DEFAULTS.adminUser),
177
177
  "-o", "BatchMode=yes",
178
178
  `${DEFAULTS.adminUser}@${vm.publicIp}`, "echo ok",
179
- ], { timeout: 8000, reject: false }).catch(() => ({ exitCode: 1 }));
179
+ ], { timeout: 15000, reject: false }).catch(() => ({ exitCode: 1 }));
180
180
  if (sshCode === 0) { sshOk = true; break; }
181
- if (attempt === 0) await new Promise((r) => setTimeout(r, 1000));
181
+ if (attempt === 0) await new Promise((r) => setTimeout(r, 2000));
182
182
  }
183
183
 
184
184
  if (!sshOk) {
@@ -257,6 +257,10 @@ export async function azureUp(opts = {}) {
257
257
  if (!publicIp) { console.error(ERR(" VM created but no public IP assigned.")); process.exit(1); }
258
258
  console.log(OK(` ✓ VM created — ${publicIp}`));
259
259
 
260
+ // Persist IP immediately so it's never lost if later steps fail or user Ctrl+C's
261
+ const publicUrl = opts.url || defaultUrl;
262
+ writeVmState(vmName, { resourceGroup: rg, location, publicIp, publicUrl, subscriptionId: subId, createdAt: new Date().toISOString() });
263
+
260
264
  hint("Enabling accelerated networking…");
261
265
  const nicName = `${vmName}VMNic`;
262
266
  const dealloc = await execa("az", ["vm", "deallocate", "-g", rg, "-n", vmName, "--output", "none", ...subArgs(sub)], { reject: false, timeout: 120000 });
@@ -361,9 +365,6 @@ export async function azureUp(opts = {}) {
361
365
  ], { reject: false, timeout: 30000 });
362
366
  console.log(OK(" ✓ Knock port range open"));
363
367
 
364
- const publicUrl = opts.url || defaultUrl;
365
- writeVmState(vmName, { resourceGroup: rg, location, publicIp, publicUrl, subscriptionId: subId, createdAt: new Date().toISOString() });
366
-
367
368
  // Save SSH key to 1Password if available
368
369
  try {
369
370
  const { opWhoami, opEnsureVault, opSaveSSHKey } = await import("../../fops-plugin-1password/lib/op.js");
@@ -393,6 +394,12 @@ export async function azureUp(opts = {}) {
393
394
  await syncDns(cfToken, publicUrl, publicIp);
394
395
  await ensureOpenAiNetworkAccess(execa, publicIp, sub);
395
396
 
397
+ // Print IP/URL prominently before the long SSH wait so users don't miss it
398
+ console.log("");
399
+ kvLine("IP", ACCENT(publicIp));
400
+ kvLine("URL", ACCENT(publicUrl));
401
+ console.log("");
402
+
396
403
  console.log(chalk.magenta(" ✻") + " " + DIM("Waiting for SSH…"));
397
404
  const sshMaxWait = 300000;
398
405
  let ready = await waitForSsh(execa, publicIp, adminUser, sshMaxWait);
@@ -508,8 +508,6 @@ export function registerInfraCommands(azure) {
508
508
  .option("--github-token <token>", "GitHub PAT for Flux + GHCR pull (default: $GITHUB_TOKEN)")
509
509
  .option("--no-flux", "Skip Flux bootstrap")
510
510
  .option("--no-postgres", "Skip Postgres Flexible Server provisioning")
511
- .option("--flux-local-repo <path>", "Path to local flux repo clone (auto-detected if omitted)")
512
- .option("--overlay <name>", "App overlay name in flux repo (default: demo-azure)")
513
511
  .option("--dai", "Include DAI (Dashboards AI) workloads")
514
512
  .action(async (name, opts) => {
515
513
  const { aksUp } = await import("../azure-aks.js");
@@ -526,8 +524,6 @@ export function registerInfraCommands(azure) {
526
524
  githubToken: opts.githubToken,
527
525
  noFlux: opts.flux === false,
528
526
  noPostgres: opts.postgres === false,
529
- fluxLocalRepo: opts.fluxLocalRepo,
530
- overlay: opts.overlay,
531
527
  dai: opts.dai === true,
532
528
  });
533
529
  });
@@ -15,14 +15,17 @@ export function registerTestCommands(azure) {
15
15
  .description("Run QA automation tests locally against a remote VM")
16
16
  .option("--vm-name <name>", "Target VM (default: active)")
17
17
  .action(async (name, opts) => {
18
- const { requireVmState, knockForVm } = await import("../azure.js");
19
- const { resolveCliSrc } = await import("../azure-helpers.js");
18
+ const { resolveCliSrc, lazyExeca, ensureAzCli, ensureAzAuth, resolvePublicIp } = await import("../azure-helpers.js");
19
+ const { requireVmState, knockForVm, sshCmd, MUX_OPTS } = await import("../azure.js");
20
20
  const { rootDir } = await import(resolveCliSrc("project.js"));
21
21
  const fsp = await import("node:fs/promises");
22
22
  const path = await import("node:path");
23
23
 
24
24
  const state = requireVmState(opts.vmName || name);
25
- const ip = state.publicIp;
25
+ const execa = await lazyExeca();
26
+ await ensureAzCli(execa);
27
+ await ensureAzAuth(execa);
28
+ const ip = await resolvePublicIp(execa, state.resourceGroup, state.vmName, state.publicIp);
26
29
  if (!ip) {
27
30
  console.error(chalk.red("\n No IP address. Is the VM running? Try: fops azure start\n"));
28
31
  process.exit(1);
@@ -45,13 +48,11 @@ export function registerTestCommands(azure) {
45
48
 
46
49
  const vmUrl = state.publicUrl || `https://${ip}`;
47
50
  const apiUrl = `${vmUrl}/api`;
48
- const { execa: execaFn } = await import("execa");
49
- const { sshCmd, MUX_OPTS } = await import("../azure.js");
50
51
 
51
52
  console.log(chalk.dim(` Authenticating against ${vmUrl}…`));
52
53
  const auth = await resolveRemoteAuth({
53
54
  apiUrl, ip, vmState: state,
54
- execaFn, sshCmd, knockForVm, suppressTlsWarning,
55
+ execaFn: execa, sshCmd, knockForVm, suppressTlsWarning,
55
56
  });
56
57
  let { bearerToken, qaUser, qaPass, useTokenMode } = auth;
57
58
 
@@ -78,20 +79,6 @@ export function registerTestCommands(azure) {
78
79
  : content + `\n${key}=${value}`;
79
80
  };
80
81
 
81
- // Resolve CF Access service token for Cloudflare-proxied endpoints
82
- let cfClientId = process.env.CF_ACCESS_CLIENT_ID || "";
83
- let cfClientSecret = process.env.CF_ACCESS_CLIENT_SECRET || "";
84
- if (!cfClientId) {
85
- // Try reading from the compose .env
86
- try {
87
- const composeDotEnv = await fsp.readFile(path.join(root, ".env"), "utf8");
88
- const idMatch = composeDotEnv.match(/^CF_ACCESS_CLIENT_ID=(.+)$/m);
89
- const secretMatch = composeDotEnv.match(/^CF_ACCESS_CLIENT_SECRET=(.+)$/m);
90
- if (idMatch) cfClientId = idMatch[1].trim();
91
- if (secretMatch) cfClientSecret = secretMatch[1].trim();
92
- } catch { /* .env may not exist */ }
93
- }
94
-
95
82
  envContent = setVar(envContent, "API_URL", apiUrl);
96
83
  envContent = setVar(envContent, "DEV_API_URL", apiUrl);
97
84
  envContent = setVar(envContent, "LIVE_API_URL", apiUrl);
@@ -107,10 +94,6 @@ export function registerTestCommands(azure) {
107
94
  envContent = setVar(envContent, "BEARER_TOKEN", bearerToken);
108
95
  envContent = setVar(envContent, "TOKEN_AUTH0", bearerToken);
109
96
  }
110
- if (cfClientId && cfClientSecret) {
111
- envContent = setVar(envContent, "CF_ACCESS_CLIENT_ID", cfClientId);
112
- envContent = setVar(envContent, "CF_ACCESS_CLIENT_SECRET", cfClientSecret);
113
- }
114
97
 
115
98
  await fsp.writeFile(envPath, envContent);
116
99
  console.log(chalk.green(` ✓ Configured QA .env → ${apiUrl}`));
@@ -120,8 +103,8 @@ export function registerTestCommands(azure) {
120
103
  await fsp.access(path.join(qaDir, "venv"));
121
104
  } catch {
122
105
  console.log(chalk.cyan(" Setting up QA automation environment…"));
123
- await execaFn("python3", ["-m", "venv", "venv"], { cwd: qaDir, stdio: "inherit" });
124
- await execaFn("bash", ["-c", "source venv/bin/activate && pip install -r requirements.txt && playwright install"], { cwd: qaDir, stdio: "inherit" });
106
+ await execa("python3", ["-m", "venv", "venv"], { cwd: qaDir, stdio: "inherit" });
107
+ await execa("bash", ["-c", "source venv/bin/activate && pip install -r requirements.txt && playwright install"], { cwd: qaDir, stdio: "inherit" });
125
108
  }
126
109
 
127
110
  // Knock to ensure VM is reachable
@@ -172,13 +155,9 @@ export function registerTestCommands(azure) {
172
155
  testEnv.BEARER_TOKEN = bearerToken;
173
156
  testEnv.TOKEN_AUTH0 = bearerToken;
174
157
  }
175
- if (cfClientId && cfClientSecret) {
176
- testEnv.CF_ACCESS_CLIENT_ID = cfClientId;
177
- testEnv.CF_ACCESS_CLIENT_SECRET = cfClientSecret;
178
- }
179
158
 
180
159
  const startMs = Date.now();
181
- const proc = execaFn(
160
+ const proc = execa(
182
161
  "bash",
183
162
  ["-c", `source venv/bin/activate && pytest ${pytestArgs}`],
184
163
  { cwd: qaDir, timeout: 600_000, reject: false, env: testEnv },
@@ -171,6 +171,36 @@ export function registerVmCommands(azure, api, registry) {
171
171
  await azureList({ live: opts.live, verbose: opts.verbose, cost: opts.cost, days: parseInt(opts.days), versions: opts.versions });
172
172
  });
173
173
 
174
+ // ── ip ─────────────────────────────────────────────────────────────────
175
+ azure
176
+ .command("ip [name]")
177
+ .description("Print the public IP (and URL) of a VM — quick lookup")
178
+ .option("--resolve", "Query Azure for the current IP (ignores cached)")
179
+ .action(async (name, opts) => {
180
+ const { requireVmState } = await import("../azure-state.js");
181
+ const state = requireVmState(name);
182
+ let ip = state.publicIp;
183
+
184
+ if (opts.resolve) {
185
+ const { lazyExeca, ensureAzCli, ensureAzAuth, resolvePublicIp, subArgs } = await import("../azure.js");
186
+ const { writeVmState } = await import("../azure-state.js");
187
+ const execa = await lazyExeca();
188
+ await ensureAzCli(execa);
189
+ await ensureAzAuth(execa);
190
+ ip = await resolvePublicIp(execa, state.resourceGroup, state.vmName, state.publicIp);
191
+ if (ip && ip !== state.publicIp) {
192
+ writeVmState(state.vmName, { publicIp: ip });
193
+ }
194
+ }
195
+
196
+ if (!ip) {
197
+ console.error(chalk.red("\n No IP address found. Is the VM running? Try: fops azure start\n"));
198
+ process.exit(1);
199
+ }
200
+ console.log(ip);
201
+ if (state.publicUrl) console.log(chalk.dim(state.publicUrl));
202
+ });
203
+
174
204
  // ── select ───────────────────────────────────────────────────────────────
175
205
  azure
176
206
  .command("select [name]")
@@ -986,24 +986,11 @@ app.run()
986
986
  .option("--url <url>", "Override the backend API URL")
987
987
  .action(async (opts) => {
988
988
  const { spawn } = await import("node:child_process");
989
- const { writeFileSync, existsSync, realpathSync, readFileSync, unlinkSync, mkdirSync } = await import("node:fs");
989
+ const { writeFileSync, existsSync, realpathSync, readFileSync } = await import("node:fs");
990
990
  const { tmpdir, homedir } = await import("node:os");
991
991
  const { join, dirname } = await import("node:path");
992
992
  const { findComposeRoot } = await import("./lib/tools-write.js");
993
993
 
994
- // ── Singleton: kill any existing tray process ──────────────────────────
995
- const pidDir = join(homedir(), ".fops");
996
- const pidFile = join(pidDir, "tray.pid");
997
- if (existsSync(pidFile)) {
998
- try {
999
- const oldPid = parseInt(readFileSync(pidFile, "utf8").trim(), 10);
1000
- if (oldPid) process.kill(oldPid, "SIGTERM");
1001
- } catch {
1002
- // process already gone — ignore
1003
- }
1004
- try { unlinkSync(pidFile); } catch {}
1005
- }
1006
-
1007
994
  const composeRoot = program._fopsRoot || findComposeRoot() || "";
1008
995
 
1009
996
  let apiUrl = opts.url || process.env.FOPS_API_URL || "http://127.0.0.1:9001";
@@ -1248,8 +1235,6 @@ $tray.Visible = $false
1248
1235
  env: trayEnv,
1249
1236
  windowsHide: true,
1250
1237
  });
1251
- if (!existsSync(pidDir)) mkdirSync(pidDir, { recursive: true });
1252
- writeFileSync(pidFile, String(winChild.pid));
1253
1238
  winChild.unref();
1254
1239
  return;
1255
1240
  }
@@ -2010,8 +1995,6 @@ app.run()
2010
1995
  detached: true,
2011
1996
  env: trayEnv,
2012
1997
  });
2013
- if (!existsSync(pidDir)) mkdirSync(pidDir, { recursive: true });
2014
- writeFileSync(pidFile, String(child.pid));
2015
1998
  child.unref();
2016
1999
  });
2017
2000
  });
@@ -1,13 +0,0 @@
1
- ---
2
- apiVersion: kustomize.toolkit.fluxcd.io/v1
3
- kind: Kustomization
4
- metadata:
5
- name: dai-backend
6
- namespace: flux-system
7
- spec:
8
- interval: 1m
9
- sourceRef:
10
- kind: GitRepository
11
- name: flux-system
12
- path: ./apps/dai/backend/overlays/meshx/{{OVERLAY}}
13
- prune: true
@@ -1,13 +0,0 @@
1
- ---
2
- apiVersion: kustomize.toolkit.fluxcd.io/v1
3
- kind: Kustomization
4
- metadata:
5
- name: dai-frontend
6
- namespace: flux-system
7
- spec:
8
- interval: 1m
9
- sourceRef:
10
- kind: GitRepository
11
- name: flux-system
12
- path: ./apps/dai/frontend/overlays/meshx/{{OVERLAY}}
13
- prune: true