@meshxdata/fops 0.1.40 → 0.1.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/CHANGELOG.md +2 -376
  2. package/package.json +1 -1
  3. package/src/agent/llm.js +0 -2
  4. package/src/doctor.js +21 -93
  5. package/src/plugins/bundled/fops-plugin-1password/index.js +1 -13
  6. package/src/plugins/bundled/fops-plugin-azure/index.js +2 -4
  7. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks.js +2 -130
  8. package/src/plugins/bundled/fops-plugin-azure/lib/azure-auth.js +32 -68
  9. package/src/plugins/bundled/fops-plugin-azure/lib/azure-helpers.js +2 -64
  10. package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops.js +28 -36
  11. package/src/plugins/bundled/fops-plugin-azure/lib/azure-shared-cache.js +1 -1
  12. package/src/plugins/bundled/fops-plugin-azure/lib/azure-sync.js +4 -4
  13. package/src/plugins/bundled/fops-plugin-azure/lib/azure-vm-lifecycle.js +10 -3
  14. package/src/plugins/bundled/fops-plugin-azure/lib/commands/infra-cmds.js +0 -4
  15. package/src/plugins/bundled/fops-plugin-azure/lib/commands/test-cmds.js +0 -22
  16. package/src/plugins/bundled/fops-plugin-azure/lib/commands/vm-cmds.js +30 -0
  17. package/src/plugins/bundled/fops-plugin-foundation/index.js +1 -18
  18. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/dai-backend.yaml +0 -13
  19. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/dai-frontend.yaml +0 -13
  20. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-backend.yaml +0 -13
  21. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-frontend.yaml +0 -13
  22. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-hive.yaml +0 -13
  23. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-kafka.yaml +0 -13
  24. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-meltano.yaml +0 -13
  25. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-mlflow.yaml +0 -13
  26. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-opa.yaml +0 -13
  27. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-processor.yaml +0 -13
  28. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-scheduler.yaml +0 -13
  29. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-storage-engine.yaml +0 -13
  30. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-trino.yaml +0 -13
  31. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-watcher.yaml +0 -13
  32. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/config/repository.yaml +0 -66
  33. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/kustomization.yaml +0 -30
  34. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/acr-webhook-controller.yaml +0 -63
  35. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/externalsecrets.yaml +0 -15
  36. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/istio.yaml +0 -42
  37. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/kafka.yaml +0 -15
  38. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/kube-reflector.yaml +0 -33
  39. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/kubecost.yaml +0 -12
  40. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/nats-server.yaml +0 -15
  41. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/prometheus-agent.yaml +0 -34
  42. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/reloader.yaml +0 -12
  43. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/spark.yaml +0 -112
  44. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/tailscale.yaml +0 -67
  45. package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/vertical-pod-autoscaler.yaml +0 -15
@@ -148,7 +148,7 @@ function resolveFluxConfig(clusterName, opts) {
148
148
  return {
149
149
  fluxRepo: opts?.fluxRepo ?? tracked?.flux?.repo ?? az.fluxRepo ?? project?.fluxRepo ?? AKS_DEFAULTS.fluxRepo,
150
150
  fluxOwner: opts?.fluxOwner ?? tracked?.flux?.owner ?? az.fluxOwner ?? project?.fluxOwner ?? AKS_DEFAULTS.fluxOwner,
151
- fluxPath: opts?.fluxPath || tracked?.flux?.path || az.fluxPath || project?.fluxPath || `clusters/${clusterName}`,
151
+ fluxPath: opts?.fluxPath || tracked?.flux?.path || az.fluxPath || project?.fluxPath || AKS_DEFAULTS.fluxPath,
152
152
  fluxBranch: opts?.fluxBranch ?? tracked?.flux?.branch ?? az.fluxBranch ?? project?.fluxBranch ?? AKS_DEFAULTS.fluxBranch,
153
153
  };
154
154
  }
@@ -182,107 +182,6 @@ function requireCluster(name) {
182
182
  };
183
183
  }
184
184
 
185
- // ── Flux local-repo scaffolding ───────────────────────────────────────────────
186
-
187
- /**
188
- * Auto-detect the local flux repo clone.
189
- * Searches common relative paths from the project root and CWD.
190
- */
191
- function findFluxLocalRepo() {
192
- const state = readState();
193
- const projectRoot = state.azure?.projectRoot || state.projectRoot;
194
-
195
- const candidates = [];
196
- if (projectRoot) {
197
- candidates.push(path.resolve(projectRoot, "..", "flux"));
198
- candidates.push(path.resolve(projectRoot, "flux"));
199
- }
200
- candidates.push(path.resolve("../flux"));
201
- candidates.push(path.resolve("../../flux"));
202
-
203
- for (const p of candidates) {
204
- if (fs.existsSync(path.join(p, "clusters"))) return p;
205
- }
206
- return null;
207
- }
208
-
209
- /**
210
- * Resolve the bundled cluster template directory shipped with the CLI.
211
- */
212
- function resolveClusterTemplate() {
213
- const thisDir = path.dirname(fileURLToPath(import.meta.url));
214
- return path.resolve(thisDir, "..", "templates", "cluster");
215
- }
216
-
217
- /**
218
- * Scaffold a new cluster directory in the local flux repo from the bundled
219
- * template, substituting {{CLUSTER_NAME}} and {{OVERLAY}} placeholders.
220
- * Then commits and pushes the change.
221
- */
222
- async function scaffoldFluxCluster(execa, { clusterName, fluxLocalRepo, overlay }) {
223
- const templateDir = resolveClusterTemplate();
224
- const destDir = path.join(fluxLocalRepo, "clusters", clusterName);
225
-
226
- if (!fs.existsSync(templateDir)) {
227
- console.log(WARN(` ⚠ Cluster template not found at ${templateDir}`));
228
- return false;
229
- }
230
-
231
- if (fs.existsSync(destDir)) {
232
- console.log(OK(` ✓ Cluster directory already exists: clusters/${clusterName}`));
233
- return true;
234
- }
235
-
236
- const vars = {
237
- "{{CLUSTER_NAME}}": clusterName,
238
- "{{OVERLAY}}": overlay || "demo-azure",
239
- };
240
-
241
- hint("Scaffolding Flux cluster manifests…");
242
-
243
- function copyDir(src, dest) {
244
- fs.mkdirSync(dest, { recursive: true });
245
- for (const entry of fs.readdirSync(src, { withFileTypes: true })) {
246
- const srcPath = path.join(src, entry.name);
247
- const destPath = path.join(dest, entry.name);
248
- if (entry.isDirectory()) {
249
- copyDir(srcPath, destPath);
250
- } else {
251
- let content = fs.readFileSync(srcPath, "utf8");
252
- for (const [k, v] of Object.entries(vars)) {
253
- content = content.replaceAll(k, v);
254
- }
255
- fs.writeFileSync(destPath, content);
256
- }
257
- }
258
- }
259
-
260
- copyDir(templateDir, destDir);
261
- console.log(OK(` ✓ Cluster directory created: clusters/${clusterName}`));
262
-
263
- // List remaining placeholders
264
- const remaining = [];
265
- for (const line of fs.readFileSync(path.join(destDir, "kustomization.yaml"), "utf8").split("\n")) {
266
- const m = line.match(/\{\{(\w+)\}\}/g);
267
- if (m) remaining.push(...m);
268
- }
269
-
270
- // Git add + commit + push
271
- hint("Committing and pushing to flux repo…");
272
- try {
273
- await execa("git", ["-C", fluxLocalRepo, "add", `clusters/${clusterName}`], { timeout: 15000 });
274
- await execa("git", ["-C", fluxLocalRepo, "commit", "-m", `Add cluster ${clusterName}`], { timeout: 15000 });
275
- await execa("git", ["-C", fluxLocalRepo, "push"], { timeout: 60000 });
276
- console.log(OK(` ✓ Pushed clusters/${clusterName} to flux repo`));
277
- } catch (err) {
278
- const msg = (err.stderr || err.message || "").split("\n")[0];
279
- console.log(WARN(` ⚠ Git push failed: ${msg}`));
280
- hint(`Manually commit and push clusters/${clusterName} in the flux repo`);
281
- }
282
-
283
- return true;
284
- }
285
-
286
185
  // ── Flux helpers ──────────────────────────────────────────────────────────────
287
186
 
288
187
  async function ensureFluxCli(execa) {
@@ -345,18 +244,6 @@ export async function aksUp(opts = {}) {
345
244
  if (exists === 0) {
346
245
  console.log(WARN(`\n Cluster "${clusterName}" already exists — reconciling…`));
347
246
 
348
- // Scaffold cluster directory if it doesn't exist yet
349
- if (!opts.noFlux) {
350
- const fluxLocalRepo = opts.fluxLocalRepo || findFluxLocalRepo();
351
- if (fluxLocalRepo) {
352
- await scaffoldFluxCluster(execa, {
353
- clusterName,
354
- fluxLocalRepo,
355
- overlay: opts.overlay,
356
- });
357
- }
358
- }
359
-
360
247
  const maxPods = opts.maxPods || 110;
361
248
  const ctx = { execa, clusterName, rg, sub, opts, minCount, maxCount, maxPods };
362
249
  await reconcileCluster(ctx);
@@ -460,22 +347,7 @@ export async function aksUp(opts = {}) {
460
347
  const fluxRepo = opts.fluxRepo ?? AKS_DEFAULTS.fluxRepo;
461
348
  const fluxOwner = opts.fluxOwner ?? AKS_DEFAULTS.fluxOwner;
462
349
  const fluxBranch = opts.fluxBranch ?? AKS_DEFAULTS.fluxBranch;
463
- const fluxPath = opts.fluxPath || `clusters/${clusterName}`;
464
-
465
- // Scaffold cluster directory in the flux repo before bootstrapping
466
- if (!opts.noFlux) {
467
- const fluxLocalRepo = opts.fluxLocalRepo || findFluxLocalRepo();
468
- if (fluxLocalRepo) {
469
- await scaffoldFluxCluster(execa, {
470
- clusterName,
471
- fluxLocalRepo,
472
- templateCluster: opts.templateCluster,
473
- });
474
- } else {
475
- console.log(WARN(" ⚠ Local flux repo not found — skipping cluster scaffolding."));
476
- hint("Pass --flux-local-repo <path> or clone meshxdata/flux next to foundation-compose.");
477
- }
478
- }
350
+ const fluxPath = opts.fluxPath || AKS_DEFAULTS.fluxPath;
479
351
 
480
352
  if (opts.noFlux) {
481
353
  console.log("");
@@ -13,7 +13,7 @@ export function hashContent(text) {
13
13
  }
14
14
 
15
15
  /**
16
- * Resolve Foundation credentials from env → ~/.fops.json → .env files.
16
+ * Resolve Foundation credentials from env → .env~/.fops.json.
17
17
  * Returns { bearerToken } or { user, password } or null.
18
18
  */
19
19
  export function resolveFoundationCreds() {
@@ -26,25 +26,6 @@ export function resolveFoundationCreds() {
26
26
  if (cfg.bearerToken?.trim()) return { bearerToken: cfg.bearerToken.trim() };
27
27
  if (cfg.user?.trim() && cfg.password) return { user: cfg.user.trim(), password: cfg.password };
28
28
  } catch { /* no fops.json */ }
29
-
30
- // Fall back to .env files for credentials
31
- const envCandidates = [pathMod.resolve(".env"), pathMod.resolve("..", ".env")];
32
- try {
33
- const raw = JSON.parse(fs.readFileSync(pathMod.join(os.homedir(), ".fops.json"), "utf8"));
34
- if (raw?.projectRoot) envCandidates.unshift(pathMod.join(raw.projectRoot, ".env"));
35
- } catch { /* ignore */ }
36
- for (const ep of envCandidates) {
37
- try {
38
- const lines = fs.readFileSync(ep, "utf8").split("\n");
39
- const get = (k) => {
40
- const ln = lines.find((l) => l.startsWith(`${k}=`));
41
- return ln ? ln.slice(k.length + 1).trim().replace(/^["']|["']$/g, "") : "";
42
- };
43
- const user = get("QA_USERNAME") || get("FOUNDATION_USERNAME");
44
- const pass = get("QA_PASSWORD") || get("FOUNDATION_PASSWORD");
45
- if (user && pass) return { user, password: pass };
46
- } catch { /* try next */ }
47
- }
48
29
  return null;
49
30
  }
50
31
 
@@ -60,44 +41,12 @@ export function suppressTlsWarning() {
60
41
  };
61
42
  }
62
43
 
63
- /**
64
- * Resolve Cloudflare Access service-token headers from env or .env files.
65
- * Returns { "CF-Access-Client-Id": ..., "CF-Access-Client-Secret": ... } or {}.
66
- */
67
- let _cfAccessHeaders;
68
- export function resolveCfAccessHeaders() {
69
- if (_cfAccessHeaders !== undefined) return _cfAccessHeaders;
70
- let id = process.env.CF_ACCESS_CLIENT_ID || "";
71
- let secret = process.env.CF_ACCESS_CLIENT_SECRET || "";
72
- if (!id) {
73
- // Try .env files
74
- const candidates = [pathMod.resolve(".env"), pathMod.resolve("..", ".env")];
75
- try {
76
- const raw = JSON.parse(fs.readFileSync(pathMod.join(os.homedir(), ".fops.json"), "utf8"));
77
- if (raw?.projectRoot) candidates.unshift(pathMod.join(raw.projectRoot, ".env"));
78
- } catch {}
79
- for (const ep of candidates) {
80
- try {
81
- const lines = fs.readFileSync(ep, "utf8").split("\n");
82
- const get = (k) => { const ln = lines.find((l) => l.startsWith(`${k}=`)); return ln ? ln.slice(k.length + 1).trim().replace(/^["']|["']$/g, "") : ""; };
83
- id = id || get("CF_ACCESS_CLIENT_ID");
84
- secret = secret || get("CF_ACCESS_CLIENT_SECRET");
85
- if (id && secret) break;
86
- } catch {}
87
- }
88
- }
89
- _cfAccessHeaders = id && secret ? { "CF-Access-Client-Id": id, "CF-Access-Client-Secret": secret } : {};
90
- return _cfAccessHeaders;
91
- }
92
-
93
44
  export async function vmFetch(url, opts = {}) {
94
45
  suppressTlsWarning();
95
46
  const prev = process.env.NODE_TLS_REJECT_UNAUTHORIZED;
96
47
  process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";
97
48
  try {
98
- const cfHeaders = resolveCfAccessHeaders();
99
- const headers = { ...cfHeaders, ...(opts.headers || {}) };
100
- return await fetch(url, { signal: AbortSignal.timeout(10_000), ...opts, headers });
49
+ return await fetch(url, { signal: AbortSignal.timeout(10_000), ...opts });
101
50
  } finally {
102
51
  if (prev === undefined) delete process.env.NODE_TLS_REJECT_UNAUTHORIZED;
103
52
  else process.env.NODE_TLS_REJECT_UNAUTHORIZED = prev;
@@ -213,7 +162,7 @@ export async function resolveRemoteAuth(opts = {}) {
213
162
 
214
163
  const creds = resolveFoundationCreds();
215
164
  let qaUser = creds?.user || process.env.QA_USERNAME || process.env.FOUNDATION_USERNAME || "operator@local";
216
- let qaPass = creds?.password || process.env.QA_PASSWORD || process.env.FOUNDATION_PASSWORD || "";
165
+ let qaPass = creds?.password || process.env.QA_PASSWORD || "";
217
166
  let bearerToken = creds?.bearerToken || "";
218
167
 
219
168
  // 1) Use local bearer if it's a valid JWT
@@ -223,13 +172,6 @@ export async function resolveRemoteAuth(opts = {}) {
223
172
  bearerToken = "";
224
173
 
225
174
  // 2) Pre-auth against the backend /iam/login
226
- const cfHeaders = resolveCfAccessHeaders();
227
- const cfKeys = Object.keys(cfHeaders);
228
- if (cfKeys.length) {
229
- log(chalk.dim(` CF Access headers: ${cfKeys.join(", ")} (id=${cfHeaders["CF-Access-Client-Id"]?.slice(0, 8)}…)`));
230
- } else {
231
- log(chalk.yellow(" ⚠ No CF Access service token found (set CF_ACCESS_CLIENT_ID + CF_ACCESS_CLIENT_SECRET)"));
232
- }
233
175
  if (qaUser && qaPass && apiUrl) {
234
176
  try {
235
177
  if (suppressTls) suppressTls();
@@ -238,8 +180,8 @@ export async function resolveRemoteAuth(opts = {}) {
238
180
  try {
239
181
  const resp = await fetch(`${apiUrl}/iam/login`, {
240
182
  method: "POST",
241
- headers: { "Content-Type": "application/json", ...cfHeaders },
242
- body: JSON.stringify({ user: qaUser, password: qaPass }),
183
+ headers: { "Content-Type": "application/json" },
184
+ body: JSON.stringify({ username: qaUser, password: qaPass }),
243
185
  signal: AbortSignal.timeout(10_000),
244
186
  });
245
187
  if (resp.ok) {
@@ -250,9 +192,7 @@ export async function resolveRemoteAuth(opts = {}) {
250
192
  return { bearerToken, qaUser, qaPass, useTokenMode: true };
251
193
  }
252
194
  } else {
253
- const body = await resp.text().catch(() => "");
254
- log(chalk.dim(` Local creds rejected: HTTP ${resp.status} (user=${qaUser})`));
255
- if (body) log(chalk.dim(` Response: ${body.slice(0, 200)}`));
195
+ log(chalk.dim(` Local creds rejected: HTTP ${resp.status}`));
256
196
  }
257
197
  } finally {
258
198
  if (prev === undefined) delete process.env.NODE_TLS_REJECT_UNAUTHORIZED;
@@ -284,8 +224,32 @@ export async function resolveRemoteAuth(opts = {}) {
284
224
  if (resp.ok) {
285
225
  const data = await resp.json();
286
226
  if (data.access_token) {
287
- log(chalk.green(` ✓ Authenticated as ${qaUser} via Auth0`));
288
- return { bearerToken: data.access_token, qaUser, qaPass, useTokenMode: true };
227
+ // Validate the token against the target API before committing to it.
228
+ // Local Auth0 config may have a different audience than the remote VM expects.
229
+ let tokenValid = true;
230
+ if (apiUrl) {
231
+ try {
232
+ const prev = process.env.NODE_TLS_REJECT_UNAUTHORIZED;
233
+ process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";
234
+ try {
235
+ const check = await fetch(`${apiUrl}/iam/me`, {
236
+ headers: { Authorization: `Bearer ${data.access_token}` },
237
+ signal: AbortSignal.timeout(8_000),
238
+ });
239
+ if (check.status === 401 || check.status === 403) {
240
+ tokenValid = false;
241
+ log(chalk.dim(` Auth0 token rejected by API (wrong audience) — trying SSH fallback…`));
242
+ }
243
+ } finally {
244
+ if (prev === undefined) delete process.env.NODE_TLS_REJECT_UNAUTHORIZED;
245
+ else process.env.NODE_TLS_REJECT_UNAUTHORIZED = prev;
246
+ }
247
+ } catch { /* network error — assume token is OK */ }
248
+ }
249
+ if (tokenValid) {
250
+ log(chalk.green(` ✓ Authenticated as ${qaUser} via Auth0`));
251
+ return { bearerToken: data.access_token, qaUser, qaPass, useTokenMode: true };
252
+ }
289
253
  }
290
254
  } else {
291
255
  log(chalk.dim(` Auth0 rejected: HTTP ${resp.status}`));
@@ -275,15 +275,7 @@ export async function ensureAzAuth(execa, { subscription, throwOnMissing = false
275
275
  if (subscription) args.push("--subscription", subscription);
276
276
  const { stdout } = await execa("az", args, { timeout: 15000 });
277
277
  return JSON.parse(stdout);
278
- } catch (err) {
279
- if (isAzSessionExpiredError(err)) {
280
- const { suggested } = parseAzReloginHint(err);
281
- const msg = `Azure session expired (MFA). Run:\n ${suggested.replace(/\n/g, "\n ")}`;
282
- if (throwOnMissing) throw new Error(msg);
283
- console.error(chalk.yellow(`\n Azure session expired (MFA or token refresh required).`));
284
- console.error(chalk.cyan(` Run: ${suggested.split("\n")[0]}\n`));
285
- process.exit(1);
286
- }
278
+ } catch {
287
279
  const msg = "Not logged in to Azure. Run: az login";
288
280
  if (throwOnMissing) throw new Error(msg);
289
281
  console.error(chalk.red("\n Not logged in to Azure. Run: az login\n"));
@@ -455,61 +447,7 @@ async function refreshTokenViaGh(execa, missingScopes) {
455
447
  }
456
448
 
457
449
  export async function verifyGithubToken(token) {
458
- if (!token) {
459
- // No token anywhere — try gh CLI auth
460
- const execa = await lazyExeca();
461
- try {
462
- const { stdout: ghToken, exitCode } = await execa("gh", ["auth", "token", "-h", "github.com"], { timeout: 10000, reject: false });
463
- const existing = (ghToken || "").trim();
464
- if (exitCode === 0 && existing) {
465
- console.log(chalk.cyan(" No token in env/netrc — using gh CLI token"));
466
- token = existing;
467
- }
468
- } catch { /* gh not installed or not authed */ }
469
-
470
- if (!token) {
471
- // Still no token — offer interactive gh auth login
472
- console.log(chalk.yellow("\n ⚠ No GitHub token found (checked --github-token, $GITHUB_TOKEN, ~/.netrc, gh CLI)"));
473
- try {
474
- const { exitCode: ghExists } = await execa("which", ["gh"], { reject: false, timeout: 5000 });
475
- if (ghExists === 0) {
476
- console.log(chalk.cyan(" ▶ Running gh auth login…\n"));
477
- const { exitCode: loginExit } = await execa("gh", ["auth", "login", "-h", "github.com", "-s", "write:packages,repo"], { stdio: "inherit", reject: false, timeout: 300000 });
478
- if (loginExit === 0) {
479
- const { stdout: newToken } = await execa("gh", ["auth", "token", "-h", "github.com"], { timeout: 10000 });
480
- token = (newToken || "").trim();
481
- if (token) {
482
- // Sync to .netrc for future use
483
- const netrcPath = path.join(os.homedir(), ".netrc");
484
- const entry = `machine github.com login x-access-token password ${token}`;
485
- try {
486
- let content = "";
487
- try { content = fs.readFileSync(netrcPath, "utf8"); } catch {}
488
- if (/^machine\s+github\.com\b/m.test(content)) {
489
- content = content.replace(
490
- /machine\s+github\.com\b[^\n]*(\n\s*(login|password)\s+[^\n]*)*/gm,
491
- entry,
492
- );
493
- } else {
494
- content = content.trimEnd() + (content ? "\n" : "") + entry + "\n";
495
- }
496
- fs.writeFileSync(netrcPath, content, { mode: 0o600 });
497
- console.log(chalk.green(" ✓ ~/.netrc updated"));
498
- } catch {}
499
- }
500
- }
501
- } else {
502
- console.log(chalk.dim(" Install gh CLI to authenticate: https://cli.github.com"));
503
- }
504
- } catch {}
505
-
506
- if (!token) {
507
- console.error(chalk.red(" ✗ GitHub authentication required — GHCR pulls will fail without a token."));
508
- console.error(chalk.dim(" Set $GITHUB_TOKEN, run gh auth login, or pass --github-token.\n"));
509
- process.exit(1);
510
- }
511
- }
512
- }
450
+ if (!token) return { token, login: undefined };
513
451
  const execa = await lazyExeca();
514
452
  try {
515
453
  let res = await fetch("https://api.github.com/user", {
@@ -2369,6 +2369,31 @@ export async function azureList(opts = {}) {
2369
2369
  let aksClusters = (fullState.azure || {}).clusters || {};
2370
2370
  let hasAks = Object.keys(aksClusters).length > 0;
2371
2371
 
2372
+ // Always try to discover VMs from Azure (tag managed=fops) so we re-add any that were
2373
+ // lost from local state (e.g. state file reset or edited).
2374
+ try {
2375
+ const execa = await lazyExeca();
2376
+ await ensureAzCli(execa);
2377
+ await ensureAzAuth(execa, { subscription: opts.subscription });
2378
+ const found = await discoverVmsFromAzure(execa, { quiet: true, subscription: opts.subscription });
2379
+ if (found > 0) {
2380
+ console.log(OK(` ✓ Re-discovered ${found} VM(s) from Azure`) + DIM(" (tag managed=fops)\n"));
2381
+ ({ activeVm, vms } = listVms());
2382
+ vmNames = Object.keys(vms);
2383
+ fullState = readState();
2384
+ aksClusters = (fullState.azure || {}).clusters || {};
2385
+ hasAks = Object.keys(aksClusters).length > 0;
2386
+ }
2387
+ } catch { /* az not available or not authenticated */ }
2388
+
2389
+ if (vmNames.length === 0 && !hasAks) {
2390
+ banner("Azure VMs");
2391
+ hint("No VMs or clusters found in Azure.");
2392
+ hint("Create a VM: fops azure up <name>");
2393
+ hint("Create a cluster: fops azure aks up <name>\n");
2394
+ return;
2395
+ }
2396
+
2372
2397
  // Use cache if fresh, otherwise try shared tags, then fall back to full sync
2373
2398
  const forceLive = opts.live;
2374
2399
  let cache = readCache();
@@ -2389,37 +2414,13 @@ export async function azureList(opts = {}) {
2389
2414
  } catch { /* tag read failed, fall through to full sync */ }
2390
2415
  }
2391
2416
 
2392
- // Discovery + full sync only when all caches are stale
2393
2417
  if (!fresh) {
2394
- try {
2395
- const execa = await lazyExeca();
2396
- await ensureAzCli(execa);
2397
- await ensureAzAuth(execa, { subscription: opts.subscription });
2398
- const found = await discoverVmsFromAzure(execa, { quiet: true, subscription: opts.subscription });
2399
- if (found > 0) {
2400
- console.log(OK(` ✓ Re-discovered ${found} VM(s) from Azure`) + DIM(" (tag managed=fops)\n"));
2401
- ({ activeVm, vms } = listVms());
2402
- vmNames = Object.keys(vms);
2403
- fullState = readState();
2404
- aksClusters = (fullState.azure || {}).clusters || {};
2405
- hasAks = Object.keys(aksClusters).length > 0;
2406
- }
2407
- } catch { /* az not available or not authenticated */ }
2408
-
2409
2418
  await azureSync({ quiet: !opts.verbose });
2410
2419
  cache = readCache();
2411
2420
  cacheSource = "live";
2412
2421
  }
2413
2422
  }
2414
2423
 
2415
- if (vmNames.length === 0 && !hasAks) {
2416
- banner("Azure VMs");
2417
- hint("No VMs or clusters found in Azure.");
2418
- hint("Create a VM: fops azure up <name>");
2419
- hint("Create a cluster: fops azure aks up <name>\n");
2420
- return;
2421
- }
2422
-
2423
2424
  const cachedVms = cache?.vms || {};
2424
2425
  const cachedClusters = cache?.clusters || {};
2425
2426
  const cacheTime = cache?.updatedAt;
@@ -3273,7 +3274,7 @@ export async function azureSshWhitelistMe(opts = {}) {
3273
3274
 
3274
3275
  const merged = [...new Set([...currentSources.filter(s => s && s !== "*" && s !== "Internet"), myCidr])];
3275
3276
  console.log(chalk.yellow(` ↻ Adding ${myCidr} to SSH rule on ${nsgName} (${currentSources.length} existing)...`));
3276
- const { exitCode: updateCode, stderr: updateStderr } = await execa("az", [
3277
+ const { exitCode: updateCode } = await execa("az", [
3277
3278
  "network", "nsg", "rule", "create", "-g", rg, "--nsg-name", nsgName,
3278
3279
  "-n", sshRule?.name || "allow-ssh", "--priority", String(sshRule?.priority || 1000),
3279
3280
  "--destination-port-ranges", "22", "--access", "Allow",
@@ -3283,17 +3284,8 @@ export async function azureSshWhitelistMe(opts = {}) {
3283
3284
  ], { reject: false, timeout: 30000 });
3284
3285
 
3285
3286
  if (updateCode !== 0) {
3286
- console.error(ERR(`\n Failed to update NSG rule on ${nsgName}`));
3287
- const msg = (updateStderr || "").trim();
3288
- if (msg.includes("AADSTS") || msg.includes("Interactive authentication")) {
3289
- console.error(ERR(" Azure session expired — run: az login"));
3290
- } else if (msg.includes("AuthorizationFailed")) {
3291
- console.error(ERR(" Insufficient permissions to update NSG rules in this subscription."));
3292
- } else if (msg) {
3293
- console.error(DIM(` ${msg.split("\n")[0]}`));
3294
- }
3295
- console.error("");
3296
- return;
3287
+ console.error(ERR(`\n Failed to update NSG rule on ${nsgName}\n`));
3288
+ process.exit(1);
3297
3289
  }
3298
3290
  console.log(OK(`\n ✓ SSH (22) whitelisted for ${myCidr} on ${vmName} (${nsgName})\n`));
3299
3291
  console.log(` Sources: ${merged.join(", ")}\n`);
@@ -22,7 +22,7 @@ import { readState, listVms } from "./azure-state.js";
22
22
  // fops_by = alessio (who synced)
23
23
 
24
24
  const TAG_PREFIX = "fops_";
25
- const TAG_MAX_AGE_MS = 20 * 60 * 1000; // 20 minutes — tags are cheaper to check
25
+ const TAG_MAX_AGE_MS = 10 * 60 * 1000; // 10 minutes — tags are cheaper to check
26
26
 
27
27
  // ── Write: publish probe results as tags on a VM ─────────────────────────────
28
28
 
@@ -12,7 +12,7 @@ import {
12
12
  // Stored in ~/.fops.json under azure.cache:
13
13
  // { updatedAt, vms: { <name>: { ... } }, clusters: { <name>: { ... } } }
14
14
 
15
- const CACHE_MAX_AGE_MS = 15 * 60 * 1000; // 15 minutes
15
+ const CACHE_MAX_AGE_MS = 5 * 60 * 1000; // 5 minutes
16
16
 
17
17
  // Short keys for the 6 tracked Foundation services
18
18
  const SVC_MAP = {
@@ -169,16 +169,16 @@ async function syncVms(execa) {
169
169
 
170
170
  // After a knock, iptables rule needs a moment to propagate; first SSH needs full handshake.
171
171
  // Brief delay then retry once to avoid false "unreachable" (e.g. uaenorth latency).
172
- await new Promise((r) => setTimeout(r, 400));
172
+ await new Promise((r) => setTimeout(r, 800));
173
173
  let sshOk = false;
174
174
  for (let attempt = 0; attempt < 2; attempt++) {
175
175
  const { exitCode: sshCode } = await execa("ssh", [
176
176
  ...MUX_OPTS(vm.publicIp, DEFAULTS.adminUser),
177
177
  "-o", "BatchMode=yes",
178
178
  `${DEFAULTS.adminUser}@${vm.publicIp}`, "echo ok",
179
- ], { timeout: 8000, reject: false }).catch(() => ({ exitCode: 1 }));
179
+ ], { timeout: 15000, reject: false }).catch(() => ({ exitCode: 1 }));
180
180
  if (sshCode === 0) { sshOk = true; break; }
181
- if (attempt === 0) await new Promise((r) => setTimeout(r, 1000));
181
+ if (attempt === 0) await new Promise((r) => setTimeout(r, 2000));
182
182
  }
183
183
 
184
184
  if (!sshOk) {
@@ -257,6 +257,10 @@ export async function azureUp(opts = {}) {
257
257
  if (!publicIp) { console.error(ERR(" VM created but no public IP assigned.")); process.exit(1); }
258
258
  console.log(OK(` ✓ VM created — ${publicIp}`));
259
259
 
260
+ // Persist IP immediately so it's never lost if later steps fail or user Ctrl+C's
261
+ const publicUrl = opts.url || defaultUrl;
262
+ writeVmState(vmName, { resourceGroup: rg, location, publicIp, publicUrl, subscriptionId: subId, createdAt: new Date().toISOString() });
263
+
260
264
  hint("Enabling accelerated networking…");
261
265
  const nicName = `${vmName}VMNic`;
262
266
  const dealloc = await execa("az", ["vm", "deallocate", "-g", rg, "-n", vmName, "--output", "none", ...subArgs(sub)], { reject: false, timeout: 120000 });
@@ -361,9 +365,6 @@ export async function azureUp(opts = {}) {
361
365
  ], { reject: false, timeout: 30000 });
362
366
  console.log(OK(" ✓ Knock port range open"));
363
367
 
364
- const publicUrl = opts.url || defaultUrl;
365
- writeVmState(vmName, { resourceGroup: rg, location, publicIp, publicUrl, subscriptionId: subId, createdAt: new Date().toISOString() });
366
-
367
368
  // Save SSH key to 1Password if available
368
369
  try {
369
370
  const { opWhoami, opEnsureVault, opSaveSSHKey } = await import("../../fops-plugin-1password/lib/op.js");
@@ -393,6 +394,12 @@ export async function azureUp(opts = {}) {
393
394
  await syncDns(cfToken, publicUrl, publicIp);
394
395
  await ensureOpenAiNetworkAccess(execa, publicIp, sub);
395
396
 
397
+ // Print IP/URL prominently before the long SSH wait so users don't miss it
398
+ console.log("");
399
+ kvLine("IP", ACCENT(publicIp));
400
+ kvLine("URL", ACCENT(publicUrl));
401
+ console.log("");
402
+
396
403
  console.log(chalk.magenta(" ✻") + " " + DIM("Waiting for SSH…"));
397
404
  const sshMaxWait = 300000;
398
405
  let ready = await waitForSsh(execa, publicIp, adminUser, sshMaxWait);
@@ -508,8 +508,6 @@ export function registerInfraCommands(azure) {
508
508
  .option("--github-token <token>", "GitHub PAT for Flux + GHCR pull (default: $GITHUB_TOKEN)")
509
509
  .option("--no-flux", "Skip Flux bootstrap")
510
510
  .option("--no-postgres", "Skip Postgres Flexible Server provisioning")
511
- .option("--flux-local-repo <path>", "Path to local flux repo clone (auto-detected if omitted)")
512
- .option("--overlay <name>", "App overlay name in flux repo (default: demo-azure)")
513
511
  .option("--dai", "Include DAI (Dashboards AI) workloads")
514
512
  .action(async (name, opts) => {
515
513
  const { aksUp } = await import("../azure-aks.js");
@@ -526,8 +524,6 @@ export function registerInfraCommands(azure) {
526
524
  githubToken: opts.githubToken,
527
525
  noFlux: opts.flux === false,
528
526
  noPostgres: opts.postgres === false,
529
- fluxLocalRepo: opts.fluxLocalRepo,
530
- overlay: opts.overlay,
531
527
  dai: opts.dai === true,
532
528
  });
533
529
  });
@@ -78,20 +78,6 @@ export function registerTestCommands(azure) {
78
78
  : content + `\n${key}=${value}`;
79
79
  };
80
80
 
81
- // Resolve CF Access service token for Cloudflare-proxied endpoints
82
- let cfClientId = process.env.CF_ACCESS_CLIENT_ID || "";
83
- let cfClientSecret = process.env.CF_ACCESS_CLIENT_SECRET || "";
84
- if (!cfClientId) {
85
- // Try reading from the compose .env
86
- try {
87
- const composeDotEnv = await fsp.readFile(path.join(root, ".env"), "utf8");
88
- const idMatch = composeDotEnv.match(/^CF_ACCESS_CLIENT_ID=(.+)$/m);
89
- const secretMatch = composeDotEnv.match(/^CF_ACCESS_CLIENT_SECRET=(.+)$/m);
90
- if (idMatch) cfClientId = idMatch[1].trim();
91
- if (secretMatch) cfClientSecret = secretMatch[1].trim();
92
- } catch { /* .env may not exist */ }
93
- }
94
-
95
81
  envContent = setVar(envContent, "API_URL", apiUrl);
96
82
  envContent = setVar(envContent, "DEV_API_URL", apiUrl);
97
83
  envContent = setVar(envContent, "LIVE_API_URL", apiUrl);
@@ -107,10 +93,6 @@ export function registerTestCommands(azure) {
107
93
  envContent = setVar(envContent, "BEARER_TOKEN", bearerToken);
108
94
  envContent = setVar(envContent, "TOKEN_AUTH0", bearerToken);
109
95
  }
110
- if (cfClientId && cfClientSecret) {
111
- envContent = setVar(envContent, "CF_ACCESS_CLIENT_ID", cfClientId);
112
- envContent = setVar(envContent, "CF_ACCESS_CLIENT_SECRET", cfClientSecret);
113
- }
114
96
 
115
97
  await fsp.writeFile(envPath, envContent);
116
98
  console.log(chalk.green(` ✓ Configured QA .env → ${apiUrl}`));
@@ -172,10 +154,6 @@ export function registerTestCommands(azure) {
172
154
  testEnv.BEARER_TOKEN = bearerToken;
173
155
  testEnv.TOKEN_AUTH0 = bearerToken;
174
156
  }
175
- if (cfClientId && cfClientSecret) {
176
- testEnv.CF_ACCESS_CLIENT_ID = cfClientId;
177
- testEnv.CF_ACCESS_CLIENT_SECRET = cfClientSecret;
178
- }
179
157
 
180
158
  const startMs = Date.now();
181
159
  const proc = execaFn(
@@ -171,6 +171,36 @@ export function registerVmCommands(azure, api, registry) {
171
171
  await azureList({ live: opts.live, verbose: opts.verbose, cost: opts.cost, days: parseInt(opts.days), versions: opts.versions });
172
172
  });
173
173
 
174
+ // ── ip ─────────────────────────────────────────────────────────────────
175
+ azure
176
+ .command("ip [name]")
177
+ .description("Print the public IP (and URL) of a VM — quick lookup")
178
+ .option("--resolve", "Query Azure for the current IP (ignores cached)")
179
+ .action(async (name, opts) => {
180
+ const { requireVmState } = await import("../azure-state.js");
181
+ const state = requireVmState(name);
182
+ let ip = state.publicIp;
183
+
184
+ if (opts.resolve) {
185
+ const { lazyExeca, ensureAzCli, ensureAzAuth, resolvePublicIp, subArgs } = await import("../azure.js");
186
+ const { writeVmState } = await import("../azure-state.js");
187
+ const execa = await lazyExeca();
188
+ await ensureAzCli(execa);
189
+ await ensureAzAuth(execa);
190
+ ip = await resolvePublicIp(execa, state.resourceGroup, state.vmName, state.publicIp);
191
+ if (ip && ip !== state.publicIp) {
192
+ writeVmState(state.vmName, { publicIp: ip });
193
+ }
194
+ }
195
+
196
+ if (!ip) {
197
+ console.error(chalk.red("\n No IP address found. Is the VM running? Try: fops azure start\n"));
198
+ process.exit(1);
199
+ }
200
+ console.log(ip);
201
+ if (state.publicUrl) console.log(chalk.dim(state.publicUrl));
202
+ });
203
+
174
204
  // ── select ───────────────────────────────────────────────────────────────
175
205
  azure
176
206
  .command("select [name]")