@meshxdata/fops 0.1.52 → 0.1.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86):
  1. package/CHANGELOG.md +372 -0
  2. package/package.json +2 -6
  3. package/src/agent/agent.js +6 -0
  4. package/src/commands/setup.js +34 -0
  5. package/src/fleet-registry.js +38 -2
  6. package/src/plugins/__test-fixtures__/fake-plugin.js +2 -0
  7. package/src/plugins/__test-fixtures__/no-register-plugin.js +2 -0
  8. package/src/plugins/__test-fixtures__/with-register/index.js +2 -0
  9. package/src/plugins/__test-fixtures__/without-register/index.js +2 -0
  10. package/src/plugins/api.js +4 -0
  11. package/src/plugins/builtins/docker-compose.js +59 -0
  12. package/src/plugins/bundled/fops-plugin-azure/index.js +4 -0
  13. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-core.js +44 -53
  14. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-storage.js +2 -2
  15. package/src/plugins/bundled/fops-plugin-azure/lib/azure-cost.js +52 -22
  16. package/src/plugins/bundled/fops-plugin-azure/lib/azure-helpers.js +6 -2
  17. package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops.js +113 -7
  18. package/src/plugins/bundled/fops-plugin-azure/lib/azure-provision-init.js +13 -4
  19. package/src/plugins/bundled/fops-plugin-azure/lib/azure-provision.js +91 -14
  20. package/src/plugins/bundled/fops-plugin-azure/lib/azure-service.js +507 -0
  21. package/src/plugins/bundled/fops-plugin-azure/lib/azure-sync.js +146 -7
  22. package/src/plugins/bundled/fops-plugin-azure/lib/azure.js +1 -1
  23. package/src/plugins/bundled/fops-plugin-azure/lib/commands/vm-cmds.js +61 -0
  24. package/src/plugins/bundled/fops-plugin-cloud/api.js +712 -0
  25. package/src/plugins/bundled/fops-plugin-cloud/fops.plugin.json +6 -0
  26. package/src/plugins/bundled/fops-plugin-cloud/index.js +208 -0
  27. package/src/plugins/bundled/fops-plugin-cloud/lib/azure-provider.js +81 -0
  28. package/src/plugins/bundled/fops-plugin-cloud/lib/provider.js +50 -0
  29. package/src/plugins/bundled/fops-plugin-cloud/ui/dist/assets/favicon-C49brna2.svg +15 -0
  30. package/src/plugins/bundled/fops-plugin-cloud/ui/dist/assets/index-CVqQ_kKW.js +65 -0
  31. package/src/plugins/bundled/fops-plugin-cloud/ui/dist/assets/index-DZetahP3.css +1 -0
  32. package/src/plugins/bundled/fops-plugin-cloud/ui/dist/index.html +28 -0
  33. package/src/plugins/bundled/fops-plugin-cloud/ui/index.html +27 -0
  34. package/src/plugins/bundled/fops-plugin-cloud/ui/package-lock.json +2634 -0
  35. package/src/plugins/bundled/fops-plugin-cloud/ui/package.json +29 -0
  36. package/src/plugins/bundled/fops-plugin-cloud/ui/postcss.config.cjs +5 -0
  37. package/src/plugins/bundled/fops-plugin-cloud/ui/src/App.jsx +32 -0
  38. package/src/plugins/bundled/fops-plugin-cloud/ui/src/api/client.js +114 -0
  39. package/src/plugins/bundled/fops-plugin-cloud/ui/src/api/queries.js +111 -0
  40. package/src/plugins/bundled/fops-plugin-cloud/ui/src/components/LogPanel.jsx +162 -0
  41. package/src/plugins/bundled/fops-plugin-cloud/ui/src/components/ThemeToggle.jsx +46 -0
  42. package/src/plugins/bundled/fops-plugin-cloud/ui/src/css/additional-styles/utility-patterns.css +147 -0
  43. package/src/plugins/bundled/fops-plugin-cloud/ui/src/css/style.css +138 -0
  44. package/src/plugins/bundled/fops-plugin-cloud/ui/src/favicon.svg +15 -0
  45. package/src/plugins/bundled/fops-plugin-cloud/ui/src/lib/utils.ts +19 -0
  46. package/src/plugins/bundled/fops-plugin-cloud/ui/src/main.jsx +25 -0
  47. package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/Audit.jsx +164 -0
  48. package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/Costs.jsx +305 -0
  49. package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/CreateResource.jsx +285 -0
  50. package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/Fleet.jsx +307 -0
  51. package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/Resources.jsx +229 -0
  52. package/src/plugins/bundled/fops-plugin-cloud/ui/src/partials/Header.jsx +132 -0
  53. package/src/plugins/bundled/fops-plugin-cloud/ui/src/partials/Sidebar.jsx +174 -0
  54. package/src/plugins/bundled/fops-plugin-cloud/ui/src/partials/SidebarLinkGroup.jsx +21 -0
  55. package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/AuthContext.jsx +170 -0
  56. package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/Info.jsx +49 -0
  57. package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/ThemeContext.jsx +37 -0
  58. package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/Transition.jsx +116 -0
  59. package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/Utils.js +63 -0
  60. package/src/plugins/bundled/fops-plugin-cloud/ui/vite.config.js +23 -0
  61. package/src/plugins/bundled/fops-plugin-foundation/test-helpers.js +65 -0
  62. package/src/plugins/loader.js +34 -1
  63. package/src/plugins/registry.js +15 -0
  64. package/src/plugins/schemas.js +17 -0
  65. package/src/project.js +1 -1
  66. package/src/serve.js +196 -2
  67. package/src/shell.js +21 -1
  68. package/src/web/admin.html.js +236 -0
  69. package/src/web/api.js +73 -0
  70. package/src/web/dist/assets/index-BphVaAUd.css +1 -0
  71. package/src/web/dist/assets/index-CSckLzuG.js +129 -0
  72. package/src/web/dist/index.html +2 -2
  73. package/src/web/frontend/index.html +16 -0
  74. package/src/web/frontend/src/App.jsx +445 -0
  75. package/src/web/frontend/src/components/ChatView.jsx +910 -0
  76. package/src/web/frontend/src/components/InputBox.jsx +523 -0
  77. package/src/web/frontend/src/components/Sidebar.jsx +410 -0
  78. package/src/web/frontend/src/components/StatusBar.jsx +37 -0
  79. package/src/web/frontend/src/components/TabBar.jsx +87 -0
  80. package/src/web/frontend/src/hooks/useWebSocket.js +412 -0
  81. package/src/web/frontend/src/index.css +78 -0
  82. package/src/web/frontend/src/main.jsx +6 -0
  83. package/src/web/frontend/vite.config.js +21 -0
  84. package/src/web/server.js +64 -1
  85. package/src/web/dist/assets/index-NXC8Hvnp.css +0 -1
  86. package/src/web/dist/assets/index-QH1N4ejK.js +0 -112
@@ -1057,64 +1057,55 @@ export async function aksList(opts = {}) {
1057
1057
 
1058
1058
  banner("AKS Clusters");
1059
1059
 
1060
- // If no clusters tracked locally, try to discover fops-managed clusters from Azure
1061
- if (names.length === 0) {
1060
+ // Always discover fops-managed clusters from Azure so we pick up clusters
1061
+ // created by teammates or missing from local state.
1062
+ try {
1062
1063
  const execa = await lazyExeca();
1063
- try {
1064
- await ensureAzCli(execa);
1065
- await ensureAzAuth(execa, { subscription: opts.profile });
1066
- } catch {
1067
- hint("No clusters tracked.");
1068
- hint("Create one: fops azure aks up <name>\n");
1069
- return;
1070
- }
1071
-
1072
- hint("No clusters tracked locally — checking Azure for fops-managed clusters…\n");
1064
+ await ensureAzCli(execa);
1065
+ await ensureAzAuth(execa, { subscription: opts.profile });
1073
1066
 
1074
- try {
1075
- // Query all AKS clusters and filter by managed=fops tag
1076
- const { stdout, exitCode } = await execa("az", [
1077
- "aks", "list",
1078
- "--query", "[?tags.managed=='fops']",
1079
- "--output", "json",
1080
- ...subArgs(opts.profile),
1081
- ], { timeout: 60000, reject: false });
1082
-
1083
- if (exitCode === 0 && stdout?.trim()) {
1084
- const discovered = JSON.parse(stdout);
1085
- if (discovered.length > 0) {
1086
- for (const cl of discovered) {
1087
- const name = cl.name;
1088
- const info = {
1089
- resourceGroup: cl.resourceGroup,
1090
- location: cl.location,
1091
- kubernetesVersion: cl.kubernetesVersion,
1092
- fqdn: cl.fqdn,
1093
- nodeCount: cl.agentPoolProfiles?.reduce((s, p) => s + (p.count || 0), 0) || 0,
1094
- nodeVmSize: cl.agentPoolProfiles?.[0]?.vmSize || "unknown",
1095
- subscriptionId: cl.id?.split("/")[2],
1096
- createdAt: cl.provisioningState === "Succeeded" ? new Date().toISOString() : null,
1097
- };
1098
- writeClusterState(name, info);
1099
- console.log(OK(` + Discovered ${name} (${cl.location})`));
1100
- }
1101
- console.log("");
1102
- // Re-read after discovery
1103
- const updated = readAksClusters();
1104
- activeCluster = updated.activeCluster;
1105
- clusters = updated.clusters;
1106
- names = Object.keys(clusters);
1107
- }
1067
+ const { stdout, exitCode } = await execa("az", [
1068
+ "aks", "list",
1069
+ "--query", "[?tags.managed=='fops']",
1070
+ "--output", "json",
1071
+ ...subArgs(opts.profile),
1072
+ ], { timeout: 60000, reject: false });
1073
+
1074
+ if (exitCode === 0 && stdout?.trim()) {
1075
+ const discovered = JSON.parse(stdout);
1076
+ let added = 0;
1077
+ for (const cl of discovered) {
1078
+ if (clusters[cl.name]) continue; // already tracked
1079
+ const info = {
1080
+ resourceGroup: cl.resourceGroup,
1081
+ location: cl.location,
1082
+ kubernetesVersion: cl.kubernetesVersion,
1083
+ fqdn: cl.fqdn,
1084
+ nodeCount: cl.agentPoolProfiles?.reduce((s, p) => s + (p.count || 0), 0) || 0,
1085
+ nodeVmSize: cl.agentPoolProfiles?.[0]?.vmSize || "unknown",
1086
+ subscriptionId: cl.id?.split("/")[2],
1087
+ createdAt: cl.provisioningState === "Succeeded" ? new Date().toISOString() : null,
1088
+ };
1089
+ writeClusterState(cl.name, info);
1090
+ console.log(OK(` + Discovered ${cl.name} (${cl.location})`));
1091
+ added++;
1092
+ }
1093
+ if (added > 0) {
1094
+ console.log("");
1095
+ const updated = readAksClusters();
1096
+ activeCluster = updated.activeCluster;
1097
+ clusters = updated.clusters;
1098
+ names = Object.keys(clusters);
1108
1099
  }
1109
- } catch {
1110
- // Discovery failed, continue with empty list
1111
1100
  }
1101
+ } catch {
1102
+ // az not available or not authenticated — continue with local state
1103
+ }
1112
1104
 
1113
- if (names.length === 0) {
1114
- hint("No fops-managed clusters found in Azure.");
1115
- hint("Create one: fops azure aks up <name>\n");
1116
- return;
1117
- }
1105
+ if (names.length === 0) {
1106
+ hint("No clusters tracked.");
1107
+ hint("Create one: fops azure aks up <name>\n");
1108
+ return;
1118
1109
  }
1119
1110
 
1120
1111
  // Refresh each tracked cluster from Azure so RG, Location, Nodes, FQDN, etc. are current
@@ -38,7 +38,7 @@ export async function reconcileStorageAccount(ctx) {
38
38
  const { execa, clusterName, rg, sub } = ctx;
39
39
  const storageAccountName = `fops${clusterName.replace(/-/g, "")}`.toLowerCase().slice(0, 24);
40
40
  const vaultName = `fops-${clusterName}-kv`;
41
- const containers = ["foundation", "vault"];
41
+ const containers = ["foundation", "vault", "loki"];
42
42
 
43
43
  hint(`Reconciling Azure Storage Account "${storageAccountName}"…`);
44
44
 
@@ -571,7 +571,7 @@ export async function reconcileStorageReplication(ctx) {
571
571
 
572
572
  const sourceAccountName = `fops${clusterName.replace(/-/g, "")}`.toLowerCase().slice(0, 24);
573
573
  const destAccountName = `fops${clusterName.replace(/-/g, "")}ha`.toLowerCase().slice(0, 24);
574
- const containers = ["foundation", "vault"];
574
+ const containers = ["foundation", "vault", "loki"];
575
575
 
576
576
  hint(`Setting up cross-region storage replication (${location} → ${replicaRegion})…`);
577
577
 
@@ -24,22 +24,50 @@ async function az(args, opts = {}) {
24
24
  }
25
25
  }
26
26
 
27
+ // In-memory cache for cost queries (TTL: 1 hour)
28
+ const _costCache = new Map();
29
+ const COST_CACHE_TTL = 60 * 60 * 1000; // 1 hour
30
+
27
31
  async function costQuery(scope, dataset) {
28
- try {
29
- const body = JSON.stringify({ ...dataset });
30
- const { stdout, stderr } = await execa("az", [
31
- "rest", "--method", "POST",
32
- "--url", `https://management.azure.com${scope}/providers/Microsoft.CostManagement/query?api-version=2023-11-01`,
33
- "--body", body,
34
- "--output", "json",
35
- ], { timeout: 120_000, reject: false });
36
- if (stderr?.includes("Please run 'az login'") || stderr?.includes("AADSTS")) {
37
- return { error: stderr.split("\n")[0] };
32
+ const cacheKey = JSON.stringify({ scope, dataset });
33
+ const cached = _costCache.get(cacheKey);
34
+ if (cached && Date.now() - cached.ts < COST_CACHE_TTL) {
35
+ return cached.data;
36
+ }
37
+
38
+ const maxRetries = 3;
39
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
40
+ try {
41
+ const body = JSON.stringify({ ...dataset });
42
+ const { stdout, stderr } = await execa("az", [
43
+ "rest", "--method", "POST",
44
+ "--url", `https://management.azure.com${scope}/providers/Microsoft.CostManagement/query?api-version=2023-11-01`,
45
+ "--body", body,
46
+ "--output", "json",
47
+ ], { timeout: 120_000, reject: false });
48
+
49
+ if (stderr?.includes("Please run 'az login'") || stderr?.includes("AADSTS")) {
50
+ return { error: stderr.split("\n")[0] + "\nMake sure you are logged into Azure (az login) and have Cost Management access." };
51
+ }
52
+
53
+ // Handle 429 rate limiting
54
+ if (stderr?.includes("429") || stderr?.includes("Too many requests") || stderr?.includes("Too Many Requests")) {
55
+ const wait = Math.pow(2, attempt + 1) * 5000; // 10s, 20s, 40s
56
+ if (attempt < maxRetries - 1) {
57
+ await new Promise((r) => setTimeout(r, wait));
58
+ continue;
59
+ }
60
+ return { error: `Rate limited by Azure Cost Management API after ${maxRetries} retries. Try again in a few minutes.` };
61
+ }
62
+
63
+ const result = JSON.parse(stdout || "{}");
64
+ _costCache.set(cacheKey, { data: result, ts: Date.now() });
65
+ return result;
66
+ } catch (err) {
67
+ if (attempt === maxRetries - 1) return { error: err.message };
38
68
  }
39
- return JSON.parse(stdout || "{}");
40
- } catch (err) {
41
- return { error: err.message };
42
69
  }
70
+ return { error: "Cost query failed after retries" };
43
71
  }
44
72
 
45
73
  function formatCost(amount, currency = "USD") {
@@ -402,16 +430,18 @@ export async function registerCostTools(api) {
402
430
  ? allVms.filter(v => v.powerState?.toLowerCase().includes(input.state))
403
431
  : allVms;
404
432
 
405
- // Rough monthly cost estimates (USD, Pay-As-You-Go, select regions)
433
+ // Rough monthly cost estimates (USD, Pay-As-You-Go)
406
434
  const costs = {
407
- Standard_B2s: 30, Standard_B4ms: 60, Standard_B2ms: 60,
408
- Standard_D2s_v3: 70, Standard_D4s_v3: 140, Standard_D8s_v3: 281,
409
- Standard_D16s_v3: 562, Standard_D32s_v3: 1124,
410
- Standard_D2s_v5: 70, Standard_D4s_v5: 140, Standard_D8s_v5: 281,
411
- Standard_D16s_v5: 562, Standard_D32s_v5: 1124,
412
- Standard_E2s_v3: 92, Standard_E4s_v3: 184, Standard_E8s_v3: 368,
413
- Standard_E16s_v3: 736, Standard_E32s_v3: 1472,
414
- Standard_F2s_v2: 62, Standard_F4s_v2: 124, Standard_F8s_v2: 248,
435
+ // B-series (burstable)
436
+ Standard_B1s: 8, Standard_B2s: 30, Standard_B2ms: 60, Standard_B4ms: 120,
437
+ // D-series (general purpose) — v3/v4/v5 similar pricing
438
+ Standard_D2s_v3: 70, Standard_D4s_v3: 140, Standard_D8s_v3: 281, Standard_D16s_v3: 562, Standard_D32s_v3: 1124,
439
+ Standard_D2s_v5: 70, Standard_D4s_v5: 140, Standard_D8s_v5: 281, Standard_D16s_v5: 562, Standard_D32s_v5: 1124, Standard_D64s_v5: 2249,
440
+ // E-series (memory optimized)
441
+ Standard_E2s_v3: 92, Standard_E4s_v3: 184, Standard_E8s_v3: 368, Standard_E16s_v3: 736, Standard_E32s_v3: 1472,
442
+ Standard_E2s_v5: 92, Standard_E4s_v5: 184, Standard_E8s_v5: 368, Standard_E16s_v5: 736, Standard_E32s_v5: 1472, Standard_E64s_v5: 2621,
443
+ // F-series (compute optimized)
444
+ Standard_F2s_v2: 62, Standard_F4s_v2: 124, Standard_F8s_v2: 248, Standard_F16s_v2: 496, Standard_F32s_v2: 992,
415
445
  };
416
446
 
417
447
  let output = "Azure VMs\n" + "=".repeat(75) + "\n";
@@ -854,16 +854,20 @@ export function fopsUpCmd(publicUrl, { k3s, traefik, dai } = {}) {
854
854
  ].join("; ");
855
855
 
856
856
  const debugPostamble = [
857
- `echo \\\"=== fops up finished at \\$(date -Iseconds) with exit code \\$? ===\\\" >> ${logFile}`,
857
+ `echo \\\"=== fops up finished at \\$(date -Iseconds) with exit code \\$_fops_rc ===\\\" >> ${logFile}`,
858
858
  `echo \\\"--- Container status ---\\\" >> ${logFile}`,
859
859
  `docker compose ps --format 'table {{.Name}}\\t{{.Status}}' >> ${logFile} 2>&1`,
860
860
  `echo \\\"--- Recent docker events ---\\\" >> ${logFile}`,
861
861
  `tail -50 ${eventsLog} >> ${logFile} 2>&1 || true`,
862
+ `exit \\$_fops_rc`,
862
863
  ].join("; ");
863
864
 
865
+ // Fail fast if Docker is not installed
866
+ const dockerGuard = `command -v docker >/dev/null 2>&1 || { echo \\\"ERROR: Docker is not installed — cannot start Foundation\\\" >> ${logFile}; echo \\\"ERROR: Docker is not installed\\\" >&2; exit 1; }`;
867
+
864
868
  // Run from project dir with FOUNDATION_ROOT set explicitly (sudo can reset cwd)
865
869
  const envSetup = `export PATH=/usr/local/bin:/usr/bin:\\$PATH FOUNDATION_ROOT=/opt/foundation-compose`;
866
- return `bash -c "cd /opt/foundation-compose && ${envSetup}; ${debugPreamble}; ${quietPull}; if command -v fops >/dev/null 2>&1; then ${profileEnv}${fopsCmd}; else echo 'fops not found — falling back to docker compose'; ${composeCmd}; fi; ${debugPostamble}"`;
870
+ return `bash -c "cd /opt/foundation-compose && ${envSetup}; ${dockerGuard}; ${debugPreamble}; ${quietPull}; if command -v fops >/dev/null 2>&1; then ${profileEnv}${fopsCmd}; else echo 'fops not found — falling back to docker compose'; ${composeCmd}; fi; _fops_rc=\\$?; ${debugPostamble}"`;
867
871
  }
868
872
 
869
873
  /** Build remote "fops up [component] [branch]" args (same as local fops up). For foreground run on VM. */
@@ -321,6 +321,71 @@ export async function azureTrinoStatus(opts = {}) {
321
321
  console.log("");
322
322
  }
323
323
 
324
+ // ── ping ─────────────────────────────────────────────────────────────────────
325
+
326
+ /**
327
+ * Check Foundation backend /api/ping/json health endpoint on a VM.
328
+ */
329
+ export async function azurePing(opts = {}) {
330
+ const execa = await lazyExeca();
331
+ const state = requireVmState(opts.vmName);
332
+ const { vmName } = state;
333
+ const ip = state.publicIp;
334
+ const adminUser = DEFAULTS.adminUser;
335
+
336
+ if (!ip) {
337
+ console.log(WARN(` VM ${vmName} has no public IP (probably stopped)`));
338
+ return;
339
+ }
340
+
341
+ await knockForVm(state);
342
+ const sshOk = await waitForSsh(execa, ip, adminUser, 10000);
343
+ if (!sshOk) {
344
+ console.log(WARN("\n ⚠ SSH not reachable"));
345
+ return;
346
+ }
347
+
348
+ const pingToken = opts.token || process.env.FOPS_PING_TOKEN || "";
349
+ const tokenHeader = pingToken ? `-H "X-Ping-Token: ${pingToken}"` : "";
350
+ const { stdout, exitCode } = await sshCmd(execa, ip, adminUser,
351
+ `curl -sf ${tokenHeader} http://localhost:9001/api/ping/json 2>/dev/null || echo '{}'`,
352
+ 15000,
353
+ );
354
+
355
+ let ping;
356
+ try {
357
+ ping = JSON.parse(stdout.trim() || "{}");
358
+ } catch {
359
+ console.log(ERR(` Failed to parse ping response: ${stdout}`));
360
+ return;
361
+ }
362
+
363
+ banner(`Ping: ${vmName}`);
364
+
365
+ if (ping.ok === undefined) {
366
+ console.log(WARN(" No response from backend /api/ping/json"));
367
+ hint("Backend may be down or starting up");
368
+ console.log("");
369
+ return;
370
+ }
371
+
372
+ const overall = ping.ok ? OK("✓ healthy") : ERR("✗ unhealthy");
373
+ kvLine("Status", overall);
374
+ if (ping.tag) kvLine("Tag", DIM(ping.tag));
375
+
376
+ if (ping.checks) {
377
+ console.log("");
378
+ console.log(ACCENT(" Checks:"));
379
+ for (const [name, check] of Object.entries(ping.checks)) {
380
+ const status = check.ok ? OK("✓") : ERR("✗");
381
+ const latency = check.latency_ms !== undefined ? DIM(` (${check.latency_ms}ms)`) : "";
382
+ const err = check.error ? ERR(` — ${check.error}`) : "";
383
+ console.log(` ${status} ${name}${latency}${err}`);
384
+ }
385
+ }
386
+ console.log("");
387
+ }
388
+
324
389
  /**
325
390
  * Run VM diagnostics: show config versions, then run make download and print
326
391
  * full output so image-pull failures (e.g. after config versions change) can be diagnosed.
@@ -1295,6 +1360,41 @@ export async function azureList(opts = {}) {
1295
1360
  }
1296
1361
  } catch { /* az not available or not authenticated */ }
1297
1362
 
1363
+ // Always discover AKS clusters from Azure (tag managed=fops)
1364
+ try {
1365
+ const execa = await lazyExeca();
1366
+ const { writeClusterState } = await import("./azure-aks-state.js");
1367
+ const { stdout, exitCode } = await execa("az", [
1368
+ "aks", "list",
1369
+ "--query", "[?tags.managed=='fops']",
1370
+ "--output", "json",
1371
+ ...subArgs(opts.subscription),
1372
+ ], { timeout: 60000, reject: false });
1373
+ if (exitCode === 0 && stdout?.trim()) {
1374
+ const discovered = JSON.parse(stdout);
1375
+ let added = 0;
1376
+ for (const cl of discovered) {
1377
+ if (aksClusters[cl.name]) continue;
1378
+ writeClusterState(cl.name, {
1379
+ resourceGroup: cl.resourceGroup,
1380
+ location: cl.location,
1381
+ kubernetesVersion: cl.kubernetesVersion,
1382
+ fqdn: cl.fqdn,
1383
+ nodeCount: cl.agentPoolProfiles?.reduce((s, p) => s + (p.count || 0), 0) || 0,
1384
+ nodeVmSize: cl.agentPoolProfiles?.[0]?.vmSize || "unknown",
1385
+ subscriptionId: cl.id?.split("/")[2],
1386
+ });
1387
+ added++;
1388
+ }
1389
+ if (added > 0) {
1390
+ console.log(OK(` ✓ Re-discovered ${added} AKS cluster(s) from Azure`) + DIM(" (tag managed=fops)\n"));
1391
+ fullState = readState();
1392
+ aksClusters = (fullState.azure || {}).clusters || {};
1393
+ hasAks = Object.keys(aksClusters).length > 0;
1394
+ }
1395
+ }
1396
+ } catch { /* az not available or AKS discovery failed */ }
1397
+
1298
1398
  // JSON output mode - early return with structured data
1299
1399
  if (opts.json) {
1300
1400
  const output = {
@@ -1568,10 +1668,9 @@ export async function azureList(opts = {}) {
1568
1668
  const hasPrimary = primaryName && clusterNames.includes(primaryName);
1569
1669
  const prefix = isStandby && hasPrimary ? " └─" : "";
1570
1670
  const dot = active ? OK("●") : DIM("○");
1571
- const displayName = isStandby && hasPrimary
1572
- ? `${cr.name} ${DIM("(HA standby)")}`
1573
- : cr.name;
1574
- const cNameTxt = active ? OK(displayName.padEnd(maxCName + 13)) : LABEL(displayName.padEnd(maxCName + 13));
1671
+ const paddedName = cr.name.padEnd(maxCName);
1672
+ const standbySuffix = isStandby && hasPrimary ? ` ${DIM("(HA standby)")}` : "";
1673
+ const cNameTxt = active ? OK(paddedName) + standbySuffix : LABEL(paddedName) + standbySuffix;
1575
1674
  const loc = (cl?.location || cr.location || "–").padEnd(10);
1576
1675
  const nodes = cr.nodes != null ? `${cr.nodes} x ${cr.sizes || "?"}` : "–";
1577
1676
  const k8s = (cr.kubernetesVersion || "–").padEnd(6);
@@ -1640,12 +1739,19 @@ export function printServiceMatrix(results, nameWidth) {
1640
1739
  const withSvc = results.filter(r => r.services && Object.keys(r.services).length > 0);
1641
1740
  if (withSvc.length === 0) return;
1642
1741
 
1742
+ // Resolve display value for a service entry (supports both string and {tag,sha} formats)
1743
+ const svcVal = (entry) => {
1744
+ if (!entry) return null;
1745
+ if (typeof entry === "string") return entry;
1746
+ return entry.sha || entry.tag || null;
1747
+ };
1748
+
1643
1749
  // Find the majority value per column to highlight drift
1644
1750
  const majority = {};
1645
1751
  for (const svc of SVC_ORDER) {
1646
1752
  const counts = {};
1647
1753
  for (const r of withSvc) {
1648
- const v = r.services?.[svc];
1754
+ const v = svcVal(r.services?.[svc]);
1649
1755
  if (v) counts[v] = (counts[v] || 0) + 1;
1650
1756
  }
1651
1757
  const sorted = Object.entries(counts).sort((a, b) => b[1] - a[1]);
@@ -1660,7 +1766,7 @@ export function printServiceMatrix(results, nameWidth) {
1660
1766
  for (const r of withSvc) {
1661
1767
  const nameTxt = LABEL(r.name.padEnd(nameWidth));
1662
1768
  const cells = SVC_ORDER.map(svc => {
1663
- const v = r.services?.[svc] || "–";
1769
+ const v = svcVal(r.services?.[svc]) || "–";
1664
1770
  const display = v.padEnd(colW);
1665
1771
  if (v === "–") return DIM(display);
1666
1772
  if (v !== majority[svc]) return WARN(display);
@@ -1671,7 +1777,7 @@ export function printServiceMatrix(results, nameWidth) {
1671
1777
 
1672
1778
  // Check for drift
1673
1779
  const hasDrift = SVC_ORDER.some(svc => {
1674
- const vals = withSvc.map(r => r.services?.[svc]).filter(Boolean);
1780
+ const vals = withSvc.map(r => svcVal(r.services?.[svc])).filter(Boolean);
1675
1781
  return new Set(vals).size > 1;
1676
1782
  });
1677
1783
  if (hasDrift) {
@@ -44,19 +44,26 @@ export async function provisionVm(execa, ip, adminUser, { githubToken, branch =
44
44
  "apt-get install -y -qq apt-transport-https ca-certificates curl gnupg lsb-release jq git make unzip zsh software-properties-common python3-venv python3-pip",
45
45
  ].join("\n"), 300000);
46
46
 
47
- await runScript("Installing Docker", [
47
+ const dockerExit = await runScript("Installing Docker", [
48
48
  waitAptLock,
49
49
  "export DEBIAN_FRONTEND=noninteractive",
50
50
  "install -m 0755 -d /etc/apt/keyrings",
51
- "curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg",
51
+ "curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --batch --yes --dearmor -o /etc/apt/keyrings/docker.gpg",
52
52
  "chmod a+r /etc/apt/keyrings/docker.gpg",
53
53
  `echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" > /etc/apt/sources.list.d/docker.list`,
54
- "apt-get update -qq",
55
- "apt-get install -y -qq docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin",
54
+ "set +e",
55
+ "for _ in 1 2 3 4 5; do if apt-get update -qq && apt-get install -y -qq docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin; then break; fi; echo 'Retrying Docker install in 10s…'; sleep 10; done",
56
+ "set -e",
57
+ "command -v docker >/dev/null 2>&1 || (echo 'Docker not found after install attempts' && exit 1)",
56
58
  "systemctl enable docker && systemctl start docker",
57
59
  `usermod -aG docker ${adminUser}`,
58
60
  ].join("\n"), 300000);
59
61
 
62
+ if (dockerExit !== 0) {
63
+ console.log(WARN(" ✗ Docker installation failed — cannot continue provisioning"));
64
+ throw new Error("Docker installation failed");
65
+ }
66
+
60
67
  await runScript("Configuring br_netfilter for k3s DNS", [
61
68
  "modprobe br_netfilter",
62
69
  "echo br_netfilter > /etc/modules-load.d/br_netfilter.conf",
@@ -178,6 +185,8 @@ export async function provisionVm(execa, ip, adminUser, { githubToken, branch =
178
185
  Project dir: /opt/foundation-compose
179
186
 
180
187
  MOTD`,
188
+ `grep -q 'cd /opt/foundation-compose' /home/${adminUser}/.bashrc 2>/dev/null || echo 'cd /opt/foundation-compose' >> /home/${adminUser}/.bashrc`,
189
+ `grep -q 'cd /opt/foundation-compose' /home/${adminUser}/.zshrc 2>/dev/null || echo 'cd /opt/foundation-compose' >> /home/${adminUser}/.zshrc`,
181
190
  ].join("\n"));
182
191
 
183
192
  await ssh("sudo apt-get clean && sudo rm -rf /var/lib/apt/lists/*", 30000);
@@ -44,7 +44,7 @@ async function ensureGhcrOnVm(ssh, user, githubToken, { timeout = 60000 } = {})
44
44
  // ── Configure a fresh or restarted VM ───────────────────────────────────────
45
45
 
46
46
  export async function configureVm(execa, ip, user, publicUrl, { githubToken, k3s, traefik, dai, deferStartToReconcile, quiet } = {}) {
47
- const ssh = (cmd) => sshCmd(execa, ip, user, cmd);
47
+ const ssh = (cmd, timeout) => sshCmd(execa, ip, user, cmd, timeout);
48
48
 
49
49
  if (!quiet) console.log(chalk.dim(" Configuring VM..."));
50
50
 
@@ -72,6 +72,68 @@ export async function configureVm(execa, ip, user, publicUrl, { githubToken, k3s
72
72
  ].join("\n");
73
73
  await ssh(setupBatch);
74
74
 
75
+ // Verify Docker is installed — if missing, install it before anything else
76
+ const { exitCode: dockerCheck } = await ssh("sudo docker info >/dev/null 2>&1");
77
+ if (dockerCheck !== 0) {
78
+ if (!quiet) console.log(chalk.yellow(" ⚠ Docker not found — installing..."));
79
+ // Repo setup is idempotent (tolerates partial prior attempts); install+start uses &&
80
+ const repoSetup = [
81
+ "export DEBIAN_FRONTEND=noninteractive",
82
+ "while fuser /var/lib/dpkg/lock-frontend /var/lib/apt/lists/lock /var/cache/apt/archives/lock >/dev/null 2>&1; do sleep 3; done",
83
+ "sudo install -m 0755 -d /etc/apt/keyrings",
84
+ "curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --batch --yes --dearmor -o /etc/apt/keyrings/docker.gpg",
85
+ "sudo chmod a+r /etc/apt/keyrings/docker.gpg",
86
+ `echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null`,
87
+ ].join("; ");
88
+ const installAndStart = [
89
+ "sudo apt-get update -qq",
90
+ "sudo apt-get install -y -qq docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin",
91
+ "sudo systemctl enable docker && sudo systemctl start docker",
92
+ `sudo usermod -aG docker ${user}`,
93
+ ].join(" && ");
94
+ const { exitCode: installExit } = await ssh(`${repoSetup}; ${installAndStart}`, 300000);
95
+ if (installExit === 0) {
96
+ if (!quiet) console.log(chalk.green(" ✓ Docker installed"));
97
+ } else {
98
+ console.log(chalk.red(" ✗ Docker installation failed — container operations will not work"));
99
+ console.log(chalk.dim(` SSH in and check: ssh ${user}@${ip} "sudo apt-get install -y docker-ce"`));
100
+ }
101
+ }
102
+
103
+ // Verify Node.js + fops CLI are installed — if missing, install them
104
+ const { stdout: fopsWhich } = await ssh("command -v fops 2>/dev/null || echo MISSING");
105
+ if (!fopsWhich?.trim() || fopsWhich.includes("MISSING")) {
106
+ if (!quiet) console.log(chalk.yellow(" ⚠ fops CLI not found — installing Node.js + fops..."));
107
+ const installNode = [
108
+ "export DEBIAN_FRONTEND=noninteractive",
109
+ "if ! command -v node >/dev/null 2>&1; then curl -fsSL https://deb.nodesource.com/setup_20.x | sudo bash - && sudo apt-get install -y -qq nodejs; fi",
110
+ ].join("; ");
111
+ await ssh(installNode, 120000);
112
+ // Install fops globally, retry with sudo if needed
113
+ let fopsInstalled = false;
114
+ const { exitCode: userInstall } = await ssh("npm install -g @meshxdata/fops@latest 2>&1", 300000);
115
+ if (userInstall === 0) {
116
+ fopsInstalled = true;
117
+ } else {
118
+ const { exitCode: sudoInstall } = await ssh(
119
+ "sudo bash -c 'D=\"$(npm root -g)/@meshxdata\"; rm -rf \"$D\" 2>/dev/null; npm install -g @meshxdata/fops@latest' 2>&1",
120
+ 300000,
121
+ );
122
+ fopsInstalled = sudoInstall === 0;
123
+ }
124
+ // Ensure fops is on PATH
125
+ await ssh(
126
+ 'MJS="$(npm root -g 2>/dev/null)/@meshxdata/fops/fops.mjs"; [ -f "$MJS" ] || MJS="$(sudo npm root -g 2>/dev/null)/@meshxdata/fops/fops.mjs"; [ -f "$MJS" ] && sudo ln -sf "$MJS" /usr/local/bin/fops; true',
127
+ 15000,
128
+ );
129
+ if (fopsInstalled) {
130
+ if (!quiet) console.log(chalk.green(" ✓ fops CLI installed"));
131
+ } else {
132
+ console.log(chalk.red(" ✗ fops CLI installation failed"));
133
+ console.log(chalk.dim(` SSH in and check: ssh ${user}@${ip} "sudo npm install -g @meshxdata/fops@latest"`));
134
+ }
135
+ }
136
+
75
137
  let ghcrOk = false;
76
138
  if (githubToken) {
77
139
  if (!quiet) console.log(chalk.dim(" Configuring GitHub/GHCR credentials..."));
@@ -458,9 +520,12 @@ async function vmReconcileNetworking(ctx) {
458
520
  console.log(chalk.yellow(" ⚠ No NSG attached to NIC"));
459
521
  }
460
522
 
523
+ // Accelerated networking — only supported on D/E/F/M series (2+ vCPU), not B-series
524
+ const vmSize = (iv.hardwareProfile?.vmSize || "").toLowerCase();
525
+ const supportsAccelNet = !vmSize.startsWith("standard_b") && !vmSize.startsWith("standard_a");
461
526
  if (nic.enableAcceleratedNetworking) {
462
527
  reconcileOk("Accelerated networking", "enabled");
463
- } else {
528
+ } else if (supportsAccelNet) {
464
529
  console.log(chalk.yellow(" ↻ Accelerated networking not enabled — enabling…"));
465
530
  const { exitCode: anCode } = await execa("az", [
466
531
  "network", "nic", "update", "-g", rg, "-n", ctx.nicName,
@@ -469,8 +534,9 @@ async function vmReconcileNetworking(ctx) {
469
534
  ], { reject: false, timeout: 30000 });
470
535
  console.log(anCode === 0
471
536
  ? chalk.green(` ✓ ${"Accelerated networking".padEnd(RECONCILE_LABEL_WIDTH)} — enabled`)
472
- : chalk.yellow(" ⚠ Could not enable accelerated networking (VM size may not support it)"));
537
+ : chalk.yellow(" ⚠ Could not enable accelerated networking"));
473
538
  }
539
+ // B-series and A-series VMs don't support accelerated networking — skip silently
474
540
  }
475
541
 
476
542
  if (!ctx.ip) ctx.ip = await resolvePublicIp(execa, rg, vmName);
@@ -832,10 +898,16 @@ async function vmReconcileSecurity(ctx) {
832
898
  reconcileOk("Boot diagnostics", "enabled");
833
899
  } else {
834
900
  console.log(chalk.yellow(" ↻ Boot diagnostics not enabled — enabling..."));
835
- const { exitCode: bdCode } = await execa("az", [
836
- "vm", "boot-diagnostics", "enable", "-g", rg, "-n", vmName, "--output", "none",
837
- ...subArgs(sub),
838
- ], { reject: false, timeout: 30000 });
901
+ let bdCode = 1;
902
+ for (let attempt = 0; attempt < 3; attempt++) {
903
+ const res = await execa("az", [
904
+ "vm", "boot-diagnostics", "enable", "-g", rg, "-n", vmName, "--output", "none",
905
+ ...subArgs(sub),
906
+ ], { reject: false, timeout: 30000 });
907
+ bdCode = res.exitCode;
908
+ if (bdCode === 0) break;
909
+ if (attempt < 2) await new Promise((r) => setTimeout(r, 5000));
910
+ }
839
911
  if (bdCode === 0) {
840
912
  await new Promise((r) => setTimeout(r, 2000));
841
913
  const { stdout: bdJson } = await execa("az", [
@@ -880,20 +952,25 @@ async function vmReconcileSecurity(ctx) {
880
952
  console.log(amCode === 0
881
953
  ? chalk.green(` ✓ ${"Antimalware extension".padEnd(RECONCILE_LABEL_WIDTH)} — installed`)
882
954
  : chalk.yellow(" ⚠ Could not install antimalware extension"));
883
- } else {
884
- console.log(chalk.dim(` Antimalware extension — Windows only (skipped on Linux)`));
885
955
  }
956
+ // Linux VMs don't need antimalware — skip silently
886
957
 
887
958
  if (isTrustedLaunch) {
888
959
  if (hasGuestAttestation) {
889
960
  reconcileOk("Guest Attestation extension", "installed");
890
961
  } else {
891
962
  console.log(chalk.yellow(" ↻ Guest Attestation missing — installing…"));
892
- const { exitCode: gaCode } = await execa("az", [
893
- "vm", "extension", "set", "-g", rg, "--vm-name", vmName,
894
- "-n", "GuestAttestation", "--publisher", "Microsoft.Azure.Security.LinuxAttestation",
895
- "--output", "none", ...subArgs(sub),
896
- ], { reject: false, timeout: 120000 });
963
+ let gaCode = 1;
964
+ for (let attempt = 0; attempt < 3; attempt++) {
965
+ const res = await execa("az", [
966
+ "vm", "extension", "set", "-g", rg, "--vm-name", vmName,
967
+ "-n", "GuestAttestation", "--publisher", "Microsoft.Azure.Security.LinuxAttestation",
968
+ "--output", "none", ...subArgs(sub),
969
+ ], { reject: false, timeout: 120000 });
970
+ gaCode = res.exitCode;
971
+ if (gaCode === 0) break;
972
+ if (attempt < 2) await new Promise((r) => setTimeout(r, 10000));
973
+ }
897
974
  console.log(gaCode === 0
898
975
  ? chalk.green(` ✓ ${"Guest Attestation extension".padEnd(RECONCILE_LABEL_WIDTH)} — installed`)
899
976
  : chalk.yellow(" ⚠ Could not install Guest Attestation extension"));