@meshxdata/fops 0.1.51 → 0.1.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90):
  1. package/CHANGELOG.md +207 -21
  2. package/package.json +2 -6
  3. package/src/agent/agent.js +6 -0
  4. package/src/commands/setup.js +34 -0
  5. package/src/doctor.js +11 -8
  6. package/src/fleet-registry.js +38 -2
  7. package/src/plugins/__test-fixtures__/fake-plugin.js +2 -0
  8. package/src/plugins/__test-fixtures__/no-register-plugin.js +2 -0
  9. package/src/plugins/__test-fixtures__/with-register/index.js +2 -0
  10. package/src/plugins/__test-fixtures__/without-register/index.js +2 -0
  11. package/src/plugins/api.js +4 -0
  12. package/src/plugins/builtins/docker-compose.js +59 -0
  13. package/src/plugins/bundled/fops-plugin-azure/index.js +4 -0
  14. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-core.js +53 -53
  15. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-secrets.js +151 -0
  16. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-storage.js +2 -2
  17. package/src/plugins/bundled/fops-plugin-azure/lib/azure-cost.js +52 -22
  18. package/src/plugins/bundled/fops-plugin-azure/lib/azure-fleet.js +12 -4
  19. package/src/plugins/bundled/fops-plugin-azure/lib/azure-helpers.js +6 -2
  20. package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops.js +113 -7
  21. package/src/plugins/bundled/fops-plugin-azure/lib/azure-provision-init.js +13 -4
  22. package/src/plugins/bundled/fops-plugin-azure/lib/azure-provision.js +91 -14
  23. package/src/plugins/bundled/fops-plugin-azure/lib/azure-service.js +507 -0
  24. package/src/plugins/bundled/fops-plugin-azure/lib/azure-sync.js +146 -7
  25. package/src/plugins/bundled/fops-plugin-azure/lib/azure.js +1 -1
  26. package/src/plugins/bundled/fops-plugin-azure/lib/commands/test-cmds.js +28 -0
  27. package/src/plugins/bundled/fops-plugin-azure/lib/commands/vm-cmds.js +61 -0
  28. package/src/plugins/bundled/fops-plugin-cloud/api.js +712 -0
  29. package/src/plugins/bundled/fops-plugin-cloud/fops.plugin.json +6 -0
  30. package/src/plugins/bundled/fops-plugin-cloud/index.js +208 -0
  31. package/src/plugins/bundled/fops-plugin-cloud/lib/azure-provider.js +81 -0
  32. package/src/plugins/bundled/fops-plugin-cloud/lib/provider.js +50 -0
  33. package/src/plugins/bundled/fops-plugin-cloud/ui/dist/assets/favicon-C49brna2.svg +15 -0
  34. package/src/plugins/bundled/fops-plugin-cloud/ui/dist/assets/index-CVqQ_kKW.js +65 -0
  35. package/src/plugins/bundled/fops-plugin-cloud/ui/dist/assets/index-DZetahP3.css +1 -0
  36. package/src/plugins/bundled/fops-plugin-cloud/ui/dist/index.html +28 -0
  37. package/src/plugins/bundled/fops-plugin-cloud/ui/index.html +27 -0
  38. package/src/plugins/bundled/fops-plugin-cloud/ui/package-lock.json +2634 -0
  39. package/src/plugins/bundled/fops-plugin-cloud/ui/package.json +29 -0
  40. package/src/plugins/bundled/fops-plugin-cloud/ui/postcss.config.cjs +5 -0
  41. package/src/plugins/bundled/fops-plugin-cloud/ui/src/App.jsx +32 -0
  42. package/src/plugins/bundled/fops-plugin-cloud/ui/src/api/client.js +114 -0
  43. package/src/plugins/bundled/fops-plugin-cloud/ui/src/api/queries.js +111 -0
  44. package/src/plugins/bundled/fops-plugin-cloud/ui/src/components/LogPanel.jsx +162 -0
  45. package/src/plugins/bundled/fops-plugin-cloud/ui/src/components/ThemeToggle.jsx +46 -0
  46. package/src/plugins/bundled/fops-plugin-cloud/ui/src/css/additional-styles/utility-patterns.css +147 -0
  47. package/src/plugins/bundled/fops-plugin-cloud/ui/src/css/style.css +138 -0
  48. package/src/plugins/bundled/fops-plugin-cloud/ui/src/favicon.svg +15 -0
  49. package/src/plugins/bundled/fops-plugin-cloud/ui/src/lib/utils.ts +19 -0
  50. package/src/plugins/bundled/fops-plugin-cloud/ui/src/main.jsx +25 -0
  51. package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/Audit.jsx +164 -0
  52. package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/Costs.jsx +305 -0
  53. package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/CreateResource.jsx +285 -0
  54. package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/Fleet.jsx +307 -0
  55. package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/Resources.jsx +229 -0
  56. package/src/plugins/bundled/fops-plugin-cloud/ui/src/partials/Header.jsx +132 -0
  57. package/src/plugins/bundled/fops-plugin-cloud/ui/src/partials/Sidebar.jsx +174 -0
  58. package/src/plugins/bundled/fops-plugin-cloud/ui/src/partials/SidebarLinkGroup.jsx +21 -0
  59. package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/AuthContext.jsx +170 -0
  60. package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/Info.jsx +49 -0
  61. package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/ThemeContext.jsx +37 -0
  62. package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/Transition.jsx +116 -0
  63. package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/Utils.js +63 -0
  64. package/src/plugins/bundled/fops-plugin-cloud/ui/vite.config.js +23 -0
  65. package/src/plugins/bundled/fops-plugin-foundation/test-helpers.js +65 -0
  66. package/src/plugins/loader.js +34 -1
  67. package/src/plugins/registry.js +15 -0
  68. package/src/plugins/schemas.js +17 -0
  69. package/src/project.js +1 -1
  70. package/src/serve.js +196 -2
  71. package/src/shell.js +21 -1
  72. package/src/web/admin.html.js +236 -0
  73. package/src/web/api.js +73 -0
  74. package/src/web/dist/assets/index-BphVaAUd.css +1 -0
  75. package/src/web/dist/assets/index-CSckLzuG.js +129 -0
  76. package/src/web/dist/index.html +2 -2
  77. package/src/web/frontend/index.html +16 -0
  78. package/src/web/frontend/src/App.jsx +445 -0
  79. package/src/web/frontend/src/components/ChatView.jsx +910 -0
  80. package/src/web/frontend/src/components/InputBox.jsx +523 -0
  81. package/src/web/frontend/src/components/Sidebar.jsx +410 -0
  82. package/src/web/frontend/src/components/StatusBar.jsx +37 -0
  83. package/src/web/frontend/src/components/TabBar.jsx +87 -0
  84. package/src/web/frontend/src/hooks/useWebSocket.js +412 -0
  85. package/src/web/frontend/src/index.css +78 -0
  86. package/src/web/frontend/src/main.jsx +6 -0
  87. package/src/web/frontend/vite.config.js +21 -0
  88. package/src/web/server.js +64 -1
  89. package/src/web/dist/assets/index-NXC8Hvnp.css +0 -1
  90. package/src/web/dist/assets/index-QH1N4ejK.js +0 -112
@@ -1057,64 +1057,55 @@ export async function aksList(opts = {}) {
1057
1057
 
1058
1058
  banner("AKS Clusters");
1059
1059
 
1060
- // If no clusters tracked locally, try to discover fops-managed clusters from Azure
1061
- if (names.length === 0) {
1060
+ // Always discover fops-managed clusters from Azure so we pick up clusters
1061
+ // created by teammates or missing from local state.
1062
+ try {
1062
1063
  const execa = await lazyExeca();
1063
- try {
1064
- await ensureAzCli(execa);
1065
- await ensureAzAuth(execa, { subscription: opts.profile });
1066
- } catch {
1067
- hint("No clusters tracked.");
1068
- hint("Create one: fops azure aks up <name>\n");
1069
- return;
1070
- }
1071
-
1072
- hint("No clusters tracked locally — checking Azure for fops-managed clusters…\n");
1064
+ await ensureAzCli(execa);
1065
+ await ensureAzAuth(execa, { subscription: opts.profile });
1073
1066
 
1074
- try {
1075
- // Query all AKS clusters and filter by managed=fops tag
1076
- const { stdout, exitCode } = await execa("az", [
1077
- "aks", "list",
1078
- "--query", "[?tags.managed=='fops']",
1079
- "--output", "json",
1080
- ...subArgs(opts.profile),
1081
- ], { timeout: 60000, reject: false });
1082
-
1083
- if (exitCode === 0 && stdout?.trim()) {
1084
- const discovered = JSON.parse(stdout);
1085
- if (discovered.length > 0) {
1086
- for (const cl of discovered) {
1087
- const name = cl.name;
1088
- const info = {
1089
- resourceGroup: cl.resourceGroup,
1090
- location: cl.location,
1091
- kubernetesVersion: cl.kubernetesVersion,
1092
- fqdn: cl.fqdn,
1093
- nodeCount: cl.agentPoolProfiles?.reduce((s, p) => s + (p.count || 0), 0) || 0,
1094
- nodeVmSize: cl.agentPoolProfiles?.[0]?.vmSize || "unknown",
1095
- subscriptionId: cl.id?.split("/")[2],
1096
- createdAt: cl.provisioningState === "Succeeded" ? new Date().toISOString() : null,
1097
- };
1098
- writeClusterState(name, info);
1099
- console.log(OK(` + Discovered ${name} (${cl.location})`));
1100
- }
1101
- console.log("");
1102
- // Re-read after discovery
1103
- const updated = readAksClusters();
1104
- activeCluster = updated.activeCluster;
1105
- clusters = updated.clusters;
1106
- names = Object.keys(clusters);
1107
- }
1067
+ const { stdout, exitCode } = await execa("az", [
1068
+ "aks", "list",
1069
+ "--query", "[?tags.managed=='fops']",
1070
+ "--output", "json",
1071
+ ...subArgs(opts.profile),
1072
+ ], { timeout: 60000, reject: false });
1073
+
1074
+ if (exitCode === 0 && stdout?.trim()) {
1075
+ const discovered = JSON.parse(stdout);
1076
+ let added = 0;
1077
+ for (const cl of discovered) {
1078
+ if (clusters[cl.name]) continue; // already tracked
1079
+ const info = {
1080
+ resourceGroup: cl.resourceGroup,
1081
+ location: cl.location,
1082
+ kubernetesVersion: cl.kubernetesVersion,
1083
+ fqdn: cl.fqdn,
1084
+ nodeCount: cl.agentPoolProfiles?.reduce((s, p) => s + (p.count || 0), 0) || 0,
1085
+ nodeVmSize: cl.agentPoolProfiles?.[0]?.vmSize || "unknown",
1086
+ subscriptionId: cl.id?.split("/")[2],
1087
+ createdAt: cl.provisioningState === "Succeeded" ? new Date().toISOString() : null,
1088
+ };
1089
+ writeClusterState(cl.name, info);
1090
+ console.log(OK(` + Discovered ${cl.name} (${cl.location})`));
1091
+ added++;
1092
+ }
1093
+ if (added > 0) {
1094
+ console.log("");
1095
+ const updated = readAksClusters();
1096
+ activeCluster = updated.activeCluster;
1097
+ clusters = updated.clusters;
1098
+ names = Object.keys(clusters);
1108
1099
  }
1109
- } catch {
1110
- // Discovery failed, continue with empty list
1111
1100
  }
1101
+ } catch {
1102
+ // az not available or not authenticated — continue with local state
1103
+ }
1112
1104
 
1113
- if (names.length === 0) {
1114
- hint("No fops-managed clusters found in Azure.");
1115
- hint("Create one: fops azure aks up <name>\n");
1116
- return;
1117
- }
1105
+ if (names.length === 0) {
1106
+ hint("No clusters tracked.");
1107
+ hint("Create one: fops azure aks up <name>\n");
1108
+ return;
1118
1109
  }
1119
1110
 
1120
1111
  // Refresh each tracked cluster from Azure so RG, Location, Nodes, FQDN, etc. are current
@@ -1250,6 +1241,15 @@ export async function aksStatus(opts = {}) {
1250
1241
  hint(" Flux CLI not available — skipping Flux status.");
1251
1242
  }
1252
1243
 
1244
+ // External Secrets health check
1245
+ console.log(`\n ${LABEL("External Secrets")}`);
1246
+ try {
1247
+ const { validateExternalSecretsHealth } = await import("./azure-aks-secrets.js");
1248
+ await validateExternalSecretsHealth({ execa, clusterName, rg, sub });
1249
+ } catch (e) {
1250
+ hint(` Could not check External Secrets: ${e.message}`);
1251
+ }
1252
+
1253
1253
  console.log("");
1254
1254
  }
1255
1255
 
@@ -172,6 +172,32 @@ export async function reconcileSecretStore(ctx) {
172
172
  }
173
173
  }
174
174
 
175
+ // 2c. Check for External Secrets managed identity (ext-* prefix) and grant Key Vault access
176
+ const extSecretsIdentity = await detectExternalSecretsIdentity(execa, clusterName, sub);
177
+ if (extSecretsIdentity && kvId) {
178
+ const { stdout: hasExtRole } = await execa("az", [
179
+ "role", "assignment", "list",
180
+ "--assignee", extSecretsIdentity.clientId,
181
+ "--role", "Key Vault Secrets User",
182
+ "--scope", kvId,
183
+ "--query", "[0].id", "-o", "tsv",
184
+ ...subArgs(sub),
185
+ ], { reject: false, timeout: 30000 });
186
+
187
+ if (!hasExtRole?.trim()) {
188
+ await execa("az", [
189
+ "role", "assignment", "create",
190
+ "--assignee", extSecretsIdentity.clientId,
191
+ "--role", "Key Vault Secrets User",
192
+ "--scope", kvId,
193
+ ...subArgs(sub),
194
+ ], { reject: false, timeout: 30000 });
195
+ console.log(OK(` ✓ External Secrets identity granted Key Vault Secrets User role`));
196
+ }
197
+ // Store the identity ID for SecretStore configuration
198
+ ctx.extSecretsIdentityId = extSecretsIdentity.clientId;
199
+ }
200
+
175
201
  // 3. Ensure azure-secret-sp exists in each target namespace
176
202
  const { stdout: spSecretJson } = await kubectl([
177
203
  "get", "secret", "azure-secret-sp", "-n", "foundation", "-o", "json",
@@ -579,6 +605,131 @@ export async function detectEsApiVersion(kubectl) {
579
605
  return "external-secrets.io/v1";
580
606
  }
581
607
 
608
+ /**
609
+ * Detect External Secrets managed identity (ext-* prefix) for clusters with multiple identities.
610
+ * When AKS has multiple user-assigned identities, SecretStore needs to specify which one to use.
611
+ */
612
+ export async function detectExternalSecretsIdentity(execa, clusterName, sub) {
613
+ const { subArgs } = await import("./azure.js");
614
+
615
+ // List all managed identities that match the external-secrets pattern
616
+ const { stdout: identitiesJson } = await execa("az", [
617
+ "identity", "list",
618
+ "--query", `[?contains(name, '${clusterName}')].{name:name,clientId:clientId}`,
619
+ "-o", "json",
620
+ ...subArgs(sub),
621
+ ], { reject: false, timeout: 30000 });
622
+
623
+ let identities = [];
624
+ try { identities = JSON.parse(identitiesJson || "[]"); } catch {}
625
+
626
+ // Look for ext-* identity (External Secrets workload identity)
627
+ const extIdentity = identities.find(i => i.name?.startsWith("ext-") && i.name?.includes(clusterName));
628
+ if (extIdentity) {
629
+ return { name: extIdentity.name, clientId: extIdentity.clientId };
630
+ }
631
+
632
+ // If multiple identities exist but no ext-* found, warn about potential issues
633
+ if (identities.length > 1) {
634
+ const { WARN, hint } = await import("./azure.js");
635
+ console.log(WARN(` ⚠ Multiple managed identities found for ${clusterName} but no ext-* identity detected`));
636
+ hint("External Secrets may fail with 'Multiple user assigned identities exist' error");
637
+ hint("Create a dedicated identity: az identity create -n ext-<cluster> -g <rg>");
638
+ }
639
+
640
+ return null;
641
+ }
642
+
643
+ /**
644
+ * Validate External Secrets health - checks SecretStore config and ExternalSecret status.
645
+ * Reports issues like missing identityId when multiple identities exist.
646
+ */
647
+ export async function validateExternalSecretsHealth(ctx) {
648
+ const { execa, clusterName, sub } = ctx;
649
+ const { OK, WARN, DIM, hint, subArgs } = await import("./azure.js");
650
+
651
+ const kubectl = (args, opts = {}) =>
652
+ execa("kubectl", ["--context", clusterName, ...args], { timeout: 30000, reject: false, ...opts });
653
+
654
+ const issues = [];
655
+
656
+ // Check SecretStore status
657
+ const { stdout: ssJson } = await kubectl([
658
+ "get", "secretstore", SECRET_STORE_NAME, "-n", "foundation", "-o", "json",
659
+ ]);
660
+ if (!ssJson) {
661
+ issues.push({ level: "error", msg: "SecretStore not found in foundation namespace" });
662
+ return issues;
663
+ }
664
+
665
+ const ss = JSON.parse(ssJson);
666
+ const ssReady = ss.status?.conditions?.find(c => c.type === "Ready")?.status === "True";
667
+ const authType = ss.spec?.provider?.azurekv?.authType;
668
+ const identityId = ss.spec?.provider?.azurekv?.identityId;
669
+
670
+ // Check if using ManagedIdentity auth without identityId when multiple identities exist
671
+ if (authType === "ManagedIdentity" && !identityId) {
672
+ const extIdentity = await detectExternalSecretsIdentity(execa, clusterName, sub);
673
+ if (extIdentity) {
674
+ issues.push({
675
+ level: "warn",
676
+ msg: `SecretStore uses ManagedIdentity but identityId is empty`,
677
+ fix: `Set identityId to "${extIdentity.clientId}" in clusters/${clusterName}/config/secret-store.yaml`,
678
+ });
679
+ }
680
+ }
681
+
682
+ // Check ExternalSecret status
683
+ const { stdout: esJson } = await kubectl([
684
+ "get", "externalsecret", "-n", "foundation", "-o", "json",
685
+ ]);
686
+ const externalSecrets = esJson ? JSON.parse(esJson).items : [];
687
+
688
+ for (const es of externalSecrets) {
689
+ const ready = es.status?.conditions?.find(c => c.type === "Ready");
690
+ if (ready?.status !== "True") {
691
+ const msg = ready?.message || "Unknown error";
692
+ if (msg.includes("Multiple user assigned identities exist")) {
693
+ const extIdentity = await detectExternalSecretsIdentity(execa, clusterName, sub);
694
+ issues.push({
695
+ level: "error",
696
+ msg: `ExternalSecret "${es.metadata.name}" failing: Multiple identities detected`,
697
+ fix: extIdentity
698
+ ? `Add identityId: "${extIdentity.clientId}" to SecretStore spec`
699
+ : "Create ext-* managed identity and grant Key Vault access",
700
+ });
701
+ } else if (msg.includes("Forbidden") || msg.includes("not authorized")) {
702
+ issues.push({
703
+ level: "error",
704
+ msg: `ExternalSecret "${es.metadata.name}" failing: Key Vault access denied`,
705
+ fix: "Run: fops azure aks doctor --fix to grant Key Vault permissions",
706
+ });
707
+ } else {
708
+ issues.push({
709
+ level: "error",
710
+ msg: `ExternalSecret "${es.metadata.name}" failing: ${msg.substring(0, 100)}`,
711
+ });
712
+ }
713
+ }
714
+ }
715
+
716
+ // Report findings
717
+ if (issues.length === 0) {
718
+ console.log(OK(" ✓ External Secrets healthy"));
719
+ } else {
720
+ for (const issue of issues) {
721
+ if (issue.level === "error") {
722
+ console.log(WARN(` ✗ ${issue.msg}`));
723
+ } else {
724
+ console.log(WARN(` ⚠ ${issue.msg}`));
725
+ }
726
+ if (issue.fix) hint(` Fix: ${issue.fix}`);
727
+ }
728
+ }
729
+
730
+ return issues;
731
+ }
732
+
582
733
  // ── Vault auto-unseal bootstrap ──────────────────────────────────────────────
583
734
 
584
735
  export const VAULT_UNSEAL_KEY_NAME = "vault-unseal";
@@ -38,7 +38,7 @@ export async function reconcileStorageAccount(ctx) {
38
38
  const { execa, clusterName, rg, sub } = ctx;
39
39
  const storageAccountName = `fops${clusterName.replace(/-/g, "")}`.toLowerCase().slice(0, 24);
40
40
  const vaultName = `fops-${clusterName}-kv`;
41
- const containers = ["foundation", "vault"];
41
+ const containers = ["foundation", "vault", "loki"];
42
42
 
43
43
  hint(`Reconciling Azure Storage Account "${storageAccountName}"…`);
44
44
 
@@ -571,7 +571,7 @@ export async function reconcileStorageReplication(ctx) {
571
571
 
572
572
  const sourceAccountName = `fops${clusterName.replace(/-/g, "")}`.toLowerCase().slice(0, 24);
573
573
  const destAccountName = `fops${clusterName.replace(/-/g, "")}ha`.toLowerCase().slice(0, 24);
574
- const containers = ["foundation", "vault"];
574
+ const containers = ["foundation", "vault", "loki"];
575
575
 
576
576
  hint(`Setting up cross-region storage replication (${location} → ${replicaRegion})…`);
577
577
 
@@ -24,22 +24,50 @@ async function az(args, opts = {}) {
24
24
  }
25
25
  }
26
26
 
27
+ // In-memory cache for cost queries (TTL: 1 hour)
28
+ const _costCache = new Map();
29
+ const COST_CACHE_TTL = 60 * 60 * 1000; // 1 hour
30
+
27
31
  async function costQuery(scope, dataset) {
28
- try {
29
- const body = JSON.stringify({ ...dataset });
30
- const { stdout, stderr } = await execa("az", [
31
- "rest", "--method", "POST",
32
- "--url", `https://management.azure.com${scope}/providers/Microsoft.CostManagement/query?api-version=2023-11-01`,
33
- "--body", body,
34
- "--output", "json",
35
- ], { timeout: 120_000, reject: false });
36
- if (stderr?.includes("Please run 'az login'") || stderr?.includes("AADSTS")) {
37
- return { error: stderr.split("\n")[0] };
32
+ const cacheKey = JSON.stringify({ scope, dataset });
33
+ const cached = _costCache.get(cacheKey);
34
+ if (cached && Date.now() - cached.ts < COST_CACHE_TTL) {
35
+ return cached.data;
36
+ }
37
+
38
+ const maxRetries = 3;
39
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
40
+ try {
41
+ const body = JSON.stringify({ ...dataset });
42
+ const { stdout, stderr } = await execa("az", [
43
+ "rest", "--method", "POST",
44
+ "--url", `https://management.azure.com${scope}/providers/Microsoft.CostManagement/query?api-version=2023-11-01`,
45
+ "--body", body,
46
+ "--output", "json",
47
+ ], { timeout: 120_000, reject: false });
48
+
49
+ if (stderr?.includes("Please run 'az login'") || stderr?.includes("AADSTS")) {
50
+ return { error: stderr.split("\n")[0] + "\nMake sure you are logged into Azure (az login) and have Cost Management access." };
51
+ }
52
+
53
+ // Handle 429 rate limiting
54
+ if (stderr?.includes("429") || stderr?.includes("Too many requests") || stderr?.includes("Too Many Requests")) {
55
+ const wait = Math.pow(2, attempt + 1) * 5000; // 10s, 20s, 40s
56
+ if (attempt < maxRetries - 1) {
57
+ await new Promise((r) => setTimeout(r, wait));
58
+ continue;
59
+ }
60
+ return { error: `Rate limited by Azure Cost Management API after ${maxRetries} retries. Try again in a few minutes.` };
61
+ }
62
+
63
+ const result = JSON.parse(stdout || "{}");
64
+ _costCache.set(cacheKey, { data: result, ts: Date.now() });
65
+ return result;
66
+ } catch (err) {
67
+ if (attempt === maxRetries - 1) return { error: err.message };
38
68
  }
39
- return JSON.parse(stdout || "{}");
40
- } catch (err) {
41
- return { error: err.message };
42
69
  }
70
+ return { error: "Cost query failed after retries" };
43
71
  }
44
72
 
45
73
  function formatCost(amount, currency = "USD") {
@@ -402,16 +430,18 @@ export async function registerCostTools(api) {
402
430
  ? allVms.filter(v => v.powerState?.toLowerCase().includes(input.state))
403
431
  : allVms;
404
432
 
405
- // Rough monthly cost estimates (USD, Pay-As-You-Go, select regions)
433
+ // Rough monthly cost estimates (USD, Pay-As-You-Go)
406
434
  const costs = {
407
- Standard_B2s: 30, Standard_B4ms: 60, Standard_B2ms: 60,
408
- Standard_D2s_v3: 70, Standard_D4s_v3: 140, Standard_D8s_v3: 281,
409
- Standard_D16s_v3: 562, Standard_D32s_v3: 1124,
410
- Standard_D2s_v5: 70, Standard_D4s_v5: 140, Standard_D8s_v5: 281,
411
- Standard_D16s_v5: 562, Standard_D32s_v5: 1124,
412
- Standard_E2s_v3: 92, Standard_E4s_v3: 184, Standard_E8s_v3: 368,
413
- Standard_E16s_v3: 736, Standard_E32s_v3: 1472,
414
- Standard_F2s_v2: 62, Standard_F4s_v2: 124, Standard_F8s_v2: 248,
435
+ // B-series (burstable)
436
+ Standard_B1s: 8, Standard_B2s: 30, Standard_B2ms: 60, Standard_B4ms: 120,
437
+ // D-series (general purpose) — v3/v4/v5 similar pricing
438
+ Standard_D2s_v3: 70, Standard_D4s_v3: 140, Standard_D8s_v3: 281, Standard_D16s_v3: 562, Standard_D32s_v3: 1124,
439
+ Standard_D2s_v5: 70, Standard_D4s_v5: 140, Standard_D8s_v5: 281, Standard_D16s_v5: 562, Standard_D32s_v5: 1124, Standard_D64s_v5: 2249,
440
+ // E-series (memory optimized)
441
+ Standard_E2s_v3: 92, Standard_E4s_v3: 184, Standard_E8s_v3: 368, Standard_E16s_v3: 736, Standard_E32s_v3: 1472,
442
+ Standard_E2s_v5: 92, Standard_E4s_v5: 184, Standard_E8s_v5: 368, Standard_E16s_v5: 736, Standard_E32s_v5: 1472, Standard_E64s_v5: 2621,
443
+ // F-series (compute optimized)
444
+ Standard_F2s_v2: 62, Standard_F4s_v2: 124, Standard_F8s_v2: 248, Standard_F16s_v2: 496, Standard_F32s_v2: 992,
415
445
  };
416
446
 
417
447
  let output = "Azure VMs\n" + "=".repeat(75) + "\n";
@@ -71,20 +71,28 @@ async function forEachVm({
71
71
  return { results: [], vms };
72
72
  }
73
73
 
74
- const targets = opts.vmName ? [opts.vmName] : names;
75
- for (const t of targets) {
74
+ const allTargets = opts.vmName ? [opts.vmName] : names;
75
+ for (const t of allTargets) {
76
76
  if (!vms[t]) {
77
77
  console.error(ERR(`\n VM "${t}" not tracked. Run: fops azure list\n`));
78
78
  process.exit(1);
79
79
  }
80
80
  }
81
81
 
82
+ // Filter out VMs without public IPs (e.g., local stack) unless explicitly targeted
83
+ const skippedVms = opts.vmName ? [] : allTargets.filter(t => !vms[t].publicIp);
84
+ const targets = opts.vmName ? allTargets : allTargets.filter(t => vms[t].publicIp);
85
+
82
86
  banner(title);
83
- hint(`${targets.length} VM(s)${concurrency ? ` (concurrency: ${concurrency})` : ""}…\n`);
87
+ if (targets.length === 0) {
88
+ hint("No VMs with public IPs to target.\n");
89
+ return { results: [], vms, activeVm: listVms().activeVm };
90
+ }
91
+ hint(`${targets.length} VM(s)${skippedVms.length ? ` (${skippedVms.length} skipped: no public IP)` : ""}${concurrency ? ` (concurrency: ${concurrency})` : ""}…\n`);
84
92
 
85
93
  async function runOne(name) {
86
94
  const vm = vms[name];
87
- if (!vm.publicIp) return { name, ok: false, reason: "no public IP" };
95
+ if (!vm.publicIp) return { name, ok: false, reason: "no public IP (local stack?)" };
88
96
 
89
97
  try {
90
98
  await knockForVm(vm);
@@ -854,16 +854,20 @@ export function fopsUpCmd(publicUrl, { k3s, traefik, dai } = {}) {
854
854
  ].join("; ");
855
855
 
856
856
  const debugPostamble = [
857
- `echo \\\"=== fops up finished at \\$(date -Iseconds) with exit code \\$? ===\\\" >> ${logFile}`,
857
+ `echo \\\"=== fops up finished at \\$(date -Iseconds) with exit code \\$_fops_rc ===\\\" >> ${logFile}`,
858
858
  `echo \\\"--- Container status ---\\\" >> ${logFile}`,
859
859
  `docker compose ps --format 'table {{.Name}}\\t{{.Status}}' >> ${logFile} 2>&1`,
860
860
  `echo \\\"--- Recent docker events ---\\\" >> ${logFile}`,
861
861
  `tail -50 ${eventsLog} >> ${logFile} 2>&1 || true`,
862
+ `exit \\$_fops_rc`,
862
863
  ].join("; ");
863
864
 
865
+ // Fail fast if Docker is not installed
866
+ const dockerGuard = `command -v docker >/dev/null 2>&1 || { echo \\\"ERROR: Docker is not installed — cannot start Foundation\\\" >> ${logFile}; echo \\\"ERROR: Docker is not installed\\\" >&2; exit 1; }`;
867
+
864
868
  // Run from project dir with FOUNDATION_ROOT set explicitly (sudo can reset cwd)
865
869
  const envSetup = `export PATH=/usr/local/bin:/usr/bin:\\$PATH FOUNDATION_ROOT=/opt/foundation-compose`;
866
- return `bash -c "cd /opt/foundation-compose && ${envSetup}; ${debugPreamble}; ${quietPull}; if command -v fops >/dev/null 2>&1; then ${profileEnv}${fopsCmd}; else echo 'fops not found — falling back to docker compose'; ${composeCmd}; fi; ${debugPostamble}"`;
870
+ return `bash -c "cd /opt/foundation-compose && ${envSetup}; ${dockerGuard}; ${debugPreamble}; ${quietPull}; if command -v fops >/dev/null 2>&1; then ${profileEnv}${fopsCmd}; else echo 'fops not found — falling back to docker compose'; ${composeCmd}; fi; _fops_rc=\\$?; ${debugPostamble}"`;
867
871
  }
868
872
 
869
873
  /** Build remote "fops up [component] [branch]" args (same as local fops up). For foreground run on VM. */
@@ -321,6 +321,71 @@ export async function azureTrinoStatus(opts = {}) {
321
321
  console.log("");
322
322
  }
323
323
 
324
+ // ── ping ─────────────────────────────────────────────────────────────────────
325
+
326
+ /**
327
+ * Check Foundation backend /api/ping/json health endpoint on a VM.
328
+ */
329
+ export async function azurePing(opts = {}) {
330
+ const execa = await lazyExeca();
331
+ const state = requireVmState(opts.vmName);
332
+ const { vmName } = state;
333
+ const ip = state.publicIp;
334
+ const adminUser = DEFAULTS.adminUser;
335
+
336
+ if (!ip) {
337
+ console.log(WARN(` VM ${vmName} has no public IP (probably stopped)`));
338
+ return;
339
+ }
340
+
341
+ await knockForVm(state);
342
+ const sshOk = await waitForSsh(execa, ip, adminUser, 10000);
343
+ if (!sshOk) {
344
+ console.log(WARN("\n ⚠ SSH not reachable"));
345
+ return;
346
+ }
347
+
348
+ const pingToken = opts.token || process.env.FOPS_PING_TOKEN || "";
349
+ const tokenHeader = pingToken ? `-H "X-Ping-Token: ${pingToken}"` : "";
350
+ const { stdout, exitCode } = await sshCmd(execa, ip, adminUser,
351
+ `curl -sf ${tokenHeader} http://localhost:9001/api/ping/json 2>/dev/null || echo '{}'`,
352
+ 15000,
353
+ );
354
+
355
+ let ping;
356
+ try {
357
+ ping = JSON.parse(stdout.trim() || "{}");
358
+ } catch {
359
+ console.log(ERR(` Failed to parse ping response: ${stdout}`));
360
+ return;
361
+ }
362
+
363
+ banner(`Ping: ${vmName}`);
364
+
365
+ if (ping.ok === undefined) {
366
+ console.log(WARN(" No response from backend /api/ping/json"));
367
+ hint("Backend may be down or starting up");
368
+ console.log("");
369
+ return;
370
+ }
371
+
372
+ const overall = ping.ok ? OK("✓ healthy") : ERR("✗ unhealthy");
373
+ kvLine("Status", overall);
374
+ if (ping.tag) kvLine("Tag", DIM(ping.tag));
375
+
376
+ if (ping.checks) {
377
+ console.log("");
378
+ console.log(ACCENT(" Checks:"));
379
+ for (const [name, check] of Object.entries(ping.checks)) {
380
+ const status = check.ok ? OK("✓") : ERR("✗");
381
+ const latency = check.latency_ms !== undefined ? DIM(` (${check.latency_ms}ms)`) : "";
382
+ const err = check.error ? ERR(` — ${check.error}`) : "";
383
+ console.log(` ${status} ${name}${latency}${err}`);
384
+ }
385
+ }
386
+ console.log("");
387
+ }
388
+
324
389
  /**
325
390
  * Run VM diagnostics: show config versions, then run make download and print
326
391
  * full output so image-pull failures (e.g. after config versions change) can be diagnosed.
@@ -1295,6 +1360,41 @@ export async function azureList(opts = {}) {
1295
1360
  }
1296
1361
  } catch { /* az not available or not authenticated */ }
1297
1362
 
1363
+ // Always discover AKS clusters from Azure (tag managed=fops)
1364
+ try {
1365
+ const execa = await lazyExeca();
1366
+ const { writeClusterState } = await import("./azure-aks-state.js");
1367
+ const { stdout, exitCode } = await execa("az", [
1368
+ "aks", "list",
1369
+ "--query", "[?tags.managed=='fops']",
1370
+ "--output", "json",
1371
+ ...subArgs(opts.subscription),
1372
+ ], { timeout: 60000, reject: false });
1373
+ if (exitCode === 0 && stdout?.trim()) {
1374
+ const discovered = JSON.parse(stdout);
1375
+ let added = 0;
1376
+ for (const cl of discovered) {
1377
+ if (aksClusters[cl.name]) continue;
1378
+ writeClusterState(cl.name, {
1379
+ resourceGroup: cl.resourceGroup,
1380
+ location: cl.location,
1381
+ kubernetesVersion: cl.kubernetesVersion,
1382
+ fqdn: cl.fqdn,
1383
+ nodeCount: cl.agentPoolProfiles?.reduce((s, p) => s + (p.count || 0), 0) || 0,
1384
+ nodeVmSize: cl.agentPoolProfiles?.[0]?.vmSize || "unknown",
1385
+ subscriptionId: cl.id?.split("/")[2],
1386
+ });
1387
+ added++;
1388
+ }
1389
+ if (added > 0) {
1390
+ console.log(OK(` ✓ Re-discovered ${added} AKS cluster(s) from Azure`) + DIM(" (tag managed=fops)\n"));
1391
+ fullState = readState();
1392
+ aksClusters = (fullState.azure || {}).clusters || {};
1393
+ hasAks = Object.keys(aksClusters).length > 0;
1394
+ }
1395
+ }
1396
+ } catch { /* az not available or AKS discovery failed */ }
1397
+
1298
1398
  // JSON output mode - early return with structured data
1299
1399
  if (opts.json) {
1300
1400
  const output = {
@@ -1568,10 +1668,9 @@ export async function azureList(opts = {}) {
1568
1668
  const hasPrimary = primaryName && clusterNames.includes(primaryName);
1569
1669
  const prefix = isStandby && hasPrimary ? " └─" : "";
1570
1670
  const dot = active ? OK("●") : DIM("○");
1571
- const displayName = isStandby && hasPrimary
1572
- ? `${cr.name} ${DIM("(HA standby)")}`
1573
- : cr.name;
1574
- const cNameTxt = active ? OK(displayName.padEnd(maxCName + 13)) : LABEL(displayName.padEnd(maxCName + 13));
1671
+ const paddedName = cr.name.padEnd(maxCName);
1672
+ const standbySuffix = isStandby && hasPrimary ? ` ${DIM("(HA standby)")}` : "";
1673
+ const cNameTxt = active ? OK(paddedName) + standbySuffix : LABEL(paddedName) + standbySuffix;
1575
1674
  const loc = (cl?.location || cr.location || "–").padEnd(10);
1576
1675
  const nodes = cr.nodes != null ? `${cr.nodes} x ${cr.sizes || "?"}` : "–";
1577
1676
  const k8s = (cr.kubernetesVersion || "–").padEnd(6);
@@ -1640,12 +1739,19 @@ export function printServiceMatrix(results, nameWidth) {
1640
1739
  const withSvc = results.filter(r => r.services && Object.keys(r.services).length > 0);
1641
1740
  if (withSvc.length === 0) return;
1642
1741
 
1742
+ // Resolve display value for a service entry (supports both string and {tag,sha} formats)
1743
+ const svcVal = (entry) => {
1744
+ if (!entry) return null;
1745
+ if (typeof entry === "string") return entry;
1746
+ return entry.sha || entry.tag || null;
1747
+ };
1748
+
1643
1749
  // Find the majority value per column to highlight drift
1644
1750
  const majority = {};
1645
1751
  for (const svc of SVC_ORDER) {
1646
1752
  const counts = {};
1647
1753
  for (const r of withSvc) {
1648
- const v = r.services?.[svc];
1754
+ const v = svcVal(r.services?.[svc]);
1649
1755
  if (v) counts[v] = (counts[v] || 0) + 1;
1650
1756
  }
1651
1757
  const sorted = Object.entries(counts).sort((a, b) => b[1] - a[1]);
@@ -1660,7 +1766,7 @@ export function printServiceMatrix(results, nameWidth) {
1660
1766
  for (const r of withSvc) {
1661
1767
  const nameTxt = LABEL(r.name.padEnd(nameWidth));
1662
1768
  const cells = SVC_ORDER.map(svc => {
1663
- const v = r.services?.[svc] || "–";
1769
+ const v = svcVal(r.services?.[svc]) || "–";
1664
1770
  const display = v.padEnd(colW);
1665
1771
  if (v === "–") return DIM(display);
1666
1772
  if (v !== majority[svc]) return WARN(display);
@@ -1671,7 +1777,7 @@ export function printServiceMatrix(results, nameWidth) {
1671
1777
 
1672
1778
  // Check for drift
1673
1779
  const hasDrift = SVC_ORDER.some(svc => {
1674
- const vals = withSvc.map(r => r.services?.[svc]).filter(Boolean);
1780
+ const vals = withSvc.map(r => svcVal(r.services?.[svc])).filter(Boolean);
1675
1781
  return new Set(vals).size > 1;
1676
1782
  });
1677
1783
  if (hasDrift) {