@meshxdata/fops 0.1.52 → 0.1.54
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +559 -0
- package/package.json +2 -6
- package/src/agent/agent.js +6 -0
- package/src/commands/setup.js +34 -0
- package/src/fleet-registry.js +38 -2
- package/src/plugins/__test-fixtures__/fake-plugin.js +2 -0
- package/src/plugins/__test-fixtures__/no-register-plugin.js +2 -0
- package/src/plugins/__test-fixtures__/with-register/index.js +2 -0
- package/src/plugins/__test-fixtures__/without-register/index.js +2 -0
- package/src/plugins/api.js +4 -0
- package/src/plugins/builtins/docker-compose.js +65 -0
- package/src/plugins/bundled/fops-plugin-azure/index.js +4 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-core.js +44 -53
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-storage.js +2 -2
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-cost.js +52 -22
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-helpers.js +6 -2
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops.js +113 -7
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-provision-init.js +13 -4
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-provision.js +91 -14
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-service.js +507 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-sync.js +146 -7
- package/src/plugins/bundled/fops-plugin-azure/lib/azure.js +1 -1
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/vm-cmds.js +61 -0
- package/src/plugins/bundled/fops-plugin-cloud/api.js +712 -0
- package/src/plugins/bundled/fops-plugin-cloud/fops.plugin.json +6 -0
- package/src/plugins/bundled/fops-plugin-cloud/index.js +208 -0
- package/src/plugins/bundled/fops-plugin-cloud/lib/azure-provider.js +81 -0
- package/src/plugins/bundled/fops-plugin-cloud/lib/provider.js +50 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/dist/assets/favicon-C49brna2.svg +15 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/dist/assets/index-CVqQ_kKW.js +65 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/dist/assets/index-DZetahP3.css +1 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/dist/index.html +28 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/index.html +27 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/package-lock.json +2634 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/package.json +29 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/postcss.config.cjs +5 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/App.jsx +32 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/api/client.js +114 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/api/queries.js +111 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/components/LogPanel.jsx +162 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/components/ThemeToggle.jsx +46 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/css/additional-styles/utility-patterns.css +147 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/css/style.css +138 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/favicon.svg +15 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/lib/utils.ts +19 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/main.jsx +25 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/Audit.jsx +164 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/Costs.jsx +305 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/CreateResource.jsx +285 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/Fleet.jsx +307 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/pages/Resources.jsx +229 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/partials/Header.jsx +132 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/partials/Sidebar.jsx +174 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/partials/SidebarLinkGroup.jsx +21 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/AuthContext.jsx +170 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/Info.jsx +49 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/ThemeContext.jsx +37 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/Transition.jsx +116 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/src/utils/Utils.js +63 -0
- package/src/plugins/bundled/fops-plugin-cloud/ui/vite.config.js +23 -0
- package/src/plugins/bundled/fops-plugin-foundation/test-helpers.js +65 -0
- package/src/plugins/loader.js +34 -1
- package/src/plugins/registry.js +15 -0
- package/src/plugins/schemas.js +17 -0
- package/src/project.js +1 -1
- package/src/serve.js +196 -2
- package/src/shell.js +21 -1
- package/src/web/admin.html.js +236 -0
- package/src/web/api.js +73 -0
- package/src/web/dist/assets/index-BphVaAUd.css +1 -0
- package/src/web/dist/assets/index-CSckLzuG.js +129 -0
- package/src/web/dist/index.html +2 -2
- package/src/web/frontend/index.html +16 -0
- package/src/web/frontend/src/App.jsx +445 -0
- package/src/web/frontend/src/components/ChatView.jsx +910 -0
- package/src/web/frontend/src/components/InputBox.jsx +523 -0
- package/src/web/frontend/src/components/Sidebar.jsx +410 -0
- package/src/web/frontend/src/components/StatusBar.jsx +37 -0
- package/src/web/frontend/src/components/TabBar.jsx +87 -0
- package/src/web/frontend/src/hooks/useWebSocket.js +412 -0
- package/src/web/frontend/src/index.css +78 -0
- package/src/web/frontend/src/main.jsx +6 -0
- package/src/web/frontend/vite.config.js +21 -0
- package/src/web/server.js +64 -1
- package/src/web/dist/assets/index-NXC8Hvnp.css +0 -1
- package/src/web/dist/assets/index-QH1N4ejK.js +0 -112
package/src/fleet-registry.js
CHANGED
|
@@ -102,13 +102,31 @@ function parseSnapshot(raw, vmMeta) {
|
|
|
102
102
|
if (eq > 0) flags[line.slice(0, eq)] = line.slice(eq + 1);
|
|
103
103
|
}
|
|
104
104
|
|
|
105
|
-
// Services
|
|
105
|
+
// Services — health + version from container images
|
|
106
|
+
const SERVICE_IMAGE_MAP = {
|
|
107
|
+
be: "foundation-backend",
|
|
108
|
+
fe: "foundation-frontend",
|
|
109
|
+
pr: "foundation-processor",
|
|
110
|
+
wa: "foundation-watcher",
|
|
111
|
+
sc: "foundation-scheduler",
|
|
112
|
+
se: "foundation-storage-engine",
|
|
113
|
+
};
|
|
114
|
+
|
|
106
115
|
const services = {
|
|
107
116
|
backend: (raw.backendHealth || "").trim() === "OK" ? "healthy" : "down",
|
|
108
117
|
frontend: (raw.frontendHealth || "").trim() === "OK" ? "healthy" : "down",
|
|
109
118
|
storage: (raw.storageHealth || "").trim() === "OK" ? "healthy" : "down",
|
|
110
119
|
};
|
|
111
120
|
|
|
121
|
+
// Extract version tags from container images
|
|
122
|
+
for (const [key, imageName] of Object.entries(SERVICE_IMAGE_MAP)) {
|
|
123
|
+
const c = containers.find((c) => c.image?.includes(imageName));
|
|
124
|
+
if (c?.image) {
|
|
125
|
+
const tag = c.image.split(":").pop() || "";
|
|
126
|
+
services[key] = { tag, health: c.healthy ? "healthy" : c.unhealthy ? "unhealthy" : "down" };
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
|
|
112
130
|
// Foundation entities (meshes, data systems, data sources, data products)
|
|
113
131
|
let foundation = null;
|
|
114
132
|
try {
|
|
@@ -306,7 +324,25 @@ export class FleetRegistry {
|
|
|
306
324
|
fopsVersion: s.stack.fopsVersion,
|
|
307
325
|
branch: s.stack.gitBranch,
|
|
308
326
|
commit: s.stack.gitSha,
|
|
309
|
-
services:
|
|
327
|
+
services: (() => {
|
|
328
|
+
const svc = { ...s.services };
|
|
329
|
+
// HTTP scrape puts versions in a nested object; merge them up
|
|
330
|
+
if (svc.versions) {
|
|
331
|
+
Object.assign(svc, svc.versions);
|
|
332
|
+
delete svc.versions;
|
|
333
|
+
}
|
|
334
|
+
// If still no version keys, extract from container images
|
|
335
|
+
if (!svc.be && s.containers?.list) {
|
|
336
|
+
const IMAGE_MAP = { be: "foundation-backend", fe: "foundation-frontend", pr: "foundation-processor", wa: "foundation-watcher", sc: "foundation-scheduler", se: "foundation-storage-engine" };
|
|
337
|
+
for (const [key, img] of Object.entries(IMAGE_MAP)) {
|
|
338
|
+
const c = s.containers.list.find((c) => c.image?.includes(img));
|
|
339
|
+
if (c?.image) {
|
|
340
|
+
svc[key] = { tag: c.image.split(":").pop() || "", health: c.healthy ? "healthy" : c.unhealthy ? "unhealthy" : "down" };
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
return svc;
|
|
345
|
+
})(),
|
|
310
346
|
lastScrape: entry.lastScrape,
|
|
311
347
|
});
|
|
312
348
|
}
|
package/src/plugins/api.js
CHANGED
|
@@ -98,6 +98,10 @@ export function createPluginApi(pluginId, registry, opts = {}) {
|
|
|
98
98
|
registry.services.push({ pluginId, name, instance });
|
|
99
99
|
},
|
|
100
100
|
|
|
101
|
+
registerWebPanel(panel) {
|
|
102
|
+
registry.addWebPanel({ pluginId, ...panel });
|
|
103
|
+
},
|
|
104
|
+
|
|
101
105
|
registerIndexSource(source) {
|
|
102
106
|
if (!registry.indexSources) registry.indexSources = [];
|
|
103
107
|
registry.indexSources.push({ pluginId, name: source.name, fn: source.fn });
|
|
@@ -465,6 +465,71 @@ You manage Docker Compose stacks: inspect containers, read logs, restart service
|
|
|
465
465
|
- If asked about security or CVEs, use compose_scan.`,
|
|
466
466
|
});
|
|
467
467
|
|
|
468
|
+
// ── Debug Agent ───────────────────────────────────────────────────
|
|
469
|
+
api.registerAgent({
|
|
470
|
+
name: "debug",
|
|
471
|
+
description: "Debug stack issues — diagnose alerts, check containers, logs, metrics, and suggest fixes",
|
|
472
|
+
contextMode: "minimal",
|
|
473
|
+
tools: [
|
|
474
|
+
"compose_ps",
|
|
475
|
+
"compose_logs",
|
|
476
|
+
"compose_restart",
|
|
477
|
+
"compose_exec",
|
|
478
|
+
"compose_inspect",
|
|
479
|
+
"compose_stats",
|
|
480
|
+
"compose_images",
|
|
481
|
+
"embeddings_search",
|
|
482
|
+
],
|
|
483
|
+
maxIterations: 20,
|
|
484
|
+
systemPrompt: `You are FOPS Debug Agent — an expert platform debugger for Foundation stack issues.
|
|
485
|
+
|
|
486
|
+
## Role
|
|
487
|
+
You investigate alerts, diagnose service failures, and suggest fixes. You have direct access to Docker containers, logs, and system metrics. You are called by the Glue bot when monitoring alerts fire.
|
|
488
|
+
|
|
489
|
+
## Tools Available
|
|
490
|
+
- **compose_ps**: List all containers and their status (start here)
|
|
491
|
+
- **compose_logs**: Read container logs (check for errors, crashes, OOM)
|
|
492
|
+
- **compose_inspect**: Get container details (health checks, env vars, mounts, restarts)
|
|
493
|
+
- **compose_stats**: CPU/memory/network usage per container
|
|
494
|
+
- **compose_exec**: Run commands inside containers (e.g. check disk, network, processes)
|
|
495
|
+
- **compose_images**: List images and versions
|
|
496
|
+
- **compose_restart**: Restart specific services
|
|
497
|
+
- **embeddings_search**: Search docs, configs, and past knowledge for context
|
|
498
|
+
|
|
499
|
+
## Investigation Approach
|
|
500
|
+
1. **Triage**: Run compose_ps to see overall stack health. Identify unhealthy/restarting containers.
|
|
501
|
+
2. **Diagnose**: For each affected container:
|
|
502
|
+
- compose_logs to find errors, exceptions, OOM kills, crash traces
|
|
503
|
+
- compose_inspect for health check failures, restart count, resource limits
|
|
504
|
+
- compose_stats for CPU/memory spikes
|
|
505
|
+
3. **Context**: Use embeddings_search to find relevant docs or known issues.
|
|
506
|
+
4. **Root cause**: Correlate findings — is it a code bug, resource exhaustion, dependency failure, config issue?
|
|
507
|
+
5. **Fix**: Suggest specific actions (restart, config change, scale, rollback).
|
|
508
|
+
|
|
509
|
+
## Output Format
|
|
510
|
+
Structure your response with blank lines between each section:
|
|
511
|
+
|
|
512
|
+
**Status:** One-line summary (e.g. "Processor container restarting due to OOM")
|
|
513
|
+
|
|
514
|
+
**Findings:** What you discovered from each tool
|
|
515
|
+
|
|
516
|
+
**Root Cause:** Most likely cause
|
|
517
|
+
|
|
518
|
+
**Actions:** Specific steps to fix
|
|
519
|
+
|
|
520
|
+
**Prevention:** How to avoid this in the future
|
|
521
|
+
|
|
522
|
+
## Rules
|
|
523
|
+
- Always check compose_ps first.
|
|
524
|
+
- Check logs BEFORE suggesting restarts.
|
|
525
|
+
- Look for patterns: repeated restarts, OOM kills, connection refused, timeout errors.
|
|
526
|
+
- If a dependency is down (postgres, kafka), flag it — fixing the dependency fixes the dependent.
|
|
527
|
+
- Be concise — this output goes into a Glue chat thread.
|
|
528
|
+
- Never suggest 'docker compose down' — prefer targeted restarts.
|
|
529
|
+
- After restarting, verify with compose_ps.
|
|
530
|
+
- IMPORTANT: Always put a blank line between sections in your response so they render as separate paragraphs.`,
|
|
531
|
+
});
|
|
532
|
+
|
|
468
533
|
// ── Doctor check: Trivy ───────────────────────────────────────────
|
|
469
534
|
api.registerDoctorCheck({
|
|
470
535
|
name: "Trivy",
|
|
@@ -23,6 +23,10 @@ import { registerRegistryCommands } from "./lib/commands/registry-cmds.js";
|
|
|
23
23
|
export { resolveFoundationCreds, resolveAuth0Config, authenticateVm, vmFetch };
|
|
24
24
|
|
|
25
25
|
export async function register(api) {
|
|
26
|
+
// ── Service: expose structured API for cross-plugin use ──
|
|
27
|
+
const { AzureService } = await import("./lib/azure-service.js");
|
|
28
|
+
api.registerService("azure", new AzureService());
|
|
29
|
+
|
|
26
30
|
// ── Commands ──────────────────────────────────────────────────────────
|
|
27
31
|
|
|
28
32
|
api.registerCommand((program, registry) => {
|
|
@@ -1057,64 +1057,55 @@ export async function aksList(opts = {}) {
|
|
|
1057
1057
|
|
|
1058
1058
|
banner("AKS Clusters");
|
|
1059
1059
|
|
|
1060
|
-
//
|
|
1061
|
-
|
|
1060
|
+
// Always discover fops-managed clusters from Azure so we pick up clusters
|
|
1061
|
+
// created by teammates or missing from local state.
|
|
1062
|
+
try {
|
|
1062
1063
|
const execa = await lazyExeca();
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
await ensureAzAuth(execa, { subscription: opts.profile });
|
|
1066
|
-
} catch {
|
|
1067
|
-
hint("No clusters tracked.");
|
|
1068
|
-
hint("Create one: fops azure aks up <name>\n");
|
|
1069
|
-
return;
|
|
1070
|
-
}
|
|
1071
|
-
|
|
1072
|
-
hint("No clusters tracked locally — checking Azure for fops-managed clusters…\n");
|
|
1064
|
+
await ensureAzCli(execa);
|
|
1065
|
+
await ensureAzAuth(execa, { subscription: opts.profile });
|
|
1073
1066
|
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
if (
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
names = Object.keys(clusters);
|
|
1107
|
-
}
|
|
1067
|
+
const { stdout, exitCode } = await execa("az", [
|
|
1068
|
+
"aks", "list",
|
|
1069
|
+
"--query", "[?tags.managed=='fops']",
|
|
1070
|
+
"--output", "json",
|
|
1071
|
+
...subArgs(opts.profile),
|
|
1072
|
+
], { timeout: 60000, reject: false });
|
|
1073
|
+
|
|
1074
|
+
if (exitCode === 0 && stdout?.trim()) {
|
|
1075
|
+
const discovered = JSON.parse(stdout);
|
|
1076
|
+
let added = 0;
|
|
1077
|
+
for (const cl of discovered) {
|
|
1078
|
+
if (clusters[cl.name]) continue; // already tracked
|
|
1079
|
+
const info = {
|
|
1080
|
+
resourceGroup: cl.resourceGroup,
|
|
1081
|
+
location: cl.location,
|
|
1082
|
+
kubernetesVersion: cl.kubernetesVersion,
|
|
1083
|
+
fqdn: cl.fqdn,
|
|
1084
|
+
nodeCount: cl.agentPoolProfiles?.reduce((s, p) => s + (p.count || 0), 0) || 0,
|
|
1085
|
+
nodeVmSize: cl.agentPoolProfiles?.[0]?.vmSize || "unknown",
|
|
1086
|
+
subscriptionId: cl.id?.split("/")[2],
|
|
1087
|
+
createdAt: cl.provisioningState === "Succeeded" ? new Date().toISOString() : null,
|
|
1088
|
+
};
|
|
1089
|
+
writeClusterState(cl.name, info);
|
|
1090
|
+
console.log(OK(` + Discovered ${cl.name} (${cl.location})`));
|
|
1091
|
+
added++;
|
|
1092
|
+
}
|
|
1093
|
+
if (added > 0) {
|
|
1094
|
+
console.log("");
|
|
1095
|
+
const updated = readAksClusters();
|
|
1096
|
+
activeCluster = updated.activeCluster;
|
|
1097
|
+
clusters = updated.clusters;
|
|
1098
|
+
names = Object.keys(clusters);
|
|
1108
1099
|
}
|
|
1109
|
-
} catch {
|
|
1110
|
-
// Discovery failed, continue with empty list
|
|
1111
1100
|
}
|
|
1101
|
+
} catch {
|
|
1102
|
+
// az not available or not authenticated — continue with local state
|
|
1103
|
+
}
|
|
1112
1104
|
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
}
|
|
1105
|
+
if (names.length === 0) {
|
|
1106
|
+
hint("No clusters tracked.");
|
|
1107
|
+
hint("Create one: fops azure aks up <name>\n");
|
|
1108
|
+
return;
|
|
1118
1109
|
}
|
|
1119
1110
|
|
|
1120
1111
|
// Refresh each tracked cluster from Azure so RG, Location, Nodes, FQDN, etc. are current
|
|
@@ -38,7 +38,7 @@ export async function reconcileStorageAccount(ctx) {
|
|
|
38
38
|
const { execa, clusterName, rg, sub } = ctx;
|
|
39
39
|
const storageAccountName = `fops${clusterName.replace(/-/g, "")}`.toLowerCase().slice(0, 24);
|
|
40
40
|
const vaultName = `fops-${clusterName}-kv`;
|
|
41
|
-
const containers = ["foundation", "vault"];
|
|
41
|
+
const containers = ["foundation", "vault", "loki"];
|
|
42
42
|
|
|
43
43
|
hint(`Reconciling Azure Storage Account "${storageAccountName}"…`);
|
|
44
44
|
|
|
@@ -571,7 +571,7 @@ export async function reconcileStorageReplication(ctx) {
|
|
|
571
571
|
|
|
572
572
|
const sourceAccountName = `fops${clusterName.replace(/-/g, "")}`.toLowerCase().slice(0, 24);
|
|
573
573
|
const destAccountName = `fops${clusterName.replace(/-/g, "")}ha`.toLowerCase().slice(0, 24);
|
|
574
|
-
const containers = ["foundation", "vault"];
|
|
574
|
+
const containers = ["foundation", "vault", "loki"];
|
|
575
575
|
|
|
576
576
|
hint(`Setting up cross-region storage replication (${location} → ${replicaRegion})…`);
|
|
577
577
|
|
|
@@ -24,22 +24,50 @@ async function az(args, opts = {}) {
|
|
|
24
24
|
}
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
+
// In-memory cache for cost queries (TTL: 1 hour)
|
|
28
|
+
const _costCache = new Map();
|
|
29
|
+
const COST_CACHE_TTL = 60 * 60 * 1000; // 1 hour
|
|
30
|
+
|
|
27
31
|
/**
 * Run an Azure Cost Management query through `az rest`, with an in-memory
 * cache and retry on rate limiting.
 *
 * Successful responses are cached in `_costCache` under a key derived from
 * `scope` and `dataset`, and reused while younger than `COST_CACHE_TTL`.
 * Failures are never cached, so a transient error does not hide real data
 * for the whole TTL.
 *
 * @param {string} scope - Azure resource scope path appended to
 *   `https://management.azure.com` (e.g. a subscription path).
 * @param {object} dataset - Request body for the Cost Management query API.
 * @returns {Promise<object>} The parsed API response, or `{ error: string }`
 *   when the call fails (auth problem, rate limit exhausted, non-zero exit,
 *   or an exception on the final attempt).
 */
async function costQuery(scope, dataset) {
  const cacheKey = JSON.stringify({ scope, dataset });
  const cached = _costCache.get(cacheKey);
  if (cached && Date.now() - cached.ts < COST_CACHE_TTL) {
    return cached.data;
  }

  const maxRetries = 3;
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      const body = JSON.stringify({ ...dataset });
      // reject: false — failures are reported through stderr/exitCode
      // instead of a thrown error, so they must be checked explicitly below.
      const { stdout, stderr, exitCode } = await execa("az", [
        "rest", "--method", "POST",
        "--url", `https://management.azure.com${scope}/providers/Microsoft.CostManagement/query?api-version=2023-11-01`,
        "--body", body,
        "--output", "json",
      ], { timeout: 120_000, reject: false });

      if (stderr?.includes("Please run 'az login'") || stderr?.includes("AADSTS")) {
        return { error: stderr.split("\n")[0] + "\nMake sure you are logged into Azure (az login) and have Cost Management access." };
      }

      // Handle 429 rate limiting
      if (stderr?.includes("429") || stderr?.includes("Too many requests") || stderr?.includes("Too Many Requests")) {
        const wait = Math.pow(2, attempt + 1) * 5000; // 10s, 20s, 40s
        if (attempt < maxRetries - 1) {
          await new Promise((r) => setTimeout(r, wait));
          continue;
        }
        return { error: `Rate limited by Azure Cost Management API after ${maxRetries} retries. Try again in a few minutes.` };
      }

      // Any other failure (forbidden, bad request, timeout, …): surface it
      // rather than parsing an empty stdout into `{}` and caching that as if
      // it were a valid, empty cost report.
      if (exitCode !== 0) {
        const reason = (stderr || "").trim().split("\n")[0];
        return { error: reason || `az rest exited with code ${exitCode}` };
      }

      const result = JSON.parse(stdout || "{}");
      _costCache.set(cacheKey, { data: result, ts: Date.now() });
      return result;
    } catch (err) {
      // Thrown errors (spawn failure, malformed JSON) are retried; only the
      // last attempt reports the error to the caller.
      if (attempt === maxRetries - 1) return { error: err.message };
    }
  }
  return { error: "Cost query failed after retries" };
}
|
|
44
72
|
|
|
45
73
|
function formatCost(amount, currency = "USD") {
|
|
@@ -402,16 +430,18 @@ export async function registerCostTools(api) {
|
|
|
402
430
|
? allVms.filter(v => v.powerState?.toLowerCase().includes(input.state))
|
|
403
431
|
: allVms;
|
|
404
432
|
|
|
405
|
-
// Rough monthly cost estimates (USD, Pay-As-You-Go
|
|
433
|
+
// Rough monthly cost estimates (USD, Pay-As-You-Go)
|
|
406
434
|
const costs = {
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
Standard_D16s_v5: 562, Standard_D32s_v5: 1124,
|
|
412
|
-
|
|
413
|
-
Standard_E16s_v3: 736, Standard_E32s_v3: 1472,
|
|
414
|
-
|
|
435
|
+
// B-series (burstable)
|
|
436
|
+
Standard_B1s: 8, Standard_B2s: 30, Standard_B2ms: 60, Standard_B4ms: 120,
|
|
437
|
+
// D-series (general purpose) — v3/v4/v5 similar pricing
|
|
438
|
+
Standard_D2s_v3: 70, Standard_D4s_v3: 140, Standard_D8s_v3: 281, Standard_D16s_v3: 562, Standard_D32s_v3: 1124,
|
|
439
|
+
Standard_D2s_v5: 70, Standard_D4s_v5: 140, Standard_D8s_v5: 281, Standard_D16s_v5: 562, Standard_D32s_v5: 1124, Standard_D64s_v5: 2249,
|
|
440
|
+
// E-series (memory optimized)
|
|
441
|
+
Standard_E2s_v3: 92, Standard_E4s_v3: 184, Standard_E8s_v3: 368, Standard_E16s_v3: 736, Standard_E32s_v3: 1472,
|
|
442
|
+
Standard_E2s_v5: 92, Standard_E4s_v5: 184, Standard_E8s_v5: 368, Standard_E16s_v5: 736, Standard_E32s_v5: 1472, Standard_E64s_v5: 2621,
|
|
443
|
+
// F-series (compute optimized)
|
|
444
|
+
Standard_F2s_v2: 62, Standard_F4s_v2: 124, Standard_F8s_v2: 248, Standard_F16s_v2: 496, Standard_F32s_v2: 992,
|
|
415
445
|
};
|
|
416
446
|
|
|
417
447
|
let output = "Azure VMs\n" + "=".repeat(75) + "\n";
|
|
@@ -854,16 +854,20 @@ export function fopsUpCmd(publicUrl, { k3s, traefik, dai } = {}) {
|
|
|
854
854
|
].join("; ");
|
|
855
855
|
|
|
856
856
|
const debugPostamble = [
|
|
857
|
-
`echo \\\"=== fops up finished at \\$(date -Iseconds) with exit code
|
|
857
|
+
`echo \\\"=== fops up finished at \\$(date -Iseconds) with exit code \\$_fops_rc ===\\\" >> ${logFile}`,
|
|
858
858
|
`echo \\\"--- Container status ---\\\" >> ${logFile}`,
|
|
859
859
|
`docker compose ps --format 'table {{.Name}}\\t{{.Status}}' >> ${logFile} 2>&1`,
|
|
860
860
|
`echo \\\"--- Recent docker events ---\\\" >> ${logFile}`,
|
|
861
861
|
`tail -50 ${eventsLog} >> ${logFile} 2>&1 || true`,
|
|
862
|
+
`exit \\$_fops_rc`,
|
|
862
863
|
].join("; ");
|
|
863
864
|
|
|
865
|
+
// Fail fast if Docker is not installed
|
|
866
|
+
const dockerGuard = `command -v docker >/dev/null 2>&1 || { echo \\\"ERROR: Docker is not installed — cannot start Foundation\\\" >> ${logFile}; echo \\\"ERROR: Docker is not installed\\\" >&2; exit 1; }`;
|
|
867
|
+
|
|
864
868
|
// Run from project dir with FOUNDATION_ROOT set explicitly (sudo can reset cwd)
|
|
865
869
|
const envSetup = `export PATH=/usr/local/bin:/usr/bin:\\$PATH FOUNDATION_ROOT=/opt/foundation-compose`;
|
|
866
|
-
return `bash -c "cd /opt/foundation-compose && ${envSetup}; ${debugPreamble}; ${quietPull}; if command -v fops >/dev/null 2>&1; then ${profileEnv}${fopsCmd}; else echo 'fops not found — falling back to docker compose'; ${composeCmd}; fi; ${debugPostamble}"`;
|
|
870
|
+
return `bash -c "cd /opt/foundation-compose && ${envSetup}; ${dockerGuard}; ${debugPreamble}; ${quietPull}; if command -v fops >/dev/null 2>&1; then ${profileEnv}${fopsCmd}; else echo 'fops not found — falling back to docker compose'; ${composeCmd}; fi; _fops_rc=\\$?; ${debugPostamble}"`;
|
|
867
871
|
}
|
|
868
872
|
|
|
869
873
|
/** Build remote "fops up [component] [branch]" args (same as local fops up). For foreground run on VM. */
|
|
@@ -321,6 +321,71 @@ export async function azureTrinoStatus(opts = {}) {
|
|
|
321
321
|
console.log("");
|
|
322
322
|
}
|
|
323
323
|
|
|
324
|
+
// ── ping ─────────────────────────────────────────────────────────────────────
|
|
325
|
+
|
|
326
|
+
/**
 * Check Foundation backend /api/ping/json health endpoint on a VM.
 *
 * Resolves the VM via `requireVmState`, knocks and waits for SSH, then runs
 * `curl` against http://localhost:9001/api/ping/json on the VM over SSH and
 * prints the overall status, the reported tag and each individual check.
 *
 * @param {object} [opts]
 * @param {string} [opts.vmName] - VM name, passed to `requireVmState`.
 * @param {string} [opts.token] - Value for the `X-Ping-Token` header. Falls
 *   back to the `FOPS_PING_TOKEN` environment variable; the header is omitted
 *   when neither is set.
 * @returns {Promise<void>} Results are written to the console only.
 */
export async function azurePing(opts = {}) {
  // POSIX single-quote escaping: the token ends up in a command line that is
  // interpreted by the remote shell, so it must not be able to close the
  // quotes or trigger expansion ($(), backticks, variables).
  const shQuote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;

  const execa = await lazyExeca();
  const state = requireVmState(opts.vmName);
  const { vmName } = state;
  const ip = state.publicIp;
  const adminUser = DEFAULTS.adminUser;

  if (!ip) {
    console.log(WARN(`  VM ${vmName} has no public IP (probably stopped)`));
    return;
  }

  await knockForVm(state);
  const sshOk = await waitForSsh(execa, ip, adminUser, 10000);
  if (!sshOk) {
    console.log(WARN("\n  ⚠ SSH not reachable"));
    return;
  }

  const pingToken = opts.token || process.env.FOPS_PING_TOKEN || "";
  const tokenHeader = pingToken ? `-H ${shQuote(`X-Ping-Token: ${pingToken}`)}` : "";
  // `|| echo '{}'` makes a failed curl look like an empty JSON object, which
  // is reported below as "no response".
  const { stdout } = await sshCmd(execa, ip, adminUser,
    `curl -sf ${tokenHeader} http://localhost:9001/api/ping/json 2>/dev/null || echo '{}'`,
    15000,
  );

  let ping;
  try {
    ping = JSON.parse(String(stdout ?? "").trim() || "{}");
  } catch {
    console.log(ERR(`  Failed to parse ping response: ${stdout}`));
    return;
  }

  banner(`Ping: ${vmName}`);

  // Valid JSON that is not an object (e.g. `null`) carries no status either.
  if (ping === null || typeof ping !== "object" || ping.ok === undefined) {
    console.log(WARN("  No response from backend /api/ping/json"));
    hint("Backend may be down or starting up");
    console.log("");
    return;
  }

  const overall = ping.ok ? OK("✓ healthy") : ERR("✗ unhealthy");
  kvLine("Status", overall);
  if (ping.tag) kvLine("Tag", DIM(ping.tag));

  if (ping.checks) {
    console.log("");
    console.log(ACCENT("  Checks:"));
    for (const [name, check] of Object.entries(ping.checks)) {
      const status = check?.ok ? OK("✓") : ERR("✗");
      const latency = check?.latency_ms !== undefined ? DIM(` (${check.latency_ms}ms)`) : "";
      const err = check?.error ? ERR(` — ${check.error}`) : "";
      console.log(`    ${status} ${name}${latency}${err}`);
    }
  }
  console.log("");
}
|
|
388
|
+
|
|
324
389
|
/**
|
|
325
390
|
* Run VM diagnostics: show config versions, then run make download and print
|
|
326
391
|
* full output so image-pull failures (e.g. after config versions change) can be diagnosed.
|
|
@@ -1295,6 +1360,41 @@ export async function azureList(opts = {}) {
|
|
|
1295
1360
|
}
|
|
1296
1361
|
} catch { /* az not available or not authenticated */ }
|
|
1297
1362
|
|
|
1363
|
+
// Always discover AKS clusters from Azure (tag managed=fops)
|
|
1364
|
+
try {
|
|
1365
|
+
const execa = await lazyExeca();
|
|
1366
|
+
const { writeClusterState } = await import("./azure-aks-state.js");
|
|
1367
|
+
const { stdout, exitCode } = await execa("az", [
|
|
1368
|
+
"aks", "list",
|
|
1369
|
+
"--query", "[?tags.managed=='fops']",
|
|
1370
|
+
"--output", "json",
|
|
1371
|
+
...subArgs(opts.subscription),
|
|
1372
|
+
], { timeout: 60000, reject: false });
|
|
1373
|
+
if (exitCode === 0 && stdout?.trim()) {
|
|
1374
|
+
const discovered = JSON.parse(stdout);
|
|
1375
|
+
let added = 0;
|
|
1376
|
+
for (const cl of discovered) {
|
|
1377
|
+
if (aksClusters[cl.name]) continue;
|
|
1378
|
+
writeClusterState(cl.name, {
|
|
1379
|
+
resourceGroup: cl.resourceGroup,
|
|
1380
|
+
location: cl.location,
|
|
1381
|
+
kubernetesVersion: cl.kubernetesVersion,
|
|
1382
|
+
fqdn: cl.fqdn,
|
|
1383
|
+
nodeCount: cl.agentPoolProfiles?.reduce((s, p) => s + (p.count || 0), 0) || 0,
|
|
1384
|
+
nodeVmSize: cl.agentPoolProfiles?.[0]?.vmSize || "unknown",
|
|
1385
|
+
subscriptionId: cl.id?.split("/")[2],
|
|
1386
|
+
});
|
|
1387
|
+
added++;
|
|
1388
|
+
}
|
|
1389
|
+
if (added > 0) {
|
|
1390
|
+
console.log(OK(` ✓ Re-discovered ${added} AKS cluster(s) from Azure`) + DIM(" (tag managed=fops)\n"));
|
|
1391
|
+
fullState = readState();
|
|
1392
|
+
aksClusters = (fullState.azure || {}).clusters || {};
|
|
1393
|
+
hasAks = Object.keys(aksClusters).length > 0;
|
|
1394
|
+
}
|
|
1395
|
+
}
|
|
1396
|
+
} catch { /* az not available or AKS discovery failed */ }
|
|
1397
|
+
|
|
1298
1398
|
// JSON output mode - early return with structured data
|
|
1299
1399
|
if (opts.json) {
|
|
1300
1400
|
const output = {
|
|
@@ -1568,10 +1668,9 @@ export async function azureList(opts = {}) {
|
|
|
1568
1668
|
const hasPrimary = primaryName && clusterNames.includes(primaryName);
|
|
1569
1669
|
const prefix = isStandby && hasPrimary ? " └─" : "";
|
|
1570
1670
|
const dot = active ? OK("●") : DIM("○");
|
|
1571
|
-
const
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
const cNameTxt = active ? OK(displayName.padEnd(maxCName + 13)) : LABEL(displayName.padEnd(maxCName + 13));
|
|
1671
|
+
const paddedName = cr.name.padEnd(maxCName);
|
|
1672
|
+
const standbySuffix = isStandby && hasPrimary ? ` ${DIM("(HA standby)")}` : "";
|
|
1673
|
+
const cNameTxt = active ? OK(paddedName) + standbySuffix : LABEL(paddedName) + standbySuffix;
|
|
1575
1674
|
const loc = (cl?.location || cr.location || "–").padEnd(10);
|
|
1576
1675
|
const nodes = cr.nodes != null ? `${cr.nodes} x ${cr.sizes || "?"}` : "–";
|
|
1577
1676
|
const k8s = (cr.kubernetesVersion || "–").padEnd(6);
|
|
@@ -1640,12 +1739,19 @@ export function printServiceMatrix(results, nameWidth) {
|
|
|
1640
1739
|
const withSvc = results.filter(r => r.services && Object.keys(r.services).length > 0);
|
|
1641
1740
|
if (withSvc.length === 0) return;
|
|
1642
1741
|
|
|
1742
|
+
// Resolve display value for a service entry (supports both string and {tag,sha} formats)
|
|
1743
|
+
const svcVal = (entry) => {
|
|
1744
|
+
if (!entry) return null;
|
|
1745
|
+
if (typeof entry === "string") return entry;
|
|
1746
|
+
return entry.sha || entry.tag || null;
|
|
1747
|
+
};
|
|
1748
|
+
|
|
1643
1749
|
// Find the majority value per column to highlight drift
|
|
1644
1750
|
const majority = {};
|
|
1645
1751
|
for (const svc of SVC_ORDER) {
|
|
1646
1752
|
const counts = {};
|
|
1647
1753
|
for (const r of withSvc) {
|
|
1648
|
-
const v = r.services?.[svc];
|
|
1754
|
+
const v = svcVal(r.services?.[svc]);
|
|
1649
1755
|
if (v) counts[v] = (counts[v] || 0) + 1;
|
|
1650
1756
|
}
|
|
1651
1757
|
const sorted = Object.entries(counts).sort((a, b) => b[1] - a[1]);
|
|
@@ -1660,7 +1766,7 @@ export function printServiceMatrix(results, nameWidth) {
|
|
|
1660
1766
|
for (const r of withSvc) {
|
|
1661
1767
|
const nameTxt = LABEL(r.name.padEnd(nameWidth));
|
|
1662
1768
|
const cells = SVC_ORDER.map(svc => {
|
|
1663
|
-
const v = r.services?.[svc] || "–";
|
|
1769
|
+
const v = svcVal(r.services?.[svc]) || "–";
|
|
1664
1770
|
const display = v.padEnd(colW);
|
|
1665
1771
|
if (v === "–") return DIM(display);
|
|
1666
1772
|
if (v !== majority[svc]) return WARN(display);
|
|
@@ -1671,7 +1777,7 @@ export function printServiceMatrix(results, nameWidth) {
|
|
|
1671
1777
|
|
|
1672
1778
|
// Check for drift
|
|
1673
1779
|
const hasDrift = SVC_ORDER.some(svc => {
|
|
1674
|
-
const vals = withSvc.map(r => r.services?.[svc]).filter(Boolean);
|
|
1780
|
+
const vals = withSvc.map(r => svcVal(r.services?.[svc])).filter(Boolean);
|
|
1675
1781
|
return new Set(vals).size > 1;
|
|
1676
1782
|
});
|
|
1677
1783
|
if (hasDrift) {
|
|
@@ -44,19 +44,26 @@ export async function provisionVm(execa, ip, adminUser, { githubToken, branch =
|
|
|
44
44
|
"apt-get install -y -qq apt-transport-https ca-certificates curl gnupg lsb-release jq git make unzip zsh software-properties-common python3-venv python3-pip",
|
|
45
45
|
].join("\n"), 300000);
|
|
46
46
|
|
|
47
|
-
await runScript("Installing Docker", [
|
|
47
|
+
const dockerExit = await runScript("Installing Docker", [
|
|
48
48
|
waitAptLock,
|
|
49
49
|
"export DEBIAN_FRONTEND=noninteractive",
|
|
50
50
|
"install -m 0755 -d /etc/apt/keyrings",
|
|
51
|
-
"curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg",
|
|
51
|
+
"curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --batch --yes --dearmor -o /etc/apt/keyrings/docker.gpg",
|
|
52
52
|
"chmod a+r /etc/apt/keyrings/docker.gpg",
|
|
53
53
|
`echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" > /etc/apt/sources.list.d/docker.list`,
|
|
54
|
-
"
|
|
55
|
-
"apt-get install -y -qq docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin",
|
|
54
|
+
"set +e",
|
|
55
|
+
"for _ in 1 2 3 4 5; do if apt-get update -qq && apt-get install -y -qq docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin; then break; fi; echo 'Retrying Docker install in 10s…'; sleep 10; done",
|
|
56
|
+
"set -e",
|
|
57
|
+
"command -v docker >/dev/null 2>&1 || (echo 'Docker not found after install attempts' && exit 1)",
|
|
56
58
|
"systemctl enable docker && systemctl start docker",
|
|
57
59
|
`usermod -aG docker ${adminUser}`,
|
|
58
60
|
].join("\n"), 300000);
|
|
59
61
|
|
|
62
|
+
if (dockerExit !== 0) {
|
|
63
|
+
console.log(WARN(" ✗ Docker installation failed — cannot continue provisioning"));
|
|
64
|
+
throw new Error("Docker installation failed");
|
|
65
|
+
}
|
|
66
|
+
|
|
60
67
|
await runScript("Configuring br_netfilter for k3s DNS", [
|
|
61
68
|
"modprobe br_netfilter",
|
|
62
69
|
"echo br_netfilter > /etc/modules-load.d/br_netfilter.conf",
|
|
@@ -178,6 +185,8 @@ export async function provisionVm(execa, ip, adminUser, { githubToken, branch =
|
|
|
178
185
|
Project dir: /opt/foundation-compose
|
|
179
186
|
|
|
180
187
|
MOTD`,
|
|
188
|
+
`grep -q 'cd /opt/foundation-compose' /home/${adminUser}/.bashrc 2>/dev/null || echo 'cd /opt/foundation-compose' >> /home/${adminUser}/.bashrc`,
|
|
189
|
+
`grep -q 'cd /opt/foundation-compose' /home/${adminUser}/.zshrc 2>/dev/null || echo 'cd /opt/foundation-compose' >> /home/${adminUser}/.zshrc`,
|
|
181
190
|
].join("\n"));
|
|
182
191
|
|
|
183
192
|
await ssh("sudo apt-get clean && sudo rm -rf /var/lib/apt/lists/*", 30000);
|