@meshxdata/fops 0.1.36 → 0.1.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +207 -0
- package/fops.mjs +37 -14
- package/package.json +1 -1
- package/src/agent/llm.js +2 -0
- package/src/auth/azure.js +92 -0
- package/src/auth/cloudflare.js +125 -0
- package/src/auth/index.js +2 -0
- package/src/commands/index.js +8 -4
- package/src/commands/lifecycle.js +31 -10
- package/src/plugins/bundled/fops-plugin-azure/index.js +44 -2896
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks.js +130 -2
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-auth.js +497 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-helpers.js +51 -13
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops.js +206 -52
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-provision.js +128 -34
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-shared-cache.js +1 -1
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-sync.js +4 -4
- package/src/plugins/bundled/fops-plugin-azure/lib/azure.js +2 -2
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/fleet-cmds.js +254 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/infra-cmds.js +894 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/test-cmds.js +314 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/vm-cmds.js +893 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/dai-backend.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/dai-frontend.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-backend.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-frontend.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-hive.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-kafka.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-meltano.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-mlflow.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-opa.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-processor.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-scheduler.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-storage-engine.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-trino.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/apps/foundation-watcher.yaml +13 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/config/repository.yaml +66 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/kustomization.yaml +30 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/acr-webhook-controller.yaml +63 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/externalsecrets.yaml +15 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/istio.yaml +42 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/kafka.yaml +15 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/kube-reflector.yaml +33 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/kubecost.yaml +12 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/nats-server.yaml +15 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/prometheus-agent.yaml +34 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/reloader.yaml +12 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/spark.yaml +112 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/tailscale.yaml +67 -0
- package/src/plugins/bundled/fops-plugin-azure/templates/cluster/operator/vertical-pod-autoscaler.yaml +15 -0
- package/src/plugins/bundled/fops-plugin-file/demo/orders_renamed.aligned.csv +1 -1
- package/src/plugins/bundled/fops-plugin-file/index.js +81 -12
- package/src/plugins/bundled/fops-plugin-file/lib/match.js +133 -15
- package/src/plugins/bundled/fops-plugin-file/lib/report.js +3 -0
- package/src/plugins/bundled/fops-plugin-foundation/index.js +26 -6
- package/src/plugins/bundled/fops-plugin-foundation/lib/client.js +9 -5
- package/src/plugins/bundled/fops-plugin-foundation-graphql/lib/graphql/resolvers/data-product.js +32 -0
- package/src/plugins/bundled/fops-plugin-foundation-graphql/lib/graphql/schema.js +20 -1
- package/src/plugins/loader.js +23 -6
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
DEFAULTS, DIM, OK, WARN, ERR,
|
|
9
9
|
banner, hint, kvLine,
|
|
10
10
|
resolvePublicIp, subArgs, buildTags, fetchMyIp,
|
|
11
|
-
sshCmd, waitForSsh, fopsUpCmd, buildPublicUrl,
|
|
11
|
+
sshCmd, waitForSsh, closeMux, fopsUpCmd, buildPublicUrl,
|
|
12
12
|
runReconcilers, ensureOpenAiNetworkAccess,
|
|
13
13
|
reconcileOk, RECONCILE_LABEL_WIDTH,
|
|
14
14
|
} from "./azure-helpers.js";
|
|
@@ -80,7 +80,7 @@ export async function configureVm(execa, ip, user, publicUrl, { githubToken, k3s
|
|
|
80
80
|
|
|
81
81
|
console.log(chalk.dim(" Configuring VM..."));
|
|
82
82
|
|
|
83
|
-
// Batch: sshd tuning + docker group + ownership
|
|
83
|
+
// Batch: sshd tuning + docker group + ownership — single SSH round-trip
|
|
84
84
|
const setupBatch = [
|
|
85
85
|
// Speed up SSH: accept forwarded env vars, disable DNS reverse lookup
|
|
86
86
|
`sudo grep -q '^AcceptEnv.*BEARER_TOKEN' /etc/ssh/sshd_config 2>/dev/null || {`,
|
|
@@ -91,22 +91,10 @@ export async function configureVm(execa, ip, user, publicUrl, { githubToken, k3s
|
|
|
91
91
|
`}`,
|
|
92
92
|
`sudo usermod -aG docker ${user} 2>/dev/null; true`,
|
|
93
93
|
"sudo chown -R azureuser:azureuser /opt/foundation-compose 2>/dev/null; true",
|
|
94
|
-
|
|
95
|
-
`
|
|
96
|
-
`if grep -q '^FOUNDATION_PUBLIC_URL=' .env 2>/dev/null; then`,
|
|
97
|
-
` sed -i 's|^FOUNDATION_PUBLIC_URL=.*|FOUNDATION_PUBLIC_URL=${publicUrl}|' .env;`,
|
|
98
|
-
`else`,
|
|
99
|
-
` echo 'FOUNDATION_PUBLIC_URL=${publicUrl}' >> .env;`,
|
|
100
|
-
`fi`,
|
|
101
|
-
// Persist COMPOSE_PROFILES so k3s/watcher/traefik start on manual restarts too
|
|
102
|
-
`if grep -q '^COMPOSE_PROFILES=' .env 2>/dev/null; then`,
|
|
103
|
-
` sed -i 's|^COMPOSE_PROFILES=.*|COMPOSE_PROFILES=k3s,traefik${dai ? ",dai" : ""}|' .env;`,
|
|
104
|
-
`else`,
|
|
105
|
-
` echo 'COMPOSE_PROFILES=k3s,traefik${dai ? ",dai" : ""}' >> .env;`,
|
|
106
|
-
`fi`,
|
|
94
|
+
// Only inject FOUNDATION_PUBLIC_URL if not already set — never overwrite
|
|
95
|
+
`cd /opt/foundation-compose && grep -q '^FOUNDATION_PUBLIC_URL=' .env 2>/dev/null || echo 'FOUNDATION_PUBLIC_URL=${publicUrl}' >> .env`,
|
|
107
96
|
].join("\n");
|
|
108
97
|
await ssh(setupBatch);
|
|
109
|
-
console.log(chalk.green(` ✓ FOUNDATION_PUBLIC_URL=${publicUrl}`));
|
|
110
98
|
|
|
111
99
|
let ghcrOk = false;
|
|
112
100
|
if (githubToken) {
|
|
@@ -448,7 +436,36 @@ async function vmReconcileNetworking(ctx) {
|
|
|
448
436
|
}
|
|
449
437
|
}
|
|
450
438
|
} else {
|
|
451
|
-
|
|
439
|
+
// No public IP attached — create one and attach it
|
|
440
|
+
const pipName = `${vmName}PublicIP`;
|
|
441
|
+
console.log(chalk.dim(` ↻ ${"Public IP".padEnd(RECONCILE_LABEL_WIDTH)} — creating ${pipName}…`));
|
|
442
|
+
const { exitCode: createCode } = await execa("az", [
|
|
443
|
+
"network", "public-ip", "create", "-g", rg, "-n", pipName,
|
|
444
|
+
"--sku", "Standard", "--allocation-method", "Static", "--output", "none",
|
|
445
|
+
...subArgs(sub),
|
|
446
|
+
], { reject: false, timeout: 30000 });
|
|
447
|
+
if (createCode === 0) {
|
|
448
|
+
const { exitCode: attachCode } = await execa("az", [
|
|
449
|
+
"network", "nic", "ip-config", "update",
|
|
450
|
+
"-g", rg, "--nic-name", ctx.nicName, "-n", "ipconfig1",
|
|
451
|
+
"--public-ip-address", pipName, "--output", "none",
|
|
452
|
+
...subArgs(sub),
|
|
453
|
+
], { reject: false, timeout: 30000 });
|
|
454
|
+
if (attachCode === 0) {
|
|
455
|
+
const { stdout: newPipJson } = await execa("az", [
|
|
456
|
+
"network", "public-ip", "show", "-g", rg, "-n", pipName, "--output", "json",
|
|
457
|
+
...subArgs(sub),
|
|
458
|
+
], { reject: false, timeout: 15000 });
|
|
459
|
+
try {
|
|
460
|
+
ctx.ip = JSON.parse(newPipJson).ipAddress || "";
|
|
461
|
+
} catch {}
|
|
462
|
+
reconcileOk("Public IP", ctx.ip ? `${ctx.ip} (created)` : `${pipName} (created)`);
|
|
463
|
+
} else {
|
|
464
|
+
console.log(chalk.yellow(` ⚠ Created ${pipName} but could not attach to NIC`));
|
|
465
|
+
}
|
|
466
|
+
} else {
|
|
467
|
+
console.log(chalk.yellow(` ⚠ Could not create public IP — VM will have no public IP`));
|
|
468
|
+
}
|
|
452
469
|
}
|
|
453
470
|
|
|
454
471
|
const nsgId = nic.networkSecurityGroup?.id || "";
|
|
@@ -560,6 +577,7 @@ async function vmReconcileNsg(ctx) {
|
|
|
560
577
|
...subArgs(sub),
|
|
561
578
|
], { reject: false, timeout: 30000 });
|
|
562
579
|
reconcileOk("SSH (22)", source);
|
|
580
|
+
ctx.operatorIpChanged = true;
|
|
563
581
|
} else if (sshSource && !sshHasMyIp) {
|
|
564
582
|
// Add our IP to existing admin IPs
|
|
565
583
|
const merged = [...new Set([...sshCurrentSources, sshSource])];
|
|
@@ -573,6 +591,7 @@ async function vmReconcileNsg(ctx) {
|
|
|
573
591
|
...subArgs(sub),
|
|
574
592
|
], { reject: false, timeout: 30000 });
|
|
575
593
|
reconcileOk("SSH (22)", merged.join(", "));
|
|
594
|
+
ctx.operatorIpChanged = true;
|
|
576
595
|
} else {
|
|
577
596
|
reconcileOk("SSH (22)", sshCurrentSources.join(", ") || "*");
|
|
578
597
|
}
|
|
@@ -1026,14 +1045,46 @@ async function removeSshBypassViaRunCommand(execa, rg, vmName, sourceCidr, sub)
|
|
|
1026
1045
|
// ── Step: SSH reachability ───────────────────────────────────────────────────
|
|
1027
1046
|
|
|
1028
1047
|
async function vmReconcileSsh(ctx) {
|
|
1029
|
-
const { execa, ip, adminUser,
|
|
1048
|
+
const { execa, ip, adminUser, port, desiredUrl, vmName, rg, sub } = ctx;
|
|
1030
1049
|
console.log(chalk.dim(" Checking SSH..."));
|
|
1031
|
-
|
|
1050
|
+
|
|
1051
|
+
// Always fetch the knock sequence fresh from the Azure VM tag — local state can drift
|
|
1052
|
+
// after state recovery or manual changes, causing knocks with the wrong sequence.
|
|
1053
|
+
let knockSequence = ctx.knockSequence;
|
|
1054
|
+
if (rg) {
|
|
1055
|
+
try {
|
|
1056
|
+
const { stdout: tagRaw } = await execa("az", [
|
|
1057
|
+
"vm", "show", "-g", rg, "-n", vmName,
|
|
1058
|
+
"--query", "tags.fopsKnock", "-o", "tsv", ...subArgs(sub),
|
|
1059
|
+
], { timeout: 15000, reject: false });
|
|
1060
|
+
const raw = (tagRaw || "").trim().replace(/[()]/g, "");
|
|
1061
|
+
if (raw) {
|
|
1062
|
+
const fresh = raw.split(/[-,]/).map((s) => parseInt(s.trim(), 10)).filter((n) => Number.isInteger(n) && n > 0);
|
|
1063
|
+
if (fresh.length >= 2) {
|
|
1064
|
+
if (!knockSequence?.length || fresh.some((v, i) => v !== knockSequence[i])) {
|
|
1065
|
+
console.log(chalk.dim(" Syncing knock sequence from Azure tag..."));
|
|
1066
|
+
knockSequence = fresh;
|
|
1067
|
+
ctx.knockSequence = fresh;
|
|
1068
|
+
writeVmState(vmName, { knockSequence: fresh });
|
|
1069
|
+
}
|
|
1070
|
+
}
|
|
1071
|
+
}
|
|
1072
|
+
} catch { /* best-effort */ }
|
|
1073
|
+
}
|
|
1074
|
+
|
|
1075
|
+
// Close any stale mux master before probing. ControlPersist=600 keeps the SSH master
|
|
1076
|
+
// alive for 10 min, but if the VM rebooted its underlying TCP is broken — every probe
|
|
1077
|
+
// through the stale mux fails even after a successful knock. A fresh connect fixes it.
|
|
1078
|
+
await closeMux(execa, ip, adminUser);
|
|
1079
|
+
// When Run Command is available (rg set), use a short knock probe then fall through to
|
|
1080
|
+
// Run Command rather than burning 2+ minutes on knock retries.
|
|
1081
|
+
const canRunCommand = knockSequence?.length && rg;
|
|
1082
|
+
const maxWaitFirst = canRunCommand ? 20000 : 90000;
|
|
1032
1083
|
if (knockSequence?.length) {
|
|
1033
1084
|
await performKnock(ip, knockSequence, { quiet: true });
|
|
1034
1085
|
}
|
|
1035
1086
|
let sshReady = await waitForSsh(execa, ip, adminUser, maxWaitFirst);
|
|
1036
|
-
if (!sshReady && knockSequence?.length) {
|
|
1087
|
+
if (!sshReady && knockSequence?.length && !canRunCommand) {
|
|
1037
1088
|
console.log(chalk.dim(" Re-sending knock and retrying SSH..."));
|
|
1038
1089
|
await performKnock(ip, knockSequence, { quiet: true });
|
|
1039
1090
|
await new Promise((r) => setTimeout(r, 5000));
|
|
@@ -1056,15 +1107,44 @@ async function vmReconcileSsh(ctx) {
|
|
|
1056
1107
|
}
|
|
1057
1108
|
}
|
|
1058
1109
|
if (!sshReady) {
|
|
1059
|
-
|
|
1060
|
-
if (
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1110
|
+
// Verify actual VM power state via Azure CLI — the earlier instance-view may be stale
|
|
1111
|
+
if (rg) {
|
|
1112
|
+
try {
|
|
1113
|
+
const { stdout: showJson } = await execa("az", [
|
|
1114
|
+
"vm", "show", "--resource-group", rg, "--name", vmName,
|
|
1115
|
+
"--show-details", "--output", "json", ...subArgs(sub),
|
|
1116
|
+
], { timeout: 20000, reject: false });
|
|
1117
|
+
const vmDetails = JSON.parse(showJson || "{}");
|
|
1118
|
+
const powerState = vmDetails?.powerState || "";
|
|
1119
|
+
if (powerState && !powerState.includes("running")) {
|
|
1120
|
+
console.log(chalk.yellow(` ⚠ Azure reports VM power state: ${chalk.bold(powerState)} — starting VM...`));
|
|
1121
|
+
await execa("az", [
|
|
1122
|
+
"vm", "start", "--resource-group", rg, "--name", vmName, "--output", "none",
|
|
1123
|
+
...subArgs(sub),
|
|
1124
|
+
], { timeout: 300000 });
|
|
1125
|
+
reconcileOk("VM", "started");
|
|
1126
|
+
// Give the VM a moment then retry SSH once more
|
|
1127
|
+
await new Promise((r) => setTimeout(r, 10000));
|
|
1128
|
+
if (knockSequence?.length) await performKnock(ip, knockSequence, { quiet: true });
|
|
1129
|
+
sshReady = await waitForSsh(execa, ip, adminUser, 60000);
|
|
1130
|
+
} else if (powerState) {
|
|
1131
|
+
console.log(chalk.dim(` Azure VM power state: ${powerState}`));
|
|
1132
|
+
}
|
|
1133
|
+
} catch {
|
|
1134
|
+
// best-effort
|
|
1135
|
+
}
|
|
1136
|
+
}
|
|
1137
|
+
if (!sshReady) {
|
|
1138
|
+
console.log(chalk.yellow(" ⚠ SSH not reachable — VM may still be booting. Skipping in-guest checks."));
|
|
1139
|
+
if (knockSequence?.length) {
|
|
1140
|
+
console.log(chalk.dim(" If knock is enabled, the stored sequence may not match the VM (e.g. after state recovery)."));
|
|
1141
|
+
console.log(chalk.dim(" Try: fops azure knock open " + ctx.vmName + " then immediately fops azure ssh " + ctx.vmName));
|
|
1142
|
+
console.log(chalk.dim(" Or remove knock: fops azure knock disable " + ctx.vmName));
|
|
1143
|
+
}
|
|
1144
|
+
ctx.publicUrl = desiredUrl || buildPublicUrl(ip, port);
|
|
1145
|
+
ctx.done = true;
|
|
1146
|
+
return;
|
|
1064
1147
|
}
|
|
1065
|
-
ctx.publicUrl = desiredUrl || buildPublicUrl(ip, port);
|
|
1066
|
-
ctx.done = true;
|
|
1067
|
-
return;
|
|
1068
1148
|
}
|
|
1069
1149
|
reconcileOk("SSH", "reachable");
|
|
1070
1150
|
}
|
|
@@ -1081,8 +1161,25 @@ async function vmReconcileKnock(ctx) {
|
|
|
1081
1161
|
);
|
|
1082
1162
|
const knockdRunning = knockdCheck?.trim() === "active";
|
|
1083
1163
|
|
|
1084
|
-
|
|
1085
|
-
|
|
1164
|
+
// Read the authoritative sequence from knockd.conf on the VM
|
|
1165
|
+
const { stdout: knockConfOut } = await ssh(
|
|
1166
|
+
"grep -m1 'sequence' /etc/knockd.conf 2>/dev/null | sed 's/.*sequence[[:space:]]*=[[:space:]]*//'", 8000,
|
|
1167
|
+
);
|
|
1168
|
+
const vmKnockSeq = (knockConfOut || "").trim()
|
|
1169
|
+
.split(",").map((s) => parseInt(s.trim(), 10)).filter((n) => Number.isInteger(n) && n > 0);
|
|
1170
|
+
|
|
1171
|
+
if (knockdRunning && vmKnockSeq.length >= 2) {
|
|
1172
|
+
// Reconcile: keep tag + local state in sync with knockd.conf
|
|
1173
|
+
const local = ctx.knockSequence;
|
|
1174
|
+
const seqDiffers = !local?.length || local.length !== vmKnockSeq.length || local.some((v, i) => v !== vmKnockSeq[i]);
|
|
1175
|
+
if (seqDiffers) {
|
|
1176
|
+
console.log(chalk.dim(` ↻ ${"Port knocking".padEnd(RECONCILE_LABEL_WIDTH)} — sequence drift detected, syncing…`));
|
|
1177
|
+
writeVmState(vmName, { knockSequence: vmKnockSeq });
|
|
1178
|
+
const { setVmKnockTag } = await import("./azure-helpers.js");
|
|
1179
|
+
await setVmKnockTag(execa, ctx.rg, vmName, vmKnockSeq, ctx.sub);
|
|
1180
|
+
ctx.knockSequence = vmKnockSeq;
|
|
1181
|
+
}
|
|
1182
|
+
reconcileOk("Port knocking", `active [${vmKnockSeq.join(", ")}]`);
|
|
1086
1183
|
} else if (!knockdRunning) {
|
|
1087
1184
|
console.log(chalk.dim(" Setting up port knocking (after post-start + UI reachable)..."));
|
|
1088
1185
|
const knockSeq = generateKnockSequence();
|
|
@@ -1092,8 +1189,7 @@ async function vmReconcileKnock(ctx) {
|
|
|
1092
1189
|
await setVmKnockTag(execa, ctx.rg, vmName, knockSeq, ctx.sub);
|
|
1093
1190
|
ctx.knockSequence = knockSeq;
|
|
1094
1191
|
} else {
|
|
1095
|
-
console.log(chalk.yellow(" ⚠ knockd active on VM but
|
|
1096
|
-
console.log(chalk.dim(" To re-create: fops azure knock disable && fops azure up"));
|
|
1192
|
+
console.log(chalk.yellow(" ⚠ knockd active on VM but could not read sequence from knockd.conf"));
|
|
1097
1193
|
}
|
|
1098
1194
|
|
|
1099
1195
|
if (ctx.knockSequence?.length) {
|
|
@@ -1590,10 +1686,8 @@ export async function provisionVm(execa, ip, adminUser, { githubToken, branch =
|
|
|
1590
1686
|
}
|
|
1591
1687
|
|
|
1592
1688
|
await runScript("Cloning foundation-compose", [
|
|
1593
|
-
"cp /opt/foundation-compose/.env /tmp/.env.fops-backup 2>/dev/null || true",
|
|
1594
1689
|
"rm -rf /opt/foundation-compose",
|
|
1595
1690
|
`git clone --branch ${branch} --depth 1 --recurse-submodules https://github.com/meshxdata/foundation-compose.git /opt/foundation-compose`,
|
|
1596
|
-
"if [ -f /tmp/.env.fops-backup ]; then cp /tmp/.env.fops-backup /opt/foundation-compose/.env; rm -f /tmp/.env.fops-backup; else cp /opt/foundation-compose/.env.example /opt/foundation-compose/.env; fi",
|
|
1597
1691
|
"mkdir -p /opt/foundation-compose/credentials",
|
|
1598
1692
|
"touch /opt/foundation-compose/credentials/kubeconfig.yaml",
|
|
1599
1693
|
`chown -R ${adminUser}:${adminUser} /opt/foundation-compose`,
|
|
@@ -22,7 +22,7 @@ import { readState, listVms } from "./azure-state.js";
|
|
|
22
22
|
// fops_by = alessio (who synced)
|
|
23
23
|
|
|
24
24
|
const TAG_PREFIX = "fops_";
|
|
25
|
-
const TAG_MAX_AGE_MS =
|
|
25
|
+
const TAG_MAX_AGE_MS = 20 * 60 * 1000; // 20 minutes — tags are cheaper to check
|
|
26
26
|
|
|
27
27
|
// ── Write: publish probe results as tags on a VM ─────────────────────────────
|
|
28
28
|
|
|
@@ -12,7 +12,7 @@ import {
|
|
|
12
12
|
// Stored in ~/.fops.json under azure.cache:
|
|
13
13
|
// { updatedAt, vms: { <name>: { ... } }, clusters: { <name>: { ... } } }
|
|
14
14
|
|
|
15
|
-
const CACHE_MAX_AGE_MS =
|
|
15
|
+
const CACHE_MAX_AGE_MS = 15 * 60 * 1000; // 15 minutes
|
|
16
16
|
|
|
17
17
|
// Short keys for the 6 tracked Foundation services
|
|
18
18
|
const SVC_MAP = {
|
|
@@ -169,16 +169,16 @@ async function syncVms(execa) {
|
|
|
169
169
|
|
|
170
170
|
// After a knock, iptables rule needs a moment to propagate; first SSH needs full handshake.
|
|
171
171
|
// Brief delay then retry once to avoid false "unreachable" (e.g. uaenorth latency).
|
|
172
|
-
await new Promise((r) => setTimeout(r,
|
|
172
|
+
await new Promise((r) => setTimeout(r, 400));
|
|
173
173
|
let sshOk = false;
|
|
174
174
|
for (let attempt = 0; attempt < 2; attempt++) {
|
|
175
175
|
const { exitCode: sshCode } = await execa("ssh", [
|
|
176
176
|
...MUX_OPTS(vm.publicIp, DEFAULTS.adminUser),
|
|
177
177
|
"-o", "BatchMode=yes",
|
|
178
178
|
`${DEFAULTS.adminUser}@${vm.publicIp}`, "echo ok",
|
|
179
|
-
], { timeout:
|
|
179
|
+
], { timeout: 8000, reject: false }).catch(() => ({ exitCode: 1 }));
|
|
180
180
|
if (sshCode === 0) { sshOk = true; break; }
|
|
181
|
-
if (attempt === 0) await new Promise((r) => setTimeout(r,
|
|
181
|
+
if (attempt === 0) await new Promise((r) => setTimeout(r, 1000));
|
|
182
182
|
}
|
|
183
183
|
|
|
184
184
|
if (!sshOk) {
|
|
@@ -27,7 +27,7 @@ export {
|
|
|
27
27
|
managedImageId, resolvePublicIp,
|
|
28
28
|
resolveGithubToken, verifyGithubToken,
|
|
29
29
|
sshCmd, closeMux, MUX_OPTS, muxSocketPath, waitForSsh,
|
|
30
|
-
knockForVm, fopsUpCmd,
|
|
30
|
+
knockForVm, ensureKnockSequence, fopsUpCmd,
|
|
31
31
|
runReconcilers, ensureOpenAiNetworkAccess,
|
|
32
32
|
} from "./azure-helpers.js";
|
|
33
33
|
|
|
@@ -46,7 +46,7 @@ export {
|
|
|
46
46
|
|
|
47
47
|
// ── VM operations ────────────────────────────────────────────────────────────
|
|
48
48
|
export {
|
|
49
|
-
azureStatus, azureTrinoStatus, azureSsh, azurePortForward, azureSshAdminAdd, azureVmCheck, azureAgent, azureOpenAiDebugVm,
|
|
49
|
+
azureStatus, azureTrinoStatus, azureSsh, azureSshWhitelistMe, azurePortForward, azureSshAdminAdd, azureVmCheck, azureAgent, azureOpenAiDebugVm,
|
|
50
50
|
azureDeploy, azurePull, azureDeployVersion, azureRunUp, azureConfig, azureConfigVersions, azureUpdate,
|
|
51
51
|
azureLogs, azureGrantAdmin, azureContext,
|
|
52
52
|
azureList, azureApply,
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fleet management, swarm, audit, snapshot/restore commands.
|
|
3
|
+
* All operations target multiple Azure VMs or the swarm cluster.
|
|
4
|
+
*/
|
|
5
|
+
import chalk from "chalk";
|
|
6
|
+
|
|
7
|
+
/**
 * Register the fleet, audit and swarm subcommands on the given command object.
 *
 * Every action lazily imports its implementation ("../azure-fleet.js" or
 * "../azure-audit.js") only when the command is actually invoked. Numeric and
 * list-valued options are validated/normalized BEFORE that import, so a typo
 * such as `--timeout abc` fails immediately with a readable message instead of
 * passing NaN to the implementation.
 *
 * @param {object} azure - Command object exposing the chainable
 *   `command()/description()/option()/requiredOption()/action()` builder API
 *   used below (presumably a commander.js Command — confirm against caller).
 * @returns {void}
 */
export function registerFleetCommands(azure) {
  // ── Fleet management ───────────────────────────────────────────────────

  azure
    .command("exec <command>")
    .description("Run a command on all tracked VMs in parallel")
    .option("--vm-name <name>", "Target a specific VM instead of all")
    .option("--timeout <seconds>", "Per-VM command timeout (default: 120)", "120")
    .option("--quiet", "Show only summary, not command output")
    .action(async (command, opts) => {
      const timeout = parseNumericOption(opts.timeout, "--timeout");
      const { fleetExec } = await import("../azure-fleet.js");
      await fleetExec(command, { vmName: opts.vmName, timeout, quiet: opts.quiet });
    });

  azure
    .command("diff")
    .description("Compare configuration across all VMs — detect drift")
    .option("--vm-name <name>", "Target a specific VM instead of all")
    .action(async (opts) => {
      const { fleetDiff } = await import("../azure-fleet.js");
      await fleetDiff({ vmName: opts.vmName });
    });

  azure
    .command("rollout")
    .description("Rolling deploy: pull, restart, health-check across VMs in batches")
    .option("--vm-name <name>", "Target a specific VM instead of all")
    .option("--batch <size>", "Number of VMs to deploy in parallel per batch (default: 1)", "1")
    .option("--branch <branch>", "Git branch to deploy (default: main)")
    .option("--health-timeout <seconds>", "Seconds to wait for healthy after restart (default: 120)", "120")
    .option("--force", "Continue rolling out even if a batch fails")
    .action(async (opts) => {
      const batch = parseNumericOption(opts.batch, "--batch");
      const healthTimeout = parseNumericOption(opts.healthTimeout, "--health-timeout");
      const { fleetRollout } = await import("../azure-fleet.js");
      await fleetRollout({
        vmName: opts.vmName,
        batch,
        branch: opts.branch,
        healthTimeout,
        force: opts.force,
      });
    });

  azure
    .command("sync")
    .description("Push local config files to all VMs")
    .option("--vm-name <name>", "Target a specific VM instead of all")
    .option("--files <files>", "Comma-separated list of files to sync (default: docker-compose.yaml,.env)", "docker-compose.yaml,.env")
    .option("--restart", "Run fops up after syncing files")
    .action(async (opts) => {
      const files = parseListOption(opts.files);
      const { fleetSync } = await import("../azure-fleet.js");
      await fleetSync({ vmName: opts.vmName, files, restart: opts.restart });
    });

  azure
    .command("health")
    .description("Deep health report across all VMs — containers, disk, memory, load")
    .option("--vm-name <name>", "Target a specific VM instead of all")
    .action(async (opts) => {
      const { fleetHealth } = await import("../azure-fleet.js");
      await fleetHealth({ vmName: opts.vmName });
    });

  azure
    .command("snapshot [name]")
    .description("Create Azure disk snapshots of all (or one) tracked VMs")
    .option("--vm-name <name>", "Target a specific VM instead of all")
    .option("--tag <tag>", "Snapshot tag/label (default: ISO timestamp)")
    .option("--profile <subscription>", "Azure subscription name or ID")
    .action(async (name, opts) => {
      const { fleetSnapshot } = await import("../azure-fleet.js");
      // The explicit --vm-name flag wins over the positional [name].
      await fleetSnapshot({ vmName: opts.vmName || name, tag: opts.tag, profile: opts.profile });
    });

  azure
    .command("restore <name>")
    .description("Restore a VM from an Azure disk snapshot")
    .option("--snapshot <name>", "Snapshot name to restore from (omit to list available)")
    .option("--profile <subscription>", "Azure subscription name or ID")
    .option("--yes", "Skip confirmation prompt")
    .action(async (name, opts) => {
      const { fleetRestore } = await import("../azure-fleet.js");
      await fleetRestore({ vmName: name, snapshot: opts.snapshot, profile: opts.profile, yes: opts.yes });
    });

  // ── Audit subcommands ──────────────────────────────────────────────────

  const audit = azure
    .command("audit")
    .description("Security & compliance audit across Azure resources");

  audit
    .command("all", { isDefault: true })
    .description("Run all audit checks (VMs, AKS, storage encryption)")
    .option("--profile <subscription>", "Azure subscription name or ID")
    .option("--vm-name <name>", "Limit VM audit to a specific VM")
    .option("--cluster <name>", "Limit AKS audit to a specific cluster")
    .option("--account <name>", "Limit storage audit to a specific account")
    .option("--verbose", "Show info-level suggestions alongside warnings")
    .action(async (opts) => {
      const { auditAll } = await import("../azure-audit.js");
      await auditAll({
        profile: opts.profile,
        vmName: opts.vmName,
        clusterName: opts.cluster,
        account: opts.account,
        verbose: opts.verbose,
      });
    });

  audit
    .command("vm [vmName]")
    .description("Audit VM security — disk encryption, NSG rules, managed identity, patching")
    .option("--profile <subscription>", "Azure subscription name or ID")
    .option("--vm-name <name>", "Audit a specific VM (default: all tracked)")
    .option("--verbose", "Show info-level suggestions alongside warnings")
    .action(async (vmName, opts) => {
      const { auditVms } = await import("../azure-audit.js");
      // The explicit --vm-name flag wins over the positional [vmName].
      await auditVms({ profile: opts.profile, vmName: opts.vmName || vmName, verbose: opts.verbose });
    });

  audit
    .command("aks [clusterName]")
    .description("Audit AKS cluster security — RBAC, network policy, auto-upgrade, Defender")
    .option("--profile <subscription>", "Azure subscription name or ID")
    .option("--verbose", "Show info-level suggestions alongside warnings")
    .action(async (clusterName, opts) => {
      const { auditAks } = await import("../azure-audit.js");
      await auditAks({ profile: opts.profile, clusterName, verbose: opts.verbose });
    });

  audit
    .command("storage")
    .description("Audit storage account encryption & security posture")
    .option("--profile <subscription>", "Azure subscription name or ID")
    .option("--account <name>", "Audit a specific storage account (default: all)")
    .option("--verbose", "Show info-level suggestions alongside warnings")
    .action(async (opts) => {
      const { auditStorage } = await import("../azure-audit.js");
      await auditStorage({ profile: opts.profile, account: opts.account, verbose: opts.verbose });
    });

  audit
    .command("sessions [vmName]")
    .description("View SSH session recordings across the fleet")
    .option("--session <id>", "Read a specific session by ID")
    .option("--live", "Show only active (live) sessions")
    .option("--cloud", "Read from blob storage instead of SSH")
    .option("--push", "Push collected sessions to blob storage")
    .option("--last <n>", "Show only the last N sessions", "50")
    .option("--tail <n>", "When reading a session, show only the last N lines")
    .action(async (vmName, opts) => {
      const last = parseNumericOption(opts.last, "--last");
      // --tail has no default: leave it undefined when the flag was not given.
      const tail = opts.tail ? parseNumericOption(opts.tail, "--tail") : undefined;
      const { fleetAudit } = await import("../azure-fleet.js");
      await fleetAudit({
        vmName,
        session: opts.session,
        live: opts.live,
        cloud: opts.cloud,
        push: opts.push,
        last,
        tail,
      });
    });

  audit
    .command("zap [vmName]")
    .description("Run an authenticated OWASP ZAP DAST scan against a VM's frontend")
    .option("--vm-name <name>", "Target VM (default: active)")
    .option("--target <url>", "Override target URL (default: https://<vm>.meshx.app)")
    .option("--output <dir>", "Directory for JSON report (default: cwd)")
    .option("--spider-minutes <n>", "Spider duration in minutes (default: 3)", "3")
    .option("--ajax-minutes <n>", "Ajax spider duration in minutes (default: 3)", "3")
    .option("--active-scan-minutes <n>", "Active scan rule timeout in minutes (default: 5)", "5")
    .option("--max-minutes <n>", "Overall scan timeout in minutes (default: 20)", "20")
    .option("--verbose", "Show fix suggestions for each finding")
    .option("--aggressive", "Pentest mode: Penetration Tester policy, longer crawl/scan")
    .action(async (vmName, opts) => {
      // With --aggressive every duration is passed as undefined, regardless of
      // the individual --*-minutes flags; otherwise each given value is parsed.
      const minutes = (raw, flag) =>
        (opts.aggressive || !raw ? undefined : parseNumericOption(raw, flag));
      const spiderMinutes = minutes(opts.spiderMinutes, "--spider-minutes");
      const ajaxMinutes = minutes(opts.ajaxMinutes, "--ajax-minutes");
      const activeScanMinutes = minutes(opts.activeScanMinutes, "--active-scan-minutes");
      const maxMinutes = minutes(opts.maxMinutes, "--max-minutes");
      const { auditZap } = await import("../azure-audit.js");
      await auditZap({
        vmName: opts.vmName || vmName,
        target: opts.target,
        output: opts.output,
        spiderMinutes,
        ajaxMinutes,
        activeScanMinutes,
        maxMinutes,
        verbose: opts.verbose,
        aggressive: opts.aggressive,
      });
    });

  // ── Swarm subcommands ──────────────────────────────────────────────────

  const swarm = azure
    .command("swarm")
    .description("Docker Swarm cluster management");

  swarm
    .command("init <vmName>")
    .description("Initialize Docker Swarm on a VM (single-node manager)")
    .option("--stack", "Also deploy the compose stack as swarm services")
    .action(async (vmName, opts) => {
      const { swarmInit } = await import("../azure-fleet.js");
      await swarmInit({ vmName, stack: opts.stack });
    });

  swarm
    .command("join <vmName>")
    .description("Join a VM as a worker to an existing swarm (auto-creates the VM if it doesn't exist)")
    .requiredOption("--manager <name>", "Manager VM to join")
    .option("--as-manager", "Join as a manager instead of worker")
    .option("--vm-size <size>", "VM size when auto-creating (default: inherited from manager)")
    .option("--location <region>", "Azure region when auto-creating (default: inherited from manager)")
    .option("--image <urn>", "Custom image URN when auto-creating")
    .option("--url <url>", "Public URL override when auto-creating")
    .option("--profile <subscription>", "Azure subscription when auto-creating")
    .action(async (vmName, opts) => {
      const { swarmJoin } = await import("../azure-fleet.js");
      await swarmJoin({
        vmName,
        manager: opts.manager,
        asManager: opts.asManager,
        vmSize: opts.vmSize,
        location: opts.location,
        image: opts.image,
        url: opts.url,
        profile: opts.profile,
      });
    });

  swarm
    .command("status [vmName]")
    .description("Show swarm node and service status")
    .action(async (vmName) => {
      const { swarmStatus } = await import("../azure-fleet.js");
      await swarmStatus({ vmName });
    });

  swarm
    .command("promote <vmName>")
    .description("Promote a swarm worker to manager")
    .action(async (vmName) => {
      const { swarmPromote } = await import("../azure-fleet.js");
      await swarmPromote({ vmName });
    });

  swarm
    .command("deploy [vmName]")
    .description("Deploy the compose stack as swarm services (or update existing)")
    .action(async (vmName) => {
      const { swarmDeploy } = await import("../azure-fleet.js");
      await swarmDeploy({ vmName });
    });

  swarm
    .command("leave <vmName>")
    .description("Remove a VM from the swarm")
    .option("--force", "Force leave (required for managers)")
    .action(async (vmName, opts) => {
      const { swarmLeave } = await import("../azure-fleet.js");
      await swarmLeave({ vmName, force: opts.force });
    });
}

/**
 * Convert a raw CLI option value to a finite number.
 *
 * @param {string|number} value - Raw option value as received by the action.
 * @param {string} flag - Flag name used in the error message (e.g. "--timeout").
 * @returns {number} The parsed finite number.
 * @throws {Error} When the value is blank or does not parse to a finite number.
 */
function parseNumericOption(value, flag) {
  const text = String(value).trim();
  const parsed = Number(text);
  // Number("") is 0, so a blank value must be rejected explicitly.
  if (text === "" || !Number.isFinite(parsed)) {
    throw new Error(`Invalid value for ${flag}: expected a number, got "${value}"`);
  }
  return parsed;
}

/**
 * Split a comma-separated option value into trimmed, non-empty entries,
 * so `"a, b,"` yields `["a", "b"]` rather than `["a", " b", ""]`.
 *
 * @param {string} value - Raw comma-separated option value.
 * @returns {string[]} The cleaned entries, in their original order.
 */
function parseListOption(value) {
  return String(value)
    .split(",")
    .map((entry) => entry.trim())
    .filter((entry) => entry !== "");
}
|