@meshxdata/fops 0.1.49 → 0.1.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +368 -0
- package/package.json +1 -1
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-core.js +347 -6
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-data-bootstrap.js +421 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-flux.js +5 -179
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-naming.js +14 -4
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-postgres.js +171 -4
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-storage.js +303 -8
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks.js +2 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-auth.js +1 -1
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-fleet-swarm.js +936 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-fleet.js +10 -918
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-helpers.js +5 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-keyvault-keys.js +413 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-keyvault.js +14 -399
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops-config.js +754 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops-knock.js +527 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops-ssh.js +427 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops.js +99 -1686
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-provision-health.js +279 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-provision-init.js +186 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-provision.js +66 -444
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-results.js +11 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-vm-lifecycle.js +5 -540
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-vm-terraform.js +544 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/infra-cmds.js +75 -3
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/test-cmds.js +227 -11
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/vm-cmds.js +2 -1
- package/src/plugins/bundled/fops-plugin-azure/lib/pytest-parse.js +21 -0
- package/src/plugins/bundled/fops-plugin-foundation/index.js +371 -44
|
@@ -0,0 +1,936 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* azure-fleet-swarm.js
|
|
3
|
+
* Docker Swarm cluster management for Azure VMs.
|
|
4
|
+
* Extracted from azure-fleet.js for maintainability.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import chalk from "chalk";
|
|
8
|
+
import { performKnock, closeKnock, keepKnockAlive } from "./port-knock.js";
|
|
9
|
+
import {
|
|
10
|
+
DEFAULTS, DIM, OK, WARN, ERR, LABEL, ACCENT,
|
|
11
|
+
banner, hint, kvLine,
|
|
12
|
+
lazyExeca,
|
|
13
|
+
listVms, readVmState, writeVmState,
|
|
14
|
+
sshCmd, closeMux,
|
|
15
|
+
knockForVm,
|
|
16
|
+
azureUp,
|
|
17
|
+
} from "./azure.js";
|
|
18
|
+
|
|
19
|
+
// ── swarm — Docker Swarm cluster management ─────────────────────────────────
|
|
20
|
+
|
|
21
|
+
/**
 * Initialize Docker Swarm on a VM (single-node manager).
 * Optionally converts the running compose stack to a swarm stack deploy.
 *
 * @param {object} opts
 * @param {string} opts.vmName - Tracked VM to turn into a swarm manager (required).
 * @param {boolean} [opts.stack] - When true, also deploy the compose stack as swarm services.
 */
export async function swarmInit(opts = {}) {
  const execa = await lazyExeca();
  const vmName = opts.vmName;
  if (!vmName) {
    console.error(ERR("\n Usage: fops azure swarm init <vmName>\n"));
    process.exit(1);
  }

  const vm = readVmState(vmName);
  if (!vm) {
    console.error(ERR(`\n VM "${vmName}" not tracked. Run: fops azure list\n`));
    process.exit(1);
  }

  banner("Swarm Init");
  kvLine("VM", LABEL(vmName));
  kvLine("IP", DIM(vm.publicIp));
  console.log("");

  // Local state already records a swarm role — nothing to do.
  if (vm.swarm?.role) {
    console.log(WARN(` VM "${vmName}" is already a swarm ${vm.swarm.role}.`));
    hint("Use: fops azure swarm status " + vmName);
    console.log("");
    return;
  }

  await knockForVm(vm);
  const stopKnockKeepAlive = keepKnockAlive(vm.publicIp, vm.knockSequence);
  const user = DEFAULTS.adminUser;
  const ssh = (cmd, t) => sshCmd(execa, vm.publicIp, user, cmd, t || 30000);

  // Shared by both the "already active" and fresh-init paths: generate the
  // swarm config, ensure external overlay networks exist, then stack-deploy
  // with retry. (Was duplicated verbatim in both branches.)
  const deployStack = async () => {
    const nets = await genSwarmConfig(ssh);
    await ensureOverlayNetworks(ssh, nets, hint);
    const { exitCode: stackCode, stdout: stackOut } = await deployStackWithRetry(
      ssh, nets, { hint, OK, WARN },
    );
    if (stackCode === 0) {
      console.log(OK(" ✓ Stack deployed as swarm services"));
    } else {
      console.log(WARN(` Stack deploy returned warnings:\n${(stackOut || "").trim()}`));
    }
  };

  try {
    // The swarm advertise address must be the VM's VNet-internal private IP.
    hint("Detecting private IP…");
    const { stdout: privateIp } = await ssh(
      "hostname -I | awk '{print $1}'"
    );
    const advertiseAddr = (privateIp || "").trim();
    if (!advertiseAddr) {
      console.error(ERR(" Could not detect private IP"));
      process.exit(1);
    }
    kvLine("Advertise", DIM(advertiseAddr));

    // If the daemon is already in swarm mode (state file was stale), just
    // refresh local state from the live node instead of re-initializing.
    const { stdout: swarmStatus } = await ssh("sudo docker info --format '{{.Swarm.LocalNodeState}}'");
    if ((swarmStatus || "").trim() === "active") {
      console.log(WARN("\n Docker Swarm is already active on this node."));
      const { stdout: nodeId } = await ssh("sudo docker info --format '{{.Swarm.NodeID}}'");
      const { stdout: managerToken } = await ssh("sudo docker swarm join-token manager -q 2>/dev/null || echo ''");
      const { stdout: workerToken } = await ssh("sudo docker swarm join-token worker -q 2>/dev/null || echo ''");

      writeVmState(vmName, {
        swarm: {
          role: "manager",
          nodeId: (nodeId || "").trim(),
          advertiseAddr,
          managerToken: (managerToken || "").trim(),
          workerToken: (workerToken || "").trim(),
          initAt: vm.swarm?.initAt || new Date().toISOString(),
        },
      });

      if (opts.stack) {
        hint("Deploying compose stack as swarm services…");
        await deployStack();
      } else {
        hint("State updated. Run: fops azure swarm status " + vmName);
      }
      console.log("");
      return;
    }

    // Fresh init path.
    hint("Initializing Docker Swarm…");
    const { stdout: initOut, exitCode: initCode } = await ssh(
      `sudo docker swarm init --advertise-addr ${advertiseAddr} 2>&1`,
      60000,
    );
    if (initCode !== 0) {
      console.error(ERR(` Swarm init failed:\n${initOut}`));
      process.exit(1);
    }

    // Collect join tokens and node ID so `swarm join` can run later without SSH.
    const { stdout: nodeId } = await ssh("sudo docker info --format '{{.Swarm.NodeID}}'");
    const { stdout: managerToken } = await ssh("sudo docker swarm join-token manager -q");
    const { stdout: workerToken } = await ssh("sudo docker swarm join-token worker -q");

    const swarmState = {
      role: "manager",
      nodeId: (nodeId || "").trim(),
      advertiseAddr,
      managerToken: (managerToken || "").trim(),
      workerToken: (workerToken || "").trim(),
      initAt: new Date().toISOString(),
    };

    writeVmState(vmName, { swarm: swarmState });

    console.log(OK("\n ✓ Docker Swarm initialized"));
    kvLine("Node ID", DIM(swarmState.nodeId));
    console.log("");

    // Optionally convert the running compose stack to a stack deploy.
    if (opts.stack) {
      hint("Converting compose stack to swarm services…");
      await deployStack();
      console.log("");
    }

    hint("Add workers: fops azure swarm join <workerVm> --manager " + vmName);
    hint("Check: fops azure swarm status " + vmName);
    if (!opts.stack) {
      hint("Deploy stack: fops azure swarm deploy " + vmName);
    }
    console.log("");
  } finally {
    // Always close the knock window and SSH control socket, even on error.
    stopKnockKeepAlive();
    if (vm.knockSequence?.length) {
      const sshFn = (cmd) => sshCmd(execa, vm.publicIp, user, cmd);
      await closeKnock(sshFn, { quiet: true }).catch(() => {});
    }
    await closeMux(execa, vm.publicIp, user);
  }
}
|
|
171
|
+
|
|
172
|
+
/**
 * SSH commands to generate a swarm-compatible compose file.
 * Fixes: top-level name, cpus (must be string), ports.published (must be int),
 * depends_on (must be list not map).
 * Non-external networks become stack-managed overlays so docker stack deploy
 * creates them atomically with the services (no raft race).
 * Truly external networks are collected so the deploy flow can pre-create them.
 *
 * The script reads `docker compose config` YAML on stdin, writes fixed YAML
 * to stdout, and records external network names to /tmp/_swarm_networks.json.
 *
 * NOTE(review): the Python indentation below was reconstructed from a
 * whitespace-stripped rendering — verify byte-for-byte against the published
 * package before relying on exact output formatting.
 */
const SWARM_FIXUP_PY = [
  "import sys, yaml, json",
  "doc = yaml.safe_load(sys.stdin)",
  // `docker stack deploy` rejects the compose v2 top-level `name` key.
  'doc.pop("name", None)',
  "def fix(svc):",
  // Swarm requires deploy cpu limits/reservations to be strings.
  '    for p in [("deploy","resources","limits"),("deploy","resources","reservations")]:',
  "        n = svc",
  "        for k in p: n = n.get(k, {}) if isinstance(n, dict) else {}",
  '        if "cpus" in n and not isinstance(n["cpus"], str): n["cpus"] = str(n["cpus"])',
  // Swarm requires ports.published as int; host_ip is unsupported.
  '    for port in svc.get("ports", []):',
  "        if isinstance(port, dict):",
  "            port.pop('host_ip', None)",
  '            if "published" in port:',
  '                try: port["published"] = int(port["published"])',
  "                except (ValueError, TypeError): pass",
  "    svc.pop('profiles', None)",
  // depends_on maps (with conditions) must become plain lists for swarm.
  '    deps = svc.get("depends_on")',
  "    if isinstance(deps, dict): svc['depends_on'] = list(deps.keys())",
  // Static per-service addresses are not supported on overlay networks.
  '    for net_name, net_cfg in list(svc.get("networks", {}).items()):',
  "        if isinstance(net_cfg, dict) and net_cfg.get('ipv4_address'):",
  "            del net_cfg['ipv4_address']",
  "        if isinstance(net_cfg, dict) and net_cfg.get('ipv6_address'):",
  "            del net_cfg['ipv6_address']",
  'for s in (doc.get("services") or {}).values(): fix(s)',
  "# Non-external networks: let docker stack deploy manage them as overlays.",
  "# Only truly external networks need pre-creation by the deploy flow.",
  "net_names = []",
  'for key, net in list((doc.get("networks") or {}).items()):',
  "    if net is None:",
  "        doc['networks'][key] = {'driver': 'overlay', 'attachable': True}",
  "    elif isinstance(net, dict) and not net.get('external'):",
  "        net.clear()",
  "        net['driver'] = 'overlay'",
  "        net['attachable'] = True",
  "    elif isinstance(net, dict) and net.get('external'):",
  "        net_names.append(net.get('name', key))",
  "yaml.dump(doc, sys.stdout, default_flow_style=False, sort_keys=False)",
  "with open('/tmp/_swarm_networks.json', 'w') as f: json.dump(net_names, f)",
].join("\n");

// Base64-encoded so the script can be shipped over SSH in a single command
// without shell-quoting issues.
const SWARM_FIXUP_B64 = Buffer.from(SWARM_FIXUP_PY).toString("base64");
|
|
221
|
+
|
|
222
|
+
/**
 * Generate the swarm-compatible config on the remote host and return the
 * list of truly-external network names that must be pre-created as overlay
 * networks before deploying.
 *
 * @param {(cmd: string, timeoutMs?: number) => Promise<{stdout?: string}>} ssh
 * @returns {Promise<string[]>} external network names (empty on parse failure)
 */
async function genSwarmConfig(ssh) {
  // Ship the fix-up script to the VM (base64 avoids shell-quoting pitfalls).
  await ssh(
    `echo '${SWARM_FIXUP_B64}' | base64 -d > /tmp/_swarm_fix.py`,
    10000,
  );
  // Render the compose config through the fix-up script into the swarm file.
  await ssh(
    "cd /opt/foundation-compose && sudo docker compose config 2>/dev/null | python3 /tmp/_swarm_fix.py > /tmp/docker-compose-swarm.yaml",
    60000,
  );
  // The fix-up script records external network names as JSON on the host.
  const { stdout: rawNetworks } = await ssh(
    "cat /tmp/_swarm_networks.json 2>/dev/null || echo '[]'",
  );
  let parsed;
  try {
    parsed = JSON.parse(rawNetworks?.trim() || "[]");
  } catch {
    parsed = [];
  }
  return parsed;
}
|
|
240
|
+
|
|
241
|
+
/**
 * Ensure all required overlay networks exist before deploying.
 * Networks already present with swarm scope are left untouched; anything
 * missing (or with a non-swarm scope) is created as an attachable overlay.
 *
 * @param {(cmd: string, timeoutMs?: number) => Promise<{stdout?: string}>} ssh
 * @param {string[]} networkNames - overlay networks required by the stack
 * @param {(msg: string) => void} hint - progress printer
 */
async function ensureOverlayNetworks(ssh, networkNames, hint) {
  if (!networkNames?.length) return;
  for (const name of networkNames) {
    const inspectResult = await ssh(
      `sudo docker network inspect ${name} --format '{{.Scope}}' 2>/dev/null || echo missing`,
    );
    const scope = (inspectResult.stdout || "").trim();
    if (scope === "swarm") continue;
    hint(`  Creating overlay network ${name}…`);
    await ssh(`sudo docker network create --driver overlay --attachable ${name} 2>&1 || true`);
  }
}
|
|
256
|
+
|
|
257
|
+
/**
 * Deploy the swarm stack with retry logic for raft propagation delays.
 * `docker stack deploy` queries the raft store for external networks, which
 * can lag behind the local daemon. Retries on "network ... not found" errors;
 * any other outcome (success or a different failure) returns immediately.
 *
 * @param {(cmd: string, timeoutMs?: number) => Promise<{stdout?: string, exitCode?: number}>} ssh
 * @param {string[]} overlayNetworks - external networks to re-verify between attempts
 * @param {object} [cfg] - { hint, OK, WARN, maxRetries, retryDelayMs }
 * @returns {Promise<{exitCode: number, stdout: string}>} last deploy attempt's result
 */
async function deployStackWithRetry(ssh, overlayNetworks, { hint, OK, WARN, maxRetries = 5, retryDelayMs = 15000 } = {}) {
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  let deployCode;
  let deployOut;

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    const result = await ssh(
      "cd /opt/foundation-compose && sudo docker stack deploy --with-registry-auth -c /tmp/docker-compose-swarm.yaml foundation 2>&1",
      300000,
    );
    deployCode = result.exitCode;
    deployOut = result.stdout || "";

    // Retry only on the raft-lag signature; stop on success, on any other
    // failure, or once we are out of attempts.
    const raftLagged = /network\s+\S+\s+not found/i.test(deployOut);
    if (!raftLagged || attempt === maxRetries) break;

    if (WARN) console.log(WARN(`  Network not yet visible in raft store — retry ${attempt + 1}/${maxRetries}…`));

    // Tear down the partially-created stack so the retry starts clean,
    // then poll until it actually disappears.
    await ssh("sudo docker stack rm foundation 2>&1 || true", 60000);
    for (let poll = 0; poll < 10; poll++) {
      const { stdout: stillPresent } = await ssh(
        "sudo docker stack ls --format '{{.Name}}' 2>/dev/null | grep -qx foundation && echo yes || echo no",
      );
      if (stillPresent?.trim() !== "yes") break;
      await sleep(2000);
    }

    // Re-verify / recreate the external overlay networks before retrying.
    for (const name of (overlayNetworks || [])) {
      const { stdout: scope } = await ssh(
        `sudo docker network inspect ${name} --format '{{.Scope}}' 2>/dev/null || echo missing`,
      );
      if ((scope || "").trim() !== "swarm") {
        await ssh(`sudo docker network create --driver overlay --attachable ${name} 2>&1 || true`);
      }
    }
    await sleep(retryDelayMs);
  }

  return { exitCode: deployCode, stdout: deployOut };
}
|
|
306
|
+
|
|
307
|
+
/**
 * Detect and remove broken overlay networks before a swarm deploy.
 * On multi-node swarms, overlay corruption manifests as a peer count lower
 * than the node count, causing "no configured subnet" task rejections on
 * worker nodes. Removing the stack lets docker stack deploy recreate fresh
 * networks. No-op on single-node swarms or when every overlay is healthy.
 *
 * @param {(cmd: string, timeoutMs?: number) => Promise<{stdout?: string}>} ssh
 * @param {(msg: string) => void} hint
 * @param {(msg: string) => string} OK
 * @param {(msg: string) => string} WARN
 */
async function reconcileOverlayNetworks(ssh, hint, OK, WARN) {
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const stackPresentCmd =
    "sudo docker stack ls --format '{{.Name}}' 2>/dev/null | grep -qx foundation && echo yes || echo no";

  const { stdout: countOut } = await ssh(
    "sudo docker node ls -q 2>/dev/null | wc -l",
  );
  const nodeTotal = parseInt(countOut?.trim(), 10) || 1;
  // Peer mismatches are only possible with more than one node.
  if (nodeTotal <= 1) return;

  const { stdout: overlayList } = await ssh(
    "sudo docker network ls --filter driver=overlay --format '{{.Name}}' 2>/dev/null | grep -E '^foundation[_-]' || true",
  );
  if (!overlayList?.trim()) return;

  // A healthy overlay lists one peer per swarm node.
  let degraded = false;
  for (const name of overlayList.trim().split("\n").filter(Boolean)) {
    const { stdout: peersJson } = await ssh(
      `sudo docker network inspect ${name} --format '{{json .Peers}}' 2>/dev/null || echo '[]'`,
    );
    let peerTotal = 0;
    try { peerTotal = JSON.parse(peersJson?.trim() || "[]").length; } catch {}
    if (peerTotal < nodeTotal) {
      hint(`Overlay "${name}" has ${peerTotal}/${nodeTotal} peers — network is degraded`);
      degraded = true;
    }
  }
  if (!degraded) return;

  // Remove the stack so docker stack deploy recreates fresh overlay networks.
  const { stdout: hasStack } = await ssh(stackPresentCmd);
  if (hasStack?.trim() === "yes") {
    hint("Removing stack to recreate overlay networks…");
    await ssh("sudo docker stack rm foundation 2>&1 || true", 60000);
  }

  // Stack removal is asynchronous — poll until it is gone.
  for (let poll = 0; poll < 15; poll++) {
    const { stdout: stillPresent } = await ssh(stackPresentCmd);
    if (stillPresent?.trim() !== "yes") break;
    hint("Waiting for stack removal…");
    await sleep(2000);
  }

  // Remove any leftover overlay networks (including legacy foundation-compose_ ones).
  for (let poll = 0; poll < 20; poll++) {
    const { stdout: leftovers } = await ssh(
      "sudo docker network ls --filter driver=overlay --format '{{.Name}}' 2>/dev/null | grep -E '^foundation[_-]' || true",
    );
    if (!leftovers?.trim()) break;
    for (const name of leftovers.trim().split("\n").filter(Boolean)) {
      await ssh(`sudo docker network rm ${name} 2>&1 || true`);
    }
    await sleep(2000);
  }

  // Final settle delay before the caller redeploys.
  await sleep(5000);
  console.log(OK(" ✓ Stale overlay networks cleared"));
}
|
|
375
|
+
|
|
376
|
+
/**
 * After stack deploy, wait briefly for services to converge and detect
 * overlay network issues early. If tasks are rejected with subnet errors,
 * tear down and retry once — docker stack deploy recreates its own networks.
 *
 * @param {(cmd: string, timeoutMs?: number) => Promise<{stdout?: string, exitCode?: number}>} ssh
 * @param {(msg: string) => void} hint - progress printer
 * @param {(msg: string) => string} OK - success colorizer
 * @param {(msg: string) => string} WARN - warning colorizer
 */
async function waitForSwarmConvergence(ssh, hint, OK, WARN) {
  hint("Waiting for services to converge…");
  // Fixed grace period before inspecting task state.
  await new Promise(r => setTimeout(r, 15000));

  // Surface the first task error for each foundation_ service still at 0/N replicas.
  const { stdout: failedTasks } = await ssh(
    "sudo docker service ls --filter 'name=foundation_' --format '{{.Name}} {{.Replicas}}' 2>/dev/null " +
    "| awk '$2 ~ /^0\\//' | while read svc rest; do " +
    "  err=$(sudo docker service ps \"$svc\" --no-trunc --format '{{.Error}}' --filter 'desired-state=shutdown' 2>/dev/null | head -1); " +
    "  [ -n \"$err\" ] && echo \"$svc: $err\"; " +
    "done",
    30000,
  );
  if (failedTasks?.trim()) {
    console.log(WARN("\n Tasks with errors:"));
    for (const line of failedTasks.trim().split("\n").filter(Boolean)) {
      console.log(WARN(`  ${line}`));
    }
  }

  // Count shutdown tasks rejected with "no configured subnet" — the
  // signature of a corrupted overlay network.
  const { stdout: rejectedOut } = await ssh(
    "sudo docker service ls -q --filter 'name=foundation_' 2>/dev/null " +
    "| xargs -I{} sudo docker service ps {} --no-trunc --format '{{.Error}}' --filter 'desired-state=shutdown' 2>/dev/null " +
    "| grep -c 'no configured subnet' || echo 0",
    30000,
  );
  const rejectCount = parseInt(rejectedOut?.trim(), 10) || 0;
  if (rejectCount === 0) return;

  console.log(WARN(`\n ⚠ ${rejectCount} task(s) rejected with overlay network errors — retrying deploy…`));

  // Remove the stack (also removes its managed overlay networks), then poll
  // until removal completes — stack rm is asynchronous.
  await ssh("sudo docker stack rm foundation 2>&1 || true", 60000);
  for (let i = 0; i < 15; i++) {
    const { stdout: stackStill } = await ssh(
      "sudo docker stack ls --format '{{.Name}}' 2>/dev/null | grep -qx foundation && echo yes || echo no",
    );
    if (stackStill?.trim() !== "yes") break;
    await new Promise(r => setTimeout(r, 2000));
  }

  // Remove any lingering overlay networks that stack rm didn't clean up.
  const { stdout: netList } = await ssh(
    "sudo docker network ls --filter driver=overlay --format '{{.Name}}' 2>/dev/null | grep -E '^foundation[_-]' || true",
  );
  for (const net of (netList?.trim() || "").split("\n").filter(Boolean)) {
    await ssh(`sudo docker network rm ${net} 2>&1 || true`);
  }
  // Settle delay before redeploying.
  await new Promise(r => setTimeout(r, 5000));

  // Single retry: pass no external networks — the stack manages its own.
  hint("Redeploying stack…");
  const { exitCode, stdout } = await deployStackWithRetry(
    ssh, [], { hint, OK, WARN },
  );
  if (exitCode === 0) {
    console.log(OK(" ✓ Stack redeployed successfully"));
  } else {
    console.log(WARN(`  Redeploy warnings:\n${(stdout || "").trim()}`));
  }
}
|
|
441
|
+
|
|
442
|
+
/**
 * Deploy (or update) the compose stack as swarm services on a manager node.
 * Resolves the target as: explicit opts.vmName → any tracked manager → the
 * active VM. Migrates a running compose stack, clears stale/broken networks,
 * generates the swarm config, deploys with retry, and reports convergence.
 *
 * @param {object} opts
 * @param {string} [opts.vmName] - Manager VM to deploy on (auto-detected if omitted).
 */
export async function swarmDeploy(opts = {}) {
  const execa = await lazyExeca();
  const { activeVm, vms } = listVms();

  const vmName = opts.vmName
    || Object.keys(vms).find(n => vms[n].swarm?.role === "manager")
    || activeVm;

  if (!vmName || !vms[vmName]) {
    console.error(ERR("\n No swarm manager found. Initialize one: fops azure swarm init <vmName>\n"));
    process.exit(1);
  }

  const vm = vms[vmName];
  if (!vm.swarm?.role) {
    console.error(ERR(`\n "${vmName}" is not in a swarm. Run: fops azure swarm init ${vmName}\n`));
    process.exit(1);
  }
  if (vm.swarm.role !== "manager") {
    console.error(ERR(`\n "${vmName}" is a worker, not a manager. Target the manager instead.\n`));
    process.exit(1);
  }

  banner("Swarm Deploy");
  kvLine("Manager", LABEL(vmName));
  console.log("");

  await knockForVm(vm);
  const stopKnockKeepAlive = keepKnockAlive(vm.publicIp, vm.knockSequence);
  const user = DEFAULTS.adminUser;
  const ssh = (cmd, t) => sshCmd(execa, vm.publicIp, user, cmd, t || 30000);

  try {
    // Detect an existing compose stack and migrate. compose down alone may not
    // remove containers with restart policies (e.g. traefik), so we kill first,
    // then down, then force-remove any stragglers and networks.
    const { stdout: composePsOut } = await ssh(
      "cd /opt/foundation-compose && sudo docker compose ps -q 2>/dev/null | head -1",
    );
    if (composePsOut?.trim()) {
      hint("Existing compose stack detected — stopping for swarm migration…");
      await ssh("cd /opt/foundation-compose && sudo docker compose kill 2>&1 || true", 30000);
      await ssh("cd /opt/foundation-compose && sudo docker compose down --remove-orphans 2>&1 || true", 120000);
      // Force-remove any containers still attached to compose networks.
      const { stdout: stragglers } = await ssh(
        "sudo docker ps -aq --filter 'network=foundation-compose_foundation-network' 2>/dev/null || true",
      );
      if (stragglers?.trim()) {
        // BUG FIX: was split("\\n") — a literal backslash-n that never matches
        // docker's newline-separated ID output, so multiple IDs reached the
        // remote shell with embedded newlines and broke the rm command.
        await ssh(`sudo docker rm -f ${stragglers.trim().split("\n").join(" ")} 2>&1 || true`);
      }
      console.log(OK(" ✓ Compose stack stopped"));
    }

    // Leftover compose bridge networks would conflict with same-named swarm overlays.
    const { stdout: staleNets } = await ssh(
      "sudo docker network ls --filter 'driver=bridge' --format '{{.Name}}' 2>/dev/null | grep foundation-compose || true",
    );
    if (staleNets?.trim()) {
      for (const net of staleNets.trim().split("\n").filter(Boolean)) {
        hint(`Removing stale network ${net}…`);
        await ssh(`sudo docker network rm ${net} 2>&1 || true`);
      }
      console.log(OK(" ✓ Stale networks removed"));
    }

    // Reconcile broken overlay networks before deploying.
    await reconcileOverlayNetworks(ssh, hint, OK, WARN);

    hint("Generating swarm-compatible config…");
    const externalNetworks = await genSwarmConfig(ssh);

    // Pre-create any truly external networks (not stack-managed).
    // Stack-managed overlay networks are created atomically by docker stack deploy.
    if (externalNetworks.length > 0) {
      hint(`Ensuring ${externalNetworks.length} external network(s) exist…`);
      for (const netName of externalNetworks) {
        const { stdout: existsOut } = await ssh(
          `sudo docker network inspect ${netName} --format '{{.Scope}}' 2>/dev/null || echo missing`,
        );
        if ((existsOut || "").trim() === "swarm") continue;
        hint(`  Creating ${netName}…`);
        await ssh(`sudo docker network create --driver overlay --attachable ${netName} 2>&1 || true`);
      }
      console.log(OK(`  ✓ ${externalNetworks.length} external network(s) ready`));
    }

    hint("Deploying compose stack as swarm services…");
    const { exitCode: deployExitCode, stdout: deployStdout } = await deployStackWithRetry(
      ssh, externalNetworks, { hint, OK, WARN },
    );

    if (deployExitCode === 0) {
      console.log(OK("\n ✓ Stack deployed as swarm services"));
    } else {
      console.log(WARN(`  Stack deploy warnings:\n${(deployStdout || "").trim()}`));
    }

    // Post-deploy: wait for services to converge, detect overlay issues early.
    await waitForSwarmConvergence(ssh, hint, OK, WARN);

    // Show the resulting service table.
    const { stdout: svcs } = await ssh(
      "sudo docker stack services foundation --format '{{.Name}}\\t{{.Replicas}}\\t{{.Image}}' 2>/dev/null",
    );
    if (svcs?.trim()) {
      console.log("");
      hint("Services:");
      for (const line of svcs.trim().split("\n")) {
        console.log(`  ${line}`);
      }
    }

    console.log("");
    hint("Status: fops azure swarm status " + vmName);
    hint("Update: fops azure swarm deploy " + vmName);
    console.log("");
  } finally {
    // Always close the knock window and SSH control socket, even on error.
    stopKnockKeepAlive();
    if (vm.knockSequence?.length) {
      const sshFn = (cmd) => sshCmd(execa, vm.publicIp, user, cmd);
      await closeKnock(sshFn, { quiet: true }).catch(() => {});
    }
    await closeMux(execa, vm.publicIp, user);
  }
}
|
|
570
|
+
|
|
571
|
+
/**
 * Join a VM as a worker (or manager) to an existing swarm.
 * If the worker VM is not tracked yet, it is provisioned first via azureUp,
 * inheriting location/profile from the manager unless overridden.
 *
 * @param {object} opts
 * @param {string} opts.vmName - VM to join (created if not tracked).
 * @param {string} opts.manager - Tracked manager VM whose join token is used.
 * @param {boolean} [opts.asManager] - Join with the manager token instead of the worker token.
 * @param {string} [opts.location] - Provisioning override (only when creating the VM).
 * @param {string} [opts.vmSize] - Provisioning override.
 * @param {string} [opts.image] - Provisioning override.
 * @param {string} [opts.url] - Provisioning override.
 * @param {string} [opts.profile] - Provisioning override.
 */
export async function swarmJoin(opts = {}) {
  const execa = await lazyExeca();
  const vmName = opts.vmName;
  const managerName = opts.manager;

  if (!vmName || !managerName) {
    console.error(ERR("\n Usage: fops azure swarm join <vmName> --manager <managerVm>\n"));
    process.exit(1);
  }

  const managerVm = readVmState(managerName);
  if (!managerVm) {
    console.error(ERR(`\n Manager VM "${managerName}" not tracked.\n`));
    process.exit(1);
  }
  // A VM that went through swarmInit always has a workerToken saved, so its
  // absence means this VM is not a known manager.
  if (!managerVm.swarm?.workerToken) {
    console.error(ERR(`\n "${managerName}" is not a swarm manager. Run: fops azure swarm init ${managerName}\n`));
    process.exit(1);
  }

  // Auto-provision the worker VM when it is not tracked yet.
  let vm = readVmState(vmName);
  if (!vm) {
    hint(`VM "${vmName}" not found — creating it now…`);
    await azureUp({
      vmName,
      location: opts.location || managerVm.location,
      vmSize: opts.vmSize,
      image: opts.image,
      url: opts.url,
      profile: opts.profile || managerVm.subscriptionId,
    });
    vm = readVmState(vmName);
    if (!vm) {
      console.error(ERR(`\n Failed to create VM "${vmName}".\n`));
      process.exit(1);
    }
  }

  banner("Swarm Join");
  kvLine("Worker", LABEL(vmName));
  kvLine("Manager", LABEL(managerName));
  console.log("");

  // Refuse to join a node that is already in a swarm.
  if (vm.swarm?.role) {
    console.log(WARN(` "${vmName}" is already a swarm ${vm.swarm.role}.`));
    hint("Leave first: fops azure swarm leave " + vmName);
    console.log("");
    return;
  }

  await knockForVm(vm);
  // NOTE(review): unlike swarmInit/swarmDeploy, no keepKnockAlive is started
  // here — confirm the join always completes within the knock window.
  const user = DEFAULTS.adminUser;
  const ssh = (cmd, t) => sshCmd(execa, vm.publicIp, user, cmd, t || 30000);

  try {
    // Pick the token for the requested role; 2377 is the swarm management port.
    const token = opts.asManager ? managerVm.swarm.managerToken : managerVm.swarm.workerToken;
    const joinAddr = `${managerVm.swarm.advertiseAddr}:2377`;
    const role = opts.asManager ? "manager" : "worker";

    hint(`Joining as ${role}…`);
    const { stdout: joinOut, exitCode: joinCode } = await ssh(
      `sudo docker swarm join --token ${token} ${joinAddr} 2>&1`,
      60000,
    );

    if (joinCode !== 0) {
      console.error(ERR(` Join failed:\n${(joinOut || "").trim()}`));
      process.exit(1);
    }

    const { stdout: nodeId } = await ssh("sudo docker info --format '{{.Swarm.NodeID}}'");

    // Persist membership so list/status commands can report it.
    writeVmState(vmName, {
      swarm: {
        role,
        nodeId: (nodeId || "").trim(),
        manager: managerName,
        joinedAt: new Date().toISOString(),
      },
    });

    console.log(OK(`\n ✓ ${vmName} joined swarm as ${role}`));
    kvLine("Node ID", DIM((nodeId || "").trim()));
    hint("Status: fops azure swarm status " + managerName);
    console.log("");
  } finally {
    // Best-effort cleanup of the knock window and SSH control socket.
    if (vm.knockSequence?.length) {
      const sshFn = (cmd) => sshCmd(execa, vm.publicIp, user, cmd);
      await closeKnock(sshFn, { quiet: true }).catch(() => {});
    }
    await closeMux(execa, vm.publicIp, user);
  }
}
|
|
667
|
+
|
|
668
|
+
/**
 * Show swarm status for a VM (or the entire swarm from a manager node).
 *
 * Target resolution order: `opts.vmName` → first tracked VM whose local state
 * records a "manager" swarm role → the currently active VM. Exits the process
 * when no usable target exists. `docker node ls` only succeeds on manager
 * nodes, so the node table is silently omitted on workers (non-zero exit).
 * Always tears down the port-knock rules (when configured) and the SSH
 * control socket in the `finally` block.
 *
 * @param {object} [opts]
 * @param {string} [opts.vmName] - Explicit target VM name.
 * @returns {Promise<void>}
 */
export async function swarmStatus(opts = {}) {
  const execa = await lazyExeca();
  const { activeVm, vms } = listVms();

  // Find the target — prefer explicit vmName, else find a manager, else active
  const vmName = opts.vmName
    || Object.keys(vms).find(n => vms[n].swarm?.role === "manager")
    || activeVm;

  if (!vmName || !vms[vmName]) {
    console.error(ERR("\n No swarm manager found. Initialize one: fops azure swarm init <vmName>\n"));
    process.exit(1);
  }

  const vm = vms[vmName];

  banner("Swarm Status");
  kvLine("VM", LABEL(vmName));
  kvLine("Role", vm.swarm?.role ? ACCENT(vm.swarm.role) : DIM("not in swarm"));

  // Not in a swarm at all — nothing to query remotely.
  if (!vm.swarm?.role) {
    console.log("");
    hint("Initialize: fops azure swarm init " + vmName);
    console.log("");
    return;
  }

  await knockForVm(vm);
  const user = DEFAULTS.adminUser;
  const ssh = (cmd, t) => sshCmd(execa, vm.publicIp, user, cmd, t || 30000);

  try {
    // Node list (only works on managers)
    const { stdout: nodesOut, exitCode: nodesCode } = await ssh(
      "sudo docker node ls --format '{{.ID}}|{{.Hostname}}|{{.Status}}|{{.Availability}}|{{.ManagerStatus}}' 2>/dev/null"
    );

    if (nodesCode === 0 && nodesOut?.trim()) {
      const nodes = nodesOut.trim().split("\n").filter(Boolean);
      console.log("");

      // Each line is a pipe-delimited record per the --format template above.
      const parsed = nodes.map(line => {
        const [id, hostname, status, availability, managerStatus] = line.split("|");
        return { id, hostname, status, availability, managerStatus };
      });

      const maxHost = Math.max(...parsed.map(n => (n.hostname || "").length), 8);
      const maxId = 12;

      console.log(DIM(` ${"NODE ID".padEnd(maxId)} ${"HOSTNAME".padEnd(maxHost)} ${"STATUS".padEnd(10)} ${"AVAIL".padEnd(10)} ROLE`));
      console.log(DIM(` ${"─".repeat(maxId)} ${"─".repeat(maxHost)} ${"─".repeat(10)} ${"─".repeat(10)} ${"─".repeat(10)}`));

      for (const n of parsed) {
        const statusColor = n.status === "Ready" ? OK : ERR;
        const availColor = n.availability === "Active" ? OK : WARN;
        // ManagerStatus is empty for workers; "Leader" gets special styling.
        const roleText = n.managerStatus
          ? (n.managerStatus === "Leader" ? OK("Leader") : ACCENT(n.managerStatus))
          : DIM("worker");

        console.log(
          ` ${DIM((n.id || "").slice(0, maxId).padEnd(maxId))} ` +
          `${chalk.bold.white((n.hostname || "").padEnd(maxHost))} ` +
          `${statusColor((n.status || "").padEnd(10))} ` +
          `${availColor((n.availability || "").padEnd(10))} ` +
          `${roleText}`
        );
      }

      const managerCount = parsed.filter(n => n.managerStatus).length;
      const readyCount = parsed.filter(n => n.status === "Ready").length;
      console.log(DIM(`\n ${parsed.length} node(s), ${managerCount} manager(s), ${readyCount} ready`));
    }

    // Service list (if any swarm services deployed)
    const { stdout: svcsOut, exitCode: svcsCode } = await ssh(
      "sudo docker service ls --format '{{.Name}}|{{.Replicas}}|{{.Image}}|{{.Ports}}' 2>/dev/null"
    );

    if (svcsCode === 0 && svcsOut?.trim()) {
      const services = svcsOut.trim().split("\n").filter(Boolean);
      console.log("");
      console.log(DIM(" Services:"));

      const maxSvc = Math.max(...services.map(s => s.split("|")[0]?.length || 0), 7);

      console.log(DIM(` ${"SERVICE".padEnd(maxSvc)} ${"REPLICAS".padEnd(10)} ${"IMAGE".padEnd(30)} PORTS`));
      console.log(DIM(` ${"─".repeat(maxSvc)} ${"─".repeat(10)} ${"─".repeat(30)} ${"─".repeat(20)}`));

      for (const line of services) {
        const [name, replicas, image, ports] = line.split("|");
        // Replicas come back as "running/desired" (e.g. "3/3").
        // FIX: always pass a radix to parseInt; use Number.parseInt per modern idiom.
        const replicaParts = (replicas || "").split("/");
        const running = Number.parseInt(replicaParts[0], 10) || 0;
        const desired = Number.parseInt(replicaParts[1], 10) || 0;
        const repColor = running === desired && running > 0 ? OK : running === 0 ? ERR : WARN;

        console.log(
          ` ${LABEL((name || "").padEnd(maxSvc))} ` +
          `${repColor((replicas || "").padEnd(10))} ` +
          `${DIM((image || "").slice(0, 30).padEnd(30))} ` +
          `${DIM(ports || "")}`
        );
      }
    } else {
      console.log(DIM("\n No swarm services deployed."));
      hint("Deploy: fops azure swarm deploy " + vmName);
    }

    console.log("");
  } finally {
    // Best-effort teardown: close knock rules (if any), then the SSH mux socket.
    if (vm.knockSequence?.length) {
      const sshFn = (cmd) => sshCmd(execa, vm.publicIp, user, cmd);
      await closeKnock(sshFn, { quiet: true }).catch(() => {});
    }
    await closeMux(execa, vm.publicIp, user);
  }
}
|
|
787
|
+
|
|
788
|
+
/**
 * Promote a worker node to manager.
 *
 * Runs `docker node promote <nodeId>` over SSH from any tracked manager VM,
 * then records the new role (with a promotion timestamp) in local VM state.
 * Exits the process on usage errors, untracked VMs, missing managers, or a
 * failed promote command. A VM that is already a manager is a no-op (warning
 * printed). Port-knock rules and the SSH control socket are always cleaned
 * up in the `finally` block.
 *
 * @param {object} [opts]
 * @param {string} [opts.vmName] - Name of the tracked worker VM to promote.
 * @returns {Promise<void>}
 */
export async function swarmPromote(opts = {}) {
  const execa = await lazyExeca();
  const vmName = opts.vmName;

  // Guard clauses: need a name, tracked state, and swarm membership.
  if (!vmName) {
    console.error(ERR("\n Usage: fops azure swarm promote <vmName>\n"));
    process.exit(1);
  }

  const vm = readVmState(vmName);
  if (!vm) {
    console.error(ERR(`\n VM "${vmName}" not tracked.\n`));
    process.exit(1);
  }
  if (!vm.swarm?.nodeId) {
    console.error(ERR(`\n "${vmName}" is not in a swarm.\n`));
    process.exit(1);
  }
  if (vm.swarm.role === "manager") {
    console.log(WARN(` "${vmName}" is already a manager.\n`));
    return;
  }

  // The promote command must execute on a manager node — find any tracked one.
  const managerEntry = Object.entries(listVms().vms)
    .find(([, state]) => state.swarm?.role === "manager");
  if (!managerEntry) {
    console.error(ERR("\n No swarm manager found in tracked VMs.\n"));
    process.exit(1);
  }
  const [managerName, managerState] = managerEntry;

  banner("Swarm Promote");
  kvLine("Promoting", LABEL(vmName));
  kvLine("Via manager", DIM(managerName));
  console.log("");

  await knockForVm(managerState);
  const user = DEFAULTS.adminUser;

  try {
    const result = await sshCmd(
      execa, managerState.publicIp, user,
      `sudo docker node promote ${vm.swarm.nodeId} 2>&1`,
      30000,
    );

    if (result.exitCode !== 0) {
      console.error(ERR(` Promote failed: ${(result.stdout || "").trim()}`));
      process.exit(1);
    }

    // Persist the role change locally, keeping the rest of the swarm record.
    writeVmState(vmName, {
      swarm: { ...vm.swarm, role: "manager", promotedAt: new Date().toISOString() },
    });

    console.log(OK(` ✓ ${vmName} promoted to manager`));
    console.log("");
  } finally {
    // Best-effort teardown of knock rules and the SSH mux socket.
    if (managerState.knockSequence?.length) {
      const sshFn = (cmd) => sshCmd(execa, managerState.publicIp, user, cmd);
      await closeKnock(sshFn, { quiet: true }).catch(() => {});
    }
    await closeMux(execa, managerState.publicIp, user);
  }
}
|
|
859
|
+
|
|
860
|
+
/**
 * Remove a VM from the swarm.
 *
 * Runs `docker swarm leave` over SSH on the target VM (adding docker's
 * `--force` for managers) and clears its local swarm state. Managers require
 * `opts.force` to proceed, since leaving can disrupt the swarm; when a
 * manager does leave, every tracked worker that referenced it as its manager
 * also has its local swarm state cleared. Knock rules and the SSH control
 * socket are always torn down in the `finally` block.
 *
 * @param {object} [opts]
 * @param {string} [opts.vmName] - Name of the tracked VM that should leave.
 * @param {boolean} [opts.force] - Confirm leaving when the VM is a manager.
 * @returns {Promise<void>}
 */
export async function swarmLeave(opts = {}) {
  const execa = await lazyExeca();
  const vmName = opts.vmName;

  if (!vmName) {
    console.error(ERR("\n Usage: fops azure swarm leave <vmName>\n"));
    process.exit(1);
  }

  const vm = readVmState(vmName);
  if (!vm) {
    console.error(ERR(`\n VM "${vmName}" not tracked.\n`));
    process.exit(1);
  }
  if (!vm.swarm?.role) {
    console.log(DIM(` "${vmName}" is not in a swarm.\n`));
    return;
  }

  banner("Swarm Leave");
  kvLine("VM", LABEL(vmName));
  kvLine("Role", ACCENT(vm.swarm.role));
  console.log("");

  const isManager = vm.swarm.role === "manager";

  // Refuse to silently break the swarm: managers must opt in with --force.
  if (isManager && !opts.force) {
    console.log(WARN(" This is a manager node. Leaving will disrupt the swarm."));
    hint("Use --force to confirm, or demote first.");
    console.log("");
    return;
  }

  await knockForVm(vm);
  const user = DEFAULTS.adminUser;
  const ssh = (cmd, t) => sshCmd(execa, vm.publicIp, user, cmd, t || 30000);

  try {
    // Managers need docker's own --force to leave.
    const leaveCommand = `sudo docker swarm leave${isManager ? " --force" : ""} 2>&1`;
    const { exitCode, stdout: leaveOut } = await ssh(leaveCommand, 60000);

    if (exitCode !== 0) {
      console.error(ERR(` Leave failed: ${(leaveOut || "").trim()}`));
      process.exit(1);
    }

    // Clear swarm state
    writeVmState(vmName, { swarm: null });

    console.log(OK(` ✓ ${vmName} left the swarm`));

    // If it was a manager, clean up worker references
    if (isManager) {
      const { vms } = listVms();
      for (const [workerName, workerState] of Object.entries(vms)) {
        if (workerState.swarm?.manager !== vmName) continue;
        writeVmState(workerName, { swarm: null });
        console.log(DIM(`    Cleared swarm state for worker: ${workerName}`));
      }
    }

    console.log("");
  } finally {
    // Best-effort teardown of knock rules and the SSH mux socket.
    if (vm.knockSequence?.length) {
      const sshFn = (cmd) => sshCmd(execa, vm.publicIp, user, cmd);
      await closeKnock(sshFn, { quiet: true }).catch(() => {});
    }
    await closeMux(execa, vm.publicIp, user);
  }
}
|