@meshxdata/fops 0.1.48 → 0.1.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/CHANGELOG.md +368 -0
  2. package/package.json +1 -1
  3. package/src/commands/lifecycle.js +30 -11
  4. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-core.js +347 -6
  5. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-data-bootstrap.js +421 -0
  6. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-flux.js +5 -179
  7. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-naming.js +14 -4
  8. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-postgres.js +171 -4
  9. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-storage.js +303 -8
  10. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks.js +2 -0
  11. package/src/plugins/bundled/fops-plugin-azure/lib/azure-auth.js +1 -1
  12. package/src/plugins/bundled/fops-plugin-azure/lib/azure-fleet-swarm.js +936 -0
  13. package/src/plugins/bundled/fops-plugin-azure/lib/azure-fleet.js +10 -918
  14. package/src/plugins/bundled/fops-plugin-azure/lib/azure-helpers.js +5 -0
  15. package/src/plugins/bundled/fops-plugin-azure/lib/azure-keyvault-keys.js +413 -0
  16. package/src/plugins/bundled/fops-plugin-azure/lib/azure-keyvault.js +14 -399
  17. package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops-config.js +754 -0
  18. package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops-knock.js +527 -0
  19. package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops-ssh.js +427 -0
  20. package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops.js +99 -1686
  21. package/src/plugins/bundled/fops-plugin-azure/lib/azure-provision-health.js +279 -0
  22. package/src/plugins/bundled/fops-plugin-azure/lib/azure-provision-init.js +186 -0
  23. package/src/plugins/bundled/fops-plugin-azure/lib/azure-provision.js +66 -444
  24. package/src/plugins/bundled/fops-plugin-azure/lib/azure-results.js +11 -0
  25. package/src/plugins/bundled/fops-plugin-azure/lib/azure-vm-lifecycle.js +5 -540
  26. package/src/plugins/bundled/fops-plugin-azure/lib/azure-vm-terraform.js +544 -0
  27. package/src/plugins/bundled/fops-plugin-azure/lib/commands/infra-cmds.js +75 -3
  28. package/src/plugins/bundled/fops-plugin-azure/lib/commands/test-cmds.js +227 -11
  29. package/src/plugins/bundled/fops-plugin-azure/lib/commands/vm-cmds.js +2 -1
  30. package/src/plugins/bundled/fops-plugin-azure/lib/pytest-parse.js +21 -0
  31. package/src/plugins/bundled/fops-plugin-foundation/index.js +309 -44
@@ -10,46 +10,14 @@ import {
10
10
  resolvePublicIp, subArgs, buildTags, fetchMyIp,
11
11
  sshCmd, waitForSsh, closeMux, fopsUpCmd, buildPublicUrl,
12
12
  runReconcilers, ensureOpenAiNetworkAccess,
13
- reconcileOk, RECONCILE_LABEL_WIDTH,
13
+ reconcileOk, reconcileSection, RECONCILE_LABEL_WIDTH,
14
14
  } from "./azure-helpers.js";
15
+ import { postStartChecks } from "./azure-provision-health.js";
16
+ import { provisionVm } from "./azure-provision-init.js";
15
17
 
16
- let _tlsWarningSuppressed = false;
17
- async function tlsFetch(url, opts = {}) {
18
- if (!_tlsWarningSuppressed) {
19
- _tlsWarningSuppressed = true;
20
- const origEmit = process.emitWarning;
21
- process.emitWarning = (warning, ...args) => {
22
- if (typeof warning === "string" && warning.includes("NODE_TLS_REJECT_UNAUTHORIZED")) return;
23
- return origEmit.call(process, warning, ...args);
24
- };
25
- }
26
- const prev = process.env.NODE_TLS_REJECT_UNAUTHORIZED;
27
- process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";
28
- try {
29
- return await fetch(url, opts);
30
- } finally {
31
- if (prev === undefined) delete process.env.NODE_TLS_REJECT_UNAUTHORIZED;
32
- else process.env.NODE_TLS_REJECT_UNAUTHORIZED = prev;
33
- }
34
- }
35
-
36
- const URL_WAIT_MODE_HTTP_ANY = "http-any";
37
- const URL_WAIT_MODE_HTTP_2XX = "http-2xx";
38
- const URL_WAIT_PROGRESS_INTERVAL_MS = 15000;
39
-
40
- function normalizeUrlWaitMode(mode) {
41
- const normalized = String(mode || URL_WAIT_MODE_HTTP_ANY).trim().toLowerCase();
42
- if (normalized === URL_WAIT_MODE_HTTP_2XX) return URL_WAIT_MODE_HTTP_2XX;
43
- return URL_WAIT_MODE_HTTP_ANY;
44
- }
45
-
46
- function isUrlReadyByMode(status, waitMode) {
47
- if (waitMode === URL_WAIT_MODE_HTTP_2XX) {
48
- return status >= 200 && status < 300;
49
- }
50
- // Any HTTP response means DNS + TLS + edge routing are reachable.
51
- return Number.isInteger(status) && status >= 100 && status <= 599;
52
- }
18
+ // Re-export for backwards compatibility
19
+ export { postStartChecks } from "./azure-provision-health.js";
20
+ export { provisionVm } from "./azure-provision-init.js";
53
21
 
54
22
  // ── Ensure GHCR on VM (wait for Docker, login with retries) ──────────────────
55
23
 
@@ -75,10 +43,14 @@ async function ensureGhcrOnVm(ssh, user, githubToken, { timeout = 60000 } = {})
75
43
 
76
44
  // ── Configure a fresh or restarted VM ───────────────────────────────────────
77
45
 
78
- export async function configureVm(execa, ip, user, publicUrl, { githubToken, k3s, traefik, dai, deferStartToReconcile } = {}) {
46
+ export async function configureVm(execa, ip, user, publicUrl, { githubToken, k3s, traefik, dai, deferStartToReconcile, quiet } = {}) {
79
47
  const ssh = (cmd) => sshCmd(execa, ip, user, cmd);
80
48
 
81
- console.log(chalk.dim(" Configuring VM..."));
49
+ if (!quiet) console.log(chalk.dim(" Configuring VM..."));
50
+
51
+ // Derive environment name from publicUrl (e.g., staging.meshx.app -> Staging)
52
+ const envName = (publicUrl || "").replace(/https?:\/\//, "").split(".")[0] || "Local";
53
+ const environmentName = envName.charAt(0).toUpperCase() + envName.slice(1);
82
54
 
83
55
  // Batch: sshd tuning + docker group + ownership + br_netfilter — single SSH round-trip
84
56
  const setupBatch = [
@@ -93,19 +65,23 @@ export async function configureVm(execa, ip, user, publicUrl, { githubToken, k3s
93
65
  "sudo chown -R azureuser:azureuser /opt/foundation-compose 2>/dev/null; true",
94
66
  // Ensure br_netfilter is loaded so k3s CoreDNS service-IP routing works
95
67
  "sudo modprobe br_netfilter 2>/dev/null; sudo sysctl -qw net.bridge.bridge-nf-call-iptables=1 2>/dev/null; true",
96
- // Only inject FOUNDATION_PUBLIC_URL if not already set never overwrite
97
- `cd /opt/foundation-compose && grep -q '^FOUNDATION_PUBLIC_URL=' .env 2>/dev/null || echo 'FOUNDATION_PUBLIC_URL=${publicUrl}' >> .env`,
68
+ // Ensure FOUNDATION_PUBLIC_URL is correct (remove stale values, set correct one)
69
+ `cd /opt/foundation-compose && sed -i '/^FOUNDATION_PUBLIC_URL=/d' .env 2>/dev/null; echo 'FOUNDATION_PUBLIC_URL=${publicUrl}' >> .env`,
70
+ // Ensure ENVIRONMENT_NAME is correct
71
+ `cd /opt/foundation-compose && sed -i '/^ENVIRONMENT_NAME=/d' .env 2>/dev/null; echo 'ENVIRONMENT_NAME=${environmentName}' >> .env`,
98
72
  ].join("\n");
99
73
  await ssh(setupBatch);
100
74
 
101
75
  let ghcrOk = false;
102
76
  if (githubToken) {
103
- console.log(chalk.dim(" Configuring GitHub/GHCR credentials..."));
77
+ if (!quiet) console.log(chalk.dim(" Configuring GitHub/GHCR credentials..."));
104
78
  ghcrOk = await ensureGhcrOnVm(ssh, user, githubToken);
105
- if (ghcrOk) {
106
- console.log(chalk.green(" ✓ GHCR credentials configured"));
107
- } else {
108
- console.log(chalk.yellow(" ⚠ GHCR login failed or deferred — will retry before compose pull"));
79
+ if (!quiet) {
80
+ if (ghcrOk) {
81
+ console.log(chalk.green(" ✓ GHCR credentials configured"));
82
+ } else {
83
+ console.log(chalk.yellow(" ⚠ GHCR login failed or deferred — will retry before compose pull"));
84
+ }
109
85
  }
110
86
  }
111
87
 
@@ -122,6 +98,9 @@ export async function configureVm(execa, ip, user, publicUrl, { githubToken, k3s
122
98
  // provisioning only takes effect on the NEXT login, so the current SSH
123
99
  // session won't have it yet.
124
100
 
101
+ // When quiet (reconciliation mode), skip service status checks — handled by vmReconcileFoundation
102
+ if (quiet) return;
103
+
125
104
  // Skip starting if an up process is already running (provisionVm or cloud-init may have launched one)
126
105
  const { stdout: upProcs } = await ssh(
127
106
  "pgrep -af 'fops up|docker compose up' 2>/dev/null | grep -v pgrep | head -1 || true"
@@ -306,6 +285,7 @@ const VM_RECONCILERS = [
306
285
  { name: "vm-security", fn: vmReconcileSecurity },
307
286
  { name: "ssh-reachability", fn: vmReconcileSsh },
308
287
  { name: "dns-credentials", fn: vmReconcileDnsCreds },
288
+ { name: "repo", fn: vmReconcileRepo },
309
289
  { name: "fops-cli", fn: vmReconcileFopsCli },
310
290
  { name: "foundation", fn: vmReconcileFoundation },
311
291
  { name: "post-start", fn: vmReconcilePostStart },
@@ -334,6 +314,7 @@ export async function reconcileVm(execa, opts) {
334
314
  // ── Step: Power state ────────────────────────────────────────────────────────
335
315
 
336
316
  async function vmReconcilePower(ctx) {
317
+ reconcileSection("Infrastructure");
337
318
  const { execa, vmName, rg, sub } = ctx;
338
319
  const { stdout: ivJson } = await execa("az", [
339
320
  "vm", "get-instance-view", "-g", rg, "-n", vmName, "--output", "json",
@@ -502,6 +483,7 @@ async function vmReconcileNetworking(ctx) {
502
483
  // ── Step: NSG / firewall rules ───────────────────────────────────────────────
503
484
 
504
485
  async function vmReconcileNsg(ctx) {
486
+ reconcileSection("Firewall");
505
487
  const { execa, vmName, rg, sub, nicCode, nicJson, nicName, location, port, traefik } = ctx;
506
488
 
507
489
  let nsgName = "";
@@ -745,6 +727,7 @@ async function vmReconcileOpenAiNetwork(ctx) {
745
727
  // ── Step: VM security settings ───────────────────────────────────────────────
746
728
 
747
729
  async function vmReconcileSecurity(ctx) {
730
+ reconcileSection("Security");
748
731
  const { execa, vmName, rg, sub, location } = ctx;
749
732
  const { stdout: vmJson, exitCode } = await execa("az", [
750
733
  "vm", "show", "-g", rg, "-n", vmName, "--output", "json",
@@ -1047,8 +1030,8 @@ async function removeSshBypassViaRunCommand(execa, rg, vmName, sourceCidr, sub)
1047
1030
  // ── Step: SSH reachability ───────────────────────────────────────────────────
1048
1031
 
1049
1032
  async function vmReconcileSsh(ctx) {
1033
+ reconcileSection("Connectivity");
1050
1034
  const { execa, ip, adminUser, port, desiredUrl, vmName, rg, sub } = ctx;
1051
- console.log(chalk.dim(" Checking SSH..."));
1052
1035
 
1053
1036
  // Always fetch the knock sequence fresh from the Azure VM tag — local state can drift
1054
1037
  // after state recovery or manual changes, causing knocks with the wrong sequence.
@@ -1209,13 +1192,45 @@ async function vmReconcileDnsCreds(ctx) {
1209
1192
  const { execa, ip, adminUser, port, desiredUrl, cfToken, githubToken, k3s, traefik } = ctx;
1210
1193
  ctx.publicUrl = desiredUrl || buildPublicUrl(ip, port);
1211
1194
  await syncDns(cfToken, ctx.publicUrl, ip);
1212
- await configureVm(execa, ip, adminUser, ctx.publicUrl, { githubToken, k3s, traefik, deferStartToReconcile: true });
1195
+ await configureVm(execa, ip, adminUser, ctx.publicUrl, { githubToken, k3s, traefik, deferStartToReconcile: true, quiet: true });
1196
+ }
1197
+
1198
+ // ── Step: Ensure foundation-compose repo exists ──────────────────────────────
1199
+
1200
+ async function vmReconcileRepo(ctx) {
1201
+ if (ctx.done) return;
1202
+ const { execa, ip, adminUser } = ctx;
1203
+ const ssh = (cmd, timeout = 30000) => sshCmd(execa, ip, adminUser, cmd, timeout);
1204
+
1205
+ const { stdout: exists } = await ssh("[ -d /opt/foundation-compose/.git ] && echo yes || echo no");
1206
+ if (exists?.trim() === "yes") {
1207
+ reconcileOk("Repository", "/opt/foundation-compose");
1208
+ return;
1209
+ }
1210
+
1211
+ console.log(chalk.yellow(" ↻ Repository missing — cloning foundation-compose..."));
1212
+ const cloneResult = await ssh([
1213
+ "sudo rm -rf /opt/foundation-compose",
1214
+ "sudo git clone --branch main --depth 1 https://github.com/meshxdata/foundation-compose.git /opt/foundation-compose",
1215
+ ].join(" && "), 120000);
1216
+
1217
+ if (cloneResult.exitCode !== 0) {
1218
+ console.log(chalk.red(" ✗ Failed to clone repository"));
1219
+ return;
1220
+ }
1221
+
1222
+ console.log(chalk.dim(" Initializing submodules..."));
1223
+ await ssh("cd /opt/foundation-compose && sudo git submodule update --init --recursive --depth 1", 300000);
1224
+ await ssh("sudo chown -R azureuser:azureuser /opt/foundation-compose");
1225
+ await ssh("mkdir -p /opt/foundation-compose/credentials && touch /opt/foundation-compose/credentials/kubeconfig.yaml");
1226
+ reconcileOk("Repository", "cloned and initialized");
1213
1227
  }
1214
1228
 
1215
1229
  // ── Step: Ensure fops CLI is available on the VM ─────────────────────────────
1216
1230
 
1217
1231
  async function vmReconcileFopsCli(ctx) {
1218
1232
  if (ctx.done) return;
1233
+ reconcileSection("Services");
1219
1234
  const { execa, ip, adminUser } = ctx;
1220
1235
  const ssh = (cmd, timeout = 30000) => sshCmd(execa, ip, adminUser, cmd, timeout);
1221
1236
 
@@ -1375,400 +1390,7 @@ async function vmReconcilePostStart(ctx) {
1375
1390
  await postStartChecks(execa, ip, adminUser, { publicUrl, waitMode });
1376
1391
  }
1377
1392
 
1378
- // ── Post-start: postgres ready + grant admin ────────────────────────────────
1379
-
1380
- const POSTGRES_INITIAL_DELAY_MS = 45000; // give compose time to start containers after nohup fops up
1381
- const POSTGRES_POLL_INTERVAL_MS = 10000;
1382
- const POSTGRES_MAX_WAIT_MS = 600000; // 10 min for first boot / heavy stack or recovery
1383
- const POSTGRES_RECOVERY_HINT_INTERVAL_MS = 60000; // remind every 60s that we're waiting on recovery
1384
-
1385
- export async function postStartChecks(execa, ip, adminUser, { maxWait = POSTGRES_MAX_WAIT_MS, publicUrl, waitMode } = {}) {
1386
- banner("Post-start checks");
1387
-
1388
- const ssh = (cmd, timeout = 30000) => sshCmd(execa, ip, adminUser, cmd, timeout);
1389
-
1390
- hint("Waiting for Postgres…");
1391
- await new Promise((r) => setTimeout(r, POSTGRES_INITIAL_DELAY_MS));
1392
- const pgStart = Date.now();
1393
- let pgReady = false;
1394
- let lastRecoveryHintAt = -POSTGRES_RECOVERY_HINT_INTERVAL_MS; // so first recovery is reported immediately
1395
- let lastEventCheckAt = 0;
1396
- const EVENT_CHECK_INTERVAL_MS = 15000; // show docker events every 15s while waiting
1397
-
1398
- while (Date.now() - pgStart < maxWait) {
1399
- const { exitCode, stdout, stderr } = await ssh(
1400
- "cd /opt/foundation-compose && sudo docker compose exec -T postgres psql -U foundation -d foundation -c 'SELECT 1' 2>&1",
1401
- 15000
1402
- );
1403
- if (exitCode === 0) { pgReady = true; break; }
1404
- const out = (stdout || "") + (stderr || "");
1405
- const inRecovery = /recovery|not yet accepting connections|Consistent recovery state has not been yet reached/i.test(out);
1406
- if (inRecovery && Date.now() - lastRecoveryHintAt >= POSTGRES_RECOVERY_HINT_INTERVAL_MS) {
1407
- console.log(WARN(" Postgres in recovery (database was not properly shut down). Waiting for it to accept connections…"));
1408
- lastRecoveryHintAt = Date.now();
1409
- }
1410
-
1411
- // Show recent docker events while waiting
1412
- if (Date.now() - lastEventCheckAt >= EVENT_CHECK_INTERVAL_MS) {
1413
- const { stdout: events } = await ssh(
1414
- "docker events --since 30s --until 0s --format '{{.Type}}|{{.Action}}|{{.Actor.Attributes.name}}|{{.Actor.Attributes.image}}' 2>/dev/null | tail -8",
1415
- 10000
1416
- ).catch(() => ({ stdout: "" }));
1417
- if (events?.trim()) {
1418
- const eventIcons = { pull: "📥", start: "▶", create: "✦", die: "✗", kill: "⚡", stop: "■" };
1419
- const lines = events.trim().split("\n").map((line) => {
1420
- const [type, action, name, image] = line.split("|");
1421
- const icon = eventIcons[action] || "·";
1422
- const svc = name || image?.split("/").pop()?.split(":")[0] || "";
1423
- return `${icon} ${action.padEnd(7)} ${svc}`;
1424
- });
1425
- const elapsed = Math.round((Date.now() - pgStart) / 1000);
1426
- console.log(DIM(` [${elapsed}s] Docker activity:`));
1427
- for (const line of lines) console.log(DIM(` ${line}`));
1428
- }
1429
- lastEventCheckAt = Date.now();
1430
- }
1431
-
1432
- await new Promise((r) => setTimeout(r, POSTGRES_POLL_INTERVAL_MS));
1433
- }
1434
- if (!pgReady) {
1435
- console.log(WARN(" ⚠ Postgres not ready after timeout (may still be in recovery after unclean shutdown)"));
1436
-
1437
- const { stdout: composePs } = await ssh(
1438
- "cd /opt/foundation-compose && sudo docker compose ps -a --format '{{.Service}}\t{{.State}}\t{{.Status}}' 2>/dev/null | head -20"
1439
- );
1440
- const states = composePs?.trim()?.split("\n").map((line) => line.split("\t")[1]).filter(Boolean) ?? [];
1441
- const allCreated = states.length > 0 && states.every((s) => s === "created");
1442
- if (composePs?.trim()) {
1443
- hint("Container status:");
1444
- for (const line of composePs.trim().split("\n")) hint(` ${line}`);
1445
- if (allCreated) {
1446
- hint("Containers still in 'created' — stack may still be starting. Wait a few minutes then: fops azure deploy");
1447
- }
1448
- } else {
1449
- hint("No containers found — docker compose may not have started.");
1450
- }
1451
-
1452
- const { stdout: pullErrors } = await ssh(
1453
- "grep -i -E 'error|denied|unauthorized|manifest unknown|pull access denied' /tmp/fops-up.log 2>/dev/null | tail -5"
1454
- );
1455
- if (pullErrors?.trim()) {
1456
- console.log(WARN(" Possible pull errors:"));
1457
- for (const line of pullErrors.trim().split("\n")) hint(` ${line}`);
1458
- }
1459
-
1460
- const { stdout: ghcrCheck } = await ssh(
1461
- "sudo cat /root/.docker/config.json 2>/dev/null | grep -q ghcr.io && echo 'ok' || echo 'missing'"
1462
- );
1463
- if (ghcrCheck?.trim() !== "ok") {
1464
- console.log(WARN(" ⚠ GHCR credentials missing from /root/.docker/config.json"));
1465
- hint("Re-run: fops azure up (will re-configure credentials)");
1466
- }
1467
-
1468
- const { stdout: upLogTail } = await ssh(
1469
- "tail -80 /tmp/fops-up.log 2>/dev/null || echo '(log not found or empty)'"
1470
- );
1471
- if (upLogTail?.trim()) {
1472
- console.log(WARN(" Last lines of /tmp/fops-up.log:"));
1473
- for (const line of upLogTail.trim().split("\n").slice(-40)) {
1474
- console.log(chalk.dim(` ${line}`));
1475
- }
1476
- }
1477
-
1478
- hint(`\nDebug: ssh ${adminUser}@${ip} "tail -100 /tmp/fops-up.log"`);
1479
- hint("Retry: fops azure deploy\n");
1480
- return;
1481
- }
1482
- console.log(OK(" ✓ Postgres ready"));
1483
-
1484
- hint("Waiting for backend migrations…");
1485
- const migStart = Date.now();
1486
- let migReady = false;
1487
- while (Date.now() - migStart < 120000) {
1488
- const { exitCode: migCheck } = await ssh(
1489
- `cd /opt/foundation-compose && sudo docker compose exec -T postgres psql -U foundation -d foundation -c "SELECT 1 FROM \\"user\\" LIMIT 1" >/dev/null 2>&1`
1490
- );
1491
- if (migCheck === 0) { migReady = true; break; }
1492
- await new Promise((r) => setTimeout(r, 5000));
1493
- }
1494
- if (!migReady) {
1495
- hint("Retrying migration check in 30s…");
1496
- await new Promise((r) => setTimeout(r, 30000));
1497
- const { exitCode: retryCheck } = await ssh(
1498
- `cd /opt/foundation-compose && sudo docker compose exec -T postgres psql -U foundation -d foundation -c "SELECT 1 FROM \\"user\\" LIMIT 1" >/dev/null 2>&1`
1499
- );
1500
- if (retryCheck === 0) migReady = true;
1501
- }
1502
- if (!migReady) {
1503
- console.log(WARN(" ⚠ Migrations not complete — skipping grant-admin (schema not ready)"));
1504
- hint("Run manually after stack is up: fops azure grant admin");
1505
- } else {
1506
- const operatorEmail = process.env.FOUNDATION_USERNAME || process.env.FOUNDATION_OPERATOR_EMAIL || process.env.QA_USERNAME || "compose@meshx.io";
1507
- hint(`Ensuring ${operatorEmail} user + Foundation Admin…`);
1508
- const ensureUserSql = [
1509
- // Ensure the default operator user exists
1510
- `INSERT INTO "user" (identifier, urn, username, first_name, last_name, email, is_system)`,
1511
- ` VALUES (gen_random_uuid(), 'urn:meshx:user:operator', $op, 'Foundation', 'Operator', $op, false)`.replace(/\$op/g, `'${operatorEmail.replace(/'/g, "''")}'`),
1512
- ` ON CONFLICT (username) DO NOTHING;`,
1513
- // Grant Foundation Admin to operator user
1514
- `INSERT INTO role_member (identifier, role_id)`,
1515
- ` SELECT u.identifier, r.id FROM "user" u CROSS JOIN "role" r`,
1516
- ` WHERE u.username = '${operatorEmail.replace(/'/g, "''")}' AND r.name = 'Foundation Admin'`,
1517
- ` ON CONFLICT (role_id, identifier) DO NOTHING;`,
1518
- // Grant Foundation Admin to all other non-system users too
1519
- `INSERT INTO role_member (identifier, role_id)`,
1520
- ` SELECT u.identifier, r.id FROM "user" u CROSS JOIN "role" r`,
1521
- ` WHERE NOT u.is_system`,
1522
- ` AND u.username NOT IN ('admin@meshx.io', 'scheduler@meshx.io', 'opa@meshx.io')`,
1523
- ` AND r.name = 'Foundation Admin'`,
1524
- ` ON CONFLICT (role_id, identifier) DO NOTHING;`,
1525
- ].join("\n");
1526
- const b64Grant = Buffer.from(ensureUserSql).toString("base64");
1527
- let grantCode = -1;
1528
- let grantOut = "";
1529
- for (let attempt = 0; attempt < 2; attempt++) {
1530
- if (attempt === 1) {
1531
- hint("Grant failed; retrying in 20s…");
1532
- await new Promise((r) => setTimeout(r, 20000));
1533
- }
1534
- const result = await ssh(
1535
- `cd /opt/foundation-compose && echo '${b64Grant}' | base64 -d | sudo docker compose exec -T postgres psql -U foundation -d foundation -v ON_ERROR_STOP=1 2>&1`
1536
- );
1537
- grantOut = result.stdout || "";
1538
- grantCode = result.exitCode;
1539
- if (grantCode === 0) break;
1540
- }
1541
- if (grantCode === 0) {
1542
- console.log(OK(` ✓ ${operatorEmail} — Foundation Admin granted`));
1543
- } else {
1544
- console.log(WARN(" ⚠ Admin grant failed — run manually: fops azure grant admin"));
1545
- if (grantOut?.trim()) hint(grantOut.trim());
1546
- }
1547
-
1548
- const orgSql = `INSERT INTO user_organization (user_id, organization_id) SELECT u.id, o.id FROM "user" u CROSS JOIN organization o WHERE o.name = 'root' ON CONFLICT DO NOTHING;`;
1549
- const b64Org = Buffer.from(orgSql).toString("base64");
1550
- let orgCode = -1;
1551
- for (let attempt = 0; attempt < 2; attempt++) {
1552
- if (attempt === 1) await new Promise((r) => setTimeout(r, 15000));
1553
- const result = await ssh(
1554
- `cd /opt/foundation-compose && echo '${b64Org}' | base64 -d | sudo docker compose exec -T postgres psql -U foundation -d foundation -v ON_ERROR_STOP=1 2>&1`
1555
- );
1556
- orgCode = result.exitCode;
1557
- if (orgCode === 0) break;
1558
- }
1559
- if (orgCode === 0) {
1560
- console.log(OK(" ✓ Root organization membership ensured"));
1561
- }
1562
- }
1563
-
1564
- // Wait for the public URL to become reachable (Traefik + DNS propagation)
1565
- if (publicUrl) {
1566
- const urlWaitMode = normalizeUrlWaitMode(waitMode);
1567
- hint(`Waiting for ${publicUrl} to respond (mode: ${urlWaitMode})…`);
1568
- const urlMaxWait = 300000; // 5 min — Traefik + DNS can be slow after start
1569
- const urlStart = Date.now();
1570
- let lastProgressAt = 0;
1571
- let lastProbe = "not probed yet";
1572
- let urlOk = false;
1573
- while (Date.now() - urlStart < urlMaxWait) {
1574
- try {
1575
- const r = await tlsFetch(publicUrl, { signal: AbortSignal.timeout(8000), redirect: "follow" });
1576
- lastProbe = `HTTP ${r.status}`;
1577
- if (isUrlReadyByMode(r.status, urlWaitMode)) {
1578
- urlOk = true;
1579
- break;
1580
- }
1581
- } catch (err) {
1582
- // DNS not ready or connection refused — retry
1583
- lastProbe = err?.name || err?.message || "network error";
1584
- }
1585
- const now = Date.now();
1586
- if (now - lastProgressAt >= URL_WAIT_PROGRESS_INTERVAL_MS) {
1587
- const elapsed = Math.round((now - urlStart) / 1000);
1588
- hint(`Still waiting for ${publicUrl} [${elapsed}s] — last probe: ${lastProbe}`);
1589
- lastProgressAt = now;
1590
- }
1591
- await new Promise(r => setTimeout(r, 5000));
1592
- }
1593
- if (urlOk) {
1594
- console.log(OK(` ✓ ${publicUrl} is reachable (${lastProbe})`));
1595
- } else {
1596
- console.log(WARN(` ⚠ ${publicUrl} not responding yet (${lastProbe}) — Traefik, DNS, or edge config may still be propagating`));
1597
- hint(`Try stricter mode if needed: fops azure up --wait-mode ${URL_WAIT_MODE_HTTP_2XX}`);
1598
- hint("Check: curl -sk " + publicUrl);
1599
- }
1600
- }
1601
-
1602
- console.log("");
1603
- }
1604
-
1605
- // ═══════════════════════════════════════════════════════════════════════════
1606
- // Provision — install Docker, Node, fops, clone repo on a fresh Ubuntu VM
1607
- // ═══════════════════════════════════════════════════════════════════════════
1608
-
1609
- export async function provisionVm(execa, ip, adminUser, { githubToken, branch = "main", fopsVersion = "latest" } = {}) {
1610
- banner("Provisioning VM");
1611
- const ssh = (cmd, timeout = 120000) => sshCmd(execa, ip, adminUser, cmd, timeout);
1612
-
1613
- async function runScript(label, script, timeout = 180000) {
1614
- hint(`${label}…`);
1615
- const b64 = Buffer.from(script).toString("base64");
1616
- const { exitCode, stdout } = await ssh(
1617
- `echo '${b64}' | base64 -d | sudo bash -e 2>&1`, timeout,
1618
- );
1619
- if (exitCode !== 0) {
1620
- console.log(WARN(` ⚠ ${label} had issues`));
1621
- if (stdout?.trim()) hint(stdout.trim().split("\n").pop());
1622
- } else {
1623
- console.log(OK(` ✓ ${label}`));
1624
- }
1625
- return exitCode;
1626
- }
1627
-
1628
- const waitAptLock = "while fuser /var/lib/dpkg/lock-frontend /var/lib/apt/lists/lock /var/cache/apt/archives/lock >/dev/null 2>&1; do echo 'Waiting for apt/dpkg lock…'; sleep 5; done";
1629
- await runScript("Waiting for cloud-init", [
1630
- "cloud-init status --wait 2>/dev/null || true",
1631
- "while fuser /var/lib/dpkg/lock-frontend /var/lib/apt/lists/lock /var/cache/apt/archives/lock >/dev/null 2>&1; do sleep 3; done",
1632
- ].join("\n"), 300000);
1633
-
1634
- await runScript("Installing system packages", [
1635
- waitAptLock,
1636
- "export DEBIAN_FRONTEND=noninteractive",
1637
- "apt-get update -y -qq",
1638
- "apt-get install -y -qq apt-transport-https ca-certificates curl gnupg lsb-release jq git make unzip zsh software-properties-common python3-venv python3-pip",
1639
- ].join("\n"), 300000);
1640
-
1641
- await runScript("Installing Docker", [
1642
- waitAptLock,
1643
- "export DEBIAN_FRONTEND=noninteractive",
1644
- "install -m 0755 -d /etc/apt/keyrings",
1645
- "curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg",
1646
- "chmod a+r /etc/apt/keyrings/docker.gpg",
1647
- `echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" > /etc/apt/sources.list.d/docker.list`,
1648
- "apt-get update -qq",
1649
- "apt-get install -y -qq docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin",
1650
- "systemctl enable docker && systemctl start docker",
1651
- `usermod -aG docker ${adminUser}`,
1652
- ].join("\n"), 300000);
1653
-
1654
- await runScript("Configuring br_netfilter for k3s DNS", [
1655
- "modprobe br_netfilter",
1656
- "echo br_netfilter > /etc/modules-load.d/br_netfilter.conf",
1657
- "sysctl -w net.bridge.bridge-nf-call-iptables=1",
1658
- "echo 'net.bridge.bridge-nf-call-iptables = 1' > /etc/sysctl.d/99-br-netfilter.conf",
1659
- ].join("\n"));
1660
-
1661
- await runScript("Installing GitHub CLI", [
1662
- waitAptLock,
1663
- "set +e",
1664
- "curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg 2>/dev/null",
1665
- "chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg 2>/dev/null",
1666
- "echo \"deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main\" > /etc/apt/sources.list.d/github-cli.list",
1667
- "for _ in 1 2 3 4 5; do if apt-get update -qq && apt-get install -y -qq gh; then break; fi; echo 'Retrying in 10s…'; sleep 10; done",
1668
- "set -e",
1669
- "command -v gh >/dev/null 2>&1 || (echo 'gh not found after install attempts' && exit 1)",
1670
- ].join("\n"), 120000);
1671
-
1672
- const fopsPkg = fopsVersion === "latest"
1673
- ? "@meshxdata/fops"
1674
- : `@meshxdata/fops@${fopsVersion}`;
1675
- await runScript("Installing Node.js + fops", [
1676
- waitAptLock,
1677
- `curl -fsSL https://deb.nodesource.com/setup_20.x | bash -`,
1678
- "apt-get install -y -qq nodejs",
1679
- `npm install -g ${fopsPkg}`,
1680
- `FOPS_DIR=$(npm root -g)/@meshxdata/fops`,
1681
- `if [ ! -d "$FOPS_DIR/node_modules/commander" ]; then`,
1682
- ` echo "Dependencies missing — installing inside package dir…"`,
1683
- ` cd "$FOPS_DIR" && npm install --omit=dev`,
1684
- `fi`,
1685
- `fops --version || { echo "fops binary not on PATH — linking manually"; ln -sf "$(npm root -g)/@meshxdata/fops/fops.mjs" /usr/local/bin/fops; fops --version; }`,
1686
- ].join("\n"), 300000);
1687
-
1688
- if (githubToken) {
1689
- await runScript("Configuring GitHub credentials", [
1690
- `printf 'machine github.com login x-access-token password %s\\n' '${githubToken}' > /root/.netrc && chmod 600 /root/.netrc`,
1691
- `printf 'machine github.com login x-access-token password %s\\n' '${githubToken}' > /home/${adminUser}/.netrc && chmod 600 /home/${adminUser}/.netrc && chown ${adminUser}:${adminUser} /home/${adminUser}/.netrc`,
1692
- `echo '${githubToken}' | docker login ghcr.io -u x-access-token --password-stdin`,
1693
- `mkdir -p /home/${adminUser}/.docker && cp /root/.docker/config.json /home/${adminUser}/.docker/config.json 2>/dev/null && chown -R ${adminUser}:${adminUser} /home/${adminUser}/.docker || true`,
1694
- ].join("\n"));
1695
- }
1696
-
1697
- await runScript("Cloning foundation-compose", [
1698
- "rm -rf /opt/foundation-compose",
1699
- `git clone --branch ${branch} --depth 1 --recurse-submodules https://github.com/meshxdata/foundation-compose.git /opt/foundation-compose`,
1700
- "mkdir -p /opt/foundation-compose/credentials",
1701
- "touch /opt/foundation-compose/credentials/kubeconfig.yaml",
1702
- `chown -R ${adminUser}:${adminUser} /opt/foundation-compose`,
1703
- ].join("\n"), 300000);
1704
-
1705
- await runScript("Installing fops-api systemd service", [
1706
- `cat > /etc/systemd/system/fops-api.service <<'UNIT'`,
1707
- "[Unit]",
1708
- "Description=fops API server",
1709
- "After=network.target docker.service",
1710
- "Wants=docker.service",
1711
- "",
1712
- "[Service]",
1713
- "Type=simple",
1714
- `User=${adminUser}`,
1715
- "WorkingDirectory=/opt/foundation-compose",
1716
- "Environment=COMPOSE_ROOT=/opt/foundation-compose",
1717
- "EnvironmentFile=-/opt/foundation-compose/.env",
1718
- "ExecStart=/usr/bin/env fops serve --host 127.0.0.1 --port 4100",
1719
- "Restart=always",
1720
- "RestartSec=10",
1721
- "Environment=NODE_ENV=production",
1722
- "",
1723
- "[Install]",
1724
- "WantedBy=multi-user.target",
1725
- "UNIT",
1726
- "systemctl daemon-reload",
1727
- "systemctl enable --now fops-api",
1728
- ].join("\n"));
1729
-
1730
- const cloudInitSrc = path.resolve(
1731
- import.meta.dirname, "..", "..", "..", "..", "..", "packer", "scripts", "cloud-init-foundation.sh",
1732
- );
1733
- let cloudInitScript = "";
1734
- try {
1735
- cloudInitScript = fs.readFileSync(cloudInitSrc, "utf8");
1736
- } catch {
1737
- hint("cloud-init-foundation.sh not found locally — skipping per-boot script");
1738
- }
1739
- if (cloudInitScript) {
1740
- const b64 = Buffer.from(cloudInitScript).toString("base64");
1741
- await ssh(
1742
- `echo '${b64}' | base64 -d | sudo tee /var/lib/cloud/scripts/per-boot/foundation-startup.sh > /dev/null && sudo chmod +x /var/lib/cloud/scripts/per-boot/foundation-startup.sh`,
1743
- );
1744
- console.log(OK(" ✓ Cloud-init per-boot script installed"));
1745
- }
1393
+ // postStartChecks is now in azure-provision-health.js and re-exported above for backwards compatibility
1746
1394
 
1747
- await runScript("Setting MOTD", [
1748
- "chmod -x /etc/update-motd.d/* 2>/dev/null || true",
1749
- `cat > /etc/motd <<'MOTD'
1750
1395
 
1751
- ___ _ _ _
1752
- / __\\__ _ _ _ __ __| | __ _| |_(_) ___ _ __
1753
- / _\\/ _ \\| | | | '_ \\ / _\` |/ _\` | __| |/ _ \\| '_ \\
1754
- / / | (_) | |_| | | | | (_| | (_| | |_| | (_) | | | |
1755
- \\/ \\___/ \\__,_|_| |_|\\__,_|\\__,_|\\__|_|\\___/|_| |_|
1756
-
1757
- Data Mesh Platform
1758
-
1759
- Quick start:
1760
- fops status Show running services
1761
- fops up Start the platform
1762
- fops down Stop the platform
1763
- fops logs <service> Tail service logs
1764
- fops doctor Diagnose issues
1765
-
1766
- Project dir: /opt/foundation-compose
1767
-
1768
- MOTD`,
1769
- ].join("\n"));
1770
-
1771
- await ssh("sudo apt-get clean && sudo rm -rf /var/lib/apt/lists/*", 30000);
1772
-
1773
- console.log(OK("\n ✓ Provisioning complete"));
1774
- }
1396
+ // provisionVm is now in azure-provision-init.js and re-exported above for backwards compatibility
@@ -450,6 +450,17 @@ export async function resultsShow(opts = {}) {
450
450
  }
451
451
  }
452
452
  }
453
+ if (result.failedTests?.length) {
454
+ console.log(ERR(`\n Failed tests (${result.failedTests.length}):`));
455
+ for (const { test, reason } of result.failedTests.slice(0, 20)) {
456
+ const shortTest = test.replace(/^tests\//, "");
457
+ const reasonStr = reason ? DIM(` - ${reason.slice(0, 60)}`) : "";
458
+ console.log(ERR(` • ${shortTest}${reasonStr}`));
459
+ }
460
+ if (result.failedTests.length > 20) {
461
+ console.log(DIM(` ... and ${result.failedTests.length - 20} more`));
462
+ }
463
+ }
453
464
  if (result.pushedBy) kvLine("Pushed by", result.pushedBy);
454
465
  if (result.pushedAt) kvLine("Pushed at", result.pushedAt);
455
466
  console.log("");