@meshxdata/fops 0.1.44 → 0.1.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/CHANGELOG.md +183 -0
  2. package/package.json +1 -1
  3. package/src/commands/lifecycle.js +101 -5
  4. package/src/commands/setup.js +45 -4
  5. package/src/plugins/bundled/fops-plugin-azure/index.js +29 -0
  6. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-core.js +1185 -0
  7. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-flux.js +1180 -0
  8. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-ingress.js +393 -0
  9. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-naming.js +104 -0
  10. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-network.js +296 -0
  11. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-postgres.js +768 -0
  12. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-reconcilers.js +538 -0
  13. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-secrets.js +849 -0
  14. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-stacks.js +643 -0
  15. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-state.js +145 -0
  16. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-storage.js +496 -0
  17. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-terraform.js +1032 -0
  18. package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks.js +155 -4245
  19. package/src/plugins/bundled/fops-plugin-azure/lib/azure-keyvault.js +186 -0
  20. package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops.js +29 -0
  21. package/src/plugins/bundled/fops-plugin-azure/lib/azure-results.js +78 -0
  22. package/src/plugins/bundled/fops-plugin-azure/lib/azure.js +1 -1
  23. package/src/plugins/bundled/fops-plugin-azure/lib/commands/infra-cmds.js +758 -0
  24. package/src/plugins/bundled/fops-plugin-azure/lib/commands/registry-cmds.js +250 -0
  25. package/src/plugins/bundled/fops-plugin-azure/lib/commands/test-cmds.js +52 -1
  26. package/src/plugins/bundled/fops-plugin-azure/lib/commands/vm-cmds.js +10 -0
  27. package/src/plugins/bundled/fops-plugin-foundation/lib/apply.js +3 -2
  28. package/src/plugins/bundled/fops-plugin-foundation/lib/helpers.js +21 -0
  29. package/src/plugins/bundled/fops-plugin-foundation/lib/tools-read.js +3 -5
  30. package/src/ui/tui/App.js +13 -13
  31. package/src/web/dist/assets/index-NXC8Hvnp.css +1 -0
  32. package/src/web/dist/assets/index-QH1N4ejK.js +112 -0
  33. package/src/web/dist/index.html +2 -2
  34. package/src/web/server.js +4 -4
  35. package/src/web/dist/assets/index-BphVaAUd.css +0 -1
  36. package/src/web/dist/assets/index-CSckLzuG.js +0 -129
@@ -1,4248 +1,158 @@
1
- import crypto from "node:crypto";
2
- import fs from "node:fs";
3
- import os from "node:os";
4
- import path from "node:path";
5
- import { fileURLToPath, pathToFileURL } from "node:url";
6
- import chalk from "chalk";
7
- import {
8
- DEFAULTS, DIM, OK, WARN, ERR, LABEL, ACCENT,
9
- banner, hint, kvLine,
10
- lazyExeca, ensureAzCli, ensureAzAuth, subArgs, buildTags, fetchMyIp,
11
- readState, saveState,
12
- resolveGithubToken, runReconcilers,
13
- resolveUniqueDomain,
14
- } from "./azure.js";
15
- import { syncDns } from "./cloudflare.js";
16
-
17
- /** Resolve a module path under the CLI's src/ directory (works from source and ~/.fops/plugins). */
18
- function resolveCliSrc(relPath) {
19
- const thisDir = path.dirname(fileURLToPath(import.meta.url));
20
- const fromSource = path.resolve(thisDir, "../../../..", relPath);
21
- if (fs.existsSync(fromSource)) return pathToFileURL(fromSource).href;
22
- const fopsBin = process.argv[1];
23
- if (fopsBin) {
24
- try {
25
- const cliRoot = path.dirname(fs.realpathSync(fopsBin));
26
- const fromCli = path.resolve(cliRoot, "src", relPath);
27
- if (fs.existsSync(fromCli)) return pathToFileURL(fromCli).href;
28
- } catch { /* fall through */ }
29
- }
30
- return "../../../../" + relPath;
31
- }
32
-
33
- function timeSince(isoStr) {
34
- const ms = Date.now() - new Date(isoStr).getTime();
35
- if (ms < 60_000) return `${Math.round(ms / 1000)}s`;
36
- if (ms < 3600_000) return `${Math.round(ms / 60_000)}m`;
37
- if (ms < 86400_000) return `${Math.round(ms / 3600_000)}h`;
38
- return `${Math.round(ms / 86400_000)}d`;
39
- }
40
-
41
- // ── AKS Defaults ──────────────────────────────────────────────────────────────
42
-
43
- const AKS_DEFAULTS = {
44
- clusterName: "foundation-aks",
45
- resourceGroup: process.env.AZURE_AKS_RESOURCE_GROUP || "foundation-aks-rg",
46
- location: DEFAULTS.location,
47
- nodeCount: 3,
48
- minCount: 1,
49
- maxCount: 3,
50
- nodeVmSize: "Standard_D8s_v3",
51
- kubernetesVersion: "1.33",
52
- tier: "standard", // "free" | "standard" | "premium"
53
- networkPlugin: "azure", // "azure" (CNI) | "kubenet"
54
- fluxOwner: "meshxdata",
55
- fluxRepo: "flux",
56
- fluxBranch: "main",
57
- fluxPath: "clusters/fops",
58
- };
59
-
60
- // ── K8s version resolver ─────────────────────────────────────────────────────
61
-
62
1
  /**
63
- * Query the latest GA (non-preview) Kubernetes version available in a region.
64
- * Falls back to AKS_DEFAULTS.kubernetesVersion if the query fails.
2
+ * azure-aks.js Barrel re-export for backward compatibility
3
+ *
4
+ * This module re-exports all public APIs from the split modules.
5
+ * Direct imports of sub-modules are preferred for new code.
65
6
  */
66
- async function resolveK8sVersion(execa, { location, subscription } = {}) {
67
- try {
68
- const args = [
69
- "aks", "get-versions",
70
- "--location", location || AKS_DEFAULTS.location,
71
- "--output", "json",
72
- ];
73
- if (subscription) args.push("--subscription", subscription);
74
-
75
- const { stdout } = await execa("az", args, { timeout: 30000 });
76
- const data = JSON.parse(stdout);
77
-
78
- // data.values is an array of { version, isPreview, patchVersions }
79
- const gaVersions = (data.values || [])
80
- .filter((v) => !v.isPreview)
81
- .map((v) => v.version)
82
- .sort((a, b) => {
83
- const pa = a.split(".").map(Number);
84
- const pb = b.split(".").map(Number);
85
- return pb[0] - pa[0] || pb[1] - pa[1] || (pb[2] || 0) - (pa[2] || 0);
86
- });
87
-
88
- if (gaVersions.length > 0) return gaVersions[0];
89
- } catch { /* fall through to default */ }
90
- return AKS_DEFAULTS.kubernetesVersion;
91
- }
92
-
93
- // ── Cluster state ─────────────────────────────────────────────────────────────
94
- // State layout: { azure: { ..., activeCluster: "<name>", clusters: { ... } } }
95
-
96
- export function readAksClusters() {
97
- const state = readState();
98
- const az = state.azure || {};
99
- return {
100
- activeCluster: az.activeCluster,
101
- clusters: az.clusters || {},
102
- };
103
- }
104
-
105
- export function readClusterState(name) {
106
- const { activeCluster, clusters } = readAksClusters();
107
- if (name) return clusters[name] || null;
108
- if (activeCluster && clusters[activeCluster]) return clusters[activeCluster];
109
- return null;
110
- }
111
-
112
- export function writeClusterState(name, patch) {
113
- const state = readState();
114
- const az = state.azure || {};
115
- const clusters = az.clusters || {};
116
- clusters[name] = { ...clusters[name], ...patch, clusterName: name };
117
- az.clusters = clusters;
118
- az.activeCluster = name;
119
- state.azure = az;
120
- saveState(state);
121
- }
122
-
123
- /** Read flux defaults from project root: .fops.json or config/azure-flux.json (azure.fluxOwner, etc.). */
124
- function readProjectFluxConfig(projectRoot) {
125
- if (!projectRoot || !fs.existsSync(projectRoot)) return null;
126
- const tryFile = (p) => {
127
- if (!fs.existsSync(p)) return null;
128
- try {
129
- const raw = JSON.parse(fs.readFileSync(p, "utf8"));
130
- const az = raw?.azure || raw;
131
- const out = {};
132
- if (az.fluxOwner != null) out.fluxOwner = az.fluxOwner;
133
- if (az.fluxRepo != null) out.fluxRepo = az.fluxRepo;
134
- if (az.fluxPath != null) out.fluxPath = az.fluxPath;
135
- if (az.fluxBranch != null) out.fluxBranch = az.fluxBranch;
136
- return Object.keys(out).length ? out : null;
137
- } catch { return null; }
138
- };
139
- return tryFile(path.join(projectRoot, ".fops.json")) || tryFile(path.join(projectRoot, "config", "azure-flux.json")) || null;
140
- }
141
-
142
- /** Resolve effective Flux repo: CLI opts > cluster state > global azure > project config > AKS_DEFAULTS. */
143
- function resolveFluxConfig(clusterName, opts) {
144
- const state = readState();
145
- const az = state.azure || {};
146
- const tracked = readClusterState(clusterName);
147
- const project = readProjectFluxConfig(az.projectRoot || state.projectRoot);
148
- return {
149
- fluxRepo: opts?.fluxRepo ?? tracked?.flux?.repo ?? az.fluxRepo ?? project?.fluxRepo ?? AKS_DEFAULTS.fluxRepo,
150
- fluxOwner: opts?.fluxOwner ?? tracked?.flux?.owner ?? az.fluxOwner ?? project?.fluxOwner ?? AKS_DEFAULTS.fluxOwner,
151
- fluxPath: opts?.fluxPath || tracked?.flux?.path || az.fluxPath || project?.fluxPath || AKS_DEFAULTS.fluxPath,
152
- fluxBranch: opts?.fluxBranch ?? tracked?.flux?.branch ?? az.fluxBranch ?? project?.fluxBranch ?? AKS_DEFAULTS.fluxBranch,
153
- };
154
- }
155
-
156
- function clearClusterState(name) {
157
- const state = readState();
158
- const az = state.azure || {};
159
- const clusters = az.clusters || {};
160
- delete clusters[name];
161
- if (az.activeCluster === name) {
162
- const remaining = Object.keys(clusters);
163
- az.activeCluster = remaining.length > 0 ? remaining[0] : undefined;
164
- }
165
- az.clusters = clusters;
166
- state.azure = az;
167
- saveState(state);
168
- }
169
-
170
- function requireCluster(name) {
171
- const cl = readClusterState(name);
172
- if (!cl || !cl.clusterName) {
173
- const label = name ? `"${name}"` : "(none active)";
174
- console.error(ERR(`\n No AKS cluster tracked: ${label}`));
175
- hint("Create one: fops azure aks up <name>");
176
- hint("List: fops azure aks list\n");
177
- process.exit(1);
178
- }
179
- return {
180
- ...cl,
181
- resourceGroup: cl.resourceGroup ?? AKS_DEFAULTS.resourceGroup,
182
- };
183
- }
184
-
185
- // ── Flux helpers ──────────────────────────────────────────────────────────────
186
-
187
- async function ensureFluxCli(execa) {
188
- try {
189
- await execa("flux", ["--version"], { timeout: 10000 });
190
- } catch {
191
- console.error(ERR("\n Flux CLI is not installed."));
192
- hint("Install: brew install fluxcd/tap/flux");
193
- hint("Or: curl -s https://fluxcd.io/install.sh | sudo bash\n");
194
- process.exit(1);
195
- }
196
- }
197
-
198
- async function ensureKubectl(execa) {
199
- try {
200
- await execa("kubectl", ["version", "--client", "--output=json"], { timeout: 10000 });
201
- } catch {
202
- console.error(ERR("\n kubectl is not installed."));
203
- hint("Install: brew install kubectl\n");
204
- process.exit(1);
205
- }
206
- }
207
-
208
- // ── aks up ────────────────────────────────────────────────────────────────────
209
-
210
- export async function aksUp(opts = {}) {
211
- const execa = await lazyExeca();
212
- const clusterName = opts.clusterName || AKS_DEFAULTS.clusterName;
213
- const rg = opts.resourceGroup || AKS_DEFAULTS.resourceGroup;
214
- const location = opts.location || AKS_DEFAULTS.location;
215
- const nodeCount = opts.nodeCount || AKS_DEFAULTS.nodeCount;
216
- const minCount = opts.minCount || AKS_DEFAULTS.minCount;
217
- const maxCount = opts.maxCount || AKS_DEFAULTS.maxCount;
218
- const nodeVmSize = opts.nodeVmSize || AKS_DEFAULTS.nodeVmSize;
219
- const tier = opts.tier || AKS_DEFAULTS.tier;
220
- const networkPlugin = opts.networkPlugin || AKS_DEFAULTS.networkPlugin;
221
- const sub = opts.profile;
222
-
223
- await ensureAzCli(execa);
224
- await ensureKubectl(execa);
225
- const account = await ensureAzAuth(execa, { subscription: sub });
226
-
227
- // Resolve K8s version: use explicit opt, otherwise auto-detect latest GA for region
228
- const k8sVersion = opts.kubernetesVersion ||
229
- await resolveK8sVersion(execa, { location, subscription: sub });
230
-
231
- banner(`AKS Up: ${clusterName}`);
232
- kvLine("Account", DIM(`${account.name} (${account.id})`));
233
- kvLine("Location", DIM(location));
234
- kvLine("Nodes", DIM(`${nodeCount} x ${nodeVmSize} (autoscale ${minCount}–${maxCount})`));
235
- kvLine("K8s", DIM(k8sVersion));
236
- kvLine("Tier", DIM(tier));
237
-
238
- // Check if cluster already exists
239
- const { exitCode: exists } = await execa("az", [
240
- "aks", "show", "-g", rg, "-n", clusterName, "--output", "none",
241
- ...subArgs(sub),
242
- ], { reject: false, timeout: 30000 });
243
-
244
- if (exists === 0) {
245
- console.log(WARN(`\n Cluster "${clusterName}" already exists — reconciling…`));
246
-
247
- const maxPods = opts.maxPods || 110;
248
- const ctx = { execa, clusterName, rg, sub, opts, minCount, maxCount, maxPods };
249
- await reconcileCluster(ctx);
250
-
251
- const tracked = readClusterState(clusterName);
252
- if (tracked) printClusterInfo(tracked);
253
- return tracked;
254
- }
255
-
256
- // Create resource group
257
- hint(`Ensuring resource group ${rg}…`);
258
- await execa("az", [
259
- "group", "create", "--name", rg, "--location", location,
260
- "--output", "none", ...subArgs(sub),
261
- ], { timeout: 30000 });
262
-
263
- // Create AKS cluster
264
- banner(`Creating AKS cluster "${clusterName}"`);
265
-
266
- // Detect operator IP to lock down the API server
267
- hint("Detecting your public IP…");
268
- const myIp = await fetchMyIp();
269
- if (myIp) {
270
- console.log(OK(` ✓ API server will be scoped to ${myIp}`));
271
- } else {
272
- console.log(WARN(" ⚠ Could not detect public IP — API server will be open to all"));
273
- hint("Lock it down later: az aks update -g <rg> -n <name> --api-server-authorized-ip-ranges <ip>/32");
274
- }
275
-
276
- hint("This takes 5–10 minutes…\n");
277
-
278
- const tags = buildTags(clusterName, {
279
- createdBy: account.user?.name || "fops",
280
- type: "aks",
281
- });
282
- const tagStr = Object.entries(tags).map(([k, v]) => `${k}=${v}`).join(" ");
283
-
284
- const maxPods = opts.maxPods || 110;
285
-
286
- const createArgs = [
287
- "aks", "create",
288
- "--resource-group", rg,
289
- "--name", clusterName,
290
- "--location", location,
291
- "--node-count", String(nodeCount),
292
- "--node-vm-size", nodeVmSize,
293
- "--max-pods", String(maxPods),
294
- "--enable-cluster-autoscaler",
295
- "--min-count", String(minCount),
296
- "--max-count", String(maxCount),
297
- "--kubernetes-version", k8sVersion,
298
- "--tier", tier,
299
- "--network-plugin", networkPlugin,
300
- "--generate-ssh-keys",
301
- "--enable-managed-identity",
302
- "--enable-oidc-issuer",
303
- "--enable-workload-identity",
304
- "--tags", ...tagStr.split(" "),
305
- "--output", "json",
306
- ...subArgs(sub),
307
- ];
308
-
309
- if (myIp) {
310
- createArgs.push("--api-server-authorized-ip-ranges", `${myIp}/32`);
311
- }
312
-
313
- let cluster;
314
- try {
315
- const { stdout: clusterJson } = await execa("az", createArgs, { timeout: 900000 });
316
- cluster = JSON.parse(clusterJson);
317
- } catch (err) {
318
- const msg = (err.stderr || err.message || "").replace(/^.*ERROR:\s*/m, "");
319
- console.error(ERR(`\n ✗ Cluster creation failed:\n ${msg.split("\n")[0]}`));
320
- throw err;
321
- }
322
- console.log(OK(` ✓ AKS cluster created`));
323
-
324
- // Get kubeconfig
325
- await getCredentials(execa, { clusterName, rg, sub });
326
-
327
- // Save state
328
- writeClusterState(clusterName, {
329
- resourceGroup: rg,
330
- location,
331
- nodeCount,
332
- nodeVmSize,
333
- kubernetesVersion: cluster.kubernetesVersion || k8sVersion,
334
- subscriptionId: account.id,
335
- fqdn: cluster.fqdn,
336
- provisioningState: cluster.provisioningState,
337
- createdAt: new Date().toISOString(),
338
- });
339
-
340
- // Create GHCR pull secret so the cluster can pull private images
341
- const githubToken = resolveGithubToken(opts);
342
- if (githubToken) {
343
- await ensureGhcrPullSecret(execa, { clusterName, githubToken });
344
- }
345
-
346
- // Bootstrap Flux — defaults to meshxdata/flux
347
- const fluxRepo = opts.fluxRepo ?? AKS_DEFAULTS.fluxRepo;
348
- const fluxOwner = opts.fluxOwner ?? AKS_DEFAULTS.fluxOwner;
349
- const fluxBranch = opts.fluxBranch ?? AKS_DEFAULTS.fluxBranch;
350
- const fluxPath = opts.fluxPath || AKS_DEFAULTS.fluxPath;
351
-
352
- if (opts.noFlux) {
353
- console.log("");
354
- hint("Flux not bootstrapped (--no-flux).");
355
- hint("Bootstrap later: fops azure aks flux bootstrap <name>");
356
- } else if (!githubToken) {
357
- console.log("");
358
- console.log(WARN(" ⚠ Skipping Flux bootstrap — no GitHub token found."));
359
- hint("Authenticate with: gh auth login (writes to ~/.netrc)");
360
- hint("Or set GITHUB_TOKEN, or pass --github-token, then run:");
361
- hint(` fops azure aks flux bootstrap ${clusterName}`);
362
- } else {
363
- await bootstrapFlux(execa, {
364
- clusterName, rg, sub,
365
- githubToken,
366
- repo: fluxRepo,
367
- owner: fluxOwner,
368
- path: fluxPath,
369
- branch: fluxBranch,
370
- });
371
- writeClusterState(clusterName, {
372
- flux: { repo: fluxRepo, owner: fluxOwner, path: fluxPath, branch: fluxBranch },
373
- });
374
- }
375
-
376
- // Pre-install CRDs and fix webhook scheduling so Flux kustomizations can reconcile
377
- if (!opts.noFlux) {
378
- try {
379
- await reconcileFluxPrereqs({ execa, clusterName, rg, sub, opts });
380
- } catch (err) {
381
- console.log(WARN(` ⚠ Flux prereqs: ${(err.message || "").split("\n")[0]}`));
382
- hint("Run again with: fops azure aks up " + clusterName);
383
- }
384
- }
385
-
386
- // Provision Postgres Flexible Server in the AKS VNet
387
- if (opts.noPostgres !== true) {
388
- try {
389
- // Re-fetch cluster to get nodeResourceGroup
390
- const { stdout: freshJson } = await execa("az", [
391
- "aks", "show", "-g", rg, "-n", clusterName, "--output", "json",
392
- ...subArgs(sub),
393
- ], { timeout: 30000 });
394
- const freshCluster = JSON.parse(freshJson);
395
- const pgCtx = { execa, clusterName, rg, sub, cluster: freshCluster, opts };
396
- await reconcilePostgres(pgCtx);
397
- } catch (err) {
398
- const msg = err.message || "";
399
- const stderr = err.stderr ? err.stderr.trim().split("\n").slice(-2).join(" ") : "";
400
- console.log(WARN(` ⚠ Postgres provisioning failed: ${msg.split("\n")[0]}`));
401
- if (stderr && !msg.includes(stderr)) hint(stderr);
402
- hint("Retry with: fops azure aks up " + clusterName);
403
- }
404
- }
405
-
406
- const info = readClusterState(clusterName);
407
- printClusterInfo(info);
408
- return info;
409
- }
410
-
411
- // ── aks down ──────────────────────────────────────────────────────────────────
412
-
413
- export async function aksDown(opts = {}) {
414
- const execa = await lazyExeca();
415
- const sub = opts.profile;
416
- await ensureAzCli(execa);
417
- await ensureAzAuth(execa, { subscription: sub });
418
-
419
- const name = opts.clusterName;
420
- let clusterName, rg;
421
-
422
- // Try local state first
423
- const tracked = readClusterState(name);
424
- if (tracked?.clusterName) {
425
- clusterName = tracked.clusterName;
426
- rg = tracked.resourceGroup;
427
- } else if (name) {
428
- // Not in local state — probe Azure directly for residual clusters
429
- rg = AKS_DEFAULTS.resourceGroup;
430
- const { exitCode, stdout } = await execa("az", [
431
- "aks", "show", "-g", rg, "-n", name, "--output", "json", ...subArgs(sub),
432
- ], { reject: false, timeout: 30000 });
433
-
434
- if (exitCode === 0 && stdout) {
435
- clusterName = name;
436
- const info = JSON.parse(stdout);
437
- rg = info.resourceGroup || rg;
438
- console.log(WARN(`\n Cluster "${name}" not in local state but found in Azure (residual).`));
439
- } else {
440
- // Also try listing all clusters in the default RG
441
- const { stdout: listJson } = await execa("az", [
442
- "aks", "list", "-g", rg, "--output", "json", ...subArgs(sub),
443
- ], { reject: false, timeout: 30000 });
444
- const clusters = listJson ? JSON.parse(listJson) : [];
445
- const match = clusters.find((c) => c.name === name);
446
- if (match) {
447
- clusterName = match.name;
448
- rg = match.resourceGroup || rg;
449
- console.log(WARN(`\n Cluster "${name}" not in local state but found in Azure (residual).`));
450
- } else {
451
- console.error(ERR(`\n Cluster "${name}" not found in local state or Azure (RG: ${rg}).`));
452
- hint("List Azure clusters: az aks list -g foundation-aks-rg -o table");
453
- hint("List tracked: fops azure aks list\n");
454
- process.exit(1);
455
- }
456
- }
457
- } else {
458
- // No name given, require tracked state
459
- requireCluster(name);
460
- return; // unreachable, requireCluster exits
461
- }
462
-
463
- banner(`Destroying AKS cluster "${clusterName}"`);
464
- kvLine("RG", DIM(rg));
465
-
466
- if (!opts.yes) {
467
- const { confirm } = await import(resolveCliSrc("ui/confirm.js"));
468
- const ok = await confirm(` Destroy cluster "${clusterName}" and all workloads?`);
469
- if (!ok) { console.log(DIM("\n Cancelled.\n")); return; }
470
- }
471
-
472
- hint("This takes 3–5 minutes…\n");
473
-
474
- await execa("az", [
475
- "aks", "delete", "--resource-group", rg, "--name", clusterName,
476
- "--yes", "--no-wait", "--output", "none",
477
- ...subArgs(sub),
478
- ], { timeout: 300000 });
479
- console.log(OK(" ✓ Cluster deletion initiated"));
480
-
481
- // Remove kubeconfig context
482
- try {
483
- await execa("kubectl", ["config", "delete-context", clusterName], { reject: false, timeout: 10000 });
484
- await execa("kubectl", ["config", "delete-cluster", clusterName], { reject: false, timeout: 10000 });
485
- console.log(OK(" ✓ Kubeconfig context removed"));
486
- } catch { /* best-effort */ }
487
-
488
- clearClusterState(clusterName);
489
- console.log(OK("\n ✓ Done.") + DIM(" State cleared.\n"));
490
- }
491
-
492
- // ── aks list ──────────────────────────────────────────────────────────────────
493
-
494
- export async function aksList(opts = {}) {
495
- let { activeCluster, clusters } = readAksClusters();
496
- let names = Object.keys(clusters);
497
-
498
- banner("AKS Clusters");
499
-
500
- // If no clusters tracked locally, try to discover fops-managed clusters from Azure
501
- if (names.length === 0) {
502
- const execa = await lazyExeca();
503
- try {
504
- await ensureAzCli(execa);
505
- await ensureAzAuth(execa, { subscription: opts.profile });
506
- } catch {
507
- hint("No clusters tracked.");
508
- hint("Create one: fops azure aks up <name>\n");
509
- return;
510
- }
511
-
512
- hint("No clusters tracked locally — checking Azure for fops-managed clusters…\n");
513
-
514
- try {
515
- // Query all AKS clusters and filter by managed=fops tag
516
- const { stdout, exitCode } = await execa("az", [
517
- "aks", "list",
518
- "--query", "[?tags.managed=='fops']",
519
- "--output", "json",
520
- ...subArgs(opts.profile),
521
- ], { timeout: 60000, reject: false });
522
-
523
- if (exitCode === 0 && stdout?.trim()) {
524
- const discovered = JSON.parse(stdout);
525
- if (discovered.length > 0) {
526
- for (const cl of discovered) {
527
- const name = cl.name;
528
- const info = {
529
- resourceGroup: cl.resourceGroup,
530
- location: cl.location,
531
- kubernetesVersion: cl.kubernetesVersion,
532
- fqdn: cl.fqdn,
533
- nodeCount: cl.agentPoolProfiles?.reduce((s, p) => s + (p.count || 0), 0) || 0,
534
- nodeVmSize: cl.agentPoolProfiles?.[0]?.vmSize || "unknown",
535
- subscriptionId: cl.id?.split("/")[2],
536
- createdAt: cl.provisioningState === "Succeeded" ? new Date().toISOString() : null,
537
- };
538
- writeClusterState(name, info);
539
- console.log(OK(` + Discovered ${name} (${cl.location})`));
540
- }
541
- console.log("");
542
- // Re-read after discovery
543
- const updated = readAksClusters();
544
- activeCluster = updated.activeCluster;
545
- clusters = updated.clusters;
546
- names = Object.keys(clusters);
547
- }
548
- }
549
- } catch {
550
- // Discovery failed, continue with empty list
551
- }
552
-
553
- if (names.length === 0) {
554
- hint("No fops-managed clusters found in Azure.");
555
- hint("Create one: fops azure aks up <name>\n");
556
- return;
557
- }
558
- }
559
-
560
- // Refresh each tracked cluster from Azure so RG, Location, Nodes, FQDN, etc. are current
561
- try {
562
- const execa = await lazyExeca();
563
- await ensureAzCli(execa);
564
- await ensureAzAuth(execa, { subscription: opts.profile });
565
- for (const name of names) {
566
- const cl = clusters[name];
567
- let azCluster = null;
568
- if (cl.resourceGroup) {
569
- const { stdout, exitCode } = await execa("az", [
570
- "aks", "show", "-g", cl.resourceGroup, "-n", name, "--output", "json",
571
- ...subArgs(opts.profile),
572
- ], { timeout: 15000, reject: false });
573
- if (exitCode === 0 && stdout?.trim()) azCluster = JSON.parse(stdout);
574
- }
575
- if (!azCluster) {
576
- const { stdout, exitCode } = await execa("az", [
577
- "aks", "list", "--output", "json", ...subArgs(opts.profile),
578
- ], { timeout: 60000, reject: false });
579
- if (exitCode === 0 && stdout?.trim()) {
580
- const list = JSON.parse(stdout);
581
- azCluster = list.find((c) => c.name === name) || null;
582
- }
583
- }
584
- if (azCluster) {
585
- const info = {
586
- resourceGroup: azCluster.resourceGroup,
587
- location: azCluster.location,
588
- kubernetesVersion: azCluster.kubernetesVersion,
589
- fqdn: azCluster.fqdn,
590
- nodeCount: azCluster.agentPoolProfiles?.reduce((s, p) => s + (p.count || 0), 0) ?? null,
591
- nodeVmSize: azCluster.agentPoolProfiles?.[0]?.vmSize || null,
592
- subscriptionId: azCluster.id?.split("/")[2],
593
- };
594
- if (!cl.createdAt && azCluster.provisioningState === "Succeeded") {
595
- info.createdAt = new Date().toISOString();
596
- }
597
- writeClusterState(name, info);
598
- }
599
- }
600
- const updated = readAksClusters();
601
- clusters = updated.clusters;
602
- } catch {
603
- // Keep existing state if refresh fails (e.g. az not logged in)
604
- }
605
-
606
- for (const name of names) {
607
- const cl = clusters[name];
608
- const active = name === activeCluster ? OK(" ◀ active") : "";
609
- console.log(`\n ${LABEL(name)}${active}`);
610
- kvLine(" RG", DIM(cl.resourceGroup || "—"), { pad: 12 });
611
- kvLine(" Location", DIM(cl.location || "—"), { pad: 12 });
612
- kvLine(" Nodes", DIM(`${cl.nodeCount || "?"} x ${cl.nodeVmSize || "?"}`), { pad: 12 });
613
- kvLine(" K8s", DIM(cl.kubernetesVersion || "—"), { pad: 12 });
614
- kvLine(" FQDN", DIM(cl.fqdn || "—"), { pad: 12 });
615
- if (cl.flux) {
616
- kvLine(" Flux", DIM(`${cl.flux.owner}/${cl.flux.repo} (${cl.flux.path})`), { pad: 12 });
617
- }
618
- kvLine(" Created", DIM(cl.createdAt ? new Date(cl.createdAt).toLocaleString() : "—"), { pad: 12 });
619
- if (cl.domain) {
620
- kvLine(" Domain", DIM(cl.domain), { pad: 12 });
621
- }
622
- if (cl.qa) {
623
- const qaAge = timeSince(cl.qa.at);
624
- const qaLabel = cl.qa.passed ? OK(`✓ passed (${qaAge} ago)`) : ERR(`✗ failed (${qaAge} ago)`);
625
- kvLine(" QA", qaLabel, { pad: 12 });
626
- }
627
- }
628
- console.log("");
629
- }
630
-
631
- // ── aks status ────────────────────────────────────────────────────────────────
632
-
633
- export async function aksStatus(opts = {}) {
634
- const execa = await lazyExeca();
635
- const sub = opts.profile;
636
- await ensureAzCli(execa);
637
- await ensureAzAuth(execa, { subscription: sub });
638
- const { clusterName, resourceGroup: rg } = requireCluster(opts.clusterName);
639
-
640
- banner(`AKS Status: ${clusterName}`);
641
-
642
- // Cluster info from Azure
643
- const { stdout: showJson } = await execa("az", [
644
- "aks", "show", "-g", rg, "-n", clusterName, "--output", "json",
645
- ...subArgs(sub),
646
- ], { timeout: 30000 });
647
- const info = JSON.parse(showJson);
648
-
649
- kvLine("State", info.provisioningState === "Succeeded" ? OK(info.provisioningState) : WARN(info.provisioningState));
650
- kvLine("Power", info.powerState?.code === "Running" ? OK(info.powerState.code) : WARN(info.powerState?.code || "unknown"));
651
- kvLine("K8s", DIM(info.kubernetesVersion));
652
- kvLine("FQDN", DIM(info.fqdn));
653
- kvLine("Location", DIM(info.location));
654
- kvLine("Tier", DIM(info.sku?.tier || "—"));
655
-
656
- // Node pools
657
- const pools = info.agentPoolProfiles || [];
658
- console.log(`\n ${LABEL("Node Pools")}`);
659
- for (const pool of pools) {
660
- const status = pool.provisioningState === "Succeeded" ? OK("ready") : WARN(pool.provisioningState);
661
- console.log(` ${pool.name}: ${pool.count} x ${pool.vmSize} [${status}]`);
662
- }
663
-
664
- // Flux status (if available)
665
- try {
666
- await execa("flux", ["--version"], { timeout: 5000 });
667
- console.log(`\n ${LABEL("Flux Status")}`);
668
- const { stdout: fluxStatus } = await execa("flux", [
669
- "get", "all", "--context", clusterName, "--no-header",
670
- ], { timeout: 30000, reject: false });
671
- if (fluxStatus?.trim()) {
672
- for (const line of fluxStatus.trim().split("\n").slice(0, 20)) {
673
- console.log(` ${DIM(line)}`);
674
- }
675
- } else {
676
- hint(" Flux not installed or no resources found.");
677
- }
678
- } catch {
679
- hint(" Flux CLI not available — skipping Flux status.");
680
- }
681
-
682
- console.log("");
683
- }
684
-
685
- // ── aks config versions ──────────────────────────────────────────────────────
686
-
687
- const AKS_FOUNDATION_COMPONENTS = {
688
- "foundation-backend": { label: "Backend", short: "be" },
689
- "foundation-frontend": { label: "Frontend", short: "fe" },
690
- "foundation-processor": { label: "Processor", short: "pr" },
691
- "foundation-watcher": { label: "Watcher", short: "wa" },
692
- "foundation-scheduler": { label: "Scheduler", short: "sc" },
693
- "foundation-storage-engine": { label: "Storage", short: "se" },
694
- };
695
-
696
/**
 * Print the image tag of every known Foundation service deployed in the
 * "foundation" namespace of the given AKS cluster, and warn when the services
 * are not all running the same tag (version drift).
 *
 * @param {object} [opts]
 * @param {string} [opts.clusterName] - Cluster to inspect; resolved/validated by requireCluster.
 * @returns {Promise<void>} Output goes to the console; returns early (no throw)
 *   when deployments cannot be read or no known services are found.
 */
export async function aksConfigVersions(opts = {}) {
  const execa = await lazyExeca();
  const { clusterName } = requireCluster(opts.clusterName);

  banner(`Service Versions: ${clusterName}`);

  // kubectl wrapper pinned to this cluster's context; reject:false so a dead
  // context yields an exitCode instead of a thrown error.
  const kubectl = (args) =>
    execa("kubectl", ["--context", clusterName, ...args], { timeout: 15000, reject: false });

  // One "name=image" pair per line, one line per deployment.
  const { stdout, exitCode } = await kubectl([
    "get", "deployments", "-n", "foundation",
    "-o", "jsonpath={range .items[*]}{.metadata.name}={.spec.template.spec.containers[0].image}{\"\\n\"}{end}",
  ]);

  if (exitCode !== 0 || !stdout?.trim()) {
    hint("Could not read deployments. Is kubeconfig merged?");
    hint(` Run: fops azure aks kubeconfig ${clusterName}\n`);
    return;
  }

  // Keep only deployments we recognize as Foundation components.
  const versions = {};
  for (const line of stdout.trim().split("\n")) {
    const eq = line.indexOf("=");
    if (eq < 0) continue;
    const name = line.slice(0, eq).trim();
    const image = line.slice(eq + 1).trim();
    const comp = AKS_FOUNDATION_COMPONENTS[name];
    if (!comp) continue;
    // Tag is everything after the last ":"; untagged images are "latest".
    const colon = image.lastIndexOf(":");
    const tag = colon >= 0 ? image.slice(colon + 1) : "latest";
    versions[name] = { ...comp, image, tag };
  }

  if (Object.keys(versions).length === 0) {
    hint("No Foundation services found in the 'foundation' namespace.\n");
    return;
  }

  // Materialize the values once instead of re-walking Object.entries three times.
  const found = Object.values(versions);
  const maxLabel = Math.max(...found.map((v) => v.label.length));
  for (const v of found) {
    console.log(` ${ACCENT(v.label.padEnd(maxLabel + 2))} ${chalk.white(v.tag)} ${DIM(v.image)}`);
  }
  console.log("");

  // Check for version drift: all services should share a single image tag.
  const tags = [...new Set(found.map((v) => v.tag))];
  if (tags.length === 1) {
    console.log(OK(` ✓ All services on ${tags[0]}`));
  } else {
    console.log(WARN(` ⚠ Version drift detected — ${tags.length} different tags in use`));
  }
  console.log("");
}
749
-
750
- // ── aks terraform ────────────────────────────────────────────────────────────
751
-
752
/**
 * Export a live AKS cluster — plus any discovered companion resources
 * (Postgres Flexible Server, Key Vault, Flux extension/configurations) — as
 * Terraform HCL. Writes to --output when given, otherwise prints to stdout.
 *
 * @param {object} [opts]
 * @param {string} [opts.clusterName] - Cluster to export; resolved by requireCluster.
 * @param {string} [opts.profile]     - Azure subscription to target.
 * @param {string} [opts.output]      - File path to write the HCL to.
 * @returns {Promise<void>}
 */
export async function aksTerraform(opts = {}) {
  const execa = await lazyExeca();
  const sub = opts.profile;
  await ensureAzCli(execa);
  await ensureAzAuth(execa, { subscription: sub });
  const { clusterName, resourceGroup: rg } = requireCluster(opts.clusterName);

  banner(`AKS Terraform: ${clusterName}`);
  hint("Fetching cluster details from Azure…");

  const { stdout: showJson } = await execa("az", [
    "aks", "show", "-g", rg, "-n", clusterName, "--output", "json",
    ...subArgs(sub),
  ], { timeout: 30000 });
  const cluster = JSON.parse(showJson);

  // Fetch resource group details for location (best effort — fall back to the
  // cluster's own location when the RG lookup fails).
  let rgLocation = cluster.location;
  try {
    const { stdout: rgJson } = await execa("az", [
      "group", "show", "--name", rg, "--output", "json", ...subArgs(sub),
    ], { timeout: 15000 });
    rgLocation = JSON.parse(rgJson).location || rgLocation;
  } catch { /* use cluster location */ }

  // Discover companion resources in parallel. reject:false makes a missing
  // resource surface as a non-zero exitCode rather than a thrown error.
  hint("Discovering companion resources…");
  const serverName = `fops-${clusterName}-psql`;
  const vaultName = `fops-${clusterName}-kv`.replace(/[^a-zA-Z0-9-]/g, "").slice(0, 24);

  // Shared shape of every discovery call: az <args> --output json [--subscription …]
  const probe = (args) =>
    execa("az", [...args, "--output", "json", ...subArgs(sub)], { reject: false, timeout: 30000 });

  const [pgResult, kvResult, fluxExtResult, fluxCfgResult] = await Promise.allSettled([
    probe([
      "postgres", "flexible-server", "show",
      "--name", serverName, "--resource-group", rg,
    ]),
    probe(["keyvault", "show", "--name", vaultName]),
    probe([
      "k8s-extension", "show",
      "--resource-group", rg, "--cluster-name", clusterName,
      "--cluster-type", "managedClusters", "--name", "flux",
    ]),
    probe([
      "k8s-configuration", "flux", "list",
      "--resource-group", rg, "--cluster-name", clusterName,
      "--cluster-type", "managedClusters",
    ]),
  ]);

  // Parse a settled probe only when it succeeded AND produced output;
  // otherwise return the fallback (null for single resources, [] for lists).
  const parseIfOk = (result, fallback = null) =>
    result.status === "fulfilled" && result.value.exitCode === 0 && result.value.stdout?.trim()
      ? JSON.parse(result.value.stdout)
      : fallback;

  const pgServer = parseIfOk(pgResult);
  const keyVault = parseIfOk(kvResult);
  const fluxExt = parseIfOk(fluxExtResult);
  const fluxConfigs = parseIfOk(fluxCfgResult, []);

  const hcl = generateAksTerraform(cluster, { rg, rgLocation, pgServer, keyVault, fluxExt, fluxConfigs });

  console.log(OK(" ✓ Terraform HCL generated from live cluster state\n"));

  if (opts.output) {
    fs.mkdirSync(path.dirname(path.resolve(opts.output)), { recursive: true });
    fs.writeFileSync(path.resolve(opts.output), hcl, "utf8");
    console.log(OK(` ✓ Written to ${opts.output}\n`));
  } else {
    console.log(hcl);
  }
}
827
-
828
/**
 * Render Terraform HCL that mirrors the live state of an AKS cluster and its
 * discovered companion resources. Pure function: builds the document in memory
 * and returns it as a single string; no I/O.
 *
 * Emission order matters — variables, provider, resource group, cluster,
 * extra node pools, Postgres, Key Vault, Flux, outputs — so sections must not
 * be reordered.
 *
 * @param {object} cluster - Parsed `az aks show` JSON (Azure camelCase keys).
 * @param {object} ctx
 * @param {string} ctx.rg            - Resource group name.
 * @param {string} ctx.rgLocation    - Resource group location.
 * @param {object|null} ctx.pgServer - Parsed `az postgres flexible-server show`, or null when absent.
 * @param {object|null} ctx.keyVault - Parsed `az keyvault show`, or null when absent.
 * @param {object|null} ctx.fluxExt  - Parsed `az k8s-extension show` for "flux", or null when absent.
 * @param {object[]} ctx.fluxConfigs - Parsed `az k8s-configuration flux list` (possibly empty).
 * @returns {string} Complete HCL document.
 */
function generateAksTerraform(cluster, { rg, rgLocation, pgServer, keyVault, fluxExt, fluxConfigs }) {
  const sku = cluster.sku || {};
  const netProfile = cluster.networkProfile || {};
  const identity = cluster.identity || {};
  const pools = cluster.agentPoolProfiles || [];
  // The "System" pool becomes Terraform's default_node_pool; everything else
  // is emitted as a separate azurerm_kubernetes_cluster_node_pool resource.
  const defaultPool = pools.find((p) => p.mode === "System") || pools[0];
  const extraPools = pools.filter((p) => p !== defaultPool);
  const apiAccess = cluster.apiServerAccessProfile || {};
  const autoUpgrade = cluster.autoUpgradeProfile || {};

  // Terraform identifiers only allow [a-zA-Z0-9_]; sanitize the cluster name.
  const tfName = (cluster.name || "aks").replace(/[^a-zA-Z0-9_]/g, "_");

  // Line accumulator: w() appends one line; w() with no args appends a blank.
  const lines = [];
  const w = (s = "") => lines.push(s);

  // ── variables ──────────────────────────────────────────────────────────────
  // Live values become variable defaults so the generated module is tweakable.

  w(`# ─── Variables ────────────────────────────────────────────────────────────────`);
  w();
  w(`variable "resource_group_name" {`);
  w(` description = "Name of the Azure resource group"`);
  w(` type = string`);
  w(` default = "${rg}"`);
  w(`}`);
  w();
  w(`variable "location" {`);
  w(` description = "Azure region"`);
  w(` type = string`);
  w(` default = "${rgLocation}"`);
  w(`}`);
  w();
  w(`variable "cluster_name" {`);
  w(` description = "AKS cluster name"`);
  w(` type = string`);
  w(` default = "${cluster.name}"`);
  w(`}`);
  w();
  w(`variable "kubernetes_version" {`);
  w(` description = "Kubernetes version"`);
  w(` type = string`);
  w(` default = "${cluster.kubernetesVersion}"`);
  w(`}`);
  w();
  w(`variable "dns_prefix" {`);
  w(` description = "DNS prefix for the cluster"`);
  w(` type = string`);
  w(` default = "${cluster.dnsPrefix || cluster.name}"`);
  w(`}`);
  w();
  w(`variable "sku_tier" {`);
  w(` description = "AKS SKU tier (Free, Standard, Premium)"`);
  w(` type = string`);
  w(` default = "${sku.tier || "Standard"}"`);
  w(`}`);
  w();

  if (defaultPool) {
    w(`variable "default_node_pool_vm_size" {`);
    w(` description = "VM size for the default node pool"`);
    w(` type = string`);
    w(` default = "${defaultPool.vmSize}"`);
    w(`}`);
    w();
    w(`variable "default_node_pool_count" {`);
    w(` description = "Node count for the default pool"`);
    w(` type = number`);
    w(` default = ${defaultPool.count}`);
    w(`}`);
    w();
    // min/max variables only make sense when the autoscaler is on.
    if (defaultPool.enableAutoScaling) {
      w(`variable "default_node_pool_min_count" {`);
      w(` description = "Autoscaler minimum node count"`);
      w(` type = number`);
      w(` default = ${defaultPool.minCount}`);
      w(`}`);
      w();
      w(`variable "default_node_pool_max_count" {`);
      w(` description = "Autoscaler maximum node count"`);
      w(` type = number`);
      w(` default = ${defaultPool.maxCount}`);
      w(`}`);
      w();
    }
  }

  if (pgServer) {
    w(`variable "postgres_admin_login" {`);
    w(` description = "Postgres administrator login"`);
    w(` type = string`);
    w(` default = "${pgServer.administratorLogin || "fopsadmin"}"`);
    w(`}`);
    w();
    // Password is never exported from Azure — emitted with no default,
    // sensitive, to be supplied by the operator.
    w(`variable "postgres_admin_password" {`);
    w(` description = "Postgres administrator password"`);
    w(` type = string`);
    w(` sensitive = true`);
    w(`}`);
    w();
  }

  if (fluxConfigs?.length) {
    w(`variable "flux_github_token" {`);
    w(` description = "GitHub PAT for Flux HTTPS access"`);
    w(` type = string`);
    w(` sensitive = true`);
    w(` default = ""`);
    w(`}`);
    w();
  }

  if (keyVault) {
    w(`variable "tenant_id" {`);
    w(` description = "Azure AD tenant ID for Key Vault"`);
    w(` type = string`);
    w(` default = "${keyVault.properties?.tenantId || ""}"`);
    w(`}`);
    w();
  }

  // ── provider ───────────────────────────────────────────────────────────────

  w(`# ─── Provider ─────────────────────────────────────────────────────────────────`);
  w();
  w(`terraform {`);
  w(` required_providers {`);
  w(` azurerm = {`);
  w(` source = "hashicorp/azurerm"`);
  w(` version = "~> 4.0"`);
  w(` }`);
  w(` }`);
  w(`}`);
  w();
  w(`provider "azurerm" {`);
  w(` features {}`);
  w(`}`);
  w();

  // ── resource group ─────────────────────────────────────────────────────────

  w(`# ─── Resource Group ──────────────────────────────────────────────────────────`);
  w();
  w(`resource "azurerm_resource_group" "aks" {`);
  w(` name = var.resource_group_name`);
  w(` location = var.location`);
  w(`}`);
  w();

  // ── AKS cluster ────────────────────────────────────────────────────────────

  w(`# ─── AKS Cluster ─────────────────────────────────────────────────────────────`);
  w();
  w(`resource "azurerm_kubernetes_cluster" "${tfName}" {`);
  w(` name = var.cluster_name`);
  w(` location = azurerm_resource_group.aks.location`);
  w(` resource_group_name = azurerm_resource_group.aks.name`);
  w(` dns_prefix = var.dns_prefix`);
  w(` kubernetes_version = var.kubernetes_version`);
  w(` sku_tier = var.sku_tier`);
  w();

  // Optional sub-blocks: only emitted when present on the live cluster.
  if (apiAccess.authorizedIpRanges?.length) {
    w(` api_server_access_profile {`);
    w(` authorized_ip_ranges = [${apiAccess.authorizedIpRanges.map((r) => `"${r}"`).join(", ")}]`);
    w(` }`);
    w();
  }

  if (autoUpgrade.upgradeChannel) {
    w(` automatic_upgrade_channel = "${autoUpgrade.upgradeChannel}"`);
    w();
  }

  if (identity.type) {
    // Any non-SystemAssigned identity type is mapped to UserAssigned.
    const identityType = identity.type === "SystemAssigned" ? "SystemAssigned" : "UserAssigned";
    w(` identity {`);
    w(` type = "${identityType}"`);
    w(` }`);
    w();
  }

  if (defaultPool) {
    w(` default_node_pool {`);
    w(` name = "${defaultPool.name}"`);
    w(` vm_size = var.default_node_pool_vm_size`);
    w(` node_count = var.default_node_pool_count`);
    if (defaultPool.enableAutoScaling) {
      w(` auto_scaling_enabled = true`);
      w(` min_count = var.default_node_pool_min_count`);
      w(` max_count = var.default_node_pool_max_count`);
    }
    if (defaultPool.maxPods) {
      w(` max_pods = ${defaultPool.maxPods}`);
    }
    if (defaultPool.osDiskSizeGb) {
      w(` os_disk_size_gb = ${defaultPool.osDiskSizeGb}`);
    }
    // "Managed" is the provider default, so only non-default disk types are emitted.
    if (defaultPool.osDiskType && defaultPool.osDiskType !== "Managed") {
      w(` os_disk_type = "${defaultPool.osDiskType}"`);
    }
    if (defaultPool.vnetSubnetId) {
      w(` vnet_subnet_id = "${defaultPool.vnetSubnetId}"`);
    }
    if (defaultPool.availabilityZones?.length) {
      w(` zones = [${defaultPool.availabilityZones.map((z) => `"${z}"`).join(", ")}]`);
    }
    w(` }`);
    w();
  }

  if (netProfile.networkPlugin) {
    w(` network_profile {`);
    w(` network_plugin = "${netProfile.networkPlugin}"`);
    if (netProfile.networkPolicy) w(` network_policy = "${netProfile.networkPolicy}"`);
    if (netProfile.serviceCidr) w(` service_cidr = "${netProfile.serviceCidr}"`);
    if (netProfile.dnsServiceIp) w(` dns_service_ip = "${netProfile.dnsServiceIp}"`);
    if (netProfile.loadBalancerSku) w(` load_balancer_sku = "${netProfile.loadBalancerSku}"`);
    w(` }`);
    w();
  }

  if (cluster.oidcIssuerProfile?.enabled) {
    w(` oidc_issuer_enabled = true`);
  }
  if (cluster.securityProfile?.workloadIdentity?.enabled) {
    w(` workload_identity_enabled = true`);
  }

  // Tags are emitted via JSON.stringify so keys/values are safely quoted/escaped.
  const tags = cluster.tags || {};
  const tagEntries = Object.entries(tags);
  if (tagEntries.length) {
    w();
    w(` tags = {`);
    for (const [k, v] of tagEntries) {
      w(` ${JSON.stringify(k)} = ${JSON.stringify(v)}`);
    }
    w(` }`);
  }

  w(`}`);
  w();

  // ── extra node pools ───────────────────────────────────────────────────────
  // Every non-default pool becomes its own node-pool resource.

  for (const pool of extraPools) {
    const poolTf = pool.name.replace(/[^a-zA-Z0-9_]/g, "_");
    w(`resource "azurerm_kubernetes_cluster_node_pool" "${poolTf}" {`);
    w(` name = "${pool.name}"`);
    w(` kubernetes_cluster_id = azurerm_kubernetes_cluster.${tfName}.id`);
    w(` vm_size = "${pool.vmSize}"`);
    w(` node_count = ${pool.count}`);
    if (pool.enableAutoScaling) {
      w(` auto_scaling_enabled = true`);
      w(` min_count = ${pool.minCount}`);
      w(` max_count = ${pool.maxCount}`);
    }
    if (pool.maxPods) {
      w(` max_pods = ${pool.maxPods}`);
    }
    if (pool.mode) {
      w(` mode = "${pool.mode}"`);
    }
    if (pool.osDiskSizeGb) {
      w(` os_disk_size_gb = ${pool.osDiskSizeGb}`);
    }
    // Spot pools: -1 means "pay up to on-demand price" and is the provider
    // default, so it is omitted.
    if (pool.scaleSetPriority === "Spot") {
      w(` priority = "Spot"`);
      w(` eviction_policy = "${pool.scaleSetEvictionPolicy || "Delete"}"`);
      if (pool.spotMaxPrice != null && pool.spotMaxPrice !== -1) {
        w(` spot_max_price = ${pool.spotMaxPrice}`);
      }
    }
    if (pool.vnetSubnetId) {
      w(` vnet_subnet_id = "${pool.vnetSubnetId}"`);
    }
    if (pool.availabilityZones?.length) {
      w(` zones = [${pool.availabilityZones.map((z) => `"${z}"`).join(", ")}]`);
    }
    const poolTags = pool.tags || {};
    const poolTagEntries = Object.entries(poolTags);
    if (poolTagEntries.length) {
      w();
      w(` tags = {`);
      for (const [k, v] of poolTagEntries) {
        w(` ${JSON.stringify(k)} = ${JSON.stringify(v)}`);
      }
      w(` }`);
    }
    w(`}`);
    w();
  }

  // ── Postgres Flexible Server ───────────────────────────────────────────────

  if (pgServer) {
    const pgTf = (pgServer.name || "psql").replace(/[^a-zA-Z0-9_]/g, "_");
    const pgSku = pgServer.sku || {};
    const pgStorage = pgServer.storage || {};
    w(`# ─── Postgres Flexible Server ─────────────────────────────────────────────────`);
    w();
    w(`resource "azurerm_postgresql_flexible_server" "${pgTf}" {`);
    w(` name = "${pgServer.name}"`);
    w(` resource_group_name = azurerm_resource_group.aks.name`);
    w(` location = azurerm_resource_group.aks.location`);
    w(` version = "${pgServer.version || "16"}"`);
    w(` administrator_login = var.postgres_admin_login`);
    w(` administrator_password = var.postgres_admin_password`);
    if (pgSku.name) w(` sku_name = "${pgSku.name}"`);
    // NOTE(review): zone is gated on pgSku.tier but emits availabilityZone —
    // presumably intentional (zone only meaningful for tiered SKUs); confirm.
    if (pgSku.tier) w(` zone = "${pgServer.availabilityZone || "1"}"`);
    if (pgStorage.storageSizeGb) {
      // Azure reports GB; the Terraform argument takes MB.
      w(` storage_mb = ${pgStorage.storageSizeGb * 1024}`);
    }
    if (pgServer.delegatedSubnetArguments?.subnetArmResourceId) {
      w(` delegated_subnet_id = "${pgServer.delegatedSubnetArguments.subnetArmResourceId}"`);
    }
    if (pgServer.network?.privateDnsZoneArmResourceId) {
      w(` private_dns_zone_id = "${pgServer.network.privateDnsZoneArmResourceId}"`);
    }
    const pgTags = pgServer.tags || {};
    const pgTagEntries = Object.entries(pgTags);
    if (pgTagEntries.length) {
      w();
      w(` tags = {`);
      for (const [k, v] of pgTagEntries) {
        w(` ${JSON.stringify(k)} = ${JSON.stringify(v)}`);
      }
      w(` }`);
    }
    w(`}`);
    w();
  }

  // ── Key Vault ──────────────────────────────────────────────────────────────

  if (keyVault) {
    const kvTf = (keyVault.name || "kv").replace(/[^a-zA-Z0-9_]/g, "_");
    w(`# ─── Key Vault ────────────────────────────────────────────────────────────────`);
    w();
    w(`resource "azurerm_key_vault" "${kvTf}" {`);
    w(` name = "${keyVault.name}"`);
    w(` resource_group_name = azurerm_resource_group.aks.name`);
    w(` location = azurerm_resource_group.aks.location`);
    w(` tenant_id = var.tenant_id`);
    w(` sku_name = "${keyVault.properties?.sku?.name || "standard"}"`);
    w(` enable_rbac_authorization = ${keyVault.properties?.enableRbacAuthorization ?? true}`);
    if (keyVault.properties?.enableSoftDelete !== undefined) {
      w(` soft_delete_retention_days = ${keyVault.properties?.softDeleteRetentionInDays || 90}`);
    }
    if (keyVault.properties?.enablePurgeProtection) {
      w(` purge_protection_enabled = true`);
    }
    const kvTags = keyVault.tags || {};
    const kvTagEntries = Object.entries(kvTags);
    if (kvTagEntries.length) {
      w();
      w(` tags = {`);
      for (const [k, v] of kvTagEntries) {
        w(` ${JSON.stringify(k)} = ${JSON.stringify(v)}`);
      }
      w(` }`);
    }
    w(`}`);
    w();
  }

  // ── Flux extension ─────────────────────────────────────────────────────────

  if (fluxExt) {
    w(`# ─── Flux ─────────────────────────────────────────────────────────────────────`);
    w();
    w(`resource "azurerm_kubernetes_cluster_extension" "flux" {`);
    w(` name = "flux"`);
    w(` cluster_id = azurerm_kubernetes_cluster.${tfName}.id`);
    w(` extension_type = "microsoft.flux"`);
    w(`}`);
    w();
  }

  // ── Flux GitOps configurations ─────────────────────────────────────────────

  for (const cfg of (fluxConfigs || [])) {
    const cfgTf = (cfg.name || "flux_system").replace(/[^a-zA-Z0-9_]/g, "_");
    const gitRepo = cfg.gitRepository || {};
    const kustomizations = cfg.kustomizations || {};

    w(`resource "azurerm_kubernetes_flux_configuration" "${cfgTf}" {`);
    w(` name = "${cfg.name}"`);
    w(` cluster_id = azurerm_kubernetes_cluster.${tfName}.id`);
    w(` namespace = "${cfg.namespace || "flux-system"}"`);
    w(` scope = "${cfg.scope || "cluster"}"`);
    w();
    if (gitRepo.url) {
      w(` git_repository {`);
      w(` url = "${gitRepo.url}"`);
      // Always emitted as a branch reference; tag/semver refs are not mapped here.
      w(` reference_type = "branch"`);
      w(` reference_value = "${gitRepo.repositoryRef?.branch || "main"}"`);
      if (gitRepo.httpsUser) {
        w(` https_user = "${gitRepo.httpsUser}"`);
        w(` https_key = var.flux_github_token`);
      }
      w(` }`);
      w();
    }
    for (const [ksName, ks] of Object.entries(kustomizations)) {
      w(` kustomizations {`);
      w(` name = "${ksName}"`);
      if (ks.path) w(` path = "${ks.path}"`);
      if (ks.prune != null) w(` prune = ${ks.prune}`);
      w(` }`);
      w();
    }

    w(` depends_on = [azurerm_kubernetes_cluster_extension.flux]`);
    w(`}`);
    w();
  }

  // ── outputs ────────────────────────────────────────────────────────────────

  w(`# ─── Outputs ─────────────────────────────────────────────────────────────────`);
  w();
  w(`output "cluster_name" {`);
  w(` value = azurerm_kubernetes_cluster.${tfName}.name`);
  w(`}`);
  w();
  w(`output "cluster_fqdn" {`);
  w(` value = azurerm_kubernetes_cluster.${tfName}.fqdn`);
  w(`}`);
  w();
  w(`output "kube_config" {`);
  w(` value = azurerm_kubernetes_cluster.${tfName}.kube_config_raw`);
  w(` sensitive = true`);
  w(`}`);
  w();
  if (pgServer) {
    w(`output "postgres_fqdn" {`);
    const pgTf = (pgServer.name || "psql").replace(/[^a-zA-Z0-9_]/g, "_");
    w(` value = azurerm_postgresql_flexible_server.${pgTf}.fqdn`);
    w(`}`);
    w();
  }
  if (keyVault) {
    const kvTf = (keyVault.name || "kv").replace(/[^a-zA-Z0-9_]/g, "_");
    w(`output "key_vault_uri" {`);
    w(` value = azurerm_key_vault.${kvTf}.vault_uri`);
    w(`}`);
    w();
  }

  return lines.join("\n");
}
1278
-
1279
- // ── aks kubeconfig ────────────────────────────────────────────────────────────
1280
-
1281
/**
 * Merge an AKS cluster's credentials into the local kubeconfig and print the
 * kubectl command for switching to the new context.
 *
 * @param {object} [opts]
 * @param {string} [opts.clusterName] - Cluster to target; resolved by requireCluster.
 * @param {string} [opts.profile]     - Azure subscription.
 * @param {boolean} [opts.admin]      - Fetch admin credentials instead of user ones.
 */
export async function aksKubeconfig(opts = {}) {
  const execa = await lazyExeca();
  const subscription = opts.profile;

  // Preconditions: az CLI present and logged into the right subscription.
  await ensureAzCli(execa);
  await ensureAzAuth(execa, { subscription });

  const cluster = requireCluster(opts.clusterName);

  await getCredentials(execa, {
    clusterName: cluster.clusterName,
    rg: cluster.resourceGroup,
    sub: subscription,
    admin: opts.admin,
  });

  console.log(OK(`\n ✓ Kubeconfig merged for "${cluster.clusterName}"`));
  hint(`kubectl config use-context ${cluster.clusterName}\n`);
}
1292
-
1293
- // ── aks node-pool add ─────────────────────────────────────────────────────────
1294
-
1295
/**
 * Add a node pool to an existing AKS cluster via `az aks nodepool add`.
 *
 * @param {object} [opts]
 * @param {string} [opts.clusterName] - Cluster to modify; resolved by requireCluster.
 * @param {string} [opts.profile]     - Azure subscription.
 * @param {string} opts.poolName      - Required; exits(1) when missing.
 * @param {number} [opts.nodeCount]   - Defaults to 3.
 * @param {string} [opts.nodeVmSize]  - Defaults to AKS_DEFAULTS.nodeVmSize.
 * @param {string} [opts.mode]        - Optional pool mode (System/User).
 * @param {string} [opts.labels]      - Optional node labels.
 * @param {string} [opts.taints]      - Optional node taints.
 * @param {number} [opts.maxPods]     - Optional max pods per node.
 */
export async function aksNodePoolAdd(opts = {}) {
  const execa = await lazyExeca();
  const subscription = opts.profile;
  await ensureAzCli(execa);
  await ensureAzAuth(execa, { subscription });
  const { clusterName, resourceGroup: rg } = requireCluster(opts.clusterName);

  const poolName = opts.poolName;
  const nodeCount = opts.nodeCount || 3;
  const vmSize = opts.nodeVmSize || AKS_DEFAULTS.nodeVmSize;

  // Guard: az requires an explicit pool name.
  if (!poolName) {
    console.error(ERR("\n --pool-name is required.\n"));
    process.exit(1);
  }

  banner(`Adding node pool "${poolName}" to ${clusterName}`);
  kvLine("Size", DIM(`${nodeCount} x ${vmSize}`));
  hint("This takes a few minutes…\n");

  // Optional flags are assembled declaratively and appended after the base args.
  const optionalFlags = [
    ...(opts.mode ? ["--mode", opts.mode] : []),
    ...(opts.labels ? ["--labels", opts.labels] : []),
    ...(opts.taints ? ["--node-taints", opts.taints] : []),
    ...(opts.maxPods ? ["--max-pods", String(opts.maxPods)] : []),
  ];

  await execa("az", [
    "aks", "nodepool", "add",
    "--resource-group", rg,
    "--cluster-name", clusterName,
    "--name", poolName,
    "--node-count", String(nodeCount),
    "--node-vm-size", vmSize,
    "--output", "none",
    ...subArgs(subscription),
    ...optionalFlags,
  ], { timeout: 600000 });

  console.log(OK(` ✓ Node pool "${poolName}" added\n`));
}
1333
-
1334
- // ── aks node-pool remove ──────────────────────────────────────────────────────
1335
-
1336
/**
 * Delete a node pool from an AKS cluster via `az aks nodepool delete`.
 *
 * @param {object} [opts]
 * @param {string} [opts.clusterName] - Cluster to modify; resolved by requireCluster.
 * @param {string} [opts.profile]     - Azure subscription.
 * @param {string} opts.poolName      - Required; exits(1) when missing.
 */
export async function aksNodePoolRemove(opts = {}) {
  const execa = await lazyExeca();
  const subscription = opts.profile;
  await ensureAzCli(execa);
  await ensureAzAuth(execa, { subscription });

  const { clusterName, resourceGroup: rg } = requireCluster(opts.clusterName);
  const poolName = opts.poolName;
  if (!poolName) {
    console.error(ERR("\n Pool name is required.\n"));
    process.exit(1);
  }

  banner(`Removing node pool "${poolName}" from ${clusterName}`);
  hint("This takes a few minutes…\n");

  const deleteArgs = [
    "aks", "nodepool", "delete",
    "--resource-group", rg,
    "--cluster-name", clusterName,
    "--name", poolName,
    "--output", "none",
    ...subArgs(subscription),
  ];
  // Deleting a pool drains and removes every node — allow up to 10 minutes.
  await execa("az", deleteArgs, { timeout: 600000 });

  console.log(OK(` ✓ Node pool "${poolName}" removed\n`));
}
1362
-
1363
- // ── flux init (scaffold cluster manifests) ───────────────────────────────────
1364
-
1365
/**
 * Scaffold per-cluster Flux manifests into a local clone of the flux repo by
 * copying the cluster template tree and substituting {{…}} placeholders.
 * Purely local: touches the filesystem only, never the cluster or Azure.
 *
 * @param {object} [opts]
 * @param {string} opts.clusterName - Required; exits(1) when missing.
 * @param {string} opts.fluxRepo    - Required path to the local flux repo clone.
 * @param {string} [opts.overlay]   - Kustomize overlay name (default "demo-azure").
 * @param {string} [opts.namespace] - Target namespace (default "foundation").
 * @param {string} [opts.fluxPath]  - Flux path in the repo (default clusters/<name>).
 *   Remaining opts.* feed individual template placeholders; any placeholder not
 *   provided is left as-is in the output and reported to the user to fill in.
 */
export async function aksFluxInit(opts = {}) {
  const clusterName = opts.clusterName;
  if (!clusterName) {
    console.error(ERR("\n Cluster name is required."));
    hint("Usage: fops azure aks flux init <name> --flux-repo <path>\n");
    process.exit(1);
  }

  const overlay = opts.overlay || "demo-azure";
  const namespace = opts.namespace || "foundation";
  const fluxRepoPath = opts.fluxRepo;

  if (!fluxRepoPath) {
    console.error(ERR("\n --flux-repo <path> is required (path to your local flux repo clone)."));
    hint("Example: fops azure aks flux init alessio --flux-repo ../flux\n");
    process.exit(1);
  }

  // Sanity-check the repo: a flux repo is expected to have a clusters/ dir.
  const fluxRepo = path.resolve(fluxRepoPath);
  if (!fs.existsSync(path.join(fluxRepo, "clusters"))) {
    console.error(ERR(`\n "${fluxRepo}" does not look like a flux repo (no clusters/ dir).`));
    process.exit(1);
  }

  // Refuse to overwrite an existing cluster directory.
  const clusterDir = path.join(fluxRepo, "clusters", clusterName);
  if (fs.existsSync(clusterDir)) {
    console.error(ERR(`\n Cluster directory already exists: ${clusterDir}`));
    hint("Remove it first or use a different name.\n");
    process.exit(1);
  }

  // Template source: prefer the project's flux-templates/cluster, then fall
  // back to a sibling foundation-flux checkout relative to this plugin file.
  const state = readState();
  const projectRoot = state.azure?.projectRoot || state.projectRoot;
  const thisDir = path.dirname(fileURLToPath(import.meta.url));
  const templateRoot = projectRoot && fs.existsSync(path.join(projectRoot, "flux-templates", "cluster"))
    ? path.join(projectRoot, "flux-templates", "cluster")
    : path.resolve(thisDir, "../../../../foundation-flux/cluster");
  if (!fs.existsSync(templateRoot)) {
    console.error(ERR("\n Template directory not found."));
    hint(`Expected at: ${templateRoot}`);
    hint("Create flux-templates/cluster/ in your project or install foundation-flux.\n");
    process.exit(1);
  }

  // Placeholder → value map. Unsupplied values map a placeholder to itself so
  // substitution is a no-op and the placeholder survives into the output.
  const vars = {
    "{{CLUSTER_NAME}}": clusterName,
    "{{OVERLAY}}": overlay,
    "{{NAMESPACE}}": namespace,
    "{{FLUX_PATH}}": opts.fluxPath || `clusters/${clusterName}`,
    "{{POSTGRES_HOST}}": opts.postgresHost || `{{POSTGRES_HOST}}`,
    "{{ACCESS_KEY_ID}}": opts.accessKeyId || `{{ACCESS_KEY_ID}}`,
    "{{AZURE_SP_CLIENT_ID_B64}}": opts.azureSpClientId || `{{AZURE_SP_CLIENT_ID_B64}}`,
    "{{AZURE_SP_CLIENT_SECRET_B64}}": opts.azureSpClientSecret || `{{AZURE_SP_CLIENT_SECRET_B64}}`,
    "{{AZURE_IDENTITY_ID}}": opts.azureIdentityId || `{{AZURE_IDENTITY_ID}}`,
    "{{AZURE_TENANT_ID}}": opts.azureTenantId || `{{AZURE_TENANT_ID}}`,
    "{{AZURE_KEYVAULT_URL}}": opts.azureKeyvaultUrl || `{{AZURE_KEYVAULT_URL}}`,
  };

  // Replace every known placeholder in a template file's content.
  function applyVars(content) {
    let out = content;
    for (const [k, v] of Object.entries(vars)) {
      out = out.replaceAll(k, v);
    }
    return out;
  }

  // Recursively copy the template tree, substituting placeholders in each file.
  function copyDir(src, dest) {
    fs.mkdirSync(dest, { recursive: true });
    for (const entry of fs.readdirSync(src, { withFileTypes: true })) {
      const srcPath = path.join(src, entry.name);
      const destPath = path.join(dest, entry.name);
      if (entry.isDirectory()) {
        copyDir(srcPath, destPath);
      } else {
        const content = fs.readFileSync(srcPath, "utf8");
        fs.writeFileSync(destPath, applyVars(content));
      }
    }
  }

  banner(`Flux Init: ${clusterName}`);
  kvLine("Cluster", clusterName);
  kvLine("Overlay", overlay);
  kvLine("Namespace", namespace);
  kvLine("Output", DIM(clusterDir));
  console.log("");

  // Scaffold cluster directory from templates
  hint("Scaffolding cluster manifests…");
  copyDir(templateRoot, clusterDir);
  console.log(OK(" ✓ Cluster directory created"));

  // Report placeholders that were left unfilled (value still starts with "{{").
  const remaining = Object.entries(vars)
    .filter(([, v]) => v.startsWith("{{"))
    .map(([k]) => k);

  if (remaining.length) {
    console.log(`\n ${WARN("Placeholders to fill in:")}`);
    for (const p of remaining) {
      hint(` ${p}`);
    }
  }

  // Verify the overlay exists in the flux repo (warn only — not fatal).
  const sampleOverlay = path.join(fluxRepo, "apps/foundation/backend/overlays/meshx", overlay);
  if (!fs.existsSync(sampleOverlay)) {
    console.log(WARN(`\n ⚠ Overlay "${overlay}" not found in apps/ — app Kustomizations will fail until it exists.`));
    hint(`Create overlays or use an existing one: --overlay demo-azure`);
  }

  console.log("");
  hint("Next steps:");
  hint(` 1. Fill in remaining {{…}} placeholders in clusters/${clusterName}/`);
  hint(` 2. Commit and push to the flux repo`);
  hint(` 3. Bootstrap Flux: fops azure aks flux bootstrap ${clusterName}`);
  console.log("");
}
1483
-
1484
- // ── flux bootstrap ────────────────────────────────────────────────────────────
1485
-
1486
/**
 * Bootstrap (or re-point) Flux on an AKS cluster: refresh kubeconfig, resolve
 * the GitHub token and flux repo settings, install the GHCR pull secret, run
 * the Flux reconciliation, then persist the resolved flux config in state.
 *
 * @param {object} [opts]
 * @param {string} [opts.clusterName] - Cluster to bootstrap; resolved by requireCluster.
 * @param {string} [opts.profile]     - Azure subscription.
 * @param {string} [opts.repo]   - Flux repo name override.
 * @param {string} [opts.owner]  - Flux repo owner override.
 * @param {string} [opts.path]   - Flux path override.
 * @param {string} [opts.branch] - Flux branch override.
 */
export async function aksFluxBootstrap(opts = {}) {
  const execa = await lazyExeca();
  const subscription = opts.profile;
  await ensureAzCli(execa);
  await ensureAzAuth(execa, { subscription });
  const cluster = requireCluster(opts.clusterName);

  // Make sure kubectl talks to this cluster before touching Flux.
  await getCredentials(execa, {
    clusterName: cluster.clusterName,
    rg: cluster.resourceGroup,
    sub: subscription,
  });

  const githubToken = resolveGithubToken(opts);
  if (!githubToken) {
    console.error(ERR("\n GitHub token required for Flux bootstrap."));
    hint("Authenticate: gh auth login or set GITHUB_TOKEN\n");
    process.exit(1);
  }

  // CLI overrides take precedence inside resolveFluxConfig.
  const { fluxRepo, fluxOwner, fluxPath, fluxBranch } = resolveFluxConfig(cluster.clusterName, {
    fluxRepo: opts.repo,
    fluxOwner: opts.owner,
    fluxPath: opts.path,
    fluxBranch: opts.branch,
  });

  await ensureGhcrPullSecret(execa, { clusterName: cluster.clusterName, githubToken });

  await reconcileFlux(execa, {
    clusterName: cluster.clusterName,
    rg: cluster.resourceGroup,
    sub: subscription,
    githubToken,
    repo: fluxRepo,
    owner: fluxOwner,
    path: fluxPath,
    branch: fluxBranch,
  });

  // Remember what we bootstrapped so later commands can reuse it.
  writeClusterState(cluster.clusterName, {
    flux: { repo: fluxRepo, owner: fluxOwner, path: fluxPath, branch: fluxBranch },
  });
  console.log(OK(` ✓ Flux now using ${fluxOwner}/${fluxRepo}\n`));
}
1533
-
1534
- // ── data bootstrap (demo data mesh, same as fops bootstrap targeting AKS backend) ─
1535
-
1536
/**
 * Locate the foundation-compose repo root, identified by the presence of
 * scripts/bootstrap_foundation.py. Search order:
 *   1. $FOUNDATION_ROOT
 *   2. projectRoot recorded in ~/.fops.json (best effort)
 *   3. walking up from the current working directory
 *
 * @returns {string|null} Absolute path to the repo root, or null if not found.
 */
function findBootstrapRepoRoot() {
  const marker = "scripts/bootstrap_foundation.py";
  const hasMarker = (dir) => fs.existsSync(path.join(dir, marker));

  // 1. Explicit override via environment variable.
  const envRoot = process.env.FOUNDATION_ROOT;
  if (envRoot && hasMarker(envRoot)) return path.resolve(envRoot);

  // 2. Project root persisted in ~/.fops.json (ignore any read/parse failure).
  try {
    const cfg = JSON.parse(fs.readFileSync(path.join(os.homedir(), ".fops.json"), "utf8"));
    const recorded = cfg?.projectRoot;
    if (recorded && hasMarker(recorded)) return path.resolve(recorded);
  } catch {}

  // 3. Walk up from cwd until the marker is found or the filesystem root is hit.
  let current = path.resolve(process.cwd());
  while (true) {
    if (hasMarker(current)) return current;
    const parent = path.dirname(current);
    if (parent === current) return null;
    current = parent;
  }
}
1555
-
1556
/**
 * Best-effort discovery of the Foundation backend API URL: read every ingress
 * host from the cluster and build "https://<first-host>/api".
 *
 * @param {Function} execa       - execa-style runner used to invoke kubectl.
 * @param {string} clusterName   - kubeconfig context name.
 * @returns {Promise<string|null>} Derived URL, or null when kubectl fails or
 *   no ingress host exists.
 */
async function discoverFoundationApiUrlFromCluster(execa, clusterName) {
  try {
    const result = await execa("kubectl", [
      "get", "ingress", "-A",
      "-o", "jsonpath={.items[*].spec.rules[*].host}",
      "--context", clusterName,
    ], { timeout: 15000 });
    const hosts = (result.stdout || "").trim().split(/\s+/).filter(Boolean);
    if (hosts.length > 0) return `https://${hosts[0]}/api`;
  } catch { /* any kubectl failure means "not discoverable" */ }
  return null;
}
1568
-
1569
// Run the Foundation demo-data bootstrap script against an AKS cluster backend.
// Resolves the backend API URL (flag → saved cluster state → cluster ingress),
// gathers Foundation credentials (env → ~/.fops.json → interactive prompt),
// prepares a Python venv, then streams scripts/bootstrap_foundation.py output.
// Exits the process (code 1) on any unrecoverable error.
export async function aksDataBootstrap(opts = {}) {
  const execa = await lazyExeca();
  await ensureAzCli(execa);
  await ensureAzAuth(execa, { subscription: opts.profile });
  const cl = requireCluster(opts.clusterName);

  // Merge kubeconfig first so the ingress-discovery kubectl call below works.
  await getCredentials(execa, {
    clusterName: cl.clusterName,
    rg: cl.resourceGroup,
    sub: opts.profile,
  });

  // API URL resolution order: --api-url flag, saved cluster state, then the
  // first ingress host on the cluster (persisted to state for next time).
  let apiUrl = opts.apiUrl?.trim() || cl.foundationApiUrl?.trim();
  if (!apiUrl) {
    apiUrl = await discoverFoundationApiUrlFromCluster(execa, cl.clusterName);
    if (apiUrl) {
      hint(`Using API URL from cluster ingress: ${apiUrl}`);
      writeClusterState(cl.clusterName, { foundationApiUrl: apiUrl });
    }
  }
  if (!apiUrl) {
    console.error(ERR("\n Foundation backend API URL is required."));
    hint("Pass the backend API base URL (e.g. https://foundation.example.com/api):");
    hint(" fops azure aks bootstrap " + cl.clusterName + " --api-url https://your-foundation-host/api\n");
    process.exit(1);
  }
  // Strip trailing slashes; warn (but proceed) if the /api suffix is missing.
  const normalized = apiUrl.replace(/\/+$/, "");
  if (!normalized.endsWith("/api")) {
    console.log(WARN(" API URL should usually end with /api (e.g. https://host/api). Using as-is."));
  }

  const root = findBootstrapRepoRoot();
  if (!root) {
    console.error(ERR("\n Could not find foundation-compose root (scripts/bootstrap_foundation.py)."));
    hint("Run from the foundation-compose directory, or set FOUNDATION_ROOT.\n");
    process.exit(1);
  }

  // Script environment: process env + project .env, with the API URL forced in.
  const { loadEnvFromFile } = await import("./azure-helpers.js");
  const projectEnv = loadEnvFromFile(path.join(root, ".env"));
  let bootstrapEnv = {
    ...process.env,
    ...projectEnv,
    PYTHONUNBUFFERED: "1",
    API_URL: normalized,
  };

  // Credential resolution: env vars → foundation plugin config in ~/.fops.json
  // → interactive prompt (skipped when --yes is set).
  let hasCreds = !!(bootstrapEnv.BEARER_TOKEN?.trim()
    || (bootstrapEnv.QA_USERNAME?.trim() && bootstrapEnv.QA_PASSWORD != null));
  if (!hasCreds) {
    try {
      const fopsPath = path.join(os.homedir(), ".fops.json");
      const raw = JSON.parse(fs.readFileSync(fopsPath, "utf8"));
      const cfg = raw?.plugins?.entries?.["fops-plugin-foundation"]?.config || {};
      if (cfg.bearerToken?.trim()) {
        bootstrapEnv.BEARER_TOKEN = cfg.bearerToken.trim();
        hasCreds = true;
      } else if (cfg.user?.trim() && cfg.password) {
        bootstrapEnv.QA_USERNAME = cfg.user.trim();
        bootstrapEnv.QA_PASSWORD = cfg.password;
        hasCreds = true;
      }
    } catch {}
  }
  if (!hasCreds && !opts.yes) {
    console.log(WARN(" No Foundation credentials in env or ~/.fops.json."));
    const { getInquirer } = await import(resolveCliSrc("lazy.js"));
    const inquirer = await getInquirer();
    const { authMethod } = await inquirer.prompt([{
      type: "list",
      name: "authMethod",
      message: "Authentication method:",
      choices: [
        { name: "Username / password", value: "password" },
        { name: "Bearer token (JWT)", value: "jwt" },
      ],
    }]);
    if (authMethod === "jwt") {
      const { token } = await inquirer.prompt([{ type: "input", name: "token", message: "Bearer token:", validate: (v) => v?.trim() ? true : "Token required" }]);
      bootstrapEnv.BEARER_TOKEN = token.trim();
    } else {
      const { user } = await inquirer.prompt([{ type: "input", name: "user", message: "Username (email):", validate: (v) => v?.trim() ? true : "Username required" }]);
      const { password } = await inquirer.prompt([{ type: "password", name: "password", message: "Password:", mask: "*", validate: (v) => v ? true : "Password required" }]);
      bootstrapEnv.QA_USERNAME = user.trim();
      bootstrapEnv.QA_PASSWORD = password;
    }
    hasCreds = true;
  }
  if (!hasCreds) {
    console.error(ERR(" Set BEARER_TOKEN or QA_USERNAME+QA_PASSWORD (env or ~/.fops.json), or run without --yes.\n"));
    process.exit(1);
  }

  banner("Bootstrap demo data (AKS)");
  kvLine("Cluster", cl.clusterName);
  kvLine("API URL", normalized);
  hint("Same as fops bootstrap — creates demo data mesh via backend API.\n");

  // One-time venv setup for the bootstrap script; bootstrap pip via get-pip.py
  // if the freshly-created venv lacks a pip binary.
  const scriptsDir = path.join(root, "scripts");
  const venvPython = path.join(scriptsDir, ".venv", "bin", "python");
  const scriptPath = path.join(scriptsDir, "bootstrap_foundation.py");
  if (!fs.existsSync(venvPython)) {
    hint("Creating scripts/.venv…");
    await execa("python3", ["-m", "venv", path.join(scriptsDir, ".venv")], { cwd: root, timeout: 30000 });
    const pip = path.join(scriptsDir, ".venv", "bin", "pip");
    if (!fs.existsSync(pip)) {
      hint("Installing pip into venv…");
      await execa("sh", ["-c", `curl -sS https://bootstrap.pypa.io/get-pip.py | ${venvPython}`], { cwd: root, timeout: 60000 });
    }
    const reqPath = path.join(scriptsDir, "requirements.txt");
    if (fs.existsSync(reqPath)) {
      await execa(venvPython, ["-m", "pip", "install", "--quiet", "-r", reqPath], { cwd: root, timeout: 120000 });
    }
  }

  // Stream script output live while also capturing it for error diagnosis.
  let captured = "";
  const proc = execa(venvPython, ["-u", scriptPath], {
    cwd: root,
    timeout: 600_000,
    env: bootstrapEnv,
    reject: false,
  });
  proc.stdout?.on("data", (chunk) => { const s = chunk.toString(); captured += s; process.stdout.write(s); });
  proc.stderr?.on("data", (chunk) => { const s = chunk.toString(); captured += s; process.stderr.write(s); });
  const result = await proc;

  if (result.exitCode === 0) {
    console.log(OK("\n ✓ Bootstrap complete! Demo data mesh created on the cluster backend."));
    writeClusterState(cl.clusterName, { foundationApiUrl: normalized });
    return;
  }
  if ((captured || "").includes("401") || (captured || "").includes("Insufficient permissions")) {
    hint("\n If the user needs Foundation Admin, grant it in the UI or via your IdP, then retry.");
  }
  // Normalize sentinel exit codes (255 / -1) to a plain failure code.
  const code = result.exitCode === 255 || result.exitCode === -1 ? 1 : result.exitCode;
  console.error(ERR(`\n Bootstrap failed (exit code ${code}).`));
  hint("Ensure the AKS backend is up and reachable at " + normalized);
  process.exit(1);
}
1708
-
1709
- // ── flux status ───────────────────────────────────────────────────────────────
1710
-
1711
// Print a read-only Flux status report (sources, kustomizations, helm releases)
// for the selected cluster. Missing sections render a hint instead of a table.
export async function aksFluxStatus(opts = {}) {
  const execa = await lazyExeca();
  await ensureFluxCli(execa);
  const cl = requireCluster(opts.clusterName);

  banner(`Flux Status: ${cl.clusterName}`);

  const sections = [
    { label: "Sources", args: ["get", "sources", "all"] },
    { label: "Kustomizations", args: ["get", "kustomizations"] },
    { label: "Helm Releases", args: ["get", "helmreleases", "--all-namespaces"] },
  ];

  for (const section of sections) {
    console.log(`\n ${LABEL(section.label)}`);
    const result = await execa(
      "flux",
      [...section.args, "--context", cl.clusterName],
      { timeout: 30000, reject: false },
    );
    const body = result.stdout?.trim();
    if (body) {
      body.split("\n").forEach((line) => console.log(` ${DIM(line)}`));
    } else {
      hint(` No ${section.label.toLowerCase()} found.`);
    }
  }

  console.log("");
}
1740
-
1741
- // ── flux reconcile ────────────────────────────────────────────────────────────
1742
-
1743
// Trigger a Flux reconcile: refresh the git source, then kick the root
// kustomization. Tries the Azure-managed kustomization name first.
export async function aksFluxReconcile(opts = {}) {
  const execa = await lazyExeca();
  await ensureFluxCli(execa);
  const cl = requireCluster(opts.clusterName);

  banner(`Flux Reconcile: ${cl.clusterName}`);

  const source = opts.source || "flux-system";
  hint(`Triggering reconcile for source "${source}"…\n`);

  await execa(
    "flux",
    ["reconcile", "source", "git", source, "--context", cl.clusterName],
    { timeout: 60000, stdio: "inherit" },
  );

  // Azure-managed Flux names kustomizations as "<config>-<kustomization>"
  // (e.g. "flux-system-flux-system"), while standalone Flux uses just
  // "flux-system" — try the Azure-managed name first, then fall back.
  const candidates = ["flux-system-flux-system", "flux-system"];
  let reconciled = false;
  for (const name of candidates) {
    const res = await execa(
      "flux",
      ["reconcile", "kustomization", name, "--context", cl.clusterName],
      { timeout: 60000, stdio: "inherit", reject: false },
    );
    if (res.exitCode === 0) {
      reconciled = true;
      break;
    }
  }

  if (!reconciled) {
    console.error(ERR("\n ✗ Could not reconcile kustomization. Check: kubectl --context " + cl.clusterName + " get kustomizations -n flux-system\n"));
    return;
  }
  console.log(OK("\n ✓ Reconciliation triggered.\n"));
}
1777
-
1778
- // ── Shared internals ──────────────────────────────────────────────────────────
1779
-
1780
// Create an idempotent ghcr.io image-pull secret in the target namespace and
// (for Flux image automation) in flux-system. No-op when no token is supplied.
// Fix: the original mixed `await` with `.then()` chains and repeated the
// dry-run→apply pipeline three times; it is now a single helper, preserving
// the exact kubectl arguments and per-call timeouts.
async function ensureGhcrPullSecret(execa, { clusterName, githubToken, namespace = "default" }) {
  if (!githubToken) return;

  banner("GHCR Pull Secret");
  hint("Creating image pull secret for ghcr.io…");

  const secretName = "ghcr-pull-secret";

  // Render a manifest with --dry-run=client, then pipe it through
  // `kubectl apply` so the operation is idempotent (plain `create` would fail
  // when the object already exists). Returns the failing result as-is when the
  // dry-run itself errors.
  const applyViaDryRun = async (renderArgs, timeout = 15000) => {
    const dryRun = await execa("kubectl", [
      ...renderArgs, "--context", clusterName, "--dry-run=client", "-o", "yaml",
    ], { timeout, reject: false });
    if (dryRun.exitCode !== 0) return dryRun;
    return execa("kubectl", ["apply", "-f", "-", "--context", clusterName],
      { input: dryRun.stdout, timeout, reject: false });
  };

  // kubectl args for the docker-registry secret in a given namespace.
  const secretArgs = (ns) => [
    "create", "secret", "docker-registry", secretName,
    "--docker-server=ghcr.io",
    "--docker-username=x-access-token",
    `--docker-password=${githubToken}`,
    "--namespace", ns,
  ];

  // 1. Secret in the target namespace (warn, don't fail, on error).
  const { exitCode } = await applyViaDryRun(secretArgs(namespace));
  if (exitCode === 0) {
    console.log(OK(` ✓ Pull secret "${secretName}" in namespace "${namespace}"`));
  } else {
    console.log(WARN(` ⚠ Could not create pull secret — create manually:`));
    hint(` kubectl create secret docker-registry ${secretName} --docker-server=ghcr.io --docker-username=x-access-token --docker-password=<token>`);
  }

  // 2. Also create in flux-system (Flux needs it for image automation);
  //    ensure the namespace exists first.
  const { exitCode: fluxNsCode } = await applyViaDryRun(
    ["create", "namespace", "flux-system"], 10000);

  if (fluxNsCode === 0) {
    const { exitCode: fsCode } = await applyViaDryRun(secretArgs("flux-system"));
    if (fsCode === 0) {
      console.log(OK(` ✓ Pull secret "${secretName}" in namespace "flux-system"`));
    }
  }
}
1841
-
1842
// Merge the AKS cluster's kubeconfig into the local config via
// `az aks get-credentials` (optionally with --admin credentials).
async function getCredentials(execa, { clusterName, rg, sub, admin } = {}) {
  hint("Fetching kubeconfig…");
  const cliArgs = [
    "aks", "get-credentials",
    "--resource-group", rg,
    "--name", clusterName,
    "--overwrite-existing",
    "--output", "none",
    ...subArgs(sub),
    ...(admin ? ["--admin"] : []),
  ];
  await execa("az", cliArgs, { timeout: 30000 });
  console.log(OK(` ✓ Kubeconfig merged`));
}
1856
-
1857
// Install Azure-managed Flux (microsoft.flux extension) on the cluster and
// point a read-only GitOps configuration at the given repo/path/branch.
// Exits the process when no GitHub token is available.
async function bootstrapFlux(execa, { clusterName, rg, sub, githubToken, repo, owner, path: fluxPath, branch }) {
  // Fall back to plugin defaults for any repo coordinate the caller omitted.
  const gitRepo = repo || AKS_DEFAULTS.fluxRepo;
  const gitOwner = owner || AKS_DEFAULTS.fluxOwner;
  const gitBranch = branch || AKS_DEFAULTS.fluxBranch;

  if (!githubToken) {
    console.error(ERR("\n GitHub token required for Flux bootstrap."));
    hint("Authenticate: gh auth login (writes to ~/.netrc)");
    hint("Or: export GITHUB_TOKEN=<token>");
    hint("Or: --github-token <token>\n");
    process.exit(1);
  }

  const repoUrl = `https://github.com/${gitOwner}/${gitRepo}`;
  const configName = "flux-system";

  banner("Flux Bootstrap (Azure GitOps)");
  kvLine("Repo", DIM(`${gitOwner}/${gitRepo}`));
  kvLine("Path", DIM(fluxPath));
  kvLine("Branch", DIM(gitBranch));
  kvLine("Mode", DIM("Azure-managed extension (read-only)"));
  hint("This takes 2–5 minutes…\n");

  // Step 1: install the microsoft.flux extension on the cluster.
  hint("Installing Flux extension…");
  try {
    await execa("az", [
      "k8s-extension", "create",
      "--resource-group", rg,
      "--cluster-name", clusterName,
      "--cluster-type", "managedClusters",
      "--name", "flux",
      "--extension-type", "microsoft.flux",
      "--scope", "cluster",
      "--output", "none",
      ...subArgs(sub),
    ], { timeout: 600000 });
    console.log(OK(" ✓ Flux extension installed"));
  } catch (err) {
    // Known Azure CLI packaging bug: the k8s-extension wheel ships a broken
    // vendored rpds. Print the workaround when detected, then rethrow always.
    const msg = (err.stderr || err.message || "").toString();
    if (/rpds|No module named|ModuleNotFoundError/.test(msg)) {
      console.error(ERR("\n Azure k8s-extension failed (broken vendored rpds — known Azure CLI bug)."));
      hint("Workaround (macOS Homebrew): install rpds-py into Azure CLI's Python, then remove the extension's vendored rpds:");
      hint(" $(brew --prefix azure-cli)/libexec/bin/pip install rpds-py");
      hint(" rm -rf ~/.azure/cliextensions/k8s-extension/rpds");
      hint("Then re-run. See: https://github.com/Azure/azure-cli/issues/32709\n");
    }
    throw err;
  }

  // Step 2: create the Flux GitOps configuration (never pushes to the repo).
  hint("Creating GitOps configuration…");
  await execa("az", [
    "k8s-configuration", "flux", "create",
    "--resource-group", rg,
    "--cluster-name", clusterName,
    "--cluster-type", "managedClusters",
    "--name", configName,
    "--namespace", "flux-system",
    "--scope", "cluster",
    "--url", repoUrl,
    "--branch", gitBranch,
    "--https-user", "x-access-token",
    "--https-key", githubToken,
    "--kustomization", `name=${configName}`, `path=./${fluxPath}`, "prune=true",
    "--output", "none",
    ...subArgs(sub),
  ], { timeout: 300000 });
  console.log(OK(" ✓ GitOps configuration created (read-only — no push to repo)"));
}
1928
-
1929
- // ── Cluster reconciler ───────────────────────────────────────────────────────
1930
- // Each reconciler: { name, fn(ctx, cluster) } — returns silently on success,
1931
- // warns on non-fatal failure. Adding a new concern = one new entry in the array.
1932
-
1933
// Fetch the cluster's current state from Azure (best effort, warns on failure)
// and run every registered reconciler against it.
async function reconcileCluster(ctx) {
  const { execa, clusterName, rg, sub } = ctx;

  let cluster = {};
  try {
    const res = await execa("az", [
      "aks", "show", "-g", rg, "-n", clusterName, "--output", "json",
      ...subArgs(sub),
    ], { timeout: 30000 });
    cluster = JSON.parse(res.stdout);
  } catch (err) {
    console.log(WARN(` ⚠ Could not fetch cluster details: ${(err.message || "").split("\n")[0]}`));
  }
  ctx.cluster = cluster;

  await runReconcilers(AKS_RECONCILERS, ctx);
}
1949
-
1950
// Kustomizations that fops manages directly — suspend them so Flux doesn't revert our patches.
// Names refer to Flux Kustomization objects in the flux-system namespace
// (consumed by suspendManagedKustomizations).
const FOPS_MANAGED_KUSTOMIZATIONS = [
  "foundation-backend",
  "foundation-processor",
  "foundation-scheduler",
  "foundation-watcher",
  "foundation-storage-engine",
  "istio-controlplane",
];
1959
-
1960
// DAI-specific Kustomizations. Only left running when the operator opts in
// with --dai; otherwise suspended alongside the fops-managed set
// (see suspendManagedKustomizations).
const DAI_KUSTOMIZATIONS = [
  "dai-backend",
  "dai-trino",
];
1964
-
1965
// Suspend the Flux Kustomizations that fops patches directly so Flux does not
// revert those patches. Without --dai, the DAI kustomizations are suspended
// too so they don't schedule workloads. Missing or already-suspended objects
// are skipped silently.
async function suspendManagedKustomizations(ctx) {
  const { execa, clusterName, opts } = ctx;
  const kubectl = (args, extra = {}) =>
    execa("kubectl", ["--context", clusterName, ...args], { timeout: 15000, reject: false, ...extra });

  const targets = [...FOPS_MANAGED_KUSTOMIZATIONS];
  if (!opts?.dai) targets.push(...DAI_KUSTOMIZATIONS);

  let patched = 0;
  for (const name of targets) {
    const objRef = ["kustomization", name, "-n", "flux-system"];

    const exists = await kubectl(["get", ...objRef]);
    if (exists.exitCode !== 0) continue;

    const current = await kubectl(["get", ...objRef, "-o", "jsonpath={.spec.suspend}"]);
    if (current.stdout === "true") continue;

    await kubectl(["patch", ...objRef, "--type", "merge", "-p", '{"spec":{"suspend":true}}']);
    patched++;
  }

  if (patched > 0) {
    console.log(OK(` ✓ Suspended ${patched} Flux Kustomization(s) to prevent revert`));
  } else {
    console.log(OK(" ✓ Managed Kustomizations already suspended"));
  }
}
2000
-
2001
// Reconcile steps executed against the cluster by reconcileCluster.
// NOTE(review): array order looks deliberate (e.g. "kubeconfig" precedes the
// kubectl-based steps such as "suspend-flux") — confirm before reordering.
const AKS_RECONCILERS = [
  { name: "api-server-ip", fn: reconcileApiServerIp },
  { name: "addons", fn: reconcileAddons },
  { name: "autoscaler", fn: reconcileAutoscaler },
  { name: "spot-pool", fn: reconcileSpotPool },
  { name: "kubeconfig", fn: reconcileKubeconfig },
  { name: "suspend-flux", fn: suspendManagedKustomizations },
  { name: "descheduler", fn: reconcileDescheduler },
  { name: "postgres", fn: reconcilePostgres },
  { name: "pg-databases", fn: reconcilePgDatabases },
  { name: "secret-store", fn: reconcileSecretStore },
  { name: "k8s-secrets", fn: reconcileK8sSecrets },
  { name: "storage-engine", fn: reconcileStorageEngine },
  { name: "flux", fn: reconcileFluxStep },
  { name: "helm-repos", fn: reconcileHelmRepos },
  { name: "flux-prereqs", fn: reconcileFluxPrereqs },
  { name: "acr-webhooks", fn: reconcileAcrWebhooks },
  { name: "helm-values", fn: reconcileHelmValues },
  { name: "vault-unseal", fn: reconcileVaultUnseal },
  { name: "ingress-ip", fn: reconcileIngressIp },
  { name: "frontend-auth", fn: reconcileFrontendAuth },
  { name: "fops-api", fn: reconcileFopsApi },
];
2024
-
2025
// Ensure the operator's current public IP can reach the AKS API server:
// scope the server to it when no ranges exist, append it when absent, no-op
// when already present. Silently returns when the public IP can't be fetched.
async function reconcileApiServerIp(ctx) {
  const { execa, clusterName, rg, sub, cluster } = ctx;
  const myIp = await fetchMyIp();
  if (!myIp) return;

  const ranges = cluster.apiServerAccessProfile?.authorizedIpRanges || [];
  // Compare against the base address of each CIDR entry. The previous
  // `r.startsWith(myIp)` prefix test could false-positive (e.g. myIp
  // "1.2.3.4" matched "1.2.3.40/32") and skip the update.
  const alreadyAuthorized = ranges.some((r) => String(r).split("/")[0] === myIp);

  if (ranges.length === 0) {
    hint(`Scoping API server to ${myIp}…`);
    await execa("az", [
      "aks", "update", "-g", rg, "-n", clusterName,
      "--api-server-authorized-ip-ranges", `${myIp}/32`,
      "--output", "none", ...subArgs(sub),
    ], { timeout: 120000 });
    console.log(OK(` ✓ API server scoped to ${myIp}/32`));
  } else if (!alreadyAuthorized) {
    const updated = [...ranges, `${myIp}/32`].join(",");
    hint(`Adding ${myIp} to authorized IP ranges…`);
    await execa("az", [
      "aks", "update", "-g", rg, "-n", clusterName,
      "--api-server-authorized-ip-ranges", updated,
      "--output", "none", ...subArgs(sub),
    ], { timeout: 120000 });
    console.log(OK(` ✓ API server ranges updated (${ranges.length + 1} IPs)`));
  } else {
    console.log(OK(` ✓ API server already includes ${myIp}`));
  }
}
2052
-
2053
// Disable unwanted cluster addons: monitoring and azure-policy via
// `aks disable-addons`, the Azure File CSI driver via `aks update`. Defender
// has no CLI off-switch, so only a hint is printed. Refreshes ctx.cluster
// after any change so later reconcilers see the new profile.
async function reconcileAddons(ctx) {
  const { execa, clusterName, rg, sub } = ctx;
  const cluster = ctx.cluster || {};
  const baseArgs = ["-g", rg, "-n", clusterName, ...subArgs(sub)];

  const defenderOn = cluster.securityProfile?.defender?.securityMonitoring?.enabled === true;
  const monitoringOn = cluster.addonProfiles?.omsagent?.enabled === true;
  const azurePolicyOn = cluster.addonProfiles?.azurepolicy?.enabled === true;
  const fileDriverOn = cluster.storageProfile?.fileCSIDriver?.enabled !== false;

  if (![defenderOn, monitoringOn, azurePolicyOn, fileDriverOn].some(Boolean)) {
    console.log(OK(" ✓ Unwanted addons already disabled"));
    return;
  }

  let changed = false;

  if (defenderOn) {
    // az aks create/update do not support --disable-defender; disable via
    // portal or: az security auto-provisioning update --auto-provision Off
    hint("Defender is enabled; to disable use Azure Portal or security auto-provisioning.");
  }

  // Monitoring + Azure Policy share `aks disable-addons` (comma-separated).
  const toDisable = [];
  if (monitoringOn) toDisable.push("monitoring");
  if (azurePolicyOn) toDisable.push("azure-policy");

  if (toDisable.length > 0) {
    hint(`Disabling ${toDisable.join(", ")}…`);
    const res = await execa("az", [
      "aks", "disable-addons", ...baseArgs, "--addons", toDisable.join(","),
      "--output", "none",
    ], { reject: false, timeout: 300000 });
    if (res.exitCode === 0) {
      console.log(OK(` ✓ Disabled: ${toDisable.join(", ")}`));
      changed = true;
    } else {
      console.log(WARN(` ⚠ Addons: ${(res.stderr || "").split("\n")[0]}`));
    }
  }

  if (fileDriverOn) {
    hint("Disabling Azure File CSI driver (not needed)…");
    const res = await execa("az", [
      "aks", "update", ...baseArgs, "--disable-file-driver", "--yes", "--output", "none",
    ], { reject: false, timeout: 300000 });
    if (res.exitCode === 0) {
      console.log(OK(" ✓ Azure File driver disabled (−8 pods)"));
      changed = true;
    } else {
      console.log(WARN(` ⚠ File driver: ${(res.stderr || "").split("\n")[0]}`));
    }
  }

  if (!changed) return;
  // Best-effort refresh of the cached cluster object.
  try {
    const res = await execa("az", [
      "aks", "show", ...baseArgs, "--output", "json",
    ], { timeout: 30000 });
    ctx.cluster = JSON.parse(res.stdout);
  } catch {}
}
2109
-
2110
// Enable the cluster autoscaler on the system node pool (or the first pool)
// with the configured min/max counts; no-op when it is already on.
async function reconcileAutoscaler(ctx) {
  const { execa, clusterName, rg, sub, minCount, maxCount, cluster } = ctx;
  const pools = cluster.agentPoolProfiles || [];
  const pool = pools.find((p) => p.mode === "System") || pools[0];
  if (!pool) return;

  if (pool.enableAutoScaling) {
    console.log(OK(` ✓ Autoscaler already enabled on "${pool.name}" (${pool.minCount}–${pool.maxCount})`));
    return;
  }

  hint(`Enabling autoscaler on pool "${pool.name}" (${minCount}–${maxCount})…`);
  // Use nodepool update instead of aks update to support multi-pool clusters
  await execa("az", [
    "aks", "nodepool", "update",
    "--resource-group", rg,
    "--cluster-name", clusterName,
    "--name", pool.name,
    "--enable-cluster-autoscaler",
    "--min-count", String(minCount),
    "--max-count", String(maxCount),
    "--output", "none", ...subArgs(sub),
  ], { timeout: 120000 });
  console.log(OK(` ✓ Autoscaler enabled on "${pool.name}" (${minCount}–${maxCount})`));
}
2134
-
2135
// Ensure a Spot-priority node pool exists for workloads that require spot
// instances. Reuses the system pool's VM size, creates 2 nodes with autoscale
// 1–3, and only warns (never throws) when creation fails.
async function reconcileSpotPool(ctx) {
  const { execa, clusterName, rg, sub, cluster, maxPods } = ctx;
  const pools = cluster.agentPoolProfiles || [];

  const existing = pools.find((p) => p.scaleSetPriority === "Spot");
  if (existing) {
    const scaling = existing.enableAutoScaling
      ? `autoscale ${existing.minCount}–${existing.maxCount}`
      : `${existing.count || 0} nodes`;
    console.log(OK(` ✓ Spot pool "${existing.name}" present (${scaling}, max-pods ${existing.maxPods})`));
    return;
  }

  hint("Creating spot node pool for workloads that require spot instances…");
  const systemPool = pools.find((p) => p.mode === "System") || pools[0];
  const vmSize = systemPool?.vmSize || "Standard_D8s_v3";

  const result = await execa("az", [
    "aks", "nodepool", "add",
    "--resource-group", rg,
    "--cluster-name", clusterName,
    "--name", "spot",
    "--node-count", "2",
    "--node-vm-size", vmSize,
    "--max-pods", String(maxPods || 110),
    "--priority", "Spot",
    "--eviction-policy", "Delete",
    "--spot-max-price", "-1",
    "--enable-cluster-autoscaler",
    "--min-count", "1",
    "--max-count", "3",
    "--labels", "kubernetes.azure.com/scalesetpriority=spot",
    "--node-taints", "kubernetes.azure.com/scalesetpriority=spot:NoSchedule",
    "--output", "none",
    ...subArgs(sub),
  ], { timeout: 300000, reject: false });

  if (result.exitCode === 0) {
    console.log(OK(" ✓ Spot pool created (2 nodes, autoscale 1–3)"));
  } else {
    console.log(WARN(` ⚠ Spot pool creation failed: ${(result.stderr || "").split("\n")[0]}`));
  }
}
2180
-
2181
// Install the descheduler via Helm (as a CronJob in kube-system) so over-packed
// nodes get rebalanced; skipped when it is already installed or Helm is absent.
// Fix: removed the unused `manifest` local — a descheduler-policy ConfigMap
// template literal that was built but never applied (the install configures
// strategies via --set flags instead).
async function reconcileDescheduler(ctx) {
  const { execa, clusterName } = ctx;
  const kubectl = (args, opts = {}) =>
    execa("kubectl", ["--context", clusterName, ...args], { timeout: 30000, reject: false, ...opts });

  // Idempotency check: the chart creates a CronJob named "descheduler".
  const { exitCode } = await kubectl(["get", "cronjob", "descheduler", "-n", "kube-system"]);
  if (exitCode === 0) {
    console.log(OK(" ✓ Descheduler already installed"));
    return;
  }

  hint("Installing descheduler…");

  const { exitCode: helmCheck } = await execa("helm", ["version", "--short"], { reject: false, timeout: 10000 });
  if (helmCheck !== 0) {
    console.log(WARN(" ⚠ Helm not found — skipping descheduler install"));
    return;
  }

  // `repo add` fails when the repo is already registered; refresh it instead.
  const { exitCode: repoAdd } = await execa("helm", [
    "repo", "add", "descheduler", "https://kubernetes-sigs.github.io/descheduler/",
  ], { reject: false, timeout: 30000 });
  if (repoAdd !== 0) {
    await execa("helm", ["repo", "update", "descheduler"], { reject: false, timeout: 30000 });
  }

  const { exitCode: installCode, stderr } = await execa("helm", [
    "upgrade", "--install", "descheduler", "descheduler/descheduler",
    "--namespace", "kube-system",
    "--kube-context", clusterName,
    "--set", "schedule=*/5 * * * *",
    "--set", "deschedulerPolicy.strategies.RemoveDuplicates.enabled=true",
    "--set", "deschedulerPolicy.strategies.LowNodeUtilization.enabled=true",
    "--set", "deschedulerPolicy.strategies.RemovePodsHavingTooManyRestarts.enabled=true",
    "--wait", "--timeout", "120s",
  ], { timeout: 180000, reject: false });

  if (installCode === 0) {
    console.log(OK(" ✓ Descheduler installed (runs every 5 min)"));
  } else {
    const errMsg = (stderr || "").split("\n")[0];
    console.log(WARN(` ⚠ Descheduler install failed: ${errMsg}`));
  }
}
2265
-
2266
// Refresh the local kubeconfig entry for this cluster (non-admin credentials).
async function reconcileKubeconfig(ctx) {
  const { execa, clusterName, rg, sub } = ctx;
  await getCredentials(execa, { clusterName, rg, sub });
}
2269
-
2270
- // ── Postgres Flexible Server reconciler ──────────────────────────────────────
2271
-
2272
// Defaults for the Azure Postgres Flexible Server provisioned by
// reconcilePostgres.
const PG_DEFAULTS = {
  sku: "Standard_B2ms",   // passed as --sku-name on create
  tier: "Burstable",
  version: "15",
  storageSizeGb: 32,
  adminUser: "foundation", // passed as --admin-user on create
};
2279
-
2280
// Deterministic Flexible Server name derived from the cluster name.
function pgServerName(clusterName) {
  return ["fops", clusterName, "psql"].join("-");
}
2283
-
2284
// Generate a cryptographically random alphanumeric password (safe to embed in
// connection strings and shell arguments).
// Fix: the previous `byte % chars.length` mapping was biased — 256 % 62 = 8,
// so the first 8 alphabet characters were slightly more likely. Rejection
// sampling keeps every character uniformly distributed.
function generatePassword(len = 32) {
  const chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
  const limit = 256 - (256 % chars.length); // largest multiple of 62 ≤ 256
  const out = [];
  while (out.length < len) {
    for (const b of crypto.randomBytes(len)) {
      if (b < limit && out.length < len) out.push(chars[b % chars.length]);
    }
  }
  return out.join("");
}
2289
-
2290
- async function reconcilePostgres(ctx) {
2291
- const { execa, clusterName, rg, sub, cluster } = ctx;
2292
- const location = cluster.location || DEFAULTS.location;
2293
- const serverName = pgServerName(clusterName);
2294
-
2295
- // 1. Check if server already exists
2296
- const { exitCode: pgExists, stdout: pgJson } = await execa("az", [
2297
- "postgres", "flexible-server", "show",
2298
- "--name", serverName, "--resource-group", rg,
2299
- "--output", "json", ...subArgs(sub),
2300
- ], { reject: false, timeout: 30000 });
2301
-
2302
- let fqdn;
2303
- let adminPassword;
2304
-
2305
- if (pgExists === 0 && pgJson?.trim()) {
2306
- const pg = JSON.parse(pgJson);
2307
- fqdn = pg.fullyQualifiedDomainName;
2308
- console.log(OK(` ✓ Postgres Flexible Server "${serverName}" exists (${fqdn})`));
2309
-
2310
- // Reconcile storage auto-grow
2311
- const autoGrow = pg.storage?.autoGrow;
2312
- if (autoGrow !== "Enabled") {
2313
- hint("Enabling storage auto-grow…");
2314
- await execa("az", [
2315
- "postgres", "flexible-server", "update",
2316
- "--name", serverName, "--resource-group", rg,
2317
- "--storage-auto-grow", "Enabled",
2318
- "--output", "none", ...subArgs(sub),
2319
- ], { reject: false, timeout: 120000 });
2320
- console.log(OK(" ✓ Storage auto-grow enabled"));
2321
- }
2322
-
2323
- adminPassword = readClusterState(clusterName)?.postgres?.adminPassword;
2324
- if (!adminPassword) {
2325
- console.log(WARN(" ⚠ Postgres admin password not in local state — secret sync skipped"));
2326
- hint(` Password was set at creation time. If lost, reset with:`);
2327
- hint(` az postgres flexible-server update -g ${rg} -n ${serverName} --admin-password <new>`);
2328
- return;
2329
- }
2330
- } else {
2331
- // 2. Resolve the AKS VNet + create a delegated subnet for Postgres
2332
- const nodeRg = cluster.nodeResourceGroup;
2333
- if (!nodeRg) {
2334
- console.log(WARN(" ⚠ Could not determine node resource group — skipping Postgres"));
2335
- return;
2336
- }
2337
-
2338
- hint(`Resolving AKS VNet in ${nodeRg}…`);
2339
- const { stdout: vnetListJson, exitCode: vnetCode } = await execa("az", [
2340
- "network", "vnet", "list", "-g", nodeRg, "--output", "json",
2341
- ...subArgs(sub),
2342
- ], { reject: false, timeout: 15000 });
2343
-
2344
- if (vnetCode !== 0 || !vnetListJson?.trim()) {
2345
- console.log(WARN(" ⚠ No VNet found in node resource group — skipping Postgres"));
2346
- return;
2347
- }
2348
-
2349
- const vnets = JSON.parse(vnetListJson);
2350
- const vnet = vnets[0];
2351
- if (!vnet) {
2352
- console.log(WARN(" ⚠ No VNet found — skipping Postgres"));
2353
- return;
2354
- }
2355
-
2356
- const vnetName = vnet.name;
2357
- const pgSubnetName = "postgres-subnet";
2358
-
2359
- // Check if the postgres subnet already exists
2360
- const { exitCode: subnetExists } = await execa("az", [
2361
- "network", "vnet", "subnet", "show",
2362
- "-g", nodeRg, "--vnet-name", vnetName, "-n", pgSubnetName,
2363
- "--output", "none", ...subArgs(sub),
2364
- ], { reject: false, timeout: 15000 });
2365
-
2366
- if (subnetExists !== 0) {
2367
- const vnetPrefix = vnet.addressSpace?.addressPrefixes?.[0] || "10.224.0.0/12";
2368
- const pgCidr = await findAvailableSubnetCidr(execa, nodeRg, vnetName, vnetPrefix, sub);
2369
-
2370
- hint(`Creating subnet "${pgSubnetName}" (${pgCidr}) in ${vnetName}…`);
2371
- const { exitCode: createCode, stderr: createErr } = await execa("az", [
2372
- "network", "vnet", "subnet", "create",
2373
- "-g", nodeRg, "--vnet-name", vnetName, "-n", pgSubnetName,
2374
- "--address-prefixes", pgCidr,
2375
- "--delegations", "Microsoft.DBforPostgreSQL/flexibleServers",
2376
- "--output", "none", ...subArgs(sub),
2377
- ], { reject: false, timeout: 60000 });
2378
- if (createCode !== 0) {
2379
- const detail = (createErr || "").split("\n").filter(l => l.trim()).slice(-2).join(" ");
2380
- throw new Error(`Subnet creation failed (${pgCidr}): ${detail || "exit code " + createCode}`);
2381
- }
2382
- console.log(OK(` ✓ Subnet "${pgSubnetName}" created`));
2383
- }
2384
-
2385
- // Get subnet ID
2386
- const { stdout: subnetJson } = await execa("az", [
2387
- "network", "vnet", "subnet", "show",
2388
- "-g", nodeRg, "--vnet-name", vnetName, "-n", pgSubnetName,
2389
- "--output", "json", ...subArgs(sub),
2390
- ], { timeout: 15000 });
2391
- const subnetId = JSON.parse(subnetJson).id;
2392
-
2393
- // Create a private DNS zone for Postgres in the VNet
2394
- const dnsZone = `${serverName}.private.postgres.database.azure.com`;
2395
- const { exitCode: dnsExists } = await execa("az", [
2396
- "network", "private-dns", "zone", "show",
2397
- "-g", rg, "-n", dnsZone, "--output", "none",
2398
- ...subArgs(sub),
2399
- ], { reject: false, timeout: 15000 });
2400
-
2401
- if (dnsExists !== 0) {
2402
- hint(`Creating private DNS zone ${dnsZone}…`);
2403
- await execa("az", [
2404
- "network", "private-dns", "zone", "create",
2405
- "-g", rg, "-n", dnsZone, "--output", "none",
2406
- ...subArgs(sub),
2407
- ], { timeout: 60000 });
2408
- }
2409
-
2410
- // 3. Create the Flexible Server
2411
- adminPassword = generatePassword();
2412
- console.log(chalk.yellow(` ↻ Creating Postgres Flexible Server "${serverName}"…`));
2413
- hint("This takes 3–5 minutes…");
2414
-
2415
- await execa("az", [
2416
- "postgres", "flexible-server", "create",
2417
- "--name", serverName,
2418
- "--resource-group", rg,
2419
- "--location", location,
2420
- "--admin-user", PG_DEFAULTS.adminUser,
2421
- "--admin-password", adminPassword,
2422
- "--sku-name", PG_DEFAULTS.sku,
2423
- "--tier", PG_DEFAULTS.tier,
2424
- "--version", PG_DEFAULTS.version,
2425
- "--storage-size", String(PG_DEFAULTS.storageSizeGb),
2426
- "--storage-auto-grow", "Enabled",
2427
- "--subnet", subnetId,
2428
- "--private-dns-zone", dnsZone,
2429
- "--yes",
2430
- "--output", "json", ...subArgs(sub),
2431
- ], { timeout: 600000 });
2432
-
2433
- // Read the created server to get the FQDN
2434
- const { stdout: createdJson } = await execa("az", [
2435
- "postgres", "flexible-server", "show",
2436
- "--name", serverName, "--resource-group", rg,
2437
- "--output", "json", ...subArgs(sub),
2438
- ], { timeout: 15000 });
2439
- fqdn = JSON.parse(createdJson).fullyQualifiedDomainName;
2440
-
2441
- console.log(OK(` ✓ Postgres Flexible Server created (${fqdn})`));
2442
-
2443
- // Save password + FQDN to local state
2444
- writeClusterState(clusterName, {
2445
- postgres: { serverName, fqdn, adminUser: PG_DEFAULTS.adminUser, adminPassword },
2446
- });
2447
- }
2448
-
2449
- // 4. Allowlist pg_trgm extension (required by backend search migration)
2450
- const { stdout: extVal } = await execa("az", [
2451
- "postgres", "flexible-server", "parameter", "show",
2452
- "--resource-group", rg, "--server-name", serverName,
2453
- "--name", "azure.extensions", "--query", "value", "-o", "tsv",
2454
- ...subArgs(sub),
2455
- ], { reject: false, timeout: 15000 });
2456
- const currentExts = (extVal || "").trim().split(",").map(e => e.trim()).filter(Boolean);
2457
- if (!currentExts.includes("PG_TRGM") && !currentExts.includes("pg_trgm")) {
2458
- const newExts = [...currentExts, "pg_trgm"].join(",");
2459
- hint("Allowlisting pg_trgm extension…");
2460
- await execa("az", [
2461
- "postgres", "flexible-server", "parameter", "set",
2462
- "--resource-group", rg, "--server-name", serverName,
2463
- "--name", "azure.extensions", "--value", newExts,
2464
- "--output", "none", ...subArgs(sub),
2465
- ], { reject: false, timeout: 60000 });
2466
- console.log(OK(" ✓ pg_trgm extension allowlisted"));
2467
- }
2468
-
2469
- // 5. Sync the K8s "postgres" secret into the foundation namespace
2470
- if (fqdn && adminPassword) {
2471
- await syncPostgresSecret(execa, { clusterName, fqdn, adminUser: PG_DEFAULTS.adminUser, adminPassword });
2472
- }
2473
- }
2474
-
2475
// Parse an IPv4 CIDR string ("a.b.c.d/len") into its numeric address range.
// Returns { start, end } as unsigned 32-bit integers spanning the whole block
// (network address through broadcast address).
function parseCidr(cidr) {
  const [ip, bits] = cidr.split("/");
  const addr = ip
    .split(".")
    .map(Number)
    .reduce((acc, octet) => ((acc << 8) | octet) >>> 0, 0);
  // "/0" is special-cased: shifting a 32-bit value by 32 is a no-op in JS.
  const mask = bits === "0" ? 0 : (0xFFFFFFFF << (32 - Number(bits))) >>> 0;
  const network = (addr & mask) >>> 0;
  return { start: network, end: (network | ~mask) >>> 0 };
}

// True when `cidr` shares any address with at least one range in `existingCidrs`.
// Two ranges overlap exactly when neither lies entirely before the other.
function cidrOverlaps(cidr, existingCidrs) {
  const candidate = parseCidr(cidr);
  for (const existing of existingCidrs) {
    const range = parseCidr(existing);
    if (candidate.start <= range.end && candidate.end >= range.start) {
      return true;
    }
  }
  return false;
}
2490
-
2491
// Find a free /24 CIDR block inside the AKS node VNet for the delegated
// Postgres subnet. Lists the VNet's existing subnet prefixes via `az`, then
// scans candidate /24 blocks from the TOP of the VNet range downward and
// returns the first one that overlaps nothing.
//
// Params: execa — process runner; nodeRg — node resource group; vnetName —
// VNet to scan; vnetPrefix — the VNet's address space (e.g. "10.224.0.0/12");
// sub — optional subscription passed through subArgs().
// Returns: a CIDR string like "10.239.255.0/24".
// Throws: Error when no non-overlapping /24 exists in the range.
async function findAvailableSubnetCidr(execa, nodeRg, vnetName, vnetPrefix, sub) {
  let existingCidrs = [];
  try {
    const { stdout } = await execa("az", [
      "network", "vnet", "subnet", "list",
      "-g", nodeRg, "--vnet-name", vnetName, "--output", "json",
      ...subArgs(sub),
    ], { timeout: 15000 });
    const subnets = JSON.parse(stdout || "[]");
    // Subnets may report a single addressPrefix or a list of addressPrefixes.
    existingCidrs = subnets.flatMap(s => s.addressPrefix ? [s.addressPrefix] : (s.addressPrefixes || []));
  } catch (err) {
    // Best-effort: an empty list just means every candidate looks free.
    console.log(WARN(` ⚠ Could not list existing subnets: ${err.message}`));
  }

  const { start: vnetStart, end: vnetEnd } = parseCidr(vnetPrefix);

  // Scan /24 blocks from the top of the VNet range downward to avoid
  // collisions with AKS's default low-range subnets (often a wide /16).
  const blockSize = 256;
  const topBlock = (vnetEnd - blockSize + 1) >>> 0;
  // `iterations` caps the scan at 4096 blocks (a /12 worth of /24s) so a
  // huge address space cannot stall the CLI.
  let iterations = 0;
  for (let addr = topBlock; addr >= vnetStart && iterations < 4096; addr = (addr - blockSize) >>> 0, iterations++) {
    const o1 = (addr >>> 24) & 0xFF;
    const o2 = (addr >>> 16) & 0xFF;
    const o3 = (addr >>> 8) & 0xFF;
    const candidate = `${o1}.${o2}.${o3}.0/24`;
    if (!cidrOverlaps(candidate, existingCidrs)) return candidate;
    // Guard against unsigned wraparound when the VNet starts at 0.0.0.0:
    // the `>>> 0` decrement would wrap to a huge value and loop forever.
    if (addr === 0) break;
  }

  throw new Error(
    `No available /24 subnet in VNet ${vnetName} (${vnetPrefix}). ` +
    `Existing subnets: ${existingCidrs.join(", ")}. Free a subnet or expand the VNet address space.`
  );
}
2526
-
2527
// Apply the "postgres" connection secret into each target namespace, creating
// the namespace first if needed. Uses the client-side dry-run + apply pattern
// so the secret is created or updated idempotently. Best-effort: a failed
// apply is reported as a warning, not thrown.
async function syncPostgresSecret(execa, { clusterName, fqdn, adminUser, adminPassword }) {
  const namespaces = ["foundation"];
  for (const ns of namespaces) {
    // Ensure the namespace exists: render it client-side, then apply.
    const nsRender = await execa("kubectl", [
      "--context", clusterName,
      "create", "namespace", ns, "--dry-run=client", "-o", "yaml",
    ], { reject: false, timeout: 10000 });
    await execa(
      "kubectl",
      ["--context", clusterName, "apply", "-f", "-"],
      { input: nsRender.stdout, timeout: 10000, reject: false },
    );

    // Render the postgres secret, then apply it (create-or-update).
    const secretRender = await execa("kubectl", [
      "--context", clusterName, "-n", ns,
      "create", "secret", "generic", "postgres",
      "--from-literal=host=" + fqdn,
      "--from-literal=superUserPassword=" + adminPassword,
      "--from-literal=user=" + adminUser,
      "--from-literal=password=" + adminPassword,
      "--dry-run=client", "-o", "yaml",
    ], { timeout: 10000 });
    const { exitCode } = await execa(
      "kubectl",
      ["--context", clusterName, "-n", ns, "apply", "-f", "-"],
      { input: secretRender.stdout, timeout: 10000, reject: false },
    );

    if (exitCode === 0) {
      console.log(OK(` ✓ Secret "postgres" synced to ${ns} namespace`));
    } else {
      console.log(WARN(` ⚠ Could not sync postgres secret to ${ns}`));
    }
  }
}
2560
-
2561
// ── Postgres databases & roles: create service databases on the Flexible Server ──

// Per-service databases provisioned on the shared Flexible Server. Each entry
// is used both as a database name and as a matching login role name.
const PG_SERVICE_DBS = ["foundation", "processor", "scheduler", "watcher", "mlflow"];
2564
-
2565
// Ensure every PG_SERVICE_DBS database and role exists on the cluster's
// Flexible Server by running a short-lived in-cluster psql Job. Reads the
// admin password from the "postgres" secret in the foundation namespace,
// renders a batch/v1 Job that executes the SQL, waits for completion, and
// deletes the Job afterwards. Best-effort: all failures log warnings.
async function reconcilePgDatabases(ctx) {
  const { execa, clusterName } = ctx;
  // All kubectl calls share the context, a 60s timeout, and reject:false so
  // a failed command yields { exitCode } instead of throwing.
  const kubectl = (args, opts = {}) =>
    execa("kubectl", ["--context", clusterName, ...args], { timeout: 60000, reject: false, ...opts });

  const pgServer = pgServerName(clusterName);
  const pgHost = `${pgServer}.postgres.database.azure.com`;

  // Read password from the postgres secret
  const { stdout: pwB64 } = await kubectl([
    "get", "secret", "postgres", "-n", "foundation",
    "-o", "jsonpath={.data.password}",
  ]);
  if (!pwB64) {
    console.log(WARN(" ⚠ No postgres secret found — skipping DB setup"));
    return;
  }
  const pgPass = Buffer.from(pwB64, "base64").toString();

  // Run a psql job to create all databases and roles.
  // NOTE(review): Postgres does not allow CREATE DATABASE inside a DO block
  // (it cannot run in a transaction/function context), and the preceding
  // SELECT only *prints* the statement — verify this job actually creates
  // missing databases rather than relying on them pre-existing.
  // NOTE(review): pgPass is interpolated into SQL and into a shell string;
  // a password containing quotes would break quoting here — confirm
  // generatePassword() only emits safe characters.
  const sqlLines = PG_SERVICE_DBS.map(db => [
    `SELECT 'CREATE DATABASE ${db}' WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = '${db}');`,
    `DO \\$\\$ BEGIN IF NOT EXISTS (SELECT FROM pg_database WHERE datname = '${db}') THEN EXECUTE 'CREATE DATABASE ${db}'; END IF; IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = '${db}') THEN CREATE ROLE ${db} LOGIN PASSWORD '${pgPass}'; END IF; END \\$\\$;`,
    `GRANT ALL ON DATABASE ${db} TO ${db};`,
  ]).flat();

  // Chain each statement with && so the trailing "DONE" marker only prints
  // when every statement succeeded (checked in the log fallback below).
  const script = sqlLines.map(s => `psql -c "${s}"`).join(" && ") + " && echo DONE";

  const jobManifest = JSON.stringify({
    apiVersion: "batch/v1", kind: "Job",
    metadata: { name: "fops-pg-setup", namespace: "foundation" },
    spec: {
      backoffLimit: 2, ttlSecondsAfterFinished: 60,
      template: {
        spec: {
          restartPolicy: "Never",
          containers: [{
            name: "psql",
            image: "postgres:16-alpine",
            env: [
              { name: "PGHOST", value: pgHost },
              { name: "PGUSER", value: "foundation" },
              { name: "PGDATABASE", value: "postgres" },
              { name: "PGPASSWORD", value: pgPass },
              { name: "PGSSLMODE", value: "require" },
            ],
            command: ["sh", "-c", script],
          }],
        },
      },
    },
  });

  // Delete old job if it exists (Job specs are immutable, so re-apply of a
  // changed manifest would otherwise fail); brief pause for deletion to land.
  await kubectl(["delete", "job", "fops-pg-setup", "-n", "foundation", "--ignore-not-found"]);
  await new Promise(r => setTimeout(r, 2000));

  const { exitCode, stderr } = await kubectl(["apply", "-f", "-"], { input: jobManifest });
  if (exitCode !== 0) {
    console.log(WARN(` ⚠ pg-setup job failed: ${(stderr || "").split("\n")[0]}`));
    return;
  }

  // Wait for job to complete (max 60s)
  const { exitCode: waitCode } = await execa("kubectl", [
    "--context", clusterName,
    "wait", "--for=condition=complete", "job/fops-pg-setup",
    "-n", "foundation", "--timeout=60s",
  ], { timeout: 70000, reject: false });

  if (waitCode === 0) {
    console.log(OK(` ✓ Postgres databases ready (${PG_SERVICE_DBS.join(", ")})`));
  } else {
    // The wait can time out even though the pod finished; fall back to
    // checking the job logs for the DONE marker before declaring failure.
    const { stdout: logs } = await kubectl([
      "logs", "job/fops-pg-setup", "-n", "foundation", "--tail=5",
    ]);
    if (logs?.includes("DONE")) {
      console.log(OK(` ✓ Postgres databases ready (${PG_SERVICE_DBS.join(", ")})`));
    } else {
      console.log(WARN(" ⚠ pg-setup job didn't complete — check: kubectl logs job/fops-pg-setup -n foundation"));
    }
  }

  // Clean up the job regardless of outcome (it holds the password in its spec).
  await kubectl(["delete", "job", "fops-pg-setup", "-n", "foundation", "--ignore-not-found"]);
}
2650
-
2651
- // ── K8s secrets: ensure required secrets exist with correct keys ──────────────
2652
-
2653
// Reconcile required Kubernetes secrets in the foundation namespace:
//  1. if the postgres password contains non-alphanumeric characters, rotate it
//     on the Azure Flexible Server, on every service role (via an in-cluster
//     psql Job), and in local state;
//  2. ensure the "postgres" secret carries every key consumers expect
//     (including the mlflow-* keys);
//  3. ensure the "foundation-opa-keypair" secret exists, sourcing OPA keys
//     from a tracked VM's .env when one is reachable.
// Best-effort: individual az/kubectl failures are not thrown (reject:false).
async function reconcileK8sSecrets(ctx) {
  const { execa, clusterName, rg, sub } = ctx;
  const kubectl = (args, opts = {}) =>
    execa("kubectl", ["--context", clusterName, ...args], { timeout: 30000, reject: false, ...opts });

  // Read current postgres password; nothing to reconcile without it.
  const { stdout: pwB64 } = await kubectl([
    "get", "secret", "postgres", "-n", "foundation",
    "-o", "jsonpath={.data.password}",
  ]);
  if (!pwB64) return;
  let pgPass = Buffer.from(pwB64, "base64").toString();
  const pgHost = `${pgServerName(clusterName)}.postgres.database.azure.com`;
  const serverName = pgServerName(clusterName);

  // Check for URL-unsafe characters that break pogo-migrate connection strings.
  // Note this matches ANY non-alphanumeric character (stricter than RFC 3986
  // percent-encoding rules), so even "-" or "_" triggers a rotation.
  const URL_UNSAFE = /[^a-zA-Z0-9]/;
  if (URL_UNSAFE.test(pgPass)) {
    console.log(WARN(" ⚠ Postgres password contains URL-unsafe characters — regenerating…"));
    const newPass = generatePassword();

    // Update the Azure Flexible Server admin password
    await execa("az", [
      "postgres", "flexible-server", "update",
      "--name", serverName, "--resource-group", rg,
      "--admin-password", newPass,
      "--output", "none", ...subArgs(sub),
    ], { reject: false, timeout: 120000 });

    // Update all database role passwords via psql job.
    // NOTE(review): the new password is embedded in the pod command line and
    // thus visible in the Job spec; ttlSecondsAfterFinished bounds exposure.
    const sqlStatements = PG_SERVICE_DBS
      .map(role => `ALTER ROLE ${role} WITH PASSWORD '${newPass}';`)
      .join(" ");
    const jobYaml = JSON.stringify({
      apiVersion: "batch/v1", kind: "Job",
      metadata: { name: "fops-pg-repass", namespace: "foundation" },
      spec: {
        backoffLimit: 2, ttlSecondsAfterFinished: 120,
        template: {
          spec: {
            restartPolicy: "Never",
            containers: [{
              name: "psql", image: "postgres:16-alpine",
              command: ["sh", "-c", `export PGPASSWORD='${newPass}' PGHOST='${pgHost}' PGUSER=foundation PGSSLMODE=require; psql -d foundation -c "${sqlStatements}"`],
            }],
          },
        },
      },
    });
    await kubectl(["delete", "job", "fops-pg-repass", "-n", "foundation", "--ignore-not-found"]);
    await kubectl(["apply", "-f", "-"], { input: jobYaml, timeout: 60000 });
    // Wait for the job to complete
    await kubectl(["wait", "--for=condition=complete", "job/fops-pg-repass", "-n", "foundation", "--timeout=60s"], { timeout: 70000 });

    pgPass = newPass;

    // Update local state
    writeClusterState(clusterName, {
      postgres: { adminPassword: newPass },
    });
    console.log(OK(" ✓ Postgres password regenerated (URL-safe)"));
  }

  // Ensure postgres secret has all required keys (mlflow needs mlflow-username etc.)
  const { stdout: existingKeys } = await kubectl([
    "get", "secret", "postgres", "-n", "foundation",
    "-o", "jsonpath={.data}",
  ]);
  const keys = existingKeys ? Object.keys(JSON.parse(existingKeys)) : [];
  const requiredKeys = [
    "host", "user", "password", "superUserPassword",
    "postgres-password", "mlflow-username", "mlflow-password",
  ];
  const missing = requiredKeys.filter(k => !keys.includes(k));
  // Re-test the ORIGINAL (pre-rotation) password so the secret is rewritten
  // with the regenerated pgPass whenever a rotation just happened above.
  const needsUpdate = missing.length > 0 || URL_UNSAFE.test(Buffer.from(pwB64, "base64").toString());

  if (needsUpdate) {
    if (missing.length) hint(`Adding missing keys to postgres secret: ${missing.join(", ")}`);
    // Render the full secret client-side, then apply (create-or-update).
    const args = [
      "create", "secret", "generic", "postgres", "-n", "foundation",
      "--from-literal", `host=${pgHost}`,
      "--from-literal", `user=foundation`,
      "--from-literal", `password=${pgPass}`,
      "--from-literal", `superUserPassword=${pgPass}`,
      "--from-literal", `postgres-password=${pgPass}`,
      "--from-literal", `mlflow-username=mlflow`,
      "--from-literal", `mlflow-password=${pgPass}`,
      "--dry-run=client", "-o", "yaml",
    ];
    const { stdout: secretYaml } = await kubectl(args);
    if (secretYaml) {
      await kubectl(["apply", "-f", "-"], { input: secretYaml });
      console.log(OK(" ✓ Postgres secret updated with all required keys"));
    }
  }

  // Ensure OPA keypair secret exists
  const { exitCode: opaExists } = await kubectl([
    "get", "secret", "foundation-opa-keypair", "-n", "foundation",
  ]);
  if (opaExists !== 0) {
    hint("Creating OPA keypair secret…");
    // Use the OPA AWS-style keys from the VM env or generate placeholders
    const { listVms } = await import("./azure-state.js");
    const { vms: vmMap } = listVms();
    const vms = Object.entries(vmMap || {}).filter(([, v]) => v.ip);

    // Fallback values when no VM is reachable or the env vars are absent.
    let opaAccessKey = "placeholder";
    let opaSecretKey = "placeholder";

    if (vms.length > 0) {
      // Only the first tracked VM with an IP is consulted.
      const [, vm] = vms[0];
      const { sshCmd } = await import("./azure-helpers.js");
      const user = vm.adminUser || "azureuser";
      const { stdout: envContent } = await sshCmd(
        execa, vm.ip, user,
        "grep -E '^OPA_(ACCESS_KEY_ID|SECRET_ACCESS_KEY)=' /opt/foundation-compose/.env 2>/dev/null",
        15000,
      );
      if (envContent) {
        const m1 = envContent.match(/OPA_ACCESS_KEY_ID=(.+)/);
        const m2 = envContent.match(/OPA_SECRET_ACCESS_KEY=(.+)/);
        if (m1) opaAccessKey = m1[1].trim();
        if (m2) opaSecretKey = m2[1].trim();
      }
    }

    const { stdout: opaYaml } = await kubectl([
      "create", "secret", "generic", "foundation-opa-keypair", "-n", "foundation",
      "--from-literal", `OPA_ACCESS_KEY_ID=${opaAccessKey}`,
      "--from-literal", `OPA_SECRET_ACCESS_KEY=${opaSecretKey}`,
      "--dry-run=client", "-o", "yaml",
    ]);
    if (opaYaml) {
      await kubectl(["apply", "-f", "-"], { input: opaYaml });
      console.log(OK(" ✓ OPA keypair secret created"));
    }
  } else {
    console.log(OK(" ✓ OPA keypair secret exists"));
  }
}
2794
-
2795
- // ── ACR webhooks: clean up orphaned webhooks that block pods ──────────────────
2796
-
2797
// Remove leftover ACR mutating webhook configurations that can block pod and
// helm admission once their backing service is gone. Best-effort: kubectl
// failures are swallowed (reject:false) rather than thrown.
async function reconcileAcrWebhooks(ctx) {
  const { execa, clusterName } = ctx;
  const kubectl = (args, opts = {}) =>
    execa("kubectl", ["--context", clusterName, ...args], { timeout: 15000, reject: false, ...opts });

  const orphanedWebhooks = ["acr-pod-webhook", "acr-helm-webhook"];
  for (const name of orphanedWebhooks) {
    // Only delete (and report) webhooks that actually exist.
    const probe = await kubectl(["get", "mutatingwebhookconfiguration", name]);
    if (probe.exitCode !== 0) continue;
    await kubectl(["delete", "mutatingwebhookconfiguration", name]);
    console.log(OK(` ✓ Removed orphaned ${name}`));
  }
}
2812
-
2813
// ── Secret Store: Azure Key Vault + ExternalSecrets SecretStore ───────────────

// Namespaces that always receive a SecretStore (and replicated SP credentials).
const SECRET_STORE_NAMESPACES_BASE = ["foundation"];
// Additional namespaces included only when the dai option is enabled.
const SECRET_STORE_NAMESPACES_DAI = ["dai"];
// Name of the ExternalSecrets SecretStore object created in each namespace.
const SECRET_STORE_NAME = "azure-secretsmanager";
2818
-
2819
// Derive a deterministic Azure Key Vault name for a cluster.
// Key Vault names must be 3–24 characters of alphanumerics and hyphens and
// must not end with a hyphen. Truncating to 24 characters could previously
// leave a trailing hyphen (e.g. when the 24th char fell on the "-kv" suffix),
// producing a name Azure rejects — strip any trailing hyphens after slicing.
function kvName(clusterName) {
  const base = `fops-${clusterName}-kv`
    .replace(/[^a-zA-Z0-9-]/g, "")
    .slice(0, 24)
    .replace(/-+$/, "");
  return base;
}
2823
-
2824
// Reconcile the Azure Key Vault + ExternalSecrets wiring for a cluster:
//  1. ensure the Key Vault exists (RBAC authorization enabled),
//  2. grant the deployment SP "Key Vault Secrets Officer" on it,
//  3. replicate the azure-secret-sp credential secret into each target namespace,
//  4. create a SecretStore per namespace pointing at the vault,
//  5. seed the vault from a tracked VM when it is empty.
// Best-effort: az/kubectl failures log warnings and return instead of throwing.
//
// Fix: the role-existence check previously tested only the exit code of
// `az role assignment list`, which is 0 even when no assignment matches, so
// the role grant below never executed. The check now also requires the
// queried assignment id to be non-empty.
async function reconcileSecretStore(ctx) {
  const { execa, clusterName, rg, sub, opts } = ctx;
  const vaultName = kvName(clusterName);
  const location = ctx.cluster?.location || DEFAULTS.location;
  const ssNamespaces = opts?.dai
    ? [...SECRET_STORE_NAMESPACES_BASE, ...SECRET_STORE_NAMESPACES_DAI]
    : SECRET_STORE_NAMESPACES_BASE;

  const kubectl = (args, opts = {}) =>
    execa("kubectl", ["--context", clusterName, ...args], { timeout: 30000, reject: false, ...opts });

  // 1. Ensure Key Vault exists
  const { exitCode: kvExists } = await execa("az", [
    "keyvault", "show", "--name", vaultName, "--output", "none",
    ...subArgs(sub),
  ], { reject: false, timeout: 30000 });

  if (kvExists !== 0) {
    hint(`Creating Key Vault "${vaultName}"…`);
    const { exitCode, stderr } = await execa("az", [
      "keyvault", "create",
      "--name", vaultName,
      "--resource-group", rg,
      "--location", location,
      "--enable-rbac-authorization", "true",
      "--output", "none",
      ...subArgs(sub),
    ], { timeout: 120000, reject: false });
    if (exitCode !== 0) {
      console.log(WARN(` ⚠ Key Vault creation failed: ${(stderr || "").split("\n")[0]}`));
      return;
    }
    console.log(OK(` ✓ Key Vault "${vaultName}" created`));
  } else {
    console.log(OK(` ✓ Key Vault "${vaultName}" exists`));
  }

  // 2. Get Key Vault resource ID and ensure SP has Secrets Officer role
  const spClientId = await getSpClientId(kubectl);
  if (spClientId) {
    const { stdout: kvJson } = await execa("az", [
      "keyvault", "show", "--name", vaultName, "--query", "id", "-o", "tsv",
      ...subArgs(sub),
    ], { reject: false, timeout: 30000 });
    const kvId = (kvJson || "").trim();

    if (kvId) {
      const { stdout: spObjId } = await execa("az", [
        "ad", "sp", "show", "--id", spClientId, "--query", "id", "-o", "tsv",
      ], { reject: false, timeout: 30000 });
      const objectId = (spObjId || "").trim();

      if (objectId) {
        // `az role assignment list` exits 0 even when nothing matches; the
        // assignment is only present when the [0].id query yields a value.
        const { exitCode: roleListCode, stdout: roleId } = await execa("az", [
          "role", "assignment", "list",
          "--assignee", objectId,
          "--role", "Key Vault Secrets Officer",
          "--scope", kvId,
          "--query", "[0].id", "-o", "tsv",
          ...subArgs(sub),
        ], { reject: false, timeout: 30000 });

        const hasRole = roleListCode === 0 && (roleId || "").trim() !== "";
        if (!hasRole) {
          await execa("az", [
            "role", "assignment", "create",
            "--assignee-object-id", objectId,
            "--assignee-principal-type", "ServicePrincipal",
            "--role", "Key Vault Secrets Officer",
            "--scope", kvId,
            ...subArgs(sub),
          ], { reject: false, timeout: 30000 });
          console.log(OK(" ✓ SP granted Key Vault Secrets Officer role"));
        }
      }
    }
  }

  // 3. Ensure azure-secret-sp exists in each target namespace
  const { stdout: spSecretJson } = await kubectl([
    "get", "secret", "azure-secret-sp", "-n", "foundation", "-o", "json",
  ]);
  const spSecret = spSecretJson ? JSON.parse(spSecretJson) : null;

  if (!spSecret || !spSecret.data?.ClientID) {
    console.log(WARN(" ⚠ Secret 'azure-secret-sp' not found in foundation — SecretStore needs SP credentials"));
    hint("Create it with: kubectl create secret generic azure-secret-sp -n foundation --from-literal=ClientID=<sp-client-id> --from-literal=ClientSecret=<sp-secret>");
    return;
  }

  // Replicate azure-secret-sp to other namespaces (creating namespaces as
  // needed; the foundation namespace already holds the source secret).
  for (const ns of ssNamespaces) {
    const { exitCode: nsExists } = await kubectl(["get", "namespace", ns]);
    if (nsExists !== 0) {
      await kubectl(["create", "namespace", ns]);
    }
    if (ns === "foundation") continue;
    const { exitCode: spExists } = await kubectl(["get", "secret", "azure-secret-sp", "-n", ns]);
    if (spExists !== 0) {
      await kubectl([
        "create", "secret", "generic", "azure-secret-sp",
        "-n", ns,
        "--from-literal", `ClientID=${Buffer.from(spSecret.data.ClientID, "base64").toString()}`,
        "--from-literal", `ClientSecret=${Buffer.from(spSecret.data.ClientSecret, "base64").toString()}`,
      ]);
      console.log(OK(` ✓ Replicated azure-secret-sp to ${ns}`));
    }
  }

  // 4. Get tenant ID (needed by the azurekv provider spec)
  const { stdout: tenantId } = await execa("az", [
    "account", "show", "--query", "tenantId", "-o", "tsv",
    ...subArgs(sub),
  ], { reject: false, timeout: 15000 });

  // 5. Create SecretStore in each namespace (skipping any that already exist)
  const apiVersion = await detectEsApiVersion(kubectl);

  for (const ns of ssNamespaces) {
    const { exitCode: ssExists } = await kubectl([
      "get", "secretstore", SECRET_STORE_NAME, "-n", ns,
    ]);
    if (ssExists === 0) continue;

    const manifest = `apiVersion: ${apiVersion}
kind: SecretStore
metadata:
  name: ${SECRET_STORE_NAME}
  namespace: ${ns}
spec:
  provider:
    azurekv:
      authType: ServicePrincipal
      vaultUrl: https://${vaultName}.vault.azure.net
      tenantId: ${(tenantId || "").trim()}
      authSecretRef:
        clientId:
          name: azure-secret-sp
          key: ClientID
        clientSecret:
          name: azure-secret-sp
          key: ClientSecret
`;
    const { exitCode: applyCode, stderr } = await kubectl(
      ["apply", "-f", "-"],
      { input: manifest },
    );
    if (applyCode === 0) {
      console.log(OK(` ✓ SecretStore "${SECRET_STORE_NAME}" created in ${ns}`));
    } else {
      console.log(WARN(` ⚠ SecretStore creation failed in ${ns}: ${(stderr || "").split("\n")[0]}`));
    }
  }

  // 6. Seed Key Vault secrets from a running VM if the vault is empty
  await seedKeyVaultFromVm(execa, { vaultName, sub, dai: opts?.dai });
}
2982
-
2983
// Read the service-principal client id out of the azure-secret-sp secret in
// the foundation namespace. Returns the base64-decoded ClientID string, or
// null when the secret (or its ClientID key) is absent.
async function getSpClientId(kubectl) {
  const { stdout } = await kubectl([
    "get", "secret", "azure-secret-sp", "-n", "foundation",
    "-o", "jsonpath={.data.ClientID}",
  ]);
  return stdout ? Buffer.from(stdout, "base64").toString() : null;
}
2991
-
2992
// Determine which external-secrets.io API version the cluster serves for
// SecretStore objects. Falls back to v1beta1 when the API group is not
// registered at all, and defaults to v1 when the CRD reports no recognized
// version (or the CRD lookup yields nothing).
async function detectEsApiVersion(kubectl) {
  const probe = await kubectl([
    "api-resources", "--api-group=external-secrets.io",
    "-o", "name",
  ]);
  if (probe.exitCode !== 0) return "external-secrets.io/v1beta1";

  const crd = await kubectl([
    "get", "crd", "secretstores.external-secrets.io",
    "-o", "jsonpath={.spec.versions[*].name}",
  ]);
  const served = (crd.stdout || "").split(/\s+/);
  if (served.includes("v1")) return "external-secrets.io/v1";
  if (served.includes("v1beta1")) return "external-secrets.io/v1beta1";
  return "external-secrets.io/v1";
}
3007
-
3008
// Key Vault secrets to seed from a VM's /opt/foundation-compose/.env file.
// Each entry becomes one JSON-valued Key Vault secret: `name` is the vault
// secret name, and `envKeys` maps output JSON property -> source env var
// (only properties whose env var is present on the VM are included).
// Entries flagged `daiOnly` are seeded only when the dai option is enabled.
const KV_SEED_SECRETS = [
  { name: "foundation-secrets", envKeys: {
    password: "POSTGRES_PASSWORD",
    secret_key: "MX_SECRET_KEY",
    "auth0.secret": "AUTH0_SECRET",
    "auth0.client_id": "AUTH0_CLIENT_ID",
    "auth0.client_secret": "AUTH0_CLIENT_SECRET",
    "auth0.domain": "AUTH0_DOMAIN",
    "auth0.audience": "AUTH0_AUDIENCE",
    "auth0.issuer_base_url": "AUTH0_ISSUER_BASE_URL",
    "auth0.base_url": "AUTH0_BASE_URL",
  }},
  { name: "auth0", envKeys: {
    client_id: "AUTH0_CLIENT_ID",
    client_secret: "AUTH0_CLIENT_SECRET",
  }},
  { name: "foundation-trino-jwt", envKeys: {
    "jwt-secret.pem": "MX_SECRET_KEY",
    secret_key: "MX_SECRET_KEY",
  }},
  { name: "foundation-nats", envKeys: {
    "nkeys-secret": "MX_SECRET_KEY",
    secret_key: "MX_SECRET_KEY",
  }},
  { name: "foundation-storage-engine-auth", envKeys: {
    AUTH_IDENTITY: "AUTH_IDENTITY",
    AUTH_CREDENTIAL: "AUTH_CREDENTIAL",
  }},
  { name: "dai-secrets", daiOnly: true, envKeys: {
    AUTH0_CLIENT_ID: "AUTH0_CLIENT_ID",
    AUTH0_CLIENT_SECRET: "AUTH0_CLIENT_SECRET",
    AUTH0_SECRET: "AUTH0_SECRET",
    NATS_NKEYS_SECRET: "MX_SECRET_KEY",
    AZURE_OPENAI_API_KEY: "MX_OPENAI_API_KEY",
  }},
  { name: "foundation-scheduler-secrets", envKeys: {
    secretKey: "MX_SECRET_KEY",
  }},
];
3047
-
3048
// Seed an empty Key Vault with the KV_SEED_SECRETS entries, sourcing values
// from the .env file of the first tracked VM (read over SSH). A vault that
// already contains any secret is left untouched. Best-effort throughout:
// missing VMs, unreadable .env, or failed `az` calls log hints/warnings
// rather than throwing.
async function seedKeyVaultFromVm(execa, { vaultName, sub, dai }) {
  // Check if vault already has secrets — never overwrite an in-use vault.
  const { stdout: existingSecrets } = await execa("az", [
    "keyvault", "secret", "list", "--vault-name", vaultName,
    "--query", "length(@)", "-o", "tsv",
    ...subArgs(sub),
  ], { reject: false, timeout: 30000 });

  if (parseInt(existingSecrets || "0", 10) > 0) {
    console.log(OK(` ✓ Key Vault has ${existingSecrets.trim()} secret(s) — skipping seed`));
    return;
  }

  // Try to pull .env from a running VM (only the first tracked VM with an IP).
  const { listVms } = await import("./azure-state.js");
  const { vms: vmMap } = listVms();
  const vms = Object.entries(vmMap || {}).filter(([, v]) => v.ip);
  if (vms.length === 0) {
    hint("No VMs tracked — seed Key Vault manually or run fops azure aks seed-secrets");
    return;
  }

  const [vmName, vm] = vms[0];
  hint(`Seeding Key Vault from VM "${vmName}" (${vm.ip})…`);

  const { sshCmd } = await import("./azure-helpers.js");
  const user = vm.adminUser || "azureuser";
  const { stdout: envContent, exitCode } = await sshCmd(
    execa, vm.ip, user,
    "cat /opt/foundation-compose/.env 2>/dev/null",
    30000,
  );
  if (exitCode !== 0 || !envContent) {
    console.log(WARN(` ⚠ Could not read .env from ${vmName} — seed manually`));
    return;
  }

  // Parse KEY=value lines (uppercase keys only), stripping one layer of
  // surrounding quotes from values.
  const envMap = {};
  for (const line of envContent.split("\n")) {
    const m = line.match(/^([A-Z0-9_]+)=(.+)$/);
    if (m) envMap[m[1]] = m[2].replace(/^["']|["']$/g, "");
  }

  // Assemble each vault secret as a JSON object of the env values found;
  // entries with no matching env vars are skipped entirely.
  let seeded = 0;
  const secrets = dai ? KV_SEED_SECRETS : KV_SEED_SECRETS.filter(s => !s.daiOnly);
  for (const { name: secretName, envKeys } of secrets) {
    const secretObj = {};
    for (const [prop, envKey] of Object.entries(envKeys)) {
      if (envMap[envKey]) secretObj[prop] = envMap[envKey];
    }
    if (Object.keys(secretObj).length === 0) continue;

    const value = JSON.stringify(secretObj);
    const { exitCode: setCode } = await execa("az", [
      "keyvault", "secret", "set",
      "--vault-name", vaultName,
      "--name", secretName,
      "--value", value,
      "--content-type", "application/json",
      ...subArgs(sub),
    ], { reject: false, timeout: 30000 });

    if (setCode === 0) seeded++;
  }

  if (seeded > 0) {
    console.log(OK(` ✓ Seeded ${seeded} secret(s) into Key Vault from VM "${vmName}"`));
  } else {
    console.log(WARN(" ⚠ No matching env vars found on VM — seed secrets manually"));
  }
}
3119
-
3120
// Bootstrap/reconcile Flux on the cluster unless the no-flux option is set.
// Requires a GitHub token (skips with a hint otherwise) and records the
// resolved flux repo/owner/path/branch in local cluster state afterwards.
async function reconcileFluxStep(ctx) {
  const { execa, clusterName, rg, sub, opts } = ctx;
  if (opts.noFlux) return;

  const githubToken = resolveGithubToken(opts);
  if (!githubToken) {
    console.log(WARN(" ⚠ Skipping Flux — no GitHub token found."));
    hint("Authenticate with: gh auth login");
    return;
  }

  const fluxConfig = resolveFluxConfig(clusterName, opts);

  await reconcileFlux(execa, {
    clusterName,
    rg,
    sub,
    githubToken,
    repo: fluxConfig.fluxRepo,
    owner: fluxConfig.fluxOwner,
    path: fluxConfig.fluxPath,
    branch: fluxConfig.fluxBranch,
  });

  writeClusterState(clusterName, {
    flux: {
      repo: fluxConfig.fluxRepo,
      owner: fluxConfig.fluxOwner,
      path: fluxConfig.fluxPath,
      branch: fluxConfig.fluxBranch,
    },
  });
}
3143
-
3144
// ── Helm repos: ensure HelmRepository sources exist for Foundation charts ─────

// Flux HelmRepository sources to reconcile into the flux-system namespace.
// The two OCI entries point at the same meshx ACR registry and share the
// meshxregistry-helm-secret pull credential; trinodb is a plain HTTPS repo.
const HELM_REPOS = [
  {
    name: "foundation", namespace: "flux-system",
    spec: { type: "oci", interval: "1m0s", url: "oci://meshxregistry.azurecr.io/helm", secretRef: { name: "meshxregistry-helm-secret" } },
  },
  {
    name: "hive-metastore", namespace: "flux-system",
    spec: { type: "oci", interval: "1m0s", url: "oci://meshxregistry.azurecr.io/helm", secretRef: { name: "meshxregistry-helm-secret" } },
  },
  {
    name: "trinodb", namespace: "flux-system",
    spec: { interval: "30m", url: "https://trinodb.github.io/charts" },
  },
];
3160
-
3161
- async function reconcileHelmRepos(ctx) {
3162
- const { execa, clusterName } = ctx;
3163
- const tracked = readClusterState(clusterName);
3164
- if (!tracked?.flux) return;
3165
-
3166
- const kubectl = (args, opts = {}) =>
3167
- execa("kubectl", ["--context", clusterName, ...args], { reject: false, timeout: 30000, ...opts });
3168
-
3169
- // Detect which API version the cluster supports
3170
- const { stdout: crdJson } = await kubectl([
3171
- "get", "crd", "helmrepositories.source.toolkit.fluxcd.io",
3172
- "-o", "jsonpath={.spec.versions[*].name}",
3173
- ]);
3174
- const versions = (crdJson || "").split(/\s+/).filter(Boolean);
3175
- const apiVersion = versions.includes("v1")
3176
- ? "source.toolkit.fluxcd.io/v1"
3177
- : versions.includes("v1beta2")
3178
- ? "source.toolkit.fluxcd.io/v1beta2"
3179
- : "source.toolkit.fluxcd.io/v1";
3180
-
3181
- // Ensure the ACR helm secret exists in flux-system for HelmRepository auth
3182
- const { exitCode: secretExists } = await kubectl([
3183
- "get", "secret", "meshxregistry-helm-secret", "-n", "flux-system",
3184
- ]);
3185
- if (secretExists !== 0) {
3186
- hint("Creating ACR helm secret in flux-system…");
3187
- const secretNs = ["acr-cache-system", "foundation"].find(async ns => {
3188
- const { exitCode } = await kubectl(["get", "secret", "meshxregistry-helm-secret", "-n", ns]);
3189
- return exitCode === 0;
3190
- });
3191
- if (secretNs) {
3192
- const { stdout: secretYaml } = await kubectl([
3193
- "get", "secret", "meshxregistry-helm-secret", "-n", secretNs, "-o", "json",
3194
- ]);
3195
- if (secretYaml) {
3196
- try {
3197
- const secret = JSON.parse(secretYaml);
3198
- delete secret.metadata.namespace;
3199
- delete secret.metadata.resourceVersion;
3200
- delete secret.metadata.uid;
3201
- delete secret.metadata.creationTimestamp;
3202
- if (secret.metadata.annotations) {
3203
- delete secret.metadata.annotations["kubectl.kubernetes.io/last-applied-configuration"];
3204
- }
3205
- const clean = JSON.stringify(secret);
3206
- await execa("kubectl", [
3207
- "--context", clusterName, "apply", "-n", "flux-system", "-f", "-",
3208
- ], { input: clean, reject: false, timeout: 10000 });
3209
- console.log(OK(" ✓ ACR helm secret replicated to flux-system"));
3210
- } catch { /* reflector should handle this eventually */ }
3211
- }
3212
- }
3213
- }
3214
-
3215
- let updated = 0;
3216
- let unchanged = 0;
3217
- let failed = 0;
3218
-
3219
- const repos = ctx.opts?.dai ? HELM_REPOS : HELM_REPOS.filter(r => !r.daiOnly);
3220
- for (const repo of repos) {
3221
- const specLines = [];
3222
- for (const [k, v] of Object.entries(repo.spec)) {
3223
- if (k === "secretRef") {
3224
- specLines.push(` secretRef:`);
3225
- specLines.push(` name: ${v.name}`);
3226
- } else {
3227
- specLines.push(` ${k}: "${v}"`);
3228
- }
3229
- }
3230
-
3231
- const yaml = [
3232
- `apiVersion: ${apiVersion}`,
3233
- `kind: HelmRepository`,
3234
- `metadata:`,
3235
- ` name: ${repo.name}`,
3236
- ` namespace: ${repo.namespace}`,
3237
- `spec:`,
3238
- ...specLines,
3239
- ].join("\n");
3240
-
3241
- const applyResult = await execa("kubectl", [
3242
- "--context", clusterName, "apply", "-f", "-",
3243
- ], { input: yaml, reject: false, timeout: 15000 });
3244
-
3245
- if (applyResult.exitCode === 0) {
3246
- const out = (applyResult.stdout || "").trim();
3247
- if (out.includes("configured")) updated++;
3248
- else unchanged++;
3249
- } else {
3250
- failed++;
3251
- const errMsg = (applyResult.stderr || applyResult.stdout || "unknown error").trim().split("\n")[0];
3252
- console.log(WARN(` ⚠ HelmRepository ${repo.namespace}/${repo.name}: ${errMsg}`));
3253
- }
3254
- }
3255
-
3256
- // Verify they actually exist
3257
- const { stdout: verify } = await kubectl([
3258
- "get", "helmrepository.source.toolkit.fluxcd.io", "-n", "flux-system",
3259
- "--no-headers", "-o", "custom-columns=NAME:.metadata.name",
3260
- ]);
3261
- const actual = (verify || "").trim().split("\n").filter(Boolean);
3262
- if (updated > 0) {
3263
- console.log(OK(` ✓ ${actual.length} HelmRepository source(s) in flux-system (${updated} updated, ${unchanged} unchanged)`));
3264
- } else if (failed === 0) {
3265
- console.log(OK(` ✓ All ${actual.length} HelmRepository sources up to date`));
3266
- }
3267
- if (actual.length < HELM_REPOS.length) {
3268
- const missing = HELM_REPOS.filter(r => !actual.includes(r.name));
3269
- for (const m of missing) {
3270
- console.log(WARN(` ⚠ Missing: ${m.name}`));
3271
- }
3272
- }
3273
- }
3274
-
3275
- // ── Flux prereqs: pre-install CRDs and fix scheduling so Flux dry-runs pass ──
3276
-
3277
- async function reconcileFluxPrereqs(ctx) {
3278
- const { execa, clusterName } = ctx;
3279
- const tracked = readClusterState(clusterName);
3280
- if (!tracked?.flux) return;
3281
-
3282
- banner("Flux Prerequisites");
3283
-
3284
- const kubectl = (args, opts = {}) =>
3285
- execa("kubectl", ["--context", clusterName, ...args], { reject: false, timeout: 60000, ...opts });
3286
-
3287
- // 1. Clean up legacy acr-cache-system if present (replaced by direct pull secrets)
3288
- try {
3289
- const { exitCode } = await kubectl(["get", "namespace", "acr-cache-system"]);
3290
- if (exitCode === 0) {
3291
- hint("Cleaning up legacy acr-cache-system namespace…");
3292
- for (const wh of ["acr-pod-webhook", "acr-helm-webhook"]) {
3293
- await kubectl(["delete", "mutatingwebhookconfiguration", wh, "--ignore-not-found"]);
3294
- }
3295
- await kubectl(["delete", "namespace", "acr-cache-system", "--ignore-not-found"]);
3296
- console.log(OK(" ✓ Legacy acr-cache-system removed"));
3297
- }
3298
- } catch { /* not present, nothing to do */ }
3299
-
3300
- // 2. Pre-install CRDs that Flux manifests reference before their operators deploy.
3301
- // Without these, Flux dry-run rejects the entire kustomization.
3302
- const crdBundles = [
3303
- {
3304
- name: "external-secrets",
3305
- check: "externalsecrets.external-secrets.io",
3306
- url: "https://raw.githubusercontent.com/external-secrets/external-secrets/main/deploy/crds/bundle.yaml",
3307
- serverSide: true,
3308
- },
3309
- {
3310
- name: "Istio",
3311
- check: "virtualservices.networking.istio.io",
3312
- helm: { repo: "https://istio-release.storage.googleapis.com/charts", chart: "base", release: "istio-base", namespace: "istio-system" },
3313
- },
3314
- ];
3315
-
3316
- for (const bundle of crdBundles) {
3317
- const { exitCode } = await kubectl(["get", "crd", bundle.check], { timeout: 10000 });
3318
- if (exitCode === 0) {
3319
- console.log(OK(` ✓ ${bundle.name} CRDs present`));
3320
- continue;
3321
- }
3322
-
3323
- hint(`Installing ${bundle.name} CRDs…`);
3324
- try {
3325
- if (bundle.helm) {
3326
- await execa("helm", ["repo", "add", bundle.name, bundle.helm.repo, "--force-update"], { reject: false, timeout: 30000 });
3327
- await execa("helm", [
3328
- "--kube-context", clusterName,
3329
- "install", bundle.helm.release, `${bundle.name}/${bundle.helm.chart}`,
3330
- "-n", bundle.helm.namespace, "--create-namespace",
3331
- "--wait", "--timeout", "90s",
3332
- ], { timeout: 120000 });
3333
- } else {
3334
- const applyArgs = ["apply", "-f", bundle.url];
3335
- if (bundle.serverSide) applyArgs.push("--server-side");
3336
- await kubectl(applyArgs, { timeout: 120000 });
3337
- }
3338
- console.log(OK(` ✓ ${bundle.name} CRDs installed`));
3339
- } catch (err) {
3340
- console.log(WARN(` ⚠ ${bundle.name} CRDs failed: ${(err.message || "").split("\n")[0]}`));
3341
- }
3342
- }
3343
-
3344
- // 3. Enable v1beta1 on ExternalSecret CRD if the operator disabled it.
3345
- // Some Flux manifests still reference v1beta1.
3346
- try {
3347
- const { stdout: crdJson } = await kubectl([
3348
- "get", "crd", "externalsecrets.external-secrets.io",
3349
- "-o", "jsonpath={.spec.versions[?(@.name==\"v1beta1\")].served}",
3350
- ], { timeout: 10000 });
3351
- if (crdJson === "false") {
3352
- hint("Enabling v1beta1 on ExternalSecret CRD…");
3353
- const { stdout: fullCrd } = await kubectl([
3354
- "get", "crd", "externalsecrets.external-secrets.io", "-o", "json",
3355
- ], { timeout: 10000 });
3356
- const crd = JSON.parse(fullCrd);
3357
- for (const v of crd.spec.versions) {
3358
- if (v.name === "v1beta1") v.served = true;
3359
- }
3360
- await execa("kubectl", [
3361
- "--context", clusterName, "apply", "--server-side", "-f", "-",
3362
- ], { input: JSON.stringify(crd), timeout: 30000, reject: false });
3363
-
3364
- // Same for SecretStore CRD
3365
- const { stdout: ssCrd } = await kubectl([
3366
- "get", "crd", "secretstores.external-secrets.io", "-o", "json",
3367
- ], { timeout: 10000 });
3368
- const ss = JSON.parse(ssCrd);
3369
- for (const v of ss.spec.versions) {
3370
- if (v.name === "v1beta1") v.served = true;
3371
- }
3372
- await execa("kubectl", [
3373
- "--context", clusterName, "apply", "--server-side", "-f", "-",
3374
- ], { input: JSON.stringify(ss), timeout: 30000, reject: false });
3375
-
3376
- console.log(OK(" ✓ v1beta1 enabled on ExternalSecret CRDs"));
3377
- }
3378
- } catch { /* CRD not installed yet — will be handled on next reconcile */ }
3379
-
3380
- // 4. Trigger a full Flux reconciliation so kustomizations pick up the new CRDs
3381
- try {
3382
- hint("Triggering Flux reconciliation…");
3383
- const { stdout: ksList } = await kubectl([
3384
- "get", "kustomization", "-n", "flux-system", "--no-headers",
3385
- "-o", "custom-columns=NAME:.metadata.name",
3386
- ]);
3387
- const names = ksList.trim().split("\n").map(n => n.trim()).filter(Boolean);
3388
- const ts = String(Date.now());
3389
- for (const ks of names) {
3390
- await kubectl([
3391
- "annotate", "kustomization", ks, "-n", "flux-system",
3392
- `reconcile.fluxcd.io/requestedAt=${ts}-${ks}`, "--overwrite",
3393
- ], { timeout: 10000 });
3394
- }
3395
- console.log(OK(` ✓ Reconciliation triggered for ${names.length} kustomizations`));
3396
- } catch {
3397
- hint("Could not trigger reconciliation — Flux may not be ready yet");
3398
- }
3399
- }
3400
-
3401
- // ── fops-api deployment ──────────────────────────────────────────────────────
3402
-
3403
- const FOPS_API_NS = "fops-system";
3404
-
3405
- function fopsApiManifests(clusterName) {
3406
- const labels = { app: "fops-api", "app.kubernetes.io/managed-by": "fops" };
3407
- return [
3408
- {
3409
- apiVersion: "v1", kind: "Namespace",
3410
- metadata: { name: FOPS_API_NS, labels: { "app.kubernetes.io/managed-by": "fops" } },
3411
- },
3412
- {
3413
- apiVersion: "v1", kind: "ServiceAccount",
3414
- metadata: { name: "fops-api", namespace: FOPS_API_NS, labels },
3415
- },
3416
- {
3417
- apiVersion: "rbac.authorization.k8s.io/v1", kind: "ClusterRoleBinding",
3418
- metadata: { name: "fops-api-viewer", labels },
3419
- subjects: [{ kind: "ServiceAccount", name: "fops-api", namespace: FOPS_API_NS }],
3420
- roleRef: { kind: "ClusterRole", name: "view", apiGroup: "rbac.authorization.k8s.io" },
3421
- },
3422
- {
3423
- apiVersion: "apps/v1", kind: "Deployment",
3424
- metadata: { name: "fops-api", namespace: FOPS_API_NS, labels },
3425
- spec: {
3426
- replicas: 1,
3427
- selector: { matchLabels: { app: "fops-api" } },
3428
- template: {
3429
- metadata: { labels },
3430
- spec: {
3431
- serviceAccountName: "fops-api",
3432
- containers: [{
3433
- name: "fops-api",
3434
- image: "node:20-alpine",
3435
- command: ["sh", "-c", "npm install -g @meshxdata/fops && fops serve --host 0.0.0.0 --port 4100"],
3436
- ports: [{ name: "http", containerPort: 4100 }],
3437
- env: [
3438
- { name: "NODE_ENV", value: "production" },
3439
- { name: "FOPS_CLUSTER", value: clusterName },
3440
- ],
3441
- resources: {
3442
- requests: { cpu: "100m", memory: "256Mi" },
3443
- limits: { cpu: "500m", memory: "512Mi" },
3444
- },
3445
- readinessProbe: {
3446
- httpGet: { path: "/api/health", port: 4100 },
3447
- initialDelaySeconds: 30, periodSeconds: 10,
3448
- },
3449
- livenessProbe: {
3450
- httpGet: { path: "/api/health", port: 4100 },
3451
- initialDelaySeconds: 60, periodSeconds: 30,
3452
- },
3453
- }],
3454
- },
3455
- },
3456
- },
3457
- },
3458
- {
3459
- apiVersion: "v1", kind: "Service",
3460
- metadata: { name: "fops-api", namespace: FOPS_API_NS, labels },
3461
- spec: {
3462
- selector: { app: "fops-api" },
3463
- ports: [{ name: "http", port: 4100, targetPort: 4100 }],
3464
- },
3465
- },
3466
- ];
3467
- }
3468
-
3469
- // ── Helm Values: patch HelmRelease values that reference cluster-specific settings ──
3470
-
3471
- const OLD_PG_HOSTS = [
3472
- "az-vel-app-data-demo-uaen-psql.postgres.database.azure.com",
3473
- ];
3474
-
3475
- // ── Vault: patch unsealConfig to use the cluster's Key Vault + grant SP RBAC ──
3476
-
3477
- async function reconcileVaultUnseal(ctx) {
3478
- const { execa, clusterName, rg, sub } = ctx;
3479
- const kubectl = (args, opts = {}) =>
3480
- execa("kubectl", ["--context", clusterName, ...args], { timeout: 30000, reject: false, ...opts });
3481
-
3482
- const correctKv = kvName(clusterName);
3483
-
3484
- // Check if Vault CR exists
3485
- const { exitCode, stdout } = await kubectl([
3486
- "get", "vault", "vault", "-n", "foundation",
3487
- "-o", "jsonpath={.spec.unsealConfig.azure.keyVaultName}",
3488
- ]);
3489
- if (exitCode !== 0) return;
3490
-
3491
- const currentKv = (stdout || "").trim();
3492
- if (currentKv === correctKv) {
3493
- console.log(OK(` ✓ Vault unseal config → ${correctKv}`));
3494
- return;
3495
- }
3496
-
3497
- hint(`Patching Vault unsealConfig: ${currentKv || "(unset)"} → ${correctKv}`);
3498
- await kubectl([
3499
- "patch", "vault", "vault", "-n", "foundation",
3500
- "--type", "merge", "-p",
3501
- JSON.stringify({ spec: { unsealConfig: { azure: { keyVaultName: correctKv } } } }),
3502
- ]);
3503
-
3504
- // Ensure the Vault SP has Secrets Officer on the KV
3505
- const spClientId = await getSpClientId(kubectl);
3506
- if (spClientId) {
3507
- // Also check envsConfig for AZURE_CLIENT_ID which vault uses directly
3508
- const { stdout: vaultSpId } = await kubectl([
3509
- "get", "vault", "vault", "-n", "foundation",
3510
- "-o", "jsonpath={.spec.envsConfig[?(@.name=='AZURE_CLIENT_ID')].value}",
3511
- ]);
3512
- const targetSp = (vaultSpId || "").trim() || spClientId;
3513
-
3514
- const { stdout: kvId } = await execa("az", [
3515
- "keyvault", "show", "--name", correctKv, "--query", "id", "-o", "tsv",
3516
- ...subArgs(sub),
3517
- ], { reject: false, timeout: 15000 });
3518
-
3519
- if (kvId?.trim()) {
3520
- await execa("az", [
3521
- "role", "assignment", "create",
3522
- "--assignee", targetSp,
3523
- "--role", "Key Vault Secrets Officer",
3524
- "--scope", kvId.trim(),
3525
- "--output", "none", ...subArgs(sub),
3526
- ], { reject: false, timeout: 30000 });
3527
- }
3528
- }
3529
-
3530
- // Restart vault pod to pick up the new unseal config
3531
- await kubectl(["delete", "pod", "vault-0", "-n", "foundation", "--ignore-not-found"]);
3532
- console.log(OK(` ✓ Vault unseal config patched → ${correctKv}`));
3533
- }
3534
-
3535
- async function reconcileHelmValues(ctx) {
3536
- const { execa, clusterName } = ctx;
3537
- const kubectl = (args, opts = {}) =>
3538
- execa("kubectl", ["--context", clusterName, ...args], { timeout: 30000, reject: false, ...opts });
3539
-
3540
- const pgServer = pgServerName(clusterName);
3541
- const correctHost = `${pgServer}.postgres.database.azure.com`;
3542
-
3543
- // List HelmReleases in foundation namespace
3544
- const { stdout: hrJson } = await kubectl(["get", "helmrelease", "-n", "foundation", "-o", "json"]);
3545
- if (!hrJson) return;
3546
-
3547
- const hrs = JSON.parse(hrJson).items || [];
3548
- let patched = 0;
3549
-
3550
- for (const hr of hrs) {
3551
- const name = hr.metadata?.name;
3552
- const valsStr = JSON.stringify(hr.spec?.values || {});
3553
- let needsPatch = false;
3554
-
3555
- for (const oldHost of OLD_PG_HOSTS) {
3556
- if (valsStr.includes(oldHost)) {
3557
- needsPatch = true;
3558
- break;
3559
- }
3560
- }
3561
- if (!needsPatch) continue;
3562
-
3563
- let newVals = valsStr;
3564
- for (const oldHost of OLD_PG_HOSTS) {
3565
- newVals = newVals.replaceAll(oldHost, correctHost);
3566
- }
3567
-
3568
- const patch = JSON.stringify({ spec: { values: JSON.parse(newVals) } });
3569
- const tmpFile = `/tmp/fops-hr-patch-${name}.json`;
3570
- const { writeFileSync, unlinkSync } = await import("node:fs");
3571
- writeFileSync(tmpFile, patch);
3572
-
3573
- const { exitCode } = await kubectl([
3574
- "patch", "helmrelease", name, "-n", "foundation",
3575
- "--type", "merge", "--patch-file", tmpFile,
3576
- ]);
3577
- try { unlinkSync(tmpFile); } catch {}
3578
-
3579
- if (exitCode === 0) patched++;
3580
- }
3581
-
3582
- if (patched > 0) {
3583
- console.log(OK(` ✓ Patched postgres host in ${patched} HelmRelease(s) → ${correctHost}`));
3584
- } else {
3585
- console.log(OK(" ✓ HelmRelease postgres hosts are correct"));
3586
- }
3587
- }
3588
-
3589
- // ── Storage engine: ensure foundation-storage-engine Deployment + Service exist ──
3590
-
3591
- async function reconcileStorageEngine(ctx) {
3592
- const { execa, clusterName } = ctx;
3593
- const kubectl = (args, opts = {}) =>
3594
- execa("kubectl", ["--context", clusterName, ...args], { timeout: 30000, reject: false, ...opts });
3595
-
3596
- // Check if deployment already exists
3597
- const { exitCode } = await kubectl([
3598
- "get", "deployment", "foundation-storage-engine", "-n", "foundation",
3599
- ]);
3600
- if (exitCode === 0) {
3601
- console.log(OK(" ✓ Storage engine deployment exists"));
3602
- return;
3603
- }
3604
-
3605
- hint("Creating foundation-storage-engine deployment…");
3606
- const manifest = JSON.stringify({
3607
- apiVersion: "apps/v1", kind: "Deployment",
3608
- metadata: { name: "foundation-storage-engine", namespace: "foundation", labels: { app: "foundation-storage-engine" } },
3609
- spec: {
3610
- replicas: 1,
3611
- selector: { matchLabels: { app: "foundation-storage-engine" } },
3612
- template: {
3613
- metadata: { labels: { app: "foundation-storage-engine" } },
3614
- spec: {
3615
- containers: [{
3616
- name: "storage-engine",
3617
- image: "minio/minio:RELEASE.2024-11-07T00-52-20Z",
3618
- args: ["server", "/data", "--console-address", ":9001"],
3619
- env: [
3620
- { name: "MINIO_ROOT_USER", value: "minio" },
3621
- { name: "MINIO_ROOT_PASSWORD", value: "minio123" },
3622
- ],
3623
- ports: [
3624
- { containerPort: 9000, name: "api" },
3625
- { containerPort: 9001, name: "console" },
3626
- ],
3627
- volumeMounts: [{ name: "data", mountPath: "/data" }],
3628
- resources: { requests: { cpu: "100m", memory: "256Mi" }, limits: { cpu: "500m", memory: "512Mi" } },
3629
- readinessProbe: { httpGet: { path: "/minio/health/ready", port: 9000 }, initialDelaySeconds: 5, periodSeconds: 10 },
3630
- }],
3631
- volumes: [{ name: "data", emptyDir: {} }],
3632
- },
3633
- },
3634
- },
3635
- });
3636
- await kubectl(["apply", "-f", "-"], { input: manifest });
3637
-
3638
- // Service: foundation-storage-engine (port 8080 → 9000)
3639
- const svcManifest = JSON.stringify({
3640
- apiVersion: "v1", kind: "Service",
3641
- metadata: { name: "foundation-storage-engine", namespace: "foundation" },
3642
- spec: {
3643
- selector: { app: "foundation-storage-engine" },
3644
- ports: [
3645
- { port: 8080, targetPort: 9000, name: "api" },
3646
- { port: 9000, targetPort: 9000, name: "s3" },
3647
- ],
3648
- },
3649
- });
3650
- await kubectl(["apply", "-f", "-"], { input: svcManifest });
3651
-
3652
- // Also patch the existing "minio" service to point here (Vault uses minio.foundation.svc)
3653
- const minioSvcManifest = JSON.stringify({
3654
- apiVersion: "v1", kind: "Service",
3655
- metadata: { name: "minio", namespace: "foundation" },
3656
- spec: {
3657
- selector: { app: "foundation-storage-engine" },
3658
- ports: [
3659
- { port: 80, targetPort: 9000, name: "http" },
3660
- { port: 9000, targetPort: 9000, name: "s3" },
3661
- { port: 8080, targetPort: 9000, name: "api" },
3662
- ],
3663
- },
3664
- });
3665
- await kubectl(["apply", "-f", "-"], { input: minioSvcManifest });
3666
-
3667
- // Wait for the deployment to be ready, then create the vault bucket
3668
- await kubectl(["rollout", "status", "deployment/foundation-storage-engine", "-n", "foundation", "--timeout=60s"], { timeout: 70000 });
3669
-
3670
- // Create the vault bucket via a one-shot mc pod
3671
- const mcJobYaml = JSON.stringify({
3672
- apiVersion: "batch/v1", kind: "Job",
3673
- metadata: { name: "fops-mc-init", namespace: "foundation" },
3674
- spec: {
3675
- backoffLimit: 3, ttlSecondsAfterFinished: 60,
3676
- template: {
3677
- spec: {
3678
- restartPolicy: "Never",
3679
- containers: [{
3680
- name: "mc",
3681
- image: "minio/mc:latest",
3682
- command: ["sh", "-c", "mc alias set local http://foundation-storage-engine:8080 minio minio123 && mc mb local/vault --ignore-existing"],
3683
- }],
3684
- },
3685
- },
3686
- },
3687
- });
3688
- await kubectl(["delete", "job", "fops-mc-init", "-n", "foundation", "--ignore-not-found"]);
3689
- await kubectl(["apply", "-f", "-"], { input: mcJobYaml });
3690
- await kubectl(["wait", "--for=condition=complete", "job/fops-mc-init", "-n", "foundation", "--timeout=60s"], { timeout: 70000 });
3691
-
3692
- console.log(OK(" ✓ Storage engine deployed with vault bucket"));
3693
- }
3694
-
3695
- // ── Ingress IP: ensure Istio gateway LB has an external IP ───────────────────
3696
-
3697
- export function clusterDomain(clusterName) {
3698
- return resolveUniqueDomain(clusterName, "aks");
3699
- }
3700
-
3701
- const INGRESS_VIRTUALSERVICES = [
3702
- { name: "frontend", namespace: "foundation", gateway: "foundation-gateway", hostPrefix: "" },
3703
- { name: "foundation-api", namespace: "foundation", gateway: "foundation-gateway", hostPrefix: "api." },
3704
- ];
3705
-
3706
- async function reconcileIngressIp(ctx) {
3707
- const { execa, clusterName } = ctx;
3708
- const kubectl = (args, opts = {}) =>
3709
- execa("kubectl", ["--context", clusterName, ...args], { timeout: 15000, reject: false, ...opts });
3710
-
3711
- const domain = clusterDomain(clusterName);
3712
- writeClusterState(clusterName, { domain });
3713
-
3714
- // ── 1. Ensure LB has an external IP ──
3715
-
3716
- const { exitCode, stdout } = await kubectl([
3717
- "get", "svc", "istio-ingressgateway", "-n", "istio-system",
3718
- "-o", "json",
3719
- ]);
3720
- if (exitCode !== 0 || !stdout) return;
3721
-
3722
- const svc = JSON.parse(stdout);
3723
- const annotations = svc.metadata?.annotations || {};
3724
- const pipName = annotations["service.beta.kubernetes.io/azure-pip-name"] || "";
3725
- let externalIp = svc.status?.loadBalancer?.ingress?.[0]?.ip;
3726
-
3727
- // Remove stale annotations from other clouds / restrictive source ranges
3728
- const staleKeys = Object.keys(annotations).filter(k =>
3729
- k.startsWith("service.beta.kubernetes.io/aws-") ||
3730
- k === "service.beta.kubernetes.io/load-balancer-source-ranges" ||
3731
- k === "service.beta.kubernetes.io/azure-allowed-service-tags"
3732
- );
3733
- if (staleKeys.length > 0) {
3734
- hint(`Removing ${staleKeys.length} stale/restrictive annotation(s) from istio-ingressgateway…`);
3735
- await kubectl([
3736
- "annotate", "svc", "istio-ingressgateway", "-n", "istio-system",
3737
- ...staleKeys.map(k => `${k}-`),
3738
- ]);
3739
- console.log(OK(` ✓ Cleaned ${staleKeys.length} stale annotation(s)`));
3740
- }
3741
-
3742
- if (!externalIp && pipName) {
3743
- hint(`Removing stale PIP annotation "${pipName}" from istio-ingressgateway…`);
3744
- await kubectl([
3745
- "annotate", "svc", "istio-ingressgateway", "-n", "istio-system",
3746
- "service.beta.kubernetes.io/azure-pip-name-",
3747
- "service.beta.kubernetes.io/azure-pip-name-IPv6-",
3748
- ]);
3749
- for (let i = 0; i < 6; i++) {
3750
- await new Promise(r => setTimeout(r, 10000));
3751
- const { stdout: refreshed } = await kubectl([
3752
- "get", "svc", "istio-ingressgateway", "-n", "istio-system",
3753
- "-o", "jsonpath={.status.loadBalancer.ingress[0].ip}",
3754
- ]);
3755
- if (refreshed?.trim()) { externalIp = refreshed.trim(); break; }
3756
- }
3757
- }
3758
-
3759
- if (externalIp) {
3760
- console.log(OK(` ✓ Ingress gateway IP: ${externalIp}`));
3761
- } else {
3762
- console.log(WARN(" ⚠ Ingress gateway has no external IP yet"));
3763
- hint(` kubectl describe svc istio-ingressgateway -n istio-system --context ${clusterName}`);
3764
- }
3765
-
3766
- // ── 2. Self-signed TLS cert for Istio ingress ──
3767
-
3768
- const TLS_SECRET = "istio-ingressgateway-certs";
3769
- const { exitCode: tlsCheck } = await kubectl([
3770
- "get", "secret", TLS_SECRET, "-n", "istio-system",
3771
- ]);
3772
- if (tlsCheck !== 0) {
3773
- hint("Creating self-signed TLS certificate for ingress…");
3774
- const { stdout: certOut, exitCode: certCode } = await execa("openssl", [
3775
- "req", "-x509", "-newkey", "rsa:2048",
3776
- "-keyout", "/tmp/fops-aks-tls.key", "-out", "/tmp/fops-aks-tls.crt",
3777
- "-days", "3650", "-nodes",
3778
- "-subj", `/CN=*.meshx.app`,
3779
- "-addext", `subjectAltName=DNS:*.meshx.app,DNS:meshx.app,DNS:*.${domain}`,
3780
- ], { timeout: 15000, reject: false });
3781
- if (certCode === 0) {
3782
- await execa("kubectl", [
3783
- "--context", clusterName,
3784
- "create", "secret", "tls", TLS_SECRET, "-n", "istio-system",
3785
- "--cert=/tmp/fops-aks-tls.crt", "--key=/tmp/fops-aks-tls.key",
3786
- ], { timeout: 15000 });
3787
- console.log(OK(" ✓ Self-signed TLS secret created"));
3788
- } else {
3789
- console.log(WARN(" ⚠ Could not generate TLS cert — HTTPS on ingress may not work"));
3790
- }
3791
- }
3792
-
3793
- // ── 3. Reconcile Istio Gateways ──
3794
-
3795
- const allHosts = INGRESS_VIRTUALSERVICES.map(vs => `${vs.hostPrefix}${domain}`);
3796
- const uniqueHosts = [...new Set(allHosts)];
3797
-
3798
- // Single gateway in foundation namespace with HTTP + HTTPS
3799
- const gwYaml = `apiVersion: networking.istio.io/v1beta1
3800
- kind: Gateway
3801
- metadata:
3802
- name: foundation-gateway
3803
- namespace: foundation
3804
- spec:
3805
- selector:
3806
- istio: ingressgateway
3807
- servers:
3808
- - port:
3809
- number: 80
3810
- name: http
3811
- protocol: HTTP
3812
- hosts:
3813
- ${uniqueHosts.map(h => ` - "${h}"`).join("\n")}
3814
- - port:
3815
- number: 443
3816
- name: https
3817
- protocol: HTTPS
3818
- tls:
3819
- mode: SIMPLE
3820
- credentialName: ${TLS_SECRET}
3821
- hosts:
3822
- ${uniqueHosts.map(h => ` - "${h}"`).join("\n")}`;
3823
-
3824
- await kubectl(["apply", "-f", "-"], { input: gwYaml });
3825
- // Remove duplicate gateway from legacy velora namespace if it exists
3826
- await kubectl(["delete", "gateway", "foundation-gateway", "-n", "velora", "--ignore-not-found"]);
3827
- console.log(OK(` ✓ Istio Gateway → ${domain} (HTTP + HTTPS)`));
3828
-
3829
- // ── 4. Reconcile VirtualService hosts + gateway refs ──
3830
-
3831
- let patched = 0;
3832
- for (const vs of INGRESS_VIRTUALSERVICES) {
3833
- const correctHost = `${vs.hostPrefix}${domain}`;
3834
- const { stdout: vsJson } = await kubectl([
3835
- "get", "virtualservice", vs.name, "-n", vs.namespace, "-o", "json",
3836
- ]);
3837
- if (!vsJson) continue;
3838
-
3839
- const vsObj = JSON.parse(vsJson);
3840
- const hosts = vsObj.spec?.hosts || [];
3841
- const gateways = vsObj.spec?.gateways || [];
3842
- const needsHostFix = hosts.length !== 1 || hosts[0] !== correctHost;
3843
- const needsGwFix = gateways.some(g => g.includes("velora/"));
3844
-
3845
- if (!needsHostFix && !needsGwFix) continue;
3846
-
3847
- const patches = [];
3848
- if (needsHostFix) patches.push({ op: "replace", path: "/spec/hosts", value: [correctHost] });
3849
- if (needsGwFix) patches.push({ op: "replace", path: "/spec/gateways", value: ["foundation-gateway"] });
3850
-
3851
- await kubectl([
3852
- "patch", "virtualservice", vs.name, "-n", vs.namespace,
3853
- "--type", "json", "-p", JSON.stringify(patches),
3854
- ]);
3855
- patched++;
3856
- }
3857
-
3858
- if (patched > 0) {
3859
- console.log(OK(` ✓ Patched ${patched} VirtualService(s) → *.${domain}`));
3860
- } else {
3861
- console.log(OK(` ✓ VirtualService hosts correct (*.${domain})`));
3862
- }
3863
-
3864
- // ── 5. DNS records ──
3865
- // Root domain is CF-proxied (universal SSL covers *.meshx.app).
3866
- // Subdomains (api.X.meshx.app) go dns-only because CF universal SSL
3867
- // doesn't cover two levels deep (*.X.meshx.app).
3868
- if (externalIp) {
3869
- const cfToken = process.env.CLOUDFLARE_API_TOKEN || ctx.cfToken;
3870
- if (cfToken) {
3871
- const dnsNames = [...new Set(INGRESS_VIRTUALSERVICES.map(vs => `${vs.hostPrefix}${domain}`))];
3872
- for (const host of dnsNames) {
3873
- const isRoot = host === domain;
3874
- await syncDns(cfToken, `https://${host}`, externalIp, { proxied: isRoot });
3875
- }
3876
- } else {
3877
- hint(` No CLOUDFLARE_API_TOKEN — add DNS A records manually:`);
3878
- const dnsNames = [...new Set(INGRESS_VIRTUALSERVICES.map(vs => `${vs.hostPrefix}${domain}`))];
3879
- for (const host of dnsNames) {
3880
- hint(` ${host} → ${externalIp}`);
3881
- }
3882
- }
3883
- }
3884
-
3885
- // Save the ingress info to state
3886
- if (externalIp) {
3887
- writeClusterState(clusterName, { ingressIp: externalIp, domain });
3888
- }
3889
- }
3890
-
3891
- // ── Frontend Auth0: APP_BASE_URL, API_URL, and Auth0 callback registration ───
3892
-
3893
- async function reconcileFrontendAuth(ctx) {
3894
- const { execa, clusterName } = ctx;
3895
- const kubectl = (args, opts = {}) =>
3896
- execa("kubectl", ["--context", clusterName, ...args], { timeout: 30000, reject: false, ...opts });
3897
-
3898
- const domain = clusterDomain(clusterName);
3899
- const baseUrl = `https://${domain}`;
3900
-
3901
- // Read Auth0 creds from env (fops loads .env at startup) or project .env
3902
- let auth0 = {
3903
- clientId: process.env.AUTH0_CLIENT_ID,
3904
- clientSecret: process.env.AUTH0_CLIENT_SECRET,
3905
- domain: process.env.AUTH0_DOMAIN,
3906
- audience: process.env.AUTH0_AUDIENCE,
3907
- issuerBaseUrl: process.env.AUTH0_ISSUER_BASE_URL,
3908
- secret: process.env.AUTH0_SECRET,
3909
- };
3910
-
3911
- if (!auth0.clientId) {
3912
- try {
3913
- const { rootDir } = await import(resolveCliSrc("project.js"));
3914
- const root = rootDir();
3915
- if (root) {
3916
- const envRaw = fs.readFileSync(path.join(root, ".env"), "utf8");
3917
- for (const line of envRaw.split("\n")) {
3918
- const m = line.match(/^([A-Z0-9_]+)=(.+)$/);
3919
- if (!m) continue;
3920
- const [, k, v] = m;
3921
- const val = v.replace(/^["']|["']$/g, "");
3922
- if (k === "AUTH0_CLIENT_ID") auth0.clientId = val;
3923
- if (k === "AUTH0_CLIENT_SECRET") auth0.clientSecret = val;
3924
- if (k === "AUTH0_DOMAIN") auth0.domain = val;
3925
- if (k === "AUTH0_AUDIENCE") auth0.audience = val;
3926
- if (k === "AUTH0_ISSUER_BASE_URL") auth0.issuerBaseUrl = val;
3927
- if (k === "AUTH0_SECRET") auth0.secret = val;
3928
- }
3929
- }
3930
- } catch { /* no project root or .env */ }
3931
- }
3932
-
3933
- if (!auth0.clientId || !auth0.domain) {
3934
- console.log(WARN(" ⚠ No Auth0 credentials found — skipping frontend auth setup"));
3935
- hint(" Set AUTH0_CLIENT_ID / AUTH0_DOMAIN in .env or environment");
3936
- return;
3937
- }
3938
-
3939
- // ── 1. Create/update the frontend auth0 K8s secret ──
3940
-
3941
- const secretArgs = [
3942
- "create", "secret", "generic", "foundation-frontend-auth0", "-n", "foundation",
3943
- "--from-literal", `APP_BASE_URL=${baseUrl}`,
3944
- "--from-literal", `AUTH0_CLIENT_ID=${auth0.clientId}`,
3945
- "--from-literal", `AUTH0_CLIENT_SECRET=${auth0.clientSecret || ""}`,
3946
- "--from-literal", `AUTH0_DOMAIN=${auth0.domain}`,
3947
- "--from-literal", `AUTH0_AUDIENCE=${auth0.audience || ""}`,
3948
- "--from-literal", `AUTH0_ISSUER_BASE_URL=${auth0.issuerBaseUrl || `https://${auth0.domain}/`}`,
3949
- "--from-literal", `AUTH0_SECRET=${auth0.secret || auth0.clientSecret || ""}`,
3950
- "--from-literal", `NEXT_PUBLIC_AUTH0_CLIENT_ID=${auth0.clientId}`,
3951
- "--from-literal", `NEXT_PUBLIC_AUTH0_DOMAIN=${auth0.domain}`,
3952
- "--dry-run=client", "-o", "yaml",
3953
- ];
3954
- const { stdout: secretYaml } = await kubectl(secretArgs);
3955
- if (secretYaml) {
3956
- await kubectl(["apply", "-f", "-"], { input: secretYaml });
3957
- console.log(OK(` ✓ Frontend Auth0 secret → APP_BASE_URL=${baseUrl}`));
3958
- }
3959
-
3960
- // ── 2. Patch frontend deployment API_URL to point at own ingress ──
3961
-
3962
- const { exitCode: feExists } = await kubectl([
3963
- "get", "deploy", "foundation-frontend", "-n", "foundation",
3964
- ]);
3965
- if (feExists === 0) {
3966
- const apiUrl = `${baseUrl}/api/`;
3967
- const { exitCode: envExit } = await execa("kubectl", [
3968
- "--context", clusterName,
3969
- "set", "env", "deploy/foundation-frontend", "-n", "foundation",
3970
- `API_URL=${apiUrl}`,
3971
- ], { timeout: 15000, reject: false });
3972
- if (envExit === 0) {
3973
- console.log(OK(` ✓ Frontend API_URL → ${apiUrl}`));
3974
- }
3975
- }
3976
-
3977
- // ── 3. Ensure Auth0 wildcard callback URLs exist ──
3978
- // Uses https://*.meshx.app so any cluster/VM domain is automatically allowed.
3979
-
3980
- if (!auth0.clientSecret) {
3981
- hint(" No AUTH0_CLIENT_SECRET — cannot verify Auth0 wildcard callbacks");
3982
- return;
3983
- }
3984
-
3985
- try {
3986
- const tokenResp = await fetch(`https://${auth0.domain}/oauth/token`, {
3987
- method: "POST",
3988
- headers: { "Content-Type": "application/json" },
3989
- body: JSON.stringify({
3990
- client_id: auth0.clientId,
3991
- client_secret: auth0.clientSecret,
3992
- audience: `https://${auth0.domain}/api/v2/`,
3993
- grant_type: "client_credentials",
3994
- }),
3995
- signal: AbortSignal.timeout(10_000),
3996
- });
3997
- if (!tokenResp.ok) { hint(" Could not get Auth0 management token"); return; }
3998
- const mgmtToken = (await tokenResp.json()).access_token;
3999
- if (!mgmtToken) return;
4000
-
4001
- const appResp = await fetch(
4002
- `https://${auth0.domain}/api/v2/clients/${auth0.clientId}?fields=callbacks,allowed_logout_urls,web_origins,allowed_origins`,
4003
- { headers: { Authorization: `Bearer ${mgmtToken}` }, signal: AbortSignal.timeout(10_000) },
4004
- );
4005
- if (!appResp.ok) return;
4006
- const app = await appResp.json();
4007
-
4008
- const wildcardCb1 = "https://*.meshx.app/auth/callback";
4009
- const wildcardCb2 = "https://*.meshx.app/api/auth/callback";
4010
- const wildcardOrigin = "https://*.meshx.app";
4011
-
4012
- const callbacks = new Set(app.callbacks || []);
4013
- const logoutUrls = new Set(app.allowed_logout_urls || []);
4014
- const webOrigins = new Set(app.web_origins || []);
4015
- const allowedOrigins = new Set(app.allowed_origins || []);
4016
-
4017
- // Auth0 requires exact callback URLs (wildcards are not matched). Add this cluster's URL.
4018
- callbacks.add(`${baseUrl}/auth/callback`);
4019
- callbacks.add(`${baseUrl}/api/auth/callback`);
4020
- logoutUrls.add(baseUrl);
4021
- webOrigins.add(baseUrl);
4022
- allowedOrigins.add(baseUrl);
4023
-
4024
- const before = callbacks.size + logoutUrls.size + webOrigins.size + allowedOrigins.size;
4025
- callbacks.add(wildcardCb1);
4026
- callbacks.add(wildcardCb2);
4027
- logoutUrls.add(wildcardOrigin);
4028
- webOrigins.add(wildcardOrigin);
4029
- allowedOrigins.add(wildcardOrigin);
4030
- const after = callbacks.size + logoutUrls.size + webOrigins.size + allowedOrigins.size;
4031
-
4032
- if (after > before) {
4033
- const patchResp = await fetch(
4034
- `https://${auth0.domain}/api/v2/clients/${auth0.clientId}`,
4035
- {
4036
- method: "PATCH",
4037
- headers: { Authorization: `Bearer ${mgmtToken}`, "Content-Type": "application/json" },
4038
- body: JSON.stringify({
4039
- callbacks: [...callbacks],
4040
- allowed_logout_urls: [...logoutUrls],
4041
- web_origins: [...webOrigins],
4042
- allowed_origins: [...allowedOrigins],
4043
- }),
4044
- signal: AbortSignal.timeout(10_000),
4045
- },
4046
- );
4047
- if (patchResp.ok) {
4048
- console.log(OK(" ✓ Auth0 wildcard callbacks ensured (*.meshx.app)"));
4049
- } else {
4050
- console.log(WARN(` ⚠ Auth0 callback update failed: HTTP ${patchResp.status}`));
4051
- }
4052
- } else {
4053
- console.log(OK(" ✓ Auth0 wildcard callbacks already configured"));
4054
- }
4055
- } catch (e) {
4056
- hint(` Auth0 callback check failed: ${e.message}`);
4057
- }
4058
- }
4059
-
4060
- async function reconcileFopsApi(ctx) {
4061
- const { execa, clusterName } = ctx;
4062
-
4063
- const kubectl = (args, opts = {}) =>
4064
- execa("kubectl", ["--context", clusterName, ...args], { reject: false, timeout: 30000, ...opts });
4065
-
4066
- // Check if already deployed and running
4067
- const { stdout: existing, exitCode } = await kubectl([
4068
- "get", "deployment", "fops-api", "-n", FOPS_API_NS,
4069
- "-o", "jsonpath={.status.readyReplicas}",
4070
- ]);
4071
-
4072
- if (exitCode === 0 && parseInt(existing) > 0) {
4073
- console.log(OK(" ✓ fops-api deployment running"));
4074
- return;
4075
- }
4076
-
4077
- hint("Deploying fops-api to cluster…");
4078
-
4079
- const manifests = fopsApiManifests(clusterName);
4080
- const yaml = manifests.map(m => JSON.stringify(m)).join("\n---\n");
4081
-
4082
- const result = await execa("kubectl", [
4083
- "--context", clusterName, "apply", "-f", "-",
4084
- ], { input: yaml, reject: false, timeout: 30000 });
4085
-
4086
- if (result.exitCode === 0) {
4087
- console.log(OK(` ✓ fops-api deployed to ${FOPS_API_NS} namespace`));
4088
- hint(` Access: kubectl --context ${clusterName} port-forward svc/fops-api -n ${FOPS_API_NS} 4100:4100`);
4089
- } else {
4090
- const errMsg = (result.stderr || result.stdout || "").trim().split("\n")[0];
4091
- console.log(WARN(` ⚠ fops-api deploy failed: ${errMsg}`));
4092
- }
4093
- }
4094
-
4095
- async function reconcileFlux(execa, { clusterName, rg, sub, githubToken, repo, owner, path: fluxPath, branch }) {
4096
- repo = repo || AKS_DEFAULTS.fluxRepo;
4097
- owner = owner || AKS_DEFAULTS.fluxOwner;
4098
- branch = branch || AKS_DEFAULTS.fluxBranch;
4099
- const repoUrl = `https://github.com/${owner}/${repo}`;
4100
- const configName = "flux-system";
4101
-
4102
- // Check if the Flux extension is already installed
4103
- const { exitCode: extExists } = await execa("az", [
4104
- "k8s-extension", "show",
4105
- "--resource-group", rg,
4106
- "--cluster-name", clusterName,
4107
- "--cluster-type", "managedClusters",
4108
- "--name", "flux",
4109
- "--output", "none",
4110
- ...subArgs(sub),
4111
- ], { reject: false, timeout: 30000 });
4112
-
4113
- if (extExists !== 0) {
4114
- hint("Installing Flux extension…");
4115
- try {
4116
- await execa("az", [
4117
- "k8s-extension", "create",
4118
- "--resource-group", rg,
4119
- "--cluster-name", clusterName,
4120
- "--cluster-type", "managedClusters",
4121
- "--name", "flux",
4122
- "--extension-type", "microsoft.flux",
4123
- "--scope", "cluster",
4124
- "--output", "none",
4125
- ...subArgs(sub),
4126
- ], { timeout: 600000 });
4127
- console.log(OK(" ✓ Flux extension installed"));
4128
- } catch (err) {
4129
- const msg = (err.stderr || err.message || "").toString();
4130
- if (/rpds|No module named|ModuleNotFoundError/.test(msg)) {
4131
- console.error(ERR("\n Azure k8s-extension failed (broken vendored rpds — known Azure CLI bug)."));
4132
- hint("Workaround (macOS Homebrew): install rpds-py into Azure CLI's Python, then remove the extension's vendored rpds:");
4133
- hint(" $(brew --prefix azure-cli)/libexec/bin/pip install rpds-py");
4134
- hint(" rm -rf ~/.azure/cliextensions/k8s-extension/rpds");
4135
- hint("Then re-run. See: https://github.com/Azure/azure-cli/issues/32709\n");
4136
- throw err;
4137
- }
4138
- throw err;
4139
- }
4140
- } else {
4141
- console.log(OK(" ✓ Flux extension already installed"));
4142
- }
4143
-
4144
- // Check if the GitOps configuration already exists
4145
- const { exitCode: cfgExists } = await execa("az", [
4146
- "k8s-configuration", "flux", "show",
4147
- "--resource-group", rg,
4148
- "--cluster-name", clusterName,
4149
- "--cluster-type", "managedClusters",
4150
- "--name", configName,
4151
- "--output", "none",
4152
- ...subArgs(sub),
4153
- ], { reject: false, timeout: 30000 });
4154
-
4155
- if (cfgExists !== 0) {
4156
- hint("Creating GitOps configuration…");
4157
- try {
4158
- await execa("az", [
4159
- "k8s-configuration", "flux", "create",
4160
- "--resource-group", rg,
4161
- "--cluster-name", clusterName,
4162
- "--cluster-type", "managedClusters",
4163
- "--name", configName,
4164
- "--namespace", "flux-system",
4165
- "--scope", "cluster",
4166
- "--url", repoUrl,
4167
- "--branch", branch,
4168
- "--https-user", "x-access-token",
4169
- "--https-key", githubToken,
4170
- "--kustomization", `name=${configName}`, `path=./${fluxPath}`, "prune=true",
4171
- "--output", "none",
4172
- ...subArgs(sub),
4173
- ], { timeout: 300000 });
4174
- console.log(OK(" ✓ GitOps configuration created"));
4175
- } catch (err) {
4176
- const azErr = (err.stderr || err.message || "").replace(/^.*ERROR:\s*/m, "").split("\n")[0];
4177
- console.log(WARN(` ⚠ GitOps configuration failed: ${azErr}`));
4178
- hint("Create manually: az k8s-configuration flux create -g " + rg + " -c " + clusterName + " -t managedClusters -n " + configName + " ...");
4179
- }
4180
- } else {
4181
- const { stdout: cfgJson } = await execa("az", [
4182
- "k8s-configuration", "flux", "show",
4183
- "--resource-group", rg,
4184
- "--cluster-name", clusterName,
4185
- "--cluster-type", "managedClusters",
4186
- "--name", configName,
4187
- "--output", "json",
4188
- ...subArgs(sub),
4189
- ], { reject: false, timeout: 30000 });
4190
- let currentUrl = "";
4191
- let currentPath = "";
4192
- if (cfgJson) {
4193
- try {
4194
- const cfg = JSON.parse(cfgJson);
4195
- currentUrl = (cfg.gitRepository && cfg.gitRepository.url) || "";
4196
- const ks = cfg.kustomizations && cfg.kustomizations[configName];
4197
- currentPath = (ks && ks.path) || "";
4198
- } catch { /* ignore */ }
4199
- }
4200
- const desiredPath = fluxPath.startsWith("./") ? fluxPath : `./${fluxPath}`;
4201
- const urlMismatch = currentUrl && currentUrl !== repoUrl;
4202
- const pathMismatch = currentPath && currentPath !== desiredPath;
4203
- if (urlMismatch || pathMismatch) {
4204
- hint(`Updating Flux to ${owner}/${repo} (was ${currentUrl || "unknown"})…`);
4205
- try {
4206
- await execa("az", [
4207
- "k8s-configuration", "flux", "update",
4208
- "--resource-group", rg,
4209
- "--cluster-name", clusterName,
4210
- "--cluster-type", "managedClusters",
4211
- "--name", configName,
4212
- "--url", repoUrl,
4213
- "--branch", branch,
4214
- "--https-user", "x-access-token",
4215
- "--https-key", githubToken,
4216
- "--kustomization", `name=${configName}`, `path=${desiredPath}`, "prune=true",
4217
- "--output", "none",
4218
- ...subArgs(sub),
4219
- ], { timeout: 300000 });
4220
- console.log(OK(" ✓ GitOps configuration updated to your repo"));
4221
- } catch (err) {
4222
- const azErr = (err.stderr || err.message || "").replace(/^.*ERROR:\s*/m, "").split("\n")[0];
4223
- console.log(WARN(` ⚠ Flux update failed: ${azErr}`));
4224
- }
4225
- } else {
4226
- console.log(OK(" ✓ GitOps configuration already exists"));
4227
- }
4228
- }
4229
- }
4230
7
 
4231
- function printClusterInfo(cl) {
4232
- console.log(`\n ${LABEL("Cluster Info")}`);
4233
- kvLine("Name", cl.clusterName, { pad: 12 });
4234
- kvLine("RG", cl.resourceGroup, { pad: 12 });
4235
- kvLine("FQDN", cl.fqdn || "—", { pad: 12 });
4236
- kvLine("K8s", cl.kubernetesVersion || "—", { pad: 12 });
4237
- kvLine("Nodes", `${cl.nodeCount || "?"} x ${cl.nodeVmSize || "?"}`, { pad: 12 });
4238
- if (cl.flux) {
4239
- kvLine("Flux", `${cl.flux.owner}/${cl.flux.repo}`, { pad: 12 });
4240
- }
4241
- hint("");
4242
- hint(`kubectl: kubectl --context ${cl.clusterName} get nodes`);
4243
- hint(`status: fops azure aks status ${cl.clusterName}`);
4244
- if (!cl.flux) {
4245
- hint(`flux: fops azure aks flux bootstrap ${cl.clusterName} --flux-owner <org> --flux-repo <repo>`);
4246
- }
4247
- console.log("");
4248
- }
8
+ // ── Naming helpers and constants ─────────────────────────────────────────────
9
+ export {
10
+ AKS_DEFAULTS,
11
+ PG_DEFAULTS,
12
+ EH_DEFAULTS,
13
+ PG_REPLICA_REGIONS,
14
+ pgServerName,
15
+ kvName,
16
+ ehNamespaceName,
17
+ generatePassword,
18
+ timeSince,
19
+ parseCidr,
20
+ cidrOverlaps,
21
+ } from "./azure-aks-naming.js";
22
+
23
+ // ── State management ─────────────────────────────────────────────────────────
24
+ export {
25
+ readAksClusters,
26
+ readClusterState,
27
+ writeClusterState,
28
+ clearClusterState,
29
+ readStackState,
30
+ writeStackState,
31
+ deleteStackState,
32
+ listStacks,
33
+ readProjectFluxConfig,
34
+ resolveFluxConfig,
35
+ requireCluster,
36
+ } from "./azure-aks-state.js";
37
+
38
+ // ── Network ──────────────────────────────────────────────────────────────────
39
+ export {
40
+ reconcileApiServerIp,
41
+ reconcileNetworkAccess,
42
+ findAvailableSubnetCidr,
43
+ findAksVnet,
44
+ aksWhitelistMe,
45
+ } from "./azure-aks-network.js";
46
+
47
+ // ── Secrets ──────────────────────────────────────────────────────────────────
48
+ export {
49
+ SECRET_STORE_NAMESPACES_BASE,
50
+ SECRET_STORE_NAMESPACES_DAI,
51
+ SECRET_STORE_NAME,
52
+ KV_SEED_SECRETS,
53
+ VAULT_UNSEAL_KEY_NAME,
54
+ reconcileSecretStore,
55
+ reconcileK8sSecrets,
56
+ reconcileVaultUnseal,
57
+ seedKeyVaultFromVm,
58
+ getSpClientId,
59
+ detectEsApiVersion,
60
+ aksVaultInit,
61
+ } from "./azure-aks-secrets.js";
62
+
63
+ // ── Storage ──────────────────────────────────────────────────────────────────
64
+ export {
65
+ HELM_REPOS,
66
+ OLD_PG_HOSTS,
67
+ reconcileStorageAccount,
68
+ reconcileStorageEngine,
69
+ reconcileHelmRepos,
70
+ reconcileHelmValues,
71
+ reconcileAcrWebhooks,
72
+ } from "./azure-aks-storage.js";
73
+
74
+ // ── PostgreSQL ───────────────────────────────────────────────────────────────
75
+ export {
76
+ PG_SERVICE_DBS,
77
+ reconcilePostgres,
78
+ reconcilePgDatabases,
79
+ syncPostgresSecret,
80
+ aksPostgresReplicaCreate,
81
+ aksPostgresReplicaList,
82
+ aksPostgresReplicaPromote,
83
+ aksPostgresReplicaDelete,
84
+ reconcileEventHubs,
85
+ } from "./azure-aks-postgres.js";
86
+
87
+ // ── Flux ─────────────────────────────────────────────────────────────────────
88
+ export {
89
+ TEMPLATE_DEFAULTS,
90
+ FOPS_MANAGED_KUSTOMIZATIONS,
91
+ DAI_KUSTOMIZATIONS,
92
+ ensureFluxCli,
93
+ cloneOrUpdateRepo,
94
+ renderTemplate,
95
+ copyAndRenderTemplateDir,
96
+ commitClusterToFlux,
97
+ provisionFluxFromTemplate,
98
+ bootstrapFlux,
99
+ reconcileFlux,
100
+ reconcileFluxStep,
101
+ suspendManagedKustomizations,
102
+ reconcileFluxPrereqs,
103
+ aksFluxInit,
104
+ aksFluxBootstrap,
105
+ aksFluxStatus,
106
+ aksFluxReconcile,
107
+ aksDataBootstrap,
108
+ } from "./azure-aks-flux.js";
109
+
110
+ // ── Ingress ──────────────────────────────────────────────────────────────────
111
+ export {
112
+ clusterDomain,
113
+ INGRESS_VIRTUALSERVICES,
114
+ reconcileIngressIp,
115
+ reconcileFrontendAuth,
116
+ } from "./azure-aks-ingress.js";
117
+
118
+ // ── Terraform ────────────────────────────────────────────────────────────────
119
+ export {
120
+ aksTerraform,
121
+ generateAksTerraform,
122
+ } from "./azure-aks-terraform.js";
123
+
124
+ // ── Reconcilers ──────────────────────────────────────────────────────────────
125
+ export {
126
+ AKS_RECONCILERS,
127
+ reconcileCluster,
128
+ aksDoctor,
129
+ } from "./azure-aks-reconcilers.js";
130
+
131
+ // ── Stacks ───────────────────────────────────────────────────────────────────
132
+ export {
133
+ printClusterInfo,
134
+ stackDomain,
135
+ pgDatabaseName,
136
+ STACK_RECONCILERS,
137
+ aksStackUp,
138
+ aksStackDown,
139
+ aksStackList,
140
+ aksStackStatus,
141
+ } from "./azure-aks-stacks.js";
142
+
143
+ // ── Core CLI commands ────────────────────────────────────────────────────────
144
+ export {
145
+ resolveK8sVersion,
146
+ ensureKubectl,
147
+ ensureGhcrPullSecret,
148
+ getCredentials,
149
+ aksUp,
150
+ aksDown,
151
+ aksList,
152
+ aksStatus,
153
+ aksConfigVersions,
154
+ aksKubeconfig,
155
+ aksNodePoolAdd,
156
+ aksNodePoolRemove,
157
+ aksGrantAdmin,
158
+ } from "./azure-aks-core.js";