@meshxdata/fops 0.1.44 → 0.1.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +183 -0
- package/package.json +1 -1
- package/src/commands/lifecycle.js +101 -5
- package/src/commands/setup.js +45 -4
- package/src/plugins/bundled/fops-plugin-azure/index.js +29 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-core.js +1185 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-flux.js +1180 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-ingress.js +393 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-naming.js +104 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-network.js +296 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-postgres.js +768 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-reconcilers.js +538 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-secrets.js +849 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-stacks.js +643 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-state.js +145 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-storage.js +496 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-terraform.js +1032 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks.js +155 -4245
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-keyvault.js +186 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-ops.js +29 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-results.js +78 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure.js +1 -1
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/infra-cmds.js +758 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/registry-cmds.js +250 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/test-cmds.js +52 -1
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/vm-cmds.js +10 -0
- package/src/plugins/bundled/fops-plugin-foundation/lib/apply.js +3 -2
- package/src/plugins/bundled/fops-plugin-foundation/lib/helpers.js +21 -0
- package/src/plugins/bundled/fops-plugin-foundation/lib/tools-read.js +3 -5
- package/src/ui/tui/App.js +13 -13
- package/src/web/dist/assets/index-NXC8Hvnp.css +1 -0
- package/src/web/dist/assets/index-QH1N4ejK.js +112 -0
- package/src/web/dist/index.html +2 -2
- package/src/web/server.js +4 -4
- package/src/web/dist/assets/index-BphVaAUd.css +0 -1
- package/src/web/dist/assets/index-CSckLzuG.js +0 -129
|
@@ -0,0 +1,538 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* azure-aks-reconcilers.js - Reconciler arrays and orchestration
|
|
3
|
+
*
|
|
4
|
+
* Depends on: azure-aks-naming.js, azure-aks-state.js, azure-aks-network.js,
|
|
5
|
+
* azure-aks-secrets.js, azure-aks-storage.js, azure-aks-postgres.js,
|
|
6
|
+
* azure-aks-flux.js, azure-aks-ingress.js
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import {
|
|
10
|
+
OK, WARN, ERR, DIM,
|
|
11
|
+
banner, hint, kvLine, subArgs,
|
|
12
|
+
lazyExeca, runReconcilers,
|
|
13
|
+
} from "./azure.js";
|
|
14
|
+
import { AKS_DEFAULTS } from "./azure-aks-naming.js";
|
|
15
|
+
import { readClusterState, requireCluster } from "./azure-aks-state.js";
|
|
16
|
+
import { reconcileApiServerIp } from "./azure-aks-network.js";
|
|
17
|
+
import { reconcileSecretStore, reconcileK8sSecrets, reconcileVaultUnseal } from "./azure-aks-secrets.js";
|
|
18
|
+
import { reconcileStorageAccount, reconcileStorageEngine, reconcileHelmRepos, reconcileHelmValues, reconcileAcrWebhooks } from "./azure-aks-storage.js";
|
|
19
|
+
import { reconcilePostgres, reconcilePgDatabases } from "./azure-aks-postgres.js";
|
|
20
|
+
import { reconcileFluxStep, reconcileFluxPrereqs, suspendManagedKustomizations } from "./azure-aks-flux.js";
|
|
21
|
+
import { reconcileIngressIp, reconcileFrontendAuth } from "./azure-aks-ingress.js";
|
|
22
|
+
|
|
23
|
+
// ── fops-api deployment ──────────────────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
const FOPS_API_NS = "fops-system";

/**
 * Build the Kubernetes manifests for the in-cluster fops-api deployment.
 *
 * Produces, in apply order: Namespace, ServiceAccount, ClusterRoleBinding
 * (binds the built-in "view" ClusterRole), Deployment (single replica of
 * node:20-alpine that installs @meshxdata/fops at startup and runs
 * `fops serve` on port 4100), and a ClusterIP Service on the same port.
 *
 * @param {string} clusterName - injected into the pod env as FOPS_CLUSTER.
 * @returns {object[]} plain-object manifests, serializable with JSON.stringify.
 */
function fopsApiManifests(clusterName) {
  const labels = { app: "fops-api", "app.kubernetes.io/managed-by": "fops" };
  // Fresh metadata object per manifest (kubectl treats each doc independently).
  const apiMeta = () => ({ name: "fops-api", namespace: FOPS_API_NS, labels });

  const namespace = {
    apiVersion: "v1", kind: "Namespace",
    metadata: { name: FOPS_API_NS, labels: { "app.kubernetes.io/managed-by": "fops" } },
  };

  const serviceAccount = {
    apiVersion: "v1", kind: "ServiceAccount",
    metadata: apiMeta(),
  };

  // Read-only cluster access: the API only inspects workloads, never mutates.
  const clusterRoleBinding = {
    apiVersion: "rbac.authorization.k8s.io/v1", kind: "ClusterRoleBinding",
    metadata: { name: "fops-api-viewer", labels },
    subjects: [{ kind: "ServiceAccount", name: "fops-api", namespace: FOPS_API_NS }],
    roleRef: { kind: "ClusterRole", name: "view", apiGroup: "rbac.authorization.k8s.io" },
  };

  const container = {
    name: "fops-api",
    image: "node:20-alpine",
    // npm install happens on every container start; first-ready is therefore slow,
    // which is why the readiness probe waits 30s before the first check.
    command: ["sh", "-c", "npm install -g @meshxdata/fops && fops serve --host 0.0.0.0 --port 4100"],
    ports: [{ name: "http", containerPort: 4100 }],
    env: [
      { name: "NODE_ENV", value: "production" },
      { name: "FOPS_CLUSTER", value: clusterName },
    ],
    resources: {
      requests: { cpu: "100m", memory: "256Mi" },
      limits: { cpu: "500m", memory: "512Mi" },
    },
    readinessProbe: {
      httpGet: { path: "/api/health", port: 4100 },
      initialDelaySeconds: 30, periodSeconds: 10,
    },
    livenessProbe: {
      httpGet: { path: "/api/health", port: 4100 },
      initialDelaySeconds: 60, periodSeconds: 30,
    },
  };

  const deployment = {
    apiVersion: "apps/v1", kind: "Deployment",
    metadata: apiMeta(),
    spec: {
      replicas: 1,
      selector: { matchLabels: { app: "fops-api" } },
      template: {
        metadata: { labels },
        spec: {
          serviceAccountName: "fops-api",
          containers: [container],
        },
      },
    },
  };

  const service = {
    apiVersion: "v1", kind: "Service",
    metadata: apiMeta(),
    spec: {
      selector: { app: "fops-api" },
      ports: [{ name: "http", port: 4100, targetPort: 4100 }],
    },
  };

  return [namespace, serviceAccount, clusterRoleBinding, deployment, service];
}
|
|
90
|
+
|
|
91
|
+
// ── Internal reconcilers ──────────────────────────────────────────────────────
|
|
92
|
+
|
|
93
|
+
/**
 * Disable AKS addons this stack does not want: Defender for Containers,
 * the omsagent (monitoring) addon, the azure-policy addon, and the Azure
 * File CSI driver. Each disable is best-effort (reject: false) and logs
 * a WARN line on failure rather than aborting the pipeline.
 *
 * Reads ctx.cluster (the `az aks show` JSON) to decide what is still on,
 * and refreshes ctx.cluster afterwards if anything was changed so later
 * reconcilers see the updated state.
 *
 * @param {object} ctx - { execa, clusterName, rg, sub, cluster? }
 */
async function reconcileAddons(ctx) {
  const { execa, clusterName, rg, sub } = ctx;
  const cluster = ctx.cluster || {};
  // Common az CLI args: resource group, cluster name, optional subscription.
  const baseArgs = ["-g", rg, "-n", clusterName, ...subArgs(sub)];

  const defenderOn = cluster.securityProfile?.defender?.securityMonitoring?.enabled === true;
  const monitoringOn = cluster.addonProfiles?.omsagent?.enabled === true;
  const azurePolicyOn = cluster.addonProfiles?.azurepolicy?.enabled === true;
  // NOTE(review): `!== false` treats a missing storageProfile as "on", so the
  // early return below only fires when the file driver is *explicitly*
  // disabled — asymmetric with the `=== true` check further down; confirm intended.
  const fileDriverOn = cluster.storageProfile?.fileCSIDriver?.enabled !== false;

  // Fast path: nothing to disable.
  if (!defenderOn && !monitoringOn && !azurePolicyOn && !fileDriverOn) {
    console.log(OK(" ✓ Unwanted addons already disabled"));
    return;
  }

  let changed = false;

  if (defenderOn) {
    hint("Disabling Defender for Containers…");
    const r = await execa("az", [
      "aks", "update", ...baseArgs, "--disable-defender", "--yes", "--output", "none",
    ], { reject: false, timeout: 300000 });
    if (r.exitCode === 0) { console.log(OK(" ✓ Defender disabled")); changed = true; }
    else { console.log(WARN(` ⚠ Defender: ${(r.stderr || "").split("\n")[0]}`)); }
  }

  // monitoring and azure-policy can be disabled in a single CLI call.
  const addons = [];
  if (monitoringOn) addons.push("monitoring");
  if (azurePolicyOn) addons.push("azure-policy");

  if (addons.length > 0) {
    hint(`Disabling ${addons.join(", ")}…`);
    const r = await execa("az", [
      "aks", "disable-addons", ...baseArgs, "--addons", addons.join(","),
      "--output", "none",
    ], { reject: false, timeout: 300000 });
    if (r.exitCode === 0) { console.log(OK(` ✓ Disabled: ${addons.join(", ")}`)); changed = true; }
    else { console.log(WARN(` ⚠ Addons: ${(r.stderr || "").split("\n")[0]}`)); }
  }

  // Only act on the file driver when the cluster reports it explicitly enabled;
  // when the field is absent we neither disable nor claim it is disabled.
  const fileDriverExplicitlyOn = cluster.storageProfile?.fileCSIDriver?.enabled === true;
  if (fileDriverExplicitlyOn) {
    hint("Disabling Azure File CSI driver (not needed)…");
    const r = await execa("az", [
      "aks", "update", ...baseArgs, "--disable-file-driver", "--yes", "--output", "none",
    ], { reject: false, timeout: 300000 });
    if (r.exitCode === 0) { console.log(OK(" ✓ Azure File driver disabled (−8 pods)")); changed = true; }
    else { console.log(WARN(` ⚠ File driver: ${(r.stderr || "").split("\n")[0]}`)); }
  } else if (cluster.storageProfile?.fileCSIDriver?.enabled === false) {
    console.log(OK(" ✓ Azure File driver already disabled"));
  }

  // Re-fetch the cluster JSON so downstream reconcilers see post-change state.
  if (changed) {
    try {
      const { stdout } = await execa("az", [
        "aks", "show", ...baseArgs, "--output", "json",
      ], { timeout: 30000 });
      ctx.cluster = JSON.parse(stdout);
    } catch {} // best-effort refresh; stale ctx.cluster is tolerable here
  }
}
|
|
154
|
+
|
|
155
|
+
/**
 * Ensure the cluster autoscaler is enabled on the system node pool
 * (falling back to the first pool when none is marked System), using
 * the min/max bounds supplied on the context. Idempotent: logs and
 * returns when autoscaling is already on.
 *
 * @param {object} ctx - { execa, clusterName, rg, sub, minCount, maxCount, cluster }
 */
async function reconcileAutoscaler(ctx) {
  const { execa, clusterName, rg, sub, minCount, maxCount, cluster } = ctx;

  const profiles = cluster.agentPoolProfiles || [];
  const targetPool = profiles.find((p) => p.mode === "System") || profiles[0];
  if (!targetPool) return;

  if (targetPool.enableAutoScaling) {
    console.log(OK(` ✓ Autoscaler already enabled on "${targetPool.name}" (${targetPool.minCount}–${targetPool.maxCount})`));
    return;
  }

  hint(`Enabling autoscaler on pool "${targetPool.name}" (${minCount}–${maxCount})…`);
  const updateArgs = [
    "aks", "nodepool", "update",
    "--resource-group", rg,
    "--cluster-name", clusterName,
    "--name", targetPool.name,
    "--enable-cluster-autoscaler",
    "--min-count", String(minCount),
    "--max-count", String(maxCount),
    "--output", "none", ...subArgs(sub),
  ];
  await execa("az", updateArgs, { timeout: 120000 });
  console.log(OK(` ✓ Autoscaler enabled on "${targetPool.name}" (${minCount}–${maxCount})`));
}
|
|
178
|
+
|
|
179
|
+
/**
 * Ensure a Spot-priority node pool named "spot" exists for Spark workloads.
 *
 * If a pool with scaleSetPriority === "Spot" is already present, this only
 * logs its current scaling config. Otherwise it creates one mirroring the
 * system pool's VM size and availability zones, with eviction policy Delete,
 * no spot price cap (-1), autoscaling 1–20 nodes, and a taint/label pair so
 * only spot-tolerant workloads schedule there. Failure is logged as a WARN,
 * not thrown — spot capacity is best-effort.
 *
 * @param {object} ctx - { execa, clusterName, rg, sub, cluster, maxPods }
 */
async function reconcileSpotPool(ctx) {
  const { execa, clusterName, rg, sub, cluster, maxPods } = ctx;
  const pools = cluster.agentPoolProfiles || [];
  const spotPool = pools.find(p => p.scaleSetPriority === "Spot");

  if (spotPool) {
    const count = spotPool.count || 0;
    const scaling = spotPool.enableAutoScaling
      ? `autoscale ${spotPool.minCount}–${spotPool.maxCount}`
      : `${count} nodes`;
    console.log(OK(` ✓ Spot pool "${spotPool.name}" present (${scaling}, max-pods ${spotPool.maxPods})`));
    return;
  }

  hint("Creating spot node pool for Spark workloads…");
  // Mirror the system pool's sizing so spot nodes match the rest of the cluster.
  const sysPool = pools.find(p => p.mode === "System") || pools[0];
  const vmSize = sysPool?.vmSize || "Standard_D8s_v3";
  const zones = sysPool?.availabilityZones || [];

  const createArgs = [
    "aks", "nodepool", "add",
    "--resource-group", rg,
    "--cluster-name", clusterName,
    "--name", "spot",
    "--node-count", "1",
    "--node-vm-size", vmSize,
    "--max-pods", String(maxPods || 110),
    "--priority", "Spot",
    "--eviction-policy", "Delete",
    "--spot-max-price", "-1",          // pay up to the on-demand price
    "--enable-cluster-autoscaler",
    "--min-count", "1",
    "--max-count", "20",
    // NOTE(review): kubernetes.azure.com is an AKS-reserved label prefix —
    // verify az accepts it here; AKS normally applies this label itself.
    "--labels", "kubernetes.azure.com/scalesetpriority=spot",
    "--node-taints", "kubernetes.azure.com/scalesetpriority=spot:NoSchedule",
    "--ssh-access", "disabled",
    "--output", "none",
    ...subArgs(sub),
  ];

  if (zones.length > 0) {
    createArgs.push("--zones", ...zones);
  }

  const { exitCode, stderr } = await execa("az", createArgs, { timeout: 300000, reject: false });

  if (exitCode === 0) {
    const zoneInfo = zones.length > 0 ? `, zones ${zones.join(",")}` : "";
    // Bug fix: message previously claimed "autoscale 0–3", which did not match
    // the actual --min-count 1 / --max-count 20 configuration above.
    console.log(OK(` ✓ Spot pool created (1 node, autoscale 1–20${zoneInfo})`));
  } else {
    // az prefixes non-fatal noise with "WARNING:"; surface the first real error line.
    const lines = (stderr || "").split("\n").filter(l => !l.startsWith("WARNING:") && l.trim());
    const errMsg = lines[0] || "unknown error";
    console.log(WARN(` ⚠ Spot pool creation failed: ${errMsg}`));
  }
}
|
|
234
|
+
|
|
235
|
+
/**
 * Install the kubernetes-sigs descheduler (as a CronJob in kube-system)
 * via Helm, enabling the RemoveDuplicates, LowNodeUtilization and
 * RemovePodsHavingTooManyRestarts strategies on a 5-minute schedule.
 * Idempotent: skips when the CronJob already exists; all failures are
 * logged as WARN rather than thrown.
 *
 * @param {object} ctx - { execa, clusterName }
 */
async function reconcileDescheduler(ctx) {
  const { execa, clusterName } = ctx;
  // kubectl wrapper pinned to this cluster's kubeconfig context; never throws.
  const kubectl = (args, opts = {}) =>
    execa("kubectl", ["--context", clusterName, ...args], { timeout: 30000, reject: false, ...opts });

  // Presence check: the chart installs a CronJob named "descheduler".
  const { exitCode } = await kubectl(["get", "cronjob", "descheduler", "-n", "kube-system"]);
  if (exitCode === 0) {
    console.log(OK(" ✓ Descheduler already installed"));
    return;
  }

  hint("Installing descheduler…");

  // Helm is an optional dependency of this reconciler.
  const { exitCode: helmCheck } = await execa("helm", ["version", "--short"], { reject: false, timeout: 10000 });
  if (helmCheck !== 0) {
    console.log(WARN(" ⚠ Helm not found — skipping descheduler install"));
    return;
  }

  const { exitCode: repoAdd } = await execa("helm", [
    "repo", "add", "descheduler", "https://kubernetes-sigs.github.io/descheduler/",
  ], { reject: false, timeout: 30000 });
  if (repoAdd !== 0) {
    // "repo add" fails when the repo already exists; refresh its index instead.
    await execa("helm", ["repo", "update", "descheduler"], { reject: false, timeout: 30000 });
  }

  const { exitCode: installCode, stderr } = await execa("helm", [
    "upgrade", "--install", "descheduler", "descheduler/descheduler",
    "--namespace", "kube-system",
    "--kube-context", clusterName,
    "--set", "schedule=*/5 * * * *",
    "--set", "deschedulerPolicy.strategies.RemoveDuplicates.enabled=true",
    "--set", "deschedulerPolicy.strategies.LowNodeUtilization.enabled=true",
    "--set", "deschedulerPolicy.strategies.RemovePodsHavingTooManyRestarts.enabled=true",
    "--wait", "--timeout", "120s",
  ], { timeout: 180000, reject: false });

  if (installCode === 0) {
    console.log(OK(" ✓ Descheduler installed (runs every 5 min)"));
  } else {
    const errMsg = (stderr || "").split("\n")[0];
    console.log(WARN(` ⚠ Descheduler install failed: ${errMsg}`));
  }
}
|
|
279
|
+
|
|
280
|
+
/**
 * Refresh the local kubeconfig entry for this cluster by delegating to
 * getCredentials in azure-aks-core (imported lazily — presumably to avoid
 * a module cycle; TODO confirm).
 *
 * @param {object} ctx - { execa, clusterName, rg, sub }
 */
async function reconcileKubeconfig(ctx) {
  const { clusterName, rg, sub } = ctx;
  const core = await import("./azure-aks-core.js");
  await core.getCredentials(ctx.execa, { clusterName, rg, sub });
}
|
|
284
|
+
|
|
285
|
+
/**
 * Ensure the in-cluster fops-api Deployment exists and has at least one
 * ready replica; if not, (re)apply the full manifest set produced by
 * fopsApiManifests(). JSON manifests are streamed to `kubectl apply -f -`
 * joined by YAML document separators (JSON is a YAML subset). Failures are
 * logged as WARN, never thrown.
 *
 * @param {object} ctx - { execa, clusterName }
 */
async function reconcileFopsApi(ctx) {
  const { execa, clusterName } = ctx;

  // kubectl wrapper pinned to this cluster's context; never throws.
  const kubectl = (args, opts = {}) =>
    execa("kubectl", ["--context", clusterName, ...args], { reject: false, timeout: 30000, ...opts });

  const { stdout: existing, exitCode } = await kubectl([
    "get", "deployment", "fops-api", "-n", FOPS_API_NS,
    "-o", "jsonpath={.status.readyReplicas}",
  ]);

  // Fix: use Number.parseInt with an explicit radix (was a bare parseInt(existing)).
  // An empty jsonpath result parses to NaN, so `> 0` correctly falls through.
  if (exitCode === 0 && Number.parseInt(existing, 10) > 0) {
    console.log(OK(" ✓ fops-api deployment running"));
    return;
  }

  hint("Deploying fops-api to cluster…");

  const manifests = fopsApiManifests(clusterName);
  const yaml = manifests.map(m => JSON.stringify(m)).join("\n---\n");

  const result = await execa("kubectl", [
    "--context", clusterName, "apply", "-f", "-",
  ], { input: yaml, reject: false, timeout: 30000 });

  if (result.exitCode === 0) {
    console.log(OK(` ✓ fops-api deployed to ${FOPS_API_NS} namespace`));
    hint(` Access: kubectl --context ${clusterName} port-forward svc/fops-api -n ${FOPS_API_NS} 4100:4100`);
  } else {
    const errMsg = (result.stderr || result.stdout || "").trim().split("\n")[0];
    console.log(WARN(` ⚠ fops-api deploy failed: ${errMsg}`));
  }
}
|
|
318
|
+
|
|
319
|
+
// ── AKS Reconcilers array ─────────────────────────────────────────────────────
|
|
320
|
+
|
|
321
|
+
// Ordered pipeline executed by reconcileCluster via runReconcilers.
// NOTE(review): ordering appears significant (kubeconfig before any kubectl
// step, secret-store before k8s-secrets, flux before flux-prereqs) — confirm
// before reordering.
export const AKS_RECONCILERS = [
  // Cluster-level Azure configuration (az CLI only).
  { name: "api-server-ip", fn: reconcileApiServerIp },
  { name: "addons", fn: reconcileAddons },
  { name: "autoscaler", fn: reconcileAutoscaler },
  { name: "spot-pool", fn: reconcileSpotPool },
  // Fetch kubeconfig so the kubectl-based steps below can run.
  { name: "kubeconfig", fn: reconcileKubeconfig },
  { name: "suspend-flux", fn: suspendManagedKustomizations },
  { name: "descheduler", fn: reconcileDescheduler },
  // Data layer: PostgreSQL server and per-service databases.
  { name: "postgres", fn: reconcilePostgres },
  { name: "pg-databases", fn: reconcilePgDatabases },
  // Secrets and storage wiring.
  { name: "secret-store", fn: reconcileSecretStore },
  { name: "storage-account", fn: reconcileStorageAccount },
  { name: "k8s-secrets", fn: reconcileK8sSecrets },
  { name: "storage-engine", fn: reconcileStorageEngine },
  // GitOps: Flux plus the Helm plumbing it depends on.
  { name: "flux", fn: reconcileFluxStep },
  { name: "helm-repos", fn: reconcileHelmRepos },
  { name: "flux-prereqs", fn: reconcileFluxPrereqs },
  { name: "acr-webhooks", fn: reconcileAcrWebhooks },
  { name: "helm-values", fn: reconcileHelmValues },
  { name: "vault-unseal", fn: reconcileVaultUnseal },
  // Edge and in-cluster services.
  { name: "ingress-ip", fn: reconcileIngressIp },
  { name: "frontend-auth", fn: reconcileFrontendAuth },
  { name: "fops-api", fn: reconcileFopsApi },
];
|
|
345
|
+
|
|
346
|
+
// ── Cluster reconciler ────────────────────────────────────────────────────────
|
|
347
|
+
|
|
348
|
+
/**
 * Fetch the live AKS cluster object into ctx.cluster, then run the full
 * AKS_RECONCILERS pipeline against it. A failed `az aks show` downgrades
 * to a WARN and an empty cluster object so the pipeline still runs.
 *
 * @param {object} ctx - { execa, clusterName, rg, sub, ... } shared reconciler context
 */
export async function reconcileCluster(ctx) {
  const { execa, clusterName, rg, sub } = ctx;

  const showArgs = [
    "aks", "show", "-g", rg, "-n", clusterName, "--output", "json",
    ...subArgs(sub),
  ];

  try {
    const result = await execa("az", showArgs, { timeout: 30000 });
    ctx.cluster = JSON.parse(result.stdout);
  } catch (err) {
    const firstLine = (err.message || "").split("\n")[0];
    console.log(WARN(` ⚠ Could not fetch cluster details: ${firstLine}`));
    ctx.cluster = {};
  }

  await runReconcilers(AKS_RECONCILERS, ctx);
}
|
|
364
|
+
|
|
365
|
+
// ── AKS Doctor ────────────────────────────────────────────────────────────────
|
|
366
|
+
|
|
367
|
+
/**
 * One-shot diagnostic/repair pass ("doctor") for an AKS cluster:
 *   1. ensure the GHCR image pull secret,
 *   2. backfill missing identityId/tenantId on the azure-secretsmanager SecretStore,
 *   3. force-refresh all ExternalSecrets in the foundation namespace,
 *   4. report any ExternalSecrets whose Ready condition is not True,
 *   5. add PGSSLMODE=require to the foundation service ConfigMaps that lack it,
 *   6. trigger a Flux git-source reconcile,
 *   7. restart the PG-backed deployments when anything was fixed.
 * Finally prints a fixed/issues summary. All external commands run with
 * reject: false — the doctor reports problems instead of throwing.
 *
 * @param {object} [opts] - { clusterName?, profile?, githubToken? }
 */
export async function aksDoctor(opts = {}) {
  const execa = await lazyExeca();
  const cl = requireCluster(opts.clusterName);
  const clusterName = cl.clusterName;
  const rg = cl.resourceGroup || AKS_DEFAULTS.resourceGroup;
  const sub = cl.subscription || opts.profile;

  banner(`AKS Doctor: ${clusterName}`);

  // Lazy import (matches reconcileKubeconfig); refresh kubeconfig first so
  // every kubectl call below can target --context clusterName.
  const { getCredentials, ensureGhcrPullSecret } = await import("./azure-aks-core.js");
  await getCredentials(execa, { clusterName, rg, sub });

  let fixed = 0;
  let issues = 0;
  // Deployments/ConfigMaps that talk to PostgreSQL (steps 5 and 7).
  const pgServices = ["foundation-backend", "foundation-processor", "foundation-watcher", "foundation-scheduler"];

  // 1. Check and fix GHCR pull secret
  hint("Checking GHCR pull secret…");
  const githubToken = opts.githubToken || process.env.GITHUB_TOKEN;
  if (!githubToken) {
    console.log(WARN(" ⚠ No GitHub token found — set GITHUB_TOKEN or pass --github-token"));
    issues++;
  } else {
    await ensureGhcrPullSecret(execa, { clusterName, githubToken, namespace: "default" });
    // NOTE(review): counted as "fixed" even when the secret already existed
    // (which also forces the deployment restarts in step 7) — confirm intended.
    fixed++;
  }

  // 2. Check and fix SecretStore identity
  hint("Checking SecretStore identity configuration…");
  const { stdout: ssJson } = await execa("kubectl", [
    "get", "secretstore", "azure-secretsmanager", "-n", "foundation",
    "-o", "json", "--context", clusterName,
  ], { reject: false, timeout: 15000 });

  if (ssJson) {
    try {
      const ss = JSON.parse(ssJson);
      const identityId = ss?.spec?.provider?.azurekv?.identityId || "";
      const tenantId = ss?.spec?.provider?.azurekv?.tenantId || "";

      if (!identityId || !tenantId) {
        // Backfill from the cluster's kubelet managed identity + current tenant.
        hint(" Fetching AKS managed identity…");
        const { stdout: aksId } = await execa("az", [
          "aks", "show", "-g", rg, "-n", clusterName,
          "--query", "identityProfile.kubeletidentity.clientId", "-o", "tsv",
          ...subArgs(sub),
        ], { reject: false, timeout: 30000 });
        const { stdout: tenant } = await execa("az", [
          "account", "show", "--query", "tenantId", "-o", "tsv",
        ], { reject: false, timeout: 10000 });

        const newIdentityId = (aksId || "").trim();
        const newTenantId = (tenant || "").trim();

        if (newIdentityId && newTenantId) {
          hint(" Patching SecretStore with identity values…");
          await execa("kubectl", [
            "patch", "secretstore", "azure-secretsmanager", "-n", "foundation",
            "--type=merge", "-p", JSON.stringify({
              spec: { provider: { azurekv: { identityId: newIdentityId, tenantId: newTenantId }}}
            }),
            "--context", clusterName,
          ], { reject: false, timeout: 15000 });
          console.log(OK(` ✓ SecretStore patched with identity: ${newIdentityId.slice(0, 8)}…`));
          fixed++;
        } else {
          console.log(WARN(" ⚠ Could not retrieve AKS identity"));
          issues++;
        }
      } else {
        console.log(OK(` ✓ SecretStore has identity configured`));
      }
    } catch { /* ignore parse errors */ }
  }

  // 3. Force refresh ExternalSecrets
  // Touching an annotation makes external-secrets re-sync each object.
  hint("Refreshing ExternalSecrets…");
  const { stdout: esList } = await execa("kubectl", [
    "get", "externalsecret", "-n", "foundation", "-o", "name", "--context", clusterName,
  ], { reject: false, timeout: 15000 });

  if (esList) {
    const esNames = esList.trim().split("\n").filter(Boolean);
    for (const es of esNames) {
      await execa("kubectl", [
        "annotate", es, "-n", "foundation",
        `force-sync=${Date.now()}`, "--overwrite", "--context", clusterName,
      ], { reject: false, timeout: 10000 });
    }
    console.log(OK(` ✓ Refreshed ${esNames.length} ExternalSecrets`));
  }

  // 4. Check ExternalSecrets sync status
  hint("Checking ExternalSecrets sync status…");
  const { stdout: esStatusJson } = await execa("kubectl", [
    "get", "externalsecret", "-n", "foundation", "-o", "json", "--context", clusterName,
  ], { reject: false, timeout: 15000 });

  if (esStatusJson) {
    try {
      const esData = JSON.parse(esStatusJson);
      // An ExternalSecret is failing when its Ready condition exists and is not True.
      const failedEs = (esData.items || []).filter(es => {
        const ready = (es.status?.conditions || []).find(c => c.type === "Ready");
        return ready && ready.status !== "True";
      });
      if (failedEs.length > 0) {
        console.log(WARN(` ⚠ ${failedEs.length} ExternalSecret(s) have sync errors:`));
        for (const es of failedEs) {
          const ready = (es.status?.conditions || []).find(c => c.type === "Ready");
          const msg = ready?.message || "Unknown error";
          console.log(WARN(` - ${es.metadata.name}: ${msg.slice(0, 80)}…`));
        }
        issues += failedEs.length;
      } else {
        console.log(OK(` ✓ All ExternalSecrets synced successfully`));
      }
    } catch { /* ignore */ }
  }

  // 5. Check PGSSLMODE in ConfigMaps for Azure PostgreSQL
  // Azure Flexible Server requires SSL; patch in PGSSLMODE=require where missing.
  hint("Checking PostgreSQL SSL configuration…");
  for (const svc of pgServices) {
    const { stdout: cmJson } = await execa("kubectl", [
      "get", "configmap", svc, "-n", "foundation", "-o", "json", "--context", clusterName,
    ], { reject: false, timeout: 10000 });

    if (cmJson) {
      try {
        const cm = JSON.parse(cmJson);
        // Either key counts as "already configured".
        const hasPgsslmode = cm.data?.PGSSLMODE || cm.data?.MX_POSTGRES_SSLMODE;
        if (!hasPgsslmode) {
          hint(` Patching ${svc} ConfigMap with PGSSLMODE=require…`);
          await execa("kubectl", [
            "patch", "configmap", svc, "-n", "foundation", "--type=merge",
            "-p", JSON.stringify({ data: { PGSSLMODE: "require", MX_POSTGRES_SSLMODE: "require" } }),
            "--context", clusterName,
          ], { reject: false, timeout: 10000 });
          console.log(OK(` ✓ Added PGSSLMODE to ${svc}`));
          fixed++;
        }
      } catch { /* ignore */ }
    }
  }

  // 6. Reconcile Flux
  hint("Reconciling Flux…");
  await execa("flux", [
    "reconcile", "source", "git", "flux-system", "--context", clusterName,
  ], { reject: false, timeout: 60000 });
  console.log(OK(" ✓ Flux reconcile triggered"));

  // 7. Restart deployments to pick up ConfigMap changes
  // (ConfigMap edits do not roll pods by themselves.)
  if (fixed > 0) {
    hint("Restarting affected deployments…");
    for (const svc of pgServices) {
      await execa("kubectl", [
        "rollout", "restart", "deployment", svc, "-n", "foundation", "--context", clusterName,
      ], { reject: false, timeout: 15000 });
    }
    console.log(OK(" ✓ Deployments restarted"));
  }

  // Summary
  console.log("");
  if (issues > 0) {
    console.log(WARN(` Doctor found ${issues} issue(s) that need manual attention.`));
  }
  if (fixed > 0) {
    console.log(OK(` ✓ Fixed ${fixed} issue(s) automatically.`));
  }
  hint("Run 'kubectl get pods -n foundation' to check pod status.");
}
|