@meshxdata/fops 0.1.51 → 0.1.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -195
- package/package.json +1 -1
- package/src/doctor.js +11 -8
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-core.js +9 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-aks-secrets.js +151 -0
- package/src/plugins/bundled/fops-plugin-azure/lib/azure-fleet.js +12 -4
- package/src/plugins/bundled/fops-plugin-azure/lib/commands/test-cmds.js +28 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
1
|
+
## [0.1.52] - 2026-03-24
|
|
2
|
+
|
|
3
|
+
- fix(doctor): set KUBECONFIG for k3s kubectl commands (db9359b)
|
|
4
|
+
- fix(azure): move --landscape to test run command, not separate subcommand (4b9b089)
|
|
5
|
+
- feat(azure): add test integration command with landscape support (b2990a0)
|
|
6
|
+
- fix(fleet): skip VMs without public IPs in fleet exec (39acbaa)
|
|
7
|
+
- feat(azure): detect and fix External Secrets identity issues (f907d11)
|
|
8
|
+
- operator cli bump 0.1.51 (db55bdc)
|
|
9
|
+
- feat: add postgres-exporter and Azure tray menu improvements (2a337ac)
|
|
7
10
|
- operator cli plugin fix (4dae908)
|
|
8
11
|
- operator cli plugin fix (25620cc)
|
|
9
12
|
- operator cli test fixes (1d1c18f)
|
|
@@ -176,195 +179,6 @@ All notable changes to @meshxdata/fops (Foundation Operator CLI) are documented
|
|
|
176
179
|
- azure packer (12175b8)
|
|
177
180
|
- init hashed pwd (db8523c)
|
|
178
181
|
- packer (5b5c7c4)
|
|
179
|
-
- doctor for azure vm (ed524fa)
|
|
180
|
-
- packer and 1pwd (c6d053e)
|
|
181
|
-
- split big index.js (dc85a1b)
|
|
182
|
-
- kafka volume update (21815ec)
|
|
183
|
-
- fix openai azure tools confirmation and flow (0118cd1)
|
|
184
|
-
- nighly fixx, test fix (5e0d04f)
|
|
185
|
-
- open ai training (cdc494a)
|
|
186
|
-
|
|
187
|
-
## [0.1.50] - 2026-03-24
|
|
188
|
-
|
|
189
|
-
- operator cli plugin fix (25620cc)
|
|
190
|
-
- operator cli test fixes (1d1c18f)
|
|
191
|
-
- feat(test): add setup-users command for QA test user creation (b929507)
|
|
192
|
-
- feat(aks): show HA standby clusters with visual grouping (8fb640c)
|
|
193
|
-
- refactor(provision): extract VM provisioning to dedicated module (af321a7)
|
|
194
|
-
- refactor(provision): extract post-start health checks to dedicated module (6ed5f2d)
|
|
195
|
-
- fix: ping timeout 15s, fix prometheus sed escaping (d11ac14)
|
|
196
|
-
- refactor(vm): extract terraform HCL generation to dedicated module (896a64b)
|
|
197
|
-
- refactor(keyvault): extract key operations to dedicated module (716bbe4)
|
|
198
|
-
- refactor(azure): extract swarm functions to azure-fleet-swarm.js (4690e34)
|
|
199
|
-
- refactor(azure): extract SSH/remote functions to azure-ops-ssh.js (e62b8f0)
|
|
200
|
-
- refactor(azure): split azure-ops.js into smaller modules (4515425)
|
|
201
|
-
- feat(aks): add --ha flag for full cross-region HA setup (ece68c5)
|
|
202
|
-
- feat(fops): inject ENVIRONMENT_NAME on VM provisioning (6ef2a27)
|
|
203
|
-
- fix(postgres): disable SSL mode to fix connection issues (c789ae9)
|
|
204
|
-
- feat(trino): add caching configuration for docker-compose (3668224)
|
|
205
|
-
- fix(fops-azure): run pytest directly instead of missing scripts (29f8410)
|
|
206
|
-
- add -d detach option for local frontend dev, remove hive cpu limits (3306667)
|
|
207
|
-
- release 0.1.49 (dcca32b)
|
|
208
|
-
- release 0.1.48 (9b195e5)
|
|
209
|
-
- stash on updates (2916c01)
|
|
210
|
-
- stash on updates (b5c14df)
|
|
211
|
-
- stash on updates (d0453d1)
|
|
212
|
-
- frontend dev fixes (0ca7b00)
|
|
213
|
-
- fix: update azure test commands (77c81da)
|
|
214
|
-
- default locust to CLI mode, add --web for UI (ca35bff)
|
|
215
|
-
- add locust command for load testing AKS clusters (1278722)
|
|
216
|
-
- update spot node pool default autoscaling to 1-20 (617c182)
|
|
217
|
-
- module for aks (3dd1a61)
|
|
218
|
-
- add hive to PG_SERVICE_DBS for fops pg-setup (afccb16)
|
|
219
|
-
- feat(azure): enhance aks doctor with ExternalSecrets and PGSSLMODE checks (8b14861)
|
|
220
|
-
- add foundation-postgres ExternalName service to reconciler (ea88e11)
|
|
221
|
-
- new flux templates (0e2e372)
|
|
222
|
-
- feat(azure): add storage-engine secrets to Key Vault (a4f488e)
|
|
223
|
-
- feat(azure-aks): add AUTH0_DOMAIN to template rendering variables (216c37e)
|
|
224
|
-
- feat(azure): add storage account creation per cluster (aa1b138)
|
|
225
|
-
- bump watcher (ab24473)
|
|
226
|
-
- fix: concurrent compute calls (#66) (03e2edf)
|
|
227
|
-
- bump backend version (5058ff5)
|
|
228
|
-
- bump fops to 0.1.44 (8c0ef5d)
|
|
229
|
-
- Mlflow and azure plugin fix (176881f)
|
|
230
|
-
- fix lifecycle (a2cb9e7)
|
|
231
|
-
- callback url for localhost (821fb94)
|
|
232
|
-
- disable 4 scaffolding plugin by default. (bfb2b76)
|
|
233
|
-
- jaccard improvements (b7494a0)
|
|
234
|
-
- refactor azure plugin (68dfef4)
|
|
235
|
-
- refactor azure plugin (b24a008)
|
|
236
|
-
- fix trino catalog missing (4928a55)
|
|
237
|
-
- v36 bump and changelog generation on openai (37a0440)
|
|
238
|
-
- v36 bump and changelog generation on openai (a3b02d9)
|
|
239
|
-
- bump (a990058)
|
|
240
|
-
- status bar fix and new plugin for ttyd (27dde1e)
|
|
241
|
-
- file demo and tray (1a3e704)
|
|
242
|
-
- electron app (59ad0bb)
|
|
243
|
-
- compose and fops file plugin (1cf0e81)
|
|
244
|
-
- bump (346ffc1)
|
|
245
|
-
- localhost replaced by 127.0.0.1 (82b9f30)
|
|
246
|
-
- .29 (587b0e1)
|
|
247
|
-
- improve up down and bootstrap script (b79ebaf)
|
|
248
|
-
- checksum (22c8086)
|
|
249
|
-
- checksum (96b434f)
|
|
250
|
-
- checksum (15ed3c0)
|
|
251
|
-
- checksum (8a6543a)
|
|
252
|
-
- bump embed trino linksg (8440504)
|
|
253
|
-
- bump data (765ffd9)
|
|
254
|
-
- bump (cb8b232)
|
|
255
|
-
- broken tests (c532229)
|
|
256
|
-
- release 0.1.18, preflight checks (d902249)
|
|
257
|
-
- fix compute display bug (d10f5d9)
|
|
258
|
-
- cleanup packer files (6330f18)
|
|
259
|
-
- plan mode (cb36a8a)
|
|
260
|
-
- bump to 0.1.16 - agent ui (41ac1a2)
|
|
261
|
-
- bump to 0.1.15 - agent ui (4ebe2e1)
|
|
262
|
-
- bump to 0.1.14 (6c3a7fa)
|
|
263
|
-
- bump to 0.1.13 (8db570f)
|
|
264
|
-
- release 0.1.12 (c1c79e5)
|
|
265
|
-
- bump (11aa3b0)
|
|
266
|
-
- git keep and bump tui (be1678e)
|
|
267
|
-
- skills, index, rrf, compacted context (100k > 10k) (7b2fffd)
|
|
268
|
-
- cloudflare and token consumption, graphs indexing (0ad9eec)
|
|
269
|
-
- bump storage default (22c83ba)
|
|
270
|
-
- storage fix (68a22a0)
|
|
271
|
-
- skills update (7f56500)
|
|
272
|
-
- v9 bump (3864446)
|
|
273
|
-
- bump (c95eedc)
|
|
274
|
-
- rrf (dbf8c95)
|
|
275
|
-
- feat: warning when running predictions (95e8c52)
|
|
276
|
-
- feat: support for local predictions (45cf26b)
|
|
277
|
-
- feat: wip support for predictions + mlflow (3457052)
|
|
278
|
-
- add Reciprocal Rank Fusion (RRF) to knowledge and skill retrieval (61549bc)
|
|
279
|
-
- validate CSV headers in compute_run readiness check (a8c7a43)
|
|
280
|
-
- fix corrupted Iceberg metadata: probe tables + force cleanup on re-apply (50578af)
|
|
281
|
-
- enforce: never use foundation_apply to fix broken products (2e049bf)
|
|
282
|
-
- update SKILL.md with complete tool reference for knowledge retrieval (30b1924)
|
|
283
|
-
- add storage read, input DP table probe, and compute_run improvements (34e6c4c)
|
|
284
|
-
- skills update (1220385)
|
|
285
|
-
- skills update (bb66958)
|
|
286
|
-
- some tui improvement andd tools apply overwrite (e90c35c)
|
|
287
|
-
- skills update (e9227a1)
|
|
288
|
-
- skills update (669c4b3)
|
|
289
|
-
- fix plugin pre-flight checks (f741743)
|
|
290
|
-
- increase agent context (6479aaa)
|
|
291
|
-
- skills and init sql fixes (5fce35e)
|
|
292
|
-
- checksum (3518b56)
|
|
293
|
-
- penging job limit (a139861)
|
|
294
|
-
- checksum (575d28c)
|
|
295
|
-
- bump (92049ba)
|
|
296
|
-
- fix bug per tab status (0a33657)
|
|
297
|
-
- fix bug per tab status (50457c6)
|
|
298
|
-
- checksumming (0ad842e)
|
|
299
|
-
- shot af mardkwon overlapping (51f63b9)
|
|
300
|
-
- add spark dockerfile for multiarch builds (95abbd1)
|
|
301
|
-
- fix plugin initialization (16b9782)
|
|
302
|
-
- split index.js (50902a2)
|
|
303
|
-
- cloudflare cidr (cc4e021)
|
|
304
|
-
- cloduflare restrictions (2f6ba2d)
|
|
305
|
-
- sequential start (86b496e)
|
|
306
|
-
- sequential start (4930fe1)
|
|
307
|
-
- sequential start (353f014)
|
|
308
|
-
- qa tests (2dc6a1a)
|
|
309
|
-
- bump sha for .85 (dc2edfe)
|
|
310
|
-
- preserve env on sudo (7831227)
|
|
311
|
-
- bump sha for .84 (6c052f9)
|
|
312
|
-
- non interactive for azure vms (0aa8a2f)
|
|
313
|
-
- keep .env if present (d072450)
|
|
314
|
-
- bump (7a8e732)
|
|
315
|
-
- ensure opa is on compose if not set (f4a5228)
|
|
316
|
-
- checksum bump (a2ccc20)
|
|
317
|
-
- netrc defensive checks (a0b0ccc)
|
|
318
|
-
- netrc defensive checks (ae37403)
|
|
319
|
-
- checksum (ec45d11)
|
|
320
|
-
- update sync and fix up (7f9af72)
|
|
321
|
-
- expand test for azure and add new per app tag support (388a168)
|
|
322
|
-
- checksum on update (44005fc)
|
|
323
|
-
- cleanup for later (15e5313)
|
|
324
|
-
- cleanup for later (11c9597)
|
|
325
|
-
- switch branch feature (822fecc)
|
|
326
|
-
- add pull (d1c19ab)
|
|
327
|
-
- Bump hono from 4.11.9 to 4.12.0 in /operator-cli (ad25144)
|
|
328
|
-
- tests (f180a9a)
|
|
329
|
-
- cleanup (39c49a3)
|
|
330
|
-
- registry (7b7126a)
|
|
331
|
-
- reconcile kafka (832d0db)
|
|
332
|
-
- gh login bug (025886c)
|
|
333
|
-
- cleanup (bb96cab)
|
|
334
|
-
- strip envs from process (2421180)
|
|
335
|
-
- force use of gh creds not tokens in envs var (fff7787)
|
|
336
|
-
- resolve import between npm installs and npm link (79522e1)
|
|
337
|
-
- fix gh scope and azure states (afd846c)
|
|
338
|
-
- refactoring (da50352)
|
|
339
|
-
- split fops repo (d447638)
|
|
340
|
-
- aks (b791f8f)
|
|
341
|
-
- refactor azure (67d3bad)
|
|
342
|
-
- wildcard (391f023)
|
|
343
|
-
- azure plugin (c074074)
|
|
344
|
-
- zap (d7e6e7f)
|
|
345
|
-
- fix knock (cf89c05)
|
|
346
|
-
- azure (4adec98)
|
|
347
|
-
- Bump tar from 7.5.7 to 7.5.9 in /operator-cli (e41e98e)
|
|
348
|
-
- azure stack index.js split (de12272)
|
|
349
|
-
- Bump ajv from 8.17.1 to 8.18.0 in /operator-cli (76da21f)
|
|
350
|
-
- packer (9665fbc)
|
|
351
|
-
- remove stack api (db0fd4d)
|
|
352
|
-
- packer cleanup (fe1bf14)
|
|
353
|
-
- force refresh token (3a3d7e2)
|
|
354
|
-
- provision shell (2ad505f)
|
|
355
|
-
- azure vm management (91dcb31)
|
|
356
|
-
- azure specific (2b0cca8)
|
|
357
|
-
- azure packer (12175b8)
|
|
358
|
-
- init hashed pwd (db8523c)
|
|
359
|
-
- packer (5b5c7c4)
|
|
360
|
-
- doctor for azure vm (ed524fa)
|
|
361
|
-
- packer and 1pwd (c6d053e)
|
|
362
|
-
- split big index.js (dc85a1b)
|
|
363
|
-
- kafka volume update (21815ec)
|
|
364
|
-
- fix openai azure tools confirmation and flow (0118cd1)
|
|
365
|
-
- nighly fixx, test fix (5e0d04f)
|
|
366
|
-
- open ai training (cdc494a)
|
|
367
|
-
- openai integration in azure (1ca1475)
|
|
368
182
|
|
|
369
183
|
# Changelog
|
|
370
184
|
|
package/package.json
CHANGED
package/src/doctor.js
CHANGED
|
@@ -22,6 +22,9 @@ const KEY_PORTS = {
|
|
|
22
22
|
18201: "Vault",
|
|
23
23
|
};
|
|
24
24
|
|
|
25
|
+
// K3s kubectl requires explicit KUBECONFIG inside the container.
// Shared argument prefix for `docker`: `exec -e KUBECONFIG=/etc/rancher/k3s/k3s.yaml k3s-server kubectl`.
// Call sites spread it and append the kubectl subcommand (e.g. "get", "nodes").
|
|
26
|
+
const K3S_KUBECTL = ["exec", "-e", "KUBECONFIG=/etc/rancher/k3s/k3s.yaml", "k3s-server", "kubectl"];
|
|
27
|
+
|
|
25
28
|
const LABEL_WIDTH = 36;
|
|
26
29
|
|
|
27
30
|
function header(title) {
|
|
@@ -1187,7 +1190,7 @@ export async function runDoctor(opts = {}, registry = null) {
|
|
|
1187
1190
|
// 1. Cluster reachable
|
|
1188
1191
|
try {
|
|
1189
1192
|
const { stdout: nodesOut, exitCode: nodesExit } = await execa("docker", [
|
|
1190
|
-
|
|
1193
|
+
...K3S_KUBECTL, "get", "nodes",
|
|
1191
1194
|
], { timeout: 10000, reject: false });
|
|
1192
1195
|
if (nodesExit === 0 && /Ready/.test(nodesOut)) {
|
|
1193
1196
|
ok("K3s cluster reachable", nodesOut.trim().split("\n").find((l) => /Ready/.test(l))?.trim().replace(/\s+/g, " ") || "ready");
|
|
@@ -1216,11 +1219,11 @@ export async function runDoctor(opts = {}, registry = null) {
|
|
|
1216
1219
|
}
|
|
1217
1220
|
console.log(chalk.cyan(" ▶ Deleting old ghcr-secret…"));
|
|
1218
1221
|
await execa("docker", [
|
|
1219
|
-
|
|
1222
|
+
...K3S_KUBECTL, "delete", "secret", "ghcr-secret", "-n", "spark-jobs",
|
|
1220
1223
|
], { timeout: 10000, reject: false });
|
|
1221
1224
|
console.log(chalk.cyan(" ▶ Creating new ghcr-secret…"));
|
|
1222
1225
|
await execa("docker", [
|
|
1223
|
-
|
|
1226
|
+
...K3S_KUBECTL, "create", "secret", "docker-registry", "ghcr-secret",
|
|
1224
1227
|
"--docker-server=ghcr.io", "--docker-username=x-access-token", `--docker-password=${freshToken}`,
|
|
1225
1228
|
"--namespace=spark-jobs",
|
|
1226
1229
|
], { timeout: 10000 });
|
|
@@ -1229,7 +1232,7 @@ export async function runDoctor(opts = {}, registry = null) {
|
|
|
1229
1232
|
const patchJson = '{"imagePullSecrets":[{"name":"ghcr-secret"}]}';
|
|
1230
1233
|
for (const sa of ["spark-operator-spark", "spark", "default"]) {
|
|
1231
1234
|
await execa("docker", [
|
|
1232
|
-
|
|
1235
|
+
...K3S_KUBECTL, "patch", "serviceaccount", sa,
|
|
1233
1236
|
"-n", "spark-jobs", "-p", patchJson,
|
|
1234
1237
|
], { timeout: 10000, reject: false });
|
|
1235
1238
|
}
|
|
@@ -1238,7 +1241,7 @@ export async function runDoctor(opts = {}, registry = null) {
|
|
|
1238
1241
|
|
|
1239
1242
|
try {
|
|
1240
1243
|
const { exitCode: secretExit } = await execa("docker", [
|
|
1241
|
-
|
|
1244
|
+
...K3S_KUBECTL, "get", "secret", "ghcr-secret", "-n", "spark-jobs",
|
|
1242
1245
|
], { timeout: 10000, reject: false });
|
|
1243
1246
|
|
|
1244
1247
|
if (secretExit !== 0) {
|
|
@@ -1248,7 +1251,7 @@ export async function runDoctor(opts = {}, registry = null) {
|
|
|
1248
1251
|
let ghcrTokenValid = false;
|
|
1249
1252
|
try {
|
|
1250
1253
|
const { stdout: b64Data } = await execa("docker", [
|
|
1251
|
-
|
|
1254
|
+
...K3S_KUBECTL, "get", "secret", "ghcr-secret",
|
|
1252
1255
|
"-n", "spark-jobs", "-o", "jsonpath={.data.\\.dockerconfigjson}",
|
|
1253
1256
|
], { timeout: 10000 });
|
|
1254
1257
|
if (b64Data?.trim()) {
|
|
@@ -1280,7 +1283,7 @@ export async function runDoctor(opts = {}, registry = null) {
|
|
|
1280
1283
|
// 3. ECR / regcred secret (informational)
|
|
1281
1284
|
try {
|
|
1282
1285
|
const { exitCode: ecrExit } = await execa("docker", [
|
|
1283
|
-
|
|
1286
|
+
...K3S_KUBECTL, "get", "secret", "ecr-secret", "-n", "spark-jobs",
|
|
1284
1287
|
], { timeout: 10000, reject: false });
|
|
1285
1288
|
if (ecrExit === 0) {
|
|
1286
1289
|
ok("ECR pull secret (ecr-secret)", "exists in spark-jobs");
|
|
@@ -1291,7 +1294,7 @@ export async function runDoctor(opts = {}, registry = null) {
|
|
|
1291
1294
|
|
|
1292
1295
|
try {
|
|
1293
1296
|
const { exitCode: regExit } = await execa("docker", [
|
|
1294
|
-
|
|
1297
|
+
...K3S_KUBECTL, "get", "secret", "regcred", "-n", "spark-jobs",
|
|
1295
1298
|
], { timeout: 10000, reject: false });
|
|
1296
1299
|
if (regExit === 0) {
|
|
1297
1300
|
ok("regcred secret", "exists in spark-jobs");
|
|
@@ -1250,6 +1250,15 @@ export async function aksStatus(opts = {}) {
|
|
|
1250
1250
|
hint(" Flux CLI not available — skipping Flux status.");
|
|
1251
1251
|
}
|
|
1252
1252
|
|
|
1253
|
+
// External Secrets health check
|
|
1254
|
+
console.log(`\n ${LABEL("External Secrets")}`);
|
|
1255
|
+
try {
|
|
1256
|
+
const { validateExternalSecretsHealth } = await import("./azure-aks-secrets.js");
|
|
1257
|
+
await validateExternalSecretsHealth({ execa, clusterName, rg, sub });
|
|
1258
|
+
} catch (e) {
|
|
1259
|
+
hint(` Could not check External Secrets: ${e.message}`);
|
|
1260
|
+
}
|
|
1261
|
+
|
|
1253
1262
|
console.log("");
|
|
1254
1263
|
}
|
|
1255
1264
|
|
|
@@ -172,6 +172,32 @@ export async function reconcileSecretStore(ctx) {
|
|
|
172
172
|
}
|
|
173
173
|
}
|
|
174
174
|
|
|
175
|
+
// 2c. Check for External Secrets managed identity (ext-* prefix) and grant Key Vault access
|
|
176
|
+
const extSecretsIdentity = await detectExternalSecretsIdentity(execa, clusterName, sub);
|
|
177
|
+
if (extSecretsIdentity && kvId) {
|
|
178
|
+
const { stdout: hasExtRole } = await execa("az", [
|
|
179
|
+
"role", "assignment", "list",
|
|
180
|
+
"--assignee", extSecretsIdentity.clientId,
|
|
181
|
+
"--role", "Key Vault Secrets User",
|
|
182
|
+
"--scope", kvId,
|
|
183
|
+
"--query", "[0].id", "-o", "tsv",
|
|
184
|
+
...subArgs(sub),
|
|
185
|
+
], { reject: false, timeout: 30000 });
|
|
186
|
+
|
|
187
|
+
if (!hasExtRole?.trim()) {
|
|
188
|
+
await execa("az", [
|
|
189
|
+
"role", "assignment", "create",
|
|
190
|
+
"--assignee", extSecretsIdentity.clientId,
|
|
191
|
+
"--role", "Key Vault Secrets User",
|
|
192
|
+
"--scope", kvId,
|
|
193
|
+
...subArgs(sub),
|
|
194
|
+
], { reject: false, timeout: 30000 });
|
|
195
|
+
console.log(OK(` ✓ External Secrets identity granted Key Vault Secrets User role`));
|
|
196
|
+
}
|
|
197
|
+
// Store the identity ID for SecretStore configuration
|
|
198
|
+
ctx.extSecretsIdentityId = extSecretsIdentity.clientId;
|
|
199
|
+
}
|
|
200
|
+
|
|
175
201
|
// 3. Ensure azure-secret-sp exists in each target namespace
|
|
176
202
|
const { stdout: spSecretJson } = await kubectl([
|
|
177
203
|
"get", "secret", "azure-secret-sp", "-n", "foundation", "-o", "json",
|
|
@@ -579,6 +605,131 @@ export async function detectEsApiVersion(kubectl) {
|
|
|
579
605
|
return "external-secrets.io/v1";
|
|
580
606
|
}
|
|
581
607
|
|
|
608
|
+
/**
 * Detect External Secrets managed identity (ext-* prefix) for clusters with multiple identities.
 * When AKS has multiple user-assigned identities, SecretStore needs to specify which one to use.
 *
 * Best-effort: a failed or unparseable `az identity list` call is treated as
 * "no identities found" rather than an error.
 *
 * @param {Function} execa - execa-compatible runner, called as `execa(cmd, args, opts)`.
 * @param {string} clusterName - Cluster name the identity name must contain.
 * @param {string} [sub] - Subscription, forwarded to `subArgs()` from ./azure.js.
 * @returns {Promise<{name: string, clientId: string} | null>} The first identity whose
 *   name starts with "ext-" and contains `clusterName`, or `null` when none matches.
 */
export async function detectExternalSecretsIdentity(execa, clusterName, sub) {
  // An empty name would make both `contains(name, '')` and `includes("")` match
  // every identity, so an unrelated ext-* identity could be selected.
  if (typeof clusterName !== "string" || clusterName.trim() === "") {
    return null;
  }

  const { subArgs, WARN, hint } = await import("./azure.js");

  // List all managed identities that match the external-secrets pattern
  const { stdout: identitiesJson } = await execa("az", [
    "identity", "list",
    "--query", `[?contains(name, '${clusterName}')].{name:name,clientId:clientId}`,
    "-o", "json",
    ...subArgs(sub),
  ], { reject: false, timeout: 30000 });

  let identities = [];
  try {
    const parsed = JSON.parse(identitiesJson || "[]");
    // `az` can print `null` or an object on odd failures; only an array is usable.
    if (Array.isArray(parsed)) identities = parsed;
  } catch {
    // Deliberate best-effort: non-JSON output means "nothing detected".
  }

  // Look for ext-* identity (External Secrets workload identity)
  const extIdentity = identities.find(
    (i) => i?.name?.startsWith("ext-") && i.name.includes(clusterName),
  );
  if (extIdentity) {
    return { name: extIdentity.name, clientId: extIdentity.clientId };
  }

  // If multiple identities exist but no ext-* found, warn about potential issues
  if (identities.length > 1) {
    console.log(WARN(`  ⚠ Multiple managed identities found for ${clusterName} but no ext-* identity detected`));
    hint("External Secrets may fail with 'Multiple user assigned identities exist' error");
    hint("Create a dedicated identity: az identity create -n ext-<cluster> -g <rg>");
  }

  return null;
}
|
|
642
|
+
|
|
643
|
+
/**
 * Validate External Secrets health - checks SecretStore config and ExternalSecret status.
 * Reports issues like missing identityId when multiple identities exist.
 *
 * Findings are printed to the console and also returned, including on the early
 * exit taken when the SecretStore is missing or unreadable.
 *
 * @param {object} ctx
 * @param {Function} ctx.execa - execa-compatible runner.
 * @param {string} ctx.clusterName - Used as the kubectl `--context` and for identity detection.
 * @param {string} [ctx.sub] - Subscription forwarded to identity detection.
 * @returns {Promise<Array<{level: "error"|"warn", msg: string, fix?: string}>>} Detected issues (empty when healthy).
 */
export async function validateExternalSecretsHealth(ctx) {
  const { execa, clusterName, sub } = ctx;
  const { OK, WARN, hint } = await import("./azure.js");

  const kubectl = (args, opts = {}) =>
    execa("kubectl", ["--context", clusterName, ...args], { timeout: 30000, reject: false, ...opts });

  const issues = [];

  // kubectl runs with reject:false, so stdout may hold non-JSON text on failure.
  const parseJson = (text) => {
    try {
      return JSON.parse(text);
    } catch {
      return null;
    }
  };

  // Identity detection shells out to `az`; resolve it at most once per run.
  let identityLookup = null;
  const getExtIdentity = () => {
    if (!identityLookup) {
      identityLookup = detectExternalSecretsIdentity(execa, clusterName, sub);
    }
    return identityLookup;
  };

  // Print every finding, then hand the list back to the caller.
  const report = () => {
    if (issues.length === 0) {
      console.log(OK("  ✓ External Secrets healthy"));
      return issues;
    }
    for (const issue of issues) {
      // Both levels use the WARN colour; only the glyph differs.
      const glyph = issue.level === "error" ? "✗" : "⚠";
      console.log(WARN(`  ${glyph} ${issue.msg}`));
      if (issue.fix) hint(`    Fix: ${issue.fix}`);
    }
    return issues;
  };

  // Check SecretStore status
  const { stdout: ssJson } = await kubectl([
    "get", "secretstore", SECRET_STORE_NAME, "-n", "foundation", "-o", "json",
  ]);
  if (!ssJson) {
    issues.push({ level: "error", msg: "SecretStore not found in foundation namespace" });
    return report();
  }

  const ss = parseJson(ssJson);
  if (!ss || typeof ss !== "object") {
    issues.push({ level: "error", msg: "SecretStore status could not be parsed from kubectl output" });
    return report();
  }

  // Surface a SecretStore that explicitly reports Ready != True.
  const ssReadyCond = ss.status?.conditions?.find((c) => c.type === "Ready");
  if (ssReadyCond && ssReadyCond.status !== "True") {
    const reason = (ssReadyCond.message || "Unknown error").substring(0, 100);
    issues.push({ level: "warn", msg: `SecretStore is not Ready: ${reason}` });
  }

  const authType = ss.spec?.provider?.azurekv?.authType;
  const identityId = ss.spec?.provider?.azurekv?.identityId;

  // Check if using ManagedIdentity auth without identityId when multiple identities exist
  if (authType === "ManagedIdentity" && !identityId) {
    const extIdentity = await getExtIdentity();
    if (extIdentity) {
      issues.push({
        level: "warn",
        msg: `SecretStore uses ManagedIdentity but identityId is empty`,
        fix: `Set identityId to "${extIdentity.clientId}" in clusters/${clusterName}/config/secret-store.yaml`,
      });
    }
  }

  // Check ExternalSecret status
  const { stdout: esJson } = await kubectl([
    "get", "externalsecret", "-n", "foundation", "-o", "json",
  ]);
  let externalSecrets = [];
  if (esJson) {
    const esList = parseJson(esJson);
    if (Array.isArray(esList?.items)) {
      externalSecrets = esList.items;
    } else {
      issues.push({ level: "warn", msg: "ExternalSecret list could not be parsed from kubectl output" });
    }
  }

  for (const es of externalSecrets) {
    const ready = es.status?.conditions?.find((c) => c.type === "Ready");
    if (ready?.status === "True") continue;

    const esName = es.metadata?.name ?? "<unknown>";
    const msg = ready?.message || "Unknown error";
    if (msg.includes("Multiple user assigned identities exist")) {
      const extIdentity = await getExtIdentity();
      issues.push({
        level: "error",
        msg: `ExternalSecret "${esName}" failing: Multiple identities detected`,
        fix: extIdentity
          ? `Add identityId: "${extIdentity.clientId}" to SecretStore spec`
          : "Create ext-* managed identity and grant Key Vault access",
      });
    } else if (msg.includes("Forbidden") || msg.includes("not authorized")) {
      issues.push({
        level: "error",
        msg: `ExternalSecret "${esName}" failing: Key Vault access denied`,
        fix: "Run: fops azure aks doctor --fix to grant Key Vault permissions",
      });
    } else {
      issues.push({
        level: "error",
        msg: `ExternalSecret "${esName}" failing: ${msg.substring(0, 100)}`,
      });
    }
  }

  // Report findings
  return report();
}
|
|
732
|
+
|
|
582
733
|
// ── Vault auto-unseal bootstrap ──────────────────────────────────────────────
|
|
583
734
|
|
|
584
735
|
export const VAULT_UNSEAL_KEY_NAME = "vault-unseal";
|
|
@@ -71,20 +71,28 @@ async function forEachVm({
|
|
|
71
71
|
return { results: [], vms };
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
-
const
|
|
75
|
-
for (const t of
|
|
74
|
+
const allTargets = opts.vmName ? [opts.vmName] : names;
|
|
75
|
+
for (const t of allTargets) {
|
|
76
76
|
if (!vms[t]) {
|
|
77
77
|
console.error(ERR(`\n VM "${t}" not tracked. Run: fops azure list\n`));
|
|
78
78
|
process.exit(1);
|
|
79
79
|
}
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
+
// Filter out VMs without public IPs (e.g., local stack) unless explicitly targeted
|
|
83
|
+
const skippedVms = opts.vmName ? [] : allTargets.filter(t => !vms[t].publicIp);
|
|
84
|
+
const targets = opts.vmName ? allTargets : allTargets.filter(t => vms[t].publicIp);
|
|
85
|
+
|
|
82
86
|
banner(title);
|
|
83
|
-
|
|
87
|
+
if (targets.length === 0) {
|
|
88
|
+
hint("No VMs with public IPs to target.\n");
|
|
89
|
+
return { results: [], vms, activeVm: listVms().activeVm };
|
|
90
|
+
}
|
|
91
|
+
hint(`${targets.length} VM(s)${skippedVms.length ? ` (${skippedVms.length} skipped: no public IP)` : ""}${concurrency ? ` (concurrency: ${concurrency})` : ""}…\n`);
|
|
84
92
|
|
|
85
93
|
async function runOne(name) {
|
|
86
94
|
const vm = vms[name];
|
|
87
|
-
if (!vm.publicIp) return { name, ok: false, reason: "no public IP" };
|
|
95
|
+
if (!vm.publicIp) return { name, ok: false, reason: "no public IP (local stack?)" };
|
|
88
96
|
|
|
89
97
|
try {
|
|
90
98
|
await knockForVm(vm);
|
|
@@ -14,6 +14,8 @@ export function registerTestCommands(azure) {
|
|
|
14
14
|
.command("run [name]", { isDefault: true })
|
|
15
15
|
.description("Run QA automation tests locally against a remote VM")
|
|
16
16
|
.option("--vm-name <name>", "Target VM (default: active)")
|
|
17
|
+
.option("--landscape <file>", "Apply landscape file (FCL/HCL/YAML) before running tests")
|
|
18
|
+
.option("--landscape-template <name>", "Use built-in landscape template (demo, pipeline_demo)")
|
|
17
19
|
.action(async (name, opts) => {
|
|
18
20
|
const { resolveCliSrc, lazyExeca, ensureAzCli, ensureAzAuth, resolvePublicIp } = await import("../azure-helpers.js");
|
|
19
21
|
const { requireVmState, knockForVm, sshCmd, MUX_OPTS } = await import("../azure.js");
|
|
@@ -46,6 +48,32 @@ export function registerTestCommands(azure) {
|
|
|
46
48
|
process.exit(1);
|
|
47
49
|
}
|
|
48
50
|
|
|
51
|
+
// Apply landscape if specified
|
|
52
|
+
let landscapeFile = opts.landscape;
|
|
53
|
+
if (!landscapeFile && opts.landscapeTemplate) {
|
|
54
|
+
const templateDir = path.join(root, "operator-cli/src/plugins/bundled/fops-plugin-foundation/templates/landscapes");
|
|
55
|
+
const templateName = opts.landscapeTemplate.endsWith(".fcl") ? opts.landscapeTemplate : `${opts.landscapeTemplate}.fcl`;
|
|
56
|
+
landscapeFile = path.join(templateDir, templateName);
|
|
57
|
+
try {
|
|
58
|
+
await fsp.access(landscapeFile);
|
|
59
|
+
} catch {
|
|
60
|
+
console.error(chalk.red(`\n Landscape template not found: ${templateName}`));
|
|
61
|
+
console.error(chalk.dim(` Available: demo.fcl, pipeline_demo.fcl\n`));
|
|
62
|
+
process.exit(1);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
if (landscapeFile) {
|
|
66
|
+
console.log(chalk.cyan(`\n Applying landscape: ${path.basename(landscapeFile)}…\n`));
|
|
67
|
+
const { azureApply } = await import("../azure.js");
|
|
68
|
+
try {
|
|
69
|
+
await azureApply(landscapeFile, { vmName: state.vmName });
|
|
70
|
+
console.log(chalk.green(" ✓ Landscape applied\n"));
|
|
71
|
+
} catch (err) {
|
|
72
|
+
console.error(chalk.red(`\n Failed to apply landscape: ${err.message}\n`));
|
|
73
|
+
process.exit(1);
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
49
77
|
const vmUrl = state.publicUrl || `https://${ip}`;
|
|
50
78
|
const apiUrl = `${vmUrl}/api`;
|
|
51
79
|
|